Skip to content

Commit 513a0c3

Browse files
committed
trying block size fix
1 parent ada408b commit 513a0c3

2 files changed

Lines changed: 5 additions & 3 deletions

File tree

src/maxdiffusion/configs/ltx2_video.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#hardware
22
hardware: 'tpu'
33
skip_jax_distributed_system: False
4-
attention: 'dot_product'
4+
attention: 'flash'
55
attention_sharding_uniform: True
66
precision: 'bf16'
77
data_sharding: ['data', 'fsdp', 'context', 'tensor']

src/maxdiffusion/models/attention_flax.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,8 +235,10 @@ def _tpu_flash_attention(
235235
q_max_block_size = 1024 if dtype == jnp.bfloat16 else 512
236236
# This is the case for cross-attn.
237237
if key.shape[1] != query.shape[1]:
238-
assert key.shape[1] % 128 == 0
239-
kv_max_block_size = key.shape[1]
238+
if key.shape[1] % 128 != 0:
239+
kv_max_block_size = ((key.shape[1] + 127) // 128) * 128
240+
else:
241+
kv_max_block_size = key.shape[1]
240242
else:
241243
kv_max_block_size = q_max_block_size
242244
# ensure that for cross attention we override the block sizes.

0 commit comments

Comments (0)