Test for LTX2 transformer: pass RoPE ids as (B, Axes, S) and remove exploratory comments

prishajain1 · prishajain1 · commit 2207e08c1c34 · 2026-02-11T09:51:52.000+05:30
diff --git a/src/maxdiffusion/tests/ltx_2_transformer_test.py b/src/maxdiffusion/tests/ltx_2_transformer_test.py
@@ -97,23 +97,7 @@ def test_ltx2_rope(self):
         base_width=base_width,
         modality="video"
     )
-    
-    # Create dummy grid
-    # For video: (B, T, H, W) -> flattened indices? No, RoPE takes `ids` often, or computes them internally?
-    # LTX2RotaryPosEmbed.__call__ takes `ids`.
-    # Let's check how it's called in `transformer_ltx2.py`.
-    # It seems `transformer_ltx2.py` calls `pool_patches` or similar to generate grid?
-    # Actually `LTX2RotaryPosEmbed` seems to have logic to generate embeddings from indices.
-    
-    # Let's try calling it with dummy IDs if expected, or if it generates them.
-    # Looking at `test_attention_ltx2.py`, it passes `ids` or similar.
-    # In `transformer_ltx2.py`, `prepare_video_coords` generates coordinates.
-    # But `LTX2RotaryPosEmbed` forward might take `ids`.
-    # Wait, `transformer_ltx2.py` defines `self.rope`.
-    # Let's verify `LTX2RotaryPosEmbed` signature in `test_attention_ltx2.py` or implementation.
-    # `test_attention_ltx2.py`: `rope_jax(jnp.array(np_ids))`
-    
-    ids = jnp.ones((1, 10, 3)) # (B, S, 3) for 3D coords
+    ids = jnp.ones((1, 3, 10)) # (B, Axes, S) for 3D coords
     cos, sin = rope(ids)
     
     # Check output shape
@@ -224,15 +208,15 @@ def test_ltx2_transformer_model(self):
           num_attention_heads=self.num_heads,
           attention_head_dim=self.head_dim,
           cross_attention_dim=self.cross_dim,
-          caption_channels=32, # kept small for now, or match parity if needed
+          caption_channels=32,
           audio_in_channels=audio_in_channels,
           audio_out_channels=audio_in_channels,
           audio_num_attention_heads=self.audio_num_heads,
           audio_attention_head_dim=self.audio_head_dim,
           audio_cross_attention_dim=self.audio_cross_dim,
           num_layers=1,
           mesh=self.mesh,
-          attention_kernel="dot_product" # Force dot_product for test stability on CPU/small config
+          attention_kernel="dot_product"
       )
       
       batch_size = self.batch_size