Modify tests for split RoPE (rope = split)

prishajain1 · prishajain1 · commit 1fd5338897f8 · 2026-02-11T11:09:41.000+05:30
diff --git a/src/maxdiffusion/tests/ltx2_parity_test.py b/src/maxdiffusion/tests/ltx2_parity_test.py
@@ -86,8 +86,6 @@ def test_transformer_block_shapes(self):
           audio_attention_head_dim=128,
           audio_cross_attention_dim=cross_dim,
           activation_fn="gelu",
-          qk_norm="rms_norm_across_heads",
-          qk_norm="rms_norm_across_heads",
           mesh=self.mesh,
       )
 
@@ -194,32 +192,6 @@ def test_transformer_3d_model_instantiation_and_forward(self):
           mesh=self.mesh,
       )
 
-    # Inputs
-    # hidden_states: (B, F, H, W, C) or (B, L, C)?
-    # Diffusers `forward` takes `hidden_states` usually as latents.
-    # If it's 3D, it might expect (B, C, F, H, W) or (B, F, C, H, W)?
-    # Checking `transformer_ltx2.py` `__call__` Line 680:
-    # `hidden_states = self.proj_in(hidden_states)`
-    # `proj_in` is nnx.Linear.
-    # This implies `hidden_states` input is ALREADY flattened/sequenced or `proj_in` assumes channel-last inputs.
-    # If `proj_in` is Linear, input must be compatible with matrix mult.
-    # Usually Transformers expect (B, L, D) or (B, N, D).
-    # But `prepare_video_coords` logic suggests it handles spatial awareness.
-    # The PROMPT usually implies `latents` of shape (B, C, F, H, W).
-    # BUT `nnx.Linear` (Dense) applies to the last dimension.
-    # If input is (B, C, F, H, W), Linear would act on W. That's wrong.
-    # Diffusers LTX usually patchifies EXTERNALLY or has a conv patch embed?
-    # In my definition (Line 491): `self.proj_in = nnx.Linear(...)`.
-    # This differs from Conv3d.
-    # This implies the user MUST pass flattened tokens?
-    # Re-checking Diffusers implementation...
-    # If `LTX2VideoTransformer3DModel` in Diffusers uses `patch_embed` (Conv), it takes 5D.
-    # Verify `transformer_ltx2.py` user edits...
-    # Step 426 (Original) had `nnx.Conv`.
-    # Step 491 (New) has `nnx.Linear`.
-    # This suggests input is EXPECTED to be flattened/patchified already OR raw channel-last (B, ..., C).
-    # IMPORTANT: if `proj_in` is Linear, we pass (B, L, C).
-
     # Let's pass (B, L, C).
     hidden_states = jnp.zeros((self.batch_size, self.seq_len, self.in_channels))
     audio_hidden_states = jnp.zeros((self.batch_size, 128, self.audio_in_channels))
@@ -352,15 +324,6 @@ def test_scan_remat_parity(self):
       model_loop = LTX2VideoTransformer3DModel(**args, scan_layers=False, mesh=self.mesh)
       model_remat = LTX2VideoTransformer3DModel(**args, scan_layers=True, remat_policy="full", mesh=self.mesh)
 
-    # 2. Sync weights (crucial for parity)
-    # We can just copy params from scan to loop/remat
-    # Assuming identical structure, nnx.state(model) should be compatible?
-    # scan_layers=True uses `nnx.scan` which might change state structure (Scan variable?)
-    # Actually maxdiffusion `transformer_wan.py` shows they are compatible if variable structure is clean.
-    # But `nnx.scan` lifts variables into `Scan` collections sometimes?
-    # Let's try simple state transfer or just basic shape check if transfer fails.
-    # Ideally we want exact numerical parity.
-
     # Inputs
     hidden_states = jnp.ones((self.batch_size, self.seq_len, self.in_channels)) * 0.5
     audio_hidden_states = jnp.ones((self.batch_size, 128, self.audio_in_channels)) * 0.5
diff --git a/src/maxdiffusion/tests/ltx_2_transformer_test.py b/src/maxdiffusion/tests/ltx_2_transformer_test.py
@@ -128,14 +128,10 @@ def test_ltx2_rope_split(self):
     cos, sin = rope(ids)
     
     # Check output shape
-    # Split RoPE returns concatenated [cos, cos] to match dim
-    self.assertEqual(cos.shape, (1, 10, dim))
-    self.assertEqual(sin.shape, (1, 10, dim))
-    
-    # Verify values are concatenated
-    cos1, cos2 = jnp.split(cos, 2, axis=-1)
-    # They should be identical
-    self.assertTrue(jnp.allclose(cos1, cos2))
+    # Split RoPE returns [B, H, S, head_dim // 2]
+    # dim=1024, heads=32 => head_dim = dim // heads = 32 => head_dim // 2 = 16
+    self.assertEqual(cos.shape, (1, 32, 10, 16))
+    self.assertEqual(sin.shape, (1, 32, 10, 16))
 
 
   def test_ltx2_ada_layer_norm_single(self):