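Reformatted with pyink (formatting only: indentation, trailing whitespace, line wrapping, and trailing commas; no functional changes)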

prishajain1 · prishajain1 · commit fd90736d4282 · 2026-02-11T13:46:51.000Z
diff --git a/src/maxdiffusion/models/ltx2/attention_ltx2.py b/src/maxdiffusion/models/ltx2/attention_ltx2.py
@@ -59,49 +59,48 @@ def apply_split_rotary_emb(x: Array, freqs: Tuple[Array, Array]) -> Array:
   """
   Applies Split RoPE to input x.
   Logic matches Diffusers apply_split_rotary_emb.
-  
+
   Args:
-      x: Input tensor. 
+      x: Input tensor.
          If ndim=3 [B, S, D], it will be reshaped to satisfy cos/sin shapes if needed.
-      freqs: Tuple of (cos, sin). 
+      freqs: Tuple of (cos, sin).
              Expected to be [B, H, S, D//2] if coming from LTX2RotaryPosEmbed(split).
   """
   cos, sin = freqs
-  
+
   x_dtype = x.dtype
   needed_reshape = False
   original_shape = x.shape
-  
+
   if x.ndim != 4 and cos.ndim == 4:
-      b = x.shape[0]
-      h, s, r = cos.shape[1], cos.shape[2], cos.shape[3]
-      x = x.reshape(b, s, h, -1).transpose(0, 2, 1, 3)
-      needed_reshape = True
-      
+    b = x.shape[0]
+    h, s, r = cos.shape[1], cos.shape[2], cos.shape[3]
+    x = x.reshape(b, s, h, -1).transpose(0, 2, 1, 3)
+    needed_reshape = True
+
   last_dim = x.shape[-1]
   r = last_dim // 2
-  
+
   split_x = x.reshape(*x.shape[:-1], 2, r)
-  
+
   first_x = split_x[..., 0, :]
   second_x = split_x[..., 1, :]
-  
+
   cos_u = jnp.expand_dims(cos, axis=-2)
   sin_u = jnp.expand_dims(sin, axis=-2)
-  
+
   out = split_x * cos_u
-  
+
   out_first = out[..., 0, :] - second_x * sin_u.squeeze(-2)
   out_second = out[..., 1, :] + first_x * sin_u.squeeze(-2)
-  
+
   out = jnp.stack([out_first, out_second], axis=-2)
   out = out.reshape(*out.shape[:-2], last_dim)
-  
+
   if needed_reshape:
-      out = out.transpose(0, 2, 1, 3).reshape(original_shape)
-      
-  return out.astype(x_dtype)
+    out = out.transpose(0, 2, 1, 3).reshape(original_shape)
 
+  return out.astype(x_dtype)
 
 
 class LTX2RotaryPosEmbed(nnx.Module):
@@ -308,11 +307,10 @@ def __call__(self, coords: Array) -> Tuple[Array, Array]:
         cos_freq = jnp.concatenate([cos_padding, cos_freq], axis=-1)
         sin_freq = jnp.concatenate([sin_padding, sin_freq], axis=-1)
 
-      
       b = cos_freq.shape[0]
       s = cos_freq.shape[1]
       h = self.num_attention_heads
-      
+
       cos_freqs = cos_freq.reshape(b, s, h, -1).transpose(0, 2, 1, 3)
       sin_freqs = sin_freq.reshape(b, s, h, -1).transpose(0, 2, 1, 3)
 
@@ -352,8 +350,12 @@ def __init__(
     self.to_v = nnx.Linear(kv_dim, self.inner_dim, use_bias=bias, rngs=rngs, dtype=dtype)
 
     # 2. Normalization (Applied to full inner_dim, NOT per-head)
-    self.norm_q = nnx.RMSNorm(self.inner_dim, epsilon=eps, dtype=jnp.float32, param_dtype=jnp.float32, use_scale=True, rngs=rngs)
-    self.norm_k = nnx.RMSNorm(self.inner_dim, epsilon=eps, dtype=jnp.float32, param_dtype=jnp.float32, use_scale=True, rngs=rngs)
+    self.norm_q = nnx.RMSNorm(
+        self.inner_dim, epsilon=eps, dtype=jnp.float32, param_dtype=jnp.float32, use_scale=True, rngs=rngs
+    )
+    self.norm_k = nnx.RMSNorm(
+        self.inner_dim, epsilon=eps, dtype=jnp.float32, param_dtype=jnp.float32, use_scale=True, rngs=rngs
+    )
 
     # 3. Output
     self.to_out = nnx.Linear(self.inner_dim, query_dim, use_bias=out_bias, rngs=rngs, dtype=dtype)
@@ -397,23 +399,23 @@ def __call__(
     # 3. Apply RoPE
     if rotary_emb is not None:
       if hasattr(self, "rope_type") and self.rope_type == "split":
-         # Split RoPE: passing full freqs [B, H, S, D//2]
-         # apply_split_rotary_emb handles reshaping query/key
-         
-         query = apply_split_rotary_emb(query, rotary_emb)
-         
-         if k_rotary_emb is not None:
-             key = apply_split_rotary_emb(key, k_rotary_emb)
-         elif encoder_hidden_states is None:
-             key = apply_split_rotary_emb(key, rotary_emb)
-             
+        # Split RoPE: passing full freqs [B, H, S, D//2]
+        # apply_split_rotary_emb handles reshaping query/key
+
+        query = apply_split_rotary_emb(query, rotary_emb)
+
+        if k_rotary_emb is not None:
+          key = apply_split_rotary_emb(key, k_rotary_emb)
+        elif encoder_hidden_states is None:
+          key = apply_split_rotary_emb(key, rotary_emb)
+
       else:
-         # Interleaved (Default)
-         query = apply_rotary_emb(query, rotary_emb)
-         if k_rotary_emb is not None:
-           key = apply_rotary_emb(key, k_rotary_emb)
-         elif encoder_hidden_states is None:
-           key = apply_rotary_emb(key, rotary_emb)
+        # Interleaved (Default)
+        query = apply_rotary_emb(query, rotary_emb)
+        if k_rotary_emb is not None:
+          key = apply_rotary_emb(key, k_rotary_emb)
+        elif encoder_hidden_states is None:
+          key = apply_rotary_emb(key, rotary_emb)
 
     # 4. Attention
     # NNXAttentionOp expects flattened input [B, S, InnerDim] for flash kernel
diff --git a/src/maxdiffusion/models/ltx2/transformer_ltx2.py b/src/maxdiffusion/models/ltx2/transformer_ltx2.py
@@ -208,7 +208,12 @@ def __init__(
 
     # 3. Audio-to-Video (a2v) and Video-to-Audio (v2a) Cross-Attention
     self.audio_to_video_norm = nnx.RMSNorm(
-        dim, epsilon=self.norm_eps, use_scale=self.norm_elementwise_affine, rngs=rngs, dtype=jnp.float32, param_dtype=jnp.float32
+        dim,
+        epsilon=self.norm_eps,
+        use_scale=self.norm_elementwise_affine,
+        rngs=rngs,
+        dtype=jnp.float32,
+        param_dtype=jnp.float32,
     )
     self.audio_to_video_attn = LTX2Attention(
         rngs=rngs,
@@ -252,7 +257,12 @@ def __init__(
 
     # 4. Feed Forward
     self.norm3 = nnx.RMSNorm(
-        dim, epsilon=self.norm_eps, use_scale=self.norm_elementwise_affine, rngs=rngs, dtype=jnp.float32, param_dtype=jnp.float32
+        dim,
+        epsilon=self.norm_eps,
+        use_scale=self.norm_elementwise_affine,
+        rngs=rngs,
+        dtype=jnp.float32,
+        param_dtype=jnp.float32,
     )
     self.ff = NNXSimpleFeedForward(
         rngs=rngs,
diff --git a/src/maxdiffusion/tests/ltx2_parity_test.py b/src/maxdiffusion/tests/ltx2_parity_test.py
@@ -419,7 +419,7 @@ def test_import_parity_comparison(self):
           num_layers=1,
           mesh=self.mesh,
           attention_kernel="dot_product",
-          rope_type="interleaved"
+          rope_type="interleaved",
       )
 
     # 2. Convert Weights (PyTorch -> Flax NNX)
diff --git a/src/maxdiffusion/tests/ltx2_transformer_test.py b/src/maxdiffusion/tests/ltx2_transformer_test.py
@@ -112,7 +112,7 @@ def test_ltx2_rope_split(self):
     base_num_frames = 8
     base_height = 32
     base_width = 32
-    
+
     # Video RoPE Split
     rope = LTX2RotaryPosEmbed(
         dim=dim,
@@ -122,18 +122,17 @@ def test_ltx2_rope_split(self):
         base_height=base_height,
         base_width=base_width,
         modality="video",
-        rope_type="split"
+        rope_type="split",
     )
-    ids = jnp.ones((1, 3, 10)) # (B, Axes, S)
+    ids = jnp.ones((1, 3, 10))  # (B, Axes, S)
     cos, sin = rope(ids)
-    
+
     # Check output shape
     # Split RoPE returns [B, H, S, D//2]
     # dim=1024, heads=32 => head_dim=32 => D//2 = 16
     self.assertEqual(cos.shape, (1, 32, 10, 16))
     self.assertEqual(sin.shape, (1, 32, 10, 16))
 
-
   def test_ltx2_ada_layer_norm_single(self):
     """Tests LTX2AdaLayerNormSingle initialization and execution."""
     key = jax.random.key(0)

Original file line number	Diff line number	Diff line change
`@@ -419,7 +419,7 @@ def test_import_parity_comparison(self):`
`419`	`419`	`num_layers=1,`
`420`	`420`	`mesh=self.mesh,`
`421`	`421`	`attention_kernel="dot_product",`
`422`		`- rope_type="interleaved"`
	`422`	`+ rope_type="interleaved",`
`423`	`423`	`)`
`424`	`424`
`425`	`425`	`# 2. Convert Weights (PyTorch -> Flax NNX)`