Reformat LTX2 attention, transformer, and pipeline code (whitespace and line-wrapping changes only)

prishajain1 · prishajain1 · commit b595666befd1 · 2026-04-08T03:16:28.000Z
diff --git a/src/maxdiffusion/models/ltx2/attention_ltx2.py b/src/maxdiffusion/models/ltx2/attention_ltx2.py
@@ -455,7 +455,7 @@ def __call__(
       query = self.to_q(hidden_states)
       key = self.to_k(context)
       value = self.to_v(context)
-    
+
     with jax.named_scope("QKV Norm"):
       query = self.norm_q(query)
       key = self.norm_k(key)
diff --git a/src/maxdiffusion/models/ltx2/transformer_ltx2.py b/src/maxdiffusion/models/ltx2/transformer_ltx2.py
@@ -1026,24 +1026,24 @@ def scan_fn(carry, block):
         )(carry, self.transformer_blocks)
       else:
         for block in self.transformer_blocks:
-            hidden_states, audio_hidden_states = block(
-                hidden_states=hidden_states,
-                audio_hidden_states=audio_hidden_states,
-                encoder_hidden_states=encoder_hidden_states,
-                audio_encoder_hidden_states=audio_encoder_hidden_states,
-                temb=temb,
-                temb_audio=temb_audio,
-                temb_ca_scale_shift=video_cross_attn_scale_shift,
-                temb_ca_audio_scale_shift=audio_cross_attn_scale_shift,
-                temb_ca_gate=video_cross_attn_a2v_gate,
-                temb_ca_audio_gate=audio_cross_attn_v2a_gate,
-                video_rotary_emb=video_rotary_emb,
-                audio_rotary_emb=audio_rotary_emb,
-                ca_video_rotary_emb=video_cross_attn_rotary_emb,
-                ca_audio_rotary_emb=audio_cross_attn_rotary_emb,
-                encoder_attention_mask=encoder_attention_mask,
-                audio_encoder_attention_mask=audio_encoder_attention_mask,
-            )
+          hidden_states, audio_hidden_states = block(
+              hidden_states=hidden_states,
+              audio_hidden_states=audio_hidden_states,
+              encoder_hidden_states=encoder_hidden_states,
+              audio_encoder_hidden_states=audio_encoder_hidden_states,
+              temb=temb,
+              temb_audio=temb_audio,
+              temb_ca_scale_shift=video_cross_attn_scale_shift,
+              temb_ca_audio_scale_shift=audio_cross_attn_scale_shift,
+              temb_ca_gate=video_cross_attn_a2v_gate,
+              temb_ca_audio_gate=audio_cross_attn_v2a_gate,
+              video_rotary_emb=video_rotary_emb,
+              audio_rotary_emb=audio_rotary_emb,
+              ca_video_rotary_emb=video_cross_attn_rotary_emb,
+              ca_audio_rotary_emb=audio_cross_attn_rotary_emb,
+              encoder_attention_mask=encoder_attention_mask,
+              audio_encoder_attention_mask=audio_encoder_attention_mask,
+          )
 
     # 6. Output layers
     with jax.named_scope("Output Projection & Norm"):
diff --git a/src/maxdiffusion/pipelines/ltx2/ltx2_pipeline.py b/src/maxdiffusion/pipelines/ltx2/ltx2_pipeline.py
@@ -1233,10 +1233,11 @@ def run_connectors(graphdef, state, hidden_states, attention_mask):
       )
 
       import time
+
       timesteps_jax = jnp.array(timesteps, dtype=jnp.float32)
       for i, t_val in enumerate(timesteps):
         t = timesteps_jax[i]
-        
+
         # Isolate input sharding to scan_layers=False to avoid affecting the standard path
         latents_jax_sharded = latents_jax
         audio_latents_jax_sharded = audio_latents_jax
@@ -1340,12 +1341,11 @@ def run_connectors(graphdef, state, hidden_states, attention_mask):
       mesh = latents.sharding.mesh
       replicated_sharding = NamedSharding(mesh, P())
       latents = jax.lax.with_sharding_constraint(latents, replicated_sharding)
-      
+
       # Replicate VAE weights
       graphdef, state = nnx.split(self.vae)
       state = jax.tree_util.tree_map(
-          lambda x: jax.lax.with_sharding_constraint(x, replicated_sharding) if isinstance(x, jax.Array) else x, 
-          state
+          lambda x: jax.lax.with_sharding_constraint(x, replicated_sharding) if isinstance(x, jax.Array) else x, state
       )
       self.vae = nnx.merge(graphdef, state)
     except Exception as e: