Fix connectors dtype in LTX2 pipeline (use config.weights_dtype) and remove debug print_shape calls from LTX2 transformer

prishajain1 · prishajain1 · commit 76dec0ed05cb · 2026-03-06T23:10:42.000+05:30
diff --git a/src/maxdiffusion/models/ltx2/transformer_ltx2.py b/src/maxdiffusion/models/ltx2/transformer_ltx2.py
@@ -20,20 +20,7 @@
 import flax.linen as nn
 import numpy as np
 
-printed_count = 0
-def print_shape(name, tensor):
-    global printed_count
-    if printed_count > 1000:
-        return
-    if tensor is not None:
-        def _print_fn(n, t):
-            t_np = np.array(t, dtype=np.float32)
-            print(f"[{n}] min: {t_np.min():.5f}, max: {t_np.max():.5f}, mean: {t_np.mean():.5f}, std: {t_np.std():.5f}")
-        if isinstance(tensor, jax.core.Tracer):
-            jax.debug.callback(_print_fn, name, tensor)
-        else:
-            _print_fn(name, tensor)
-        printed_count += 1
+
 
 from maxdiffusion.models.ltx2.attention_ltx2 import LTX2Attention, LTX2RotaryPosEmbed
 from maxdiffusion.models.attention_flax import NNXSimpleFeedForward
@@ -358,16 +345,7 @@ def __call__(
   ) -> Tuple[jax.Array, jax.Array]:
     batch_size = hidden_states.shape[0]
 
-    print_shape("Block Input hidden_states", hidden_states)
-    print_shape("Block Input audio_hidden_states", audio_hidden_states)
-    print_shape("Block Input encoder_hidden_states", encoder_hidden_states)
-    print_shape("Block Input audio_encoder_hidden_states", audio_encoder_hidden_states)
-    print_shape("Block Input temb", temb)
-    print_shape("Block Input temb_audio", temb_audio)
-    print_shape("Block Input temb_ca_scale_shift", temb_ca_scale_shift)
-    print_shape("Block Input temb_ca_audio_scale_shift", temb_ca_audio_scale_shift)
-    print_shape("Block Input temb_ca_gate", temb_ca_gate)
-    print_shape("Block Input temb_ca_audio_gate", temb_ca_audio_gate)
+
 
     axis_names = nn.logical_to_mesh_axes(("activation_batch", "activation_length", "activation_embed"))
     hidden_states = jax.lax.with_sharding_constraint(hidden_states, axis_names)
@@ -397,12 +375,8 @@ def __call__(
     scale_mlp = ada_values[:, :, 4, :]
     gate_mlp = ada_values[:, :, 5, :]
 
-    print_shape("shift_msa", shift_msa)
-    print_shape("scale_msa", scale_msa)
-    print_shape("gate_msa", gate_msa)
-    print_shape("shift_mlp", shift_mlp)
-    print_shape("scale_mlp", scale_mlp)
-    print_shape("gate_mlp", gate_mlp)
+
+
 
     norm_hidden_states = norm_hidden_states * (1 + scale_msa) + shift_msa
 
@@ -923,11 +897,7 @@ def __call__(
         audio_encoder_attention_mask = jnp.expand_dims(audio_encoder_attention_mask, axis=1)
 
     batch_size = hidden_states.shape[0]
-    print_shape("Model Input hidden_states", hidden_states)
-    print_shape("Model Input audio_hidden_states", audio_hidden_states)
-    print_shape("Model Input encoder_hidden_states", encoder_hidden_states)
-    print_shape("Model Input audio_encoder_hidden_states", audio_encoder_hidden_states)
-    print_shape("Model Input timestep", timestep)
+
 
     # 1. Prepare RoPE positional embeddings
     if video_coords is None:
diff --git a/src/maxdiffusion/pipelines/ltx2/ltx2_pipeline.py b/src/maxdiffusion/pipelines/ltx2/ltx2_pipeline.py
@@ -303,7 +303,7 @@ def create_model(rngs: nnx.Rngs, config: HyperParameters):
               subfolder="connectors",
               rngs=rngs,
               mesh=mesh,
-              dtype=jnp.float32,
+              dtype=config.weights_dtype if hasattr(config, "weights_dtype") else jnp.float32,
               weights_dtype=config.weights_dtype if hasattr(config, "weights_dtype") else jnp.float32,
           )
           return connectors

Original file line number	Diff line number	Diff line change
`@@ -303,7 +303,7 @@ def create_model(rngs: nnx.Rngs, config: HyperParameters):`
`303`	`303`	`subfolder="connectors",`
`304`	`304`	`rngs=rngs,`
`305`	`305`	`mesh=mesh,`
`306`		`- dtype=jnp.float32,`
	`306`	`+ dtype=config.weights_dtype if hasattr(config, "weights_dtype") else jnp.float32,`
`307`	`307`	`weights_dtype=config.weights_dtype if hasattr(config, "weights_dtype") else jnp.float32,`
`308`	`308`	`)`
`309`	`309`	`return connectors`