Add debug prints for tiled decode and audio; remove the vae.tile_sample_min_width = 1024 override

prishajain1 · prishajain1 · commit d1f1e7cfeb68 · 2026-04-11T08:12:29.000+05:30
diff --git a/src/maxdiffusion/models/ltx2/autoencoder_kl_ltx2.py b/src/maxdiffusion/models/ltx2/autoencoder_kl_ltx2.py
@@ -1277,6 +1277,7 @@ def enable_tiling(
 
   def blend_v(self, a: jax.Array, b: jax.Array, blend_extent: int) -> jax.Array:
     blend_extent = min(a.shape[2], b.shape[2], blend_extent)
+    print(f"DEBUG: blend_v called with a.shape={a.shape}, b.shape={b.shape}, blend_extent={blend_extent}")
     if blend_extent <= 0:
       return b
 
@@ -1289,6 +1290,7 @@ def blend_v(self, a: jax.Array, b: jax.Array, blend_extent: int) -> jax.Array:
 
   def blend_h(self, a: jax.Array, b: jax.Array, blend_extent: int) -> jax.Array:
     blend_extent = min(a.shape[3], b.shape[3], blend_extent)
+    print(f"DEBUG: blend_h called with a.shape={a.shape}, b.shape={b.shape}, blend_extent={blend_extent}")
     if blend_extent <= 0:
       return b
 
diff --git a/src/maxdiffusion/pipelines/ltx2/ltx2_pipeline.py b/src/maxdiffusion/pipelines/ltx2/ltx2_pipeline.py
@@ -469,7 +469,6 @@ def create_model(rngs: nnx.Rngs, config: HyperParameters):
             mesh=mesh,
             **vae_kwargs,
         )
-      vae.tile_sample_min_width = 1024
       return vae
 
     p_model_factory = partial(create_model, config=config)
@@ -1717,6 +1716,7 @@ def convert_to_vel(lat, x0):
       latents = (1 - decode_noise_scale) * latents + decode_noise_scale * noise
 
       latents = latents.astype(self.vae.dtype)
+      print(f"DEBUG: latents shape before VAE decode: {latents.shape}")
       video = self.vae.decode(latents, temb=timestep, return_dict=False)[0]
     else:
       latents = latents.astype(self.vae.dtype)
@@ -1742,6 +1742,9 @@ def convert_to_vel(lat, x0):
     # Convert audio to numpy
     audio = np.array(audio)
     print(f"DEBUG: final audio shape: {audio.shape}")
+    print(f"DEBUG: audio min: {audio.min()}")
+    print(f"DEBUG: audio max: {audio.max()}")
+    print(f"DEBUG: audio mean: {audio.mean()}")
 
     return LTX2PipelineOutput(frames=video, audio=audio)