Skip to content

Commit 3fb489a

Browse files
committed
Debug logging added for shape mismatch
1 parent 9035b2f commit 3fb489a

2 files changed

Lines changed: 23 additions & 1 deletion

File tree

src/maxdiffusion/pipelines/wan/wan_pipeline.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,7 @@ def _prepare_model_inputs_i2v(
602602
if prompt is not None and isinstance(prompt, str):
603603
prompt = [prompt]
604604
batch_size = len(prompt) if prompt is not None else prompt_embeds.shape[0] // num_videos_per_prompt
605+
print(f"[DEBUG PREP] num_prompts={batch_size}, num_videos_per_prompt={num_videos_per_prompt}")
605606
effective_batch_size = batch_size * num_videos_per_prompt
606607

607608
# 1. Encode Prompts
@@ -613,6 +614,8 @@ def _prepare_model_inputs_i2v(
613614
prompt_embeds=prompt_embeds,
614615
negative_prompt_embeds=negative_prompt_embeds,
615616
)
617+
print(f"[DEBUG PREP] prompt_embeds shape after encode_prompt: {prompt_embeds.shape}")
618+
616619

617620
# 2. Encode Image
618621
if image_embeds is None:
@@ -622,9 +625,11 @@ def _prepare_model_inputs_i2v(
622625
else:
623626
images_to_encode = [image, last_image]
624627
image_embeds = self.encode_image(images_to_encode, num_videos_per_prompt=num_videos_per_prompt)
628+
print(f"[DEBUG PREP] image_embeds shape after encode_image: {image_embeds.shape}")
625629

626630
if batch_size > 1:
627631
image_embeds = jnp.tile(image_embeds, (batch_size, 1, 1))
632+
print(f"[DEBUG PREP] image_embeds shape after tile: {image_embeds.shape}")
628633

629634
transformer_dtype = self.config.activations_dtype
630635
image_embeds = image_embeds.astype(transformer_dtype)
@@ -633,11 +638,21 @@ def _prepare_model_inputs_i2v(
633638
negative_prompt_embeds = negative_prompt_embeds.astype(transformer_dtype)
634639

635640
data_sharding = NamedSharding(self.mesh, P(*self.config.data_sharding))
641+
print(f"[DEBUG PREP] data_sharding spec: {self.config.data_sharding}")
636642

637643
prompt_embeds = jax.device_put(prompt_embeds, data_sharding)
638644
negative_prompt_embeds = jax.device_put(negative_prompt_embeds, data_sharding)
639645
image_embeds = jax.device_put(image_embeds, data_sharding)
640646

647+
print(f"[DEBUG PREP] SHARDED prompt_embeds.shape: {prompt_embeds.shape}")
648+
print(f"[DEBUG PREP] SHARDED image_embeds.shape: {image_embeds.shape}")
649+
print(f"[DEBUG PREP] jax.process_index(): {jax.process_index()}")
650+
651+
if image_embeds.addressable_shards:
652+
print(f"[DEBUG PREP] LOCAL image_embeds shape: {image_embeds.addressable_shards[0].data.shape}")
653+
if prompt_embeds.addressable_shards:
654+
print(f"[DEBUG PREP] LOCAL prompt_embeds shape: {prompt_embeds.addressable_shards[0].data.shape}")
655+
641656
return prompt_embeds, negative_prompt_embeds, image_embeds, effective_batch_size
642657

643658

src/maxdiffusion/pipelines/wan/wan_pipeline_i2v_2p1.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,12 @@ def loop_body(step, vals):
267267
rng, timestep_rng = jax.random.split(rng)
268268
t = jnp.array(scheduler_state.timesteps, dtype=jnp.int32)[step]
269269

270+
print(f"[DEBUG LOOP {step}] on process {jax.process_index()}:")
271+
print(f"[DEBUG LOOP {step}] initial latents local shape: {latents.shape}")
272+
print(f"[DEBUG LOOP {step}] initial prompt_embeds local shape: {prompt_embeds.shape}")
273+
print(f"[DEBUG LOOP {step}] initial image_embeds local shape: {image_embeds.shape}")
274+
275+
270276
latents_input = latents
271277
if do_classifier_free_guidance:
272278
latents_input = jnp.concatenate([latents, latents], axis=0)
@@ -281,7 +287,8 @@ def loop_body(step, vals):
281287
prompt_embeds_input = jnp.concatenate([prompt_embeds, negative_prompt_embeds], axis=0)
282288
if image_embeds is not None:
283289
image_embeds_input = jnp.concatenate([image_embeds, image_embeds], axis=0)
284-
290+
print(f"[DEBUG LOOP {step}] prompt_embeds_input local shape: {prompt_embeds_input.shape}")
291+
print(f"[DEBUG LOOP {step}] image_embeds_input local shape: {image_embeds_input.shape}")
285292

286293
noise_pred, latents = transformer_forward_pass(
287294
graphdef, sharded_state, rest_of_state,

0 commit comments

Comments (0)