Skip to content

Commit fd7fa20

Browse files
committed
Add debug timing instrumentation (time.perf_counter) around prompt encoding, VAE decoding, and the vocoder in ltx2_pipeline
1 parent 184edb8 commit fd7fa20

1 file changed

Lines changed: 10 additions & 0 deletions

File tree

src/maxdiffusion/pipelines/ltx2/ltx2_pipeline.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,6 +1093,8 @@ def __call__(
10931093
)
10941094

10951095
# 2. Encode inputs (Text)
1096+
import time
1097+
s_text = time.perf_counter()
10961098
prompt_embeds, prompt_attention_mask, negative_prompt_embeds, negative_prompt_attention_mask = self.encode_prompt(
10971099
prompt,
10981100
negative_prompt,
@@ -1105,6 +1107,8 @@ def __call__(
11051107
max_sequence_length=max_sequence_length,
11061108
dtype=dtype,
11071109
)
1110+
t_text = time.perf_counter() - s_text
1111+
max_logging.log(f"[Tuning] Prompt encoding took: {t_text:.4f} seconds")
11081112

11091113
# 3. Prepare latents
11101114
batch_size = prompt_embeds[0].shape[0] if isinstance(prompt_embeds, list) else prompt_embeds.shape[0]
@@ -1326,6 +1330,7 @@ def run_connectors(graphdef, state, hidden_states, attention_mask):
13261330
if output_type == "latent":
13271331
return LTX2PipelineOutput(frames=latents, audio=audio_latents)
13281332

1333+
s_vae = time.perf_counter()
13291334
if getattr(self.vae.config, "timestep_conditioning", False):
13301335
noise = jax.random.normal(generator, latents.shape, dtype=latents.dtype)
13311336

@@ -1346,6 +1351,8 @@ def run_connectors(graphdef, state, hidden_states, attention_mask):
13461351
else:
13471352
latents = latents.astype(self.vae.dtype)
13481353
video = self.vae.decode(latents, return_dict=False)[0]
1354+
t_vae = time.perf_counter() - s_vae
1355+
max_logging.log(f"[Tuning] VAE decoding took: {t_vae:.4f} seconds")
13491356
# Post-process video (converts to numpy/PIL)
13501357
# VAE outputs (B, T, H, W, C), but video processor expects (B, C, T, H, W)
13511358
video_np = np.array(video).transpose(0, 4, 1, 2, 3)
@@ -1357,7 +1364,10 @@ def run_connectors(graphdef, state, hidden_states, attention_mask):
13571364

13581365
# Audio VAE outputs (B, T, F, C), Vocoder expects (B, Channels, Time, MelBins)
13591366
generated_mel_spectrograms = generated_mel_spectrograms.transpose(0, 3, 1, 2)
1367+
s_vocoder = time.perf_counter()
13601368
audio = self.vocoder(generated_mel_spectrograms)
1369+
t_vocoder = time.perf_counter() - s_vocoder
1370+
max_logging.log(f"[Tuning] Vocoder took: {t_vocoder:.4f} seconds")
13611371

13621372
# Convert audio to numpy
13631373
audio = np.array(audio)

0 commit comments

Comments (0)