Skip to content

Commit 753ae2c

Browse files
committed
vocoder + RoPE type fix
1 parent c2d9473 commit 753ae2c

1 file changed

Lines changed: 4 additions & 1 deletion

File tree

src/maxdiffusion/pipelines/ltx2/ltx2_pipeline.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,9 @@ def create_model(rngs: nnx.Rngs, ltx2_config: dict):
132132
else:
133133
ltx2_config = LTX2VideoTransformer3DModel.load_config(config.pretrained_model_name_or_path, subfolder=subfolder)
134134

135+
# Align RoPE type with connectors
136+
ltx2_config["rope_type"] = "split"
137+
135138
if ltx2_config.get("activation_fn") == "gelu-approximate":
136139
ltx2_config["activation_fn"] = "gelu"
137140

@@ -1725,7 +1728,7 @@ def convert_to_vel(lat, x0):
17251728
generated_mel_spectrograms = self.audio_vae.decode(audio_latents, return_dict=False)[0]
17261729

17271730
# Audio VAE outputs (B, T, F, C); transpose(0, 3, 2, 1) reorders to (B, Channels, MelBins, Time) — NOTE(review): confirm this is the layout the vocoder expects
1728-
generated_mel_spectrograms = generated_mel_spectrograms.transpose(0, 3, 1, 2)
1731+
generated_mel_spectrograms = generated_mel_spectrograms.transpose(0, 3, 2, 1)
17291732
audio = self.vocoder(generated_mel_spectrograms)
17301733

17311734
# Convert audio to numpy

0 commit comments

Comments
 (0)