Skip to content

Commit 07bbd6b

Browse files
committed
fix
1 parent 6422e93 commit 07bbd6b

2 files changed

Lines changed: 7 additions & 8 deletions

File tree

src/maxdiffusion/models/ltx2/ltx2_utils.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,12 @@ def rename_for_ltx2_transformer(key):
3030
key = key.replace("patchify_proj", "proj_in")
3131
key = key.replace("audio_patchify_proj", "audio_proj_in")
3232

33-
if "caption_projection" in key:
34-
key = key.replace("caption_projection", "audio_caption_projection")
33+
# if "caption_projection" in key:
34+
# key = key.replace("caption_projection", "audio_caption_projection")
3535

3636
# Handle audio_ff.net_0.proj -> audio_ff.net_0
3737
if "audio_ff" in key and "proj" in key:
3838
key = key.replace(".proj", "")
39-
40-
# This line was redundant, keeping it as a no-op or removing it is fine.
41-
# The instruction implies it should be `return key` at the end.
42-
key = key.replace("transformer_blocks", "transformer_blocks")
4339

4440
# Handle to_out.0 -> to_out for LTX2Attention
4541
if "to_out.0" in key:
@@ -241,6 +237,9 @@ def load_vae_weights(
241237
pt_list.append(str(idx))
242238
else:
243239
pt_list.append(part)
240+
elif part in ["conv1", "conv2", "conv_in", "conv_out", "conv_shortcut", "conv"]:
241+
pt_list.append(part)
242+
pt_list.append("conv")
244243
else:
245244
pt_list.append(part)
246245

src/maxdiffusion/tests/test_ltx2_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,13 @@ def test_load_transformer_weights(self):
5757
patch_size_t=self.config.patch_size_t,
5858
num_attention_heads=self.config.num_attention_heads,
5959
attention_head_dim=self.config.attention_head_dim,
60-
cross_attention_dim=self.config.cross_attention_dim,
60+
cross_attention_dim=4096, # T5-XXL uses 4096
6161
audio_in_channels=self.config.audio_in_channels,
6262
audio_out_channels=self.config.audio_out_channels,
6363
audio_patch_size=self.config.audio_patch_size,
6464
audio_patch_size_t=self.config.audio_patch_size_t,
6565
audio_num_attention_heads=self.config.audio_num_attention_heads,
66-
audio_attention_head_dim=128, # Match Config/Checkpoint
66+
audio_attention_head_dim=64, # Match Checkpoint (2048 / 32)
6767
audio_cross_attention_dim=self.config.audio_cross_attention_dim,
6868
num_layers=self.config.num_layers,
6969
scan_layers=True,

0 commit comments

Comments (0)