transformer weight

prishajain1 · prishajain1 · commit b9ac9eba02ac · 2026-04-09T14:45:27.000+05:30
diff --git a/src/maxdiffusion/models/ltx2/ltx2_utils.py b/src/maxdiffusion/models/ltx2/ltx2_utils.py
@@ -98,6 +98,9 @@ def rename_for_ltx2_transformer(key):
   # Add missing mappings
   key = key.replace("av_ca_video_scale_shift_adaln_single", "av_cross_attn_video_scale_shift")
   key = key.replace("av_ca_a2v_gate_adaln_single", "av_cross_attn_video_a2v_gate")
   key = key.replace("av_ca_audio_scale_shift_adaln_single", "av_cross_attn_audio_scale_shift")
   key = key.replace("av_ca_v2a_gate_adaln_single", "av_cross_attn_audio_v2a_gate")
+  # Generic rename goes last: it rewrites every "adaln_single" substring, so the
+  # more specific "*_adaln_single" patterns above must be matched first.
+  key = key.replace("adaln_single", "time_embed")
   key = key.replace("scale_shift_table_a2v_ca_video", "video_a2v_cross_attn_scale_shift_table")
diff --git a/src/maxdiffusion/models/ltx2/transformer_ltx2.py b/src/maxdiffusion/models/ltx2/transformer_ltx2.py
@@ -612,6 +612,8 @@ def __init__(
       qk_norm: str = "rms_norm_across_heads",
       flash_block_sizes: BlockSizes = None,
       flash_min_seq_length: int = 4096,
+      gated_attn: bool = False,
+      cross_attn_mod: bool = False,
       **kwargs,
   ):
     self.in_channels = in_channels
@@ -658,6 +660,8 @@ def __init__(
     self.names_which_can_be_offloaded = names_which_can_be_offloaded
     self.scan_layers = scan_layers
     self.attention_kernel = attention_kernel
+    self.gated_attn = gated_attn
+    self.cross_attn_mod = cross_attn_mod
     self.a2v_attention_kernel = a2v_attention_kernel
     self.v2a_attention_kernel = v2a_attention_kernel
     self.flash_min_seq_length = flash_min_seq_length
@@ -845,6 +849,8 @@ def init_block(rngs):
           norm_elementwise_affine=self.norm_elementwise_affine,
           norm_eps=self.norm_eps,
           rope_type=self.rope_type,
+          gated_attn=self.gated_attn,
+          cross_attn_mod=self.cross_attn_mod,
           dtype=self.dtype,
           weights_dtype=self.weights_dtype,
           mesh=self.mesh,