
Commit 691863b (1 parent: 0a36e73)

Changed the partitioning spec for the to_q, to_k, and to_v kernel initializers from ("embed", "heads") to (None, "heads").

1 file changed: src/maxdiffusion/models/ltx2/attention_ltx2.py (1 addition, 1 deletion)
@@ -358,7 +358,7 @@ def __init__(
 
     # 1. Define Partitioned Initializers (Logical Axes)
     # Q, K, V kernels: [in_features (embed), out_features (heads)]
-    qkv_kernel_init = nnx.with_partitioning(nnx.initializers.lecun_normal(), ("embed", "heads"))
+    qkv_kernel_init = nnx.with_partitioning(nnx.initializers.lecun_normal(), (None, "heads"))
     # Q, K, V biases: [out_features (heads)]
     qkv_bias_init = nnx.with_partitioning(nnx.initializers.zeros_init(), ("heads",))
 
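
For context, a minimal sketch (not from the repository) of what the new spec means: nnx.with_partitioning wraps an initializer so the resulting kernel param carries logical-axis sharding metadata. With (None, "heads"), the in_features axis is left unannotated (replicated) and out_features is annotated with the logical "heads" axis. The small nnx.Linear layer below is hypothetical, used only to materialize a kernel with this annotation.

    # Minimal sketch, assuming Flax's NNX API; the Linear layer here is
    # hypothetical and not part of attention_ltx2.py.
    from flax import nnx

    # Replicate in_features (None), shard out_features along the logical
    # "heads" axis -- the new qkv_kernel_init spec from this commit.
    qkv_kernel_init = nnx.with_partitioning(
        nnx.initializers.lecun_normal(), (None, "heads")
    )

    # Materialize a kernel carrying the annotation.
    layer = nnx.Linear(8, 16, kernel_init=qkv_kernel_init, use_bias=False,
                       rngs=nnx.Rngs(0))

    # Recover the annotation as a PartitionSpec, usable with
    # jax.lax.with_sharding_constraint under an appropriate mesh.
    pspecs = nnx.get_partition_spec(nnx.state(layer))
    print(pspecs)  # kernel -> PartitionSpec(None, 'heads')

With the previous ("embed", "heads") spec, the in_features axis was also annotated, so the kernel's first dimension would be sharded along the logical "embed" axis; the new spec replicates it instead.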
