clean up, ensuring layers are in fp32

prishajain1 · prishajain1 · commit 2d55e8745aa9 · 2026-02-11T18:20:15.000+05:30
diff --git a/src/maxdiffusion/models/ltx2/attention_ltx2.py b/src/maxdiffusion/models/ltx2/attention_ltx2.py
@@ -398,8 +398,8 @@ def __init__(
     self.to_v = nnx.Linear(kv_dim, self.inner_dim, use_bias=bias, rngs=rngs, dtype=dtype)
 
     # 2. Normalization (Applied to full inner_dim, NOT per-head)
-    self.norm_q = nnx.RMSNorm(self.inner_dim, epsilon=eps, dtype=dtype, use_scale=True, rngs=rngs)
-    self.norm_k = nnx.RMSNorm(self.inner_dim, epsilon=eps, dtype=dtype, use_scale=True, rngs=rngs)
+    self.norm_q = nnx.RMSNorm(self.inner_dim, epsilon=eps, dtype=jnp.float32, param_dtype=jnp.float32, use_scale=True, rngs=rngs)
+    self.norm_k = nnx.RMSNorm(self.inner_dim, epsilon=eps, dtype=jnp.float32, param_dtype=jnp.float32, use_scale=True, rngs=rngs)
 
     # 3. Output
     self.to_out = nnx.Linear(self.inner_dim, query_dim, use_bias=out_bias, rngs=rngs, dtype=dtype)
diff --git a/src/maxdiffusion/models/ltx2/transformer_ltx2.py b/src/maxdiffusion/models/ltx2/transformer_ltx2.py
@@ -1,6 +1,17 @@
 """
-This is a test file used for ensuring numerical parity between pytorch and jax implementation of LTX2.
-This is to be ignored and will not be pushed when commiting to main branch.
+Copyright 2025 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
 """
 from typing import Optional, Tuple, Any, Dict, Union
 import jax
@@ -105,8 +116,9 @@ def __init__(
         epsilon=self.norm_eps,
         use_scale=self.norm_elementwise_affine,
         rngs=rngs,
-        dtype=dtype,
-        param_dtype=weights_dtype,
+        rngs=rngs,
+        dtype=jnp.float32,
+        param_dtype=jnp.float32,
     )
     self.attn1 = LTX2Attention(
         rngs=rngs,
@@ -128,8 +140,9 @@ def __init__(
         epsilon=self.norm_eps,
         use_scale=self.norm_elementwise_affine,
         rngs=rngs,
-        dtype=dtype,
-        param_dtype=weights_dtype,
+        rngs=rngs,
+        dtype=jnp.float32,
+        param_dtype=jnp.float32,
     )
     self.audio_attn1 = LTX2Attention(
         rngs=rngs,
@@ -152,8 +165,9 @@ def __init__(
         epsilon=self.norm_eps,
         use_scale=self.norm_elementwise_affine,
         rngs=rngs,
-        dtype=dtype,
-        param_dtype=weights_dtype,
+        rngs=rngs,
+        dtype=jnp.float32,
+        param_dtype=jnp.float32,
     )
     self.attn2 = LTX2Attention(
         rngs=rngs,
@@ -176,8 +190,9 @@ def __init__(
         epsilon=self.norm_eps,
         use_scale=self.norm_elementwise_affine,
         rngs=rngs,
-        dtype=dtype,
-        param_dtype=weights_dtype,
+        rngs=rngs,
+        dtype=jnp.float32,
+        param_dtype=jnp.float32,
     )
     self.audio_attn2 = LTX2Attention(
         rngs=rngs,
@@ -197,7 +212,7 @@ def __init__(
 
     # 3. Audio-to-Video (a2v) and Video-to-Audio (v2a) Cross-Attention
     self.audio_to_video_norm = nnx.RMSNorm(
-        dim, epsilon=self.norm_eps, use_scale=self.norm_elementwise_affine, rngs=rngs, dtype=dtype, param_dtype=weights_dtype
+        dim, epsilon=self.norm_eps, use_scale=self.norm_elementwise_affine, rngs=rngs, dtype=jnp.float32, param_dtype=jnp.float32
     )
     self.audio_to_video_attn = LTX2Attention(
         rngs=rngs,
@@ -220,8 +235,9 @@ def __init__(
         epsilon=self.norm_eps,
         use_scale=self.norm_elementwise_affine,
         rngs=rngs,
-        dtype=dtype,
-        param_dtype=weights_dtype,
+        rngs=rngs,
+        dtype=jnp.float32,
+        param_dtype=jnp.float32,
     )
     self.video_to_audio_attn = LTX2Attention(
         rngs=rngs,
@@ -241,7 +257,7 @@ def __init__(
 
     # 4. Feed Forward
     self.norm3 = nnx.RMSNorm(
-        dim, epsilon=self.norm_eps, use_scale=self.norm_elementwise_affine, rngs=rngs, dtype=dtype, param_dtype=weights_dtype
+        dim, epsilon=self.norm_eps, use_scale=self.norm_elementwise_affine, rngs=rngs, dtype=jnp.float32, param_dtype=jnp.float32
     )
     self.ff = NNXSimpleFeedForward(
         rngs=rngs,
@@ -257,8 +273,8 @@ def __init__(
         epsilon=self.norm_eps,
         use_scale=self.norm_elementwise_affine,
         rngs=rngs,
-        dtype=dtype,
-        param_dtype=weights_dtype,
+        dtype=jnp.float32,
+        param_dtype=jnp.float32,
     )
     self.audio_ff = NNXSimpleFeedForward(
         rngs=rngs, dim=audio_dim, dim_out=audio_dim, activation_fn=activation_fn, dtype=dtype, weights_dtype=weights_dtype
@@ -776,7 +792,7 @@ def init_block(rngs):
     # 6. Output layers
     self.gradient_checkpoint = GradientCheckpointType.from_str(remat_policy)
     self.norm_out = nnx.LayerNorm(
-        inner_dim, epsilon=1e-6, use_scale=False, rngs=rngs, dtype=self.dtype, param_dtype=self.weights_dtype
+        inner_dim, epsilon=1e-6, use_scale=False, rngs=rngs, dtype=jnp.float32, param_dtype=jnp.float32
     )
     self.proj_out = nnx.Linear(
         inner_dim,
@@ -789,7 +805,7 @@ def init_block(rngs):
     )
 
     self.audio_norm_out = nnx.LayerNorm(
-        audio_inner_dim, epsilon=1e-6, use_scale=False, rngs=rngs, dtype=self.dtype, param_dtype=self.weights_dtype
+        audio_inner_dim, epsilon=1e-6, use_scale=False, rngs=rngs, dtype=jnp.float32, param_dtype=jnp.float32
     )
     self.audio_proj_out = nnx.Linear(
         audio_inner_dim,
diff --git a/src/maxdiffusion/tests/ltx2_parity_test.py b/src/maxdiffusion/tests/ltx2_parity_test.py
@@ -1,3 +1,7 @@
+"""
+This is a test file used for ensuring numerical parity between the PyTorch and JAX implementations of LTX2.
+This is to be ignored and will not be pushed when committing to the main branch.
+"""
 import unittest
 import jax
 import jax.numpy as jnp