later

Serenagu525 · Serenagu525 · commit 4bcffd11b5b5 · 2025-07-11T18:01:04.000Z
diff --git a/src/maxdiffusion/models/ltx_video/transformers/transformer3d.py b/src/maxdiffusion/models/ltx_video/transformers/transformer3d.py
@@ -153,8 +153,7 @@ def scale_shift_table_init(key):
                 weight_dtype=self.weight_dtype,
                 matmul_precision=self.matmul_precision,
             )
-    def init_weights(self, key, in_channels, caption_channels, eval_only=True):
-        import pdb; pdb.set_trace()
+    def init_weights(self, in_channels, key, caption_channels, eval_only=True):
         example_inputs = {}
         batch_size, num_tokens = 4, 256
         input_shapes = {
@@ -169,16 +168,15 @@ def init_weights(self, key, in_channels, caption_channels, eval_only=True):
             example_inputs[name] = jnp.ones(
                 shape, dtype=jnp.float32 if name not in ["attention_mask", "encoder_attention_mask"] else jnp.bool
             )
-    
+
         if eval_only:
             return jax.eval_shape(
                 self.init,
-                key, ##need to change?
+                key,
                 **example_inputs,
             )["params"]
         else:
-            return self.init(key, **example_inputs)['params']
-    
+            return self.init(key, **example_inputs)["params"]
     def create_skip_layer_mask(
         self,
         batch_size: int,
diff --git a/src/maxdiffusion/models/ltx_video/xora_v1.2-13B-balanced-128.json b/src/maxdiffusion/models/ltx_video/xora_v1.2-13B-balanced-128.json
@@ -1,5 +1,5 @@
 {
-    "ckpt_path": "",
+    "ckpt_path": "/mnt/disks/diffusionproj/jax_weights",
     "activation_fn": "gelu-approximate",
     "attention_bias": true,
     "attention_head_dim": 128,
diff --git a/src/maxdiffusion/pipelines/ltx_video/ltx_video_pipeline.py b/src/maxdiffusion/pipelines/ltx_video/ltx_video_pipeline.py
@@ -226,12 +226,9 @@ def load_transformer(cls, config):
             **model_config, dtype=jnp.float32, gradient_checkpointing="matmul_without_batch", sharding_mesh=mesh)
         
         weights_init_fn = functools.partial(
-            transformer.init_weights,
-            jax.random.PRNGKey(42),
-            in_channels,
-            model_config['caption_channels'],
-            eval_only=True
+            transformer.init_weights, in_channels, jax.random.PRNGKey(42), model_config["caption_channels"], eval_only=True
         )
+        
         absolute_ckpt_path = os.path.abspath(relative_ckpt_path)
 
         checkpoint_manager = ocp.CheckpointManager(absolute_ckpt_path)
@@ -240,7 +237,7 @@ def load_transformer(cls, config):
             tx=None,
             config=config,
             mesh=mesh,
-            weights_init_fn=None,
+            weights_init_fn=weights_init_fn,
             checkpoint_manager=checkpoint_manager,
             checkpoint_item=" ",
             model_params=None,

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1,5 +1,5 @@` |
| `1` | `1` | `{` |
| `2` | | `- "ckpt_path": "",` |
| | `2` | `+ "ckpt_path": "/mnt/disks/diffusionproj/jax_weights",` |
| `3` | `3` | `"activation_fn": "gelu-approximate",` |
| `4` | `4` | `"attention_bias": true,` |
| `5` | `5` | `"attention_head_dim": 128,` |