Commit 6a77237

Upsampler pipeline
1 parent 9697900 commit 6a77237

8 files changed

Lines changed: 764 additions & 17 deletions

src/maxdiffusion/checkpointing/ltx2_checkpointer.py

Lines changed: 8 additions & 4 deletions
@@ -79,19 +79,23 @@ def load_ltx2_configs_from_orbax(self, step: Optional[int]) -> Tuple[Optional[di
     return restored_checkpoint, step

   def load_checkpoint(
-      self, step=None, vae_only=False, load_transformer=True
+      self, step=None, vae_only=False, load_transformer=True, load_upsampler=False
   ) -> Tuple[LTX2Pipeline, Optional[dict], Optional[int]]:
     restored_checkpoint, step = self.load_ltx2_configs_from_orbax(step)
     opt_state = None

     if restored_checkpoint:
       max_logging.log("Loading LTX2 pipeline from checkpoint")
-      pipeline = LTX2Pipeline.from_checkpoint(self.config, restored_checkpoint, vae_only, load_transformer)
+      pipeline = LTX2Pipeline.from_checkpoint(
+          self.config, restored_checkpoint, vae_only, load_transformer, load_upsampler
+      )
       if "opt_state" in restored_checkpoint.ltx2_state.keys():
         opt_state = restored_checkpoint.ltx2_state["opt_state"]
     else:
       max_logging.log("No checkpoint found, loading pipeline from pretrained hub")
-      pipeline = LTX2Pipeline.from_pretrained(self.config, vae_only, load_transformer)
+      pipeline = LTX2Pipeline.from_pretrained(
+          self.config, vae_only, load_transformer, load_upsampler
+      )

     return pipeline, opt_state, step

@@ -110,4 +114,4 @@ def config_to_json(model_or_config):

     # Save the checkpoint
     self.checkpoint_manager.save(train_step, args=ocp.args.Composite(**items))
-    max_logging.log(f"Checkpoint for step {train_step} saved.")
+    max_logging.log(f"Checkpoint for step {train_step} saved.")

src/maxdiffusion/compare.py

Lines changed: 75 additions & 0 deletions
@@ -0,0 +1,75 @@
+import torch
+import jax
+import jax.numpy as jnp
+import numpy as np
+
+# 1. Alias the imports to prevent name collisions between the two models.
+from diffusers.pipelines.ltx2.latent_upsampler import LTX2LatentUpsamplerModel as PT_Upsampler
+from maxdiffusion.models.ltx2.latent_upsampler_ltx2 import LTX2LatentUpsamplerModel as JAX_Upsampler
+from maxdiffusion.models.ltx2.ltx2_utils import load_upsampler_weights
+
+def test_side_by_side():
+  # --- Set up PyTorch ---
+  print("Initializing PyTorch Model...")
+  # Load the real pretrained weights.
+  pt_model = PT_Upsampler.from_pretrained("Lightricks/LTX-2", subfolder="latent_upsampler")
+  pt_model.eval()
+
+  # --- Set up JAX ---
+  print("Initializing JAX Model...")
+  jax_model = JAX_Upsampler()
+
+  print("Loading JAX Weights from HuggingFace...")
+  # Use the conversion utility so both models hold the exact same weights.
+  flax_params = load_upsampler_weights(
+      pretrained_model_name_or_path="Lightricks/LTX-2",
+      eval_shapes=None,
+      device="cpu",  # Load onto CPU for the comparison.
+      subfolder="latent_upsampler",
+  )
+
+  # Debug helper: inspect parameter dtypes and shapes.
+  # for path, value in jax.tree_util.tree_flatten_with_path(flax_params)[0]:
+  #   print(f"{jax.tree_util.keystr(path)}: {value.dtype}, shape: {value.shape}")
+
+  # --- Generate identical dummy data ---
+  # Shape: Batch=1, Channels=128, Frames=8, Height=32, Width=32.
+  print("Generating identical random inputs...")
+  torch.manual_seed(42)
+  pt_input = torch.randn(1, 128, 8, 32, 32, dtype=torch.float32)
+
+  # Convert PyTorch NCDHW -> JAX NDHWC:
+  # (0, 2, 3, 4, 1) maps (B, C, F, H, W) -> (B, F, H, W, C).
+  jax_input_np = pt_input.permute(0, 2, 3, 4, 1).numpy()
+  jax_input = jnp.array(jax_input_np)
+
+  # --- Run forward passes ---
+  print("Running PyTorch pass...")
+  with torch.no_grad():
+    pt_output = pt_model(pt_input)
+
+  print("Running JAX pass...")
+  jax_output = jax_model.apply({"params": flax_params}, jax_input)
+
+  # --- Compare results ---
+  # Convert the JAX output back to the PyTorch layout: NDHWC -> NCDHW.
+  # (0, 4, 1, 2, 3) maps (B, F, H, W, C) -> (B, C, F, H, W).
+  jax_output_converted = torch.tensor(np.array(jax_output)).permute(0, 4, 1, 2, 3)
+
+  # Mean squared error and maximum absolute difference.
+  mse = torch.nn.functional.mse_loss(pt_output, jax_output_converted)
+  max_diff = (pt_output - jax_output_converted).abs().max()
+
+  print("\n" + "=" * 30)
+  print("      COMPARISON RESULTS      ")
+  print("=" * 30)
+  print(f"Mean Squared Error: {mse.item():.8f}")
+  print(f"Max Absolute Error: {max_diff.item():.8f}")
+
+  if max_diff.item() < 1e-3:
+    print("\n✅ SUCCESS: The outputs match within tolerance.")
+  else:
+    print("\n❌ FAILED: The models diverge. There is a bug in the math/weights.")
+
+if __name__ == "__main__":
+  test_side_by_side()
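
Note: the two permutations in compare.py are exact inverses, so the layout conversion itself cannot introduce error; any mismatch comes from the models. A standalone sanity check of that claim, using only NumPy:

import numpy as np

x = np.random.randn(1, 128, 8, 32, 32)  # NCDHW, as fed to the PyTorch model
ndhwc = x.transpose(0, 2, 3, 4, 1)  # (B, C, F, H, W) -> (B, F, H, W, C)
back = ndhwc.transpose(0, 4, 1, 2, 3)  # (B, F, H, W, C) -> (B, C, F, H, W)
assert np.array_equal(x, back)  # the round trip is exact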

src/maxdiffusion/configs/ltx2_video.yml

Lines changed: 10 additions & 1 deletion
@@ -9,7 +9,7 @@ names_which_can_be_saved: []
 names_which_can_be_offloaded: []
 remat_policy: "NONE"

-jax_cache_dir: ''
+jax_cache_dir: '/mnt/disks/mehdy-disk1/maxdiffusion_hf_cache'
 weights_dtype: 'bfloat16'
 activations_dtype: 'bfloat16'

@@ -92,3 +92,12 @@ jit_initializers: True
 enable_single_replica_ckpt_restoring: False
 seed: 0
 audio_format: "s16"
+
+# LTX-2 Latent Upsampler
+run_latent_upsampler: False
+upsampler_model_path: "Lightricks/LTX-2"
+upsampler_spatial_patch_size: 1
+upsampler_temporal_patch_size: 1
+upsampler_adain_factor: 0.0
+upsampler_tone_map_compression_ratio: 0.0
+upsampler_rational_spatial_scale: 2.0
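
Note: these keys are read on the Python side with getattr defaults (see generate_ltx2.py below), so configs that omit them keep working. An illustrative sketch of that guarded-read pattern; the actual consumers of each key are not all shown in this commit:

# Sketch only: mirror the getattr pattern used in generate_ltx2.py.
run_latent_upsampler = getattr(config, "run_latent_upsampler", False)
upsampler_model_path = getattr(config, "upsampler_model_path", "Lightricks/LTX-2")
adain_factor = getattr(config, "upsampler_adain_factor", 0.0)
spatial_scale = getattr(config, "upsampler_rational_spatial_scale", 2.0)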

src/maxdiffusion/generate_ltx2.py

Lines changed: 13 additions & 5 deletions
@@ -15,6 +15,7 @@
 from typing import Sequence
 import jax
 import jax.numpy as jnp
+import numpy as np
 import time
 import os
 import subprocess

@@ -81,9 +82,9 @@ def get_git_commit_hash():


 def call_pipeline(config, pipeline, prompt, negative_prompt):
-  # Set default generation arguments
   generator = jax.random.key(config.seed) if hasattr(config, "seed") else jax.random.key(0)
   guidance_scale = config.guidance_scale if hasattr(config, "guidance_scale") else 3.0
+  output_type = "pil"

   out = pipeline(
       prompt=prompt,

@@ -99,7 +100,9 @@ def call_pipeline(config, pipeline, prompt, negative_prompt):
       decode_noise_scale=getattr(config, "decode_noise_scale", None),
       max_sequence_length=getattr(config, "max_sequence_length", 1024),
       dtype=jnp.bfloat16 if getattr(config, "activations_dtype", "bfloat16") == "bfloat16" else jnp.float32,
+      output_type=output_type,
   )
+
   return out

@@ -114,9 +117,11 @@ def run(config, pipeline=None, filename_prefix="", commit_hash=None):
   else:
     max_logging.log("Could not retrieve Git commit hash.")

+  checkpoint_loader = LTX2Checkpointer(config=config)
   if pipeline is None:
-    checkpoint_loader = LTX2Checkpointer(config=config)
-    pipeline, _, _ = checkpoint_loader.load_checkpoint()
+    # Use the config flag to determine if the upsampler should be loaded
+    run_latent_upsampler = getattr(config, "run_latent_upsampler", False)
+    pipeline, _, _ = checkpoint_loader.load_checkpoint(load_upsampler=run_latent_upsampler)

   pipeline.enable_vae_slicing()
   pipeline.enable_vae_tiling()

@@ -133,8 +138,9 @@ def run(config, pipeline=None, filename_prefix="", commit_hash=None):
   max_logging.log(
       f"Num steps: {config.num_inference_steps}, height: {config.height}, width: {config.width}, frames: {config.num_frames}"
   )
-
+
   out = call_pipeline(config, pipeline, prompt, negative_prompt)
+
   # out should have .frames and .audio
   videos = out.frames if hasattr(out, "frames") else out[0]
   audios = out.audio if hasattr(out, "audio") else None

@@ -143,6 +149,8 @@ def run(config, pipeline=None, filename_prefix="", commit_hash=None):
   max_logging.log(f"model name: {getattr(config, 'model_name', 'ltx-video')}")
   max_logging.log(f"model path: {config.pretrained_model_name_or_path}")
   max_logging.log(f"model type: {getattr(config, 'model_type', 'T2V')}")
+  if getattr(config, "run_latent_upsampler", False):
+    max_logging.log(f"upsampler model path: {config.upsampler_model_path}")
   max_logging.log(f"hardware: {jax.devices()[0].platform}")
   max_logging.log(f"number of devices: {jax.device_count()}")
   max_logging.log(f"per_device_batch_size: {config.per_device_batch_size}")

@@ -218,4 +226,4 @@ def main(argv: Sequence[str]) -> None:


 if __name__ == "__main__":
-  app.run(main)
+  app.run(main)
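
Note: taken together, the flag flows from the YAML config through the checkpointer into generation. A condensed sketch of the end-to-end path using only names introduced in the diffs above (config construction and prompt handling elided):

# Sketch: end-to-end flow of run_latent_upsampler through this commit.
run_latent_upsampler = getattr(config, "run_latent_upsampler", False)
checkpoint_loader = LTX2Checkpointer(config=config)
pipeline, _, _ = checkpoint_loader.load_checkpoint(load_upsampler=run_latent_upsampler)
out = call_pipeline(config, pipeline, prompt, negative_prompt)
videos = out.frames if hasattr(out, "frames") else out[0]  # PIL frames (output_type="pil")
audios = out.audio if hasattr(out, "audio") else None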
