AI-Hypercomputer
diff --git a/src/maxdiffusion/checkpointing/checkpointing_utils.py b/src/maxdiffusion/checkpointing/checkpointing_utils.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/maxdiffusion/configs/ltx_video.yml b/src/maxdiffusion/configs/ltx_video.yml
Lines changed: 1 addition & 1 deletion
diff --git a/src/maxdiffusion/generate_ltx_video.py b/src/maxdiffusion/generate_ltx_video.py
Lines changed: 98 additions & 104 deletions
diff --git a/src/maxdiffusion/max_utils.py b/src/maxdiffusion/max_utils.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/maxdiffusion/models/ltx_video/autoencoders/causal_conv3d.py b/src/maxdiffusion/models/ltx_video/autoencoders/causal_conv3d.py
Lines changed: 0 additions & 63 deletions
@@ -217,7 +217,7 @@ def load_state_if_possible(
           return checkpoint_manager.restore(latest_step, args=ocp.args.StandardRestore(abstract_unboxed_pre_state))
         else:
           item = {checkpoint_item: orbax.checkpoint.args.PyTreeRestore(item=abstract_unboxed_pre_state)}
-          return checkpoint_manager.restore(latest_step, args=orbax.checkpoint.args.Composite(**item)) 
+          return checkpoint_manager.restore(latest_step, args=orbax.checkpoint.args.Composite(**item))
 
       def map_to_pspec(data):
         pspec = data.sharding.spec
 
@@ -24,7 +24,7 @@ sampler: "from_checkpoint"
 
 # Generation parameters
 pipeline_type: multi-scale
-prompt: "A man walks towards a window, looks out, and then turns around. He has short, dark hair, dark skin, and is wearing a brown coat over a red and gray scarf. He walks from left to right towards a window, his gaze fixed on something outside. The camera follows him from behind at a medium distance. The room is brightly lit, with white walls and a large window covered by a white curtain. As he approaches the window, he turns his head slightly to the left, then back to the right. He then turns his entire body to the right, facing the window. The camera remains stationary as he stands in front of the window. The scene is captured in real-life footage."
+prompt: "A man in a dimly lit room talks on a vintage telephone, hangs up, and looks down with a sad expression. He holds the black rotary phone to his right ear with his right hand, his left hand holding a rocks glass with amber liquid. He wears a brown suit jacket over a white shirt, and a gold ring on his left ring finger. His short hair is neatly combed, and he has light skin with visible wrinkles around his eyes. The camera remains stationary, focused on his face and upper body. The room is dark, lit only by a warm light source off-screen to the left, casting shadows on the wall behind him. The scene appears to be from a movie."
 height: 512
 width: 512
 num_frames: 88 #344
 
@@ -4,11 +4,8 @@
 from maxdiffusion.pipelines.ltx_video.ltx_video_pipeline import LTXVideoPipeline
 from maxdiffusion.pipelines.ltx_video.ltx_video_pipeline import LTXMultiScalePipeline
 from maxdiffusion import pyconfig
-from maxdiffusion.models.ltx_video.autoencoders.latent_upsampler import LatentUpsampler
-from huggingface_hub import hf_hub_download
 import imageio
 from datetime import datetime
-
 import os
 import torch
 from pathlib import Path
@@ -18,52 +15,45 @@ def calculate_padding(
     source_height: int, source_width: int, target_height: int, target_width: int
 ) -> tuple[int, int, int, int]:
 
-    # Calculate total padding needed
-    pad_height = target_height - source_height
-    pad_width = target_width - source_width
+  # Calculate total padding needed
+  pad_height = target_height - source_height
+  pad_width = target_width - source_width
 
-    # Calculate padding for each side
-    pad_top = pad_height // 2
-    pad_bottom = pad_height - pad_top  # Handles odd padding
-    pad_left = pad_width // 2
-    pad_right = pad_width - pad_left  # Handles odd padding
+  # Calculate padding for each side
+  pad_top = pad_height // 2
+  pad_bottom = pad_height - pad_top  # Handles odd padding
+  pad_left = pad_width // 2
+  pad_right = pad_width - pad_left  # Handles odd padding
 
-    # Return padded tensor
-    # Padding format is (left, right, top, bottom)
-    padding = (pad_left, pad_right, pad_top, pad_bottom)
-    return padding
+  # Return the per-side padding amounts (no tensor is padded here)
+  # Padding format is (left, right, top, bottom)
+  padding = (pad_left, pad_right, pad_top, pad_bottom)
+  return padding
 
 
 def convert_prompt_to_filename(text: str, max_len: int = 20) -> str:
-    # Remove non-letters and convert to lowercase
-    clean_text = "".join(
-        char.lower() for char in text if char.isalpha() or char.isspace()
-    )
+  # Keep only letters and whitespace, converting to lowercase
+  clean_text = "".join(char.lower() for char in text if char.isalpha() or char.isspace())
 
-    # Split into words
-    words = clean_text.split()
+  # Split into words
+  words = clean_text.split()
 
-    # Build result string keeping track of length
-    result = []
-    current_length = 0
+  # Build result string keeping track of length
+  result = []
+  current_length = 0
 
-    for word in words:
-        # Add word length plus 1 for underscore (except for first word)
-        new_length = current_length + len(word)
+  for word in words:
+    # Count only the word's own characters toward max_len (the "-" separators are not counted)
+    new_length = current_length + len(word)
 
-        if new_length <= max_len:
-            result.append(word)
-            current_length += len(word)
-        else:
-            break
+    if new_length <= max_len:
+      result.append(word)
+      current_length += len(word)
+    else:
+      break
 
-    return "-".join(result)
+  return "-".join(result)
 
-def create_latent_upsampler(latent_upsampler_model_path: str, device: str):
-    latent_upsampler = LatentUpsampler.from_pretrained(latent_upsampler_model_path)
-    latent_upsampler.to(device)
-    latent_upsampler.eval()
-    return latent_upsampler
 
 def get_unique_filename(
     base: str,
@@ -75,78 +65,82 @@ def get_unique_filename(
     endswith=None,
     index_range=1000,
 ) -> Path:
-    base_filename = f"{base}_{convert_prompt_to_filename(prompt, max_len=30)}_{seed}_{resolution[0]}x{resolution[1]}x{resolution[2]}"
-    for i in range(index_range):
-        filename = dir / \
-            f"{base_filename}_{i}{endswith if endswith else ''}{ext}"
-        if not os.path.exists(filename):
-            return filename
-    raise FileExistsError(
-        f"Could not find a unique filename after {index_range} attempts."
-    )
+  base_filename = (
+      f"{base}_{convert_prompt_to_filename(prompt, max_len=30)}_{seed}_{resolution[0]}x{resolution[1]}x{resolution[2]}"
+  )
+  for i in range(index_range):
+    filename = dir / f"{base_filename}_{i}{endswith if endswith else ''}{ext}"
+    if not os.path.exists(filename):
+      return filename
+  raise FileExistsError(f"Could not find a unique filename after {index_range} attempts.")
 
 
 def run(config):
-    height_padded = ((config.height - 1) // 32 + 1) * 32
-    width_padded = ((config.width - 1) // 32 + 1) * 32
-    num_frames_padded = ((config.num_frames - 2) // 8 + 1) * 8 + 1
-    padding = calculate_padding(
-        config.height, config.width, height_padded, width_padded)
-
-    seed = 10 
-    generator = torch.Generator().manual_seed(seed)
-    pipeline = LTXVideoPipeline.from_pretrained(config, enhance_prompt = False)
-    pipeline = LTXMultiScalePipeline(pipeline)
-    images = pipeline(height=height_padded, width=width_padded, num_frames=num_frames_padded, output_type='pt', generator=generator, config = config)
-    (pad_left, pad_right, pad_top, pad_bottom) = padding
-    pad_bottom = -pad_bottom
-    pad_right = -pad_right
-    if pad_bottom == 0:
-        pad_bottom = images.shape[3]
-    if pad_right == 0:
-        pad_right = images.shape[4]
-    images = images[:, :, :config.num_frames,
-                    pad_top:pad_bottom, pad_left:pad_right]
-    output_dir = Path(f"outputs/{datetime.today().strftime('%Y-%m-%d')}")
-    output_dir.mkdir(parents=True, exist_ok=True)
-    for i in range(images.shape[0]):
-        # Gathering from B, C, F, H, W to C, F, H, W and then permuting to F, H, W, C
-        video_np = images[i].permute(1, 2, 3, 0).detach().float().numpy()
-        # Unnormalizing images to [0, 255] range
-        video_np = (video_np * 255).astype(np.uint8)
-        fps = config.frame_rate
-        height, width = video_np.shape[1:3]
-        # In case a single image is generated
-        if video_np.shape[0] == 1:
-            output_filename = get_unique_filename(
-                f"image_output_{i}",
-                ".png",
-                prompt=config.prompt,
-                seed=seed,
-                resolution=(height, width, config.num_frames),
-                dir=output_dir,
-            )
-            imageio.imwrite(output_filename, video_np[0])
-        else:
-            output_filename = get_unique_filename(
-                f"video_output_{i}",
-                ".mp4",
-                prompt=config.prompt,
-                seed=seed,
-                resolution=(height, width, config.num_frames),
-                dir=output_dir,
-            )
-            print(output_filename)
-            # Write video
-            with imageio.get_writer(output_filename, fps=fps) as video:
-                for frame in video_np:
-                    video.append_data(frame)
+  height_padded = ((config.height - 1) // 32 + 1) * 32
+  width_padded = ((config.width - 1) // 32 + 1) * 32
+  num_frames_padded = ((config.num_frames - 2) // 8 + 1) * 8 + 1
+  padding = calculate_padding(config.height, config.width, height_padded, width_padded)
+
+  seed = 10
+  generator = torch.Generator().manual_seed(seed)
+  pipeline = LTXVideoPipeline.from_pretrained(config, enhance_prompt=False)
+  pipeline = LTXMultiScalePipeline(pipeline)
+  images = pipeline(
+      height=height_padded,
+      width=width_padded,
+      num_frames=num_frames_padded,
+      output_type="pt",
+      generator=generator,
+      config=config,
+  )
+  (pad_left, pad_right, pad_top, pad_bottom) = padding
+  pad_bottom = -pad_bottom
+  pad_right = -pad_right
+  if pad_bottom == 0:
+    pad_bottom = images.shape[3]
+  if pad_right == 0:
+    pad_right = images.shape[4]
+  images = images[:, :, : config.num_frames, pad_top:pad_bottom, pad_left:pad_right]
+  output_dir = Path(f"outputs/{datetime.today().strftime('%Y-%m-%d')}")
+  output_dir.mkdir(parents=True, exist_ok=True)
+  for i in range(images.shape[0]):
+    # Gathering from B, C, F, H, W to C, F, H, W and then permuting to F, H, W, C
+    video_np = images[i].permute(1, 2, 3, 0).detach().float().numpy()
+    # Unnormalizing images to [0, 255] range
+    video_np = (video_np * 255).astype(np.uint8)
+    fps = config.frame_rate
+    height, width = video_np.shape[1:3]
+    # In case a single image is generated
+    if video_np.shape[0] == 1:
+      output_filename = get_unique_filename(
+          f"image_output_{i}",
+          ".png",
+          prompt=config.prompt,
+          seed=seed,
+          resolution=(height, width, config.num_frames),
+          dir=output_dir,
+      )
+      imageio.imwrite(output_filename, video_np[0])
+    else:
+      output_filename = get_unique_filename(
+          f"video_output_{i}",
+          ".mp4",
+          prompt=config.prompt,
+          seed=seed,
+          resolution=(height, width, config.num_frames),
+          dir=output_dir,
+      )
+      print(output_filename)
+      # Write video
+      with imageio.get_writer(output_filename, fps=fps) as video:
+        for frame in video_np:
+          video.append_data(frame)
 
 
 def main(argv: Sequence[str]) -> None:
-    pyconfig.initialize(argv)
-    run(pyconfig.config)
+  pyconfig.initialize(argv)
+  run(pyconfig.config)
 
 
 if __name__ == "__main__":
-    app.run(main)
+  app.run(main)
@@ -612,4 +612,4 @@ def maybe_initialize_jax_distributed_system(raw_keys):
     initialize_jax_for_gpu()
     max_logging.log("Jax distributed system initialized on GPU!")
   else:
-    jax.distributed.initialize()
+    jax.distributed.initialize()