AI-Hypercomputer
diff --git a/‎src/maxdiffusion/checkpointing/checkpointing_utils.py‎
Lines changed: 1 addition & 1 deletion b/‎src/maxdiffusion/checkpointing/checkpointing_utils.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/maxdiffusion/configs/ltx_video.yml‎
Lines changed: 2 additions & 3 deletions b/‎src/maxdiffusion/configs/ltx_video.yml‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎src/maxdiffusion/generate_ltx_video.py‎
Lines changed: 99 additions & 98 deletions b/‎src/maxdiffusion/generate_ltx_video.py‎
Lines changed: 99 additions & 98 deletions
diff --git a/‎src/maxdiffusion/max_utils.py‎
Lines changed: 3 additions & 1 deletion b/‎src/maxdiffusion/max_utils.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎src/maxdiffusion/models/attention_flax.py‎
Lines changed: 1 addition & 1 deletion b/‎src/maxdiffusion/models/attention_flax.py‎
Lines changed: 1 addition & 1 deletion
@@ -217,7 +217,7 @@ def load_state_if_possible(
           return checkpoint_manager.restore(latest_step, args=ocp.args.StandardRestore(abstract_unboxed_pre_state))
         else:
           item = {checkpoint_item: orbax.checkpoint.args.PyTreeRestore(item=abstract_unboxed_pre_state)}
-          return checkpoint_manager.restore(latest_step, args=orbax.checkpoint.args.Composite(**item)) 
+          return checkpoint_manager.restore(latest_step, args=orbax.checkpoint.args.Composite(**item))
 
       def map_to_pspec(data):
         pspec = data.sharding.spec
 
@@ -6,7 +6,6 @@ jax_cache_dir: ''
 weights_dtype: 'bfloat16'
 activations_dtype: 'bfloat16'
 
-
 run_name: ''
 output_dir: 'ltx-video-output'
 save_config_to_gcs: False
@@ -21,9 +20,9 @@ frame_rate: 30
 
 
 # Generation parameters
+ckpt_path: "/mnt/disks/diffusionproj"
 prompt: "A man in a dimly lit room talks on a vintage telephone, hangs up, and looks down with a sad expression. He holds the black rotary phone to his right ear with his right hand, his left hand holding a rocks glass with amber liquid. He wears a brown suit jacket over a white shirt, and a gold ring on his left ring finger. His short hair is neatly combed, and he has light skin with visible wrinkles around his eyes. The camera remains stationary, focused on his face and upper body. The room is dark, lit only by a warm light source off-screen to the left, casting shadows on the wall behind him. The scene appears to be from a movie."
 #negative_prompt: "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"
-#do_classifier_free_guidance: True
 height: 512
 width: 512
 num_frames: 88
@@ -63,7 +62,7 @@ ici_sequence_parallelism: 1
 
 
 learning_rate_schedule_steps: -1
-max_train_steps: 500 #TODO: change this
+max_train_steps: 500 
 pretrained_model_name_or_path: ''
 unet_checkpoint: ''
 dataset_name: 'diffusers/pokemon-gpt4-captions'
 
@@ -3,57 +3,57 @@
 from typing import Sequence
 from maxdiffusion.pipelines.ltx_video.ltx_video_pipeline import LTXVideoPipeline
 from maxdiffusion import pyconfig
-import jax.numpy as jnp
 import imageio
 from datetime import datetime
 import os
-import json
 import torch
 from pathlib import Path
+
+
 def calculate_padding(
     source_height: int, source_width: int, target_height: int, target_width: int
 ) -> tuple[int, int, int, int]:
 
-    # Calculate total padding needed
-    pad_height = target_height - source_height
-    pad_width = target_width - source_width
-
-    # Calculate padding for each side
-    pad_top = pad_height // 2
-    pad_bottom = pad_height - pad_top  # Handles odd padding
-    pad_left = pad_width // 2
-    pad_right = pad_width - pad_left  # Handles odd padding
-
-    # Return padded tensor
-    # Padding format is (left, right, top, bottom)
-    padding = (pad_left, pad_right, pad_top, pad_bottom)
-    return padding
-  
+  # Calculate total padding needed
+  pad_height = target_height - source_height
+  pad_width = target_width - source_width
+
+  # Calculate padding for each side
+  pad_top = pad_height // 2
+  pad_bottom = pad_height - pad_top  # Handles odd padding
+  pad_left = pad_width // 2
+  pad_right = pad_width - pad_left  # Handles odd padding
+
+  # Return the per-side padding amounts; no tensor is padded here
+  # Padding format is (left, right, top, bottom)
+  padding = (pad_left, pad_right, pad_top, pad_bottom)
+  return padding
+
+
 def convert_prompt_to_filename(text: str, max_len: int = 20) -> str:
-    # Remove non-letters and convert to lowercase
-    clean_text = "".join(
-        char.lower() for char in text if char.isalpha() or char.isspace()
-    )
-
-    # Split into words
-    words = clean_text.split()
-
-    # Build result string keeping track of length
-    result = []
-    current_length = 0
-
-    for word in words:
-        # Add word length plus 1 for underscore (except for first word)
-        new_length = current_length + len(word)
-
-        if new_length <= max_len:
-            result.append(word)
-            current_length += len(word)
-        else:
-            break
-
-    return "-".join(result)
-  
+  # Remove non-letters and convert to lowercase
+  clean_text = "".join(char.lower() for char in text if char.isalpha() or char.isspace())
+
+  # Split into words
+  words = clean_text.split()
+
+  # Build result string keeping track of length
+  result = []
+  current_length = 0
+
+  for word in words:
+    # Count only the word's own length; the "-" separators added by the join below are not counted
+    new_length = current_length + len(word)
+
+    if new_length <= max_len:
+      result.append(word)
+      current_length += len(word)
+    else:
+      break
+
+  return "-".join(result)
+
+
 def get_unique_filename(
     base: str,
     ext: str,
@@ -64,79 +64,80 @@ def get_unique_filename(
     endswith=None,
     index_range=1000,
 ) -> Path:
-    base_filename = f"{base}_{convert_prompt_to_filename(prompt, max_len=30)}_{seed}_{resolution[0]}x{resolution[1]}x{resolution[2]}"
-    for i in range(index_range):
-        filename = dir / f"{base_filename}_{i}{endswith if endswith else ''}{ext}"
-        if not os.path.exists(filename):
-            return filename
-    raise FileExistsError(
-        f"Could not find a unique filename after {index_range} attempts."
-    )
+  base_filename = (
+      f"{base}_{convert_prompt_to_filename(prompt, max_len=30)}_{seed}_{resolution[0]}x{resolution[1]}x{resolution[2]}"
+  )
+  for i in range(index_range):
+    filename = dir / f"{base_filename}_{i}{endswith if endswith else ''}{ext}"
+    if not os.path.exists(filename):
+      return filename
+  raise FileExistsError(f"Could not find a unique filename after {index_range} attempts.")
+
+
 def run(config):
-  
+
   height_padded = ((config.height - 1) // 32 + 1) * 32
   width_padded = ((config.width - 1) // 32 + 1) * 32
   num_frames_padded = ((config.num_frames - 2) // 8 + 1) * 8 + 1
   padding = calculate_padding(config.height, config.width, height_padded, width_padded)
   prompt_enhancement_words_threshold = config.prompt_enhancement_words_threshold
   prompt_word_count = len(config.prompt.split())
-  enhance_prompt = (
-    prompt_enhancement_words_threshold > 0 and prompt_word_count < prompt_enhancement_words_threshold
-  )
-  
-  seed = 10 #change this, generator in pytorch, used in prepare_latents
+  enhance_prompt = prompt_enhancement_words_threshold > 0 and prompt_word_count < prompt_enhancement_words_threshold
+
+  seed = 10
   generator = torch.Generator().manual_seed(seed)
   pipeline = LTXVideoPipeline.from_pretrained(config, enhance_prompt)
-  images = pipeline(height=height_padded, width=width_padded, num_frames=num_frames_padded, is_video=True, output_type='pt', generator = generator).images
-  
+  images = pipeline(
+      height=height_padded,
+      width=width_padded,
+      num_frames=num_frames_padded,
+      is_video=True,
+      output_type="pt",
+      generator=generator,
+  ).images
+
   (pad_left, pad_right, pad_top, pad_bottom) = padding
   pad_bottom = -pad_bottom
   pad_right = -pad_right
   if pad_bottom == 0:
-      pad_bottom = images.shape[3]
+    pad_bottom = images.shape[3]
   if pad_right == 0:
-      pad_right = images.shape[4]
-  images = images[:, :, :config.num_frames, pad_top:pad_bottom, pad_left:pad_right]
+    pad_right = images.shape[4]
+  images = images[:, :, : config.num_frames, pad_top:pad_bottom, pad_left:pad_right]
   output_dir = Path(f"outputs/{datetime.today().strftime('%Y-%m-%d')}")
   output_dir.mkdir(parents=True, exist_ok=True)
   for i in range(images.shape[0]):
-      # Gathering from B, C, F, H, W to C, F, H, W and then permuting to F, H, W, C
-      video_np = images[i].permute(1, 2, 3, 0).detach().float().numpy()
-      # Unnormalizing images to [0, 255] range
-      video_np = (video_np * 255).astype(np.uint8)
-      fps = config.frame_rate
-      height, width = video_np.shape[1:3]
-      # In case a single image is generated
-      if video_np.shape[0] == 1:
-          output_filename = get_unique_filename(
-              f"image_output_{i}",
-              ".png",
-              prompt=config.prompt,
-              seed=seed,
-              resolution=(height, width, config.num_frames),
-              dir=output_dir,
-          )
-          imageio.imwrite(output_filename, video_np[0])
-      else:
-          output_filename = get_unique_filename(
-              f"video_output_{i}",
-              ".mp4",
-              prompt=config.prompt,
-              seed=seed,
-              resolution=(height, width, config.num_frames),
-              dir=output_dir,
-          )
-          print(output_filename)
-          # Write video
-          with imageio.get_writer(output_filename, fps=fps) as video:
-              for frame in video_np:
-                  video.append_data(frame)
-
-  
-  
-  
-  
-  
+    # Gathering from B, C, F, H, W to C, F, H, W and then permuting to F, H, W, C
+    video_np = images[i].permute(1, 2, 3, 0).detach().float().numpy()
+    # Unnormalizing images to [0, 255] range
+    video_np = (video_np * 255).astype(np.uint8)
+    fps = config.frame_rate
+    height, width = video_np.shape[1:3]
+    # In case a single image is generated
+    if video_np.shape[0] == 1:
+      output_filename = get_unique_filename(
+          f"image_output_{i}",
+          ".png",
+          prompt=config.prompt,
+          seed=seed,
+          resolution=(height, width, config.num_frames),
+          dir=output_dir,
+      )
+      imageio.imwrite(output_filename, video_np[0])
+    else:
+      output_filename = get_unique_filename(
+          f"video_output_{i}",
+          ".mp4",
+          prompt=config.prompt,
+          seed=seed,
+          resolution=(height, width, config.num_frames),
+          dir=output_dir,
+      )
+      print(output_filename)
+      # Write video
+      with imageio.get_writer(output_filename, fps=fps) as video:
+        for frame in video_np:
+          video.append_data(frame)
 
 
 def main(argv: Sequence[str]) -> None:
@@ -145,4 +146,4 @@ def main(argv: Sequence[str]) -> None:
 
 
 if __name__ == "__main__":
-  app.run(main)
+  app.run(main)
@@ -251,6 +251,7 @@ def fill_unspecified_mesh_axes(parallelism_vals, target_product, parallelism_typ
 
   return parallelism_vals
 
+
 def create_device_mesh(config, devices=None):
   """Creates a device mesh with each slice in its own data parallel group. If there is only one slice, uses two replicas"""
   if devices is None:
@@ -269,6 +270,7 @@ def create_device_mesh(config, devices=None):
 
   return mesh
 
+
 def unbox_logicallypartioned_trainstate(boxed_train_state: train_state.TrainState):
   """Unboxes the flax.LogicallyPartitioned pieces in a train state.
 
@@ -590,4 +592,4 @@ def maybe_initialize_jax_distributed_system(raw_keys):
     initialize_jax_for_gpu()
     max_logging.log("Jax distributed system initialized on GPU!")
   else:
-    jax.distributed.initialize()
+    jax.distributed.initialize()
@@ -1188,4 +1188,4 @@ def setup(self):
   def __call__(self, hidden_states, deterministic=True):
     hidden_states = self.proj(hidden_states)
     hidden_linear, hidden_gelu = jnp.split(hidden_states, 2, axis=2)
-    return self.dropout_layer(hidden_linear * nn.gelu(hidden_gelu), deterministic=deterministic)
+    return self.dropout_layer(hidden_linear * nn.gelu(hidden_gelu), deterministic=deterministic)