AI-Hypercomputer
diff --git a/src/maxdiffusion/configs/ltx_video.yml
Lines changed: 5 additions & 5 deletions b/src/maxdiffusion/configs/ltx_video.yml
Lines changed: 5 additions & 5 deletions
diff --git a/src/maxdiffusion/generate_ltx_video.py
Lines changed: 35 additions & 29 deletions b/src/maxdiffusion/generate_ltx_video.py
Lines changed: 35 additions & 29 deletions
diff --git a/src/maxdiffusion/models/ltx_video/autoencoders/__init__.py
Lines changed: 16 additions & 0 deletions b/src/maxdiffusion/models/ltx_video/autoencoders/__init__.py
Lines changed: 16 additions & 0 deletions
diff --git a/src/maxdiffusion/models/ltx_video/autoencoders/causal_conv3d.py
Lines changed: 16 additions & 0 deletions b/src/maxdiffusion/models/ltx_video/autoencoders/causal_conv3d.py
Lines changed: 16 additions & 0 deletions
diff --git a/src/maxdiffusion/models/ltx_video/autoencoders/causal_video_autoencoder.py
Lines changed: 18 additions & 2 deletions b/src/maxdiffusion/models/ltx_video/autoencoders/causal_video_autoencoder.py
Lines changed: 18 additions & 2 deletions
diff --git a/src/maxdiffusion/models/ltx_video/autoencoders/conv_nd_factory.py
Lines changed: 16 additions & 0 deletions b/src/maxdiffusion/models/ltx_video/autoencoders/conv_nd_factory.py
Lines changed: 16 additions & 0 deletions
diff --git a/src/maxdiffusion/models/ltx_video/autoencoders/dual_conv3d.py
Lines changed: 16 additions & 0 deletions b/src/maxdiffusion/models/ltx_video/autoencoders/dual_conv3d.py
Lines changed: 16 additions & 0 deletions
diff --git a/src/maxdiffusion/models/ltx_video/autoencoders/latent_upsampler.py
Lines changed: 16 additions & 0 deletions b/src/maxdiffusion/models/ltx_video/autoencoders/latent_upsampler.py
Lines changed: 16 additions & 0 deletions
diff --git a/src/maxdiffusion/models/ltx_video/autoencoders/pixel_norm.py
Lines changed: 16 additions & 0 deletions b/src/maxdiffusion/models/ltx_video/autoencoders/pixel_norm.py
Lines changed: 16 additions & 0 deletions
diff --git a/src/maxdiffusion/models/ltx_video/autoencoders/pixel_shuffle.py
Lines changed: 16 additions & 0 deletions b/src/maxdiffusion/models/ltx_video/autoencoders/pixel_shuffle.py
Lines changed: 16 additions & 0 deletions
@@ -8,7 +8,7 @@ activations_dtype: 'bfloat16'
 
 
 run_name: ''
-output_dir: 'ltx-video-output'
+output_dir: '/mnt/disks/diffusionproj'
 save_config_to_gcs: False
 
 #Checkpoints
@@ -21,19 +21,19 @@ sampler: "from_checkpoint"
 
 # Generation parameters
 pipeline_type: multi-scale
-prompt: "A woman with light skin, wearing a blue jacket and a black hat with a veil, looks down and to her right, then back up as she speaks; she has brown hair styled in an updo, light brown eyebrows, and is wearing a white collared shirt under her jacket; the camera remains stationary on her face as she speaks; the background is out of focus, but shows trees and people in period clothing; the scene is captured in real-life footage."
+prompt: "A man in a dimly lit room talks on a vintage telephone, hangs up, and looks down with a sad expression. He holds the black rotary phone to his right ear with his right hand, his left hand holding a rocks glass with amber liquid. He wears a brown suit jacket over a white shirt, and a gold ring on his left ring finger. His short hair is neatly combed, and he has light skin with visible wrinkles around his eyes. The camera remains stationary, focused on his face and upper body. The room is dark, lit only by a warm light source off-screen to the left, casting shadows on the wall behind him. The scene appears to be from a movie. "
+#negative_prompt: "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"
 height: 512
 width: 512
-num_frames: 88 #344
+num_frames: 88
 flow_shift: 5.0
-fps: 24
 downscale_factor: 0.6666666
 spatial_upscaler_model_path: "ltxv-spatial-upscaler-0.9.7.safetensors"
 prompt_enhancement_words_threshold: 120
 stg_mode: "attention_values"
 decode_timestep: 0.05
 decode_noise_scale: 0.025
-models_dir: "/mnt/disks/diffusionproj" #where safetensor file is
+seed: 10
 
 
 first_pass:
 
@@ -1,16 +1,28 @@
+"""
+ Copyright 2025 Google LLC
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ """
+
 import numpy as np
 from absl import app
 from typing import Sequence
 from maxdiffusion.pipelines.ltx_video.ltx_video_pipeline import LTXVideoPipeline
 from maxdiffusion.pipelines.ltx_video.ltx_video_pipeline import LTXMultiScalePipeline
 from maxdiffusion import pyconfig
-from maxdiffusion.models.ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
-from huggingface_hub import hf_hub_download
 import imageio
 from datetime import datetime
-
 import os
-import torch
 import time
 from pathlib import Path
 
@@ -28,9 +40,6 @@ def calculate_padding(
   pad_bottom = pad_height - pad_top  # Handles odd padding
   pad_left = pad_width // 2
   pad_right = pad_width - pad_left  # Handles odd padding
-
-  # Return padded tensor
-  # Padding format is (left, right, top, bottom)
   padding = (pad_left, pad_right, pad_top, pad_bottom)
   return padding
 
@@ -59,8 +68,6 @@ def convert_prompt_to_filename(text: str, max_len: int = 20) -> str:
   return "-".join(result)
 
 
-
-
 def get_unique_filename(
     base: str,
     ext: str,
@@ -70,9 +77,7 @@ def get_unique_filename(
     endswith=None,
     index_range=1000,
 ) -> Path:
-  base_filename = (
-      f"{base}_{convert_prompt_to_filename(prompt, max_len=30)}_{resolution[0]}x{resolution[1]}x{resolution[2]}"
-  )
+  base_filename = f"{base}_{convert_prompt_to_filename(prompt, max_len=30)}_{resolution[0]}x{resolution[1]}x{resolution[2]}"
   for i in range(index_range):
     filename = dir / f"{base_filename}_{i}{endswith if endswith else ''}{ext}"
     if not os.path.exists(filename):
@@ -87,13 +92,23 @@ def run(config):
   padding = calculate_padding(config.height, config.width, height_padded, width_padded)
   prompt_enhancement_words_threshold = config.prompt_enhancement_words_threshold
   prompt_word_count = len(config.prompt.split())
-  enhance_prompt = (
-      prompt_enhancement_words_threshold > 0 and prompt_word_count < prompt_enhancement_words_threshold
-  )
+  enhance_prompt = prompt_enhancement_words_threshold > 0 and prompt_word_count < prompt_enhancement_words_threshold
 
   pipeline = LTXVideoPipeline.from_pretrained(config, enhance_prompt=enhance_prompt)
-  if config.pipeline_type == "multi-scale": 
+  if config.pipeline_type == "multi-scale":
     pipeline = LTXMultiScalePipeline(pipeline)
+  # s0 = time.perf_counter()
+  # images = pipeline(
+  #     height=height_padded,
+  #     width=width_padded,
+  #     num_frames=num_frames_padded,
+  #     is_video=True,
+  #     output_type="pt",
+  #     config=config,
+  #     enhance_prompt=enhance_prompt,
+  #     seed = config.seed
+  # )
+  # print("compile time: ", (time.perf_counter() - s0))
   s0 = time.perf_counter()
   images = pipeline(
       height=height_padded,
@@ -102,21 +117,11 @@ def run(config):
       is_video=True,
       output_type="pt",
       config=config,
-      enhance_prompt = False
-  )
-  print("compile time: ", (time.perf_counter() - s0))
-  s0 = time.perf_counter()
-  images = pipeline(
-      height=height_padded,
-      width=width_padded,
-      num_frames=num_frames_padded,
-      is_video=True,
-      output_type="pt",
-      config=config,
-      enhance_prompt = False
+      enhance_prompt=enhance_prompt,
+      seed=config.seed,
   )
   print("generation time: ", (time.perf_counter() - s0))
-  
+
   (pad_left, pad_right, pad_top, pad_bottom) = padding
   pad_bottom = -pad_bottom
   pad_right = -pad_right
@@ -127,6 +132,7 @@ def run(config):
   images = images[:, :, : config.num_frames, pad_top:pad_bottom, pad_left:pad_right]
   output_dir = Path(f"outputs/{datetime.today().strftime('%Y-%m-%d')}")
   output_dir.mkdir(parents=True, exist_ok=True)
+
   for i in range(images.shape[0]):
     # Gathering from B, C, F, H, W to C, F, H, W and then permuting to F, H, W, C
     video_np = images[i].permute(1, 2, 3, 0).detach().float().numpy()
 
@@ -0,0 +1,16 @@
+# Copyright 2025 Lightricks Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://github.com/Lightricks/LTX-Video/blob/main/LICENSE
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This implementation is based on the Torch version available at:
+# https://github.com/Lightricks/LTX-Video/tree/main
@@ -1,3 +1,19 @@
+# Copyright 2025 Lightricks Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://github.com/Lightricks/LTX-Video/blob/main/LICENSE
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This implementation is based on the Torch version available at:
+# https://github.com/Lightricks/LTX-Video/tree/main
 from typing import Tuple, Union
 
 import torch
 
@@ -1,3 +1,19 @@
+# Copyright 2025 Lightricks Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://github.com/Lightricks/LTX-Video/blob/main/LICENSE
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This implementation is based on the Torch version available at:
+# https://github.com/Lightricks/LTX-Video/tree/main
 import json
 import os
 from functools import partial
@@ -218,11 +234,11 @@ def to_json_string(self) -> str:
     return json.dumps(self.config.__dict__)
 
   def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True):
-    if any([key.startswith("vae.") for key in state_dict.keys()]):
+    if any([key.startswith("vae.") for key in state_dict.keys()]):  # noqa: C419
       state_dict = {key.replace("vae.", ""): value for key, value in state_dict.items() if key.startswith("vae.")}
     ckpt_state_dict = {key: value for key, value in state_dict.items() if not key.startswith(PER_CHANNEL_STATISTICS_PREFIX)}
 
-    model_keys = set(name for name, _ in self.named_modules())
+    model_keys = set(name for name, _ in self.named_modules())  # noqa: C401
 
     key_mapping = {
         ".resnets.": ".res_blocks.",
 
@@ -1,3 +1,19 @@
+# Copyright 2025 Lightricks Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://github.com/Lightricks/LTX-Video/blob/main/LICENSE
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This implementation is based on the Torch version available at:
+# https://github.com/Lightricks/LTX-Video/tree/main
 from typing import Tuple, Union
 
 import torch
 
@@ -1,3 +1,19 @@
+# Copyright 2025 Lightricks Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://github.com/Lightricks/LTX-Video/blob/main/LICENSE
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This implementation is based on the Torch version available at:
+# https://github.com/Lightricks/LTX-Video/tree/main
 import math
 from typing import Tuple, Union
 
 
@@ -1,3 +1,19 @@
+# Copyright 2025 Lightricks Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://github.com/Lightricks/LTX-Video/blob/main/LICENSE
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This implementation is based on the Torch version available at:
+# https://github.com/Lightricks/LTX-Video/tree/main
 from typing import Optional, Union
 from pathlib import Path
 import os
 
@@ -1,3 +1,19 @@
+# Copyright 2025 Lightricks Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://github.com/Lightricks/LTX-Video/blob/main/LICENSE
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This implementation is based on the Torch version available at:
+# https://github.com/Lightricks/LTX-Video/tree/main
 import torch
 from torch import nn
 
 
@@ -1,3 +1,19 @@
+# Copyright 2025 Lightricks Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://github.com/Lightricks/LTX-Video/blob/main/LICENSE
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This implementation is based on the Torch version available at:
+# https://github.com/Lightricks/LTX-Video/tree/main
 import torch.nn as nn
 from einops import rearrange