
Commit 7bed4f9

Commit message: formatting
Parent: 13656fb

11 files changed: 2023 additions & 2122 deletions


src/maxdiffusion/__init__.py

Lines changed: 358 additions & 365 deletions
Large diffs are not rendered by default.

src/maxdiffusion/generate_ltx_video.py

Lines changed: 24 additions & 36 deletions
@@ -14,67 +14,55 @@
 limitations under the License.
 """
 
-
 from absl import app
 from typing import Sequence
 import jax
 import json
 from maxdiffusion.models.ltx_video.transformers.transformer3d import Transformer3DModel
 import os
-import functools
 import jax.numpy as jnp
 from maxdiffusion import pyconfig
 from maxdiffusion.max_utils import (
     create_device_mesh,
-    setup_initial_state,
 )
-from jax.sharding import Mesh, PartitionSpec as P
 
 
 def validate_transformer_inputs(prompt_embeds, fractional_coords, latents, noise_cond):
-    print("prompts_embeds.shape: ", prompt_embeds.shape, prompt_embeds.dtype)
-    print("fractional_coords.shape: ",
-          fractional_coords.shape, fractional_coords.dtype)
-    print("latents.shape: ", latents.shape, latents.dtype)
-    print("noise_cond.shape: ", noise_cond.shape, noise_cond.dtype)
+  print("prompts_embeds.shape: ", prompt_embeds.shape, prompt_embeds.dtype)
+  print("fractional_coords.shape: ", fractional_coords.shape, fractional_coords.dtype)
+  print("latents.shape: ", latents.shape, latents.dtype)
+  print("noise_cond.shape: ", noise_cond.shape, noise_cond.dtype)
 
 
 def run(config):
-    key = jax.random.PRNGKey(0)
+  key = jax.random.PRNGKey(0)
+
+  devices_array = create_device_mesh(config)
+  mesh = Mesh(devices_array, config.mesh_axes)
+
+  batch_size, text_tokens, num_tokens, features = 4, 256, 2048, 128
+  base_dir = os.path.dirname(__file__)
 
-    devices_array = create_device_mesh(config)
-    mesh = Mesh(devices_array, config.mesh_axes)
+  # load in model config
+  config_path = os.path.join(base_dir, "models/ltx_video/xora_v1.2-13B-balanced-128.json")
+  with open(config_path, "r") as f:
+    model_config = json.load(f)
 
-    batch_size, text_tokens, num_tokens, features = 4, 256, 2048, 128
-    base_dir = os.path.dirname(__file__)
+  transformer = Transformer3DModel(**model_config, dtype=jnp.bfloat16, gradient_checkpointing="matmul_without_batch")
+  transformer_param_shapes = transformer.init_weights(key, batch_size, text_tokens, num_tokens, features, eval_only=False)
 
-    # load in model config
-    config_path = os.path.join(
-        base_dir, "models/ltx_video/xora_v1.2-13B-balanced-128.json")
-    with open(config_path, "r") as f:
-        model_config = json.load(f)
+  key, split_key = jax.random.split(key)
 
-    transformer = Transformer3DModel(
-        **model_config, dtype=jnp.bfloat16, gradient_checkpointing="matmul_without_batch")
-    transformer_param_shapes = transformer.init_weights(
-        key, batch_size, text_tokens, num_tokens, features, eval_only=False)
 
-    key, split_key = jax.random.split(key)
-    weights_init_fn = functools.partial(
-        transformer.init_weights,
-        split_key,
-        batch_size,
-        text_tokens,
-        num_tokens,
-        features,
-        eval_only=True
-    )
+  weights_init_fn = functools.partial(
+      transformer.init_weights, split_key, batch_size, text_tokens, num_tokens, features, eval_only=True
+  )
 
 
 def main(argv: Sequence[str]) -> None:
-    pyconfig.initialize(argv)
-    run(pyconfig.config)
+  pyconfig.initialize(argv)
+  run(pyconfig.config)
 
 
 if __name__ == "__main__":
-    app.run(main)
+  app.run(main)
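Note: the hunk ends after building weights_init_fn, a zero-argument closure over transformer.init_weights; the commit does not show it being consumed. As a minimal sketch of how such a closure is typically used (not part of this commit; materialize_params and its use of jax.eval_shape/jax.jit are illustrative assumptions):

    import jax

    def materialize_params(weights_init_fn, mesh):
      # Hypothetical helper, not from this diff: first trace the initializer
      # to get parameter shapes/dtypes without allocating device memory,
      # then run it under the mesh to produce real device arrays.
      abstract_params = jax.eval_shape(weights_init_fn)
      with mesh:
        params = jax.jit(weights_init_fn)()
      return abstract_params, params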

src/maxdiffusion/models/__init__.py

Lines changed: 8 additions & 9 deletions
@@ -25,15 +25,14 @@
 
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
 
-    from .controlnet_flax import FlaxControlNetModel
-    from .unet_2d_condition_flax import FlaxUNet2DConditionModel
-    from .vae_flax import FlaxAutoencoderKL
-    from .lora import *
-    from .flux.transformers.transformer_flux_flax import FluxTransformer2DModel
-    from .ltx_video.transformers.transformer3d import Transformer3DModel
+  from .controlnet_flax import FlaxControlNetModel
+  from .unet_2d_condition_flax import FlaxUNet2DConditionModel
+  from .vae_flax import FlaxAutoencoderKL
+  from .lora import *
+  from .flux.transformers.transformer_flux_flax import FluxTransformer2DModel
+  from .ltx_video.transformers.transformer3d import Transformer3DModel
 
 else:
-    import sys
+  import sys
 
-    sys.modules[__name__] = _LazyModule(
-        __name__, globals()["__file__"], _import_structure, module_spec=__spec__)
+  sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
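Note: the else branch replaces this package's entry in sys.modules with a _LazyModule, so the heavy Flax imports above only execute when a name is first accessed. A minimal sketch of that pattern (simplified; the real _LazyModule shipped with diffusers/maxdiffusion handles more cases):

    import importlib
    import types

    class LazyModule(types.ModuleType):
      """Illustrative stand-in for _LazyModule: resolve attributes on first use."""

      def __init__(self, name, import_structure):
        super().__init__(name)
        # Map each public name to the submodule that defines it.
        self._attr_to_module = {attr: mod for mod, attrs in import_structure.items() for attr in attrs}

      def __getattr__(self, attr):
        if attr not in self._attr_to_module:
          raise AttributeError(f"module {self.__name__!r} has no attribute {attr!r}")
        submodule = importlib.import_module("." + self._attr_to_module[attr], self.__name__)
        value = getattr(submodule, attr)
        setattr(self, attr, value)  # cache, so __getattr__ is skipped next time
        return value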

src/maxdiffusion/models/ltx_video/gradient_checkpoint.py

Lines changed: 51 additions & 51 deletions
@@ -8,63 +8,63 @@
 
 
 class GradientCheckpointType(Enum):
-    """
-    Defines the type of the gradient checkpoint we will have
+  """
+  Defines the type of the gradient checkpoint we will have
 
-    NONE - means no gradient checkpoint
-    FULL - means full gradient checkpoint, wherever possible (minimum memory usage)
-    MATMUL_WITHOUT_BATCH - means gradient checkpoint for every linear/matmul operation,
-    except for ones that involve batch dimension - that means that all attention and projection
-    layers will have gradient checkpoint, but not the backward with respect to the parameters
-    """
+  NONE - means no gradient checkpoint
+  FULL - means full gradient checkpoint, wherever possible (minimum memory usage)
+  MATMUL_WITHOUT_BATCH - means gradient checkpoint for every linear/matmul operation,
+  except for ones that involve batch dimension - that means that all attention and projection
+  layers will have gradient checkpoint, but not the backward with respect to the parameters
+  """
 
-    NONE = auto()
-    FULL = auto()
-    MATMUL_WITHOUT_BATCH = auto()
+  NONE = auto()
+  FULL = auto()
+  MATMUL_WITHOUT_BATCH = auto()
 
-    @classmethod
-    def from_str(cls, s: Optional[str] = None) -> "GradientCheckpointType":
-        """
-        Constructs the gradient checkpoint type from a string
+  @classmethod
+  def from_str(cls, s: Optional[str] = None) -> "GradientCheckpointType":
+    """
+    Constructs the gradient checkpoint type from a string
 
-        Args:
-            s (Optional[str], optional): The name of the gradient checkpointing policy. Defaults to None.
+    Args:
+        s (Optional[str], optional): The name of the gradient checkpointing policy. Defaults to None.
 
-        Returns:
-            GradientCheckpointType: The policy that corresponds to the string
-        """
-        if s is None:
-            s = "none"
-        return GradientCheckpointType[s.upper()]
+    Returns:
+        GradientCheckpointType: The policy that corresponds to the string
+    """
+    if s is None:
+      s = "none"
+    return GradientCheckpointType[s.upper()]
 
-    def to_jax_policy(self):
-        """
-        Converts the gradient checkpoint type to a jax policy
-        """
-        match self:
-            case GradientCheckpointType.NONE:
-                return SKIP_GRADIENT_CHECKPOINT_KEY
-            case GradientCheckpointType.FULL:
-                return None
-            case GradientCheckpointType.MATMUL_WITHOUT_BATCH:
-                return jax.checkpoint_policies.checkpoint_dots_with_no_batch_dims
+  def to_jax_policy(self):
+    """
+    Converts the gradient checkpoint type to a jax policy
+    """
+    match self:
+      case GradientCheckpointType.NONE:
+        return SKIP_GRADIENT_CHECKPOINT_KEY
+      case GradientCheckpointType.FULL:
+        return None
+      case GradientCheckpointType.MATMUL_WITHOUT_BATCH:
+        return jax.checkpoint_policies.checkpoint_dots_with_no_batch_dims
 
-    def apply(self, module: nn.Module) -> nn.Module:
-        """
-        Applies a gradient checkpoint policy to a module
-        if no policy is needed, it will return the module as is
+  def apply(self, module: nn.Module) -> nn.Module:
+    """
+    Applies a gradient checkpoint policy to a module
+    if no policy is needed, it will return the module as is
 
-        Args:
-            module (nn.Module): the module to apply the policy to
+    Args:
+        module (nn.Module): the module to apply the policy to
 
-        Returns:
-            nn.Module: the module with the policy applied
-        """
-        policy = self.to_jax_policy()
-        if policy == SKIP_GRADIENT_CHECKPOINT_KEY:
-            return module
-        return nn.remat(  # pylint: disable=invalid-name
-            module,
-            prevent_cse=False,
-            policy=policy,
-        )
+    Returns:
+        nn.Module: the module with the policy applied
+    """
+    policy = self.to_jax_policy()
+    if policy == SKIP_GRADIENT_CHECKPOINT_KEY:
+      return module
+    return nn.remat(  # pylint: disable=invalid-name
+        module,
+        prevent_cse=False,
+        policy=policy,
+    )
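Note: this hunk only re-indents the class; its behavior is unchanged. For orientation, typical usage looks like the following sketch (the config string and TransformerBlock are illustrative, not taken from this diff):

    # Pick a checkpointing policy from a config string and wrap a Flax module.
    # "matmul_without_batch" rematerializes matmuls without a batch dimension,
    # trading recompute for memory in attention/projection layers.
    policy = GradientCheckpointType.from_str("matmul_without_batch")
    CheckpointedBlock = policy.apply(TransformerBlock)  # TransformerBlock is a stand-in nn.Module
    # from_str(None) falls back to NONE, in which case apply() returns the module unchanged.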
