AI-Hypercomputer
diff --git a/‎.github/workflows/UnitTests.yml‎
Lines changed: 3 additions & 3 deletions b/‎.github/workflows/UnitTests.yml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎README.md‎
Lines changed: 18 additions & 1 deletion b/‎README.md‎
Lines changed: 18 additions & 1 deletion
diff --git a/‎dependencies/requirements/generated_requirements/requirements.txt‎
Lines changed: 0 additions & 2 deletions b/‎dependencies/requirements/generated_requirements/requirements.txt‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎docker_build_dependency_image.sh‎
Lines changed: 23 additions & 0 deletions b/‎docker_build_dependency_image.sh‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎maxdiffusion_dependencies.Dockerfile‎
Lines changed: 24 additions & 0 deletions b/‎maxdiffusion_dependencies.Dockerfile‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎maxdiffusion_jax_ai_image_tpu.Dockerfile‎
Lines changed: 31 additions & 0 deletions b/‎maxdiffusion_jax_ai_image_tpu.Dockerfile‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎requirements.txt‎
Lines changed: 40 additions & 0 deletions b/‎requirements.txt‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎requirements_with_jax_ai_image.txt‎
Lines changed: 41 additions & 0 deletions b/‎requirements_with_jax_ai_image.txt‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎setup.cfg‎
Lines changed: 20 additions & 0 deletions b/‎setup.cfg‎
Lines changed: 20 additions & 0 deletions
@@ -57,11 +57,11 @@ jobs:
     - name: PyTest
       run: | #--deselect=src/maxdiffusion/tests/input_pipeline_interface_test.py
         export LIBTPU_INIT_ARGS='--xla_tpu_scoped_vmem_limit_kib=65536'
-        HF_HUB_CACHE=/mnt/disks/github-runner-disk/ HF_HOME=/mnt/disks/github-runner-disk/ TOKENIZERS_PARALLELISM=false python3 -m pytest --deselect=src/maxdiffusion/tests/ltx_transformer_step_test.py -x
-#  add_pull_ready:
+        HF_HUB_CACHE=/mnt/disks/github-runner-disk/ HF_HOME=/mnt/disks/github-runner-disk/ TOKENIZERS_PARALLELISM=false python3 -m pytest --ignore=src/maxdiffusion/kernels/ --deselect=src/maxdiffusion/tests/ltx_transformer_step_test.py -x
+#  add_pull_ready:
 #    if: github.ref != 'refs/heads/main'
 #    permissions:
 #      checks: read
 #      pull-requests: write
 #    needs: build
-#    uses: ./.github/workflows/AddLabel.yml
+#    uses: ./.github/workflows/AddLabel.yml
@@ -597,6 +597,24 @@ To generate images, run the following command:
     ...
   ```
 
+### Ring Attention
+We added ring attention support for Wan models. Below are the stats for one `720p` (81 frames) video generation (with CFG DP):
+| Accelerator |  Model | Attention Type | Inference Steps | Sharding | e2e Generation Time |
+| -- | -- | -- | -- | -- | -- | 
+| v7x-8 | WAN 2.1 | Tokamax Flash | 50 | dp2-fsdp1-context4-tp1 | 264.2 |
+| v7x-8 | WAN 2.1 | Tokamax Ring | 50 | dp2-fsdp1-context4-tp1 | **252.4** |
+| v7x-8 | WAN 2.2 | Tokamax Flash | 40 | dp2-fsdp1-context4-tp1 | 212.7 |
+| v7x-8 | WAN 2.2 | Tokamax Ring | 40 | dp2-fsdp1-context4-tp1 | **201.7** |
+
+| Accelerator |  Model | Attention Type | Inference Steps | Sharding | e2e Generation Time |
+| -- | -- | -- | -- | -- | -- | 
+| v7x-16 | WAN 2.1 | Tokamax Flash | 50 | dp2-fsdp1-context8-tp1 | 146.6 |
+| v7x-16 | WAN 2.1 | Tokamax Ring | 50 | dp2-fsdp1-context8-tp1 | **137.2** |
+| v7x-16 | WAN 2.2 | Tokamax Flash | 40 | dp2-fsdp1-context8-tp1 | **117.8** |
+| v7x-16 | WAN 2.2 | Tokamax Ring | 40 | dp2-fsdp1-context8-tp1 | 137.5 |
+
+(* There are some known stability issues for ring attention on 16 TPUs; please use `tokamax_flash` attention instead.)
+
   ## Flux
 
   First make sure you have permissions to access the Flux repos in Huggingface.
@@ -772,4 +790,3 @@ This script will automatically format your code with `pyink` and help you identi
 
 
 The full suite of end-to-end tests is in `tests` and `src/maxdiffusion/tests`. We run them on a nightly cadence.
-
 
@@ -2,7 +2,6 @@
 # If you need to modify dependencies, please do so in the host requirements file and run seed-env again.
 
 absl-py>=2.3.1
-accelerate>=1.13.0
 aiofiles>=25.1.0
 aiohappyeyeballs>=2.6.1
 aiohttp>=3.13.3
@@ -81,7 +80,6 @@ isort>=8.0.1
 jaraco-functools>=4.4.0
 jax>=0.9.0
 jaxlib>=0.9.0
-jaxopt>=0.8.5
 jaxtyping>=0.3.9
 jinja2>=3.1.6
 keras>=3.13.1
 
@@ -66,6 +66,28 @@ if [[ ${DEVICE} == "gpu" ]]; then
     export BASEIMAGE=ghcr.io/nvidia/jax:base
   fi
   docker build --network host --build-arg MODE=${MODE} --build-arg JAX_VERSION=$JAX_VERSION --build-arg DEVICE=$DEVICE --build-arg BASEIMAGE=$BASEIMAGE -f ./maxdiffusion_gpu_dependencies.Dockerfile -t ${LOCAL_IMAGE_NAME} .
+<<<<<<< HEAD
+else 
+  if [[ ${MODE} == "stable_stack" || ${MODE} == "jax_ai_image" ]]; then
+    if [[ ! -v BASEIMAGE ]]; then
+      echo "Erroring out because BASEIMAGE is unset, please set it!"
+      exit 1
+    fi
+    docker build --no-cache \
+      --build-arg JAX_AI_IMAGE_BASEIMAGE=${BASEIMAGE} \
+      --build-arg COMMIT_HASH=${COMMIT_HASH} \
+      --network=host \
+      -t ${LOCAL_IMAGE_NAME} \
+      -f maxdiffusion_jax_ai_image_tpu.Dockerfile .
+  else
+    docker build --no-cache \
+      --network=host \
+      --build-arg MODE=${MODE} \
+      --build-arg JAX_VERSION=${JAX_VERSION} \
+      -t ${LOCAL_IMAGE_NAME} \
+      -f maxdiffusion_dependencies.Dockerfile .
+  fi
+=======
 else
   # Default to maxdiffusion_dependencies.Dockerfile for non-GPU builds
   export BASEIMAGE=${BASEIMAGE:-python:3.12-slim-bullseye}
@@ -76,4 +98,5 @@ else
     --build-arg BASEIMAGE=${BASEIMAGE} \
     -t ${LOCAL_IMAGE_NAME} \
     -f maxdiffusion_dependencies.Dockerfile .
+>>>>>>> origin/main
 fi
@@ -1,6 +1,11 @@
+<<<<<<< HEAD
+# Use Python 3.12-slim-bullseye as the base image
+FROM python:3.12-slim-bullseye
+=======
 # Use Python 3.12-slim-bullseye as the base image unless overridden
 ARG BASEIMAGE=python:3.12-slim-bullseye
 FROM $BASEIMAGE
+>>>>>>> origin/main
 
 # Environment variable for no-cache-dir and pip root user warning
 ENV PIP_NO_CACHE_DIR=1
@@ -13,8 +18,13 @@ ENV CLOUD_SDK_VERSION=latest
 # Set DEBIAN_FRONTEND to noninteractive to avoid frontend errors
 ENV DEBIAN_FRONTEND=noninteractive
 
+<<<<<<< HEAD
+# Upgrade pip to the latest version
+RUN python -m pip install --upgrade pip --no-warn-script-location
+=======
 # Upgrade pip to the latest version and install uv
 RUN python -m pip install --upgrade pip uv --no-warn-script-location
+>>>>>>> origin/main
 
 # Install system dependencies
 RUN apt-get update && apt-get install -y apt-utils git curl gnupg procps iproute2 ethtool && rm -rf /var/lib/apt/lists/*
@@ -26,12 +36,26 @@ RUN curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dea
 # Install the Google Cloud SDK
 RUN apt-get update && apt-get install -y google-cloud-sdk && rm -rf /var/lib/apt/lists/*
 
+<<<<<<< HEAD
+# Install cloud-accelerator-diagnostics
+RUN pip install cloud-accelerator-diagnostics
+
+# Install cloud-tpu-diagnostics
+RUN pip install cloud-tpu-diagnostics
+
+# Install gcsfs
+RUN pip install gcsfs
+
+# Install google-cloud-storage
+RUN pip install google-cloud-storage
+=======
 # Install diagnostic and storage dependencies using uv
 RUN python -m uv pip install --system \
     cloud-accelerator-diagnostics \
     cloud-tpu-diagnostics \
     gcsfs \
     google-cloud-storage
+>>>>>>> origin/main
 
 # Args
 ARG MODE
 
@@ -0,0 +1,31 @@
+ARG JAX_AI_IMAGE_BASEIMAGE
+
+# JAX AI Base Image
+FROM $JAX_AI_IMAGE_BASEIMAGE
+
+ARG JAX_AI_IMAGE_BASEIMAGE
+
+ARG COMMIT_HASH
+
+ENV COMMIT_HASH=$COMMIT_HASH
+
+RUN mkdir -p /deps
+
+# Set the working directory in the container
+WORKDIR /deps
+
+# Copy all files from local workspace into docker container
+COPY . .
+
+# Install Maxdiffusion Jax AI Image requirements
+RUN pip install -r /deps/requirements_with_jax_ai_image.txt
+
+# TODO: Remove the flax pin and fsspec overrides once a stable flax version is released
+RUN if echo "$JAX_AI_IMAGE_BASEIMAGE" | grep -q "nightly"; then \
+        echo "Nightly build detected: Installing specific Flax commit and fsspec." && \
+        pip install --upgrade --force-reinstall git+https://github.com/google/flax.git@ef78d6584623511746be4824965cdef42b464583 && \
+        pip install "fsspec==2025.10.0"; \
+    fi
+
+# Run the script available in JAX-AI-Image base image to generate the manifest file
+RUN bash /jax-ai-image/generate_manifest.sh PREFIX=maxdiffusion COMMIT_HASH=$COMMIT_HASH
@@ -0,0 +1,40 @@
+--extra-index-url https://download.pytorch.org/whl/cpu
+jax>=0.7.2
+jaxlib>=0.4.30
+grain
+google-cloud-storage>=2.17.0
+absl-py
+chex
+datasets
+flax>=0.12.0
+optax>=0.2.3
+torch>=2.6.0
+torchvision>=0.20.1
+ftfy
+tensorboard>=2.17.0
+tensorboardx>=2.6.2.2
+tensorboard-plugin-profile>=2.15.2
+Jinja2
+scikit-image
+parameterized
+Pillow
+pylint
+pyink
+pytest==8.2.2
+tensorflow>=2.17.0
+tensorflow-datasets>=4.9.6
+ruff>=0.1.5,<=0.2
+git+https://github.com/Lightricks/LTX-Video
+git+https://github.com/zmelumian972/xla@torchax/jittable_module_callable#subdirectory=torchax
+opencv-python-headless==4.10.0.84
+orbax-checkpoint
+tokenizers==0.21.0
+huggingface_hub>=0.30.2
+transformers==4.51.0
+einops==0.8.0
+sentencepiece
+aqtp
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+hf_transfer>=0.1.9
+qwix@git+https://github.com/google/qwix.git
@@ -0,0 +1,41 @@
+# Requirements for Building the MaxDiffusion Docker Image
+# These requirements are additional to the dependencies present in the JAX AI base image.
+--extra-index-url https://download.pytorch.org/whl/cpu
+jax>=0.7.2
+jaxlib>=0.4.30
+grain
+google-cloud-storage>=2.17.0
+absl-py
+chex
+datasets
+flax>=0.12.0
+optax>=0.2.3
+torch>=2.6.0
+torchvision>=0.20.1
+ftfy
+tensorboard>=2.17.0
+tensorboardx>=2.6.2.2
+tensorboard-plugin-profile>=2.15.2
+Jinja2
+scikit-image
+parameterized
+Pillow
+pylint
+pyink
+pytest==8.2.2
+tensorflow>=2.17.0
+tensorflow-datasets>=4.9.6
+ruff>=0.1.5,<=0.2
+opencv-python-headless==4.10.0.84
+orbax-checkpoint
+tokenizers==0.21.0
+huggingface_hub>=0.30.2
+transformers==4.51.0
+tokamax
+einops==0.8.0
+sentencepiece
+aqtp
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+hf_transfer>=0.1.9
+qwix@git+https://github.com/google/qwix.git
@@ -0,0 +1,20 @@
+[isort]
+default_section = FIRSTPARTY
+ensure_newline_before_comments = True
+force_grid_wrap = 0
+include_trailing_comma = True
+known_first_party = accelerate
+known_third_party =
+    numpy
+    torch
+    torch_xla
+
+line_length = 119
+lines_after_imports = 2
+multi_line_output = 3
+use_parentheses = True
+
+[flake8]
+ignore = E203, E722, E501, E741, W503, W605
+max-line-length = 119
+per-file-ignores = __init__.py:F401