Commit 50ec5a0

Merge branch 'AI-Hypercomputer:main' into pr/decoupling-core
2 parents 4c1423a + b279b99 commit 50ec5a0

38 files changed

Lines changed: 1166 additions & 602 deletions

.github/workflows/run_tests_internal.yml

Lines changed: 1 addition & 1 deletion
@@ -81,4 +81,4 @@ jobs:
     python3 -m pip install -e . --no-dependencies
     [ "${{ inputs.total_workers }}" -gt 1 ] && python3 -m pip install --quiet pytest-split && SPLIT_ARGS="--splits ${{ inputs.total_workers }} --group ${{ inputs.worker_group }}" || SPLIT_ARGS=""
     export LIBTPU_INIT_ARGS='--xla_tpu_scoped_vmem_limit_kib=65536'
-    python3 -m pytest ${{ inputs.pytest_addopts }} -v -m "${FINAL_PYTEST_MARKER}" --durations=0 $SPLIT_ARGS
+    python3 -m pytest ${{ inputs.pytest_addopts }} -v -m "${FINAL_PYTEST_MARKER}" --durations=0 --deselect "tests/aot_hlo_identical_test.py::AotHloIdenticalTest::test_default_hlo_match" $SPLIT_ARGS
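The workflow computes `SPLIT_ARGS` with a chained `test && install && assign || fallback` one-liner. As a sketch of the same logic, here it is as an explicit if/else, with local `total_workers`/`worker_group` variables standing in for the workflow's `inputs.*` values (names here are illustrative, not part of the workflow):

```shell
# Hypothetical stand-ins for the workflow's inputs.total_workers and
# inputs.worker_group expressions.
total_workers=4
worker_group=2

# Equivalent of the `[ ... ] && ... || SPLIT_ARGS=""` one-liner: only when
# more than one worker is requested do we pass pytest-split's flags.
if [ "$total_workers" -gt 1 ]; then
  SPLIT_ARGS="--splits $total_workers --group $worker_group"
else
  SPLIT_ARGS=""
fi
echo "$SPLIT_ARGS"
```

Note one subtlety of the original one-liner: if the `pip install --quiet pytest-split` step fails, the `||` branch still runs and `SPLIT_ARGS` is silently set to empty, so the job falls back to running the full test set on one worker rather than failing outright.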

README.md

Lines changed: 2 additions & 2 deletions
@@ -22,7 +22,7 @@
 
 MaxText is a high performance, highly scalable, open-source LLM library and reference implementation written in pure Python/[JAX](https://docs.jax.dev/en/latest/jax-101.html) and targeting Google Cloud TPUs and GPUs for training.
 
-MaxText provides a library of high performance models to choose from, including Gemma, Llama, DeepSeek, Qwen, and Mistral. For each of these models, MaxText supports pre-training (up to tens of thousands of chips) and scalable post-training, with popular techniques like Supervised Fine-Tuning (SFT) and Group Relative Policy Optimization (GRPO, a type of Reinforcement Learning).
+MaxText provides a library of high performance models to choose from, including Gemma, Llama, DeepSeek, Qwen, and Mistral. For each of these models, MaxText supports pre-training (up to tens of thousands of chips) and scalable post-training, with popular techniques like Supervised Fine-Tuning (SFT), Group Relative Policy Optimization (GRPO), and Group Sequence Policy Optimization (GSPO), the latter two being Reinforcement Learning methods.
 
 MaxText achieves high Model FLOPs Utilization (MFU) and tokens/second from single host to very large clusters while staying simple and largely "optimization-free" thanks to the power of JAX and the XLA compiler.
 
@@ -73,7 +73,7 @@ Our goal is to provide a variety of models (dimension “a”) and techniques (d
 Check out these getting started guides:
 
 * [SFT](https://github.com/AI-Hypercomputer/maxtext/blob/main/end_to_end/tpu/llama3.1/8b/run_sft.sh) (Supervised Fine Tuning)
-* [GRPO](https://maxtext.readthedocs.io/en/latest/tutorials/grpo.html) (Group Relative Policy Optimization)
+* [GRPO / GSPO](https://maxtext.readthedocs.io/en/latest/tutorials/grpo.html) (Group Relative & Group Sequence Policy Optimization; pass `loss_algo=gspo-token` to run GSPO)
 
 ### Model library

dependencies/dockerfiles/maxtext_jax_ai_image.Dockerfile

Lines changed: 6 additions & 0 deletions
@@ -52,6 +52,12 @@ RUN if [ "$DEVICE" = "tpu" ]; then \
     python3 -m pip install 'google-tunix>=0.1.2'; \
     fi
 
+# Temporarily pin JAX to 0.8.1 for GPU images
+RUN if [ "$DEVICE" = "gpu" ]; then \
+    python3 -m pip install -U "jax[cuda12]==0.8.1"; \
+    python3 -m pip install -U "transformer-engine-cu12" "transformer-engine-jax" "transformer-engine"; \
+    fi
+
 # Now copy the remaining code (source files that may change frequently)
 COPY . .
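The Dockerfile above pins an exact JAX version, while the requirements files in this commit use `>=` floors. When checking such versions programmatically, dotted version strings must be compared numerically, not lexically. A minimal sketch in plain Python (illustrative only, not part of the image build; real tooling would use the `packaging` library):

```python
def parse_version(v: str) -> tuple[int, ...]:
    """Turn a dotted version string like '0.8.1' into a tuple of ints.

    Note: this toy parser handles only purely numeric segments, not
    suffixes like '.post0' or 'rc1'.
    """
    return tuple(int(part) for part in v.split("."))

# Tuple comparison is element-wise, so 0.10.x correctly sorts after 0.9.x,
# whereas plain string comparison gets this wrong ("0.10.0" < "0.9.0").
assert parse_version("0.8.1") >= parse_version("0.8.0")
assert parse_version("0.10.0") > parse_version("0.9.0")
assert "0.10.0" < "0.9.0"  # the string-comparison trap
```

This is exactly why pins like `jax[cuda12]==0.8.1` are checked by a real version parser in pip rather than by string equality on arbitrary text.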

dependencies/dockerfiles/maxtext_post_training_local_dependencies.Dockerfile

Lines changed: 2 additions & 13 deletions
@@ -33,22 +33,11 @@ RUN pip install -e /tunix --no-cache-dir
 
 
 COPY vllm /vllm
-RUN VLLM_TARGET_DEVICE="tpu" pip install -e /vllm --no-cache-dir --pre \
-    --extra-index-url https://pypi.org/simple/ \
-    --extra-index-url https://us-python.pkg.dev/ml-oss-artifacts-published/jax/simple/ \
-    --extra-index-url https://download.pytorch.org/whl/nightly/cpu \
-    --find-links https://storage.googleapis.com/jax-releases/libtpu_releases.html \
-    --find-links https://storage.googleapis.com/libtpu-wheels/index.html \
-    --find-links https://storage.googleapis.com/libtpu-releases/index.html \
-    --find-links https://storage.googleapis.com/jax-releases/jax_nightly_releases.html \
-    --find-links https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html
+RUN VLLM_TARGET_DEVICE="tpu" pip install -e /vllm --no-cache-dir
 
 
 COPY tpu-inference /tpu-inference
-RUN pip install -e /tpu-inference --no-cache-dir --pre \
-    --extra-index-url https://pypi.org/simple/ \
-    --extra-index-url https://us-python.pkg.dev/ml-oss-artifacts-published/jax/simple/ \
-    --find-links https://storage.googleapis.com/jax-releases/libtpu_releases.html
+RUN pip install -e /tpu-inference --no-cache-dir
 
 RUN pip install --no-deps qwix==0.1.4

dependencies/requirements/generated_requirements/cuda12-requirements.txt

Lines changed: 67 additions & 70 deletions
@@ -4,37 +4,37 @@
 absl-py>=2.3.1
 aiofiles>=25.1.0
 aiohappyeyeballs>=2.6.1
-aiohttp>=3.13.1
+aiohttp>=3.13.2
 aiosignal>=1.4.0
-annotated-doc>=0.0.3
+annotated-doc>=0.0.4
 annotated-types>=0.7.0
 antlr4-python3-runtime>=4.9.3
 anyio>=4.11.0
 aqtp>=0.9.0
-array-record>=0.8.2
-astroid>=4.0.1
+array-record>=0.8.3
+astroid>=4.0.2
 astunparse>=1.6.3
 attrs>=25.4.0
-auditwheel>=6.4.2
+auditwheel>=6.5.0
 black>=24.10.0
 blobfile>=3.1.0
 build>=1.3.0
-cachetools>=6.2.1
-certifi>=2025.10.5
-cfgv>=3.4.0
+cachetools>=6.2.2
+certifi>=2025.11.12
+cfgv>=3.5.0
 charset-normalizer>=3.4.4
-cheroot>=11.0.0
+cheroot>=11.1.2
 chex>=0.1.91
-click>=8.3.0
+click>=8.3.1
 cloud-accelerator-diagnostics>=0.1.1
 cloud-tpu-diagnostics>=0.1.5
-cloudpickle>=3.1.1
+cloudpickle>=3.1.2
 clu>=0.0.12
 colorama>=0.4.6
 contourpy>=1.3.3
-coverage>=7.11.0
+coverage>=7.12.0
 cycler>=0.12.1
-datasets>=4.3.0
+datasets>=4.4.1
 decorator>=5.2.1
 dill>=0.4.0
 distlib>=0.4.0
@@ -45,41 +45,40 @@ einops>=0.8.1
 einshape>=1.0
 etils>=1.13.0
 evaluate>=0.4.6
-execnet>=2.1.1
-fastapi>=0.120.1
+execnet>=2.1.2
+fastapi>=0.122.0
 filelock>=3.20.0
 flatbuffers>=25.9.23
-flax>=0.12.0
+flax>=0.12.1
 fonttools>=4.60.1
 frozenlist>=1.8.0
-fsspec>=2025.9.0
+fsspec>=2025.10.0
 gast>=0.6.0
-gcsfs>=2025.9.0
-google-api-core>=2.28.0
-google-api-python-client>=2.185.0
-google-auth-httplib2>=0.2.0
+gcsfs>=2025.10.0
+google-api-core>=2.28.1
+google-api-python-client>=2.187.0
+google-auth-httplib2>=0.2.1
 google-auth-oauthlib>=1.2.2
-google-auth>=2.41.1
-google-benchmark>=1.9.4
-google-cloud-aiplatform>=1.122.0
+google-auth>=2.43.0
+google-cloud-aiplatform>=1.128.0
 google-cloud-appengine-logging>=1.7.0
 google-cloud-audit-log>=0.4.0
 google-cloud-bigquery>=3.38.0
-google-cloud-core>=2.4.3
+google-cloud-core>=2.5.0
 google-cloud-logging>=3.12.1
 google-cloud-monitoring>=2.28.0
 google-cloud-resource-manager>=1.15.0
-google-cloud-storage>=2.19.0
+google-cloud-storage>=3.6.0
 google-crc32c>=1.7.1
-google-genai>=1.46.0
+google-genai>=1.52.0
 google-jetstream @ https://github.com/AI-Hypercomputer/JetStream/archive/29329e8e73820993f77cfc8efe34eb2a73f5de98.zip
 google-pasta>=0.2.0
-google-resumable-media>=2.7.2
-googleapis-common-protos>=1.71.0
-grain>=0.2.13
+google-resumable-media>=2.8.0
+googleapis-common-protos>=1.72.0
+grain>=0.2.15
 grpc-google-iam-v1>=0.14.3
 grpcio-status>=1.71.2
-grpcio>=1.75.1
+grpcio>=1.76.0
 gviz-api>=1.10.0
 h11>=0.16.0
 h5py>=3.15.1
@@ -96,43 +95,42 @@ immutabledict>=4.2.2
 importlab>=0.8.1
 importlib-metadata>=8.7.0
 importlib-resources>=6.5.2
-iniconfig>=2.1.0
+iniconfig>=2.3.0
 isort>=7.0.0
 jaraco-functools>=4.3.0
-jax-cuda12-pjrt>=0.8.0 ; sys_platform == 'linux'
-jax-cuda12-plugin>=0.8.0 ; sys_platform == 'linux'
-jax-triton>=0.3.0
-jax>=0.8.0
-jaxlib>=0.8.0
+jax-cuda12-pjrt>=0.8.1 ; sys_platform == 'linux'
+jax-cuda12-plugin>=0.8.1 ; sys_platform == 'linux'
+jax>=0.8.1
+jaxlib>=0.8.1
 jaxtyping>=0.3.3
 jinja2>=3.1.6
 joblib>=1.5.2
 jsonlines>=4.0.0
-keras>=3.11.3
+keras>=3.12.0
 kiwisolver>=1.4.9
 libclang>=18.1.1
-libcst>=1.8.5
+libcst>=1.8.6
 lxml>=6.0.2
 markdown-it-py>=4.0.0
-markdown>=3.9
+markdown>=3.10
 markupsafe>=3.0.3
 matplotlib>=3.10.7
 mccabe>=0.7.0
 mdurl>=0.1.2
 ml-collections>=1.1.0
-ml-dtypes>=0.5.3
+ml-dtypes>=0.5.4
 ml-goodput-measurement>=0.0.15
 mlperf-logging @ https://github.com/mlcommons/logging/archive/38ab22670527888c8eb7825a4ece176fcc36a95d.zip
 more-itertools>=10.8.0
 mpmath>=1.3.0
 msgpack>=1.1.2
-msgspec>=0.19.0
+msgspec>=0.20.0
 multidict>=6.7.0
-multiprocess>=0.70.16
+multiprocess>=0.70.18
 mypy-extensions>=1.1.0
 namex>=0.1.0
 nest-asyncio>=1.6.0
-networkx>=3.5
+networkx>=3.6
 ninja>=1.13.0
 nltk>=3.9.2
 nodeenv>=1.9.1
@@ -143,21 +141,21 @@ nvidia-cuda-cupti-cu12>=12.9.79 ; sys_platform == 'linux'
 nvidia-cuda-nvcc-cu12>=12.9.86 ; sys_platform == 'linux'
 nvidia-cuda-nvrtc-cu12>=12.9.86 ; sys_platform == 'linux'
 nvidia-cuda-runtime-cu12>=12.9.79 ; sys_platform == 'linux'
-nvidia-cudnn-cu12>=9.14.0.64 ; sys_platform == 'linux'
+nvidia-cudnn-cu12>=9.16.0.29 ; sys_platform == 'linux'
 nvidia-cufft-cu12>=11.4.1.4 ; sys_platform == 'linux'
 nvidia-cusolver-cu12>=11.7.5.82 ; sys_platform == 'linux'
 nvidia-cusparse-cu12>=12.5.10.65 ; sys_platform == 'linux'
-nvidia-nccl-cu12>=2.28.3 ; sys_platform == 'linux'
+nvidia-nccl-cu12>=2.28.9 ; sys_platform == 'linux'
 nvidia-nvjitlink-cu12>=12.9.86 ; sys_platform == 'linux'
 nvidia-nvshmem-cu12>=3.4.5 ; sys_platform == 'linux'
 oauthlib>=3.3.1
 omegaconf>=2.3.0
 opentelemetry-api>=1.38.0
 opt-einsum>=3.4.0
 optax>=0.2.6
-optree>=0.17.0
+optree>=0.18.0
 optype>=0.14.0
-orbax-checkpoint>=0.11.26
+orbax-checkpoint>=0.11.28
 packaging>=25.0
 pandas>=2.3.3
 parameterized>=0.9.0
@@ -167,26 +165,26 @@ pillow>=12.0.0
 platformdirs>=4.5.0
 pluggy>=1.6.0
 portpicker>=1.6.0
-pre-commit>=4.3.0
+pre-commit>=4.5.0
 prometheus-client>=0.23.1
 promise>=2.3
 propcache>=0.4.1
 proto-plus>=1.26.1
 protobuf>=5.29.5
-psutil>=7.1.0
+psutil>=7.1.3
 pyarrow>=22.0.0
 pyasn1-modules>=0.4.2
 pyasn1>=0.6.1
 pycnite>=2024.7.31
 pycryptodomex>=3.23.0
-pydantic-core>=2.41.4
-pydantic>=2.12.3
+pydantic-core>=2.41.5
+pydantic>=2.12.5
 pydot>=4.0.1
 pyelftools>=0.32
 pyglove>=0.4.5
 pygments>=2.19.2
 pyink>=24.10.1
-pylint>=4.0.2
+pylint>=4.0.3
 pyparsing>=3.2.5
 pyproject-hooks>=1.2.0
 pytest-xdist>=3.8.0
@@ -195,15 +193,15 @@ python-dateutil>=2.9.0.post0
 pytype>=2024.10.11
 pytz>=2025.2
 pyyaml>=6.0.3
-qwix>=0.1.1
-regex>=2025.10.23
+qwix>=0.1.4
+regex>=2025.11.3
 requests-oauthlib>=2.0.0
 requests>=2.32.5
 rich>=14.2.0
 rsa>=4.9.1
-safetensors>=0.6.2
-scipy-stubs>=1.16.2.4
-scipy>=1.16.2
+safetensors>=0.7.0
+scipy-stubs>=1.16.3.0
+scipy>=1.16.3
 sentencepiece>=0.2.1
 seqio>=0.0.20
 setuptools>=80.9.0
@@ -214,7 +212,7 @@ simplejson>=3.20.2
 six>=1.17.0
 sniffio>=1.3.1
 sortedcontainers>=2.4.0
-starlette>=0.48.0
+starlette>=0.50.0
 sympy>=1.14.0
 tabulate>=0.9.0
 tenacity>=9.1.2
@@ -226,35 +224,34 @@ tensorflow-datasets>=4.9.9
 tensorflow-metadata>=1.17.2
 tensorflow-text>=2.19.0
 tensorflow>=2.19.1
-tensorstore>=0.1.78
-termcolor>=3.1.0
+tensorstore>=0.1.79
+termcolor>=3.2.0
 tiktoken>=0.12.0
-tokamax>=0.0.4
+tokamax>=0.0.8
 tokenizers>=0.22.1
 toml>=0.10.2
 tomlkit>=0.13.3
 toolz>=1.1.0
 tqdm>=4.67.1
-transformer-engine-cu12>=2.8.0
-transformer-engine-jax>=2.8.0
-transformer-engine>=2.8.0
-transformers>=4.57.1
+transformer-engine-cu12>=2.9.0
+transformer-engine-jax>=2.9.0
+transformer-engine>=2.9.0
+transformers>=4.57.3
 treescope>=0.1.10
-triton>=3.5.0
 typeguard>=2.13.3
 typing-extensions>=4.15.0
 typing-inspection>=0.4.2
 tzdata>=2025.2
 uritemplate>=4.2.0
 urllib3>=2.5.0
 uvicorn>=0.38.0
-virtualenv>=20.35.3
+virtualenv>=20.35.4
 wadler-lindig>=0.1.7
 websockets>=15.0.1
 werkzeug>=3.1.3
 wheel>=0.45.1
-wrapt>=2.0.0
-xprof>=2.20.7
+wrapt>=2.0.1
+xprof>=2.21.1
 xxhash>=3.6.0
 yarl>=1.22.0
 zipp>=3.23.0
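The requirements diff above mixes three line shapes: `name>=version`, `name>=version ; marker`, and direct-URL entries like `google-jetstream @ https://…`. A rough sketch of a parser for just those shapes (illustrative only; it covers the subset used in this file, not the full PEP 508 grammar, which real tooling gets from the `packaging` library):

```python
import re

def parse_requirement(line: str):
    """Parse 'name>=version' or 'name>=version ; marker' into a tuple
    (name, op, version, marker).

    Returns None for shapes this sketch does not handle, such as the
    direct-URL 'name @ https://...' entries in the requirements file.
    """
    # Split off an optional environment marker after ';'.
    spec, _, marker = line.partition(";")
    m = re.fullmatch(
        r"\s*([A-Za-z0-9._-]+)\s*(==|>=|<=|~=|>|<)\s*(\S+)\s*", spec
    )
    if m is None:
        return None
    name, op, version = m.groups()
    return name, op, version, marker.strip() or None
```

For example, `parse_requirement("nvidia-nccl-cu12>=2.28.9 ; sys_platform == 'linux'")` yields the name, the `>=` floor, the version, and the platform marker as separate fields, while a direct-URL line falls through to `None`.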
