AI-Hypercomputer
diff --git a/.github/workflows/run_jupyter_notebooks.yml b/.github/workflows/run_jupyter_notebooks.yml
Lines changed: 1 addition & 1 deletion
diff --git a/README.md b/README.md
Lines changed: 2 additions & 2 deletions
diff --git a/codecov.yml b/codecov.yml
Lines changed: 2 additions & 2 deletions
diff --git a/dependencies/requirements/requirements_decoupled_jax_0_7.1.txt b/dependencies/requirements/requirements_decoupled_jax_0_7.1.txt
Lines changed: 3 additions & 2 deletions
diff --git a/docs/guides/run_python_notebook.md b/docs/guides/run_python_notebook.md
Lines changed: 1 addition & 1 deletion
diff --git a/docs/run_maxtext/decoupled_mode.md b/docs/run_maxtext/decoupled_mode.md
Lines changed: 34 additions & 28 deletions
diff --git a/docs/tutorials/first_run.md b/docs/tutorials/first_run.md
Lines changed: 1 addition & 1 deletion
diff --git a/docs/tutorials/posttraining/multimodal.md b/docs/tutorials/posttraining/multimodal.md
Lines changed: 1 addition & 1 deletion
diff --git a/src/MaxText/configs/decoupled_base_test.yml b/src/MaxText/configs/decoupled_base_test.yml
Lines changed: 9 additions & 13 deletions
diff --git a/src/MaxText/configs/rl.yml b/src/MaxText/configs/rl.yml
Lines changed: 1 addition & 1 deletion
@@ -91,7 +91,7 @@ jobs:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
           MAXTEXT_REPO_ROOT=$(pwd)
-          MAXTEXT_NOTEBOOKS_ROOT="$MAXTEXT_REPO_ROOT/src/MaxText/examples"
+          MAXTEXT_NOTEBOOKS_ROOT="$MAXTEXT_REPO_ROOT/src/maxtext/examples"
 
           for notebook in "$MAXTEXT_NOTEBOOKS_ROOT"/{sft,rl}*.ipynb; do
             filename=$(basename "$notebook")
 
@@ -35,15 +35,15 @@ Check out our [Read The Docs site](https://maxtext.readthedocs.io/en/latest/) or
 See our installation guide to [install MaxText with pip from PyPI](https://maxtext.readthedocs.io/en/latest/install_maxtext.html#from-pypi-recommended).
 
 ## Decoupled mode
-See our guide on running MaxText in decoupled mode, without any GCP dependencies in [Decoupled Mode Guide](https://maxtext.readthedocs.io/en/latest/guides/run_maxtext/decoupled_mode.html).
+See our guide on running MaxText in decoupled mode, without any GCP dependencies in [Decoupled Mode Guide](https://maxtext.readthedocs.io/en/latest/run_maxtext/decoupled_mode.html).
 
 <!-- NEWS START -->
 
 ## 🔥 Latest news 🔥
 
 * \[December 22, 2025\] [Muon optimizer](https://kellerjordan.github.io/posts/muon) is now supported.
 * \[December 10, 2025\] DeepSeek V3.1 is now supported. Use existing configs for [DeepSeek V3 671B](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/MaxText/configs/models/deepseek3-671b.yml) and load in V3.1 checkpoint to use model.
-* \[December 9, 2025\] [New RL and SFT Notebook tutorials](https://github.com/AI-Hypercomputer/maxtext/tree/main/src/MaxText/examples) are available.
+* \[December 9, 2025\] [New RL and SFT Notebook tutorials](https://github.com/AI-Hypercomputer/maxtext/tree/main/src/maxtext/examples) are available.
 * \[December 4, 2025\] The [ReadTheDocs documentation site](https://maxtext.readthedocs.io/en/latest/index.html) has been reorganized.
 * \[December 3, 2025\] Multi-host support for GSPO and GRPO is now available via [new RL tutorials](https://maxtext.readthedocs.io/en/latest/tutorials/posttraining/rl_on_multi_host.html).
 * \[November 20, 2025\] A new guide, [What is Post Training in MaxText?](https://maxtext.readthedocs.io/en/latest/tutorials/post_training_index.html), is now available.
 
@@ -34,7 +34,7 @@ fixes:
 ignore:
   - "src/maxtext/assets"
   - "src/MaxText/configs"
-  - "src/MaxText/examples"
+  - "src/maxtext/examples"
   - "src/MaxText/experimental"
   - "src/maxtext/inference"
   - "src/maxtext/scratch_code"
@@ -65,7 +65,7 @@ coverage:
     patch:
       default:
         target: auto
-        threshold: 5% # fail on 5+ percent degradation
+        threshold: 10% # fail on 10+ percent degradation
         flags:
           - regular
 
@@ -8,6 +8,7 @@ flax
 grain>=0.2.12
 grpcio>=1.75.1
 huggingface_hub>=0.35.3
+jax==0.7.1
 jaxtyping>=0.3.3
 jsonlines>=4.0.0
 matplotlib>=3.10.3
@@ -19,6 +20,7 @@ omegaconf>=2.3.0
 optax>=0.2.6
 orbax-checkpoint>=0.11.25
 pandas>=2.3.3
+parameterized==0.9.0
 pathwaysutils>=0.1.3
 pillow>=11.3.0
 protobuf>=5.29.5
@@ -39,5 +41,4 @@ tiktoken>=0.12.0
 tqdm>=4.67.1
 transformers>=4.57.0
 urllib3>=2.5.0
-jax==0.7.1
-git+https://github.com/google/tunix.git
+git+https://github.com/google/tunix.git
@@ -43,7 +43,7 @@ Before proceeding, please verify that the specific notebook you are running work
 
 ### Step 1: Choose an Example
 
-1.a. Visit the [MaxText examples directory](https://github.com/AI-Hypercomputer/maxtext/tree/main/src/MaxText/examples) on Github.
+1.a. Visit the [MaxText examples directory](https://github.com/AI-Hypercomputer/maxtext/tree/main/src/maxtext/examples) on GitHub.
 
 1.b. Find the notebook you want to run (e.g., `sft_qwen3_demo.ipynb`) and copy its URL.
 
 
@@ -14,38 +14,40 @@
  limitations under the License.
 -->
 
-
 # Via Decoupled Mode (No Google Cloud Dependencies)
 
 Set `DECOUPLE_GCLOUD=TRUE` to run MaxText tests and local development without any Google Cloud SDK, `gs://` buckets, JetStream, or Vertex AI integrations.
 
 When enabled:
-* Skips external integration tests with markers:
-  * `external_serving` (`jetstream`, `serving`, `decode_server`)
-  * `external_training` (`goodput`)
-* `decoupled` – Applied by `tests/conftest.py` to tests that are runnable in decoupled mode (i.e. not skipped for TPU or external markers).
-* Production / serving entrypoints (`decode.py`, `maxengine_server.py`, `maxengine_config.py`, tokenizer access in `maxengine.py`) **fail fast with a clear RuntimeError** when decoupled. This prevents accidentally running partial serving logic locally when decoupled mode is ON.
-* Import-time safety is preserved by lightweight stubs returned from `decouple.py` (so modules import cleanly); only active use of missing functionality raises.
-* Conditionally replaces dataset paths in certain tests to point at minimal local datasets.
-* Uses a local base output directory (users can override with `LOCAL_BASE_OUTPUT`).
-* All tests that previously hard-coded `configs/base.yml` now use the helper `get_test_config_path()` from `tests/utils/test_helper.py`. This helper ensures usage of `decoupled_base_test.yml`.
+
+- Skips external integration tests with markers:
+  - `external_serving` (`jetstream`, `serving`, `decode_server`)
+  - `external_training` (`goodput`)
+- Adds the `decoupled` marker (applied by `tests/conftest.py`) to tests that are runnable in decoupled mode (i.e. not skipped for TPU or external markers).
+- Production / serving entrypoints (`decode.py`, `maxengine_server.py`, `maxengine_config.py`, tokenizer access in `maxengine.py`) **fail fast with a clear RuntimeError** when decoupled. This prevents accidentally running partial serving logic locally when decoupled mode is ON.
+- Import-time safety is preserved by lightweight stubs returned from `decouple.py` (so modules import cleanly); only active use of missing functionality raises.
+- Conditionally replaces dataset paths in certain tests to point at minimal local datasets.
+- Uses a local base output directory (users can override with `LOCAL_BASE_OUTPUT`).
+- All tests that previously hard-coded `configs/base.yml` now use the helper `get_test_config_path()` from `tests/utils/test_utils.py`. This helper ensures usage of `decoupled_base_test.yml`.
 
 Minimal datasets included (checked into the repo):
-* ArrayRecord shards: generated via `python local_datasets/get_minimal_c4_en_dataset.py`, 
+
+- ArrayRecord shards: generated via `python local_datasets/get_minimal_c4_en_dataset.py`,
   located in `local_datasets/c4_en_dataset_minimal/c4/en/3.0.1/c4-{train,validation}.array_record-*`
-* Parquet (HF style): generated via `python local_datasets/get_minimal_hf_c4_parquet.py`, 
+- Parquet (HF style): generated via `python local_datasets/get_minimal_hf_c4_parquet.py`,
   located in `local_datasets/c4_en_dataset_minimal/hf/c4`
 
-
 Run a local smoke test fully offline:
+
 ```bash
 export DECOUPLE_GCLOUD=TRUE
 pytest -k train_gpu_smoke_test -q
 ```
 
 Optional environment variables:
-* `LOCAL_GCLOUD_PROJECT` - placeholder project string (default: `local-maxtext-project`).
-* `LOCAL_BASE_OUTPUT` - override default local output directory used in tests.
+
+- `LOCAL_GCLOUD_PROJECT` - placeholder project string (default: `local-maxtext-project`).
+- `LOCAL_BASE_OUTPUT` - override default local output directory used in tests.
 
 ## Centralized Decoupling API (`gcloud_stub.py`)
 
@@ -55,32 +57,36 @@ MaxText exposes a single module `MaxText.gcloud_stub` to avoid scattering enviro
 from MaxText.gcloud_stub import is_decoupled, cloud_diagnostics, jetstream
 
 if is_decoupled():
-  # Skip optional integrations or use local fallbacks
-  pass
+    # Skip optional integrations or use local fallbacks
+    pass
 
 # Cloud diagnostics (returns diagnostic, debug_configuration, diagnostic_configuration, stack_trace_configuration)
-diagnostic, debug_configuration, diagnostic_configuration, stack_trace_configuration = cloud_diagnostics()
+diagnostic, debug_configuration, diagnostic_configuration, stack_trace_configuration = (
+    cloud_diagnostics()
+)
 
 # JetStream (serving) components
 config_lib, engine_api, token_utils, tokenizer_api, token_params_ns = jetstream()
 TokenizerParameters = getattr(token_params_ns, "TokenizerParameters", object)
 ```
 
 Behavior when `DECOUPLE_GCLOUD=TRUE`:
-* `is_decoupled()` returns True.
-* Each helper returns lightweight stubs whose attributes are safe to access; calling methods raises a clear `RuntimeError` only when actually invoked.
-* Prevents import-time failures for optional dependencies (JetStream).
+
+- `is_decoupled()` returns True.
+- Each helper returns lightweight stubs whose attributes are safe to access; calling methods raises a clear `RuntimeError` only when actually invoked.
+- Prevents import-time failures for optional dependencies (JetStream).
 
 ## Guidelines:
-* Prefer calling `jetstream()` / `cloud_diagnostics()` once at module import and branching on `is_decoupled()` for functionality that truly requires the dependency.
-* Use `is_decoupled()` to avoid direct `os.environ["DECOUPLE_GCLOUD"]` checking.
-* Use `get_test_config_path()` instead of hard-coded `base.yml`.
-* Prefer conditional local fallbacks for cloud buckets and avoid introducing direct `gs://...` paths.
-* Please add the appropriate external dependency marker (`external_serving` or `external_training`) for new tests. Prefer the smallest scope instead of module-wide `pytestmark` when only a part of a file needs an external dependency.
-* Tests add a `decoupled` marker if DECOUPLE_GCLOUD && not marked with external dependency markers. Run tests with:
+
+- Prefer calling `jetstream()` / `cloud_diagnostics()` once at module import and branching on `is_decoupled()` for functionality that truly requires the dependency.
+- Use `is_decoupled()` to avoid direct `os.environ["DECOUPLE_GCLOUD"]` checking.
+- Use `get_test_config_path()` instead of hard-coded `base.yml`.
+- Prefer conditional local fallbacks for cloud buckets and avoid introducing direct `gs://...` paths.
+- Please add the appropriate external dependency marker (`external_serving` or `external_training`) for new tests. Prefer the smallest scope instead of module-wide `pytestmark` when only a part of a file needs an external dependency.
+- Tests are given the `decoupled` marker when `DECOUPLE_GCLOUD=TRUE` is set and they are not marked with an external dependency marker. Run them with:
+
 ```
 pytest -m decoupled -vv tests
 ```
 
 This centralized approach keeps optional integrations cleanly separated from core MaxText logic, making local development (e.g. on ROCm/NVIDIA GPUs) frictionless.
-
 
@@ -75,7 +75,7 @@ In the same TPU VM where you just installed all the dependencies of MaxText, You
 
 #### Decoding in MaxText via notebook
 
-You can use [demo_decoding.ipynb](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/MaxText/examples/demo_decoding.ipynb) to try out decoding on MaxText's `Llama3.1-8b` model implementation. In this notebook, we give `"I love to"` as the prompt, and the greedily sampled first output token is `" cook"`. Please remember to provide the path to your `Llama3.1-8b` checkpoint for the `load_parameters_path` argument in the config inside the notebook. You can use [to_maxtext.py](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/MaxText/utils/ckpt_conversion/to_maxtext.py) to create a MaxText/Orbax checkpoint from a Huggingface checkpoint.
+You can use [demo_decoding.ipynb](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/maxtext/examples/demo_decoding.ipynb) to try out decoding on MaxText's `Llama3.1-8b` model implementation. In this notebook, we give `"I love to"` as the prompt, and the greedily sampled first output token is `" cook"`. Please remember to provide the path to your `Llama3.1-8b` checkpoint for the `load_parameters_path` argument in the config inside the notebook. You can use [to_maxtext.py](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/MaxText/utils/ckpt_conversion/to_maxtext.py) to create a MaxText/Orbax checkpoint from a Hugging Face checkpoint.
 
 ### Run MaxText on NVIDIA GPUs
 
 
@@ -6,7 +6,7 @@ This document provides a guide to use the multimodal functionalities in MaxText
 - **Multimodal Decode**: Inference with text+images as input.
 - **Supervised Fine-Tuning (SFT)**: Apply SFT to the model using a visual-question-answering dataset.
 
-We also provide a [colab](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/MaxText/examples/multimodal_gemma3_demo.ipynb) for multimodal features demonstration. The following table provides a list of models and modalities we currently support:
+We also provide a [colab](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/maxtext/examples/multimodal_gemma3_demo.ipynb) for multimodal features demonstration. The following table provides a list of models and modalities we currently support:
 
 | Models                                         | Input Modalities | Output Modalities |
 | :--------------------------------------------- | :--------------- | :---------------- |
 
@@ -1,9 +1,9 @@
 # Decoupled base test config: used when DECOUPLE_GCLOUD=TRUE for tests that previously relied on base.yml.
-# Inherit all model defaults from base.yml but override any cloud-coupled paths and disable optional cloud features.
-base_config: base.yml
+# Inherit all model defaults (Pydantic already does this) but override any cloud-coupled paths and disable
+# optional cloud features.
 
 # Output goes to a local relative directory so tests do not require GCS.
-base_output_directory: ./maxtext_local_output
+base_output_directory: ./maxtext_local_output/gcloud_decoupled_test_logs
 run_name: test_decoupled
 
 # Disable checkpointing by default for speed unless a test explicitly enables it.
@@ -23,7 +23,9 @@ profile_periodically_period: 0
 profiler_steps: 0
 
 # Leave dataset-related keys to be overridden by individual tests.
-dataset_type: ""
+dataset_path: "tests/assets/local_datasets/c4_en_dataset_minimal/"
+dataset_name: 'c4/en:3.1.0'
+eval_dataset_name: 'c4/en:3.1.0'
 
 # Use dot_product attention to avoid GPU Pallas shared memory limits on AMD GPUs
 attention: "dot_product"
@@ -44,6 +46,8 @@ ici_tensor_sequence_parallelism: 1
 ici_autoregressive_parallelism: 1
 ici_fsdp_parallelism: 1
 ici_fsdp_transpose_parallelism: 1
+# Allow higher unsharded parameter percentage for small device count
+sharding_tolerance: 0.3
 
 # DCN dimensions to 1 (no multi-slice expectation locally).
 dcn_data_parallelism: 1
@@ -68,12 +72,4 @@ goodput_upload_interval_seconds: 0
 enable_pathways_goodput: false
 enable_gcp_goodput_metrics: false
 
-# Disable any cloud logging / BigQuery or external metric uploads.
-enable_cloud_logging: false
-upload_metrics_to_bigquery: false
-bigquery_project: ""
-bigquery_dataset: ""
-bigquery_table: ""
-
-# Force local-only behavior for tests: avoid accidental env pickup.
-tensorboard_dir: "./maxtext_local_output/tensorboard"
+tensorboard_dir: "./maxtext_local_output/gcloud_decoupled_test_logs/tensorboard"
@@ -171,7 +171,7 @@ reasoning_start_token: '<reasoning>'
 reasoning_end_token: '</reasoning>'
 solution_start_token: '<answer>'
 solution_end_token: '</answer>'
-chat_template_path: 'src/MaxText/examples/chat_templates/gsm8k_rl.json'
+chat_template_path: 'src/maxtext/examples/chat_templates/gsm8k_rl.json'
 skip_jax_distributed_system: True
 
 # # TODO(@mazumdera): fix this