Merge pull request #3394 from AI-Hypercomputer:post-training-ci-gate

Google-ML-Automation · Google-ML-Automation · commit 086c50d298ca · 2026-03-16T16:30:18.000-07:00
PiperOrigin-RevId: 884691170
diff --git a/.github/workflows/build_and_test_maxtext.yml b/.github/workflows/build_and_test_maxtext.yml
@@ -133,7 +133,7 @@ jobs:
       device_name: X64
       cloud_runner: linux-x86-n2-16
       image_type: ${{ matrix.image_type }}
-      pytest_marker: 'cpu_only'
+      pytest_marker: 'cpu_only and not post_training'
       xla_python_client_mem_fraction: 0.75
       tf_force_gpu_allow_growth: false
       container_resource_option: "--privileged"
@@ -155,7 +155,7 @@ jobs:
       device_name: v6e-4
       image_type: ${{ matrix.image_type }}
       cloud_runner: linux-x86-ct6e-180-4tpu
-      pytest_marker: 'not cpu_only and not gpu_only and not integration_test'
+      pytest_marker: 'not cpu_only and not gpu_only and not integration_test and not post_training'
       xla_python_client_mem_fraction: 0.75
       tf_force_gpu_allow_growth: false
       container_resource_option: "--privileged"
@@ -175,7 +175,7 @@ jobs:
       device_name: v6e-4
       image_type: ${{ matrix.image_type }}
       cloud_runner: linux-x86-ct6e-180-4tpu
-      pytest_marker: 'not cpu_only and not gpu_only and integration_test'
+      pytest_marker: 'not cpu_only and not gpu_only and integration_test and not post_training'
       xla_python_client_mem_fraction: 0.75
       tf_force_gpu_allow_growth: false
       container_resource_option: "--privileged"
@@ -195,7 +195,7 @@ jobs:
       device_name: v6e-4
       image_type: ${{ matrix.image_type }}
       cloud_runner: linux-x86-ct6e-180-4tpu
-      pytest_marker: 'not cpu_only and not gpu_only and not integration_test'
+      pytest_marker: 'not cpu_only and not gpu_only and not integration_test and not post_training'
       xla_python_client_mem_fraction: 0.75
       tf_force_gpu_allow_growth: false
       container_resource_option: "--privileged"
@@ -215,7 +215,7 @@ jobs:
       device_name: v6e-4
       image_type: ${{ matrix.image_type }}
       cloud_runner: linux-x86-ct6e-180-4tpu
-      pytest_marker: 'not cpu_only and not gpu_only and integration_test'
+      pytest_marker: 'not cpu_only and not gpu_only and integration_test and not post_training'
       xla_python_client_mem_fraction: 0.75
       tf_force_gpu_allow_growth: false
       container_resource_option: "--privileged"
@@ -236,13 +236,57 @@ jobs:
       device_name: a100-40gb-4
       image_type: ${{ matrix.image_type }}
       cloud_runner: linux-x86-a2-48-a100-4gpu
-      pytest_marker: 'not cpu_only and not tpu_only and not integration_test'
+      pytest_marker: 'not cpu_only and not tpu_only and not integration_test and not post_training'
       xla_python_client_mem_fraction: 0.65
       tf_force_gpu_allow_growth: true
       container_resource_option: "--shm-size 2g --runtime=nvidia --gpus all --privileged"
       is_scheduled_run: ${{ github.event_name == 'schedule' }}
       maxtext_sha: ${{ needs.build_and_upload_maxtext_package.outputs.maxtext_sha }}
 
+  maxtext_post_training_cpu_unit_tests:  # CPU run of the post-training unit tests via the reusable test workflow
+    needs: build_and_upload_maxtext_package
+    if: needs.doc_only_check.outputs.run_tests == 'true'  # NOTE(review): doc_only_check is not listed in this job's `needs` - confirm the condition evaluates as intended
+    uses: ./.github/workflows/run_tests_against_package.yml
+    strategy:
+        fail-fast: false
+        matrix:
+          image_type: ["py312"]
+    with:
+      device_type: cpu
+      device_name: X64
+      cloud_runner: linux-x86-n2-16
+      image_type: ${{ matrix.image_type }}
+      pytest_marker: 'cpu_only'  # the main CPU job uses 'cpu_only and not post_training', so these tests are selected only here
+      pytest_addopts: 'tests/post_training/unit'  # presumably limits collection to this directory - confirm in the reusable workflow
+      xla_python_client_mem_fraction: 0.75
+      tf_force_gpu_allow_growth: false
+      container_resource_option: "--privileged"
+      is_scheduled_run: ${{ github.event_name == 'schedule' }}
+      extra_pip_deps_file: 'src/install_maxtext_extra_deps/extra_post_train_base_deps_from_github.txt'  # installed with `uv pip install -r` by the reusable workflow when non-empty
+      maxtext_sha: ${{ needs.build_and_upload_maxtext_package.outputs.maxtext_sha }}
+
+  maxtext_post_training_tpu_unit_tests:  # TPU run of the post-training unit tests via the reusable test workflow
+    needs: build_and_upload_maxtext_package
+    if: needs.doc_only_check.outputs.run_tests == 'true'
+    uses: ./.github/workflows/run_tests_against_package.yml
+    strategy:
+        fail-fast: false
+        matrix:
+          image_type: ["py312"]
+    with:
+      device_type: tpu
+      device_name: v6e-4
+      image_type: ${{ matrix.image_type }}
+      cloud_runner: linux-x86-ct6e-180-4tpu
+      pytest_marker: 'not cpu_only and not gpu_only and not integration_test'  # not just 'tpu_only': tests tagged only post_training (rl_utils_test, train_rl_test) are excluded from every other job and must run here
+      pytest_addopts: 'tests/post_training/unit'  # keeps the broader marker expression scoped to the post-training unit tests
+      xla_python_client_mem_fraction: 0.75
+      tf_force_gpu_allow_growth: false
+      container_resource_option: "--privileged"
+      is_scheduled_run: ${{ github.event_name == 'schedule' }}
+      extra_pip_deps_file: 'src/install_maxtext_extra_deps/extra_post_train_base_deps_from_github.txt'  # installed with `uv pip install -r` by the reusable workflow when non-empty
+      maxtext_sha: ${{ needs.build_and_upload_maxtext_package.outputs.maxtext_sha }}
+
   maxtext_gpu_integration_tests:
     needs: build_and_upload_maxtext_package
     if: needs.doc_only_check.outputs.run_tests == 'true'
@@ -257,7 +301,7 @@ jobs:
       device_name: a100-40gb-4
       image_type: ${{ matrix.image_type }}
       cloud_runner: linux-x86-a2-48-a100-4gpu
-      pytest_marker: 'not cpu_only and not tpu_only and integration_test'
+      pytest_marker: 'not cpu_only and not tpu_only and integration_test and not post_training'
       xla_python_client_mem_fraction: 0.65
       tf_force_gpu_allow_growth: true
       container_resource_option: "--shm-size 2g --runtime=nvidia --gpus all --privileged"
@@ -266,7 +310,7 @@ jobs:
 
   all_tests_passed:
     name: All Required Tests Passed
-    needs: [doc_only_check, build_and_upload_maxtext_package, maxtext_cpu_unit_tests, maxtext_tpu_unit_tests, maxtext_tpu_integration_tests, maxtext_tpu_pathways_unit_tests, maxtext_tpu_pathways_integration_tests, maxtext_gpu_unit_tests, maxtext_gpu_integration_tests]
+    needs: [doc_only_check, build_and_upload_maxtext_package, maxtext_cpu_unit_tests, maxtext_tpu_unit_tests, maxtext_tpu_integration_tests, maxtext_tpu_pathways_unit_tests, maxtext_tpu_pathways_integration_tests, maxtext_gpu_unit_tests, maxtext_gpu_integration_tests, maxtext_post_training_cpu_unit_tests, maxtext_post_training_tpu_unit_tests]
     if: always()
     runs-on: ubuntu-latest
     steps:
@@ -287,6 +331,8 @@ jobs:
           echo "TPU pathways integration: ${NEEDS_MAXTEXT_TPU_PATHWAYS_INTEGRATION_TESTS_RESULT}"
           echo "GPU tests: ${NEEDS_MAXTEXT_GPU_UNIT_TESTS_RESULT}"
           echo "GPU integration: ${NEEDS_MAXTEXT_GPU_INTEGRATION_TESTS_RESULT}"
+          echo "Post-training CPU tests: ${NEEDS_MAXTEXT_POST_TRAINING_CPU_UNIT_TESTS_RESULT}"
+          echo "Post-training TPU tests: ${NEEDS_MAXTEXT_POST_TRAINING_TPU_UNIT_TESTS_RESULT}"
 
           # Fail only if any job failed or was cancelled (skipped is OK)
           if [ "${{ contains(needs.*.result, 'failure') }}" == "true" ] || [ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]; then
@@ -305,6 +351,8 @@ jobs:
           NEEDS_MAXTEXT_TPU_PATHWAYS_INTEGRATION_TESTS_RESULT: ${{ needs.maxtext_tpu_pathways_integration_tests.result }}
           NEEDS_MAXTEXT_GPU_UNIT_TESTS_RESULT: ${{ needs.maxtext_gpu_unit_tests.result }}
           NEEDS_MAXTEXT_GPU_INTEGRATION_TESTS_RESULT: ${{ needs.maxtext_gpu_integration_tests.result }}
+          NEEDS_MAXTEXT_POST_TRAINING_CPU_UNIT_TESTS_RESULT: ${{ needs.maxtext_post_training_cpu_unit_tests.result }}
+          NEEDS_MAXTEXT_POST_TRAINING_TPU_UNIT_TESTS_RESULT: ${{ needs.maxtext_post_training_tpu_unit_tests.result }}
 
   all_notebooks_passed:
     name: All Notebooks Passed
@@ -337,7 +385,7 @@ jobs:
 
   notify_failure:
     name: Notify failed build # creates an issue or modifies last open existing issue for failed build
-    needs: [maxtext_jupyter_notebooks, maxtext_cpu_unit_tests, maxtext_tpu_unit_tests, maxtext_tpu_integration_tests, maxtext_tpu_pathways_unit_tests, maxtext_tpu_pathways_integration_tests, maxtext_gpu_unit_tests, maxtext_gpu_integration_tests]
+    needs: [maxtext_jupyter_notebooks, maxtext_cpu_unit_tests, maxtext_tpu_unit_tests, maxtext_tpu_integration_tests, maxtext_tpu_pathways_unit_tests, maxtext_tpu_pathways_integration_tests, maxtext_gpu_unit_tests, maxtext_gpu_integration_tests, maxtext_post_training_cpu_unit_tests, maxtext_post_training_tpu_unit_tests]
     if: ${{ always() }}
     runs-on: ubuntu-latest
     permissions:
diff --git a/.github/workflows/run_tests_against_package.yml b/.github/workflows/run_tests_against_package.yml
@@ -61,6 +61,10 @@ on:
       maxtext_sha:
         required: true
         type: string
+      extra_pip_deps_file:
+        required: false
+        type: string
+        default: ''
 
 permissions:
   contents: read
@@ -96,6 +100,16 @@ jobs:
           python3 --version
           python3 -m pip freeze
           uv pip install pytest-cov
+      - name: Install extra pip deps
+        if: inputs.extra_pip_deps_file != ''
+        shell: bash
+        # Hand the input to the script through the environment instead of splicing
+        # ${{ ... }} into the shell text, so the path is never parsed as shell code.
+        env:
+          EXTRA_PIP_DEPS_FILE: ${{ inputs.extra_pip_deps_file }}
+        run: |
+          source .venv/bin/activate
+          uv pip install -r "${EXTRA_PIP_DEPS_FILE}"
       - name: Copy test assets files
         run : gcloud storage cp gs://maxtext-test-assets/* tests/assets
       - name: Run Tests
diff --git a/pytest.ini b/pytest.ini
@@ -5,7 +5,7 @@ testpaths =
 python_files = *_test.py *_tests.py
 addopts =
     -rf --import-mode=importlib --strict-markers
-    --ignore=tests/integration/grpo_trainer_correctness_test.py
+    --ignore=tests/post_training/integration/grpo_trainer_correctness_test.py
     --ignore=tests/integration/smoke/train_gpu_smoke_test.py
     --ignore=tests/integration/smoke/train_int8_smoke_test.py
     --ignore=tests/integration/smoke/train_smoke_test.py
@@ -36,4 +36,5 @@ markers =
                       e.g., end_to_end tests
     external_serving: JetStream / serving / decode server components
     external_training: goodput integrations
+    post_training: marks tests for post-training code paths.
 
diff --git a/src/install_maxtext_extra_deps/extra_post_train_base_deps_from_github.txt b/src/install_maxtext_extra_deps/extra_post_train_base_deps_from_github.txt
@@ -0,0 +1 @@
+google-tunix @ https://github.com/google/tunix/archive/336d102fe32ca0edbe42a8f66ff0fd533cebdf52.zip
diff --git a/src/install_maxtext_extra_deps/extra_post_train_deps_from_github.txt b/src/install_maxtext_extra_deps/extra_post_train_deps_from_github.txt
@@ -1,5 +1,5 @@
+-r extra_post_train_base_deps_from_github.txt
 google-jetstream @ https://github.com/AI-Hypercomputer/JetStream/archive/29329e8e73820993f77cfc8efe34eb2a73f5de98.zip
-google-tunix @ https://github.com/google/tunix/archive/336d102fe32ca0edbe42a8f66ff0fd533cebdf52.zip
 mlperf-logging @ https://github.com/mlcommons/logging/archive/38ab22670527888c8eb7825a4ece176fcc36a95d.zip
 tpu-inference @ https://github.com/vllm-project/tpu-inference/archive/0cae84fc9a883ba1bde02d4f07930e6af9e92958.zip
 vllm @ git+https://github.com/vllm-project/vllm@ee8a29511fc69e3f0f6291fa6ff1cf6e47f7750d
diff --git a/tests/post_training/integration/grpo_correctness.py b/tests/post_training/integration/grpo_correctness.py
@@ -35,7 +35,7 @@
 
 from trl import GRPOConfig, GRPOTrainer
 
-pytestmark = [pytest.mark.external_training]  # uses pre-generated checkpoint
+pytestmark = [pytest.mark.external_training, pytest.mark.post_training]  # uses pre-generated checkpoint
 
 
 class GRPOTest(unittest.TestCase):
diff --git a/tests/post_training/integration/grpo_trainer_correctness_test.py b/tests/post_training/integration/grpo_trainer_correctness_test.py
@@ -22,7 +22,7 @@
   from maxtext/tests/assets/logits_generation/generate_grpo_golden_logits.py
 
 Usage:
-  pytest tests/integration/grpo_trainer_correctness_test.py
+  pytest tests/post_training/integration/grpo_trainer_correctness_test.py
 """
 
 import os
@@ -52,7 +52,7 @@
 import transformers
 
 # This test is for serving pathways via offline_engine and maxengine.
-pytestmark = [pytest.mark.external_training]
+pytestmark = [pytest.mark.external_training, pytest.mark.post_training]
 
 
 def get_golden_data(config):
diff --git a/tests/post_training/integration/sft_trainer_correctness_test.py b/tests/post_training/integration/sft_trainer_correctness_test.py
@@ -21,7 +21,7 @@
 
 Usage:
 
-  pytest tests/integration/sft_trainer_correctness_test.py
+  pytest tests/post_training/integration/sft_trainer_correctness_test.py
 """
 
 import os.path
@@ -46,6 +46,8 @@
 import pytest
 from transformers import AutoTokenizer
 
+pytestmark = [pytest.mark.post_training]
+
 
 def get_golden_data(model_name):
   """Get the golden data for sft_trainer from maxtext/tests/assets/logits_generation/generate_sft_golden_data.py."""
diff --git a/tests/post_training/unit/distillation_checkpointing_test.py b/tests/post_training/unit/distillation_checkpointing_test.py
@@ -17,7 +17,7 @@
 import pytest
 
 pytest.importorskip("tunix")
-pytestmark = [pytest.mark.tpu_only]
+pytestmark = [pytest.mark.tpu_only, pytest.mark.post_training]
 
 import json
 import os
diff --git a/tests/post_training/unit/distillation_data_processing_test.py b/tests/post_training/unit/distillation_data_processing_test.py
@@ -14,6 +14,10 @@
 
 """Data processing tests for distillation."""
 
+import pytest
+
+pytestmark = [pytest.mark.post_training, pytest.mark.cpu_only]
+
 import argparse
 import os
 import subprocess
diff --git a/tests/post_training/unit/rl_utils_test.py b/tests/post_training/unit/rl_utils_test.py
@@ -18,6 +18,8 @@
 import pytest
 from types import SimpleNamespace
 
+pytestmark = [pytest.mark.post_training]
+
 evaluate_rl = pytest.importorskip(
     "maxtext.trainers.post_train.rl.evaluate_rl",
     reason="tunix (required by evaluate_rl) is not installed GPU",
diff --git a/tests/post_training/unit/sft_data_processing_test.py b/tests/post_training/unit/sft_data_processing_test.py
@@ -13,6 +13,10 @@
 # limitations under the License.
 
 """Data processing tests for SFT."""
+import pytest
+
+pytestmark = [pytest.mark.post_training, pytest.mark.cpu_only]
+
 import subprocess
 import unittest
 import os.path
diff --git a/tests/post_training/unit/sft_hooks_test.py b/tests/post_training/unit/sft_hooks_test.py
@@ -16,7 +16,7 @@
 import pytest
 
 pytest.importorskip("tunix")
-pytestmark = [pytest.mark.tpu_only, pytest.mark.external_training]
+pytestmark = [pytest.mark.tpu_only, pytest.mark.external_training, pytest.mark.post_training]
 
 import jax
 
diff --git a/tests/post_training/unit/train_distill_test.py b/tests/post_training/unit/train_distill_test.py
@@ -18,7 +18,7 @@
 import pytest
 
 pytest.importorskip("tunix")
-pytestmark = [pytest.mark.tpu_only]
+pytestmark = [pytest.mark.tpu_only, pytest.mark.post_training]
 
 import shutil
 import tempfile
diff --git a/tests/post_training/unit/train_rl_test.py b/tests/post_training/unit/train_rl_test.py
@@ -21,6 +21,8 @@
 import jax
 
 
+pytestmark = [pytest.mark.post_training]
+
 # Same as in rl_utils_test.py.
 train_rl = pytest.importorskip(
     "maxtext.trainers.post_train.rl.train_rl",

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+google-tunix @ https://github.com/google/tunix/archive/336d102fe32ca0edbe42a8f66ff0fd533cebdf52.zip`