@@ -133,7 +133,7 @@ jobs:
133133 device_name : X64
134134 cloud_runner : linux-x86-n2-16
135135 image_type : ${{ matrix.image_type }}
136- pytest_marker : ' cpu_only'
136+ pytest_marker : ' cpu_only and not post_training '
137137 xla_python_client_mem_fraction : 0.75
138138 tf_force_gpu_allow_growth : false
139139 container_resource_option : " --privileged"
@@ -155,7 +155,7 @@ jobs:
155155 device_name : v6e-4
156156 image_type : ${{ matrix.image_type }}
157157 cloud_runner : linux-x86-ct6e-180-4tpu
158- pytest_marker : ' not cpu_only and not gpu_only and not integration_test'
158+ pytest_marker : ' not cpu_only and not gpu_only and not integration_test and not post_training '
159159 xla_python_client_mem_fraction : 0.75
160160 tf_force_gpu_allow_growth : false
161161 container_resource_option : " --privileged"
@@ -175,7 +175,7 @@ jobs:
175175 device_name : v6e-4
176176 image_type : ${{ matrix.image_type }}
177177 cloud_runner : linux-x86-ct6e-180-4tpu
178- pytest_marker : ' not cpu_only and not gpu_only and integration_test'
178+ pytest_marker : ' not cpu_only and not gpu_only and integration_test and not post_training '
179179 xla_python_client_mem_fraction : 0.75
180180 tf_force_gpu_allow_growth : false
181181 container_resource_option : " --privileged"
@@ -195,7 +195,7 @@ jobs:
195195 device_name : v6e-4
196196 image_type : ${{ matrix.image_type }}
197197 cloud_runner : linux-x86-ct6e-180-4tpu
198- pytest_marker : ' not cpu_only and not gpu_only and not integration_test'
198+ pytest_marker : ' not cpu_only and not gpu_only and not integration_test and not post_training '
199199 xla_python_client_mem_fraction : 0.75
200200 tf_force_gpu_allow_growth : false
201201 container_resource_option : " --privileged"
@@ -215,7 +215,7 @@ jobs:
215215 device_name : v6e-4
216216 image_type : ${{ matrix.image_type }}
217217 cloud_runner : linux-x86-ct6e-180-4tpu
218- pytest_marker : ' not cpu_only and not gpu_only and integration_test'
218+ pytest_marker : ' not cpu_only and not gpu_only and integration_test and not post_training '
219219 xla_python_client_mem_fraction : 0.75
220220 tf_force_gpu_allow_growth : false
221221 container_resource_option : " --privileged"
@@ -236,13 +236,57 @@ jobs:
236236 device_name : a100-40gb-4
237237 image_type : ${{ matrix.image_type }}
238238 cloud_runner : linux-x86-a2-48-a100-4gpu
239- pytest_marker : ' not cpu_only and not tpu_only and not integration_test'
239+ pytest_marker : ' not cpu_only and not tpu_only and not integration_test and not post_training '
240240 xla_python_client_mem_fraction : 0.65
241241 tf_force_gpu_allow_growth : true
242242 container_resource_option : " --shm-size 2g --runtime=nvidia --gpus all --privileged"
243243 is_scheduled_run : ${{ github.event_name == 'schedule' }}
244244 maxtext_sha : ${{ needs.build_and_upload_maxtext_package.outputs.maxtext_sha }}
245245
# Post-training unit tests on CPU: invokes the reusable workflow
# run_tests_against_package.yml with device_type cpu on the linux-x86-n2-16
# runner, for the single matrix entry image_type "py312".
# NOTE(review): the `if` below reads needs.doc_only_check, but `needs` lists
# only build_and_upload_maxtext_package. GitHub documents the `needs` context
# as covering direct dependencies only; confirm this gate evaluates as intended.
246+ maxtext_post_training_cpu_unit_tests :
247+ needs : build_and_upload_maxtext_package
248+ if : needs.doc_only_check.outputs.run_tests == 'true'
249+ uses : ./.github/workflows/run_tests_against_package.yml
250+ strategy :
251+ fail-fast : false
252+ matrix :
253+ image_type : ["py312"]
254+ with :
255+ device_type : cpu
256+ device_name : X64
257+ cloud_runner : linux-x86-n2-16
258+ image_type : ${{ matrix.image_type }}
# Test selection: marker expression plus an extra pytest argument naming the
# post-training unit test directory.
# NOTE(review): presumably the marker is passed to `pytest -m`; if so, tests
# under this path that carry neither cpu_only nor tpu_only would be selected
# by neither post-training job. Verify against the reusable workflow.
259+ pytest_marker : ' cpu_only'
260+ pytest_addopts : ' tests/post_training/unit'
261+ xla_python_client_mem_fraction : 0.75
262+ tf_force_gpu_allow_growth : false
263+ container_resource_option : " --privileged"
264+ is_scheduled_run : ${{ github.event_name == 'schedule' }}
# Extra requirements file handed to the reusable workflow; presumably the
# post-training dependencies to install before testing (TODO confirm).
265+ extra_pip_deps_file : ' src/install_maxtext_extra_deps/extra_post_train_base_deps_from_github.txt'
# Package SHA produced by the build job this job depends on.
266+ maxtext_sha : ${{ needs.build_and_upload_maxtext_package.outputs.maxtext_sha }}
267+
# Post-training unit tests on TPU: invokes the reusable workflow
# run_tests_against_package.yml with device_type tpu (device_name v6e-4) on the
# linux-x86-ct6e-180-4tpu runner, for the single matrix entry image_type "py312".
# NOTE(review): the `if` below reads needs.doc_only_check, but `needs` lists
# only build_and_upload_maxtext_package. GitHub documents the `needs` context
# as covering direct dependencies only; confirm this gate evaluates as intended.
268+ maxtext_post_training_tpu_unit_tests :
269+ needs : build_and_upload_maxtext_package
270+ if : needs.doc_only_check.outputs.run_tests == 'true'
271+ uses : ./.github/workflows/run_tests_against_package.yml
272+ strategy :
273+ fail-fast : false
274+ matrix :
275+ image_type : ["py312"]
276+ with :
277+ device_type : tpu
278+ device_name : v6e-4
279+ image_type : ${{ matrix.image_type }}
280+ cloud_runner : linux-x86-ct6e-180-4tpu
# Test selection: marker expression plus an extra pytest argument naming the
# post-training unit test directory.
# NOTE(review): this uses the positive marker 'tpu_only', whereas the other TPU
# jobs in this workflow select by exclusion ('not cpu_only and not gpu_only ...').
# Presumably tests under this path without an explicit tpu_only/cpu_only marker
# would not run in any job; verify that this is intended.
281+ pytest_marker : ' tpu_only'
282+ pytest_addopts : ' tests/post_training/unit'
283+ xla_python_client_mem_fraction : 0.75
284+ tf_force_gpu_allow_growth : false
285+ container_resource_option : " --privileged"
286+ is_scheduled_run : ${{ github.event_name == 'schedule' }}
# Extra requirements file handed to the reusable workflow; presumably the
# post-training dependencies to install before testing (TODO confirm).
287+ extra_pip_deps_file : ' src/install_maxtext_extra_deps/extra_post_train_base_deps_from_github.txt'
# Package SHA produced by the build job this job depends on.
288+ maxtext_sha : ${{ needs.build_and_upload_maxtext_package.outputs.maxtext_sha }}
289+
246290 maxtext_gpu_integration_tests :
247291 needs : build_and_upload_maxtext_package
248292 if : needs.doc_only_check.outputs.run_tests == 'true'
@@ -257,7 +301,7 @@ jobs:
257301 device_name : a100-40gb-4
258302 image_type : ${{ matrix.image_type }}
259303 cloud_runner : linux-x86-a2-48-a100-4gpu
260- pytest_marker : ' not cpu_only and not tpu_only and integration_test'
304+ pytest_marker : ' not cpu_only and not tpu_only and integration_test and not post_training '
261305 xla_python_client_mem_fraction : 0.65
262306 tf_force_gpu_allow_growth : true
263307 container_resource_option : " --shm-size 2g --runtime=nvidia --gpus all --privileged"
@@ -266,7 +310,7 @@ jobs:
266310
267311 all_tests_passed :
268312 name : All Required Tests Passed
269- needs : [doc_only_check, build_and_upload_maxtext_package, maxtext_cpu_unit_tests, maxtext_tpu_unit_tests, maxtext_tpu_integration_tests, maxtext_tpu_pathways_unit_tests, maxtext_tpu_pathways_integration_tests, maxtext_gpu_unit_tests, maxtext_gpu_integration_tests]
313+ needs : [doc_only_check, build_and_upload_maxtext_package, maxtext_cpu_unit_tests, maxtext_tpu_unit_tests, maxtext_tpu_integration_tests, maxtext_tpu_pathways_unit_tests, maxtext_tpu_pathways_integration_tests, maxtext_gpu_unit_tests, maxtext_gpu_integration_tests, maxtext_post_training_cpu_unit_tests, maxtext_post_training_tpu_unit_tests ]
270314 if : always()
271315 runs-on : ubuntu-latest
272316 steps :
@@ -287,6 +331,8 @@ jobs:
287331 echo "TPU pathways integration: ${NEEDS_MAXTEXT_TPU_PATHWAYS_INTEGRATION_TESTS_RESULT}"
288332 echo "GPU tests: ${NEEDS_MAXTEXT_GPU_UNIT_TESTS_RESULT}"
289333 echo "GPU integration: ${NEEDS_MAXTEXT_GPU_INTEGRATION_TESTS_RESULT}"
334+ echo "Post-training CPU tests: ${NEEDS_MAXTEXT_POST_TRAINING_CPU_UNIT_TESTS_RESULT}"
335+ echo "Post-training TPU tests: ${NEEDS_MAXTEXT_POST_TRAINING_TPU_UNIT_TESTS_RESULT}"
290336
291337 # Fail only if any job failed or was cancelled (skipped is OK)
292338 if [ "${{ contains(needs.*.result, 'failure') }}" == "true" ] || [ "${{ contains(needs.*.result, 'cancelled') }}" == "true" ]; then
@@ -305,6 +351,8 @@ jobs:
305351 NEEDS_MAXTEXT_TPU_PATHWAYS_INTEGRATION_TESTS_RESULT : ${{ needs.maxtext_tpu_pathways_integration_tests.result }}
306352 NEEDS_MAXTEXT_GPU_UNIT_TESTS_RESULT : ${{ needs.maxtext_gpu_unit_tests.result }}
307353 NEEDS_MAXTEXT_GPU_INTEGRATION_TESTS_RESULT : ${{ needs.maxtext_gpu_integration_tests.result }}
354+ NEEDS_MAXTEXT_POST_TRAINING_CPU_UNIT_TESTS_RESULT : ${{ needs.maxtext_post_training_cpu_unit_tests.result }}
355+ NEEDS_MAXTEXT_POST_TRAINING_TPU_UNIT_TESTS_RESULT : ${{ needs.maxtext_post_training_tpu_unit_tests.result }}
308356
309357 all_notebooks_passed :
310358 name : All Notebooks Passed
@@ -337,7 +385,7 @@ jobs:
337385
338386 notify_failure :
339387 name : Notify failed build # creates an issue or modifies last open existing issue for failed build
340- needs : [maxtext_jupyter_notebooks, maxtext_cpu_unit_tests, maxtext_tpu_unit_tests, maxtext_tpu_integration_tests, maxtext_tpu_pathways_unit_tests, maxtext_tpu_pathways_integration_tests, maxtext_gpu_unit_tests, maxtext_gpu_integration_tests]
388+ needs : [maxtext_jupyter_notebooks, maxtext_cpu_unit_tests, maxtext_tpu_unit_tests, maxtext_tpu_integration_tests, maxtext_tpu_pathways_unit_tests, maxtext_tpu_pathways_integration_tests, maxtext_gpu_unit_tests, maxtext_gpu_integration_tests, maxtext_post_training_cpu_unit_tests, maxtext_post_training_tpu_unit_tests ]
341389 if : ${{ always() }}
342390 runs-on : ubuntu-latest
343391 permissions :
0 commit comments