3333 - tpu
3434 - gpu
3535
36+ permissions : # scopes granted to the workflow's GITHUB_TOKEN
37+ contents : read # repository contents are readable only; no other scope is requested here
38+
3639jobs :
37- build :
38- name : Build ${{ matrix.device }}-${{ matrix.build_mode }} Image
39- runs-on : linux-x86-n2-16-buildkit
40- container : google/cloud-sdk:524.0.0
40+ setup : # computes build metadata once; the image build jobs read it via needs.setup.outputs
41+ runs-on : ubuntu-latest
42+ outputs : # re-exported from the "vars" step below
43+ maxtext_sha : ${{ steps.vars.outputs.maxtext_sha }}
44+ image_date : ${{ steps.vars.outputs.image_date }}
45+ steps :
46+ - name : Checkout MaxText
47+ uses : actions/checkout@v5
48+
49+ - name : Get metadata
50+ id : vars
51+ run : |
52+ # Full commit SHA of the checked-out MaxText revision
53+ echo "maxtext_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
54+
55+ # Build date (YYYY-MM-DD, runner clock) passed to the build jobs as image_date
56+ echo "image_date=$(date +%Y-%m-%d)" >> "$GITHUB_OUTPUT"
4157
42- # Use Github Actions matrix to run image builds in parallel
58+ tpu-pre-training :
59+ name : ${{ matrix.image_name }}
60+ needs : setup
4361 strategy :
4462 fail-fast : false
4563 matrix :
4664 include :
47- # TPU Image Builds
4865 - device : tpu
4966 build_mode : stable
5067 image_name : maxtext_jax_stable
5370 build_mode : nightly
5471 image_name : maxtext_jax_nightly
5572 dockerfile : ./dependencies/dockerfiles/maxtext_dependencies.Dockerfile
56- # GPU Image Builds
73+ uses : ./.github/workflows/build_and_push_docker_image.yml
74+ with :
75+ image_name : ${{ matrix.image_name }}
76+ device : ${{ matrix.device }}
77+ build_mode : ${{ matrix.build_mode }}
78+ dockerfile : ${{ matrix.dockerfile }}
79+ maxtext_sha : ${{ needs.setup.outputs.maxtext_sha }}
80+ image_date : ${{ needs.setup.outputs.image_date }}
81+
82+ tpu-post-training : # builds the TPU post-training images by calling the shared build-and-push workflow
83+ name : ${{ matrix.image_name }}
84+ needs : [setup, tpu-pre-training] # setup supplies maxtext_sha/image_date; NOTE(review): presumably waits on tpu-pre-training because these images build on the pre-training ones - confirm
85+ strategy :
86+ fail-fast : false # a failure in one matrix entry does not cancel the other
87+ matrix :
88+ include : # two entries: same device and build_mode, different image_name and dockerfile
89+ - device : tpu
90+ build_mode : post-training
91+ image_name : maxtext_post_training_stable
92+ dockerfile : ./dependencies/dockerfiles/maxtext_post_training_dependencies.Dockerfile
93+ - device : tpu
94+ build_mode : post-training
95+ image_name : maxtext_post_training_nightly
96+ dockerfile : ./dependencies/dockerfiles/maxtext_post_training_local_dependencies.Dockerfile
97+ uses : ./.github/workflows/build_and_push_docker_image.yml
98+ with : # inputs forwarded to the reusable workflow
99+ image_name : ${{ matrix.image_name }}
100+ device : ${{ matrix.device }}
101+ build_mode : ${{ matrix.build_mode }}
102+ dockerfile : ${{ matrix.dockerfile }}
103+ maxtext_sha : ${{ needs.setup.outputs.maxtext_sha }}
104+ image_date : ${{ needs.setup.outputs.image_date }}
105+ is_post_training : true # only this job passes the flag; the pre-training jobs in this workflow omit it
106+
107+ gpu-pre-training :
108+ name : ${{ matrix.image_name }}
109+ needs : setup
110+ strategy :
111+ fail-fast : false
112+ matrix :
113+ include :
57114 - device : gpu
58115 build_mode : stable
59116 image_name : maxtext_gpu_jax_stable
@@ -62,74 +119,11 @@ jobs:
62119 build_mode : nightly
63120 image_name : maxtext_gpu_jax_nightly
64121 dockerfile : ./dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile
65-
66- if : >
67- github.event_name == 'schedule' ||
68- github.event_name == 'pull_request' ||
69- github.event_name == 'workflow_dispatch' && (
70- github.event.inputs.target_device == 'all' ||
71- github.event.inputs.target_device == 'tpu' ||
72- github.event.inputs.target_device == 'gpu'
73- )
74-
75- # Setup for GKE runners per b/412986220#comment82 and b/412986220#comment90
76- steps :
77- - name : Check if build should run
78- id : check
79- shell : bash
80- run : |
81- if [[ "${{ github.event_name }}" == "workflow_dispatch" && "${{ github.event.inputs.target_device }}" != "all" && "${{ github.event.inputs.target_device }}" != "${{ matrix.device }}" ]]; then
82- echo "should_run=false" >> $GITHUB_OUTPUT
83- echo "Skipping build for device: ${{ matrix.device }} in ${{ matrix.build_mode }} mode."
84- else
85- echo "should_run=true" >> $GITHUB_OUTPUT
86- echo "Building for device: ${{ matrix.device }} in ${{ matrix.build_mode }} mode."
87- fi
88-
89- - name : Checkout git repository
90- uses : actions/checkout@v5
91- if : steps.check.outputs.should_run == 'true'
92-
93- - name : Mark git repository as safe
94- if : steps.check.outputs.should_run == 'true'
95- run : git config --global --add safe.directory ${GITHUB_WORKSPACE}
96-
97- - name : Configure Docker
98- if : steps.check.outputs.should_run == 'true'
99- run : gcloud auth configure-docker us-docker.pkg.dev,gcr.io -q
100-
101- - name : Set up Docker BuildX
102- uses : docker/setup-buildx-action@v3.11.1
103- if : steps.check.outputs.should_run == 'true'
104- with :
105- driver : remote
106- endpoint : tcp://localhost:1234
107-
108- # Env variables to be passed to Dockerfile
109- - name : Get metadata
110- id : vars
111- if : steps.check.outputs.should_run == 'true'
112- run : |
113- echo "commit_hash=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
114- echo "image_date=$(date +%Y-%m-%d)" >> $GITHUB_OUTPUT
115-
116- # Docker BuildX command config
117- - name : Build and Push Docker Image
118- uses : docker/build-push-action@v6
119- if : steps.check.outputs.should_run == 'true'
120- with :
121- push : true
122- context : .
123- file : ${{ matrix.dockerfile }}
124- tags : |
125- gcr.io/tpu-prod-env-multipod/${{ matrix.image_name }}:maxtext_${{ steps.vars.outputs.commit_hash }}
126- gcr.io/tpu-prod-env-multipod/${{ matrix.image_name }}:${{ steps.vars.outputs.image_date }}
127- gcr.io/tpu-prod-env-multipod/${{ matrix.image_name }}:latest
128- cache-from : type=gha
129- cache-to : type=gha,mode=max
130- provenance : false
131- build-args : |
132- DEVICE=${{ matrix.device }}
133- MODE=${{ matrix.build_mode }}
134- JAX_VERSION=NONE
135- LIBTPU_GCS_PATH=NONE
122+ uses : ./.github/workflows/build_and_push_docker_image.yml
123+ with :
124+ image_name : ${{ matrix.image_name }}
125+ device : ${{ matrix.device }}
126+ build_mode : ${{ matrix.build_mode }}
127+ dockerfile : ${{ matrix.dockerfile }}
128+ maxtext_sha : ${{ needs.setup.outputs.maxtext_sha }}
129+ image_date : ${{ needs.setup.outputs.image_date }}
0 commit comments