Skip to content

Commit 9a05980

Browse files
Merge pull request #3335 from AI-Hypercomputer:docker_image_fix
PiperOrigin-RevId: 879728816
2 parents 2670a9e + 9593677 commit 9a05980

2 files changed

Lines changed: 27 additions & 25 deletions

File tree

.github/workflows/UploadDockerImages.yml

Lines changed: 22 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -79,31 +79,32 @@ jobs:
7979
maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
8080
image_date: ${{ needs.setup.outputs.image_date }}
8181

82-
tpu-post-training:
83-
name: ${{ matrix.image_name }}
84-
needs: [setup, tpu-pre-training]
85-
strategy:
86-
fail-fast: false
87-
matrix:
88-
include:
89-
- device: tpu
90-
build_mode: post-training
91-
image_name: maxtext_post_training_stable
92-
dockerfile: ./dependencies/dockerfiles/maxtext_post_training_dependencies.Dockerfile
93-
- device: tpu
94-
build_mode: post-training
95-
image_name: maxtext_post_training_nightly
96-
dockerfile: ./dependencies/dockerfiles/maxtext_post_training_local_dependencies.Dockerfile
82+
tpu-post-training-stable:
83+
name: tpu-post-training-stable
84+
needs: setup
9785
uses: ./.github/workflows/build_and_push_docker_image.yml
9886
with:
99-
image_name: ${{ matrix.image_name }}
100-
device: ${{ matrix.device }}
101-
build_mode: ${{ matrix.build_mode }}
102-
dockerfile: ${{ matrix.dockerfile }}
87+
image_name: maxtext_post_training_stable
88+
device: tpu
89+
build_mode: stable
90+
workflow: post-training
91+
dockerfile: ./dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
92+
maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
93+
image_date: ${{ needs.setup.outputs.image_date }}
94+
95+
tpu-post-training-nightly:
96+
name: tpu-post-training-nightly
97+
needs: [setup, tpu-post-training-stable]
98+
uses: ./.github/workflows/build_and_push_docker_image.yml
99+
with:
100+
image_name: maxtext_post_training_nightly
101+
device: tpu
102+
build_mode: nightly
103+
workflow: post-training
104+
dockerfile: ./dependencies/dockerfiles/maxtext_post_training_local_dependencies.Dockerfile
103105
maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
104106
image_date: ${{ needs.setup.outputs.image_date }}
105-
base_image: gcr.io/tpu-prod-env-multipod/maxtext_jax_stable:${{ needs.setup.outputs.image_date }}
106-
is_post_training: true
107+
base_image: gcr.io/tpu-prod-env-multipod/maxtext_post_training_stable:${{ needs.setup.outputs.image_date }}
107108

108109
gpu-pre-training:
109110
name: ${{ matrix.image_name }}

.github/workflows/build_and_push_docker_image.yml

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -41,10 +41,10 @@ on:
4141
required: false
4242
type: string
4343
default: ''
44-
is_post_training:
44+
workflow:
4545
required: false
46-
type: boolean
47-
default: false
46+
type: string
47+
default: 'pre-training'
4848

4949
permissions:
5050
contents: read
@@ -121,6 +121,7 @@ jobs:
121121
build-args: |
122122
DEVICE=${{ inputs.device }}
123123
MODE=${{ inputs.build_mode }}
124+
WORKFLOW=${{ inputs.workflow }}
124125
JAX_VERSION=NONE
125126
LIBTPU_VERSION=NONE
126127
INCLUDE_TEST_ASSETS=true
@@ -144,7 +145,7 @@ jobs:
144145
145146
146147
# Add post-training dependencies tags
147-
if [ "${{ inputs.is_post_training }}" == "true" ]; then
148+
if [ "${{ inputs.workflow }}" == "post-training" ]; then
148149
for dir in tunix vllm tpu-inference; do
149150
if [ -d "./$dir" ]; then
150151
dir_hash=$(git -C "$dir" rev-parse --short HEAD)

0 commit comments

Comments
 (0)