Skip to content

Commit 38ed3ff

Browse files
Merge pull request #3134 from CIeNET-International:charlesli/revertUploadDockerImages
PiperOrigin-RevId: 869974240
2 parents 514d0db + efaa1c2 commit 38ed3ff

10 files changed

Lines changed: 162 additions & 418 deletions

.github/workflows/UploadDockerImages.yml

Lines changed: 5 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# This workflow builds and pushes MaxText images for both TPU and GPU devices.
1616
# It runs automatically daily at 12am UTC, on Pull Requests, or manually via Workflow Dispatch.
1717

18-
name: Build and Test Images
18+
name: Build Images
1919

2020
on:
2121
schedule:
@@ -32,11 +32,6 @@ on:
3232
- all
3333
- tpu
3434
- gpu
35-
for_dev_test:
36-
description: 'For development test purpose. All images will be added a -test suffix'
37-
required: false
38-
type: boolean
39-
default: false
4035

4136
permissions:
4237
contents: read
@@ -47,7 +42,6 @@ jobs:
4742
outputs:
4843
maxtext_sha: ${{ steps.vars.outputs.maxtext_sha }}
4944
image_date: ${{ steps.vars.outputs.image_date }}
50-
image_suffix: ${{ steps.vars.outputs.image_suffix }}
5145
steps:
5246
- name: Checkout MaxText
5347
uses: actions/checkout@v5
@@ -61,13 +55,6 @@ jobs:
6155
# Image date
6256
echo "image_date=$(date +%Y-%m-%d)" >> $GITHUB_OUTPUT
6357
64-
# If for_dev_test is true, set suffix to -test, otherwise empty
65-
if [[ "${{ github.event.inputs.for_dev_test }}" == "true" ]]; then
66-
echo "image_suffix=-test" >> $GITHUB_OUTPUT
67-
else
68-
echo "image_suffix=" >> $GITHUB_OUTPUT
69-
fi
70-
7158
tpu-pre-training:
7259
name: ${{ matrix.image_name }}
7360
needs: setup
@@ -85,7 +72,7 @@ jobs:
8572
dockerfile: ./dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
8673
uses: ./.github/workflows/build_and_push_docker_image.yml
8774
with:
88-
image_name: ${{ matrix.image_name }}${{ needs.setup.outputs.image_suffix }}
75+
image_name: ${{ matrix.image_name }}
8976
device: ${{ matrix.device }}
9077
build_mode: ${{ matrix.build_mode }}
9178
dockerfile: ${{ matrix.dockerfile }}
@@ -109,13 +96,14 @@ jobs:
10996
dockerfile: ./dependencies/dockerfiles/maxtext_post_training_local_dependencies.Dockerfile
11097
uses: ./.github/workflows/build_and_push_docker_image.yml
11198
with:
112-
image_name: ${{ matrix.image_name }}${{ needs.setup.outputs.image_suffix }}
99+
image_name: ${{ matrix.image_name }}
113100
device: ${{ matrix.device }}
114101
build_mode: ${{ matrix.build_mode }}
115102
dockerfile: ${{ matrix.dockerfile }}
116103
maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
117104
image_date: ${{ needs.setup.outputs.image_date }}
118105
base_image: gcr.io/tpu-prod-env-multipod/maxtext_jax_stable:${{ needs.setup.outputs.image_date }}
106+
is_post_training: true
119107

120108
gpu-pre-training:
121109
name: ${{ matrix.image_name }}
@@ -134,48 +122,9 @@ jobs:
134122
dockerfile: ./dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile
135123
uses: ./.github/workflows/build_and_push_docker_image.yml
136124
with:
137-
image_name: ${{ matrix.image_name }}${{ needs.setup.outputs.image_suffix }}
125+
image_name: ${{ matrix.image_name }}
138126
device: ${{ matrix.device }}
139127
build_mode: ${{ matrix.build_mode }}
140128
dockerfile: ${{ matrix.dockerfile }}
141129
maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
142130
image_date: ${{ needs.setup.outputs.image_date }}
143-
144-
# TEST JOBS
145-
pre-training-tpu-tests:
146-
needs: [setup, tpu-pre-training]
147-
strategy:
148-
fail-fast: false
149-
matrix:
150-
image: [maxtext_jax_stable, maxtext_jax_nightly]
151-
uses: ./.github/workflows/test_and_tag_docker_image.yml
152-
with:
153-
image_name: ${{ matrix.image }}${{ needs.setup.outputs.image_suffix }}
154-
image_date: ${{ needs.setup.outputs.image_date }}
155-
test_mode: tpu-pre-training
156-
157-
post-training-tpu-tests:
158-
needs: [setup, tpu-post-training]
159-
strategy:
160-
fail-fast: false
161-
matrix:
162-
image: [maxtext_post_training_stable, maxtext_post_training_nightly]
163-
uses: ./.github/workflows/test_and_tag_docker_image.yml
164-
with:
165-
image_name: ${{ matrix.image }}${{ needs.setup.outputs.image_suffix }}
166-
image_date: ${{ needs.setup.outputs.image_date }}
167-
test_mode: tpu-post-training
168-
169-
170-
pre-training-gpu-tests:
171-
needs: [setup, gpu-pre-training]
172-
strategy:
173-
fail-fast: false
174-
matrix:
175-
image: [maxtext_gpu_jax_stable, maxtext_gpu_jax_nightly]
176-
uses: ./.github/workflows/test_and_tag_docker_image.yml
177-
with:
178-
image_name: ${{ matrix.image }}${{ needs.setup.outputs.image_suffix }}
179-
image_date: ${{ needs.setup.outputs.image_date }}
180-
test_mode: gpu-pre-training
181-

.github/workflows/build_and_push_docker_image.yml

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ on:
4141
required: false
4242
type: string
4343
default: ''
44+
is_post_training:
45+
required: false
46+
type: boolean
47+
default: false
4448

4549
permissions:
4650
contents: read
@@ -78,9 +82,7 @@ jobs:
7882
ref: ${{ inputs.maxtext_sha }}
7983

8084
- name: Checkout post-training dependencies
81-
if: |
82-
steps.check.outputs.should_run == 'true' &&
83-
contains(inputs.image_name, 'post_training_nightly')
85+
if: steps.check.outputs.should_run == 'true' && inputs.image_name == 'maxtext_post_training_nightly'
8486
run: |
8587
git clone https://github.com/google/tunix.git ./tunix
8688
git clone https://github.com/vllm-project/vllm.git ./vllm
@@ -108,7 +110,8 @@ jobs:
108110
push: true
109111
context: .
110112
file: ${{ inputs.dockerfile }}
111-
tags: gcr.io/tpu-prod-env-multipod/${{ inputs.image_name }}:${{ inputs.image_date }}-build-${{ github.run_id }}
113+
tags: gcr.io/tpu-prod-env-multipod/${{ inputs.image_name }}:latest
114+
cache-from: type=gha
112115
outputs: type=image,compression=zstd,force-compression=true
113116
build-args: |
114117
DEVICE=${{ inputs.device }}
@@ -123,20 +126,24 @@ jobs:
123126
shell: bash
124127
run: |
125128
SOURCE_IMAGE="gcr.io/tpu-prod-env-multipod/${{ inputs.image_name }}"
126-
TEMP_IMG="$SOURCE_IMAGE:${{ inputs.image_date }}-build-${{ github.run_id }}"
129+
130+
# Add date tag
131+
gcloud container images add-tag "$SOURCE_IMAGE:latest" "$SOURCE_IMAGE:${{ inputs.image_date }}" --quiet
127132
128133
# Convert date to YYYYMMDD format
129134
clean_date=$(echo "${{ inputs.image_date }}" | sed 's/[-:]//g' | cut -c1-8)
130135
131136
# Add MaxText tag
132137
maxtext_hash=$(git rev-parse --short HEAD)
133-
gcloud container images add-tag "$TEMP_IMG" "$SOURCE_IMAGE:maxtext_${maxtext_hash}_${clean_date}" --quiet
134-
gcloud container images add-tag "$TEMP_IMG" "$SOURCE_IMAGE:${{ inputs.image_date }}" --quiet
138+
gcloud container images add-tag "$SOURCE_IMAGE:latest" "$SOURCE_IMAGE:maxtext_${maxtext_hash}_${clean_date}" --quiet
139+
135140
136141
# Add post-training dependencies tags
137-
for dir in tunix vllm tpu-inference; do
138-
if [ -d "./$dir" ]; then
139-
dir_hash=$(git -C "$dir" rev-parse --short HEAD)
140-
gcloud container images add-tag "$TEMP_IMG" "$SOURCE_IMAGE:${dir}_${dir_hash}_${clean_date}" --quiet
141-
fi
142-
done
142+
if [ "${{ inputs.is_post_training }}" == "true" ]; then
143+
for dir in tunix vllm tpu-inference; do
144+
if [ -d "./$dir" ]; then
145+
dir_hash=$(git -C "$dir" rev-parse --short HEAD)
146+
gcloud container images add-tag "$SOURCE_IMAGE:latest" "$SOURCE_IMAGE:${dir}_${dir_hash}_${clean_date}" --quiet
147+
fi
148+
done
149+
fi

0 commit comments

Comments
 (0)