File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1919# This workflow will run a small MaxText training workload on a GPU runner
2020# using a custom Docker image with all dependencies pre-installed.
2121
22+ # This workflow will run a small MaxText training workload on a GPU runner
23+ # using a custom Docker image with all code and dependencies pre-installed.
24+
2225name : MaxText Custom Image Workload
2326
2427on :
@@ -32,21 +35,23 @@ jobs:
3235 name : "Run MaxText Training Workload"
3336 runs-on : ["linux-x86-a3-megagpu-h100-8gpu"]
3437 container :
35- # Use your newly built custom image
38+ # Use your custom image which contains the source code and dependencies.
3639 image : us-docker.pkg.dev/tpu-prod-env-multipod/jax-stable-stack/maxtext-gpu-custom:latest
3740
3841 steps :
3942 - name : Run MaxText Training
43+ # The Docker image's working directory is /deps, but the code is in /deps/src.
44+ working-directory : /deps/src
4045 env :
4146 NVTE_FRAMEWORK : jax
4247 TF_FORCE_GPU_ALLOW_GROWTH : "true"
4348 run : |
44- # The working directory is /deps, so this path is correct.
49+ # Run the main training script from the /deps/src directory.
4550 python MaxText/train.py MaxText/configs/base.yml \
4651 run_name="maxtext-ci-test-${{ github.run_id }}" \
4752 steps=5 \
4853 enable_checkpointing=false \
49- attention='cudnet_flash_te' \
54+ attention='cudnn_flash_te' \
5055 dataset_type='synthetic'
5156
5257# name: SDXL Workload Training on GPU
You can’t perform that action at this time.
0 commit comments