Skip to content

Commit ae98376

Browse files
Merge pull request #3391 from AI-Hypercomputer:mattdavidow-add-zone-documentation
PiperOrigin-RevId: 882723630
2 parents bc032b3 + 36517e6 commit ae98376

1 file changed

Lines changed: 3 additions & 0 deletions

File tree

docs/tutorials/posttraining/rl_on_multi_host.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,7 @@ export MAXTEXT_CKPT_PATH=${BASE_OUTPUT_DIRECTORY?}/${WORKLOAD?}/0/items
83 83
export TPU_TYPE=<TPU Type> # e.g., 'v5p-128'
84 84
export TPU_CLUSTER=<cluster name>
85 85
export PROJECT_ID=<GCP project ID>
86 +
export ZONE=<GCP zone>
86 87
export CLOUD_IMAGE_NAME=<your artifact registry image> # Name for the Docker image to be built
87 88
```
88 89

@@ -195,6 +196,7 @@ xpk workload create-pathways --workload ${WORKLOAD?} \
195 196
--docker-image gcr.io/${PROJECT_ID?}/${CLOUD_IMAGE_NAME?} --cluster ${TPU_CLUSTER?} \
196 197
--tpu-type=${TPU_TYPE?} --num-slices=1 \
197 198
--project=${PROJECT_ID?} --priority=high \
199 +
--zone=${ZONE?} \
198 200
--command "HF_TOKEN=${HF_TOKEN?} TF_CPP_MIN_LOG_LEVEL=0 JAX_PLATFORMS=proxy JAX_BACKEND_TARGET=grpc://127.0.0.1:29000 ENABLE_PATHWAYS_PERSISTENCE='1' \
199 201
python3 -m src.maxtext.trainers.post_train.rl.train_rl src/maxtext/configs/post_train/rl.yml \
200 202
model_name=${MODEL?} \
@@ -212,6 +214,7 @@ xpk workload create-pathways --workload ${WORKLOAD?} \
212 214
--docker-image gcr.io/${PROJECT_ID?}/${CLOUD_IMAGE_NAME?} --cluster ${TPU_CLUSTER?} \
213 215
--tpu-type=${TPU_TYPE?} --num-slices=1 \
214 216
--project=${PROJECT_ID?} --priority=high \
217 +
--zone=${ZONE?} \
215 218
--command "HF_TOKEN=${HF_TOKEN?} TF_CPP_MIN_LOG_LEVEL=0 JAX_PLATFORMS=proxy JAX_BACKEND_TARGET=grpc://127.0.0.1:29000 ENABLE_PATHWAYS_PERSISTENCE='1' \
216 219
python3 -m src.maxtext.trainers.post_train.rl.train_rl src/maxtext/configs/post_train/rl.yml \
217 220
model_name=${MODEL?} \

0 commit comments

Comments (0)