Skip to content

Commit dc29039

Browse files
Merge pull request #3171 from gurusai-voleti:ai-gsutil-migration-d14bd24923d643978341f64e4e888807
PiperOrigin-RevId: 888274064
2 parents (e57ed73 + 78b88dd); commit dc29039

File tree

7 files changed, with 18 additions and 17 deletions.

benchmarks/maxtext_xpk_runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ def build_user_command(
428428
if wl_config.hlo_dump:
429429
hlo_dump = "XLA_FLAGS='--xla_dump_large_constants --xla_dump_to=/tmp/xla_dump'"
430430
upload_hlo_dump = (
431-
f" && gsutil -m cp -r /tmp/xla_dump {wl_config.base_output_directory}/{wl_config.run_name}/hlo_dump"
431+
f" && gcloud storage cp -r /tmp/xla_dump {wl_config.base_output_directory}/{wl_config.run_name}/hlo_dump"
432432
)
433433
# Construct the command string with proper formatting and line continuations
434434
command = " ".join(

benchmarks/upload_metrics_to_bq.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ def add_parser_arguments(parser: argparse.ArgumentParser):
187187

188188

189189
def download_metrics_file_locally(metrics_gcs_file: str, local_file: str) -> int:
190-
command = f"gsutil cp -r {metrics_gcs_file} {local_file}"
190+
command = f"gcloud storage cp --recursive {metrics_gcs_file} {local_file}"
191191
return run_command_with_updates(command, f"Download {metrics_gcs_file} in {local_file}")
192192

193193

src/dependencies/scripts/setup.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ install_custom_libtpu() {
187187
# Install 'crcmod' to download 'libtpu.so' from GCS reliably
188188
python3 -m uv pip install -U crcmod
189189
# Copy libtpu.so from GCS path
190-
gsutil cp "$LIBTPU_GCS_PATH" "$libtpu_path"
190+
gcloud storage cp "$LIBTPU_GCS_PATH" "$libtpu_path"
191191
}
192192

193193
install_maxtext_package_without_deps() {

src/maxtext/inference/mlperf/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,10 @@ cd ${DATA_DISK_DIR?}
4242
#### LLama2-70b:
4343

4444
```
45-
gsutil cp gs://cloud-tpu-inference-public/mlcommons/inference/language/llama2-70b/data/processed-openorca/open_orca_gpt4_tokenized_llama.calibration_1000.pkl .
45+
gcloud storage cp gs://cloud-tpu-inference-public/mlcommons/inference/language/llama2-70b/data/processed-openorca/open_orca_gpt4_tokenized_llama.calibration_1000.pkl .
4646
mv open_orca_gpt4_tokenized_llama.calibration_1000.pkl processed-calibration-data.pkl
4747
48-
gsutil cp gs://cloud-tpu-inference-public/mlcommons/inference/language/llama2-70b/data/processed-openorca/open_orca_gpt4_tokenized_llama.sampled_24576.pkl .
48+
gcloud storage cp gs://cloud-tpu-inference-public/mlcommons/inference/language/llama2-70b/data/processed-openorca/open_orca_gpt4_tokenized_llama.sampled_24576.pkl .
4949
mv open_orca_gpt4_tokenized_llama.sampled_24576.pkl processed-data.pkl
5050
```
5151

tests/post_training/integration/grpo_trainer_correctness_test.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,10 @@ def setUp(self):
114114
super().setUp()
115115
jax.config.update("jax_default_prng_impl", "unsafe_rbg")
116116
command = [
117-
"gsutil",
117+
"gcloud",
118+
"storage",
118119
"cp",
119-
"-r",
120+
"--recursive",
120121
"gs://maxtext-dataset/hf/llama3.1-tokenizer",
121122
os.path.join(MAXTEXT_ASSETS_ROOT, ""),
122123
]

tests/post_training/unit/sft_data_processing_test.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import subprocess
2121
import unittest
2222
import os.path
23-
import pytest
2423
import numpy as np
2524
import jax
2625
from jax.sharding import Mesh
@@ -314,7 +313,7 @@
314313
{"test_data": QWEN_DATA},
315314
]
316315
)
317-
@pytest.mark.external_training # Uses gsutil to pull tokenizer.
316+
@pytest.mark.external_training # Uses gcloud storage to pull tokenizer.
318317
class SFTDataProcessingTest(unittest.TestCase):
319318
test_data = {}
320319

@@ -323,15 +322,16 @@ def setUpClass(cls):
323322
super().setUpClass()
324323
exit_code = subprocess.call(
325324
[
326-
"gsutil",
325+
"gcloud",
326+
"storage",
327327
"cp",
328-
"-r",
328+
"--recursive",
329329
"gs://maxtext-dataset/hf/llama2-chat-tokenizer",
330330
os.path.join(MAXTEXT_ASSETS_ROOT, ""),
331331
]
332332
)
333333
if exit_code != 0:
334-
raise ValueError(f"Download tokenizer with gsutil cp failed with exit code: {exit_code}")
334+
raise ValueError(f"Download tokenizer with gcloud storage cp failed with exit code: {exit_code}")
335335

336336
def setUp(self):
337337
super().setUp()

tools/orchestration/multihost_job.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def move_script_dir_to_gcs(script_dir, tmp_dir, zip_name, bucket_path):
104104

105105
# Move zip file to GCS
106106
zip_in_gcs_path = "/".join((bucket_path, zip_name))
107-
command = ["gsutil", "mv", zip_path, zip_in_gcs_path]
107+
command = ["gcloud", "storage", "mv", zip_path, zip_in_gcs_path]
108108
captured_output = subprocess.run(command, check=True, capture_output=True)
109109

110110
# Cleanup
@@ -148,7 +148,7 @@ def write_startup_script(zip_gcs_path, zip_name, log_name, bucket_path, startup_
148148
tar xzf {zip_name}
149149
{args.COMMAND}) 2>&1) >> {log_name}
150150
(echo "{finish_status_str()}") >> {log_name}
151-
gsutil cp {log_name} "{bucket_path}/"
151+
gcloud storage cp {log_name} "{bucket_path}/"
152152
(({create_kill_command_str(args)}) 2>&1 ) >> {log_name}"""
153153

154154
with open(startup_script_file, "wt", encoding="utf-8") as f:
@@ -199,10 +199,10 @@ def write_download_from_gcs_sh(zip_gcs_path):
199199
while [ \$GCS_READ_SUCCESS -eq 0 ]
200200
do
201201
{{ # try
202-
gsutil cp {zip_gcs_path} . &&
202+
gcloud storage cp {zip_gcs_path} . &&
203203
echo 'Code download from GCS successful!' && GCS_READ_SUCCESS=1
204204
}} || {{ # catch
205-
echo 'Failed to read GCS via gsutil, trying again'
205+
echo 'Failed to read GCS via gcloud storage, trying again'
206206
sleep 10
207207
}}
208208
done"""
@@ -341,7 +341,7 @@ def main(raw_args=None) -> None:
341341
captured_output = move_script_dir_to_gcs(args.SCRIPT_DIR, tmp_dir_relative_to_script, zip_name, bucket_path)
342342
if captured_output.returncode != 0:
343343
print("\n\n Moving code to GCS failed")
344-
print(f"Running 'gsutil mv zip {bucket_path}' failed with error: ")
344+
print(f"Running 'gcloud storage mv zip {bucket_path}' failed with error: ")
345345
print(captured_output.stderr.decode())
346346
print("\nYou may need to run 'gcloud auth login'")
347347
return -1

0 commit comments

Comments
 (0)