Skip to content

Commit 1f3c0dc

Browse files
chore: Migrate gsutil usage to gcloud storage
1 parent 5d9e57f commit 1f3c0dc

File tree

7 files changed, with 18 additions and 16 deletions.

benchmarks/maxtext_xpk_runner.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -428,7 +428,7 @@ def build_user_command(
428428
if wl_config.hlo_dump:
429429
hlo_dump = "XLA_FLAGS='--xla_dump_large_constants --xla_dump_to=/tmp/xla_dump'"
430430
upload_hlo_dump = (
431-
f" && gsutil -m cp -r /tmp/xla_dump {wl_config.base_output_directory}/{wl_config.run_name}/hlo_dump"
431+
f" && gcloud storage cp --recursive /tmp/xla_dump {wl_config.base_output_directory}/{wl_config.run_name}/hlo_dump"
432432
)
433433
# Construct the command string with proper formatting and line continuations
434434
command = " ".join(

benchmarks/upload_metrics_to_bq.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -187,7 +187,7 @@ def add_parser_arguments(parser: argparse.ArgumentParser):
187187

188188

189189
def download_metrics_file_locally(metrics_gcs_file: str, local_file: str) -> int:
190-
command = f"gsutil cp -r {metrics_gcs_file} {local_file}"
190+
command = f"gcloud storage cp --recursive {metrics_gcs_file} {local_file}"
191191
return run_command_with_updates(command, f"Download {metrics_gcs_file} in {local_file}")
192192

193193

src/dependencies/scripts/setup.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -187,7 +187,7 @@ install_custom_libtpu() {
187187
# Install 'crcmod' to download 'libtpu.so' from GCS reliably
188188
python3 -m uv pip install -U crcmod
189189
# Copy libtpu.so from GCS path
190-
gsutil cp "$LIBTPU_GCS_PATH" "$libtpu_path"
190+
gcloud storage cp "$LIBTPU_GCS_PATH" "$libtpu_path"
191191
}
192192

193193
install_maxtext_package_without_deps() {

src/maxtext/inference/mlperf/README.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,10 +42,10 @@ cd ${DATA_DISK_DIR?}
4242
#### LLama2-70b:
4343

4444
```
45-
gsutil cp gs://cloud-tpu-inference-public/mlcommons/inference/language/llama2-70b/data/processed-openorca/open_orca_gpt4_tokenized_llama.calibration_1000.pkl .
45+
gcloud storage cp gs://cloud-tpu-inference-public/mlcommons/inference/language/llama2-70b/data/processed-openorca/open_orca_gpt4_tokenized_llama.calibration_1000.pkl .
4646
mv open_orca_gpt4_tokenized_llama.calibration_1000.pkl processed-calibration-data.pkl
4747
48-
gsutil cp gs://cloud-tpu-inference-public/mlcommons/inference/language/llama2-70b/data/processed-openorca/open_orca_gpt4_tokenized_llama.sampled_24576.pkl .
48+
gcloud storage cp gs://cloud-tpu-inference-public/mlcommons/inference/language/llama2-70b/data/processed-openorca/open_orca_gpt4_tokenized_llama.sampled_24576.pkl .
4949
mv open_orca_gpt4_tokenized_llama.sampled_24576.pkl processed-data.pkl
5050
```
5151

tests/post_training/integration/grpo_trainer_correctness_test.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -114,9 +114,10 @@ def setUp(self):
114114
super().setUp()
115115
jax.config.update("jax_default_prng_impl", "unsafe_rbg")
116116
command = [
117-
"gsutil",
117+
"gcloud",
118+
"storage",
118119
"cp",
119-
"-r",
120+
"--recursive",
120121
"gs://maxtext-dataset/hf/llama3.1-tokenizer",
121122
os.path.join(MAXTEXT_ASSETS_ROOT, ""),
122123
]

tests/post_training/unit/sft_data_processing_test.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -314,7 +314,7 @@
314314
{"test_data": QWEN_DATA},
315315
]
316316
)
317-
@pytest.mark.external_training # Uses gsutil to pull tokenizer.
317+
@pytest.mark.external_training # Uses gcloud storage to pull tokenizer.
318318
class SFTDataProcessingTest(unittest.TestCase):
319319
test_data = {}
320320

@@ -323,15 +323,16 @@ def setUpClass(cls):
323323
super().setUpClass()
324324
exit_code = subprocess.call(
325325
[
326-
"gsutil",
326+
"gcloud",
327+
"storage",
327328
"cp",
328-
"-r",
329+
"--recursive",
329330
"gs://maxtext-dataset/hf/llama2-chat-tokenizer",
330331
os.path.join(MAXTEXT_ASSETS_ROOT, ""),
331332
]
332333
)
333334
if exit_code != 0:
334-
raise ValueError(f"Download tokenizer with gsutil cp failed with exit code: {exit_code}")
335+
raise ValueError(f"Download tokenizer with gcloud storage cp failed with exit code: {exit_code}")
335336

336337
def setUp(self):
337338
super().setUp()

tools/orchestration/multihost_job.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ def move_script_dir_to_gcs(script_dir, tmp_dir, zip_name, bucket_path):
104104

105105
# Move zip file to GCS
106106
zip_in_gcs_path = "/".join((bucket_path, zip_name))
107-
command = ["gsutil", "mv", zip_path, zip_in_gcs_path]
107+
command = ["gcloud", "storage", "mv", zip_path, zip_in_gcs_path]
108108
captured_output = subprocess.run(command, check=True, capture_output=True)
109109

110110
# Cleanup
@@ -148,7 +148,7 @@ def write_startup_script(zip_gcs_path, zip_name, log_name, bucket_path, startup_
148148
tar xzf {zip_name}
149149
{args.COMMAND}) 2>&1) >> {log_name}
150150
(echo "{finish_status_str()}") >> {log_name}
151-
gsutil cp {log_name} "{bucket_path}/"
151+
gcloud storage cp {log_name} "{bucket_path}/"
152152
(({create_kill_command_str(args)}) 2>&1 ) >> {log_name}"""
153153

154154
with open(startup_script_file, "wt", encoding="utf-8") as f:
@@ -199,10 +199,10 @@ def write_download_from_gcs_sh(zip_gcs_path):
199199
while [ \$GCS_READ_SUCCESS -eq 0 ]
200200
do
201201
{{ # try
202-
gsutil cp {zip_gcs_path} . &&
202+
gcloud storage cp {zip_gcs_path} . &&
203203
echo 'Code download from GCS successful!' && GCS_READ_SUCCESS=1
204204
}} || {{ # catch
205-
echo 'Failed to read GCS via gsutil, trying again'
205+
echo 'Failed to read GCS via gcloud storage, trying again'
206206
sleep 10
207207
}}
208208
done"""
@@ -341,7 +341,7 @@ def main(raw_args=None) -> None:
341341
captured_output = move_script_dir_to_gcs(args.SCRIPT_DIR, tmp_dir_relative_to_script, zip_name, bucket_path)
342342
if captured_output.returncode != 0:
343343
print("\n\n Moving code to GCS failed")
344-
print(f"Running 'gsutil mv zip {bucket_path}' failed with error: ")
344+
print(f"Running 'gcloud storage mv zip {bucket_path}' failed with error: ")
345345
print(captured_output.stderr.decode())
346346
print("\nYou may need to run 'gcloud auth login'")
347347
return -1

0 commit comments

Comments (0)