@@ -220,7 +220,7 @@ expert_shard_attention_option: "fsdp"
 
 # when moe weight matrices are sharded on both fsdp and fsdp-transpose axes, use two separate all-gather calls
 moe_fsdp_use_two_stage_all_gather: false
-# Shard the expert dimension of the MLP weights on the FSDP axis.
+# Shard the expert dimension of the MLP weights on the FSDP axis.
 # This configuration is recommended only when num_experts is a multiple of fsdp_parallelism
 shard_exp_on_fsdp: False
 # use fsdp and fsdp_transpose axes for sharding the moe weights
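
To make the MoE sharding flags in this hunk concrete, here is a minimal sketch of an override that exercises them together. The values are illustrative assumptions, not defaults; `num_experts` and `ici_fsdp_parallelism` are existing MaxText options, chosen here so that `num_experts` is a multiple of the FSDP degree, as the comment above recommends.

```yaml
# Illustrative MoE sharding overrides (a sketch, not a tested recipe).
num_experts: 16            # assumption: 16 experts
ici_fsdp_parallelism: 4    # assumption: FSDP degree 4; 16 % 4 == 0, so expert sharding divides evenly
shard_exp_on_fsdp: True    # shard the expert dimension of the MLP weights on the FSDP axis
moe_fsdp_use_two_stage_all_gather: true  # two all-gather calls when fsdp and fsdp-transpose both shard the weights
```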
@@ -535,7 +535,7 @@ num_vocab_tiling: 1
 
 # Tokenizer
 vocab_size: 32_000 # powers of 2 for sharding
-tokenizer_path: "src/MaxText/src/maxtext/assets/tokenizers/tokenizer.llama2"
+tokenizer_path: "src/maxtext/assets/tokenizers/tokenizer.llama2"
 # tfds pipeline supports tokenizer_type: sentencepiece, huggingface, tiktoken
 # grain pipeline supports tokenizer_type: sentencepiece, huggingface
 # hf pipeline only supports huggingface type, and will ignore tokenizer_type flag
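
As a usage note, the comments above imply that `tokenizer_type` must match what the chosen data pipeline supports. A hedged example pairing, assuming the llama2 tokenizer asset is a sentencepiece model, using the existing `tokenizer_type` and `dataset_type` options:

```yaml
# Sketch: sentencepiece tokenizer with the tfds pipeline (values are assumptions).
tokenizer_path: "src/maxtext/assets/tokenizers/tokenizer.llama2"
tokenizer_type: "sentencepiece"  # supported by both tfds and grain pipelines
dataset_type: "tfds"             # the hf pipeline would ignore tokenizer_type and expect a huggingface tokenizer
```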
@@ -1027,7 +1027,7 @@ use_mrope: false
 mrope_section: [24, 20, 20]
 position_id_per_seconds: 25
 
-# Subslice shape in the form of "x,y,z" when using pathways (single controller).
+# Subslice shape in the form of "x,y,z" when using pathways (single controller).
 # Example: "8,8" to use a 8x8 subgrid (64 chips) of a full pod (16x16) of trillium.
 subslice_shape: ""
 
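
To make the subslice option concrete, here is a sketch of the override described in the comment, requesting an 8x8 subgrid (64 chips) of a full 16x16 trillium pod when running under pathways (single controller); the value is taken directly from the example in the comment above.

```yaml
# Sketch: run on a 64-chip subslice of a full trillium pod (pathways / single controller).
subslice_shape: "8,8"
```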