Skip to content

Commit 023e416

Browse files
committed
Add option to start test_batch in train_rl from a specific index, also add default tokenizer_path for default model
1 parent ca7e2df commit 023e416

4 files changed

Lines changed: 7 additions & 1 deletion

File tree

src/maxtext/configs/post_train/rl.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ micro_batch_size: -1
100100
# Keep `num_test_batches` low so that evaluation runs quickly. It can be
101101
# increased to a max. of 330 (if batch size is 4).
102102
num_test_batches: 5 # 200
103+
test_batch_start_index: 0
103104
train_fraction: 1.0
104105

105106
eval_interval: 10 # this doesn't matter if `TRAIN_FRACTION = 1.0`.

src/maxtext/configs/types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1636,6 +1636,7 @@ class RLDataset(BaseModel):
16361636
batch_size: int = Field(1, description="Global batch size for the dataset loader in RL.")
16371637
num_batches: int = Field(4, description="Number of batches for RL training.")
16381638
num_test_batches: int = Field(5, description="Number of batches for RL evaluation.")
1639+
test_batch_start_index: int = Field(0, description="Start index for the test dataset")
16391640
train_fraction: float = Field(1.0, description="Fraction of the dataset to be used for training.")
16401641
micro_batch_size: int = Field(-1, description="Micro batch size for rollout and training.")
16411642

src/maxtext/trainers/post_train/rl/train_rl.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,9 @@ def _filter_long_prompts(x):
416416
train_dataset = train_dataset.to_iter_dataset().batch(trainer_config.batch_size)
417417

418418
test_dataset = test_dataset.filter(_filter_long_prompts)
419-
test_dataset = test_dataset[: trainer_config.num_test_batches * trainer_config.batch_size]
419+
test_dataset = test_dataset[
420+
trainer_config.test_batch_start_index : trainer_config.num_test_batches * trainer_config.batch_size
421+
]
420422

421423
test_dataset = test_dataset.to_iter_dataset().batch(trainer_config.batch_size)
422424

src/maxtext/utils/globals.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@
7474
"olmo3-7b": "allenai/Olmo-3-7B-Instruct",
7575
"olmo3-7b-pt": "allenai/Olmo-3-1025-7B",
7676
"olmo3-32b": "allenai/Olmo-3-32B-Think",
77+
# "default" is not an HF model, but adding it to avoid a confusing warning about tokenizer_path
78+
"default": os.path.join(MAXTEXT_ASSETS_ROOT, "tokenizers/tokenizer.llama2"),
7779
}
7880

7981
__all__ = [

0 commit comments

Comments
 (0)