Skip to content

Commit 146fb8e

Browse files
Merge pull request #3324 from AI-Hypercomputer:hengtaoguo-parse
PiperOrigin-RevId: 879335972
2 parents 102af23 + d0eb830 commit 146fb8e

2 files changed

Lines changed: 2 additions & 2 deletions

File tree

src/maxtext/configs/post_train/rl.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -153,7 +153,7 @@ max_num_seqs: null
153153
# If True, enables asynchronous scheduling in vLLM for faster generation
154154
async_scheduling: True
155155
# stop generation when any of these strings is generated
156-
stop_strings: [</answer>]
156+
stop_strings: null
157157

158158
# ====== Checkpoint Configuration ======
159159
enable_checkpointing: True

src/maxtext/trainers/post_train/rl/utils_rl.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -106,7 +106,7 @@ def get_match_format_regex(tmvp_config):
106106
match_format = re.compile(
107107
(
108108
r"^[\s]{0,}"
109-
rf"{tmvp_config.reasoning_start_token}.+?{tmvp_config.reasoning_end_token}.*?"
109+
rf"{tmvp_config.reasoning_start_token}.+{tmvp_config.reasoning_end_token}.*?"
110110
rf"{tmvp_config.solution_start_token}(.+?){tmvp_config.solution_end_token}"
111111
),
112112
flags=re.MULTILINE | re.DOTALL,

0 commit comments

Comments
 (0)