
Commit 755bdb8

fix ring of experts using random routing
1 parent e2f6b0e commit 755bdb8

2 files changed: 25 additions & 0 deletions

src/maxtext/layers/moe.py

Lines changed: 1 addition & 0 deletions
@@ -1127,6 -1127,7 @@ def wrapper(x, logits, pre_bias_logits, w0, w1, wo, w0_bias, w1_bias, wo_bias, r
           pre_bias_logits,
           self.config.use_custom_sort_vjp,
           roll_to_expert_id=num_experts_per_shard * expert_shard_id,
+          rngs=rngs,
       )

   # Filter down to the group sizes that apply to only the experts in the
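
The one-line change forwards the rngs argument into the wrapped megablox call on the ring-of-experts path, which is presumably what random routing needs: it samples expert assignments from a PRNG instead of deriving them from router logits, so a key must be threaded down to it. A minimal sketch of that idea (not MaxText's actual routing code; random_route and its signature are assumptions for illustration):

    import jax
    import jax.numpy as jnp

    def random_route(logits, num_experts_per_tok, rng):
      """Hypothetical random router: ignores logits, samples expert ids uniformly.

      Without an explicit `rng` threaded in from the caller there is nothing
      to sample from -- the failure mode a missing `rngs=rngs` would cause.
      """
      num_tokens, num_experts = logits.shape
      return jax.random.randint(
          rng, shape=(num_tokens, num_experts_per_tok), minval=0, maxval=num_experts
      )

    rng = jax.random.PRNGKey(0)
    logits = jnp.zeros((8, 16))                 # 8 tokens, 16 experts
    assignments = random_route(logits, 2, rng)  # (8, 2) random expert ids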

tests/unit/train_compile_test.py

Lines changed: 24 additions & 0 deletions
@@ -426,6 +426,30 @@ def test_moe_megablox_bf16(self):
         )
     )

+  @pytest.mark.cpu_only
+  def test_moe_megablox_ring_ep_random(self):
+    temp_dir = gettempdir()
+    compiled_trainstep_file = os.path.join(temp_dir, "test_moe_megablox_ring_ep_random.pickle")
+    train_compile_main(
+        (
+            "",
+            get_test_config_path(),
+            f"compiled_trainstep_file={compiled_trainstep_file}",
+            "compile_topology=v5p-16",
+            "use_iota_embed=true",
+            "compile_topology_num_slices=1",
+            "model_name=deepseek3-test",
+            "sparse_matmul=True",
+            "megablox=True",
+            "per_device_batch_size=4",
+            "max_target_length=128",
+            "use_ring_of_experts=True",
+            "use_random_routing=True",
+            "attention=flash",
+            "dtype=bfloat16",
+        )
+    )
+
   @pytest.mark.cpu_only
   def test_moe_ragged_dot_bf16(self):
     temp_dir = gettempdir()
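
Assuming a standard pytest setup for the repo (the exact invocation is not part of this commit), the new cpu_only compile test can be run in isolation with:

    python -m pytest tests/unit/train_compile_test.py -k test_moe_megablox_ring_ep_random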
