Skip to content

Commit c24d321

Browse files
Merge pull request #3314 from AI-Hypercomputer:chengnuojin-move-ga
PiperOrigin-RevId: 878766160
2 parents 4af13bf + f5e4953 commit c24d321

3 files changed

Lines changed: 6 additions & 2 deletions

File tree

src/maxtext/configs/types.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1383,7 +1383,11 @@ class Profiling(BaseModel):
13831383
xprof_e2e_enable_fw_throttle_event: bool = Field(False, description="Enable FW throttle event.")
13841384
xprof_e2e_enable_fw_power_level_event: bool = Field(False, description="Enable FW power level event.")
13851385
xprof_e2e_enable_fw_thermal_event: bool = Field(False, description="Enable FW thermal event.")
1386-
profile_power_events: bool = Field(False, description="Enable TPU-specific power/thermal profiling events. Defaults to False to avoid breaking GPU xplane tracing.")
1386+
profile_power_events: bool = Field(
1387+
False,
1388+
description="Enable TPU-specific power/thermal profiling events."
1389+
" Defaults to False to avoid breaking GPU xplane tracing.",
1390+
)
13871391

13881392

13891393
class HloDump(BaseModel):

src/maxtext/trainers/pre_train/train.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@
5858
from maxtext.common.gcloud_stub import cloud_diagnostics as _cloud_diag, is_decoupled
5959
from maxtext.common.gcloud_stub import vertex_tensorboard_modules
6060
from maxtext.common.metric_logger import MetricLogger, record_activation_metrics
61-
from maxtext.optimizers.gradient_accumulation import gradient_accumulation_loss_and_grad
6261
from maxtext.trainers.post_train.dpo.dpo_utils import _merge_dpo_state, _split_dpo_state, dpo_loss_fn
6362
from maxtext.utils import exceptions
6463
from maxtext.utils import gcs_utils
@@ -68,6 +67,7 @@
6867
from maxtext.utils import qk_clip_utils
6968
from maxtext.utils import sharding
7069
from maxtext.utils import train_utils
70+
from maxtext.utils.gradient_accumulation import gradient_accumulation_loss_and_grad
7171
from maxtext.utils.vocabulary_tiling import vocab_tiling_linen_loss
7272

7373
_diag_modules = _cloud_diag()
File renamed without changes.

0 commit comments

Comments (0)