Merge pull request #2977 from AI-Hypercomputer:chengnuojin-fix-hlo

Google-ML-Automation · Google-ML-Automation · commit cfd7786df46e · 2026-01-21T10:01:31.000-08:00
PiperOrigin-RevId: 859148792
diff --git a/src/MaxText/configs/types.py b/src/MaxText/configs/types.py
@@ -36,6 +36,7 @@
 from MaxText import accelerator_to_spec_map, max_utils
 from MaxText.common_types import AttentionType, DecoderBlockType, ShardMode
 from MaxText.globals import MAXTEXT_ASSETS_ROOT
+from MaxText.utils import gcs_utils
 
 logger = logging.getLogger(__name__)
 
@@ -1821,6 +1822,29 @@ def set_derived_and_validate_values(self) -> "MaxTextConfig":
     if self.final_logits_soft_cap == 0.0:
       self.final_logits_soft_cap = None
 
+    # This must be invoked before initializing the backend
+    # pylint: disable=access-member-before-definition
+    def validate_and_set_hlo_dump_defaults():
+      if os.environ.get("XLA_FLAGS") and self.dump_hlo_xla_flags:
+        raise ValueError("You must set either XLA_FLAGS or dump_hlo_xla_flags to dump HLO, but not both.")
+      if not os.environ.get("XLA_FLAGS") and not self.dump_hlo_xla_flags:
+        self.dump_hlo_xla_flags = f"--xla_dump_to={self.dump_hlo_local_dir} --xla_dump_large_constants"
+        if self.dump_hlo_local_module_name:
+          self.dump_hlo_xla_flags = (
+              f"{self.dump_hlo_xla_flags} --xla_dump_hlo_module_re={self.dump_hlo_local_module_name}"
+          )
+      if not self.dump_hlo_gcs_dir:
+        self.dump_hlo_gcs_dir = os.path.join(self.base_output_directory, self.run_name, "xla_dump")
+      else:
+        self.dump_hlo_gcs_dir = gcs_utils.add_trailing_slash(self.dump_hlo_gcs_dir)
+      if not os.environ.get("XLA_FLAGS"):
+        os.environ["XLA_FLAGS"] = self.dump_hlo_xla_flags
+
+    # pylint: enable=access-member-before-definition
+
+    # Validate and initiate hlo dump related configs
+    validate_and_set_hlo_dump_defaults()
+
     # D. CALCULATE MODEL DIMENSIONS from global_parameter_scale
     # This allows scaling the model size up or down easily with a single power-of-two factor.
     emb_scale, num_head_scale, mlp_dim_scale, layer_scale = get_individual_scales(self.global_parameter_scale)