Skip to content

Commit e38675b

Browse files
committed
fix for gpu
1 parent 3a9d12b commit e38675b

2 files changed

Lines changed: 23 additions & 1 deletion

File tree

src/maxdiffusion/configs/base_flux_schnell.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ enable_profiler: False
236236
# the iteration time a chance to stabilize.
237237
skip_first_n_steps_for_profiler: 5
238238
profiler_steps: 10
239+
profiler: ""
239240

240241
# Generation parameters
241242
prompt: "A magical castle in the middle of a forest, artistic drawing"
@@ -284,3 +285,5 @@ quantization: ''
284285
quantization_local_shard_count: -1
285286
use_qwix_quantization: False
286287
compile_topology_num_slices: -1 # Number of target slices, set to a positive integer.
288+
289+
save_final_checkpoint: False

src/maxdiffusion/train_flux.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import jax
2020
from absl import app
2121
from maxdiffusion import (max_logging, pyconfig)
22+
from contextlib import contextmanager
2223

2324
from maxdiffusion.train_utils import (
2425
validate_train_config,
@@ -39,6 +40,24 @@ def main(argv: Sequence[str]) -> None:
3940
max_logging.log(f"Found {jax.device_count()} devices.")
4041
train(config)
4142

43+
@contextmanager
def transformer_engine_context():
  """Context manager that configures TransformerEngine's sharding, if available.

  If the optional `transformer_engine` package is importable, inform it of the
  physical mesh axis names used here ("data", "tensor", "fsdp") so its JAX
  sharding logic operates correctly. If the package is absent, this is a
  no-op context manager.
  """
  try:
    # Only the import lives in the try: an ImportError raised later by the
    # caller's body (inside the `with` block) must propagate, not be
    # swallowed here — re-yielding after a thrown-in exception would raise
    # "generator didn't stop after throw()".
    from transformer_engine.jax.sharding import global_shard_guard, MeshResource
  except ImportError:
    # TransformerEngine not installed — run without it.
    yield
    return

  # Map TransformerEngine's logical parallelism resources onto the mesh
  # axes used by this trainer; pipeline/context parallelism are unused.
  mesh_resource = MeshResource(
      dp_resource="data",
      tp_resource="tensor",
      fsdp_resource="fsdp",
      pp_resource=None,
      cp_resource=None,
  )
  with global_shard_guard(mesh_resource):
    yield
4260

4361
if __name__ == "__main__":
  # Wrap the whole training run in the TransformerEngine context (a no-op
  # when the library is not installed) before handing control to absl.
  with transformer_engine_context():
    app.run(main)

0 commit comments

Comments
 (0)