AI-Hypercomputer
diff --git a/benchmarks/xla_flags_library.py b/benchmarks/xla_flags_library.py
Lines changed: 10 additions & 0 deletions
@@ -77,6 +77,10 @@
 
 
 # Enable SparseCore All Gather (1D), Reduce Scatter (1D) and All Reduce (ND)
+# On Ironwood, the following flags already default to true:
+#   xla_tpu_enable_sparse_core_collective_offload_all_gather
+#   xla_tpu_enable_sparse_core_collective_offload_reduce_scatter
+#   xla_tpu_enable_sparse_core_collective_offload_all_reduce
 ENABLE_SPARSECORE_OFFLOADING_FOR_RS_AG_AR = (
     " --xla_tpu_enable_async_collective_fusion_fuse_all_gather=false"
     " --xla_tpu_enable_async_collective_fusion_fuse_all_reduce=false"
@@ -91,6 +95,8 @@
 
 # Enable SparseCore Reduce Scatter (SC RS)
 # Only one of collective fusion (CF) or SparseCore (SC) can be enabled at a time.
+# On Ironwood, this flag already defaults to true:
+#   xla_tpu_enable_sparse_core_collective_offload_reduce_scatter
 ENABLE_SPARSECORE_OFFLOADING_FOR_REDUCE_SCATTER = (
     " --xla_tpu_enable_async_collective_fusion_fuse_reduce_scatter=false"
     " --xla_tpu_enable_sparse_core_collective_offload_reduce_scatter=true"
@@ -99,6 +105,8 @@
 
 # Enable SparseCore All Gather (SC AG).
 # Only one of collective fusion (CF) or SparseCore (SC) can be enabled at a time.
+# On Ironwood, this flag already defaults to true:
+#   xla_tpu_enable_sparse_core_collective_offload_all_gather
 ENABLE_SPARSECORE_OFFLOADING_FOR_ALL_GATHER = (
     " --xla_tpu_enable_async_collective_fusion_fuse_all_gather=false"
     " --xla_tpu_enable_sparse_core_collective_offload_all_gather=true"
@@ -109,6 +117,8 @@
 # Only one of collective fusion (CF) or SparseCore (SC) can be enabled at a time.
 # This is useful for reducing the gradient-reduction all-reduce time by
 # overlapping the all-reduce with compute.
+# On Ironwood, this flag already defaults to true:
+#   xla_tpu_enable_sparse_core_collective_offload_all_reduce
 ENABLE_SPARSECORE_OFFLOADING_FOR_ALL_REDUCE = (
     " --xla_tpu_enable_async_collective_fusion_fuse_all_reduce=false"
     " --xla_tpu_enable_sparse_core_collective_offload_all_reduce=true"