Skip to content

Commit 4bcee99

Browse files
Merge pull request #2997 from AI-Hypercomputer:sc_offload_doc
PiperOrigin-RevId: 859867073
2 parents 1d29ce1 + 824c566 commit 4bcee99

2 files changed

Lines changed: 144 additions & 109 deletions

File tree

benchmarks/xla_flags_library.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@
7777

7878

7979
# Enable SparseCore All Gather (1D), Reduce Scatter (1D) and All Reduce (ND)
80+
# On Ironwood, the following flags default to true:
81+
#   xla_tpu_enable_sparse_core_collective_offload_all_gather
82+
#   xla_tpu_enable_sparse_core_collective_offload_reduce_scatter
83+
#   xla_tpu_enable_sparse_core_collective_offload_all_reduce
8084
ENABLE_SPARSECORE_OFFLOADING_FOR_RS_AG_AR = (
8185
" --xla_tpu_enable_async_collective_fusion_fuse_all_gather=false"
8286
" --xla_tpu_enable_async_collective_fusion_fuse_all_reduce=false"
@@ -91,6 +95,8 @@
9195

9296
# Enable SparseCore Reduce Scatter (SC RS)
9397
# Only one of CF or SC can be enabled at a time.
98+
# On Ironwood, the following flag defaults to true:
99+
#   xla_tpu_enable_sparse_core_collective_offload_reduce_scatter
94100
ENABLE_SPARSECORE_OFFLOADING_FOR_REDUCE_SCATTER = (
95101
" --xla_tpu_enable_async_collective_fusion_fuse_reduce_scatter=false"
96102
" --xla_tpu_enable_sparse_core_collective_offload_reduce_scatter=true"
@@ -99,6 +105,8 @@
99105

100106
# Enable SparseCore All Gather (SC AG).
101107
# Only one of CF or SC can be enabled at a time.
108+
# On Ironwood, the following flag defaults to true:
109+
#   xla_tpu_enable_sparse_core_collective_offload_all_gather
102110
ENABLE_SPARSECORE_OFFLOADING_FOR_ALL_GATHER = (
103111
" --xla_tpu_enable_async_collective_fusion_fuse_all_gather=false"
104112
" --xla_tpu_enable_sparse_core_collective_offload_all_gather=true"
@@ -109,6 +117,8 @@
109117
# Only one of CF or SC can be enabled at a time.
110118
# This is useful for reducing the gradient-reduction all-reduce time by
111119
# overlapping it with compute during that time.
120+
# On Ironwood, the following flag defaults to true:
121+
#   xla_tpu_enable_sparse_core_collective_offload_all_reduce
112122
ENABLE_SPARSECORE_OFFLOADING_FOR_ALL_REDUCE = (
113123
" --xla_tpu_enable_async_collective_fusion_fuse_all_reduce=false"
114124
" --xla_tpu_enable_sparse_core_collective_offload_all_reduce=true"

0 commit comments

Comments
 (0)