|
77 | 77 |
|
78 | 78 |
|
79 | 79 | # Enable SparseCore All Gather (1D), Reduce Scatter (1D) and All Reduce (ND) |
| 80 | +# On Ironwood, the following flags already default to true: |
| 81 | +#   xla_tpu_enable_sparse_core_collective_offload_all_gather |
| 82 | +#   xla_tpu_enable_sparse_core_collective_offload_reduce_scatter |
| 83 | +#   xla_tpu_enable_sparse_core_collective_offload_all_reduce |
80 | 84 | ENABLE_SPARSECORE_OFFLOADING_FOR_RS_AG_AR = ( |
81 | 85 | " --xla_tpu_enable_async_collective_fusion_fuse_all_gather=false" |
82 | 86 | " --xla_tpu_enable_async_collective_fusion_fuse_all_reduce=false" |
|
91 | 95 |
|
92 | 96 | # Enable SparseCore Reduce Scatter (SC RS) |
93 | 97 | # Only one of async collective fusion (CF) or SparseCore offload (SC) can be enabled at a time. |
| 98 | +# On Ironwood, the following flag already defaults to true: |
| 99 | +#   xla_tpu_enable_sparse_core_collective_offload_reduce_scatter |
94 | 100 | ENABLE_SPARSECORE_OFFLOADING_FOR_REDUCE_SCATTER = ( |
95 | 101 | " --xla_tpu_enable_async_collective_fusion_fuse_reduce_scatter=false" |
96 | 102 | " --xla_tpu_enable_sparse_core_collective_offload_reduce_scatter=true" |
|
99 | 105 |
|
100 | 106 | # Enable SparseCore All Gather (SC AG). |
101 | 107 | # Only one of async collective fusion (CF) or SparseCore offload (SC) can be enabled at a time. |
| 108 | +# On Ironwood, the following flag already defaults to true: |
| 109 | +#   xla_tpu_enable_sparse_core_collective_offload_all_gather |
102 | 110 | ENABLE_SPARSECORE_OFFLOADING_FOR_ALL_GATHER = ( |
103 | 111 | " --xla_tpu_enable_async_collective_fusion_fuse_all_gather=false" |
104 | 112 | " --xla_tpu_enable_sparse_core_collective_offload_all_gather=true" |
|
109 | 117 | # Only one of async collective fusion (CF) or SparseCore offload (SC) can be enabled at a time. |
110 | 118 | # This is useful for reducing the time spent in the gradient all-reduce by |
111 | 119 | # overlapping it with compute. |
| 120 | +# On Ironwood, the following flag already defaults to true: |
| 121 | +#   xla_tpu_enable_sparse_core_collective_offload_all_reduce |
112 | 122 | ENABLE_SPARSECORE_OFFLOADING_FOR_ALL_REDUCE = ( |
113 | 123 | " --xla_tpu_enable_async_collective_fusion_fuse_all_reduce=false" |
114 | 124 | " --xla_tpu_enable_sparse_core_collective_offload_all_reduce=true" |
|
0 commit comments