We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
2 parents 9f4ce1c + d66f16c; commit 644eda6 (Copy full SHA for 644eda6)
1 file changed
src/MaxText/configs/models/deepseek3-671b-2dfsdp.yml
@@ -61,6 +61,8 @@ data_sharding: [['data', 'fsdp', 'fsdp_transpose', 'expert', 'context']]
61
logical_axis_rules: [
62
['activation_batch', ['data', 'fsdp', 'fsdp_transpose', 'expert', 'context']],
63
['activation_kv_batch', ['data', 'fsdp', 'fsdp_transpose', 'expert', 'context']],
64
+ ['activation_embed_and_logits_batch', ['data', 'fsdp', 'fsdp_transpose', 'expert']],
65
+ ['activation_norm_length', ['context']],
66
['activation_heads', []],
67
['embed', ['fsdp']],
68
['embed_no_exp', ['fsdp']],
0 commit comments