Skip to content

Commit 7d7607a

Browse files
committed
fix flash block sizes and attention
1 parent cde8ab8 commit 7d7607a

4 files changed

Lines changed: 234 additions & 71 deletions

File tree

src/maxdiffusion/configs/base_wan_animate_27b.yml

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -75,27 +75,30 @@ mask_padding_tokens: True
7575
attention_sharding_uniform: True
7676
dropout: 0.1
7777

78+
# Tuned for 720p (720x1280), 81 frames, context parallelism (CP) = 8, on Trillium (TPU v6e, 32MB VMEM):
79+
# block_q=2048, block_kv=4096, block_kv_compute=1024
80+
# ~1.31x faster than the default (512, 512, 512): 389s vs 508s at 40 steps (~23% less wall-clock time)
7881
flash_block_sizes: {
79-
"block_q" : 512,
80-
"block_kv_compute" : 512,
81-
"block_kv" : 512,
82+
"block_q" : 2048,
83+
"block_kv_compute" : 1024,
84+
"block_kv" : 4096,
8285
"block_q_dkv" : 512,
8386
"block_kv_dkv" : 512,
8487
"block_kv_dkv_compute" : 512,
8588
"block_q_dq" : 512,
8689
"block_kv_dq" : 512,
8790
"use_fused_bwd_kernel": False,
8891
}
89-
# Use on v6e
92+
# Default block sizes (480p / training):
9093
# flash_block_sizes: {
91-
# "block_q" : 3024,
92-
# "block_kv_compute" : 1024,
93-
# "block_kv" : 2048,
94-
# "block_q_dkv" : 3024,
95-
# "block_kv_dkv" : 2048,
96-
# "block_kv_dkv_compute" : 2048,
97-
# "block_q_dq" : 3024,
98-
# "block_kv_dq" : 2048
94+
# "block_q" : 512,
95+
# "block_kv_compute" : 512,
96+
# "block_kv" : 512,
97+
# "block_q_dkv" : 512,
98+
# "block_kv_dkv" : 512,
99+
# "block_kv_dkv_compute" : 512,
100+
# "block_q_dq" : 512,
101+
# "block_kv_dq" : 512,
99102
# "use_fused_bwd_kernel": False,
100103
# }
101104
# GroupNorm groups

0 commit comments

Comments
 (0)