
Commit 73bd4ab

[Feature] Add explicit hidden_size parameter support to FusedMoE (#7361)
1 parent 1e08ee7 commit 73bd4ab

11 files changed: 12 additions & 1 deletion


fastdeploy/model_executor/layers/moe/moe.py

Lines changed: 2 additions & 1 deletion
@@ -153,6 +153,7 @@ class FusedMoE(nn.Layer):
     def __init__(
         self,
         fd_config,
+        hidden_size: int = -1,
         reduce_results: bool = True,
         renormalize: bool = False,
         moe_intermediate_size: int = -1,
@@ -204,7 +205,7 @@ def __init__(
             self.tp_size == 1 and self.ep_size > 1
         ), "MoE only support parallelism on TP or EP dimension."
 
-        self.hidden_size = fd_config.model_config.hidden_size
+        self.hidden_size = hidden_size
         self.num_experts = num_experts
 
         self.num_local_experts = self.num_experts // self.ep_size
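With this change, FusedMoE no longer derives hidden_size from fd_config.model_config inside its constructor; every call site now forwards the value explicitly. A minimal sketch of the new calling convention follows; the config classes below are illustrative stand-ins, not the real fastdeploy types:

# Stand-in config objects mimicking fd_config.model_config;
# illustrative only, not imports from fastdeploy.
class _ModelConfig:
    hidden_size = 4096
    moe_intermediate_size = 1536

class _FDConfig:
    model_config = _ModelConfig()

fd_config = _FDConfig()

# Old convention: FusedMoE(fd_config=fd_config, ...) read hidden_size
# internally from fd_config.model_config.hidden_size.
# New convention: the caller passes it explicitly, mirroring the
# updated call sites in this commit.
moe_kwargs = dict(
    fd_config=fd_config,
    hidden_size=fd_config.model_config.hidden_size,
    moe_intermediate_size=fd_config.model_config.moe_intermediate_size,
)
assert moe_kwargs["hidden_size"] == 4096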

fastdeploy/model_executor/models/deepseek_v3.py

Lines changed: 1 addition & 0 deletions
@@ -167,6 +167,7 @@ def __init__(self, fd_config: FDConfig, layer_id: int, prefix: str) -> None:
 
         self.experts = FusedMoE(
             fd_config=fd_config,
+            hidden_size=fd_config.model_config.hidden_size,
             reduce_results=False,
             renormalize=self.norm_topk_prob,
             moe_intermediate_size=fd_config.model_config.moe_intermediate_size,

fastdeploy/model_executor/models/ernie4_5_moe.py

Lines changed: 1 addition & 0 deletions
@@ -210,6 +210,7 @@ def __init__(
 
         self.experts = FusedMoE(
             fd_config=fd_config,
+            hidden_size=fd_config.model_config.hidden_size,
             moe_intermediate_size=fd_config.model_config.moe_intermediate_size,
             num_experts=fd_config.model_config.moe_num_experts,
             top_k=fd_config.model_config.moe_k,

fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py

Lines changed: 1 addition & 0 deletions
@@ -148,6 +148,7 @@ def __init__(
         )
         self.experts = FusedMoE(
             fd_config=fd_config,
+            hidden_size=fd_config.model_config.hidden_size,
             reduce_results=False,
             moe_intermediate_size=moe_intermediate_size,
             num_experts=num_experts,

fastdeploy/model_executor/models/glm4_moe.py

Lines changed: 1 addition & 0 deletions
@@ -169,6 +169,7 @@ def __init__(
 
         self.experts = FusedMoE(
             fd_config,
+            hidden_size=fd_config.model_config.hidden_size,
             reduce_results=not self.merge_ffn_tp,
             renormalize=self.norm_topk_prob,
             moe_intermediate_size=fd_config.model_config.moe_intermediate_size,

fastdeploy/model_executor/models/gpt_oss.py

Lines changed: 1 addition & 0 deletions
@@ -114,6 +114,7 @@ def __init__(self, fd_config: FDConfig, layer_id: int, prefix: str = ""):
 
         self.experts = FusedMoE(
             fd_config=fd_config,
+            hidden_size=fd_config.model_config.hidden_size,
             moe_intermediate_size=fd_config.model_config.intermediate_size,
             num_experts=num_local_experts,
             top_k=fd_config.model_config.num_experts_per_tok,

fastdeploy/model_executor/models/qwen3moe.py

Lines changed: 1 addition & 0 deletions
@@ -63,6 +63,7 @@ def __init__(
         }
         self.experts = FusedMoE(
             fd_config,
+            hidden_size=fd_config.model_config.hidden_size,
             moe_intermediate_size=fd_config.model_config.moe_intermediate_size,
             num_experts=fd_config.model_config.num_experts,
             top_k=fd_config.model_config.num_experts_per_tok,

tests/layers/test_fusedmoe.py

Lines changed: 1 addition & 0 deletions
@@ -509,6 +509,7 @@ def __init__(
 
         self.fused_moe = FusedMoE(
             fd_config=self.fd_config,
+            hidden_size=self.fd_config.model_config.hidden_size,
             moe_intermediate_size=self.fd_config.model_config.moe_intermediate_size,
             num_experts=self.fd_config.model_config.moe_num_experts,
             top_k=self.fd_config.model_config.moe_k,

tests/layers/test_nvfp4_fusedmoe.py

Lines changed: 1 addition & 0 deletions
@@ -516,6 +516,7 @@ def __init__(
 
         self.fused_moe = FusedMoE(
             fd_config=self.fd_config,
+            hidden_size=self.fd_config.model_config.hidden_size,
             moe_intermediate_size=self.fd_config.model_config.moe_intermediate_size,
             num_experts=self.fd_config.model_config.moe_num_experts,
             top_k=self.fd_config.model_config.moe_k,

tests/layers/test_w4a8_moe.py

Lines changed: 1 addition & 0 deletions
@@ -110,6 +110,7 @@ def __init__(
 
         self.fused_moe = FusedMoE(
             fd_config=self.fd_config,
+            hidden_size=self.fd_config.model_config.hidden_size,
             moe_intermediate_size=self.fd_config.model_config.moe_intermediate_size,
             num_experts=self.fd_config.model_config.moe_num_experts,
             top_k=self.fd_config.model_config.moe_k,
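Note that the new parameter defaults to -1, so a call site that forgets to pass it would silently construct a layer with an invalid size. A defensive check along these lines (a hedged sketch, not part of this commit) would surface that mistake early:

# Hypothetical guard against the -1 sentinel; not part of this commit.
def validate_hidden_size(hidden_size: int) -> int:
    if hidden_size <= 0:
        raise ValueError(
            f"FusedMoE requires an explicit positive hidden_size, got {hidden_size}"
        )
    return hidden_size

assert validate_hidden_size(4096) == 4096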
