Mirror of https://github.com/PaddlePaddle/FastDeploy.git
add noaux_tc to unittest fused_moe (#4656)
@@ -83,8 +83,13 @@ class FuseMoEWrapper(paddle.nn.Layer):
             moe_intermediate_size=self.fd_config.model_config.moe_intermediate_size,
             num_experts=self.fd_config.model_config.moe_num_experts,
             top_k=self.fd_config.model_config.moe_k,
-            layer_idx=0,
+            # avoid invoking clean_low_latency_buffer in mixed EP.
+            layer_idx=666,
             weight_key_map=weight_key_map,
+            topk_method="noaux_tc",
+            topk_group=4,
+            n_group=8,
+            gate_correction_bias=paddle.zeros([self.fd_config.model_config.moe_num_experts], paddle.float32),
         )
         moe_layer = self.fused_moe
 
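For context on what the new arguments select, below is a minimal sketch of how a "noaux_tc" (no-aux-loss top-k with correction bias) router is commonly described, in the DeepSeek-V3 style of group-limited top-k: gate_correction_bias is added to the gating scores before expert selection, the experts are partitioned into n_group groups of which only the topk_group best are kept, and the final routing weights come from the original unbiased scores. The function name, the sigmoid scoring, and the top-2-per-group group-scoring rule are assumptions for illustration, not FastDeploy's actual implementation.

import paddle
import paddle.nn.functional as F

def noaux_tc_route(gating_logits, gate_correction_bias, n_group=8, topk_group=4, top_k=8):
    # Hypothetical sketch of DeepSeek-V3-style "noaux_tc" routing; not FastDeploy's code.
    num_experts = gating_logits.shape[-1]
    scores = F.sigmoid(gating_logits)          # positive per-expert scores in (0, 1)
    biased = scores + gate_correction_bias     # the bias steers selection only
    # Score each group of experts by the sum of its top-2 biased scores.
    grouped = biased.reshape([-1, n_group, num_experts // n_group])
    group_scores = paddle.topk(grouped, k=2, axis=-1)[0].sum(axis=-1)  # [tokens, n_group]
    # Keep the topk_group best groups; zero out experts in the rest
    # (multiplicative masking is safe here because sigmoid scores are positive).
    top_groups = paddle.topk(group_scores, k=topk_group, axis=-1)[1]
    keep = F.one_hot(top_groups, n_group).sum(axis=1)                  # [tokens, n_group]
    masked = (grouped * keep.unsqueeze(-1)).reshape([-1, num_experts])
    # Pick top_k experts among the surviving groups, but weight them by the
    # original (unbiased) scores, renormalized to sum to 1 per token.
    _, expert_ids = paddle.topk(masked, k=top_k, axis=-1)
    weights = paddle.take_along_axis(scores, expert_ids, axis=-1)
    return expert_ids, weights / weights.sum(axis=-1, keepdim=True)

With the test's values (n_group=8, topk_group=4) and the all-zero gate_correction_bias from the diff, the bias term is a no-op and the routing reduces to plain group-limited top-k, which is presumably why the unit test can pass paddle.zeros there.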