diff --git a/fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py b/fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py index 2ae7bb515..874a90cca 100644 --- a/fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py +++ b/fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py @@ -49,7 +49,7 @@ class MoEMethodBase(QuantMethodBase): from .ep import EPDecoderRunner self.ep_decoder_runner = EPDecoderRunner( layer.top_k, layer.hidden_size, layer.num_experts, - layer.model_config.num_max_dispatch_tokens_per_rank, + layer.fd_config.model_config.num_max_dispatch_tokens_per_rank, layer.ep_size, layer.ep_rank) else: from .ep import EPPrefillRunner diff --git a/fastdeploy/model_executor/layers/moe/fused_moe_deepgemm_backend.py b/fastdeploy/model_executor/layers/moe/fused_moe_deepgemm_backend.py index fbdd9e7ae..62f795b54 100644 --- a/fastdeploy/model_executor/layers/moe/fused_moe_deepgemm_backend.py +++ b/fastdeploy/model_executor/layers/moe/fused_moe_deepgemm_backend.py @@ -241,7 +241,7 @@ class DeepGemmFusedMoeMethod(MoEMethodBase): [ layer.num_local_experts, layer.ep_size * - layer.model_config.num_max_dispatch_tokens_per_rank, + layer.fd_config.model_config.num_max_dispatch_tokens_per_rank, layer.moe_intermediate_size * 2, ], dtype=paddle.bfloat16, @@ -251,7 +251,7 @@ class DeepGemmFusedMoeMethod(MoEMethodBase): [ layer.num_local_experts, layer.ep_size * - layer.model_config.num_max_dispatch_tokens_per_rank, + layer.fd_config.model_config.num_max_dispatch_tokens_per_rank, layer.hidden_size, ], dtype=paddle.bfloat16,