[PD Disaggregation] Support PD + EP inference for Qwen3-MoE. (#4691)

Support Qwen3-MoE with PD/EP inference.
2025-12-24 13:28:13 +08:00 · 2025-11-06 10:32:15 +08:00
parent e8c3e20ee6
commit 62dfad4a5f
10 changed files with 93 additions and 74 deletions
--- a/fastdeploy/config.py
+++ b/fastdeploy/config.py
@@ -212,7 +212,6 @@ class ModelConfig:
        # set attribute from pretrained_config
        for key, value in pretrained_config.items():
            setattr(self, key, value)
-
        # we need set default value when not exist
        for key, value in PRETRAINED_INIT_CONFIGURATION.items():
            if not hasattr(self, key):
@@ -300,6 +299,9 @@ class ModelConfig:
        if not hasattr(self, "mla_use_absorb"):
            self.mla_use_absorb = False

+        if hasattr(self, "num_experts") and getattr(self, "moe_num_experts") is None:
+            self.moe_num_experts = self.num_experts
+
    def read_from_env(self):
        """
        Read configuration information from environment variables and update the object's attributes.