【Sync】Release/2.0.1 (#2745)

* add RL Qwen model support
* fix
* fix
2025-10-16 05:30:58 +08:00 · 2025-07-08 14:38:18 +08:00
parent ea787d8f62
commit 6851489425
13 changed files with 438 additions and 171 deletions
--- a/fastdeploy/model_executor/layers/moe/moe.py
+++ b/fastdeploy/model_executor/layers/moe/moe.py
@@ -89,6 +89,7 @@ class FusedMoE(nn.Layer):
        self.routed_scaling_factor = routed_scaling_factor

        moe_quant_config = fd_config.quant_config
+        self.moe_quant_type = None
        if moe_quant_config:
            self.quant_method = moe_quant_config.get_quant_method(self)
            self.moe_quant_type = moe_quant_config.name()
@@ -142,7 +143,7 @@ class FusedMoE(nn.Layer):
        if self.moe_quant_type == "fp8":
            #(TODO:gaoziyuan)
            pass
-        else:
+        elif self.moe_quant_type == "wint8":
            self.weight_dtype = "int8"
            self.init_weight_only_scale()