From 631a1e23391ba1d39441188f78974518986f1cfa Mon Sep 17 00:00:00 2001 From: GoldPancake <56388518+Deleter-D@users.noreply.github.com> Date: Fri, 17 Oct 2025 14:53:01 +0800 Subject: [PATCH] fix mtp quant param (#4469) --- fastdeploy/model_executor/models/ernie4_5_mtp.py | 4 +++- fastdeploy/spec_decode/mtp.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fastdeploy/model_executor/models/ernie4_5_mtp.py b/fastdeploy/model_executor/models/ernie4_5_mtp.py index e463c27d8..ac7dd8c33 100644 --- a/fastdeploy/model_executor/models/ernie4_5_mtp.py +++ b/fastdeploy/model_executor/models/ernie4_5_mtp.py @@ -315,7 +315,9 @@ class Ernie4_5_MTPModel(nn.Layer): hidden_states = hidden_states + residual - hidden_states = self.norm(hidden_states) + # NOTE@wangyuanpeng04 Whether to use norm here is determined by + # whether norm is used in the MTP training phase. + # hidden_states = self.norm(hidden_states) return hidden_states diff --git a/fastdeploy/spec_decode/mtp.py b/fastdeploy/spec_decode/mtp.py index 2642ff3ef..7570e2125 100644 --- a/fastdeploy/spec_decode/mtp.py +++ b/fastdeploy/spec_decode/mtp.py @@ -82,6 +82,8 @@ class MTPProposer(Proposer): self.model_config.quantization = self.speculative_config.quantization self.model_config.start_layer_index = self.num_main_model_layers self.speculative_config.model_type = "mtp" + if self.speculative_config.quantization is not None: + self.model_config.is_quantized = False def _load_model(self): """