From 631a1e23391ba1d39441188f78974518986f1cfa Mon Sep 17 00:00:00 2001
From: GoldPancake <56388518+Deleter-D@users.noreply.github.com>
Date: Fri, 17 Oct 2025 14:53:01 +0800
Subject: [PATCH] fix mtp quant param (#4469)

---
 fastdeploy/model_executor/models/ernie4_5_mtp.py | 4 +++-
 fastdeploy/spec_decode/mtp.py                    | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/fastdeploy/model_executor/models/ernie4_5_mtp.py b/fastdeploy/model_executor/models/ernie4_5_mtp.py
index e463c27d8..ac7dd8c33 100644
--- a/fastdeploy/model_executor/models/ernie4_5_mtp.py
+++ b/fastdeploy/model_executor/models/ernie4_5_mtp.py
@@ -315,7 +315,9 @@ class Ernie4_5_MTPModel(nn.Layer):
 
         hidden_states = hidden_states + residual
 
-        hidden_states = self.norm(hidden_states)
+        # NOTE@wangyuanpeng04: Whether to apply norm here depends on whether
+        # norm was used in the MTP training phase; it is disabled for now.
+        # hidden_states = self.norm(hidden_states)
 
         return hidden_states
 
diff --git a/fastdeploy/spec_decode/mtp.py b/fastdeploy/spec_decode/mtp.py
index 2642ff3ef..7570e2125 100644
--- a/fastdeploy/spec_decode/mtp.py
+++ b/fastdeploy/spec_decode/mtp.py
@@ -82,6 +82,8 @@ class MTPProposer(Proposer):
             self.model_config.quantization = self.speculative_config.quantization
         self.model_config.start_layer_index = self.num_main_model_layers
         self.speculative_config.model_type = "mtp"
+        if self.speculative_config.quantization is not None:
+            self.model_config.is_quantized = False
 
     def _load_model(self):
         """