diff --git a/fastdeploy/config.py b/fastdeploy/config.py
index 2155a0e14..dfeb82a1c 100644
--- a/fastdeploy/config.py
+++ b/fastdeploy/config.py
@@ -277,6 +277,8 @@ class SpeculativeConfig:
         for key, value in args.items():
             if key in name_map.keys() and hasattr(self, name_map[key]):
+                if key == "speculative_benchmark_mode":
+                    value = True if value.lower() == "true" else False
                 setattr(self, name_map[key], value)
diff --git a/fastdeploy/output/token_processor.py b/fastdeploy/output/token_processor.py
index e2953f0af..953b93571 100644
--- a/fastdeploy/output/token_processor.py
+++ b/fastdeploy/output/token_processor.py
@@ -288,9 +288,12 @@ class TokenProcessor:
         if self.cfg.speculative_config.method in ["mtp"]:
             single_head_acceptance_rates = []
             for head in range(self.cfg.speculative_config.num_speculative_tokens):
-                single_head_acceptance_rates.append(
-                    self.num_accept_requests_per_head[head] / self.num_rest_requests_per_head[head]
-                )
+                if self.num_rest_requests_per_head[head] != 0:
+                    single_head_acceptance_rates.append(
+                        self.num_accept_requests_per_head[head] / self.num_rest_requests_per_head[head]
+                    )
+                else:
+                    single_head_acceptance_rates.append(0)
             spec_logger.info(f" Single head accept ratio: {single_head_acceptance_rates}")

         if self.number_of_output_tokens > 1000000:
@@ -599,9 +602,12 @@ class TokenProcessor:
                     # Update the rest requests for each head
                     num_rest_requests = num_accept_requests
                     # Calculate the acceptance rate for each head
-                    single_head_acceptance_rate = (
-                        self.num_accept_requests_per_head[head] / self.num_rest_requests_per_head[head]
-                    )
+                    if self.num_rest_requests_per_head[head] != 0:
+                        single_head_acceptance_rate = (
+                            self.num_accept_requests_per_head[head] / self.num_rest_requests_per_head[head]
+                        )
+                    else:
+                        single_head_acceptance_rate = 0
                     main_process_metrics.spec_decode_draft_single_head_acceptance_rate[head].set(
                         single_head_acceptance_rate
                     )
diff --git a/fastdeploy/spec_decode/mtp.py b/fastdeploy/spec_decode/mtp.py
index c3c559832..d421b6a54 100644
--- a/fastdeploy/spec_decode/mtp.py
+++ b/fastdeploy/spec_decode/mtp.py
@@ -34,6 +34,7 @@ from fastdeploy.model_executor.ops.gpu import (
     draft_model_preprocess,
     draft_model_update,
     eagle_get_hidden_states,
+    eagle_get_self_hidden_states,
     mtp_save_first_token,
     mtp_step_paddle,
     share_external_data,
@@ -305,6 +306,10 @@
         self.model_inputs["batch_drop"] = paddle.full(shape=[self.max_num_seqs, 1], fill_value=False, dtype="bool")
         self.model_inputs["used_list_len"] = paddle.full(shape=[self.max_num_seqs], fill_value=0, dtype="int32")
+        if self.max_draft_token_num > 1:
+            self.last_seq_lens_this_time = paddle.full_like(
+                self.main_model_inputs["seq_lens_this_time"], fill_value=-1, dtype="int32"
+            )

     def insert_prefill_inputs(self, req_dicts: List[Request]):
         """
@@ -486,6 +491,13 @@
         """
         for substep in range(self.max_draft_token_num):
             if self.model_inputs["not_need_stop"]:
+                if substep != 0:
+                    target_hidden_states = eagle_get_self_hidden_states(
+                        hiddden_states,
+                        self.last_seq_lens_this_time,
+                        self.model_inputs["seq_lens_this_time"],
+                        self.model_inputs["step_idx"],
+                    )
                 self.model_inputs["substep"] = substep
                 # Remove padding
                 (
@@ -530,6 +542,11 @@
                     eos_token_ids=self.model_inputs["eos_token_id"],
                 )
+                if self.max_draft_token_num > 1:
+                    self.last_seq_lens_this_time = paddle.clone(
+                        self.model_inputs["seq_lens_this_time"]
+                    )
+
                 model_output = self.model(
                     ids_remove_padding=self.model_inputs["ids_remove_padding"],
                     previous_hidden_states=target_hidden_states,
diff --git a/fastdeploy/worker/worker_process.py b/fastdeploy/worker/worker_process.py
index 7013ac011..17ed382b7 100644
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -499,8 +499,8 @@
     )
     parser.add_argument(
         "--speculative_benchmark_mode",
-        default=False,
-        type=bool,
+        default="False",
+        type=str,
     )
     parser.add_argument(
         "--max_num_batched_tokens",
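A note on the worker_process.py change above: argparse's type=bool does not parse boolean strings, since bool("False") is True, so any non-empty value would have enabled the mode. The patch therefore passes --speculative_benchmark_mode through as a string and converts it back to a bool in SpeculativeConfig. The snippet below is a minimal standalone sketch of that pattern, not part of the patch itself; the str_to_bool helper name is a hypothetical stand-in.

    import argparse

    def str_to_bool(value: str) -> bool:
        # Same rule as the new SpeculativeConfig branch: only the literal
        # string "true" (case-insensitive) maps to True.
        return value.lower() == "true"

    parser = argparse.ArgumentParser()
    # Declaring the flag as a string sidesteps the type=bool pitfall.
    parser.add_argument("--speculative_benchmark_mode", default="False", type=str)

    args = parser.parse_args(["--speculative_benchmark_mode", "False"])
    print(str_to_bool(args.speculative_benchmark_mode))  # prints: False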