fix mtp in rl (#4234)

2025-09-26 20:41:53 +08:00 · 2025-09-24 16:59:24 +08:00
parent 8a506500f3
commit e2b68b33c9
2 changed files with 7 additions and 1 deletions
--- a/fastdeploy/engine/args_utils.py
+++ b/fastdeploy/engine/args_utils.py
@@ -1028,7 +1028,10 @@ class EngineArgs:
                if paddle.is_compiled_with_xpu():
                    self.max_num_batched_tokens = self.max_model_len
                else:
-                    self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
+                    if speculative_cfg is not None and speculative_cfg.method is not None:
+                        self.max_num_batched_tokens = self.max_model_len
+                    else:
+                        self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
            else:
                if self.enable_chunked_prefill:
                    self.max_num_batched_tokens = 2048
--- a/fastdeploy/output/token_processor.py
+++ b/fastdeploy/output/token_processor.py
@@ -332,6 +332,9 @@ class TokenProcessor:
                    + accept_num[i]
                ].tolist()
                if len(token_ids) == 0 or token_ids[-1] <= 0:
+                    if envs.ENABLE_V1_KVCACHE_SCHEDULER:
+                        if task_id in self.resource_manager.to_be_rescheduled_request_id_set:
+                            self.resource_manager.reschedule_preempt_task(task_id)
                    continue
            else:
                token_id = int(tokens[i, 0])