[BugFix] Fix the bug in Qwen2 when ENABLE_V1_KVCACHE_SCHEDULER is enabled. (#3670)

* merge 2.1 * fix * pre-commit * fix
2025-10-07 01:22:59 +08:00 · 2025-08-29 19:53:44 +08:00
parent 8517e04956
commit 578b8c5da2
2 changed files with 8 additions and 1 deletion
--- a/fastdeploy/worker/gpu_model_runner.py
+++ b/fastdeploy/worker/gpu_model_runner.py
@@ -260,7 +260,11 @@ class GPUModelRunner(ModelRunnerBase):
                    self.share_inputs["need_think_end"][idx : idx + 1, :] = 0
                    self.share_inputs["reasoning_index"][idx : idx + 1, :] = 0

-                input_ids = request.prompt_token_ids + request.output_token_ids
+                if len(request.output_token_ids) == 0:
+                    input_ids = request.prompt_token_ids
+                else:
+                    input_ids = request.prompt_token_ids + request.output_token_ids
+
                logger.debug(
                    f"Handle prefill request {request} at idx {idx} prefill_start_index {prefill_start_index} prefill_end_index {prefill_end_index} need_prefilled_token_num {len(input_ids)}"
                )