[BugFix] Fix the bug in Qwen2 when ENABLE_V1_KVCACHE_SCHEDULER is enabled. (#3670)

* merge 2.1

* fix

* pre-commit

* fix
This commit is contained in:
lizexu123
2025-08-29 19:53:44 +08:00
committed by GitHub
parent 8517e04956
commit 578b8c5da2
2 changed files with 8 additions and 1 deletions

View File

@@ -29,6 +29,7 @@ import traceback
import uuid
import weakref
from concurrent.futures import ThreadPoolExecutor
from dataclasses import asdict
from typing import Dict, List, Optional, Tuple
import numpy as np
@@ -479,6 +480,8 @@ class LLMEngine:
"""
# TODO: confirm input and output lengths
if sampling_params is not None:
task.update(asdict(sampling_params))
request = Request.from_dict(task)
llm_logger.info(f"Receive request {request}")
if sampling_params is not None:

View File

@@ -260,7 +260,11 @@ class GPUModelRunner(ModelRunnerBase):
self.share_inputs["need_think_end"][idx : idx + 1, :] = 0
self.share_inputs["reasoning_index"][idx : idx + 1, :] = 0
if len(request.output_token_ids) == 0:
input_ids = request.prompt_token_ids
else:
input_ids = request.prompt_token_ids + request.output_token_ids
logger.debug(
f"Handle prefill request {request} at idx {idx} prefill_start_index {prefill_start_index} prefill_end_index {prefill_end_index} need_prefilled_token_num {len(input_ids)}"
)