[BugFix] Fix the bug in Qwen2 when ENABLE_V1_KVCACHE_SCHEDULER is enabled. (#3670)

* merge 2.1

* fix

* pre-commit

* fix
Author: lizexu123
Date:   2025-08-29 19:53:44 +08:00
Committed by: GitHub
Parent: 8517e04956
Commit: 578b8c5da2

2 changed files with 8 additions and 1 deletion


@@ -29,6 +29,7 @@ import traceback
 import uuid
 import weakref
 from concurrent.futures import ThreadPoolExecutor
+from dataclasses import asdict
 from typing import Dict, List, Optional, Tuple

 import numpy as np
@@ -479,6 +480,8 @@ class LLMEngine:
         """
         # TODO 输入输出长度确认
+        if sampling_params is not None:
+            task.update(asdict(sampling_params))
         request = Request.from_dict(task)
         llm_logger.info(f"Receive request {request}")
         if sampling_params is not None:
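
In plain terms, the two added lines fold the caller's SamplingParams into the task dict before the Request is built, so Request.from_dict sees those fields. A minimal, self-contained sketch (the SamplingParams fields below are hypothetical stand-ins, not FastDeploy's actual definition):

from dataclasses import asdict, dataclass
from typing import Optional


@dataclass
class SamplingParams:  # hypothetical trimmed-down stand-in for illustration
    temperature: float = 0.8
    top_p: float = 0.95
    max_tokens: Optional[int] = None


task = {"request_id": "req-0", "prompt_token_ids": [1, 2, 3]}
sampling_params = SamplingParams(temperature=0.2)

# asdict() flattens the dataclass into a plain dict; task.update() merges the
# sampling fields into the task payload before Request.from_dict(task) runs.
if sampling_params is not None:
    task.update(asdict(sampling_params))

print(task)
# {'request_id': 'req-0', 'prompt_token_ids': [1, 2, 3],
#  'temperature': 0.2, 'top_p': 0.95, 'max_tokens': None}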


@@ -260,7 +260,11 @@ class GPUModelRunner(ModelRunnerBase):
                 self.share_inputs["need_think_end"][idx : idx + 1, :] = 0
                 self.share_inputs["reasoning_index"][idx : idx + 1, :] = 0

-            input_ids = request.prompt_token_ids + request.output_token_ids
+            if len(request.output_token_ids) == 0:
+                input_ids = request.prompt_token_ids
+            else:
+                input_ids = request.prompt_token_ids + request.output_token_ids
+
             logger.debug(
                 f"Handle prefill request {request} at idx {idx} prefill_start_index {prefill_start_index} prefill_end_index {prefill_end_index} need_prefilled_token_num {len(input_ids)}"
             )
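
The GPUModelRunner change only affects how the prefill token list is assembled: a request with no generated output is prefilled from its prompt alone, while a request that already has output tokens (for example, one being rescheduled) replays them as well. A standalone sketch using a hypothetical, trimmed-down Request:

from dataclasses import dataclass, field
from typing import List


@dataclass
class Request:  # hypothetical stand-in exposing only the fields used here
    prompt_token_ids: List[int]
    output_token_ids: List[int] = field(default_factory=list)


def build_prefill_input_ids(request: Request) -> List[int]:
    # Fresh request: nothing generated yet, so prefill only the prompt.
    if len(request.output_token_ids) == 0:
        return request.prompt_token_ids
    # Request with prior output (e.g. rescheduled): replay generated tokens too.
    return request.prompt_token_ids + request.output_token_ids


print(build_prefill_input_ids(Request(prompt_token_ids=[1, 2, 3])))          # [1, 2, 3]
print(build_prefill_input_ids(Request([1, 2, 3], output_token_ids=[7, 8])))  # [1, 2, 3, 7, 8]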