Revert "[BugFix] Modify the bug in Qwen2 when enabling ENABLE_V1_KVCACHE_SCHE…" (#3719)

This reverts commit 578b8c5da2.
commit 3364af767b (parent 578b8c5da2)
Author: Jiang-Jia-Jun
Date: 2025-08-29 19:55:50 +08:00
Committed by: GitHub

2 changed files with 1 addition and 8 deletions


@@ -29,7 +29,6 @@ import traceback
 import uuid
 import weakref
 from concurrent.futures import ThreadPoolExecutor
-from dataclasses import asdict
 from typing import Dict, List, Optional, Tuple

 import numpy as np
@@ -480,8 +479,6 @@ class LLMEngine:
         """
         # TODO: confirm input/output lengths
-        if sampling_params is not None:
-            task.update(asdict(sampling_params))
         request = Request.from_dict(task)
         llm_logger.info(f"Receive request {request}")
         if sampling_params is not None:
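For context on the lines this revert removes: the original fix imported asdict and folded the request's SamplingParams into the task dict before Request.from_dict(task) ran. Below is a minimal, self-contained sketch of that pattern, assuming SamplingParams is a dataclass and task is a plain dict; the field names here are illustrative stand-ins, not FastDeploy's actual ones.

    from dataclasses import asdict, dataclass
    from typing import Optional

    @dataclass
    class SamplingParams:
        # Illustrative fields; the real dataclass has more.
        temperature: float = 0.8
        top_p: float = 0.95
        max_tokens: Optional[int] = None

    task = {"request_id": "req-0", "prompt_token_ids": [1, 2, 3]}
    sampling_params = SamplingParams(temperature=0.2)

    # The reverted lines: merge every sampling field into the task dict,
    # so Request.from_dict(task) would see them as top-level keys.
    if sampling_params is not None:
        task.update(asdict(sampling_params))

    print(sorted(task))
    # ['max_tokens', 'prompt_token_ids', 'request_id', 'temperature', 'top_p']

After the revert, these keys are no longer injected into the task, so Request.from_dict receives only the original request fields.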


@@ -260,11 +260,7 @@ class GPUModelRunner(ModelRunnerBase):
                 self.share_inputs["need_think_end"][idx : idx + 1, :] = 0
                 self.share_inputs["reasoning_index"][idx : idx + 1, :] = 0
-            if len(request.output_token_ids) == 0:
-                input_ids = request.prompt_token_ids
-            else:
-                input_ids = request.prompt_token_ids + request.output_token_ids
+            input_ids = request.prompt_token_ids + request.output_token_ids
             logger.debug(
                 f"Handle prefill request {request} at idx {idx} prefill_start_index {prefill_start_index} prefill_end_index {prefill_end_index} need_prefilled_token_num {len(input_ids)}"
             )
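For context on the second file: the reverted hunk branched on whether the request had produced any output tokens yet, while the restored line always concatenates prompt and output tokens. Below is a minimal sketch of both behaviors, assuming prompt_token_ids and output_token_ids are plain Python lists as the diff suggests; the Request dataclass here is a hypothetical stand-in for the runner's request object.

    from dataclasses import dataclass, field
    from typing import List

    @dataclass
    class Request:
        # Illustrative stand-in for the model runner's request object.
        prompt_token_ids: List[int] = field(default_factory=list)
        output_token_ids: List[int] = field(default_factory=list)

    def input_ids_restored(request: Request) -> List[int]:
        # Behavior restored by this revert: always concatenate.
        return request.prompt_token_ids + request.output_token_ids

    def input_ids_reverted_fix(request: Request) -> List[int]:
        # Behavior being reverted: branch on whether decoding has
        # produced any tokens yet.
        if len(request.output_token_ids) == 0:
            return request.prompt_token_ids
        return request.prompt_token_ids + request.output_token_ids

    fresh = Request(prompt_token_ids=[1, 2, 3])
    resumed = Request(prompt_token_ids=[1, 2, 3], output_token_ids=[4, 5])
    assert input_ids_restored(fresh) == input_ids_reverted_fix(fresh) == [1, 2, 3]
    assert input_ids_restored(resumed) == input_ids_reverted_fix(resumed) == [1, 2, 3, 4, 5]

Note that for plain lists the two are equivalent, since concatenating an empty list is a no-op; whatever case the original fix addressed is not visible in this hunk alone.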