Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-06 17:17:14 +08:00
Revert "[BugFix] Modify the bug in Qwen2 when enabling ENABLE_V1_KVCACHE_SCHE…" (#3719)
This reverts commit 578b8c5da2.
@@ -29,7 +29,6 @@ import traceback
 import uuid
 import weakref
 from concurrent.futures import ThreadPoolExecutor
-from dataclasses import asdict
 from typing import Dict, List, Optional, Tuple
 
 import numpy as np
@@ -480,8 +479,6 @@ class LLMEngine:
         """
         # TODO 输入输出长度确认 (confirm input/output lengths)
 
-        if sampling_params is not None:
-            task.update(asdict(sampling_params))
         request = Request.from_dict(task)
         llm_logger.info(f"Receive request {request}")
         if sampling_params is not None:
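For context, the two lines removed in this hunk merged the sampling parameters into the task dict before the Request object was built, which is also why the `from dataclasses import asdict` import above is no longer needed. Below is a minimal, self-contained sketch of that pattern; the SamplingParams dataclass here is a hypothetical stand-in with only a few fields, not FastDeploy's actual class.

from dataclasses import asdict, dataclass
from typing import Optional


@dataclass
class SamplingParams:
    # Hypothetical subset of fields, used only for illustration.
    temperature: float = 0.8
    top_p: float = 0.95
    max_tokens: Optional[int] = None


task = {"request_id": "req-1", "prompt_token_ids": [1, 2, 3]}
sampling_params = SamplingParams(temperature=0.2)

# This mirrors the reverted lines: flatten the dataclass into the task dict so
# that a Request built from the dict sees the sampling fields as plain keys.
if sampling_params is not None:
    task.update(asdict(sampling_params))

print(task["temperature"])  # 0.2

After the revert, this merge no longer happens before Request.from_dict(task); only the unchanged `if sampling_params is not None:` line shown as trailing context remains.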
@@ -260,11 +260,7 @@ class GPUModelRunner(ModelRunnerBase):
                 self.share_inputs["need_think_end"][idx : idx + 1, :] = 0
                 self.share_inputs["reasoning_index"][idx : idx + 1, :] = 0
 
-                if len(request.output_token_ids) == 0:
-                    input_ids = request.prompt_token_ids
-                else:
-                    input_ids = request.prompt_token_ids + request.output_token_ids
-
+                input_ids = request.prompt_token_ids + request.output_token_ids
                 logger.debug(
                     f"Handle prefill request {request} at idx {idx} prefill_start_index {prefill_start_index} prefill_end_index {prefill_end_index} need_prefilled_token_num {len(input_ids)}"
                 )
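The single line restored here rebuilds the token sequence for a prefill step by concatenating the prompt tokens with any tokens generated so far, which is the count reported as need_prefilled_token_num in the debug log. The snippet below is a small illustrative sketch with made-up token values, under the assumption that prefill_start_index and prefill_end_index select a contiguous slice of that sequence; it is not the actual GPUModelRunner code.

# Hypothetical token ids; in the real code these come from the Request object.
prompt_token_ids = [101, 102, 103, 104]
output_token_ids = [9001, 9002]

# The restored line: prefill covers the prompt plus previously generated tokens.
input_ids = prompt_token_ids + output_token_ids

# Assumed chunked-prefill window [prefill_start_index, prefill_end_index).
prefill_start_index, prefill_end_index = 0, 4
chunk = input_ids[prefill_start_index:prefill_end_index]

print(len(input_ids))  # 6, matching need_prefilled_token_num in the log line
print(chunk)           # [101, 102, 103, 104]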