mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-12 20:11:20 +08:00
[Feature] Support block scheduler v1 for FD (#2928)
* Support FD block scheduler v1 * Support FD block scheduler v1 * Support FD block scheduler v1 * Fix according to copilot review * Fix according to review * Remove is_dummy * Fix bug when real_bsz=1 * Fix infer first token cost time --------- Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
@@ -25,6 +25,7 @@ from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
import numpy as np
|
||||
|
||||
from fastdeploy import envs
|
||||
from fastdeploy.engine.request import CompletionOutput, RequestMetrics, RequestOutput
|
||||
from fastdeploy.inter_communicator import IPCSignal
|
||||
from fastdeploy.metrics.metrics import main_process_metrics
|
||||
@@ -269,9 +270,12 @@ class TokenProcessor:
|
||||
else:
|
||||
time.sleep(0.002)
|
||||
else:
|
||||
self.resource_manager.stop_flags[index] = True
|
||||
self.resource_manager.tasks_list[index] = None
|
||||
self.resource_manager._recycle_block_tables(task)
|
||||
if envs.ENABLE_V1_KVCACHE_SCHEDULER:
|
||||
self.resource_manager.finish_requests_async(task_id)
|
||||
else:
|
||||
self.resource_manager.stop_flags[index] = True
|
||||
self.resource_manager.tasks_list[index] = None
|
||||
self.resource_manager._recycle_block_tables(task)
|
||||
if task_id in self.tokens_counter:
|
||||
del self.tokens_counter[task_id]
|
||||
|
||||
@@ -508,6 +512,7 @@ class TokenProcessor:
|
||||
self.tokens_counter[task_id] += 1
|
||||
if token_id != RECOVERY_STOP_SIGNAL:
|
||||
result.outputs.token_ids.append(token_id)
|
||||
task.output_token_ids.append(token_id)
|
||||
if token_id in task.eos_token_ids or is_prefill or recovery_stop:
|
||||
result.finished = True
|
||||
if recovery_stop:
|
||||
|
Reference in New Issue
Block a user