mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 16:22:57 +08:00
Launch expert_service before kv_cache initialization in worker_process (#3045)
* launch expert_service before kv_cache initialization * add two signals to make sure model loading and expert_service launching finished * fix the EP bug * fix ep * update launching way * fix ep * update * rollback ep * pre-commit all files --------- Co-authored-by: RAM <gstian5555@outlook.com> Co-authored-by: Divano <dddivano@outlook.com>
This commit is contained in:
@@ -296,12 +296,14 @@ class TokenProcessor:
|
||||
else:
|
||||
batch = self.output_tokens[1, 0]
|
||||
tokens = tokens[2 : batch + 2]
|
||||
|
||||
|
||||
batch_result = list()
|
||||
if envs.ENABLE_V1_KVCACHE_SCHEDULER:
|
||||
need_to_be_reschedule_req_ids = list(self.resource_manager.to_be_rescheduled_request_id_set)
|
||||
for request_id in need_to_be_reschedule_req_ids:
|
||||
if self.resource_manager.requests[request_id].idx >= (batch - 1): # No more token generated for preempted request
|
||||
if self.resource_manager.requests[request_id].idx >= (
|
||||
batch - 1
|
||||
): # No more token generated for preempted request
|
||||
self.resource_manager.reschedule_preempt_task(request_id)
|
||||
for i in range(batch):
|
||||
if self.resource_manager.stop_flags[i]:
|
||||
|
Reference in New Issue
Block a user