mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
[BugFix] support real batch_size (#3109)
* support real bsz
* fix
* fix xpu_model_runner.py, gpu_model_runner.py, gcu_model_runner.py, iluvatar_model_runner.py
* add event_loop_ep
* fix
* Add comments
* fix
* support mtp real_batch_size
* fix
* rename self.tmp_seq_lens_this_time -> self.seq_lens_this_time_buffer
* fix
* fix VL real_seq_lens_this_time
* fix
* fix mtp
* fix
* fix mtp
* fix xpu
* fix
This commit is contained in:
@@ -145,9 +145,14 @@ class XpuWorker(WorkerBase):
|
||||
def execute_model(
    self,
    model_forward_batch: Optional[List[Request]] = None,
    is_dummy_run: bool = False,
    num_running_requests: Optional[int] = None,
) -> Optional[ModelRunnerOutput]:
    """Run one step through the model runner and return its output.

    Args:
        model_forward_batch: Requests handed to the model runner; may be None.
        is_dummy_run: When True, the runner is called with the batch only and
            ``num_running_requests`` is not forwarded.
        num_running_requests: Forwarded to the runner as its second positional
            argument on non-dummy runs.

    Returns:
        Whatever ``self.model_runner.execute_model`` returns.
    """
    # Call the runner exactly once. The earlier unconditional call that
    # preceded this branch is gone: keeping it would execute the model twice
    # per step and discard the first result.
    if is_dummy_run:
        return self.model_runner.execute_model(model_forward_batch)
    return self.model_runner.execute_model(model_forward_batch, num_running_requests)
|
||||
|
||||
def exist_prefill(self):
|
||||
@@ -156,15 +161,15 @@ class XpuWorker(WorkerBase):
|
||||
"""
|
||||
return self.model_runner.exist_prefill()
|
||||
|
||||
def preprocess_new_task(self, req_dicts: List[Request], num_running_requests: int) -> None:
    """Process new requests and then start the decode loop

    TODO(gongshaotian):The scheduler should schedule the handling of prefill,
    and workers and modelrunners should not perceive it.

    Args:
        req_dicts: New requests, passed to the model runner as ``req_dicts``.
        num_running_requests: Passed through to the model runner under the
            same keyword.
    """
    # Exactly one runner call per branch. The older calls that omitted
    # num_running_requests are dropped so each task batch is inserted once.
    if envs.ENABLE_V1_KVCACHE_SCHEDULER:
        self.model_runner.insert_tasks_v1(req_dicts=req_dicts, num_running_requests=num_running_requests)
    else:
        self.model_runner.process_prefill_inputs(req_dicts=req_dicts, num_running_requests=num_running_requests)
|
||||
|
||||
def check_health(self) -> bool:
|
||||
""" """
|
||||
|
Reference in New Issue
Block a user