This commit is contained in:
yinwei
2025-08-09 16:29:35 +08:00
committed by GitHub
parent 6706ccb37e
commit 702c313ed1
2 changed files with 13 additions and 21 deletions

View File

@@ -149,10 +149,9 @@ class XpuWorker(WorkerBase):
num_running_requests: Optional[int] = None,
) -> Optional[ModelRunnerOutput]:
""" """
if is_dummy_run:
output = self.model_runner.execute_model(model_forward_batch)
else:
output = self.model_runner.execute_model(model_forward_batch, num_running_requests)
output = self.model_runner.execute_model(model_forward_batch)
return output
def exist_prefill(self):
@@ -161,15 +160,15 @@ class XpuWorker(WorkerBase):
"""
return self.model_runner.exist_prefill()
def preprocess_new_task(self, req_dicts: List[Request], num_running_requests: int) -> None:
def preprocess_new_task(self, req_dicts: List[Request], num_running_requests: int = -1) -> None:
"""Process new requests and then start the decode loop
TODO(gongshaotian):The scheduler should schedule the handling of prefill,
and workers and modelrunners should not perceive it.
"""
if envs.ENABLE_V1_KVCACHE_SCHEDULER:
self.model_runner.insert_tasks_v1(req_dicts=req_dicts, num_running_requests=num_running_requests)
self.model_runner.insert_tasks_v1(req_dicts=req_dicts)
else:
self.model_runner.process_prefill_inputs(req_dicts=req_dicts, num_running_requests=num_running_requests)
self.model_runner.process_prefill_inputs(req_dicts=req_dicts)
def check_health(self) -> bool:
""" """