revert pr (#3286)

2025-10-07 17:41:52 +08:00 · 2025-08-09 16:29:35 +08:00
parent 6706ccb37e
commit 702c313ed1
2 changed files with 13 additions and 21 deletions
--- a/fastdeploy/worker/xpu_worker.py
+++ b/fastdeploy/worker/xpu_worker.py
@@ -149,10 +149,9 @@ class XpuWorker(WorkerBase):
        num_running_requests: Optional[int] = None,
    ) -> Optional[ModelRunnerOutput]:
        """ """
-        if is_dummy_run:
-            output = self.model_runner.execute_model(model_forward_batch)
-        else:
-            output = self.model_runner.execute_model(model_forward_batch, num_running_requests)
+
+        output = self.model_runner.execute_model(model_forward_batch)
+
        return output

    def exist_prefill(self):
@@ -161,15 +160,15 @@ class XpuWorker(WorkerBase):
        """
        return self.model_runner.exist_prefill()

-    def preprocess_new_task(self, req_dicts: List[Request], num_running_requests: int) -> None:
+    def preprocess_new_task(self, req_dicts: List[Request], num_running_requests: int = -1) -> None:
        """Process new requests and then start the decode loop
        TODO(gongshaotian):The scheduler should schedule the handling of prefill,
        and workers and modelrunners should not perceive it.
        """
        if envs.ENABLE_V1_KVCACHE_SCHEDULER:
-            self.model_runner.insert_tasks_v1(req_dicts=req_dicts, num_running_requests=num_running_requests)
+            self.model_runner.insert_tasks_v1(req_dicts=req_dicts)
        else:
-            self.model_runner.process_prefill_inputs(req_dicts=req_dicts, num_running_requests=num_running_requests)
+            self.model_runner.process_prefill_inputs(req_dicts=req_dicts)

    def check_health(self) -> bool:
        """ """