Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 08:37:06 +08:00)
@@ -105,9 +105,9 @@ class GCUModelRunner(ModelRunnerBase):
             self.local_rank + int(self.parallel_config.engine_worker_queue_port)
         )
 
-    def prefill_finished(self):
+    def exist_prefill(self):
         """
-        check whether prefill stage finished
+        check whether prefill stage exist
         """
         if int(paddle.max(self.share_inputs["seq_lens_encoder"])) != 0:
             return 1
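The rename repeated across these hunks turns a misnamed "finished?" check into an "exists?" predicate: the method returns a truthy value while any batch slot still has encoder (prefill) tokens to process. A minimal standalone sketch of the runner-side logic, assuming share_inputs["seq_lens_encoder"] is a paddle tensor of pending encoder lengths per slot (the harness around it is illustrative, not FastDeploy API):

import paddle

# Illustrative stand-in for the runner's shared inputs; in the diff these
# buffers live on the model runner and are filled by the scheduler.
share_inputs = {"seq_lens_encoder": paddle.to_tensor([0, 17, 0])}

def exist_prefill(share_inputs):
    """Return 1 if any slot still has encoder (prefill) tokens pending."""
    if int(paddle.max(share_inputs["seq_lens_encoder"])) != 0:
        return 1
    # Explicit here for clarity; the diffed method simply falls through
    # and returns None, which is equally falsy.
    return 0

print(exist_prefill(share_inputs))  # 1 -> slot 1 still has 17 prefill tokens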
@@ -69,11 +69,11 @@ class GcuWorker(WorkerBase):
             local_rank=self.local_rank,
         )
 
-    def prefill_finished(self):
+    def exist_prefill(self):
         """
-        check whether prefill stage finished
+        check whether prefill stage exist
         """
-        return self.model_runner.prefill_finished()
+        return self.model_runner.exist_prefill()
 
     def determine_available_memory(self) -> int:
         """
@@ -148,9 +148,9 @@ class GPUModelRunner(ModelRunnerBase):
             self.local_rank + int(self.parallel_config.engine_worker_queue_port)
         )
 
-    def prefill_finished(self):
+    def exist_prefill(self):
         """
-        Check whether prefill stage finished
+        check whether prefill stage exist
         """
         if int(paddle.max(self.share_inputs["seq_lens_encoder"])) != 0:
             return 1
@@ -78,11 +78,11 @@ class GpuWorker(WorkerBase):
             local_rank=self.local_rank,
         )
 
-    def prefill_finished(self):
+    def exist_prefill(self):
         """
-        Check whether prefill stage finished
+        check whether prefill stage exist
         """
-        return self.model_runner.prefill_finished()
+        return self.model_runner.exist_prefill()
 
     def determine_available_memory(self) -> int:
         """
@@ -105,9 +105,9 @@ class IluvatarModelRunner(ModelRunnerBase):
             self.local_rank + int(self.parallel_config.engine_worker_queue_port)
         )
 
-    def prefill_finished(self):
+    def exist_prefill(self):
         """
-        check whether prefill stage finished
+        check whether prefill stage exist
         """
         if int(paddle.max(self.share_inputs["seq_lens_encoder"])) != 0:
             return 1
@@ -69,11 +69,11 @@ class IluvatarWorker(WorkerBase):
             local_rank=self.local_rank,
         )
 
-    def prefill_finished(self):
+    def exist_prefill(self):
         """
-        check whether prefill stage finished
+        check whether prefill stage exist
         """
-        return self.model_runner.prefill_finished()
+        return self.model_runner.exist_prefill()
 
     def determine_available_memory(self) -> int:
         """
@@ -96,6 +96,6 @@ class WorkerBase(ABC):
         """Basic health check (override for device-specific checks)."""
         return NotImplementedError
 
-    def prefill_finished(self):
-        """check whether prefill stage finished."""
+    def exist_prefill(self):
+        """check whether prefill stage exist."""
         return True
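The base class keeps a permissive default while each device worker delegates to its runner. A trimmed sketch of the override pattern, with all unrelated members omitted (class shapes inferred from the hunks above):

from abc import ABC

class WorkerBase(ABC):
    def exist_prefill(self):
        """check whether prefill stage exist."""
        # Permissive default: report prefill work as present so callers
        # that gate on this stay conservative on an unknown backend.
        return True

class GpuWorker(WorkerBase):
    def __init__(self, model_runner):
        self.model_runner = model_runner

    def exist_prefill(self):
        """check whether prefill stage exist."""
        # Delegate to the runner, which inspects seq_lens_encoder.
        return self.model_runner.exist_prefill()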
@@ -286,7 +286,7 @@ class PaddleDisWorkerProc:
             if self.local_rank % mp_num_per_node == 0:
                 if self.task_queue.num_tasks() > 0:
                     # VL only support 1 batch to prefill
-                    if not self.fd_config.model_config.enable_mm or not self.worker.prefill_finished():
+                    if not self.fd_config.model_config.enable_mm or not self.worker.exist_prefill():
                         if self.nnode > 1:
                             self.task_queue.read_finish_flag.set(1)
                         else:
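This hunk is the consumer that motivates the rename: multimodal (VL) models can prefill only one batch at a time, so the dispatcher pulls new tasks only when the model is not multimodal or no prefill is currently in flight. The condition, isolated as a helper (hypothetical name; the logic is copied from the + line above):

def can_pull_new_tasks(fd_config, worker):
    """VL models support a single in-flight prefill batch, so hold back
    new tasks while one exists; text-only models are never gated."""
    return (not fd_config.model_config.enable_mm) or (not worker.exist_prefill())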
@@ -346,7 +346,7 @@ class PaddleDisWorkerProc:
             # Execute model to generate token. The generated token will be written to the buffer.
             # These generated tokens can be obtained through get_output op.
             self.worker.execute_model(req_dicts)
-            self.exist_prefill_task_signal.value[0] = self.worker.prefill_finished()
+            self.exist_prefill_task_signal.value[0] = self.worker.exist_prefill()
 
     def initialize_kv_cache(self) -> None:
         """Profiles the peak memory usage of the model to determine how many
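After each step the worker process publishes worker.exist_prefill() through a shared signal so other processes can poll prefill state without a queue round-trip. A rough sketch of that handshake; Signal is a minimal stand-in for FastDeploy's IPC signal object (an assumption, not the project's API), and only the two lines in step() mirror the diff:

class Signal:
    """Minimal stand-in for the shared IPC signal (assumption); the real
    object backs .value with shared memory visible across ranks."""
    def __init__(self):
        self.value = [0]

def step(worker, req_dicts, signal):
    # Execute the model to generate tokens into the output buffer, then
    # publish whether prefill work still exists for pollers to read.
    worker.execute_model(req_dicts)
    signal.value[0] = worker.exist_prefill()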
@@ -584,9 +584,9 @@ class XPUModelRunner(ModelRunnerBase):
         logger.warn("XPU not support cuda graph currently")
         pass
 
-    def prefill_finished(self):
+    def exist_prefill(self):
         """
-        check whether prefill stage finished
+        check whether prefill stage exist
         """
         if int(paddle.max(self.share_inputs["seq_lens_encoder"])) != 0:
             return 1
@@ -143,11 +143,11 @@ class XpuWorker(WorkerBase):
         output = self.model_runner.execute_model(model_forward_batch)
         return output
 
-    def prefill_finished(self):
+    def exist_prefill(self):
         """
-        check whether prefill stage finished
+        check whether prefill stage exist
         """
-        return self.model_runner.prefill_finished()
+        return self.model_runner.exist_prefill()
 
     def preprocess_new_task(self, req_dicts: List[Request]) -> None:
         """Process new requests and then start the decode loop