mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 00:33:03 +08:00
Sync v2.0 version of code to github repo
This commit is contained in:
101
fastdeploy/worker/worker_base.py
Normal file
101
fastdeploy/worker/worker_base.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional
|
||||
|
||||
from paddle import nn
|
||||
|
||||
from fastdeploy.config import FDConfig
|
||||
from fastdeploy.worker.model_runner_base import ModelRunnerBase
|
||||
from fastdeploy.worker.output import ModelRunnerOutput
|
||||
|
||||
|
||||
class WorkerBase(ABC):
    """
    Engine -> (WIP)Executor -> Worker -> ModelRunner -> Model

    Worker interface that allows the inference framework to cleanly separate
    implementations for different hardware.
    """

    def __init__(
        self,
        fd_config: FDConfig,
        local_rank: int,
        rank: int,
    ) -> None:
        """
        Initialize common worker components.

        Args:
            fd_config: Global FastDeploy configuration bundle; its
                model/load/parallel/device sub-configs are unpacked onto
                the instance for convenient access.
            local_rank: Rank of this worker on its node (local device index).
            rank: Global rank of this worker across all nodes.
        """
        # Set Configuration
        self.fd_config = fd_config
        self.model_config = fd_config.model_config
        self.load_config = fd_config.load_config
        self.parallel_config = fd_config.parallel_config
        self.device_config = fd_config.device_config
        # ... config

        # Device and Runner
        # NOTE: `device` and `model_runner` are declared here but left
        # unassigned; concrete subclasses are expected to populate them
        # (typically in init_device()).
        self.device: Optional[str]  # gpu, xpu ...
        self.local_rank = local_rank
        self.rank = rank
        self.model_runner: Optional[ModelRunnerBase]

    @abstractmethod
    def init_device(self) -> None:
        """Initialize the device state."""
        raise NotImplementedError

    @abstractmethod
    def initialize_cache(self, num_gpu_blocks: int,
                         num_cpu_blocks: int) -> None:
        """Initialize the KV Cache with the given size in blocks."""
        raise NotImplementedError

    @abstractmethod
    def get_model(self) -> nn.Layer:
        """Get the model loaded by the worker."""
        raise NotImplementedError

    @abstractmethod
    def load_model(self) -> None:
        """Load the model from local or remote storage."""
        raise NotImplementedError

    @abstractmethod
    def execute_model(
        self,
        model_forward_batch=None,
    ) -> Optional[ModelRunnerOutput]:
        """Run one forward pass over the given batch and return its output."""
        raise NotImplementedError

    @abstractmethod
    def graph_optimize_and_warm_up_model(self) -> None:
        """Prepare model for execution through graph optimization (CUDAGraph/CINN) or warmup."""
        raise NotImplementedError

    @abstractmethod
    def check_health(self) -> None:
        """Basic health check (override for device-specific checks)."""
        # Bug fix: the original `return NotImplementedError` returned the
        # exception class to the caller instead of raising it, silently
        # masking unimplemented overrides.
        raise NotImplementedError

    def prefill_finished(self):
        """Check whether the prefill stage has finished."""
        # Default implementation: report prefill as always finished;
        # subclasses override when they track prefill state.
        return True
|
Reference in New Issue
Block a user