Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-06 00:57:33 +08:00
[Serving] Add a simple Python serving (#962)
* init simple serving
* simple serving is working
* ppyoloe demo
* Update README_CN.md
* update readme
* complete vision result to json
python/fastdeploy/serving/model_manager.py (new file, 57 lines)
@@ -0,0 +1,57 @@
# coding:utf-8
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import time
import json
import logging
import threading
# from .predictor import Predictor
from .handler import BaseModelHandler
from .utils import lock_predictor


class ModelManager:
    def __init__(self, model_handler, predictor):
        self._model_handler = model_handler
        self._predictors = []
        self._predictor_locks = []
        self._register(predictor)

    def _register(self, predictor):
        # Validate the model handler class before registering the predictor
        if not issubclass(self._model_handler, BaseModelHandler):
            raise TypeError(
                "The model_handler must be a subclass of BaseModelHandler, please check the type."
            )

        # TODO: Create multiple predictors to run on different GPUs or different CPU threads
        self._predictors.append(predictor)
        self._predictor_locks.append(threading.Lock())

    def _get_predict_id(self):
        # Select a predictor by taking the current time in milliseconds modulo
        # the number of registered predictors (a simple load-spreading scheme)
        t = time.time()
        t = int(round(t * 1000))
        predictor_id = t % len(self._predictors)
        logging.info("The predictor id: {} is selected by running the model.".
                     format(predictor_id))
        return predictor_id

    def predict(self, data, parameters):
        predictor_id = self._get_predict_id()
        # Hold the per-predictor lock so concurrent requests never share a predictor
        with lock_predictor(self._predictor_locks[predictor_id]):
            model_output = self._model_handler.process(
                self._predictors[predictor_id], data, parameters)
        return model_output
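For context, a minimal usage sketch of how ModelManager might be driven. EchoHandler and DummyPredictor below are hypothetical stand-ins, not part of this commit; the only assumptions carried over from the file above are that the handler is passed as a class, must subclass BaseModelHandler, and has its process() called as handler.process(predictor, data, parameters).

# A minimal usage sketch, assuming the fastdeploy.serving package layout shown
# above; EchoHandler and DummyPredictor are hypothetical stand-ins.
from fastdeploy.serving.handler import BaseModelHandler
from fastdeploy.serving.model_manager import ModelManager


class EchoHandler(BaseModelHandler):
    """Hypothetical handler: ModelManager calls process() on the class itself."""

    @classmethod
    def process(cls, predictor, data, parameters):
        # A real handler would preprocess `data`, run the predictor, and
        # postprocess the output (e.g. serialize a vision result to JSON).
        return predictor.predict(data)


class DummyPredictor:
    """Stand-in for a FastDeploy model/runtime object."""

    def predict(self, data):
        return {"echo": data}


manager = ModelManager(EchoHandler, DummyPredictor())
result = manager.predict({"image": "demo.jpg"}, parameters={})
print(result)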