[Feature] Models api (#3073)

* add v1/models interface related * add model parameters * default model verification * unit test * check model err_msg * unit test * type annotation * model parameter in response * modify document description * modify document description * unit test * verification * verification update * model_name * pre-commit * update test case * update test case * Update tests/entrypoints/openai/test_serving_models.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update tests/entrypoints/openai/test_serving_models.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update tests/entrypoints/openai/test_serving_models.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update tests/entrypoints/openai/test_serving_models.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update fastdeploy/entrypoints/openai/serving_models.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: LiqinruiG <37392159+LiqinruiG@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-10-04 16:22:57 +08:00 · 2025-08-21 17:02:56 +08:00
parent b7eee3aec1
commit 466cbb5a99
13 changed files with 289 additions and 20 deletions
--- a/fastdeploy/entrypoints/openai/serving_chat.py
+++ b/fastdeploy/entrypoints/openai/serving_chat.py
@@ -46,8 +46,9 @@ class OpenAIServingChat:
    OpenAI-style chat completions serving
    """

-    def __init__(self, engine_client, pid, ips, max_waiting_time, chat_template):
+    def __init__(self, engine_client, models, pid, ips, max_waiting_time, chat_template):
        self.engine_client = engine_client
+        self.models = models
        self.pid = pid
        self.master_ip = ips
        self.max_waiting_time = max_waiting_time
@@ -81,6 +82,14 @@ class OpenAIServingChat:
            err_msg = f"Only master node can accept completion request, please send request to master node: {self.pod_ips[0]}"
            api_server_logger.error(err_msg)
            return ErrorResponse(message=err_msg, code=400)
+
+        if self.models:
+            is_supported, request.model = self.models.is_supported_model(request.model)
+            if not is_supported:
+                err_msg = f"Unsupported model: {request.model}, support {', '.join([x.name for x in self.models.model_paths])} or default"
+                api_server_logger.error(err_msg)
+                return ErrorResponse(message=err_msg, code=400)
+
        try:
            if self.max_waiting_time < 0:
                await self.engine_client.semaphore.acquire()