[Feature] Models api (#3073)

* add v1/models interface related
* add model parameters
* default model verification
* unit test
* check model err_msg
* unit test
* type annotation
* model parameter in response
* modify document description
* modify document description
* unit test
* verification
* verification update
* model_name
* pre-commit
* update test case
* update test case
* Update tests/entrypoints/openai/test_serving_models.py

  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update tests/entrypoints/openai/test_serving_models.py

  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update tests/entrypoints/openai/test_serving_models.py

  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update tests/entrypoints/openai/test_serving_models.py

  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update fastdeploy/entrypoints/openai/serving_models.py

  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: LiqinruiG <37392159+LiqinruiG@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-10-05 08:37:06 +08:00 · 2025-08-21 17:02:56 +08:00
parent b7eee3aec1
commit 466cbb5a99
13 changed files with 289 additions and 20 deletions
--- a/tests/utils/test_custom_chat_template.py
+++ b/tests/utils/test_custom_chat_template.py
@@ -57,7 +57,12 @@ class TestLodChatTemplate(unittest.IsolatedAsyncioTestCase):
    async def test_serving_chat(self):
        request = ChatCompletionRequest(messages=[{"role": "user", "content": "你好"}])
        self.chat_completion_handler = OpenAIServingChat(
-            self.mock_engine, pid=123, ips=None, max_waiting_time=-1, chat_template=self.input_chat_template
+            self.mock_engine,
+            models=None,
+            pid=123,
+            ips=None,
+            max_waiting_time=-1,
+            chat_template=self.input_chat_template,
        )

        async def mock_chat_completion_full_generator(
@@ -79,7 +84,12 @@ class TestLodChatTemplate(unittest.IsolatedAsyncioTestCase):
    async def test_serving_chat_cus(self):
        request = ChatCompletionRequest(messages=[{"role": "user", "content": "hi"}], chat_template="hello")
        self.chat_completion_handler = OpenAIServingChat(
-            self.mock_engine, pid=123, ips=None, max_waiting_time=10, chat_template=self.input_chat_template
+            self.mock_engine,
+            models=None,
+            pid=123,
+            ips=None,
+            max_waiting_time=10,
+            chat_template=self.input_chat_template,
        )

        async def mock_chat_completion_full_generator(