[Feature] Models api (#3073)

* add the v1/models interface and related code

* add model parameters

* default model verification

* unit test

* check model err_msg

* unit test

* type annotation

* model parameter in response

* modify document description

* modify document description

* unit test

* verification

* verification update

* model_name

* pre-commit

* update test case

* update test case

* Update tests/entrypoints/openai/test_serving_models.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update tests/entrypoints/openai/test_serving_models.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update tests/entrypoints/openai/test_serving_models.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update tests/entrypoints/openai/test_serving_models.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update fastdeploy/entrypoints/openai/serving_models.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: LiqinruiG <37392159+LiqinruiG@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Yzc216
2025-08-21 17:02:56 +08:00
committed by GitHub
parent b7eee3aec1
commit 466cbb5a99
13 changed files with 289 additions and 20 deletions

View File

@@ -57,7 +57,12 @@ class TestLodChatTemplate(unittest.IsolatedAsyncioTestCase):
async def test_serving_chat(self):
request = ChatCompletionRequest(messages=[{"role": "user", "content": "你好"}])
self.chat_completion_handler = OpenAIServingChat(
-self.mock_engine, pid=123, ips=None, max_waiting_time=-1, chat_template=self.input_chat_template
+self.mock_engine,
+models=None,
+pid=123,
+ips=None,
+max_waiting_time=-1,
+chat_template=self.input_chat_template,
)
async def mock_chat_completion_full_generator(
@@ -79,7 +84,12 @@ class TestLodChatTemplate(unittest.IsolatedAsyncioTestCase):
async def test_serving_chat_cus(self):
request = ChatCompletionRequest(messages=[{"role": "user", "content": "hi"}], chat_template="hello")
self.chat_completion_handler = OpenAIServingChat(
-self.mock_engine, pid=123, ips=None, max_waiting_time=10, chat_template=self.input_chat_template
+self.mock_engine,
+models=None,
+pid=123,
+ips=None,
+max_waiting_time=10,
+chat_template=self.input_chat_template,
)
async def mock_chat_completion_full_generator(