mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 16:22:57 +08:00
[Feature] Models api (#3073)
* add v1/models interface related * add model parameters * default model verification * unit test * check model err_msg * unit test * type annotation * model parameter in response * modify document description * modify document description * unit test * verification * verification update * model_name * pre-commit * update test case * update test case * Update tests/entrypoints/openai/test_serving_models.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update tests/entrypoints/openai/test_serving_models.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update tests/entrypoints/openai/test_serving_models.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update tests/entrypoints/openai/test_serving_models.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update fastdeploy/entrypoints/openai/serving_models.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: LiqinruiG <37392159+LiqinruiG@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -46,8 +46,9 @@ class OpenAIServingChat:
|
||||
OpenAI-style chat completions serving
|
||||
"""
|
||||
|
||||
def __init__(self, engine_client, pid, ips, max_waiting_time, chat_template):
|
||||
def __init__(self, engine_client, models, pid, ips, max_waiting_time, chat_template):
|
||||
self.engine_client = engine_client
|
||||
self.models = models
|
||||
self.pid = pid
|
||||
self.master_ip = ips
|
||||
self.max_waiting_time = max_waiting_time
|
||||
@@ -81,6 +82,14 @@ class OpenAIServingChat:
|
||||
err_msg = f"Only master node can accept completion request, please send request to master node: {self.pod_ips[0]}"
|
||||
api_server_logger.error(err_msg)
|
||||
return ErrorResponse(message=err_msg, code=400)
|
||||
|
||||
if self.models:
|
||||
is_supported, request.model = self.models.is_supported_model(request.model)
|
||||
if not is_supported:
|
||||
err_msg = f"Unsupported model: {request.model}, support {', '.join([x.name for x in self.models.model_paths])} or default"
|
||||
api_server_logger.error(err_msg)
|
||||
return ErrorResponse(message=err_msg, code=400)
|
||||
|
||||
try:
|
||||
if self.max_waiting_time < 0:
|
||||
await self.engine_client.semaphore.acquire()
|
||||
|
Reference in New Issue
Block a user