mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[Feature] Models api (#3073)
* add v1/models interface related
* add model parameters
* default model verification
* unit test
* check model err_msg
* unit test
* type annotation
* model parameter in response
* modify document description
* modify document description
* unit test
* verification
* verification update
* model_name
* pre-commit
* update test case
* update test case
* Update tests/entrypoints/openai/test_serving_models.py
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update tests/entrypoints/openai/test_serving_models.py
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update tests/entrypoints/openai/test_serving_models.py
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update tests/entrypoints/openai/test_serving_models.py
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update fastdeploy/entrypoints/openai/serving_models.py
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---------
Co-authored-by: LiqinruiG <37392159+LiqinruiG@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -38,8 +38,9 @@ from fastdeploy.worker.output import LogprobsLists
|
||||
|
||||
|
||||
class OpenAIServingCompletion:
|
||||
def __init__(self, engine_client, pid, ips, max_waiting_time):
|
||||
def __init__(self, engine_client, models, pid, ips, max_waiting_time):
|
||||
self.engine_client = engine_client
|
||||
self.models = models
|
||||
self.pid = pid
|
||||
self.master_ip = ips
|
||||
self.host_ip = get_host_ip()
|
||||
@@ -71,6 +72,12 @@ class OpenAIServingCompletion:
|
||||
err_msg = f"Only master node can accept completion request, please send request to master node: {self.pod_ips[0]}"
|
||||
api_server_logger.error(err_msg)
|
||||
return ErrorResponse(message=err_msg, code=400)
|
||||
if self.models:
|
||||
is_supported, request.model = self.models.is_supported_model(request.model)
|
||||
if not is_supported:
|
||||
err_msg = f"Unsupported model: {request.model}, support {', '.join([x.name for x in self.models.model_paths])} or default"
|
||||
api_server_logger.error(err_msg)
|
||||
return ErrorResponse(message=err_msg, code=400)
|
||||
created_time = int(time.time())
|
||||
if request.user is not None:
|
||||
request_id = f"cmpl-{request.user}-{uuid.uuid4()}"
|
||||
|
Reference in New Issue
Block a user