FastDeploy/fastdeploy/entrypoints/openai/serving_models.py
Yzc216 466cbb5a99 [Feature] Models api (#3073)
* add v1/models interface related

* add model parameters

* default model verification

* unit test

* check model err_msg

* type annotation

* model parameter in response

* modify document description

* verification

* verification update

* model_name

* pre-commit

* update test case

* Update tests/entrypoints/openai/test_serving_models.py

* Update fastdeploy/entrypoints/openai/serving_models.py

---------

Co-authored-by: LiqinruiG <37392159+LiqinruiG@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-08-21 17:02:56 +08:00


"""
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
from dataclasses import dataclass
from typing import List, Union

from fastdeploy.entrypoints.openai.protocol import (
    ErrorResponse,
    ModelInfo,
    ModelList,
    ModelPermission,
)
from fastdeploy.utils import api_server_logger, get_host_ip


@dataclass
class ModelPath:
    """Name, filesystem path, and verification flag of a served model."""

    name: str
    model_path: str
    verification: bool = False


class OpenAIServingModels:
    """
    OpenAI-style models serving
    """

    def __init__(
        self,
        model_paths: list[ModelPath],
        max_model_len: int,
        ips: Union[List[str], str],
    ):
        self.model_paths = model_paths
        self.max_model_len = max_model_len
        self.master_ip = ips
        self.host_ip = get_host_ip()
        # `ips` may be a list of node IPs or a comma-separated string;
        # the first entry is treated as the master node address.
        if self.master_ip is not None:
            if isinstance(self.master_ip, list):
                self.master_ip = self.master_ip[0]
            else:
                self.master_ip = self.master_ip.split(",")[0]

    def _check_master(self):
        """Return True if this node is the master node (or no master is configured)."""
        if self.master_ip is None:
            return True
        if self.host_ip == self.master_ip:
            return True
        return False

    def is_supported_model(self, model_name: str) -> tuple[bool, str]:
        """
        Check whether the specified model is supported.
        """
        # With verification disabled, any requested name resolves to the default model.
        if self.model_paths[0].verification is False:
            return True, self.model_name()
        if model_name == "default":
            return True, self.model_name()
        return any(model.name == model_name for model in self.model_paths), model_name

    def model_name(self) -> str:
        """
        Returns the current model name.
        """
        return self.model_paths[0].name

    async def list_models(self) -> Union[ModelList, ErrorResponse]:
        """
        Show available models.
        """
        if not self._check_master():
            err_msg = (
                f"Only the master node can accept models requests; please send the request to the master node: "
                f"{self.master_ip}"
            )
            api_server_logger.error(err_msg)
            return ErrorResponse(message=err_msg, code=400)
        model_infos = [
            ModelInfo(
                id=model.name, max_model_len=self.max_model_len, root=model.model_path, permission=[ModelPermission()]
            )
            for model in self.model_paths
        ]
        return ModelList(data=model_infos)
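
Below is a minimal usage sketch, not part of serving_models.py itself. It assumes the module import path follows the file location shown above; the model name, model path, and IP addresses are placeholders.

# Minimal usage sketch -- illustrative only, not part of the file above.
# The model name, model path, and IP addresses below are placeholders.
import asyncio

from fastdeploy.entrypoints.openai.serving_models import ModelPath, OpenAIServingModels


async def main():
    model_paths = [ModelPath(name="my-model", model_path="/models/my-model", verification=True)]
    serving_models = OpenAIServingModels(
        model_paths=model_paths,
        max_model_len=8192,
        ips="192.168.0.10,192.168.0.11",
    )

    # "default" always resolves to the first configured model's name.
    supported, resolved_name = serving_models.is_supported_model("default")
    print(supported, resolved_name)

    # Returns a ModelList on the master node, or an ErrorResponse otherwise.
    response = await serving_models.list_models()
    print(response)


if __name__ == "__main__":
    asyncio.run(main())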