Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-23 16:44:22 +08:00
[Feature] Remove dependency on enable_mm and refine the multimodal code (#3014)
* remove dependency on enable_mm
* fix codestyle check error
* fix codestyle check error
* update docs
* resolve conflicts on model config
* fix unit test error
* fix code style check error

Co-authored-by: shige <1021937542@qq.com>
Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
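The core of the change: rather than threading a user-supplied `enable_mm` flag through every entrypoint, `EngineClient` now infers multimodal support from the model's architecture via `MultimodalRegistry`. As a rough illustration of the registry pattern involved, here is a minimal sketch; only `contains_model` appears in the diff below, so the internals and the registered architecture names are assumptions, not the actual `fastdeploy.multimodal.registry` code:

```python
# Minimal sketch of a registry keyed by architecture name. Only contains_model
# mirrors the diff below; everything else here is an assumption.
class MultimodalRegistry:
    _mm_models: set = set()

    @classmethod
    def register_model(cls, name: str) -> None:
        # Multimodal model definitions would call this when their module is imported.
        cls._mm_models.add(name)

    @classmethod
    def contains_model(cls, name: str) -> bool:
        # Lookup used by EngineClient to decide enable_mm automatically.
        return name in cls._mm_models


# Illustrative architecture names only; the real registry is populated as a
# side effect of importing fastdeploy.model_executor.models.
MultimodalRegistry.register_model("Ernie4_5_VLMoeForConditionalGeneration")
assert MultimodalRegistry.contains_model("Ernie4_5_VLMoeForConditionalGeneration")
assert not MultimodalRegistry.contains_model("Ernie4_5_MoeForCausalLM")
```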
@@ -27,8 +27,8 @@ from openai.types.chat import (
 )
 from typing_extensions import Required, TypeAlias, TypedDict
 
-from fastdeploy.input.multimodal.image import ImageMediaIO
-from fastdeploy.input.multimodal.video import VideoMediaIO
+from fastdeploy.multimodal.image import ImageMediaIO
+from fastdeploy.multimodal.video import VideoMediaIO
 
 
 class VideoURL(TypedDict, total=False):
@@ -19,9 +19,11 @@ import uuid
 
 import numpy as np
 
+from fastdeploy.engine.config import ModelConfig
 from fastdeploy.input.preprocess import InputPreprocessor
 from fastdeploy.inter_communicator import IPCSignal, ZmqClient
 from fastdeploy.metrics.work_metrics import work_process_metrics
+from fastdeploy.multimodal.registry import MultimodalRegistry
 from fastdeploy.platforms import current_platform
 from fastdeploy.utils import EngineError, api_server_logger
 
@@ -33,26 +35,34 @@ class EngineClient:
     def __init__(
         self,
         model_name_or_path,
         tokenizer,
         max_model_len,
         tensor_parallel_size,
         pid,
         limit_mm_per_prompt,
         mm_processor_kwargs,
-        enable_mm=False,
+        # enable_mm=False,
         reasoning_parser=None,
         data_parallel_size=1,
         enable_logprob=False,
     ):
+        import fastdeploy.model_executor.models  # noqa: F401
+
+        architectures = ModelConfig({"model": model_name_or_path}).architectures[0]
+        if MultimodalRegistry.contains_model(architectures):
+            self.enable_mm = True
+        else:
+            self.enable_mm = False
+
         input_processor = InputPreprocessor(
             tokenizer,
             reasoning_parser,
             limit_mm_per_prompt,
             mm_processor_kwargs,
-            enable_mm,
+            self.enable_mm,
         )
         self.enable_logprob = enable_logprob
-        self.enable_mm = enable_mm
         self.reasoning_parser = reasoning_parser
         self.data_processor = input_processor.create_processor()
         self.max_model_len = max_model_len
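Taken together, the new `__init__` logic reduces multimodal detection to a single lookup. A self-contained sketch of that flow, assuming `ModelConfig` reads the `architectures` list from the model's config (the helper function name is hypothetical; in the PR the logic is inline):

```python
# Sketch of the detection flow shown in the hunk above.
import fastdeploy.model_executor.models  # noqa: F401  # populates the registry

from fastdeploy.engine.config import ModelConfig
from fastdeploy.multimodal.registry import MultimodalRegistry


def is_multimodal(model_name_or_path: str) -> bool:
    # Same two calls EngineClient.__init__ now makes.
    architecture = ModelConfig({"model": model_name_or_path}).architectures[0]
    return MultimodalRegistry.contains_model(architecture)
```

One design note: the `noqa: F401` import is load-bearing, since importing the models package is what registers each architecture with the registry in the first place.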
@@ -28,9 +28,11 @@ from tqdm import tqdm
 from fastdeploy.engine.args_utils import EngineArgs
 from fastdeploy.engine.engine import LLMEngine
 from fastdeploy.engine.sampling_params import SamplingParams
-from fastdeploy.utils import llm_logger, retrive_model_from_server
+
+# from fastdeploy.entrypoints.chat_utils import ChatCompletionMessageParam
+from fastdeploy.utils import (
+    deprecated_kwargs_warning,
+    llm_logger,
+    retrive_model_from_server,
+)
 from fastdeploy.worker.output import Logprob, LogprobsLists
 
 root_logger = logging.getLogger()
 
@@ -72,6 +74,8 @@ class LLM:
         enable_logprob: Optional[bool] = False,
         **kwargs,
     ):
+        deprecated_kwargs_warning(**kwargs)
+
         model = retrive_model_from_server(model, revision)
         engine_args = EngineArgs(
             model=model,
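`deprecated_kwargs_warning` is imported and called here but its body never appears in this diff, so the sketch below is a guess at the usual shape of such a helper, not FastDeploy's actual implementation:

```python
import warnings

# Hypothetical set of retired keyword arguments; the real helper may track
# its own list or warn on any unrecognized kwarg.
_DEPRECATED_KWARGS = {"enable_mm"}


def deprecated_kwargs_warning(**kwargs):
    # Warn once per deprecated kwarg passed by the caller, then ignore it.
    for name in kwargs:
        if name in _DEPRECATED_KWARGS:
            warnings.warn(
                f"'{name}' is deprecated and ignored; multimodal support is now "
                "detected from the model architecture.",
                DeprecationWarning,
                stacklevel=2,
            )
```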
@@ -105,13 +105,14 @@ async def lifespan(app: FastAPI):
     pid = os.getpid()
     api_server_logger.info(f"{pid}")
     engine_client = EngineClient(
         args.model,
         args.tokenizer,
         args.max_model_len,
         args.tensor_parallel_size,
         pid,
         args.limit_mm_per_prompt,
         args.mm_processor_kwargs,
-        args.enable_mm,
+        # args.enable_mm,
         args.reasoning_parser,
         args.data_parallel_size,
         args.enable_logprob,
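Net effect for callers: the API server no longer forwards `args.enable_mm`, and, given the doc updates listed in the commit message, the corresponding launch flag is presumably retired as well; whether a deployment handles multimodal inputs is now decided entirely by the model being loaded.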