[Feature] remove dependency on enable_mm and refine multimodal's code (#3014)

* remove dependency on enable_mm

* fix codestyle check error

* fix codestyle check error

* update docs

* resolve conflicts on model config

* fix unit test error

* fix code style check error

---------

Co-authored-by: shige <1021937542@qq.com>
Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
Author: ApplEOFDiscord
Date: 2025-08-01 20:01:18 +08:00
Committed by: GitHub
Parent commit: 243394044d
Commit: b71cbb466d
24 changed files with 118 additions and 29 deletions
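In short: this commit makes multimodal support self-detected instead of flag-driven. EngineClient now derives enable_mm from the model's architecture through MultimodalRegistry, the LLM entrypoint warns about deprecated keyword arguments via a new deprecated_kwargs_warning helper, and the OpenAI API server no longer forwards args.enable_mm. The hunks below show the main affected entry points; sketches after each hunk illustrate the moving parts.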


@@ -27,8 +27,8 @@ from openai.types.chat import (
 )
 from typing_extensions import Required, TypeAlias, TypedDict

-from fastdeploy.input.multimodal.image import ImageMediaIO
-from fastdeploy.input.multimodal.video import VideoMediaIO
+from fastdeploy.multimodal.image import ImageMediaIO
+from fastdeploy.multimodal.video import VideoMediaIO

 class VideoURL(TypedDict, total=False):


@@ -19,9 +19,11 @@ import uuid
 import numpy as np

+from fastdeploy.engine.config import ModelConfig
 from fastdeploy.input.preprocess import InputPreprocessor
 from fastdeploy.inter_communicator import IPCSignal, ZmqClient
 from fastdeploy.metrics.work_metrics import work_process_metrics
+from fastdeploy.multimodal.registry import MultimodalRegistry
 from fastdeploy.platforms import current_platform
 from fastdeploy.utils import EngineError, api_server_logger

@@ -33,26 +35,34 @@ class EngineClient:
     def __init__(
         self,
+        model_name_or_path,
         tokenizer,
         max_model_len,
         tensor_parallel_size,
         pid,
         limit_mm_per_prompt,
         mm_processor_kwargs,
-        enable_mm=False,
+        # enable_mm=False,
         reasoning_parser=None,
         data_parallel_size=1,
         enable_logprob=False,
     ):
+        import fastdeploy.model_executor.models  # noqa: F401
+
+        architectures = ModelConfig({"model": model_name_or_path}).architectures[0]
+        if MultimodalRegistry.contains_model(architectures):
+            self.enable_mm = True
+        else:
+            self.enable_mm = False
+
         input_processor = InputPreprocessor(
             tokenizer,
             reasoning_parser,
             limit_mm_per_prompt,
             mm_processor_kwargs,
-            enable_mm,
+            self.enable_mm,
         )
         self.enable_logprob = enable_logprob
-        self.enable_mm = enable_mm
         self.reasoning_parser = reasoning_parser
         self.data_processor = input_processor.create_processor()
         self.max_model_len = max_model_len
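The registry lookup above is the crux of the change: whether a model is multimodal becomes a property of its architecture rather than a user-supplied flag. Below is a minimal sketch of how such a registry can work; the class body is an assumption for illustration, since only the contains_model call is visible in the diff.

# Hypothetical sketch of an architecture registry; the real
# fastdeploy.multimodal.registry.MultimodalRegistry may differ in detail.
class MultimodalRegistry:
    """Tracks which model architectures need multimodal preprocessing."""

    multimodal_models: set = set()

    @classmethod
    def register_model(cls, architecture: str) -> None:
        # Plausibly invoked from model definitions at import time, which would
        # explain the side-effect import of fastdeploy.model_executor.models
        # (noqa: F401) in the hunk above.
        cls.multimodal_models.add(architecture)

    @classmethod
    def contains_model(cls, architecture: str) -> bool:
        return architecture in cls.multimodal_models

If registration indeed happens at import time, support for a new multimodal model stays local to the model definition; no engine or server plumbing has to change.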


@@ -28,9 +28,11 @@ from tqdm import tqdm
 from fastdeploy.engine.args_utils import EngineArgs
 from fastdeploy.engine.engine import LLMEngine
 from fastdeploy.engine.sampling_params import SamplingParams
 # from fastdeploy.entrypoints.chat_utils import ChatCompletionMessageParam
-from fastdeploy.utils import llm_logger, retrive_model_from_server
+from fastdeploy.utils import (
+    deprecated_kwargs_warning,
+    llm_logger,
+    retrive_model_from_server,
+)
 from fastdeploy.worker.output import Logprob, LogprobsLists

 root_logger = logging.getLogger()

@@ -72,6 +74,8 @@ class LLM:
         enable_logprob: Optional[bool] = False,
         **kwargs,
     ):
+        deprecated_kwargs_warning(**kwargs)
+
         model = retrive_model_from_server(model, revision)
         engine_args = EngineArgs(
             model=model,
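deprecated_kwargs_warning is the compatibility shim for callers that still pass retired arguments such as enable_mm through LLM(**kwargs). Its implementation is not part of these hunks; the following is a plausible sketch, with the argument set and message text as assumptions.

import warnings

# Hypothetical stand-in for fastdeploy.utils.deprecated_kwargs_warning;
# the real helper may differ.
DEPRECATED_KWARGS = {"enable_mm"}  # assumed set of retired arguments


def deprecated_kwargs_warning(**kwargs):
    """Warn when callers pass keyword arguments that no longer have any effect."""
    for name in kwargs:
        if name in DEPRECATED_KWARGS:
            warnings.warn(
                f"'{name}' is deprecated and ignored; multimodal support is now "
                "detected from the model architecture automatically.",
                DeprecationWarning,
                stacklevel=2,
            )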


@@ -105,13 +105,14 @@ async def lifespan(app: FastAPI):
     pid = os.getpid()
     api_server_logger.info(f"{pid}")
     engine_client = EngineClient(
+        args.model,
         args.tokenizer,
         args.max_model_len,
         args.tensor_parallel_size,
         pid,
         args.limit_mm_per_prompt,
         args.mm_processor_kwargs,
-        args.enable_mm,
+        # args.enable_mm,
         args.reasoning_parser,
         args.data_parallel_size,
         args.enable_logprob,
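With the flag gone, the caller-visible change is that the model path now leads the argument list so EngineClient can inspect the architecture itself. Below is a standalone sketch of the new call shape; every value is a placeholder rather than a repo default, and the import path is assumed from the hunk's context.

import os

from fastdeploy.entrypoints.engine_client import EngineClient  # assumed module path

# Placeholder wiring that mirrors the lifespan() call above.
engine_client = EngineClient(
    "path/to/model",   # model_name_or_path: drives multimodal detection
    "path/to/model",   # tokenizer (placeholder; often the model directory)
    32768,             # max_model_len
    1,                 # tensor_parallel_size
    os.getpid(),       # pid
    None,              # limit_mm_per_prompt
    None,              # mm_processor_kwargs
    reasoning_parser=None,
    data_parallel_size=1,
    enable_logprob=False,
)
print(engine_client.enable_mm)  # True only for registered multimodal architectures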