[Feature] remove dependency on enable_mm and refine multimodal's code (#3014)

* remove dependency on enable_mm

* fix code style check error

* fix code style check error

* update docs

* resolve conflicts on model config

* fix unit test error

* fix code style check error

---------

Co-authored-by: shige <1021937542@qq.com>
Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
ApplEOFDiscord
2025-08-01 20:01:18 +08:00
committed by GitHub
parent 243394044d
commit b71cbb466d
24 changed files with 118 additions and 29 deletions

View File

@@ -25,6 +25,7 @@ from fastdeploy.config import (
ModelConfig,
ParallelConfig,
)
from fastdeploy.multimodal.registry import MultimodalRegistry
from fastdeploy.platforms import current_platform
from fastdeploy.scheduler import SchedulerConfig
from fastdeploy.utils import ceil_div, get_host_ip, is_port_available, llm_logger
@@ -78,7 +79,7 @@ class Config:
engine_worker_queue_port: int = 8002,
limit_mm_per_prompt: Optional[Dict[str, Any]] = None,
mm_processor_kwargs: Optional[Dict[str, Any]] = None,
- enable_mm: bool = False,
+ # enable_mm: bool = False,
splitwise_role: str = "mixed",
innode_prefill_ports: Optional[List[int]] = None,
max_num_partial_prefills: int = 1,
@@ -156,7 +157,7 @@ class Config:
self.max_num_seqs = max_num_seqs
self.limit_mm_per_prompt = limit_mm_per_prompt
self.mm_processor_kwargs = mm_processor_kwargs
- self.enable_mm = enable_mm
+ # self.enable_mm = enable_mm
self.speculative_config = speculative_config
self.use_warmup = use_warmup
self.splitwise_role = splitwise_role
@@ -174,11 +175,19 @@ class Config:
assert self.splitwise_role in ["mixed", "prefill", "decode"]
import fastdeploy.model_executor.models # noqa: F401
architectures = self.model_config.architectures[0]
if MultimodalRegistry.contains_model(architectures):
self.enable_mm = True
else:
self.enable_mm = False
# TODO
self.max_prefill_batch = 3
if current_platform.is_xpu():
self.max_prefill_batch = 1
- if enable_mm:
+ if self.enable_mm:
self.max_prefill_batch = 1 # TODO: the multimodal prefill stage currently supports only a parallelism of 1; to be optimized
# TODO(@wufeisheng): TP and EP need to be supported simultaneously.