mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[Feature] remove dependency on enable_mm and refine multimodal's code (#3014)
* remove dependency on enable_mm
* fix codestyle check error
* fix codestyle check error
* update docs
* resolve conflicts on model config
* fix unit test error
* fix code style check error

---------

Co-authored-by: shige <1021937542@qq.com>
Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
@@ -25,6 +25,7 @@ from fastdeploy.config import (
|
||||
ModelConfig,
|
||||
ParallelConfig,
|
||||
)
|
||||
from fastdeploy.multimodal.registry import MultimodalRegistry
|
||||
from fastdeploy.platforms import current_platform
|
||||
from fastdeploy.scheduler import SchedulerConfig
|
||||
from fastdeploy.utils import ceil_div, get_host_ip, is_port_available, llm_logger
|
||||
@@ -78,7 +79,7 @@ class Config:
|
||||
engine_worker_queue_port: int = 8002,
|
||||
limit_mm_per_prompt: Optional[Dict[str, Any]] = None,
|
||||
mm_processor_kwargs: Optional[Dict[str, Any]] = None,
|
||||
enable_mm: bool = False,
|
||||
# enable_mm: bool = False,
|
||||
splitwise_role: str = "mixed",
|
||||
innode_prefill_ports: Optional[List[int]] = None,
|
||||
max_num_partial_prefills: int = 1,
|
||||
@@ -156,7 +157,7 @@ class Config:
|
||||
self.max_num_seqs = max_num_seqs
|
||||
self.limit_mm_per_prompt = limit_mm_per_prompt
|
||||
self.mm_processor_kwargs = mm_processor_kwargs
|
||||
self.enable_mm = enable_mm
|
||||
# self.enable_mm = enable_mm
|
||||
self.speculative_config = speculative_config
|
||||
self.use_warmup = use_warmup
|
||||
self.splitwise_role = splitwise_role
|
||||
@@ -174,11 +175,19 @@ class Config:
|
||||
|
||||
assert self.splitwise_role in ["mixed", "prefill", "decode"]
|
||||
|
||||
import fastdeploy.model_executor.models # noqa: F401
|
||||
|
||||
architectures = self.model_config.architectures[0]
|
||||
if MultimodalRegistry.contains_model(architectures):
|
||||
self.enable_mm = True
|
||||
else:
|
||||
self.enable_mm = False
|
||||
|
||||
# TODO
|
||||
self.max_prefill_batch = 3
|
||||
if current_platform.is_xpu():
|
||||
self.max_prefill_batch = 1
|
||||
if enable_mm:
|
||||
if self.enable_mm:
|
||||
self.max_prefill_batch = 1 # TODO:当前多模prefill阶段只支持并行度为1,待优化
|
||||
|
||||
# TODO(@wufeisheng): TP and EP need to be supported simultaneously.
|
||||
|
Reference in New Issue
Block a user