[BugFix] fix vl performance bug (#5181)

* fix vl performance bug
* update code
* update code

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
2025-12-24 13:28:13 +08:00 · 2025-11-26 21:06:52 +08:00
parent 209970836e
commit bf30f45738
2 changed files with 10 additions and 2 deletions
--- a/fastdeploy/config.py
+++ b/fastdeploy/config.py
@@ -1574,7 +1574,7 @@ class FDConfig:
            self.max_prefill_batch = int(os.getenv("MAX_PREFILL_NUM", "3"))
            if current_platform.is_xpu():
                self.max_prefill_batch = 1
-            if self.model_config is not None and self.model_config.enable_mm and not envs.ENABLE_V1_KVCACHE_SCHEDULER:
+            if self.model_config is not None and self.model_config.enable_mm:
                self.max_prefill_batch = 1  # TODO:当前多模prefill阶段只支持并行度为1,待优化
        else:
            self.max_prefill_batch = self.scheduler_config.max_num_seqs