[Optimization] support mm prefill batch (#5313)

* support mm prefill batch * update code * update code * update code * update code * fix encoder cache bug * update code * update code * fix bug * fix paddle ocr bug * fix xpu bug * update code
2025-12-24 13:28:13 +08:00 · 2025-12-11 22:21:14 +08:00
parent 7116982995
commit 954a145d57
14 changed files with 769 additions and 296 deletions
--- a/fastdeploy/config.py
+++ b/fastdeploy/config.py
@@ -241,6 +241,12 @@ class ModelConfig:

        self._post_init()

+    def disable_mm_prefill_batch(self):
+        """
+        Return True if the model architecture has multimodal (mm) prefill batching disabled.
+        """
+        return self._architecture in ["Ernie5ForCausalLM"]
+
    def _post_init(self):
        self.is_unified_ckpt = check_unified_ckpt(self.model)
        self.runner_type = self._get_runner_type(self.architectures, self.runner)
@@ -1618,7 +1624,7 @@ class FDConfig:
                and self.model_config is not None
                and self.model_config.enable_mm
            ):
-                self.max_prefill_batch = 1  # TODO:当前多模prefill阶段只支持并行度为1,待优化
+                self.max_prefill_batch = 1  # TODO:当前V0多模prefill阶段只支持并行度为1,待优化
        else:
            self.max_prefill_batch = self.scheduler_config.max_num_seqs