[Feature] support mm disable_chunked (#4803)

* support mm disable_chunked * update code * update code * update code
2025-12-24 13:28:13 +08:00 · 2025-11-06 21:32:25 +08:00
parent 6b68c58e8d
commit cc34487810
5 changed files with 421 additions and 9 deletions
--- a/fastdeploy/engine/args_utils.py
+++ b/fastdeploy/engine/args_utils.py
@@ -314,6 +314,10 @@ class EngineArgs:
    """
    additional decode block num
    """
+    disable_chunked_mm_input: bool = False
+    """
+    Disable chunked_mm_input for multi-modal inference.
+    """

    scheduler_name: str = "local"
    """
@@ -936,6 +940,13 @@ class EngineArgs:
            help="ports for rdma communication.",
        )

+        perf_group.add_argument(
+            "--disable-chunked-mm-input",
+            action="store_true",
+            default=EngineArgs.disable_chunked_mm_input,
+            help="Disable chunked mm input.",
+        )
+
        # Router parameters group
        router_group = parser.add_argument_group("Router")
        router_group.add_argument(