[Optimization] Compile RDMA by default, reduce cudagraph buffer size in mm, fix some config bugs (#5121)

* default compile rdma, reduce cudagraph buffer size in mm, fix some config logic
* update
* update
* fix bug
* enhance rdma compile
* fix
2025-12-24 13:28:13 +08:00 · 2025-11-20 17:19:47 +08:00
parent 6fa34102e8
commit 7ac25935c7
8 changed files with 126 additions and 37 deletions
--- a/fastdeploy/engine/args_utils.py
+++ b/fastdeploy/engine/args_utils.py
@@ -512,8 +512,10 @@ class EngineArgs:
                    raise ValueError(
                        "Please set --rdma_comm_ports argument when using " "rdma cache transfer protocol."
                    )
-                if len(self.rdma_comm_ports) != self.tensor_parallel_size:
-                    raise ValueError("The number of rdma comm ports must be equal to tensor parallel size.")
+                if len(self.rdma_comm_ports) != self.tensor_parallel_size * self.data_parallel_size:
+                    raise ValueError(
+                        f"The number of rdma comm ports must be equal to number of ranks ({self.data_parallel_size=} * {self.tensor_parallel_size=} = {self.data_parallel_size * self.tensor_parallel_size}), but got {len(self.rdma_comm_ports)}."
+                    )

            if envs.ENABLE_V1_KVCACHE_SCHEDULER == 1:
                if "ipc" in self.cache_transfer_protocol: