[Optimization] Compile RDMA by default, reduce cudagraph buffer size in mm, fix some config bugs (#5121)

* compile RDMA by default, reduce cudagraph buffer size in mm, fix some config logic

* update

* update

* fix bug

* enhance rdma compile

* fix
This commit is contained in:
Yuanle Liu
2025-11-20 17:19:47 +08:00
committed by GitHub
parent 6fa34102e8
commit 7ac25935c7
8 changed files with 126 additions and 37 deletions

View File

@@ -512,8 +512,10 @@ class EngineArgs:
raise ValueError(
"Please set --rdma_comm_ports argument when using " "rdma cache transfer protocol."
)
if len(self.rdma_comm_ports) != self.tensor_parallel_size:
raise ValueError("The number of rdma comm ports must be equal to tensor parallel size.")
if len(self.rdma_comm_ports) != self.tensor_parallel_size * self.data_parallel_size:
raise ValueError(
f"The number of rdma comm ports must be equal to number of ranks ({self.data_parallel_size=} * {self.tensor_parallel_size=} = {self.data_parallel_size * self.tensor_parallel_size}), but got {len(self.rdma_comm_ports)}."
)
if envs.ENABLE_V1_KVCACHE_SCHEDULER == 1:
if "ipc" in self.cache_transfer_protocol: