mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Optimization] default compile rdma, reduce cudagraph buffer size in mm, fix some config bug (#5121)
* default compile rdma, reduce cudagraph buffer size in mm, fix some config logic
* update
* update
* fix bug
* enhance rdma compile
* fix
This commit is contained in:
@@ -512,8 +512,10 @@ class EngineArgs:
|
||||
raise ValueError(
|
||||
"Please set --rdma_comm_ports argument when using " "rdma cache transfer protocol."
|
||||
)
|
||||
if len(self.rdma_comm_ports) != self.tensor_parallel_size:
|
||||
raise ValueError("The number of rdma comm ports must be equal to tensor parallel size.")
|
||||
if len(self.rdma_comm_ports) != self.tensor_parallel_size * self.data_parallel_size:
|
||||
raise ValueError(
|
||||
f"The number of rdma comm ports must be equal to number of ranks ({self.data_parallel_size=} * {self.tensor_parallel_size=} = {self.data_parallel_size * self.tensor_parallel_size}), but got {len(self.rdma_comm_ports)}."
|
||||
)
|
||||
|
||||
if envs.ENABLE_V1_KVCACHE_SCHEDULER == 1:
|
||||
if "ipc" in self.cache_transfer_protocol:
|
||||
|
||||
Reference in New Issue
Block a user