custom all reduce support cuda graph (#2938)

* Support enabling cuda graph and custom all reduce at the same time, and fix the overwritten custom all reduce flag * rename communication_op to communication
2025-10-05 08:37:06 +08:00 · 2025-07-21 22:52:03 +08:00
parent ff4569f135
commit 0262ef7eb3
21 changed files with 88 additions and 51 deletions
--- a/fastdeploy/model_executor/models/deepseek_v3.py
+++ b/fastdeploy/model_executor/models/deepseek_v3.py
@@ -25,7 +25,7 @@ from paddleformers.transformers import PretrainedModel
 from paddleformers.utils.log import logger

 from fastdeploy.config import FDConfig
-from fastdeploy.distributed.communication_op import tensor_model_parallel_all_reduce
+from fastdeploy.distributed.communication import tensor_model_parallel_all_reduce
 from fastdeploy.model_executor.forward_meta import ForwardMeta
 from fastdeploy.model_executor.layers.activation import SiluAndMul
 from fastdeploy.model_executor.layers.attention.attention import Attention