Custom all-reduce: support CUDA graph (#2938)

* Support enabling CUDA graph and custom all-reduce at the same time, and fix the overwritten custom all-reduce flag
* Rename communication_op to communication
2025-09-30 14:22:27 +08:00 · 2025-07-21 22:52:03 +08:00
parent ff4569f135
commit 0262ef7eb3
21 changed files with 88 additions and 51 deletions
--- a/fastdeploy/engine/engine.py
+++ b/fastdeploy/engine/engine.py
@@ -1024,7 +1024,7 @@ class LLMEngine:
            "do_profile": self.do_profile,
            "dynamic_load_weight": self.cfg.model_config.dynamic_load_weight,
            "disable_any_whitespace": self.cfg.disable_any_whitespace,
-            "enable-custom-all-reduce": self.cfg.parallel_config.enable_custom_all_reduce,
+            "enable_custom_all_reduce": self.cfg.parallel_config.enable_custom_all_reduce,
            "enable_logprob": self.cfg.enable_logprob,
            "enable_mm": self.cfg.enable_mm,
        }