Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-20 23:29:39 +08:00
custom all reduce support cuda graph (#2938)
Some checks failed: Deploy GitHub Pages / deploy (push) has been cancelled.
* Support enabling CUDA graph and custom all-reduce at the same time, and fix the custom all-reduce flag being overwritten
* Rename communication_op to communication
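The flag's job, per the code comment in the diff below, is to opt into the custom all-reduce kernel while keeping NCCL (`dist.all_reduce`) as the fallback. A minimal sketch of that dispatch pattern, with a hypothetical `custom_all_reduce` callable and a simplified config object (not FastDeploy's actual routing code):

```python
import paddle.distributed as dist

def all_reduce_with_fallback(tensor, config, custom_all_reduce=None):
    # Hypothetical dispatch sketch: prefer the fused custom kernel when the
    # flag is set and a kernel is available, otherwise fall back to NCCL.
    if config.enable_custom_all_reduce and custom_all_reduce is not None:
        return custom_all_reduce(tensor)
    dist.all_reduce(tensor)  # NCCL fallback; reduces in place
    return tensor
```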
```diff
@@ -201,6 +201,8 @@ class ParallelConfig:
         # disable any whitespace for guided decoding
         self.disable_any_whitespace: bool = True
         self.pod_ip: str = None
+        # enable the custom all-reduce kernel and fall back to NCCL(dist.all_reduce).
+        self.enable_custom_all_reduce: bool = False
         for key, value in args.items():
             if hasattr(self, key):
                 setattr(self, key, value)
@@ -213,8 +215,6 @@ class ParallelConfig:
             self.moe_phase = MoEPhase.DECODER
         else:
             raise NotImplementedError
-        # enable the custom all-reduce kernel and fall back to NCCL(dist.all_reduce).
-        self.enable_custom_all_reduce: bool = False
 
         # pd_disaggregation
         use_pd_disaggregation: int = int(os.getenv("FLAGS_use_pd_disaggregation", 0))
```
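Why moving the two lines above the args loop fixes the overwrite: `setattr` only copies a user-supplied value onto attributes that already exist (the `hasattr` guard), and a default assigned after the loop discards whatever the loop set. A runnable toy reproduction, using hypothetical simplified classes rather than the real `ParallelConfig`:

```python
class FixedSketch:
    """Default set before the args loop, as in this commit."""
    def __init__(self, args: dict):
        self.enable_custom_all_reduce: bool = False  # default first
        for key, value in args.items():
            if hasattr(self, key):        # True: attribute already exists
                setattr(self, key, value)

class BuggySketch:
    """Default set after the args loop, as before this commit."""
    def __init__(self, args: dict):
        for key, value in args.items():
            if hasattr(self, key):        # False: attribute not defined yet
                setattr(self, key, value)
        self.enable_custom_all_reduce: bool = False  # clobbers user intent

args = {"enable_custom_all_reduce": True}
assert FixedSketch(args).enable_custom_all_reduce is True
assert BuggySketch(args).enable_custom_all_reduce is False  # flag lost
```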