Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-12-24 13:28:13 +08:00)
Modified to support custom all reduce by default (#3538)
This commit is contained in:
@@ -69,7 +69,7 @@ class GpuWorker(WorkerBase):
|
||||
gc.collect()
|
||||
paddle.device.cuda.empty_cache()
|
||||
if (
|
||||
- self.parallel_config.enable_custom_all_reduce
|
||||
+ not self.parallel_config.disable_custom_all_reduce
|
||||
and self.parallel_config.tensor_parallel_size > 1
|
||||
and paddle.is_compiled_with_cuda()
|
||||
):
|
||||
|
||||
@@ -516,7 +516,7 @@ def parse_args():
|
||||
help="enable prefix cache",
|
||||
)
|
||||
parser.add_argument(
|
||||
- "--enable_custom_all_reduce",
|
||||
+ "--disable_custom_all_reduce",
|
||||
action="store_true",
|
||||
help="enable custom all-reduce",
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user