Enable custom all-reduce by default; replace --enable_custom_all_reduce with --disable_custom_all_reduce (#3538)

2025-12-24 13:28:13 +08:00 · 2025-08-22 16:59:05 +08:00
parent 27666ee586
commit df7c31012b
15 changed files with 18 additions and 30 deletions
--- a/fastdeploy/worker/gpu_worker.py
+++ b/fastdeploy/worker/gpu_worker.py
@@ -69,7 +69,7 @@ class GpuWorker(WorkerBase):
            gc.collect()
            paddle.device.cuda.empty_cache()
            if (
-                self.parallel_config.enable_custom_all_reduce
+                not self.parallel_config.disable_custom_all_reduce
                and self.parallel_config.tensor_parallel_size > 1
                and paddle.is_compiled_with_cuda()
            ):
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -516,7 +516,7 @@ def parse_args():
        help="enable prefix cache",
    )
    parser.add_argument(
-        "--enable_custom_all_reduce",
+        "--disable_custom_all_reduce",
        action="store_true",
-        help="enable custom all-reduce",
+        help="disable custom all-reduce",
    )