mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 16:48:03 +08:00)
@@ -61,7 +61,8 @@ class GpuWorker(WorkerBase):
             gc.collect()
             paddle.device.cuda.empty_cache()
             if self.parallel_config.enable_custom_all_reduce:
-                from fastdeploy.distributed.communication_op import use_custom_allreduce
+                from fastdeploy.distributed.communication_op import \
+                    use_custom_allreduce
                 use_custom_allreduce()
         else:
             raise RuntimeError(
@@ -137,7 +138,7 @@ class GpuWorker(WorkerBase):
 
         available_kv_cache_memory = after_run_meminfo.total * \
             self.parallel_config.gpu_memory_utilization - after_run_meminfo.used - paddle_peak_increase
-        available_kv_cache_memory += model_block_memory_used * self.parallel_config.max_block_num
+        available_kv_cache_memory += model_block_memory_used * self.parallel_config.total_block_num
 
         end_time = time.perf_counter()
         logger.info((
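
Note on the second hunk: the available KV cache memory is the configured fraction of total device memory, minus the memory in use after the profiling run and the peak allocator growth Paddle observed during it, with the memory of the KV cache blocks held during profiling added back; the change switches that last term from max_block_num to total_block_num. The standalone sketch below only mirrors that arithmetic and is not the FastDeploy implementation; units are assumed to be bytes, the meaning of total_block_num is inferred from its name, and the function name is hypothetical.

# Minimal sketch of the memory arithmetic in this hunk (not the actual
# FastDeploy implementation). Field names follow the diff; units and the
# meaning of total_block_num are assumptions.
def estimate_available_kv_cache_memory(
    total_mem: int,               # after_run_meminfo.total, bytes (assumed)
    used_mem: int,                # after_run_meminfo.used, bytes (assumed)
    paddle_peak_increase: int,    # peak allocator growth during the profiling run
    gpu_memory_utilization: float,
    model_block_memory_used: int,  # bytes per KV cache block (assumed)
    total_block_num: int,
) -> int:
    # Budget = fraction of total device memory we are allowed to use,
    # minus what is already used and the peak growth seen while profiling.
    available = total_mem * gpu_memory_utilization - used_mem - paddle_peak_increase
    # Memory of the KV cache blocks reserved during profiling is added back;
    # the diff switches this term from max_block_num to total_block_num.
    available += model_block_memory_used * total_block_num
    return int(available)

# Example with round numbers: 80 GiB card, 50 GiB in use after the dummy run,
# 2 GiB peak increase, 90% utilization, 2 MiB per block, 1000 blocks.
GiB = 1024 ** 3
print(estimate_available_kv_cache_memory(80 * GiB, 50 * GiB, 2 * GiB, 0.9, 2 * 1024 ** 2, 1000))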