From 533896fd63e496f6f60d97ee13bbd48c579fcd76 Mon Sep 17 00:00:00 2001 From: AIbin <37361953+chang-wenbin@users.noreply.github.com> Date: Fri, 10 Oct 2025 21:31:38 +0800 Subject: [PATCH] fix paddle_peak_increase size (#4355) --- fastdeploy/worker/gpu_worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastdeploy/worker/gpu_worker.py b/fastdeploy/worker/gpu_worker.py index bf1b88dab..601efd16b 100644 --- a/fastdeploy/worker/gpu_worker.py +++ b/fastdeploy/worker/gpu_worker.py @@ -141,7 +141,7 @@ class GpuWorker(WorkerBase): paddle_allocated_mem_after_run = paddle.device.cuda.max_memory_allocated(local_rank) model_block_memory_used = self.cal_theortical_kvcache() - paddle_peak_increase = paddle_reserved_mem_after_run - paddle_allocated_mem_before_run + paddle_peak_increase = paddle_allocated_mem_after_run - paddle_allocated_mem_before_run paddle.device.cuda.empty_cache()