From 39d2a1de4686f093ef0bcf68773ee811bd84c017 Mon Sep 17 00:00:00 2001 From: gaoziyuan <88373061+gzy19990617@users.noreply.github.com> Date: Thu, 10 Jul 2025 13:44:56 +0800 Subject: [PATCH] fix num_blocks_local when small size model in TP2 running mode (#2793) --- fastdeploy/worker/worker_process.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fastdeploy/worker/worker_process.py b/fastdeploy/worker/worker_process.py index 199d6c139..10d01fbae 100644 --- a/fastdeploy/worker/worker_process.py +++ b/fastdeploy/worker/worker_process.py @@ -346,11 +346,11 @@ class PaddleDisWorkerProc(): model_block_memory_used) # NOTE(liuzichang): Too many block will lead to illegal memory access # We will develop dynamic limits in future. - if num_blocks_local > 20000: + if num_blocks_local > 40000: logger.info( - f"------- Reset num_blocks_local {num_blocks_local} to 20000" + f"------- Reset num_blocks_local {num_blocks_local} to 40000" ) - num_blocks_local = min(20000, num_blocks_local) + num_blocks_local = min(40000, num_blocks_local) logger.info( f"------- model_block_memory_used:{model_block_memory_used} --------" )