From c33e3629324b88603dcfcb6839291b3a2f69776e Mon Sep 17 00:00:00 2001 From: liyonghua0910 Date: Wed, 17 Sep 2025 11:42:51 +0800 Subject: [PATCH] [fix] fix key/value_cache_scales indent --- fastdeploy/worker/gpu_model_runner.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py index e77d9a18f..065a1a8b1 100644 --- a/fastdeploy/worker/gpu_model_runner.py +++ b/fastdeploy/worker/gpu_model_runner.py @@ -60,7 +60,6 @@ else: share_external_data, speculate_schedule_cache, set_data_ipc, - unset_data_ipc, ) from fastdeploy.model_executor.pre_and_post_process import ( @@ -1179,9 +1178,13 @@ class GPUModelRunner(ModelRunnerBase): set_data_ipc(val_cache, val_cache_name) cache_kvs_list.extend([key_cache, val_cache]) if kv_cache_quant_type == "block_wise_fp8": - key_cache_scales = paddle.full(shape=kv_cache_scale_shape, fill_value=0, dtype=paddle.get_default_dtype()) - val_cache_scales = paddle.full(shape=kv_cache_scale_shape, fill_value=0, dtype=paddle.get_default_dtype()) - cache_kvs_list.extend([key_cache_scales, val_cache_scales]) + key_cache_scales = paddle.full( + shape=kv_cache_scale_shape, fill_value=0, dtype=paddle.get_default_dtype() + ) + val_cache_scales = paddle.full( + shape=kv_cache_scale_shape, fill_value=0, dtype=paddle.get_default_dtype() + ) + cache_kvs_list.extend([key_cache_scales, val_cache_scales]) else: logger.info(f"..attaching kv cache for layer {i}: {kv_cache_shape}") key_cache = paddle.empty(shape=[], dtype=cache_type)