[BugFix] Fix Configs (#2849)

* fix config
* fix config
2025-10-04 08:16:42 +08:00 · 2025-07-16 10:50:36 +08:00
parent 0fad10b35a
commit 101ad33332
10 changed files with 30 additions and 37 deletions
--- a/fastdeploy/worker/gcu_model_runner.py
+++ b/fastdeploy/worker/gcu_model_runner.py
@@ -480,8 +480,8 @@ class GCUModelRunner(ModelRunnerBase):
        # Initialize free list
        free_list = list(
            range(
-                self.parallel_config.max_block_num - 1,
-                int(self.parallel_config.max_block_num *
+                self.parallel_config.total_block_num - 1,
+                int(self.parallel_config.total_block_num *
                    self.parallel_config.kv_cache_ratio) - 1, -1))
        self.free_list_len = len(free_list)
        self.share_inputs["free_list"] = paddle.to_tensor(free_list,
@@ -1114,7 +1114,7 @@ class GCUModelRunner(ModelRunnerBase):
        """Execute a forward pass with dummy inputs to profile the memory usage of the model."""

        # Initialize kv cache for profile run. After profile run kv cache will be reset.
-        self.num_gcu_blocks = self.parallel_config.max_block_num
+        self.num_gcu_blocks = self.parallel_config.total_block_num
        self.initialize_kv_cache()

        # 1. Profile with multimodal encoder & encoder cache