[BugFix] Fix Configs (#2849)

* fix config * fix config
2025-10-05 08:37:06 +08:00 · 2025-07-16 10:50:36 +08:00
parent 0fad10b35a
commit 101ad33332
10 changed files with 30 additions and 37 deletions
--- a/fastdeploy/worker/xpu_worker.py
+++ b/fastdeploy/worker/xpu_worker.py
@@ -66,7 +66,7 @@ class XpuWorker(WorkerBase):
            device=self.device,
            rank=self.rank,
            local_rank=self.local_rank)
-        
+
    def graph_optimize_and_warm_up_model(self) -> None:
        """
            Optimizes the inference graph using the specified optimization options.
@@ -86,9 +86,10 @@ class XpuWorker(WorkerBase):
            You may limit the usage of GPU memory
            by adjusting the `gpu_memory_utilization` parameter.
        """
-        from fastdeploy.model_executor.ops.xpu import \
-            xpu_get_free_global_memory, xpu_get_total_global_memory, xpu_get_used_global_memory
-        
+        from fastdeploy.model_executor.ops.xpu import (
+            xpu_get_free_global_memory, xpu_get_total_global_memory,
+            xpu_get_used_global_memory)
+
        total_memory = xpu_get_total_global_memory(self.local_rank)
        used_memory = xpu_get_used_global_memory(self.local_rank)
        free_memory = xpu_get_free_global_memory(self.local_rank)
@@ -98,12 +99,12 @@ class XpuWorker(WorkerBase):

        self.model_runner.prepare_profile()
        self.model_runner.profile_run()
-        
+
        total_available_memory = int(total_memory * self.parallel_config.gpu_memory_utilization)
        used_memory = xpu_get_used_global_memory(self.local_rank)
        available_kv_cache_memory = total_available_memory - used_memory
        model_block_memory_used = self.cal_theortical_kvcache()
-        available_kv_cache_memory += model_block_memory_used * self.parallel_config.max_block_num
+        available_kv_cache_memory += model_block_memory_used * self.parallel_config.total_block_num

        self.model_runner.clear_block_table()

@@ -111,7 +112,7 @@ class XpuWorker(WorkerBase):
                    used_memory: {used_memory}, available_kv_cache_memory: {available_kv_cache_memory}")
        paddle.device.xpu.empty_cache()
        return available_kv_cache_memory  # approximate value
-    
+
    def cal_theortical_kvcache(self) -> int:
        """Return ``self.model_runner.cal_theortical_kvcache()``; used by this worker as memory per KV-cache block."""
        return self.model_runner.cal_theortical_kvcache()
@@ -154,10 +155,6 @@ class XpuWorker(WorkerBase):
        """ """
        return True

-    def cal_theortical_kvcache(self) -> int:
-        """ """
-        return self.model_runner.cal_theortical_kvcache()
-
    def reinitialize_kv_cache(self, num_gpu_blocks: int) -> None:
        """ """
        self.model_runner.update_share_input_block_num(