Unify server-side and model-side Config (Part1) (#3018)

* move cache config
* fix mtp
2025-10-05 08:37:06 +08:00 · 2025-07-28 10:51:52 +08:00
parent 8f426c1690
commit 6ccc10ad47
23 changed files with 243 additions and 289 deletions
--- a/fastdeploy/worker/gpu_worker.py
+++ b/fastdeploy/worker/gpu_worker.py
@@ -137,7 +137,7 @@ class GpuWorker(WorkerBase):
        pynvml.nvmlShutdown()

        available_kv_cache_memory = (
-            after_run_meminfo.total * self.parallel_config.gpu_memory_utilization
+            after_run_meminfo.total * self.cache_config.gpu_memory_utilization
            - after_run_meminfo.used
            - paddle_peak_increase
        )