fix mtp multi batch (#5521)

2025-12-24 13:28:13 +08:00 · 2025-12-12 14:11:20 +08:00
parent d67388a479
commit 6cc3cb4bcf
1 changed file with 3 additions and 0 deletions
--- a/fastdeploy/worker/xpu_model_runner.py
+++ b/fastdeploy/worker/xpu_model_runner.py
@@ -1393,6 +1393,9 @@ class XPUModelRunner(ModelRunnerBase):
        # Reset block table and kv cache with global block num
        self.initialize_kv_cache()

+        if self.speculative_method in ["mtp"]:
+            self.proposer.initialize_kv_cache(main_model_num_blocks=self.num_gpu_blocks)
+
        # Reset free list
        free_list = list(
            range(