Fix MTP multi-batch (#5521)

This commit is contained in:
cmcamdy
2025-12-12 14:11:20 +08:00
committed by GitHub
parent d67388a479
commit 6cc3cb4bcf

View File

@@ -1393,6 +1393,9 @@ class XPUModelRunner(ModelRunnerBase):
# Reset block table and kv cache with global block num
self.initialize_kv_cache()
if self.speculative_method in ["mtp"]:
self.proposer.initialize_kv_cache(main_model_num_blocks=self.num_gpu_blocks)
# Reset free list
free_list = list(
range(