mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
fix mtp multi batch (#5521)
This commit is contained in:
@@ -1393,6 +1393,9 @@ class XPUModelRunner(ModelRunnerBase):
|
||||
# Reset block table and kv cache with global block num
|
||||
self.initialize_kv_cache()
|
||||
|
||||
if self.speculative_method in ["mtp"]:
|
||||
self.proposer.initialize_kv_cache(main_model_num_blocks=self.num_gpu_blocks)
|
||||
|
||||
# Reset free list
|
||||
free_list = list(
|
||||
range(
|
||||
|
||||
Reference in New Issue
Block a user