[XPU] Update doc and add scripts for downloading dependencies (#2845)

* [XPU] update xvllm download

* update supported models

* fix XPU model runner when running a small model on a device with huge memory

* update doc
This commit is contained in:
yulangz
2025-07-16 11:05:56 +08:00
committed by GitHub
parent 101ad33332
commit 17314ee126
8 changed files with 295 additions and 260 deletions

View File

@@ -774,7 +774,6 @@ class XPUModelRunner(ModelRunnerBase):
del self.share_inputs["caches"]
if self.forward_meta is not None:
del self.forward_meta.caches
del self.share_inputs["block_tables"]
paddle.device.xpu.empty_cache()
def cal_theortical_kvcache(self):
@@ -817,11 +816,6 @@ class XPUModelRunner(ModelRunnerBase):
# Reset block table and kv cache with global block num
self.initialize_kv_cache()
self.share_inputs["block_tables"] = paddle.full(
[self.parallel_config.max_num_seqs, self.num_gpu_blocks],
-1,
dtype="int32")
# Reset free list
free_list = list(
range(