mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 08:16:42 +08:00
[XPU] Update doc and add scripts for downloading dependencies (#2845)
* [XPU] update xvllm download * update supported models * fix xpu model runner in huge memory with small model * update doc
This commit is contained in:
@@ -774,7 +774,6 @@ class XPUModelRunner(ModelRunnerBase):
|
||||
del self.share_inputs["caches"]
|
||||
if self.forward_meta is not None:
|
||||
del self.forward_meta.caches
|
||||
del self.share_inputs["block_tables"]
|
||||
paddle.device.xpu.empty_cache()
|
||||
|
||||
def cal_theortical_kvcache(self):
|
||||
@@ -817,11 +816,6 @@ class XPUModelRunner(ModelRunnerBase):
|
||||
# Reset block table and kv cache with global block num
|
||||
self.initialize_kv_cache()
|
||||
|
||||
self.share_inputs["block_tables"] = paddle.full(
|
||||
[self.parallel_config.max_num_seqs, self.num_gpu_blocks],
|
||||
-1,
|
||||
dtype="int32")
|
||||
|
||||
# Reset free list
|
||||
free_list = list(
|
||||
range(
|
||||
|
Reference in New Issue
Block a user