Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 08:37:06 +08:00)
[Bug fix] Fix memory allocation (#3475)
* Support batched tokens for EP
* Support batched tokens for EP and fix bug
* Fix bug for memory allocation
@@ -382,4 +382,5 @@ if __name__ == "__main__":
     args = parse_args()
     logger = get_logger("cache_transfer_manager", "cache_transfer_manager.log")
 
+    paddle.set_device(f"gpu:{args.device_id}")
     main()
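For context, the added line binds the worker process to its assigned GPU before any tensors are created in main(); without it, PaddlePaddle places new tensors on gpu:0 by default, so multiple cache-transfer workers would all allocate their buffers on device 0. A minimal standalone sketch of that behavior (the --device_id flag mirrors the script's parse_args; the rest is illustrative, not the repo's actual code):

import argparse

import paddle

parser = argparse.ArgumentParser()
parser.add_argument("--device_id", type=int, default=0)
args = parser.parse_args()

# Bind this process's default placement to its assigned GPU. Without this
# call, paddle allocates on gpu:0, so several worker processes would
# oversubscribe device 0's memory.
paddle.set_device(f"gpu:{args.device_id}")

x = paddle.zeros([4, 4], dtype="float32")
print(x.place)  # e.g. Place(gpu:1) when run with --device_id 1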
@@ -293,10 +293,11 @@ class Config:
         )
 
         if not self.cache_config.enable_chunked_prefill:
-            assert self.max_num_batched_tokens >= self.max_model_len, (
-                f"max_num_batched_tokens: {self.max_num_batched_tokens} "
-                f"should be larger than or equal to max_model_len: {self.max_model_len}"
-            )
+            if not int(os.getenv("FD_ENABLE_INTERNAL_ADAPTER", "0")):
+                assert self.max_num_batched_tokens >= self.max_model_len, (
+                    f"max_num_batched_tokens: {self.max_num_batched_tokens} "
+                    f"should be larger than or equal to max_model_len: {self.max_model_len}"
+                )
         else:
             assert self.max_num_batched_tokens >= self.cache_config.block_size, (
                 f"max_num_batched_tokens: {self.max_num_batched_tokens} "
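The second hunk makes the strict max_num_batched_tokens >= max_model_len check skippable when the FD_ENABLE_INTERNAL_ADAPTER environment variable is set, so deployments using the internal adapter can run with fewer batched tokens than the model length. A standalone sketch of the guarded check follows; the names mirror the diff, but the function wrapper and the tail of the second assertion message (which the hunk truncates) are assumptions:

import os

def validate_batched_tokens(max_num_batched_tokens, max_model_len,
                            block_size, enable_chunked_prefill):
    # Sketch of the validation after this commit; the real code lives on
    # FastDeploy's Config class.
    if not enable_chunked_prefill:
        # New escape hatch: FD_ENABLE_INTERNAL_ADAPTER=1 skips the strict
        # requirement that a batch can hold a full-length sequence.
        if not int(os.getenv("FD_ENABLE_INTERNAL_ADAPTER", "0")):
            assert max_num_batched_tokens >= max_model_len, (
                f"max_num_batched_tokens: {max_num_batched_tokens} "
                f"should be larger than or equal to max_model_len: {max_model_len}"
            )
    else:
        # With chunked prefill, a batch only needs to cover one cache block.
        # The message tail is assumed; the hunk ends mid-assertion.
        assert max_num_batched_tokens >= block_size, (
            f"max_num_batched_tokens: {max_num_batched_tokens} "
            f"should be larger than or equal to block_size: {block_size}"
        )

validate_batched_tokens(8192, 4096, 64, enable_chunked_prefill=False)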