[Bug fix] Fix memory allocation (#3475)

* Support batched tokens for EP

* Support batched tokens for EP and fix bug

* Fix bug for memory allocation
Author: chenjian
Date: 2025-08-19 19:48:24 +08:00
Committed by: GitHub
Parent: d2f6c3b998
Commit: c487b62ee0
2 changed files with 6 additions and 4 deletions


@@ -382,4 +382,5 @@ if __name__ == "__main__":
     args = parse_args()
     logger = get_logger("cache_transfer_manager", "cache_transfer_manager.log")
+    paddle.set_device(f"gpu:{args.device_id}")
     main()
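
Why this one-line change fixes the allocation bug: paddle.set_device selects the GPU for all tensor creation that follows, so calling it before main() keeps each cache-transfer worker's buffers off the default gpu:0. A minimal sketch of the effect (the run_worker wrapper and buffer shape are illustrative, not from the patch):

import paddle

def run_worker(device_id: int):
    # Hypothetical worker: select this process's GPU *before* any tensor is
    # created; otherwise the allocation below would land on the default gpu:0.
    paddle.set_device(f"gpu:{device_id}")
    buf = paddle.zeros([1024, 1024], dtype="float32")  # allocated on gpu:<device_id>
    print(buf.place)  # e.g. Place(gpu:1)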


@@ -293,10 +293,11 @@ class Config:
             )
         if not self.cache_config.enable_chunked_prefill:
-            assert self.max_num_batched_tokens >= self.max_model_len, (
-                f"max_num_batched_tokens: {self.max_num_batched_tokens} "
-                f"should be larger than or equal to max_model_len: {self.max_model_len}"
-            )
+            if not int(os.getenv("FD_ENABLE_INTERNAL_ADAPTER", "0")):
+                assert self.max_num_batched_tokens >= self.max_model_len, (
+                    f"max_num_batched_tokens: {self.max_num_batched_tokens} "
+                    f"should be larger than or equal to max_model_len: {self.max_model_len}"
+                )
         else:
             assert self.max_num_batched_tokens >= self.cache_config.block_size, (
                 f"max_num_batched_tokens: {self.max_num_batched_tokens} "