mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 09:07:10 +08:00
[LLM] fix several bugs (#2878)
This commit is contained in:
@@ -854,6 +854,11 @@ class Config:
|
||||
self.max_num_batched_tokens >= self.max_model_len
|
||||
), f"max_num_batched_tokens: {self.max_num_batched_tokens} " \
|
||||
f"should be larger than or equal to max_model_len: {self.max_model_len}"
|
||||
else:
|
||||
assert (
|
||||
self.max_num_batched_tokens >= self.cache_config.block_size
|
||||
), f"max_num_batched_tokens: {self.max_num_batched_tokens} " \
|
||||
f"should be larger than or equal to block_size: {self.cache_config.block_size}"
|
||||
|
||||
if self.max_num_partial_prefills > 1:
|
||||
assert (self.cache_config.enable_chunked_prefill is True), \
|
||||
|
Reference in New Issue
Block a user