mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-17 22:21:48 +08:00
[Bug fix] Fix block num in scheduler v1 for release2.0.4 (#3314)
* fix bug for scheduler v0
* fix block num setting in scheduler v1
* fix block num setting in scheduler v1
* fix block num setting in scheduler v1
* fix block num setting in scheduler v1
* fix block num setting in scheduler v1
This commit is contained in:
@@ -234,8 +234,7 @@ class ResourceManagerV1(ResourceManager):
 llm_logger.debug(
 f"scheduler prefill task: {request} request.need_prefill_tokens {request.need_prefill_tokens} request.num_computed_tokens {request.num_computed_tokens}"
 )
-num_new_tokens = request.prompt_token_ids_len - request.num_computed_tokens
-num_new_tokens = min(num_new_tokens, token_budget)
+num_new_tokens = self._get_num_new_tokens(request, token_budget)
 num_new_block = self.get_new_block_nums(request, num_new_tokens)
 # Allocate blocks to prefill
 if self.cache_manager.can_allocate_gpu_blocks(num_new_block):
Reference in New Issue
Block a user