mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[FDConfig]Remove max_num_batched_tokens/max_num_seqs in parallel config (#4116)
* remove max_num_batched_tokens in parallel config
* remove max_num_seqs
* update test case
* fix test
* fix

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
@@ -71,7 +71,7 @@ class EngineService:
|
||||
|
||||
if envs.ENABLE_V1_KVCACHE_SCHEDULER:
|
||||
self.resource_manager = ResourceManagerV1(
|
||||
- cfg.max_num_seqs,
+ cfg.scheduler_config.max_num_seqs,
|
||||
cfg,
|
||||
cfg.parallel_config.tensor_parallel_size,
|
||||
cfg.splitwise_role,
|
||||
@@ -83,7 +83,7 @@ class EngineService:
|
||||
)
|
||||
else:
|
||||
self.resource_manager = ResourceManager(
|
||||
- cfg.max_num_seqs,
+ cfg.scheduler_config.max_num_seqs,
|
||||
cfg,
|
||||
cfg.parallel_config.tensor_parallel_size,
|
||||
cfg.splitwise_role,
|
||||
@@ -109,7 +109,7 @@ class EngineService:
|
||||
self.partial_chunked_tokens = [0] * (self.cfg.max_num_partial_prefills + 1)
|
||||
for idx in range(1, self.cfg.max_num_partial_prefills + 1):
|
||||
self.partial_chunked_tokens[idx] = (
|
||||
- (self.cfg.max_num_batched_tokens // idx)
+ (self.cfg.scheduler_config.max_num_batched_tokens // idx)
|
||||
// self.cfg.cache_config.block_size
|
||||
* self.cfg.cache_config.block_size
|
||||
)
|
||||
@@ -356,7 +356,7 @@ class EngineService:
|
||||
requests_chunk = [[] for _ in range(len(requests))]
|
||||
chunk_request_num = len(current_request_size)
|
||||
while chunk_request_num >= 1:
|
||||
- remain_batched_tokens = self.cfg.max_num_batched_tokens
+ remain_batched_tokens = self.cfg.scheduler_config.max_num_batched_tokens
|
||||
for idx in range(len(current_request_size)):
|
||||
if current_request_size[idx] <= 0:
|
||||
continue
|
||||
@@ -496,7 +496,7 @@ class EngineService:
|
||||
available_blocks=self.resource_manager.available_block_num(),
|
||||
block_size=self.cfg.cache_config.block_size,
|
||||
reserved_output_blocks=self.cfg.cache_config.enc_dec_block_num,
|
||||
- max_num_batched_tokens=self.cfg.max_num_batched_tokens,
+ max_num_batched_tokens=self.cfg.scheduler_config.max_num_batched_tokens,
|
||||
batch=num_prefill_batch,
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user