Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 08:37:06 +08:00)
[FDConfig]Remove max_num_batched_tokens/max_num_seqs in parallel config (#4116)
* remove max_num_batched_tokens in parallel config
* remove max_num_seqs
* update test case
* fix test
* fix

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
@@ -607,9 +607,11 @@ class DeepseekV3ForCausalLM(ModelForCasualLM):
             num_embeddings=fd_config.model_config.vocab_size,
             prefix="lm_head",
         )
-        self.position_ids_buffer = paddle.empty([fd_config.parallel_config.max_num_batched_tokens], dtype=paddle.int32)
+        self.position_ids_buffer = paddle.empty(
+            [fd_config.scheduler_config.max_num_batched_tokens], dtype=paddle.int32
+        )
         self.mask_encoder_batch_buffer = paddle.empty(
-            [fd_config.parallel_config.max_num_batched_tokens, 1], dtype=paddle.int32
+            [fd_config.scheduler_config.max_num_batched_tokens, 1], dtype=paddle.int32
         )

     @classmethod
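The change is a pure call-site migration: max_num_batched_tokens (and, per the commit title, max_num_seqs) now lives on SchedulerConfig rather than ParallelConfig, so code that preallocates per-token buffers must read it from fd_config.scheduler_config. The following is a minimal sketch of the resulting access pattern, assuming a simplified FDConfig; the dataclass shapes and default values are illustrative, not FastDeploy's actual definitions.

    # Minimal sketch, assuming simplified config classes (hypothetical defaults).
    from dataclasses import dataclass

    import paddle


    @dataclass
    class SchedulerConfig:
        max_num_batched_tokens: int = 2048  # hypothetical default
        max_num_seqs: int = 256             # hypothetical default


    @dataclass
    class FDConfig:
        scheduler_config: SchedulerConfig


    fd_config = FDConfig(scheduler_config=SchedulerConfig())

    # As in the diff: buffers sized to the scheduler's token budget.
    # paddle.empty allocates an uninitialized tensor of the given shape.
    position_ids_buffer = paddle.empty(
        [fd_config.scheduler_config.max_num_batched_tokens], dtype=paddle.int32
    )
    mask_encoder_batch_buffer = paddle.empty(
        [fd_config.scheduler_config.max_num_batched_tokens, 1], dtype=paddle.int32
    )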