Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-12-24 13:28:13 +08:00
[BugFix] max_logprobs=-1 maps to ori_vocab_size (#4884)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
* -1 maps to ori_vocab_size
* check
* check
* check
* revert config.py
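The first hunk resolves the -1 sentinel once, in GPUModelRunner.__init__, instead of storing the raw config value. A minimal sketch of the mapping, not FastDeploy code; the vocab size below is a made-up stand-in for fd_config.model_config.ori_vocab_size:

# Hypothetical helper illustrating the sentinel mapping in the first hunk.
def resolve_max_logprobs(max_logprobs: int, ori_vocab_size: int) -> int:
    # -1 means "logprobs over the whole original vocabulary";
    # any other value passes through unchanged.
    return ori_vocab_size if max_logprobs == -1 else max_logprobs

assert resolve_max_logprobs(-1, 151851) == 151851  # sentinel expands
assert resolve_max_logprobs(20, 151851) == 20      # explicit cap kept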
@@ -112,10 +112,12 @@ class GPUModelRunner(ModelRunnerBase):
         self.speculative_method = self.fd_config.speculative_config.method
         self.speculative_decoding = self.speculative_method is not None
         self.enable_logprob = fd_config.model_config.enable_logprob
-        self.max_logprobs = fd_config.model_config.max_logprobs
         self.enable_early_stop = self.fd_config.early_stop_config.enable_early_stop
         self.is_pooling_model = self.fd_config.model_config.runner_type == "pooling"
         self.vocal_size = self.fd_config.model_config.vocab_size
         self.ori_vocab_size = self.fd_config.model_config.ori_vocab_size
+        self.max_logprobs = (
+            self.ori_vocab_size if fd_config.model_config.max_logprobs == -1 else fd_config.model_config.max_logprobs
+        )
         self.prompt_logprobs_reqs: dict[str, Request] = {}
         self.in_progress_prompt_logprobs: dict[str, LogprobsTensors] = {}

@@ -2722,7 +2724,7 @@ class GPUModelRunner(ModelRunnerBase):
             if request.prompt_token_ids is None or num_prompt_logprobs is None:
                 continue
             if num_prompt_logprobs == -1:
-                num_prompt_logprobs = self.vocal_size
+                num_prompt_logprobs = self.ori_vocab_size

             num_tokens = request.prefill_end_index - request.prefill_start_index
             num_prompt_tokens = len(request.prompt_token_ids)
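The second hunk applies the same expansion to per-request prompt logprobs, so -1 now maps to ori_vocab_size rather than self.vocal_size (which is assigned from vocab_size a few lines above), keeping this path consistent with __init__. A self-contained sketch of that loop's sentinel handling; Request here is a made-up dataclass, not FastDeploy's Request:

from dataclasses import dataclass

@dataclass
class Request:
    prompt_token_ids: list[int] | None
    num_prompt_logprobs: int | None

ORI_VOCAB_SIZE = 151851  # assumed value for illustration

def expand(req: Request) -> int | None:
    if req.prompt_token_ids is None or req.num_prompt_logprobs is None:
        return None  # mirrors the `continue` in the real loop
    if req.num_prompt_logprobs == -1:
        return ORI_VOCAB_SIZE  # the fix: expand to ori_vocab_size
    return req.num_prompt_logprobs

assert expand(Request([1, 2, 3], -1)) == ORI_VOCAB_SIZE
assert expand(Request([1, 2, 3], 5)) == 5
assert expand(Request(None, -1)) is None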