[BugFix] max_logprobs=-1 maps to ori_vocab_size (#4884)

* map -1 to ori_vocab_size

* check

* revert config.py
Authored by chen on 2025-11-07 22:15:40 +08:00; committed by GitHub.
Parent: 6de1ce3b25
Commit: 80aedb82ce

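In FastDeploy's model config, `vocab_size` is the width of the embedding/output layer (which may include padding slots, e.g. for tensor-parallel alignment), while `ori_vocab_size` is the model's original vocabulary size. A request that sets `max_logprobs=-1` means "return logprobs for every real token", so the sentinel should expand to `ori_vocab_size`; expanding it to the padded width, as the old prompt-logprobs path did via `self.vocal_size` (the attribute's spelling in the codebase), could report logprobs for slots that correspond to no token. A minimal sketch of the mapping, reusing the names from the diff below (the standalone helper itself is illustrative, not FastDeploy API):

def resolve_max_logprobs(max_logprobs: int, ori_vocab_size: int) -> int:
    # -1 is a sentinel for "all logprobs": expand it to the original
    # vocabulary size, never to the (possibly padded) embedding width.
    return ori_vocab_size if max_logprobs == -1 else max_logprobs

# Usage: -1 expands, any explicit bound passes through unchanged.
assert resolve_max_logprobs(-1, ori_vocab_size=32000) == 32000
assert resolve_max_logprobs(20, ori_vocab_size=32000) == 20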

@@ -112,10 +112,12 @@ class GPUModelRunner(ModelRunnerBase):
         self.speculative_method = self.fd_config.speculative_config.method
         self.speculative_decoding = self.speculative_method is not None
         self.enable_logprob = fd_config.model_config.enable_logprob
-        self.max_logprobs = fd_config.model_config.max_logprobs
         self.enable_early_stop = self.fd_config.early_stop_config.enable_early_stop
         self.is_pooling_model = self.fd_config.model_config.runner_type == "pooling"
         self.vocal_size = self.fd_config.model_config.vocab_size
         self.ori_vocab_size = self.fd_config.model_config.ori_vocab_size
+        self.max_logprobs = (
+            self.ori_vocab_size if fd_config.model_config.max_logprobs == -1 else fd_config.model_config.max_logprobs
+        )
         self.prompt_logprobs_reqs: dict[str, Request] = {}
         self.in_progress_prompt_logprobs: dict[str, LogprobsTensors] = {}
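The resolved assignment has to sit below the `self.ori_vocab_size` line, which is why it moves down in this hunk: the conditional reads `self.ori_vocab_size`, so evaluating it any earlier in `__init__` would raise an `AttributeError`. Only the runner's cached value is resolved here; per the "revert config.py" note above, `fd_config.model_config.max_logprobs` keeps the raw `-1` sentinel.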
@@ -2722,7 +2724,7 @@ class GPUModelRunner(ModelRunnerBase):
             if request.prompt_token_ids is None or num_prompt_logprobs is None:
                 continue
             if num_prompt_logprobs == -1:
-                num_prompt_logprobs = self.vocal_size
+                num_prompt_logprobs = self.ori_vocab_size
             num_tokens = request.prefill_end_index - request.prefill_start_index
             num_prompt_tokens = len(request.prompt_token_ids)
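The per-request prompt-logprobs path expands the same sentinel, now also against `ori_vocab_size`. A hedged sketch of how the resolved count would bound a top-k over prompt logprobs; `topk_prompt_logprobs` is a hypothetical helper written against Paddle (which the GPU runner builds on), not an actual FastDeploy function:

import paddle

def topk_prompt_logprobs(logits: paddle.Tensor, num_prompt_logprobs: int, ori_vocab_size: int):
    # Expand the -1 sentinel exactly as the hunk above does.
    if num_prompt_logprobs == -1:
        num_prompt_logprobs = ori_vocab_size
    # Score only real tokens before normalizing, so padded embedding
    # slots can never surface in the returned top-k.
    logprobs = paddle.nn.functional.log_softmax(logits[..., :ori_vocab_size], axis=-1)
    return paddle.topk(logprobs, k=num_prompt_logprobs, axis=-1)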