[Bugfix] Fix uninitialized decoded_token and add corresponding unit test. (#3195)

This commit is contained in:
SunLei
2025-08-04 19:23:58 +08:00
committed by GitHub
parent 01d7586661
commit 68bc1d12c0
2 changed files with 84 additions and 1 deletions

View File

@@ -289,6 +289,10 @@ class LLM:
self.llm_engine.add_requests(tasks, current_sampling_params, enable_thinking=enable_thinking)
return req_ids
def _decode_token(self, token_id: int) -> str:
"""Decodes a single token ID into its string representation."""
return self.llm_engine.data_processor.process_logprob_response([token_id], clean_up_tokenization_spaces=False)
def _build_sample_logprobs(self, logprobs_lists: LogprobsLists, topk_logprobs: int) -> list[dict[int, Logprob]]:
"""
Constructs a list of dictionaries mapping token IDs to Logprob objects,
@@ -322,8 +326,9 @@ class LLM:
sliced_logprobs_lists = logprobs_lists.slice_columns(1, 1 + effective_topk_logprobs)
result = []
for token_ids, logprobs in zip(sliced_logprobs_lists.logprob_token_ids, sliced_logprobs_lists.logprobs):
logprob_dict = {
token_id: Logprob(logprob=logprob, rank=i + 1, decoded_token=None)
token_id: Logprob(logprob=logprob, rank=i + 1, decoded_token=self._decode_token(token_id))
for i, (token_id, logprob) in enumerate(zip(token_ids, logprobs))
}
result.append(logprob_dict)