[Feature] Support returning logprobs of generated tokens (#2784)

* Online chat completions now support returning logprobs of generated tokens (see the request sketch after this list)

* Add a platform check for XPU

* Add a check in vl_gpu_model_runner

* Only CUDA supports logprobs for now

* get_worker() now checks the platform
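
A minimal client-side sketch (not part of this commit), assuming the server exposes an OpenAI-compatible /v1/chat/completions endpoint and that the logprobs/top_logprobs fields follow the OpenAI request schema; the base URL, port, and model name are placeholders:

# Hypothetical sketch: request per-token logprobs from an OpenAI-compatible
# chat endpoint. Endpoint URL, API key, and model name are placeholders.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8188/v1", api_key="EMPTY")

resp = client.chat.completions.create(
    model="default",                                      # placeholder model name
    messages=[{"role": "user", "content": "Hello"}],
    logprobs=True,                                        # ask for per-token logprobs
    top_logprobs=5,                                       # also return the top-5 alternatives per token
)

# Each returned token carries its log probability (and top alternatives if requested).
for tok in resp.choices[0].logprobs.content:
    print(tok.token, tok.logprob)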

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
Author: chen
Committed: 2025-07-10 15:47:42 +08:00 (via GitHub)
Parent: 39d2a1de46
Commit: 823a47e64a
21 changed files with 592 additions and 105 deletions


@@ -585,6 +585,7 @@ class Config:
         max_capture_batch_size: int = 64,
         guided_decoding_backend: Optional[str] = None,
         disable_any_whitespace: bool = False,
+        enable_logprob: bool = False,
     ):
         """
         Initialize the Config class.
@@ -678,6 +679,8 @@ class Config:
                 self.parallel_config.expert_parallel_size), 8))])
         self.device_ids = os.getenv("CUDA_VISIBLE_DEVICES", self.device_ids)
+        self.enable_logprob = enable_logprob
         self.read_from_config()
         self.postprocess()
         self.check()
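
The new flag is off by default. A minimal sketch (hypothetical arguments) of turning it on when constructing the engine Config; the other constructor arguments shown here are placeholders, and per this commit logprob support is limited to CUDA:

# Hypothetical sketch: enable per-token logprobs via the new Config flag.
# Arguments other than enable_logprob are placeholders; the real Config
# constructor takes many more parameters (see the diff above).
config = Config(
    model_name_or_path="path/to/model",   # placeholder
    enable_logprob=True,                  # new in this commit; defaults to False
)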