[Feature] Support returning logprobs of generated tokens (#2784)

* Online chat completions now support returning logprobs of generated tokens (see the request sketch after this list)

* Add a platform check for XPU

* Add a check in vl_gpu_model_runner

* Only CUDA supports logprobs for now

* get_worker() now checks the platform
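
A minimal client-side sketch (not part of this commit), assuming the server exposes an OpenAI-compatible /v1/chat/completions endpoint and that the logprobs/top_logprobs fields follow the OpenAI request schema; the base URL, port, and model name are placeholders:

# Hypothetical sketch: request per-token logprobs from an OpenAI-compatible
# chat endpoint. Endpoint URL, API key, and model name are placeholders.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8188/v1", api_key="EMPTY")

resp = client.chat.completions.create(
    model="default",                                      # placeholder model name
    messages=[{"role": "user", "content": "Hello"}],
    logprobs=True,                                        # ask for per-token logprobs
    top_logprobs=5,                                       # also return the top-5 alternatives per token
)

# Each returned token carries its log probability (and top alternatives if requested).
for tok in resp.choices[0].logprobs.content:
    print(tok.token, tok.logprob)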

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
Author: chen
Committed: 2025-07-10 15:47:42 +08:00 (via GitHub)
Parent: 39d2a1de46
Commit: 823a47e64a
21 changed files with 592 additions and 105 deletions


@@ -585,6 +585,7 @@ class Config:
         max_capture_batch_size: int = 64,
         guided_decoding_backend: Optional[str] = None,
         disable_any_whitespace: bool = False,
+        enable_logprob: bool = False,
     ):
         """
         Initialize the Config class.
@@ -678,6 +679,8 @@ class Config:
                 self.parallel_config.expert_parallel_size), 8))])
         self.device_ids = os.getenv("CUDA_VISIBLE_DEVICES", self.device_ids)
+        self.enable_logprob = enable_logprob
         self.read_from_config()
         self.postprocess()
         self.check()
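
The new flag is off by default. A minimal sketch (hypothetical arguments) of turning it on when constructing the engine Config; the other constructor arguments shown here are placeholders, and per this commit logprob support is limited to CUDA:

# Hypothetical sketch: enable per-token logprobs via the new Config flag.
# Arguments other than enable_logprob are placeholders; the real Config
# constructor takes many more parameters (see the diff above).
config = Config(
    model_name_or_path="path/to/model",   # placeholder
    enable_logprob=True,                  # new in this commit; defaults to False
)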