mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 09:07:10 +08:00
[Feature] Support return logprob of generated tokens (#2784)
* online chat support logprobs * check xpu * check vl_gpu_model_runner * only cuda support logprob * get_worker() check platform --------- Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
@@ -1068,6 +1068,7 @@ class LLMEngine(object):
|
||||
self.cfg.enable_static_graph_inference,
|
||||
"use_cudagraph": self.cfg.use_cudagraph,
|
||||
"disable_any_whitespace": self.cfg.disable_any_whitespace,
|
||||
"enable_logprob": self.cfg.enable_logprob,
|
||||
}
|
||||
for worker_flag, value in worker_append_flag.items():
|
||||
if value:
|
||||
|
Reference in New Issue
Block a user