Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 16:48:03 +08:00)
[Feature] Support return logprob of generated tokens (#2784)
* online chat support logprobs
* check xpu
* check vl_gpu_model_runner
* only cuda support logprob
* get_worker() check platform

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
@@ -296,6 +296,12 @@ class EngineArgs:
     max_capture_batch_size=64, FastDeploy will capture graphs for batches [1,64].
     """
+
+    enable_logprob: bool = False
+    """
+    Flag to enable logprob output. Default is False (disabled).
+    Must be explicitly enabled via the `--enable-logprob` startup parameter to output logprob values.
+    """
 
     def __post_init__(self):
         """
         Post-initialization processing to set default tokenizer if not provided.
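For context, a minimal sketch (not FastDeploy's actual sampler code) of what a flag like enable_logprob unlocks: per-token log probabilities are typically derived from the raw logits via a log-softmax, then the sampled token's logprob and the top-k alternatives are returned.

# Illustrative sketch only; function name and signature are assumptions,
# not taken from this commit.
import math

def token_logprobs(logits, sampled_id, top_k=5):
    """Return the sampled token's logprob plus the top_k (token_id, logprob) pairs."""
    m = max(logits)
    # log-sum-exp for numerical stability
    log_z = m + math.log(sum(math.exp(x - m) for x in logits))
    logprobs = [x - log_z for x in logits]
    top = sorted(enumerate(logprobs), key=lambda p: p[1], reverse=True)[:top_k]
    return logprobs[sampled_id], top

lp, top = token_logprobs([2.0, 1.0, 0.5, -1.0], sampled_id=0)
print(lp, top)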
@@ -416,6 +422,11 @@ class EngineArgs:
             help=
             "Disabled any whitespaces when using guided decoding backend XGrammar."
         )
+        model_group.add_argument("--enable-logprob",
+                                 action="store_true",
+                                 default=EngineArgs.enable_logprob,
+                                 help="Enable output of token-level log probabilities."
+                                 )
 
         # Parallel processing parameters group
         parallel_group = parser.add_argument_group("Parallel Configuration")
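A self-contained sketch of the argparse wiring added above: with action="store_true", the value stays at the dataclass default (False) unless --enable-logprob is passed on the command line.

# Standalone reproduction of the flag behavior; the surrounding
# EngineArgs class here is trimmed to the one field that matters.
import argparse
from dataclasses import dataclass

@dataclass
class EngineArgs:
    enable_logprob: bool = False

parser = argparse.ArgumentParser()
parser.add_argument("--enable-logprob",
                    action="store_true",
                    default=EngineArgs.enable_logprob,
                    help="Enable output of token-level log probabilities.")

print(parser.parse_args([]).enable_logprob)                    # False
print(parser.parse_args(["--enable-logprob"]).enable_logprob)  # True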
@@ -791,4 +802,5 @@ class EngineArgs:
             max_capture_batch_size=self.max_capture_batch_size,
             guided_decoding_backend=self.guided_decoding_backend,
             disable_any_whitespace=self.guided_decoding_disable_any_whitespace,
+            enable_logprob = self.enable_logprob,
         )
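A hedged end-to-end usage sketch: once the server is launched with --enable-logprob (per the commit message, only CUDA platforms support logprob), an OpenAI-compatible client can request per-token log probabilities through the standard logprobs/top_logprobs parameters. The base_url, api_key, and model name below are illustrative assumptions, not values taken from this commit.

# Assumes a locally running FastDeploy OpenAI-compatible endpoint;
# adjust base_url and model to your deployment.
import openai

client = openai.OpenAI(base_url="http://localhost:8188/v1", api_key="EMPTY")
resp = client.chat.completions.create(
    model="default",
    messages=[{"role": "user", "content": "Hello!"}],
    logprobs=True,   # has no effect unless the server was started with --enable-logprob
    top_logprobs=5,
)
for tok in resp.choices[0].logprobs.content:
    print(tok.token, tok.logprob)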