[Feature] Online Chat API: Support Returning logprobs (#2777)

* online chat support logprobs

* check xpu

* check vl_gpu_model_runner and xpu_model_runner

* get_worker() check platform
This commit is contained in:
chen
2025-07-10 16:33:40 +08:00
committed by GitHub
parent 24f934f1f9
commit d33105baeb
22 changed files with 608 additions and 114 deletions

View File

@@ -299,6 +299,12 @@ class EngineArgs:
max_capture_batch_size=64, FastDeploy will capture graphs for batches [1,64].
"""
enable_logprob: bool = False
"""
Flag to enable logprob output. Default is False (disabled).
Must be explicitly enabled via the `--enable-logprob` startup parameter to output logprob values.
"""
def __post_init__(self):
"""
Post-initialization processing to set default tokenizer if not provided.
@@ -419,6 +425,11 @@ class EngineArgs:
help=
"Disabled any whitespaces when using guided decoding backend XGrammar."
)
model_group.add_argument("--enable-logprob",
action="store_true",
default=EngineArgs.enable_logprob,
help="Enable output of token-level log probabilities."
)
# Parallel processing parameters group
parallel_group = parser.add_argument_group("Parallel Configuration")
@@ -799,4 +810,5 @@ class EngineArgs:
guided_decoding_backend=self.guided_decoding_backend,
disable_any_whitespace=self.guided_decoding_disable_any_whitespace,
enable_custom_all_reduce=self.enable_custom_all_reduce,
enable_logprob=self.enable_logprob,
)