[Feature][Executor] GPU Model Runner Supports prompt_logprobs and max_logprobs (#4769)

2025-12-24 13:28:13 +08:00 · 2025-11-05 10:43:25 +08:00
parent 74722308f2
commit 1c3ca48128
13 changed files with 203 additions and 22 deletions
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -736,6 +736,12 @@ def parse_args():
        action="store_true",
        help="Enable output of token-level log probabilities.",
    )
+    parser.add_argument(
+        "--max_logprobs",
+        type=int,
+        default=20,
+        help="Maximum number of log probabilities.",
+    )
    parser.add_argument(
        "--logprobs_mode",
        type=str,