[Feature][Executor] GPU Model Runner Supports prompt_logprobs and max_logprobs (#4769)

This commit is contained in:
chen
2025-11-05 10:43:25 +08:00
committed by GitHub
parent 74722308f2
commit 1c3ca48128
13 changed files with 203 additions and 22 deletions

View File

@@ -20,6 +20,8 @@ from typing import Optional
import numpy as np
from fastdeploy.worker.output import LogprobsTensors
class DecoderState(Enum):
"""DecoderState"""
@@ -38,7 +40,8 @@ class StreamTransferData:
batch_id: int
tokens: Optional[np.array] = None
speculative_decoding: bool = False
logprobs: Optional[np.array] = None
logprobs: Optional[LogprobsTensors] = None
prompt_logprobs: Optional[LogprobsTensors] = None
accept_tokens: Optional[np.array] = None
accept_num: Optional[np.array] = None
# [num_reqs, hidden_size]