mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Feature][Executor] GPU Model Runner Supports prompt_logprobs and max_logprobs (#4769)
This commit is contained in:
@@ -20,6 +20,8 @@ from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
+from fastdeploy.worker.output import LogprobsTensors
|
||||
|
||||
|
||||
class DecoderState(Enum):
|
||||
"""DecoderState"""
|
||||
@@ -38,7 +40,8 @@ class StreamTransferData:
|
||||
batch_id: int
|
||||
tokens: Optional[np.array] = None
|
||||
speculaive_decoding: bool = False
|
||||
-logprobs: Optional[np.array] = None
|
||||
+logprobs: Optional[LogprobsTensors] = None
|
||||
+prompt_logprobs: Optional[LogprobsTensors] = None
|
||||
accept_tokens: Optional[np.array] = None
|
||||
accept_num: Optional[np.array] = None
|
||||
# [num_reqs, hidden_size]
|
||||
|
||||
Reference in New Issue
Block a user