mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 08:16:42 +08:00
[Feature] General support for logprobs (#2974)
* [Feature] support logprobs in chat/completions and completions endpoints * Temporarily comment out text_offset due to incorrect logic * Clean up temporary debug prints * [Feature] support logprobs in offline mode via SamplingParams * fix: serialize Logprob as dict before zmq send to fix msgpack error * refactor: remove redundant methods to simplify codebase * Fix missing fields in CompletionOutput.to_dict affecting msgpack serialization * refactor: centralize param validation in engine_client to reduce duplication * revert: rollback changes in offline_demo.py * revert: rollback changes in offline_demo.py * [bugfix] fix parameter validation for logprobs * [bugfix] fix parameter validation for logprobs * [bugfix] fix parameter validation for logprobs * [bugfix] fix parameter validation for logprobs --------- Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
@@ -25,7 +25,7 @@ import numpy as np
|
||||
|
||||
from fastdeploy.engine.sampling_params import SamplingParams
|
||||
from fastdeploy.utils import data_processor_logger
|
||||
from fastdeploy.worker.output import LogprobsLists
|
||||
from fastdeploy.worker.output import LogprobsLists, SampleLogprobs
|
||||
|
||||
|
||||
class RequestStatus(Enum):
|
||||
@@ -245,6 +245,7 @@ class CompletionOutput:
|
||||
token_ids: list[int]
|
||||
logprob: Optional[float] = None
|
||||
top_logprobs: Optional[LogprobsLists] = None
|
||||
logprobs: Optional[SampleLogprobs] = None
|
||||
draft_token_ids: list[int] = None
|
||||
text: Optional[str] = None
|
||||
reasoning_content: Optional[str] = None
|
||||
@@ -259,6 +260,7 @@ class CompletionOutput:
|
||||
"token_ids": self.token_ids,
|
||||
"logprob": self.logprob,
|
||||
"top_logprobs": self.top_logprobs,
|
||||
"logprobs": self.logprobs,
|
||||
"draft_token_ids": self.draft_token_ids,
|
||||
"text": self.text,
|
||||
"reasoning_content": self.reasoning_content,
|
||||
@@ -281,7 +283,8 @@ class CompletionOutput:
|
||||
f"text={self.text!r}, "
|
||||
f"token_ids={self.token_ids}, "
|
||||
f"draft_token_ids={self.draft_token_ids}, "
|
||||
f"reasoning_content={self.reasoning_content!r}"
|
||||
f"reasoning_content={self.reasoning_content!r}, "
|
||||
f"logprobs={self.logprobs}, "
|
||||
)
|
||||
|
||||
|
||||
@@ -390,16 +393,20 @@ class RequestOutput:
|
||||
|
||||
def add(self, next_output: RequestOutput) -> None:
|
||||
"""Merge RequestOutput into this one"""
|
||||
|
||||
self.prompt = next_output.prompt
|
||||
self.prompt_token_ids = next_output.prompt_token_ids
|
||||
self.finished |= next_output.finished
|
||||
self.outputs.index = next_output.outputs.index
|
||||
self.outputs.token_ids.extend(next_output.outputs.token_ids)
|
||||
|
||||
if next_output.metrics.arrival_time is not None and self.metrics.inference_start_time is not None:
|
||||
self.metrics.model_forward_time = next_output.metrics.arrival_time - self.metrics.inference_start_time
|
||||
if next_output.metrics.arrival_time is not None and self.metrics.arrival_time is not None:
|
||||
self.metrics.model_execute_time = next_output.metrics.arrival_time - self.metrics.arrival_time
|
||||
if next_output.outputs.top_logprobs is not None:
|
||||
self.outputs.top_logprobs.logprob_token_ids.extend(next_output.outputs.top_logprobs.logprob_token_ids)
|
||||
self.outputs.top_logprobs.logprobs.extend(next_output.outputs.top_logprobs.logprobs)
|
||||
self.outputs.top_logprobs.sampled_token_ranks.extend(next_output.outputs.top_logprobs.sampled_token_ranks)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return (
|
||||
@@ -407,8 +414,9 @@ class RequestOutput:
|
||||
f"prompt={self.prompt!r}, "
|
||||
f"prompt_token_ids={self.prompt_token_ids}, "
|
||||
f"outputs={self.outputs}, "
|
||||
f"finished={self.finished}, "
|
||||
f"num_cached_tokens={self.num_cached_tokens}, "
|
||||
f"metrics={self.metrics}, "
|
||||
f"num_cached_tokens={self.num_cached_tokens})"
|
||||
)
|
||||
|
||||
@classmethod
|
||||
|
Reference in New Issue
Block a user