mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[PD Disaggregation] Add timestamp for analyzing splitwise deployment (#5317)
* Add timestamp for analyzing splitwise deployment
* up
* up
* up
* up
* up
* up
* fix format
* fix
This commit is contained in:
@@ -308,6 +308,7 @@ class ChatCompletionStreamResponse(BaseModel):
|
||||
model: str
|
||||
choices: List[ChatCompletionResponseStreamChoice]
|
||||
usage: Optional[UsageInfo] = None
|
||||
metrics: Optional[Dict] = None
|
||||
|
||||
|
||||
class CompletionResponseChoice(BaseModel):
|
||||
@@ -387,6 +388,7 @@ class CompletionStreamResponse(BaseModel):
|
||||
model: str
|
||||
choices: List[CompletionResponseStreamChoice]
|
||||
usage: Optional[UsageInfo] = None
|
||||
metrics: Optional[Dict] = None
|
||||
|
||||
|
||||
class StreamOptions(BaseModel):
|
||||
@@ -499,6 +501,8 @@ class CompletionRequest(BaseModel):
|
||||
mm_hashes: Optional[list] = None
|
||||
# doc: end-completion-extra-params
|
||||
|
||||
collect_metrics: Optional[bool] = False
|
||||
|
||||
def to_dict_for_infer(self, request_id=None, prompt=None):
|
||||
"""
|
||||
Convert the request parameters into a dictionary
|
||||
@@ -672,6 +676,8 @@ class ChatCompletionRequest(BaseModel):
|
||||
completion_token_ids: Optional[List[int]] = None
|
||||
# doc: end-chat-completion-extra-params
|
||||
|
||||
collect_metrics: Optional[bool] = False
|
||||
|
||||
def to_dict_for_infer(self, request_id=None):
|
||||
"""
|
||||
Convert the request parameters into a dictionary
|
||||
|
||||
Reference in New Issue
Block a user