[PD Disaggregation] Add timestamp for analyzing splitwise deployment (#5317)

* Add timestamp for analyzing splitwise deployment

* up

* up

* up

* up

* up

* up

* fix format

* fix
This commit is contained in:
Juncai
2025-12-08 10:08:44 +08:00
committed by GitHub
parent 0c66163dfd
commit 80efe98f8d
21 changed files with 287 additions and 188 deletions

View File

@@ -308,6 +308,7 @@ class ChatCompletionStreamResponse(BaseModel):
model: str
choices: List[ChatCompletionResponseStreamChoice]
usage: Optional[UsageInfo] = None
metrics: Optional[Dict] = None
class CompletionResponseChoice(BaseModel):
@@ -387,6 +388,7 @@ class CompletionStreamResponse(BaseModel):
model: str
choices: List[CompletionResponseStreamChoice]
usage: Optional[UsageInfo] = None
metrics: Optional[Dict] = None
class StreamOptions(BaseModel):
@@ -499,6 +501,8 @@ class CompletionRequest(BaseModel):
mm_hashes: Optional[list] = None
# doc: end-completion-extra-params
collect_metrics: Optional[bool] = False
def to_dict_for_infer(self, request_id=None, prompt=None):
"""
Convert the request parameters into a dictionary
@@ -672,6 +676,8 @@ class ChatCompletionRequest(BaseModel):
completion_token_ids: Optional[List[int]] = None
# doc: end-chat-completion-extra-params
collect_metrics: Optional[bool] = False
def to_dict_for_infer(self, request_id=None):
"""
Convert the request parameters into a dictionary