feat(log):add_request_and_response_log (#3373)

Authored by xiaolei373 on 2025-08-13 23:27:41 +08:00, committed by GitHub
parent 396dba0d62
commit d4f610e4cd
3 changed files with 26 additions and 3 deletions


@@ -239,6 +239,7 @@ class OpenAIServingChat:
                         prompt_tokens_details=PromptTokenUsageInfo(cached_tokens=num_cached_tokens),
                     )
                     yield f"data: {chunk.model_dump_json(exclude_unset=True)} \n\n"
+                    api_server_logger.info(f"Chat Streaming response send_idx 0: {chunk.model_dump_json()}")
                     first_iteration = False

                output = res["outputs"]
@@ -273,6 +274,7 @@ class OpenAIServingChat:
                         logprobs=logprobs_res,
                         arrival_time=arrival_time,
                     )
+
                     if res["finished"]:
                         num_choices -= 1
                         work_process_metrics.e2e_request_latency.observe(
@@ -304,6 +306,9 @@ class OpenAIServingChat:
                 if len(choices) == max_streaming_response_tokens or res["finished"]:
                     chunk.choices = choices
                     yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
+                    # Log the final chunk of the stream
+                    if res["finished"]:
+                        api_server_logger.info(f"Chat Streaming response last send: {chunk.model_dump_json()}")
                     choices = []

         if choices:
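
The first and third hunks above place logging at the two ends of an SSE stream: the opening chunk (send_idx 0) is logged once, and the tail chunk is logged when the response is finished. Below is a minimal, self-contained sketch of that placement; the dict-based chunks and the stdlib logger are stand-ins for the project's chunk model and api_server_logger, not its actual code.

import json
import logging

logging.basicConfig(level=logging.INFO)
api_server_logger = logging.getLogger("api_server")


def stream_chunks(results):
    """Yield SSE events, logging the first and last chunks of the stream."""
    first_iteration = True
    for res in results:
        payload = json.dumps(res["chunk"])
        yield f"data: {payload}\n\n"
        if first_iteration:
            # Mirrors the first hunk: log only the opening chunk of the stream.
            api_server_logger.info(f"Chat Streaming response send_idx 0: {payload}")
            first_iteration = False
        if res["finished"]:
            # Mirrors the third hunk: log the tail chunk before the stream closes.
            api_server_logger.info(f"Chat Streaming response last send: {payload}")


for event in stream_chunks(
    [
        {"chunk": {"delta": "Hello"}, "finished": False},
        {"chunk": {"delta": "!"}, "finished": True},
    ]
):
    print(event, end="")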
@@ -456,13 +461,15 @@ class OpenAIServingChat:
                 prompt_tokens_details=PromptTokenUsageInfo(cached_tokens=final_res.get("num_cached_tokens", 0)),
             )
         work_process_metrics.e2e_request_latency.observe(time.time() - final_res["metrics"]["request_start_time"])
-        return ChatCompletionResponse(
+        res = ChatCompletionResponse(
             id=request_id,
             created=created_time,
             model=model_name,
             choices=choices,
             usage=usage,
         )
+        api_server_logger.info(f"Chat response: {res.model_dump_json()}")
+        return res

     def _create_chat_logprobs(
         self,
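
The non-streaming hunk turns a direct return into bind-log-return, so the full response body is logged exactly once before it leaves the handler. A sketch of that shape follows, assuming pydantic v2 and using a deliberately trimmed stand-in for the real ChatCompletionResponse model.

import logging

from pydantic import BaseModel

logging.basicConfig(level=logging.INFO)
api_server_logger = logging.getLogger("api_server")


class ChatCompletionResponse(BaseModel):
    # Trimmed stand-in for the real response model, which carries more fields.
    id: str
    model: str
    choices: list


def build_full_response(request_id: str, model_name: str, choices: list) -> ChatCompletionResponse:
    res = ChatCompletionResponse(id=request_id, model=model_name, choices=choices)
    # Bind, log, then return, so the logged JSON is exactly what the caller receives.
    api_server_logger.info(f"Chat response: {res.model_dump_json()}")
    return res


print(build_full_response("chatcmpl-123", "demo-model", []).model_dump_json())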