feat(log): add request and response logging (#3373)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled

This commit is contained in:
xiaolei373
2025-08-13 23:27:41 +08:00
committed by GitHub
parent 396dba0d62
commit d4f610e4cd
3 changed files with 26 additions and 3 deletions

View File

@@ -251,6 +251,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
""" """
Create a chat completion for the provided prompt and parameters. Create a chat completion for the provided prompt and parameters.
""" """
api_server_logger.info(f"Chat Received request: {request.model_dump_json()}")
if app.state.dynamic_load_weight: if app.state.dynamic_load_weight:
status, msg = app.state.engine_client.is_workers_alive() status, msg = app.state.engine_client.is_workers_alive()
if not status: if not status:
@@ -279,6 +280,7 @@ async def create_completion(request: CompletionRequest):
""" """
Create a completion for the provided prompt and parameters. Create a completion for the provided prompt and parameters.
""" """
api_server_logger.info(f"Completion Received request: {request.model_dump_json()}")
if app.state.dynamic_load_weight: if app.state.dynamic_load_weight:
status, msg = app.state.engine_client.is_workers_alive() status, msg = app.state.engine_client.is_workers_alive()
if not status: if not status:

View File

@@ -239,6 +239,7 @@ class OpenAIServingChat:
prompt_tokens_details=PromptTokenUsageInfo(cached_tokens=num_cached_tokens), prompt_tokens_details=PromptTokenUsageInfo(cached_tokens=num_cached_tokens),
) )
yield f"data: {chunk.model_dump_json(exclude_unset=True)} \n\n" yield f"data: {chunk.model_dump_json(exclude_unset=True)} \n\n"
api_server_logger.info(f"Chat Streaming response send_idx 0: {chunk.model_dump_json()}")
first_iteration = False first_iteration = False
output = res["outputs"] output = res["outputs"]
@@ -273,6 +274,7 @@ class OpenAIServingChat:
logprobs=logprobs_res, logprobs=logprobs_res,
arrival_time=arrival_time, arrival_time=arrival_time,
) )
if res["finished"]: if res["finished"]:
num_choices -= 1 num_choices -= 1
work_process_metrics.e2e_request_latency.observe( work_process_metrics.e2e_request_latency.observe(
@@ -304,6 +306,9 @@ class OpenAIServingChat:
if len(choices) == max_streaming_response_tokens or res["finished"]: if len(choices) == max_streaming_response_tokens or res["finished"]:
chunk.choices = choices chunk.choices = choices
yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n" yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
# Log the final (last) streaming chunk
if res["finished"]:
api_server_logger.info(f"Chat Streaming response last send: {chunk.model_dump_json()}")
choices = [] choices = []
if choices: if choices:
@@ -456,13 +461,15 @@ class OpenAIServingChat:
prompt_tokens_details=PromptTokenUsageInfo(cached_tokens=final_res.get("num_cached_tokens", 0)), prompt_tokens_details=PromptTokenUsageInfo(cached_tokens=final_res.get("num_cached_tokens", 0)),
) )
work_process_metrics.e2e_request_latency.observe(time.time() - final_res["metrics"]["request_start_time"]) work_process_metrics.e2e_request_latency.observe(time.time() - final_res["metrics"]["request_start_time"])
return ChatCompletionResponse( res = ChatCompletionResponse(
id=request_id, id=request_id,
created=created_time, created=created_time,
model=model_name, model=model_name,
choices=choices, choices=choices,
usage=usage, usage=usage,
) )
api_server_logger.info(f"Chat response: {res.model_dump_json()}")
return res
def _create_chat_logprobs( def _create_chat_logprobs(
self, self,

View File

@@ -221,8 +221,7 @@ class OpenAIServingCompletion:
valid_results[rid] = data valid_results[rid] = data
num_choices -= 1 num_choices -= 1
break break
res = self.request_output_to_completion_response(
return self.request_output_to_completion_response(
final_res_batch=valid_results, final_res_batch=valid_results,
request=request, request=request,
request_id=request_id, request_id=request_id,
@@ -232,6 +231,8 @@ class OpenAIServingCompletion:
completion_batched_token_ids=completion_batched_token_ids, completion_batched_token_ids=completion_batched_token_ids,
text_after_process_list=text_after_process_list, text_after_process_list=text_after_process_list,
) )
api_server_logger.info(f"Completion response: {res.model_dump_json()}")
return res
except Exception as e: except Exception as e:
api_server_logger.error(f"Error in completion_full_generator: {e}", exc_info=True) api_server_logger.error(f"Error in completion_full_generator: {e}", exc_info=True)
raise raise
@@ -323,6 +324,9 @@ class OpenAIServingCompletion:
], ],
) )
yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n" yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
api_server_logger.info(
f"Completion Streaming response send_idx 0: {chunk.model_dump_json()}"
)
first_iteration[idx] = False first_iteration[idx] = False
self.engine_client.data_processor.process_response_dict( self.engine_client.data_processor.process_response_dict(
@@ -376,6 +380,15 @@ class OpenAIServingCompletion:
choices[-1].finish_reason = self.calc_finish_reason( choices[-1].finish_reason = self.calc_finish_reason(
request.max_tokens, output_tokens[idx], output, tool_called request.max_tokens, output_tokens[idx], output, tool_called
) )
send_idx = output.get("send_idx")
# Only log when send_idx is explicitly 0
if send_idx == 0 and not request.return_token_ids:
chunk_temp = chunk
chunk_temp.choices = choices
api_server_logger.info(
f"Completion Streaming response send_idx 0: {chunk_temp.model_dump_json()}"
)
del chunk_temp
if len(choices) == max_streaming_response_tokens or res["finished"]: if len(choices) == max_streaming_response_tokens or res["finished"]:
chunk = CompletionStreamResponse( chunk = CompletionStreamResponse(
@@ -402,6 +415,7 @@ class OpenAIServingCompletion:
), ),
) )
yield f"data: {usage_chunk.model_dump_json(exclude_unset=True)}\n\n" yield f"data: {usage_chunk.model_dump_json(exclude_unset=True)}\n\n"
api_server_logger.info(f"Completion Streaming response last send: {chunk.model_dump_json()}")
if choices: if choices:
chunk.choices = choices chunk.choices = choices
yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n" yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"