feat(log):add_request_and_response_log (#3392)

2025-10-05 16:48:03 +08:00 · 2025-08-14 14:50:48 +08:00
parent dc5d3ff5a0
commit d1d321bafd
3 changed files with 26 additions and 3 deletions
--- a/fastdeploy/entrypoints/openai/serving_chat.py
+++ b/fastdeploy/entrypoints/openai/serving_chat.py
@@ -239,6 +239,7 @@ class OpenAIServingChat:
                                    prompt_tokens_details=PromptTokenUsageInfo(cached_tokens=num_cached_tokens),
                                )
                            yield f"data: {chunk.model_dump_json(exclude_unset=True)} \n\n"
+                            api_server_logger.info(f"Chat Streaming response send_idx 0: {chunk.model_dump_json()}")
                        first_iteration = False

                    output = res["outputs"]
@@ -265,6 +266,7 @@ class OpenAIServingChat:
                        logprobs=logprobs_res,
                        arrival_time=arrival_time,
                    )
+
                    if res["finished"]:
                        num_choices -= 1
                        work_process_metrics.e2e_request_latency.observe(
@@ -299,6 +301,9 @@ class OpenAIServingChat:
                    if len(choices) == max_streaming_response_tokens or res["finished"]:
                        chunk.choices = choices
                        yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
+                        # 打印尾包
+                        if res["finished"]:
+                            api_server_logger.info(f"Chat Streaming response last send: {chunk.model_dump_json()}")
                        choices = []

                if choices:
@@ -452,13 +457,15 @@ class OpenAIServingChat:
            prompt_tokens_details=PromptTokenUsageInfo(cached_tokens=final_res.get("num_cached_tokens", 0)),
        )
        work_process_metrics.e2e_request_latency.observe(time.time() - final_res["metrics"]["request_start_time"])
-        return ChatCompletionResponse(
+        res = ChatCompletionResponse(
            id=request_id,
            created=created_time,
            model=model_name,
            choices=choices,
            usage=usage,
        )
+        api_server_logger.info(f"Chat response: {res.model_dump_json()}")
+        return res

    def _create_chat_logprobs(
        self,