From 66c5addce4f06f4afd2c472570b2ac405a6b8166 Mon Sep 17 00:00:00 2001
From: ltd0924 <32387785+ltd0924@users.noreply.github.com>
Date: Mon, 25 Aug 2025 21:13:04 +0800
Subject: [PATCH] [Bugfix] fix api server control signal bugs (#3531)

* Update serving_chat.py

* Update serving_completion.py

* Update serving_completion.py
---
 fastdeploy/entrypoints/openai/serving_chat.py |  1 +
 .../entrypoints/openai/serving_completion.py  | 20 +++++++++----------
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/fastdeploy/entrypoints/openai/serving_chat.py b/fastdeploy/entrypoints/openai/serving_chat.py
index 05bd57183..c65f8c38d 100644
--- a/fastdeploy/entrypoints/openai/serving_chat.py
+++ b/fastdeploy/entrypoints/openai/serving_chat.py
@@ -115,6 +115,7 @@ class OpenAIServingChat:
         except Exception as e:
             error_msg = f"request[{request_id}] generator error: {str(e)}, {str(traceback.format_exc())}"
             api_server_logger.error(error_msg)
+            self.engine_client.semaphore.release()
             return ErrorResponse(code=400, message=error_msg)
         del current_req_dict
 
diff --git a/fastdeploy/entrypoints/openai/serving_completion.py b/fastdeploy/entrypoints/openai/serving_completion.py
index 704330373..75f890c44 100644
--- a/fastdeploy/entrypoints/openai/serving_completion.py
+++ b/fastdeploy/entrypoints/openai/serving_completion.py
@@ -128,22 +128,22 @@ class OpenAIServingCompletion:
             return ErrorResponse(code=408, message=error_msg)
 
         try:
-            for idx, prompt in enumerate(request_prompts):
-                request_id_idx = f"{request_id}-{idx}"
-                current_req_dict = request.to_dict_for_infer(request_id_idx, prompt)
-                try:
+            try:
+                for idx, prompt in enumerate(request_prompts):
+                    request_id_idx = f"{request_id}-{idx}"
+                    current_req_dict = request.to_dict_for_infer(request_id_idx, prompt)
                     current_req_dict["arrival_time"] = time.time()
                     prompt_token_ids = self.engine_client.format_and_add_data(current_req_dict)
                     if isinstance(prompt_token_ids, np.ndarray):
                         prompt_token_ids = prompt_token_ids.tolist()
                     text_after_process_list.append(current_req_dict.get("text_after_process"))
                     prompt_batched_token_ids.append(prompt_token_ids)
-                except Exception as e:
-                    error_msg = f"OpenAIServingCompletion format error: {e}, {str(traceback.format_exc())}"
-                    api_server_logger.error(error_msg)
-                    return ErrorResponse(message=str(e), code=400)
-
-            del current_req_dict
+                del current_req_dict
+            except Exception as e:
+                error_msg = f"OpenAIServingCompletion format error: {e}, {str(traceback.format_exc())}"
+                api_server_logger.error(error_msg)
+                self.engine_client.semaphore.release()
+                return ErrorResponse(message=str(e), code=400)
 
             if request.stream:
                 return self.completion_stream_generator(