From 4d6fb96cd66c448ee91d54782698999650311ee4 Mon Sep 17 00:00:00 2001 From: ltd0924 <32387785+ltd0924@users.noreply.github.com> Date: Fri, 22 Aug 2025 14:01:14 +0800 Subject: [PATCH] [BugFix] Api server bugs (#3530) * Update serving_chat.py * Update serving_completion.py * Update serving_completion.py --- fastdeploy/entrypoints/openai/serving_chat.py | 1 + .../entrypoints/openai/serving_completion.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/fastdeploy/entrypoints/openai/serving_chat.py b/fastdeploy/entrypoints/openai/serving_chat.py index e2f023340..292bf1e8b 100644 --- a/fastdeploy/entrypoints/openai/serving_chat.py +++ b/fastdeploy/entrypoints/openai/serving_chat.py @@ -99,6 +99,7 @@ class OpenAIServingChat: if isinstance(prompt_token_ids, np.ndarray): prompt_token_ids = prompt_token_ids.tolist() except Exception as e: + self.engine_client.semaphore.release() return ErrorResponse(code=400, message=str(e)) del current_req_dict diff --git a/fastdeploy/entrypoints/openai/serving_completion.py b/fastdeploy/entrypoints/openai/serving_completion.py index 7c20bb511..01fc8792c 100644 --- a/fastdeploy/entrypoints/openai/serving_completion.py +++ b/fastdeploy/entrypoints/openai/serving_completion.py @@ -109,20 +109,20 @@ class OpenAIServingCompletion: except Exception: return ErrorResponse(code=408, message=f"Request queued time exceed {self.max_waiting_time}") try: - for idx, prompt in enumerate(request_prompts): - request_id_idx = f"{request_id}-{idx}" - current_req_dict = request.to_dict_for_infer(request_id_idx, prompt) - try: + try: + for idx, prompt in enumerate(request_prompts): + request_id_idx = f"{request_id}-{idx}" + current_req_dict = request.to_dict_for_infer(request_id_idx, prompt) current_req_dict["arrival_time"] = time.time() prompt_token_ids = self.engine_client.format_and_add_data(current_req_dict) if isinstance(prompt_token_ids, np.ndarray): prompt_token_ids = prompt_token_ids.tolist() text_after_process_list.append(current_req_dict.get("text_after_process")) prompt_batched_token_ids.append(prompt_token_ids) - except Exception as e: - return ErrorResponse(message=str(e), code=400) - - del current_req_dict + del current_req_dict + except Exception as e: + self.engine_client.semaphore.release() + return ErrorResponse(message=str(e), code=400) if request.stream: return self.completion_stream_generator(