From 6a90cfd1447a47a6b866e80751d33d4232a1fade Mon Sep 17 00:00:00 2001 From: ltd0924 <32387785+ltd0924@users.noreply.github.com> Date: Thu, 28 Aug 2025 09:52:17 +0800 Subject: [PATCH] [BugFix] fix parameter is 0 (#3663) * Update engine.py * Update engine_client.py --- fastdeploy/engine/engine.py | 41 ++++++++++++++----------- fastdeploy/entrypoints/engine_client.py | 16 +++++----- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/fastdeploy/engine/engine.py b/fastdeploy/engine/engine.py index 91661f297..352217fc8 100644 --- a/fastdeploy/engine/engine.py +++ b/fastdeploy/engine/engine.py @@ -400,10 +400,10 @@ class LLMEngine: while self.running: try: block = True if len(added_requests) == 0 else False - if not self.cfg.enable_mm: - err, data = self.recv_request_server.receive_json_once(block) + if not self.cfg.model_config.enable_mm: + err, data = self.zmq_server.receive_json_once(block) else: - err, data = self.recv_request_server.receive_pyobj_once(block) + err, data = self.zmq_server.receive_pyobj_once(block) if err is not None: llm_logger.error("Engine stops inserting zmq task into scheduler, err:{err}") break @@ -411,19 +411,23 @@ class LLMEngine: request, insert_task = None, [] results: List[Tuple[str, Optional[str]]] = list() if data: - request = Request.from_dict(data) - start_span("ENQUEUE_ZMQ", data, trace.SpanKind.PRODUCER) - - llm_logger.debug(f"Receive request: {request}") - err_msg = None - if self.guided_decoding_checker is not None: - request, err_msg = self.guided_decoding_checker.schema_format(request) + try: + request = Request.from_dict(data) + start_span("ENQUEUE_ZMQ", data, trace.SpanKind.PRODUCER) + llm_logger.debug(f"Receive request: {request}") + except Exception as e: + llm_logger.error(f"Receive request error: {e}, {traceback.format_exc()!s}") + err_msg = str(e) + results.append((data["request_id"], err_msg)) - if err_msg is not None: - llm_logger.error(err_msg) - results.append((request.request_id, err_msg)) - else: + if self.guided_decoding_checker is not None and err_msg is None: + request, err_msg = self.guided_decoding_checker.schema_format(request) + if err_msg is not None: + llm_logger.error(f"Receive request error: {err_msg}") + results.append((request.request_id, err_msg)) + + if err_msg is None: insert_task.append(request) response = self.scheduler.put_requests(insert_task) @@ -435,9 +439,10 @@ class LLMEngine: added_requests[request.request_id] += 1 for request_id, failed in results: - added_requests[request_id] -= 1 - if added_requests[request_id] == 0: - added_requests.pop(request_id) + if request_id in added_requests: + added_requests[request_id] -= 1 + if added_requests[request_id] == 0: + added_requests.pop(request_id) if failed is None: main_process_metrics.num_requests_waiting.inc(1) @@ -451,7 +456,7 @@ class LLMEngine: ) # Since the request is not in scheduler # Send result by zmq directly - self.send_response_server.send_response(request_id, [error_result]) + self.zmq_server.send_multipart(request_id, [error_result]) except Exception as e: llm_logger.error( f"Error happend while receving new request from zmq, details={e}, " diff --git a/fastdeploy/entrypoints/engine_client.py b/fastdeploy/entrypoints/engine_client.py index 2d4b61b1f..0e29692f0 100644 --- a/fastdeploy/entrypoints/engine_client.py +++ b/fastdeploy/entrypoints/engine_client.py @@ -190,35 +190,35 @@ class EngineClient: Validate stream options """ - if data.get("n"): + if data.get("n") is not None: if data["n"] != 1: raise ValueError("n only support 1.") - if data.get("max_tokens"): + if data.get("max_tokens") is not None: if data["max_tokens"] < 1 or data["max_tokens"] >= self.max_model_len: raise ValueError(f"max_tokens can be defined [1, {self.max_model_len}).") - if data.get("reasoning_max_tokens"): + if data.get("reasoning_max_tokens") is not None: if data["reasoning_max_tokens"] > data["max_tokens"] or data["reasoning_max_tokens"] < 1: raise ValueError("reasoning_max_tokens must be between max_tokens and 1") - if data.get("top_p"): + if data.get("top_p") is not None: if data["top_p"] > 1 or data["top_p"] < 0: raise ValueError("top_p value can only be defined [0, 1].") - if data.get("frequency_penalty"): + if data.get("frequency_penalty") is not None: if not -2.0 <= data["frequency_penalty"] <= 2.0: raise ValueError("frequency_penalty must be in [-2, 2]") - if data.get("temperature"): + if data.get("temperature") is not None: if data["temperature"] < 0: raise ValueError("temperature must be non-negative") - if data.get("presence_penalty"): + if data.get("presence_penalty") is not None: if not -2.0 <= data["presence_penalty"] <= 2.0: raise ValueError("presence_penalty must be in [-2, 2]") - if data.get("seed"): + if data.get("seed") is not None: if not 0 <= data["seed"] <= 922337203685477580: raise ValueError("seed must be in [0, 922337203685477580]")