mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 00:33:03 +08:00
[BugFix] Fix parameter validation being skipped when the value is 0 (#3592)
* Update engine_client.py
* fix
* Update common_engine.py
This commit is contained in:
@@ -592,18 +592,23 @@ class EngineSevice:
|
|||||||
request, insert_task = None, []
|
request, insert_task = None, []
|
||||||
results: List[Tuple[str, Optional[str]]] = list()
|
results: List[Tuple[str, Optional[str]]] = list()
|
||||||
if data:
|
if data:
|
||||||
|
err_msg = None
|
||||||
|
try:
|
||||||
request = Request.from_dict(data)
|
request = Request.from_dict(data)
|
||||||
start_span("ENQUEUE_ZMQ", data, trace.SpanKind.PRODUCER)
|
start_span("ENQUEUE_ZMQ", data, trace.SpanKind.PRODUCER)
|
||||||
llm_logger.debug(f"Receive request: {request}")
|
llm_logger.debug(f"Receive request: {request}")
|
||||||
|
except Exception as e:
|
||||||
|
llm_logger.error(f"Receive request error: {e}, {traceback.format_exc()!s}")
|
||||||
|
err_msg = str(e)
|
||||||
|
results.append((data["request_id"], err_msg))
|
||||||
|
|
||||||
err_msg = None
|
if self.guided_decoding_checker is not None and err_msg is None:
|
||||||
if self.guided_decoding_checker is not None:
|
|
||||||
request, err_msg = self.guided_decoding_checker.schema_format(request)
|
request, err_msg = self.guided_decoding_checker.schema_format(request)
|
||||||
|
|
||||||
if err_msg is not None:
|
if err_msg is not None:
|
||||||
llm_logger.error(err_msg)
|
llm_logger.error(f"Receive request error: {err_msg}")
|
||||||
results.append((request.request_id, err_msg))
|
results.append((request.request_id, err_msg))
|
||||||
else:
|
|
||||||
|
if err_msg is None:
|
||||||
insert_task.append(request)
|
insert_task.append(request)
|
||||||
|
|
||||||
response = self.scheduler.put_requests(insert_task)
|
response = self.scheduler.put_requests(insert_task)
|
||||||
@@ -615,6 +620,7 @@ class EngineSevice:
|
|||||||
added_requests[request.request_id] += 1
|
added_requests[request.request_id] += 1
|
||||||
|
|
||||||
for request_id, failed in results:
|
for request_id, failed in results:
|
||||||
|
if request_id in added_requests:
|
||||||
added_requests[request_id] -= 1
|
added_requests[request_id] -= 1
|
||||||
if added_requests[request_id] == 0:
|
if added_requests[request_id] == 0:
|
||||||
added_requests.pop(request_id)
|
added_requests.pop(request_id)
|
||||||
@@ -631,7 +637,7 @@ class EngineSevice:
|
|||||||
)
|
)
|
||||||
# Since the request is not in scheduler
|
# Since the request is not in scheduler
|
||||||
# Send result by zmq directly
|
# Send result by zmq directly
|
||||||
self.zmq_server.send_multipart(request_id, error_result)
|
self.zmq_server.send_multipart(request_id, [error_result])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
llm_logger.error(
|
llm_logger.error(
|
||||||
f"Error happend while receving new request from zmq, details={e}, "
|
f"Error happend while receving new request from zmq, details={e}, "
|
||||||
|
@@ -216,35 +216,35 @@ class EngineClient:
|
|||||||
Validate stream options
|
Validate stream options
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if data.get("n"):
|
if data.get("n") is not None:
|
||||||
if data["n"] != 1:
|
if data["n"] != 1:
|
||||||
raise ValueError("n only support 1.")
|
raise ValueError("n only support 1.")
|
||||||
|
|
||||||
if data.get("max_tokens"):
|
if data.get("max_tokens") is not None:
|
||||||
if data["max_tokens"] < 1 or data["max_tokens"] >= self.max_model_len:
|
if data["max_tokens"] < 1 or data["max_tokens"] >= self.max_model_len:
|
||||||
raise ValueError(f"max_tokens can be defined [1, {self.max_model_len}).")
|
raise ValueError(f"max_tokens can be defined [1, {self.max_model_len}).")
|
||||||
|
|
||||||
if data.get("reasoning_max_tokens"):
|
if data.get("reasoning_max_tokens") is not None:
|
||||||
if data["reasoning_max_tokens"] > data["max_tokens"] or data["reasoning_max_tokens"] < 1:
|
if data["reasoning_max_tokens"] > data["max_tokens"] or data["reasoning_max_tokens"] < 1:
|
||||||
raise ValueError("reasoning_max_tokens must be between max_tokens and 1")
|
raise ValueError("reasoning_max_tokens must be between max_tokens and 1")
|
||||||
|
|
||||||
if data.get("top_p"):
|
if data.get("top_p") is not None:
|
||||||
if data["top_p"] > 1 or data["top_p"] < 0:
|
if data["top_p"] > 1 or data["top_p"] < 0:
|
||||||
raise ValueError("top_p value can only be defined [0, 1].")
|
raise ValueError("top_p value can only be defined [0, 1].")
|
||||||
|
|
||||||
if data.get("frequency_penalty"):
|
if data.get("frequency_penalty") is not None:
|
||||||
if not -2.0 <= data["frequency_penalty"] <= 2.0:
|
if not -2.0 <= data["frequency_penalty"] <= 2.0:
|
||||||
raise ValueError("frequency_penalty must be in [-2, 2]")
|
raise ValueError("frequency_penalty must be in [-2, 2]")
|
||||||
|
|
||||||
if data.get("temperature"):
|
if data.get("temperature") is not None:
|
||||||
if data["temperature"] < 0:
|
if data["temperature"] < 0:
|
||||||
raise ValueError("temperature must be non-negative")
|
raise ValueError("temperature must be non-negative")
|
||||||
|
|
||||||
if data.get("presence_penalty"):
|
if data.get("presence_penalty") is not None:
|
||||||
if not -2.0 <= data["presence_penalty"] <= 2.0:
|
if not -2.0 <= data["presence_penalty"] <= 2.0:
|
||||||
raise ValueError("presence_penalty must be in [-2, 2]")
|
raise ValueError("presence_penalty must be in [-2, 2]")
|
||||||
|
|
||||||
if data.get("seed"):
|
if data.get("seed") is not None:
|
||||||
if not 0 <= data["seed"] <= 922337203685477580:
|
if not 0 <= data["seed"] <= 922337203685477580:
|
||||||
raise ValueError("seed must be in [0, 922337203685477580]")
|
raise ValueError("seed must be in [0, 922337203685477580]")
|
||||||
|
|
||||||
|
@@ -380,9 +380,6 @@ def test_max_tokens_min():
|
|||||||
payload = build_request_payload(TEMPLATE, data)
|
payload = build_request_payload(TEMPLATE, data)
|
||||||
resp = send_request(URL, payload).json()
|
resp = send_request(URL, payload).json()
|
||||||
assert resp.get("detail").get("object") == "error", "max_tokens未0时API未拦截住"
|
assert resp.get("detail").get("object") == "error", "max_tokens未0时API未拦截住"
|
||||||
assert "reasoning_max_tokens must be between max_tokens and 1" in resp.get("detail").get(
|
|
||||||
"message", ""
|
|
||||||
), "未返回预期的 max_tokens 达到异常值0 的 错误信息"
|
|
||||||
|
|
||||||
|
|
||||||
def test_max_tokens_non_integer():
|
def test_max_tokens_non_integer():
|
||||||
|
Reference in New Issue
Block a user