Launch expert_service before kv_cache initialization in worker_process (#3045)

* launch expert_service before kv_cache initialization

* add two signals to make sure model loading and expert_service launching are finished (see the sketch after the commit message)

* fix the EP bug

* fix ep

* update the launching approach

* fix ep

* update

* roll back ep

* pre-commit all files

---------

Co-authored-by: RAM <gstian5555@outlook.com>
Co-authored-by: Divano <dddivano@outlook.com>
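Note: the "two signals" bullet above only names the mechanism. Below is a minimal sketch of the idea, not FastDeploy's actual implementation; the event names, process layout, and placeholder steps are hypothetical. expert_service is launched first, and kv_cache initialization is gated on two readiness events, one for model loading and one for the expert_service launch.

import multiprocessing as mp


def expert_service_entry(expert_ready_event):
    # ... launch the expert service here (hypothetical placeholder) ...
    expert_ready_event.set()  # signal 1: expert_service launch finished


def worker_process(model_loaded_event, expert_ready_event):
    # Launch expert_service first, before kv_cache initialization.
    mp.Process(target=expert_service_entry, args=(expert_ready_event,), daemon=True).start()

    # ... load the model here (hypothetical placeholder) ...
    model_loaded_event.set()  # signal 2: model loading finished

    # Initialize kv_cache only after both readiness signals are set.
    model_loaded_event.wait()
    expert_ready_event.wait()
    # ... initialize kv_cache here (hypothetical placeholder) ...


if __name__ == "__main__":
    worker_process(mp.Event(), mp.Event())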
Author: Zero Rains
Date: 2025-08-11 19:38:46 +08:00
Committed by: GitHub
Parent: c27a3dc43b
Commit: b23af29d0b
6 changed files with 175 additions and 100 deletions


@@ -7,23 +7,16 @@
 Boundary value checking for API parameters
 """
 import json
 
-from core import (
-    TEMPLATE,
-    URL,
-    build_request_payload,
-    send_request,
-)
+from core import TEMPLATE, URL, build_request_payload, send_request
 
 
 def test_max_min_1_token():
     data = {
         "stream": False,
         "messages": [{"role": "user", "content": "非洲的首都是?"}],
         "max_tokens": 1,
-        "metadata": {
-            "min_tokens": 1
-        },
+        "metadata": {"min_tokens": 1},
     }
     payload = build_request_payload(TEMPLATE, data)
     response = send_request(URL, payload).json()
@@ -33,4 +26,4 @@ def test_max_min_1_token():
     completion_tokens = response["usage"]["completion_tokens"]
     assert completion_tokens == 1, f"实际生成的token数为: {completion_tokens}, 应该为1"
     finish_reason = response["choices"][0]["finish_reason"]
-    assert finish_reason == "length", f"内容不可能完整生成, 但实际finish_reason为: {response}"
+    assert finish_reason == "length", f"内容不可能完整生成, 但实际finish_reason为: {response}"