[Feature] Set prefix caching as default (#3814)

* Set prefix caching as default
* Set prefix caching as default
* Set prefix caching as default
* skip dynamic load scene
* fix kill bug
* fix kill bug
* fix kill bug
* fix
* fix
* fix ci
2025-10-25 01:20:43 +08:00 · 2025-09-16 20:34:27 +08:00
parent de8638b1e9
commit 67e6d8c691
5 changed files with 23 additions and 8 deletions
--- a/tests/model_loader/test_w4a8_model.py
+++ b/tests/model_loader/test_w4a8_model.py
@@ -29,6 +29,7 @@ FD_ENGINE_QUEUE_PORTS = [
    [9991, 9992],
 ]
 FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
+FD_CACHE_QUEUE_PORTS = [FD_CACHE_QUEUE_PORT, FD_CACHE_QUEUE_PORT + 1, FD_CACHE_QUEUE_PORT + 2, FD_CACHE_QUEUE_PORT + 3]


 models = [
@@ -54,7 +55,7 @@ def llm(request):
            max_model_len=8192,
            num_gpu_blocks_override=1024,
            engine_worker_queue_port=FD_ENGINE_QUEUE_PORTS[port_index],
-            cache_queue_port=FD_CACHE_QUEUE_PORT,
+            cache_queue_port=FD_CACHE_QUEUE_PORTS[port_index],
            load_choices="default",
            enable_expert_parallel=True,
        )