[Feature] Set prefix caching as default (#3814)

* Set prefix caching as default

* Set prefix caching as default

* Set prefix caching as default

* skip dynamic load scene

* fix kill bug

* fix kill bug

* fix kill bug

* fix

* fix

* fix ci
This commit is contained in:
chenjian
2025-09-16 20:34:27 +08:00
committed by GitHub
parent de8638b1e9
commit 67e6d8c691
5 changed files with 23 additions and 8 deletions

View File

@@ -29,6 +29,7 @@ FD_ENGINE_QUEUE_PORTS = [
[9991, 9992],
]
FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
+FD_CACHE_QUEUE_PORTS = [FD_CACHE_QUEUE_PORT, FD_CACHE_QUEUE_PORT + 1, FD_CACHE_QUEUE_PORT + 2, FD_CACHE_QUEUE_PORT + 3]
models = [
@@ -54,7 +55,7 @@ def llm(request):
max_model_len=8192,
num_gpu_blocks_override=1024,
engine_worker_queue_port=FD_ENGINE_QUEUE_PORTS[port_index],
-cache_queue_port=FD_CACHE_QUEUE_PORT,
+cache_queue_port=FD_CACHE_QUEUE_PORTS[port_index],
load_choices="default",
enable_expert_parallel=True,
)