[Feature] Support mm model close prefix cache (#4502)

* support mm prefix cache close * add * fix * fix * fix --------- Co-authored-by: ltd0924 <luotingdan@baidu.com>
2025-10-30 03:22:05 +08:00 · 2025-10-21 09:56:47 +08:00
parent 9558912475
commit 3cd9d3060a
3 changed files with 41 additions and 0 deletions
--- a/fastdeploy/entrypoints/openai/api_server.py
+++ b/fastdeploy/entrypoints/openai/api_server.py
@@ -170,6 +170,7 @@ async def lifespan(app: FastAPI):
        enable_logprob=args.enable_logprob,
        workers=args.workers,
        tool_parser=args.tool_call_parser,
+        enable_prefix_caching=args.enable_prefix_caching,
    )
    await engine_client.connection_manager.initialize()
    app.state.dynamic_load_weight = args.dynamic_load_weight