mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Feature] Support caching KV cache for output tokens (#4535)
* [Feature] Support caching KV cache for output tokens * fix bug * fix ci bug * improve coverage * enable output caching by default * fix ci --------- Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
@@ -237,6 +237,10 @@ class EngineArgs:
|
||||
"""
|
||||
Flag to enable prefix caching.
|
||||
"""
|
||||
enable_output_caching: bool = True
|
||||
"""
|
||||
Flag to enable kv cache for output tokens, only valid in V1 scheduler.
|
||||
"""
|
||||
|
||||
disable_custom_all_reduce: bool = False
|
||||
"""
|
||||
@@ -955,6 +959,13 @@ class EngineArgs:
|
||||
help="Flag to enable prefix caching.",
|
||||
)
|
||||
|
||||
perf_group.add_argument(
|
||||
"--enable-output-caching",
|
||||
action=argparse.BooleanOptionalAction,
|
||||
default=EngineArgs.enable_output_caching,
|
||||
help="Flag to enable output caching.",
|
||||
)
|
||||
|
||||
perf_group.add_argument(
|
||||
"--enable-chunked-prefill",
|
||||
action="store_true",
|
||||
|
||||
Reference in New Issue
Block a user