mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00

* [feat] support clearing prefix cache (cherry-picked from release/2.1)
* [fix] fix ipc suffix, use port instead
* [fix] fix prefix caching not enabled
* [fix] fix key/value_cache_scales indent
* [fix] fix ep group all-reduce
* [fix] fix clear/update lock not working when workers > 1
* [chore] add preemption triggered info log
* [fix] fix code style
* [fix] fix max_num_seqs config
* [fix] do not force enable_prefix_caching=False in dynamic loading
* [fix] fix ci
* Revert "[fix] fix ci"
This reverts commit 0bc6d55cc8.
* [fix] initialize available_gpu_block_num with max_gpu_block_num
* [fix] fix config splitwise_role
* [fix] fix clearing caches synchronization and add more logs
* [chore] print cache_ready_signal in log
* [fix] fix scheduler_config.splitwise_role
* [fix] fix cache_messager cache_ready_signal create=True
* [fix] stop cache messager from launching in mixed deployment
33 lines
424 B
Python
33 lines
424 B
Python
from dataclasses import dataclass
|
|
|
|
|
|
@dataclass
class ModelWeightsStatus:
    """Integer status codes for model weights.

    The members are unannotated, so ``@dataclass`` registers no fields for
    them; they are plain class-level constants, read as
    ``ModelWeightsStatus.NORMAL`` and so on.
    """

    # NOTE(review): the meaning of each code is implied by its name only;
    # confirm against the code that writes and reads these values.
    NORMAL = 0
    UPDATING = 1
    CLEARING = -1
    CLEARED = -2
|
|
|
|
|
|
@dataclass
class PrefixTreeStatus:
    """Integer status codes for the prefix tree.

    The members are unannotated, so ``@dataclass`` registers no fields for
    them; they are plain class-level constants, read as
    ``PrefixTreeStatus.NORMAL`` and so on.
    """

    # NOTE(review): the meaning of each code is implied by its name only;
    # confirm against the code that writes and reads these values.
    NORMAL = 0
    UPDATING = 1
    CLEARING = -1
    CLEARED = -2
|
|
|
|
|
|
@dataclass
class KVCacheStatus:
    """Integer status codes for the KV cache.

    The members are unannotated, so ``@dataclass`` registers no fields for
    them; they are plain class-level constants, read as
    ``KVCacheStatus.NORMAL`` and so on.
    """

    # NOTE(review): the meaning of each code is implied by its name only;
    # confirm against the code that writes and reads these values.
    NORMAL = 0
    UPDATING = 1
    CLEARING = -1
    CLEARED = -2
|
|
|
|
|
|
@dataclass
class ExistTaskStatus:
    """Integer status codes describing whether a task exists.

    The members are unannotated, so ``@dataclass`` registers no fields for
    them; they are plain class-level constants, read as
    ``ExistTaskStatus.EMPTY`` and so on.
    """

    # NOTE(review): the meaning of each code is implied by its name only
    # (in particular what REFUSE rejects is not visible here); confirm
    # against the code that writes and reads these values.
    EMPTY = 0
    EXIST = 1
    REFUSE = 2
|