mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 09:07:10 +08:00

* [feat] support clearing prefix cache (cherry-picked from release/2.1)
* [fix] fix ipc suffix, use port instead
* [fix] fix prefix caching not enabled
* [fix] fix key/value_cache_scales indent
* [fix] fix ep group all-reduce
* [fix] fix clear/update lock not working when workers > 1
* [chore] add preemption triggered info log
* [fix] fix code style
* [fix] fix max_num_seqs config
* [fix] do not force enable_prefix_caching=False in dynamic loading
* [fix] fix ci
* Revert "[fix] fix ci"
This reverts commit 0bc6d55cc8.
* [fix] initialize available_gpu_block_num with max_gpu_block_num
* [fix] fix config splitwise_role
* [fix] fix clearing caches synchronization and add more logs
* [chore] print cache_ready_signal in log
* [fix] fix scheduler_config.splitwise_role
* [fix] fix cache_messager cache_ready_signal create=True
* [fix] stop cache messager from launching in mixed deployment
42 lines
1.2 KiB
Python
42 lines
1.2 KiB
Python
"""
|
|
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""
|
|
|
|
from .engine_cache_queue import EngineCacheQueue
|
|
from .engine_worker_queue import EngineWorkerQueue
|
|
from .ipc_signal import IPCSignal, shared_memory_exists
|
|
from .ipc_signal_const import (
|
|
ExistTaskStatus,
|
|
KVCacheStatus,
|
|
ModelWeightsStatus,
|
|
PrefixTreeStatus,
|
|
)
|
|
from .zmq_client import ZmqIpcClient
|
|
from .zmq_server import ZmqIpcServer, ZmqTcpServer
|
|
|
|
# Public names of this package, i.e. what `from <package> import *` exposes.
# Each entry matches a name brought in by the relative imports in this module.
__all__ = [
    "ZmqIpcClient",
    "ZmqIpcServer",
    "ZmqTcpServer",
    "IPCSignal",
    "EngineWorkerQueue",
    "EngineCacheQueue",
    "shared_memory_exists",
    "ExistTaskStatus",
    "PrefixTreeStatus",
    "ModelWeightsStatus",
    "KVCacheStatus",
]
|