mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Feature] support eplb in api_server (#4782)
* support eplb in api_server * update code * add eplb test case * update eplb * support tp+dp eplb * update test case * update code * update code * fix bug * update copilot review * update test case name
This commit is contained in:
@@ -22,6 +22,7 @@ from .ipc_signal_const import (
|
||||
KVCacheStatus,
|
||||
ModelWeightsStatus,
|
||||
PrefixTreeStatus,
|
||||
RearrangeExpertStatus,
|
||||
)
|
||||
from .zmq_client import ZmqIpcClient
|
||||
from .zmq_server import ZmqIpcServer, ZmqTcpServer
|
||||
@@ -38,4 +39,5 @@ __all__ = [
|
||||
"PrefixTreeStatus",
|
||||
"ModelWeightsStatus",
|
||||
"KVCacheStatus",
|
||||
"RearrangeExpertStatus",
|
||||
]
|
||||
|
||||
@@ -55,10 +55,11 @@ class IPCSignal:
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
array: np.ndarray,
|
||||
dtype: np.dtype,
|
||||
array: np.ndarray = None,
|
||||
dtype: np.dtype = None,
|
||||
suffix: int = None,
|
||||
create: bool = True,
|
||||
shm_size: int = None,
|
||||
) -> None:
|
||||
"""Initialize or connect to a shared memory block.
|
||||
|
||||
@@ -68,29 +69,45 @@ class IPCSignal:
|
||||
dtype: Data type of the array (must match array.dtype).
|
||||
suffix: Suffix number that will be appended to the name.
|
||||
create: If True, creates new memory block; otherwise connects to existing.
|
||||
shm_size: Size of the shared memory block in bytes.
|
||||
|
||||
Raises:
|
||||
AssertionError: If array is not a numpy array, dtype does not match array.dtype, or shm_size is missing when array/dtype are omitted.
|
||||
"""
|
||||
assert isinstance(array, np.ndarray), "Input must be a numpy array"
|
||||
assert dtype == array.dtype, "Specified dtype must match array dtype"
|
||||
|
||||
# Append a suffix to the name to avoid conflicts when multiple engines are launched
|
||||
if suffix is not None:
|
||||
name = name + f".{suffix}"
|
||||
|
||||
if create:
|
||||
llm_logger.debug(f"creating ipc signal: {name}")
|
||||
if shared_memory_exists(name):
|
||||
llm_logger.warning(f"ShareMemory: {name} already exists, delete it")
|
||||
SharedMemory(name=name, create=False).unlink()
|
||||
self.shm = SharedMemory(create=True, size=array.nbytes, name=name)
|
||||
self.value: np.ndarray = np.ndarray(array.shape, dtype=array.dtype, buffer=self.shm.buf)
|
||||
self.value[:] = array # Initialize with input array data
|
||||
if dtype is None or array is None:
|
||||
assert shm_size is not None, "shm_size must be specified if array and dtype are None"
|
||||
|
||||
if create:
|
||||
llm_logger.debug(f"creating ipc signal: {name}")
|
||||
if shared_memory_exists(name):
|
||||
llm_logger.warning(f"ShareMemory: {name} already exists, delete it")
|
||||
SharedMemory(name=name, create=False).unlink()
|
||||
self.shm = SharedMemory(create=True, size=shm_size, name=name)
|
||||
self.value = None
|
||||
else:
|
||||
llm_logger.debug(f"attaching ipc signal: {name}")
|
||||
self.shm = SharedMemory(name=name)
|
||||
self.value = None
|
||||
else:
|
||||
llm_logger.debug(f"attaching ipc signal: {name}")
|
||||
self.shm = SharedMemory(name=name)
|
||||
self.value: np.ndarray = np.ndarray(array.shape, dtype=array.dtype, buffer=self.shm.buf)
|
||||
assert isinstance(array, np.ndarray), "Input must be a numpy array"
|
||||
assert dtype == array.dtype, "Specified dtype must match array dtype"
|
||||
|
||||
if create:
|
||||
llm_logger.debug(f"creating ipc signal: {name}")
|
||||
if shared_memory_exists(name):
|
||||
llm_logger.warning(f"ShareMemory: {name} already exists, delete it")
|
||||
SharedMemory(name=name, create=False).unlink()
|
||||
self.shm = SharedMemory(create=True, size=array.nbytes, name=name)
|
||||
self.value: np.ndarray = np.ndarray(array.shape, dtype=array.dtype, buffer=self.shm.buf)
|
||||
self.value[:] = array # Initialize with input array data
|
||||
else:
|
||||
llm_logger.debug(f"attaching ipc signal: {name}")
|
||||
self.shm = SharedMemory(name=name)
|
||||
self.value: np.ndarray = np.ndarray(array.shape, dtype=array.dtype, buffer=self.shm.buf)
|
||||
|
||||
def clear(self) -> None:
|
||||
"""Release system resources and unlink the shared memory block."""
|
||||
|
||||
@@ -1,4 +1,21 @@
|
||||
"""
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -30,3 +47,10 @@ class ExistTaskStatus:
|
||||
EMPTY = 0
|
||||
EXIST = 1
|
||||
REFUSE = 2
|
||||
|
||||
|
||||
# Enum of integer status codes (0-3) for an expert-rearrangement operation.
# NOTE(review): presumably belongs to the EPLB feature named in the commit title, and the
# FREE -> DOING -> LOAD_SUCC -> DONE progression is inferred from the values — confirm.
class RearrangeExpertStatus(Enum):
|
||||
FREE = 0  # NOTE(review): presumably "no rearrangement in progress" — confirm
|
||||
DOING = 1  # NOTE(review): presumably "rearrangement in progress" — confirm
|
||||
LOAD_SUCC = 2 # weights were successfully loaded from disk
|
||||
DONE = 3  # NOTE(review): presumably "rearrangement finished" — confirm
|
||||
|
||||
Reference in New Issue
Block a user