[Feature] support eplb in api_server (#4782)

* support eplb in api_server

* update code

* add eplb test case

* update eplb

* support tp+dp eplb

* update test case

* update code

* update code

* fix bug

* update copilot review

* update test case name
This commit is contained in:
kevin
2025-11-24 20:22:29 +08:00
committed by GitHub
parent d5bd64336a
commit 8e4e3ff510
25 changed files with 2102 additions and 421 deletions

View File

@@ -22,6 +22,7 @@ from .ipc_signal_const import (
KVCacheStatus,
ModelWeightsStatus,
PrefixTreeStatus,
RearrangeExpertStatus,
)
from .zmq_client import ZmqIpcClient
from .zmq_server import ZmqIpcServer, ZmqTcpServer
@@ -38,4 +39,5 @@ __all__ = [
"PrefixTreeStatus",
"ModelWeightsStatus",
"KVCacheStatus",
"RearrangeExpertStatus",
]

View File

@@ -55,10 +55,11 @@ class IPCSignal:
def __init__(
self,
name: str,
array: np.ndarray,
dtype: np.dtype,
array: np.ndarray = None,
dtype: np.dtype = None,
suffix: int = None,
create: bool = True,
shm_size: int = None,
) -> None:
"""Initialize or connect to a shared memory block.
@@ -68,29 +69,45 @@ class IPCSignal:
dtype: Data type of the array (must match array.dtype).
suffix: Suffix number that will be appended to the name.
create: If True, creates new memory block; otherwise connects to existing.
shm_size: Size of the shared memory block in bytes.
Raises:
AssertionError: If create=True but memory already exists, or dtype mismatch.
"""
assert isinstance(array, np.ndarray), "Input must be a numpy array"
assert dtype == array.dtype, "Specified dtype must match array dtype"
# Set a suffix for name to avoid name conflict while there are multiple engine launched
if suffix is not None:
name = name + f".{suffix}"
if create:
llm_logger.debug(f"creating ipc signal: {name}")
if shared_memory_exists(name):
llm_logger.warning(f"ShareMemory: {name} already exists, delete it")
SharedMemory(name=name, create=False).unlink()
self.shm = SharedMemory(create=True, size=array.nbytes, name=name)
self.value: np.ndarray = np.ndarray(array.shape, dtype=array.dtype, buffer=self.shm.buf)
self.value[:] = array # Initialize with input array data
if dtype is None or array is None:
assert shm_size is not None, "shm_size must be specified if array and dtype are None"
if create:
llm_logger.debug(f"creating ipc signal: {name}")
if shared_memory_exists(name):
llm_logger.warning(f"ShareMemory: {name} already exists, delete it")
SharedMemory(name=name, create=False).unlink()
self.shm = SharedMemory(create=True, size=shm_size, name=name)
self.value = None
else:
llm_logger.debug(f"attaching ipc signal: {name}")
self.shm = SharedMemory(name=name)
self.value = None
else:
llm_logger.debug(f"attaching ipc signal: {name}")
self.shm = SharedMemory(name=name)
self.value: np.ndarray = np.ndarray(array.shape, dtype=array.dtype, buffer=self.shm.buf)
assert isinstance(array, np.ndarray), "Input must be a numpy array"
assert dtype == array.dtype, "Specified dtype must match array dtype"
if create:
llm_logger.debug(f"creating ipc signal: {name}")
if shared_memory_exists(name):
llm_logger.warning(f"ShareMemory: {name} already exists, delete it")
SharedMemory(name=name, create=False).unlink()
self.shm = SharedMemory(create=True, size=array.nbytes, name=name)
self.value: np.ndarray = np.ndarray(array.shape, dtype=array.dtype, buffer=self.shm.buf)
self.value[:] = array # Initialize with input array data
else:
llm_logger.debug(f"attaching ipc signal: {name}")
self.shm = SharedMemory(name=name)
self.value: np.ndarray = np.ndarray(array.shape, dtype=array.dtype, buffer=self.shm.buf)
def clear(self) -> None:
"""Release system resources and unlink the shared memory block."""

View File

@@ -1,4 +1,21 @@
"""
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
from dataclasses import dataclass
from enum import Enum
@dataclass
@@ -30,3 +47,10 @@ class ExistTaskStatus:
EMPTY = 0
EXIST = 1
REFUSE = 2
class RearrangeExpertStatus(Enum):
    """Lifecycle states of an expert-rearrangement (EPLB) operation.

    Shared across processes via the IPC-signal machinery; the integer
    values are the wire format, so they must stay stable.
    """

    FREE = 0  # no rearrangement in progress
    DOING = 1  # rearrangement has been requested / is running
    LOAD_SUCC = 2  # load weight from disk success
    DONE = 3  # rearrangement finished