[NewFeature]Support dp multi api server && Fix some bug in mixed ep && merge develop (#3598)

* [Feature] update ep

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix queue ports idx

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* Update engine.py

* fix ci

* fix some bug in mixed ep

* add server fix and op fix

* rm some log

* fix code style

* ltd fix

* fix

* fix

* fix some bug

* fix bug

* fix bug

* fix style

* Update config.py

* Update splitwise_connector.py

* Update cache_messager.py

* Update __init__.py

* merge and fix

* Update engine.py

* Update common_engine.py

* Update run_ci_xpu.sh

* Update ernie_processor.py

* Update ernie_processor.py

---------

Co-authored-by: ltd0924 <ltd0924@sina.com>
Co-authored-by: ltd0924 <32387785+ltd0924@users.noreply.github.com>
This commit is contained in:
gaoziyuan
2025-08-26 19:59:02 +08:00
committed by GitHub
parent cbce94a00e
commit 82e64b13e1
24 changed files with 1244 additions and 1200 deletions

View File

@@ -45,6 +45,7 @@ class EngineClient:
max_model_len,
tensor_parallel_size,
pid,
port,
limit_mm_per_prompt,
mm_processor_kwargs,
# enable_mm=False,
@@ -75,13 +76,19 @@ class EngineClient:
self.data_processor = input_processor.create_processor()
self.max_model_len = max_model_len
max_chips_per_node = 16 if current_platform.is_iluvatar() else 8
array_size = min(max_chips_per_node, tensor_parallel_size * data_parallel_size)
if tensor_parallel_size < max_chips_per_node:
self.is_master = True
else:
self.is_master = False
array_size = min(max_chips_per_node, tensor_parallel_size)
self.worker_healthy_live_recorded_time_array = np.zeros(shape=[array_size], dtype=np.int32)
self.worker_healthy_live_signal = IPCSignal(
name="worker_healthy_live_signal",
array=self.worker_healthy_live_recorded_time_array,
dtype=np.int32,
suffix=pid,
suffix=port,
create=False,
)
self.semaphore = StatefulSemaphore((FD_SUPPORT_MAX_CONNECTIONS + workers - 1) // workers)
@@ -90,7 +97,7 @@ class EngineClient:
name="model_weights_status",
array=model_weights_status,
dtype=np.int32,
suffix=pid,
suffix=port,
create=False,
)
self.connection_manager = DealerConnectionManager(