[NewFeature] Support dp multi api server && Fix some bugs in mixed ep && merge develop (#3598)

* [Feature] update ep * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * fix queue ports idx * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * Update engine.py * fix ci * fix some bug in mixed ep * add server fix and op fix * rm some log * fix code style * ltd fix * fix * fix * fix some bug * fix bug * fix bug * fix style * Update config.py * Update splitwise_connector.py * Update cache_messager.py * Update __init__.py * merge and fix * Update engine.py * Update common_engine.py * Update run_ci_xpu.sh * Update ernie_processor.py * Update ernie_processor.py --------- Co-authored-by: ltd0924 <ltd0924@sina.com> Co-authored-by: ltd0924 <32387785+ltd0924@users.noreply.github.com>
2025-10-05 00:33:03 +08:00 · 2025-08-26 19:59:02 +08:00
parent cbce94a00e
commit 82e64b13e1
24 changed files with 1244 additions and 1200 deletions
--- a/fastdeploy/entrypoints/engine_client.py
+++ b/fastdeploy/entrypoints/engine_client.py
@@ -45,6 +45,7 @@ class EngineClient:
        max_model_len,
        tensor_parallel_size,
        pid,
+        port,
        limit_mm_per_prompt,
        mm_processor_kwargs,
        # enable_mm=False,
@@ -75,13 +76,19 @@ class EngineClient:
        self.data_processor = input_processor.create_processor()
        self.max_model_len = max_model_len
        max_chips_per_node = 16 if current_platform.is_iluvatar() else 8
-        array_size = min(max_chips_per_node, tensor_parallel_size * data_parallel_size)
+
+        if tensor_parallel_size < max_chips_per_node:
+            self.is_master = True
+        else:
+            self.is_master = False
+
+        array_size = min(max_chips_per_node, tensor_parallel_size)
        self.worker_healthy_live_recorded_time_array = np.zeros(shape=[array_size], dtype=np.int32)
        self.worker_healthy_live_signal = IPCSignal(
            name="worker_healthy_live_signal",
            array=self.worker_healthy_live_recorded_time_array,
            dtype=np.int32,
-            suffix=pid,
+            suffix=port,
            create=False,
        )
        self.semaphore = StatefulSemaphore((FD_SUPPORT_MAX_CONNECTIONS + workers - 1) // workers)
@@ -90,7 +97,7 @@ class EngineClient:
            name="model_weights_status",
            array=model_weights_status,
            dtype=np.int32,
-            suffix=pid,
+            suffix=port,
            create=False,
        )
        self.connection_manager = DealerConnectionManager(