mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 00:33:03 +08:00
[NewFeature] Support dp multi api server && Fix some bugs in mixed ep && merge develop (#3598)
* [Feature] update ep * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * fix queue ports idx * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * Update engine.py * fix ci * fix some bug in mixed ep * add server fix and op fix * rm some log * fix code style * ltd fix * fix * fix * fix some bug * fix bug * fix bug * fix style * Update config.py * Update splitwise_connector.py * Update cache_messager.py * Update __init__.py * merge and fix * Update engine.py * Update common_engine.py * Update run_ci_xpu.sh * Update ernie_processor.py * Update ernie_processor.py --------- Co-authored-by: ltd0924 <ltd0924@sina.com> Co-authored-by: ltd0924 <32387785+ltd0924@users.noreply.github.com>
This commit is contained in:
@@ -45,6 +45,7 @@ class EngineClient:
|
||||
max_model_len,
|
||||
tensor_parallel_size,
|
||||
pid,
|
||||
port,
|
||||
limit_mm_per_prompt,
|
||||
mm_processor_kwargs,
|
||||
# enable_mm=False,
|
||||
@@ -75,13 +76,19 @@ class EngineClient:
|
||||
self.data_processor = input_processor.create_processor()
|
||||
self.max_model_len = max_model_len
|
||||
max_chips_per_node = 16 if current_platform.is_iluvatar() else 8
|
||||
array_size = min(max_chips_per_node, tensor_parallel_size * data_parallel_size)
|
||||
|
||||
if tensor_parallel_size < max_chips_per_node:
|
||||
self.is_master = True
|
||||
else:
|
||||
self.is_master = False
|
||||
|
||||
array_size = min(max_chips_per_node, tensor_parallel_size)
|
||||
self.worker_healthy_live_recorded_time_array = np.zeros(shape=[array_size], dtype=np.int32)
|
||||
self.worker_healthy_live_signal = IPCSignal(
|
||||
name="worker_healthy_live_signal",
|
||||
array=self.worker_healthy_live_recorded_time_array,
|
||||
dtype=np.int32,
|
||||
suffix=pid,
|
||||
suffix=port,
|
||||
create=False,
|
||||
)
|
||||
self.semaphore = StatefulSemaphore((FD_SUPPORT_MAX_CONNECTIONS + workers - 1) // workers)
|
||||
@@ -90,7 +97,7 @@ class EngineClient:
|
||||
name="model_weights_status",
|
||||
array=model_weights_status,
|
||||
dtype=np.int32,
|
||||
suffix=pid,
|
||||
suffix=port,
|
||||
create=False,
|
||||
)
|
||||
self.connection_manager = DealerConnectionManager(
|
||||
|
Reference in New Issue
Block a user