Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[Optimize] Support and robustness for tpN for PD (#4595)
* [Optimize] Support and robust for tpN for PD
* fix
* fix
* support dpM tpN for cache messager
* fix
* fix token counter
* fix bug for merge develop
* fix bug
* robust cache messager for v0
@@ -177,7 +177,7 @@ class PaddleDisWorkerProc:
         self.max_chips_per_node = 16 if current_platform.is_iluvatar() else 8
         if self.parallel_config.data_parallel_size > 1 and not envs.FD_ENABLE_MULTI_API_SERVER:
             launched_expert_service_signal_data = np.zeros(
-                shape=[min(self.parallel_config.data_parallel_size, self.max_chips_per_node)], dtype=np.int32
+                shape=[self.parallel_config.data_parallel_size // self.fd_config.nnode], dtype=np.int32
             )
             self.launched_expert_service_signal = IPCSignal(
                 name="launched_expert_service_signal",
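This hunk resizes the per-node `launched_expert_service_signal` array: `min(data_parallel_size, max_chips_per_node)` only counts correctly when each DP rank occupies exactly one chip, whereas `data_parallel_size // nnode` allocates one slot per DP rank actually hosted on the node. A minimal sketch of the sizing arithmetic, assuming `nnode = dp * tp / chips_per_node` (our assumption; the helper names are ours, not FastDeploy's):

```python
# Hypothetical sizing helpers (names are ours) contrasting the old and new
# shapes of the per-node readiness signal once tensor parallelism spans chips.
def signal_len_old(dp: int, chips_per_node: int) -> int:
    # Old: caps at the chip count, implicitly assuming one DP rank per chip.
    return min(dp, chips_per_node)

def signal_len_new(dp: int, nnode: int) -> int:
    # New: one slot per data-parallel rank hosted on this node.
    return dp // nnode

if __name__ == "__main__":
    dp, tp, chips_per_node = 4, 4, 8           # dp4 tp4 on 8-chip nodes
    nnode = dp * tp // chips_per_node          # 2 nodes under our assumption
    print(signal_len_old(dp, chips_per_node))  # 4 slots, yet each node hosts only 2 DP ranks
    print(signal_len_new(dp, nnode))           # 2 slots, matching the local DP ranks
```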
@@ -186,7 +186,12 @@ class PaddleDisWorkerProc:
                 suffix=self.parallel_config.engine_pid,
                 create=False,
             )
-            while self.launched_expert_service_signal.value[self.local_rank % self.max_chips_per_node] == 0:
+            while (
+                self.launched_expert_service_signal.value[
+                    self.parallel_config.local_data_parallel_id % self.max_chips_per_node
+                ]
+                == 0
+            ):
                 pass
 
         # init worker_ready_signal
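The second hunk fixes which slot each worker polls while waiting for its expert service: with tpN, several local ranks belong to one DP group, so indexing by `local_rank` can point past the now per-DP-group array, or at a slot nothing ever sets; indexing by `local_data_parallel_id` keys the wait on the DP group instead. A sketch of the slot mapping, assuming `local_data_parallel_id = local_rank // tp_size` (our assumption) and mocking `IPCSignal` with a plain numpy array:

```python
import numpy as np

tp_size = 4
signal = np.zeros(2, dtype=np.int32)  # one slot per local DP group (mock IPCSignal)

def slot_for(local_rank: int) -> int:
    # Our assumed mapping from a worker's local rank to its DP group's slot.
    local_data_parallel_id = local_rank // tp_size
    return local_data_parallel_id % len(signal)

# All four TP ranks of DP group 0 poll slot 0; group 1's ranks poll slot 1.
assert {slot_for(r) for r in range(4)} == {0}
assert {slot_for(r) for r in range(4, 8)} == {1}
# Under the old local_rank indexing, rank 2 would poll slot 2 -- out of range here.

signal[0] = 1  # expert service for DP group 0 reports ready
while signal[slot_for(0)] == 0:
    pass  # busy-wait, mirroring the patched worker loop
print("DP group 0 unblocked")
```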
@@ -568,7 +573,7 @@ class PaddleDisWorkerProc:
             is_server=False,
             num_client=self.parallel_config.tensor_parallel_size,
             client_id=self.parallel_config.tensor_parallel_rank,
-            local_data_parallel_id=self.parallel_config.data_parallel_rank,
+            local_data_parallel_id=self.parallel_config.local_data_parallel_id,
         )
 
     def load_model(self) -> None:
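The last hunk makes the IPC client key itself by `local_data_parallel_id` rather than the global `data_parallel_rank`: per-node endpoints restart their numbering on every node, so a global rank only lines up in the single-node case. A hypothetical mapping (our arithmetic, not FastDeploy code) showing how the two indices diverge across nodes:

```python
# dp4 over 2 nodes: global dp ranks run 0..3, but the node-local id, which a
# per-node IPC endpoint is keyed by, restarts at 0 on each node.
dp_size, nnode = 4, 2
dp_per_node = dp_size // nnode
for data_parallel_rank in range(dp_size):
    node = data_parallel_rank // dp_per_node
    local_data_parallel_id = data_parallel_rank % dp_per_node
    print(f"dp_rank={data_parallel_rank} -> node={node}, local_id={local_data_parallel_id}")
# dp_rank=0 -> node=0, local_id=0
# dp_rank=1 -> node=0, local_id=1
# dp_rank=2 -> node=1, local_id=0
# dp_rank=3 -> node=1, local_id=1
```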