mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
Launch expert_service before kv_cache initialization in worker_process (#3045)
* launch expert_service before kv_cache initialization * add two signals to make sure model loading and expert_service launching have finished * fix the EP bug * fix ep * update launching way * fix ep * update * roll back ep * pre-commit all files --------- Co-authored-by: RAM <gstian5555@outlook.com> Co-authored-by: Divano <dddivano@outlook.com>
This commit is contained in:
@@ -431,7 +431,19 @@ class PaddleDisWorkerProc:
|
||||
|
||||
def load_model(self) -> None:
    """Load the model on this worker and publish a "model loaded" flag.

    Steps, in order:
      1. Delegate weight loading / model creation to ``self.worker``.
      2. Open the ``loaded_model_signal`` IPC signal, keyed by the engine
         pid, with ``create=False``.
      3. When more than one rank is running, wait on a distributed
         barrier before touching the flag.
      4. Write ``1`` into the first slot of the signal.
    """
    self.worker.load_model()

    # Single-slot int32 buffer handed to the signal as its backing array.
    flag_buffer = np.zeros((1,), dtype=np.int32)

    # NOTE(review): create=False presumably attaches to a signal that another
    # process (the engine, judging by the pid suffix) already created —
    # confirm against IPCSignal's implementation.
    self.loaded_model_signal = IPCSignal(
        name="loaded_model_signal",
        array=flag_buffer,
        dtype=np.int32,
        suffix=self.parallel_config.engine_pid,
        create=False,
    )

    # The barrier runs before the flag is raised, so with several ranks no
    # rank writes the flag until every rank has reached this point.
    is_multi_rank = self.ranks > 1
    if is_multi_rank:
        paddle.distributed.barrier()

    self.loaded_model_signal.value[0] = 1
|
||||
|
||||
|
||||
def parse_args():
|
||||
|
Reference in New Issue
Block a user