Launch expert_service before kv_cache initialization in worker_process (#3045)

* launch expert_service before kv_cache initialization * add two signals to make sure model loading and expert_service launching have finished * fix the EP bug * fix ep * update launching way * fix ep * update * rollback ep * pre-commit all files --------- Co-authored-by: RAM <gstian5555@outlook.com> Co-authored-by: Divano <dddivano@outlook.com>
2025-10-05 08:37:06 +08:00 · 2025-08-11 19:38:46 +08:00
parent c27a3dc43b
commit b23af29d0b
6 changed files with 175 additions and 100 deletions
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -431,7 +431,19 @@ class PaddleDisWorkerProc:

    def load_model(self) -> None:
        """Load weights and create model"""
+
        self.worker.load_model()
+        # Zero-initialised int32 buffer of length 1 handed to IPCSignal below.
+        # NOTE(review): with create=False this presumably attaches to a signal
+        # that another process already created under the same name and
+        # engine_pid suffix -- confirm against the IPCSignal implementation.
+        loaded_model_signal_data = np.zeros(shape=[1], dtype=np.int32)
+        self.loaded_model_signal = IPCSignal(
+            name="loaded_model_signal",
+            array=loaded_model_signal_data,
+            dtype=np.int32,
+            suffix=self.parallel_config.engine_pid,
+            create=False,
+        )
+        # With more than one rank, wait at a barrier so the flag below is only
+        # written once every rank has got past its own worker.load_model().
+        if self.ranks > 1:
+            paddle.distributed.barrier()
+        # Flip the flag to 1 to report that model loading has finished.
+        self.loaded_model_signal.value[0] = 1


 def parse_args():