[PD Disaggregation] support DP via v1 router and decouple DP and EP (#5197)

* [fix] support DP via v1 router and decouple DP and EP
* [fix] fix scripts
* [fix] reset model path
* [fix] dp use get_output_ep, fix router port type, update scripts
* [merge] merge with latest code
* [chore] remove some debug log
* [fix] fix code style check
* [fix] fix test_multi_api_server for log_dir name
* [chore] reduce logs
* Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-12-24 13:28:13 +08:00 · 2025-12-04 15:38:43 +08:00
parent 5cd17fd662
commit f4119d51b4
15 changed files with 394 additions and 146 deletions
--- a/custom_ops/gpu_ops/get_output.cc
+++ b/custom_ops/gpu_ops/get_output.cc
@@ -26,71 +26,70 @@
 #define MAX_BSZ 512
 // #define GET_OUTPUT_DEBUG
 struct msgdata {
-    long mtype;
-    int mtext[MAX_BSZ + 2];  // stop_flag, bsz, tokens
+  long mtype;
+  int mtext[MAX_BSZ + 2];  // stop_flag, bsz, tokens
 };

 void GetOutput(const paddle::Tensor& x,
               int64_t rank_id,
               bool wait_flag,
               int msg_queue_id) {
-    if (rank_id > 0) {
-        return;
-    }
-    static struct msgdata msg_rcv;
-    if (const char* inference_msg_queue_id_env_p =
-            std::getenv("INFERENCE_MSG_QUEUE_ID")) {
-        std::string inference_msg_queue_id_env_str(
-            inference_msg_queue_id_env_p);
-        int inference_msg_queue_id_from_env =
-            std::stoi(inference_msg_queue_id_env_str);
-#ifdef GET_OUTPUT_DEBUG
-        std::cout << "Your INFERENCE_MSG_QUEUE_ID is: "
-                  << inference_msg_queue_id_from_env << std::endl;
-#endif
-        msg_queue_id = inference_msg_queue_id_from_env;
-    }
-    static key_t key = ftok("/dev/shm", msg_queue_id);
-    static int msgid = msgget(key, IPC_CREAT | 0666);
-
-#ifdef GET_OUTPUT_DEBUG
-    std::cout << "get_output_key: " << key << std::endl;
-    std::cout << "get_output msgid: " << msgid << std::endl;
-#endif
-
-    int64_t* out_data = const_cast<int64_t*>(x.data<int64_t>());
-    int ret = -1;
-    if (!wait_flag) {
-        ret = msgrcv(msgid, &msg_rcv, (MAX_BSZ + 2) * 4, 0, IPC_NOWAIT);
-    } else {
-        ret = msgrcv(msgid, &msg_rcv, (MAX_BSZ + 2) * 4, 0, 0);
-    }
-    if (ret == -1) {
-        out_data[0] = -2;
-        out_data[1] = 0;
-        return;
-    }
-    int bsz = msg_rcv.mtext[1];
-
-    for (int64_t i = 0; i < bsz + 2; i++) {
-        out_data[i] = (int64_t)msg_rcv.mtext[i];
-    }
-#ifdef GET_OUTPUT_DEBUG
-    std::cout << "get_output finished: " << msgid << std::endl;
-#endif
-
+  if (rank_id > 0) {
    return;
+  }
+  static struct msgdata msg_rcv;
+  if (const char* inference_msg_queue_id_env_p =
+          std::getenv("INFERENCE_MSG_QUEUE_ID")) {
+    std::string inference_msg_queue_id_env_str(inference_msg_queue_id_env_p);
+    int inference_msg_queue_id_from_env =
+        std::stoi(inference_msg_queue_id_env_str);
+#ifdef GET_OUTPUT_DEBUG
+    std::cout << "Your INFERENCE_MSG_QUEUE_ID is: "
+              << inference_msg_queue_id_from_env << std::endl;
+#endif
+    msg_queue_id = inference_msg_queue_id_from_env;
+  }
+  static key_t key = ftok("/dev/shm", msg_queue_id);
+  static int msgid = msgget(key, IPC_CREAT | 0666);
+
+#ifdef GET_OUTPUT_DEBUG
+  std::cout << "get_output_key: " << key << std::endl;
+  std::cout << "get_output msgid: " << msgid << std::endl;
+#endif
+
+  int64_t* out_data = const_cast<int64_t*>(x.data<int64_t>());
+  int ret = -1;
+  if (!wait_flag) {
+    ret = msgrcv(msgid, &msg_rcv, (MAX_BSZ + 2) * 4, 0, IPC_NOWAIT);
+  } else {
+    ret = msgrcv(msgid, &msg_rcv, (MAX_BSZ + 2) * 4, 0, 0);
+  }
+  if (ret == -1) {
+    out_data[0] = -2;
+    out_data[1] = 0;
+    return;
+  }
+  int bsz = msg_rcv.mtext[1];
+
+  for (int64_t i = 0; i < bsz + 2; i++) {
+    out_data[i] = (int64_t)msg_rcv.mtext[i];
+  }
+#ifdef GET_OUTPUT_DEBUG
+  std::cout << "get_output finished: " << msgid << std::endl;
+#endif
+
+  return;
 }

 void GetOutputStatic(const paddle::Tensor& x, int64_t rank_id, bool wait_flag) {
-    GetOutput(x, rank_id, wait_flag, 1);
+  GetOutput(x, rank_id, wait_flag, 1);
 }

 void GetOutputDynamic(const paddle::Tensor& x,
                      int64_t rank_id,
                      bool wait_flag,
                      int msg_queue_id) {
-    GetOutput(x, rank_id, wait_flag, msg_queue_id);
+  GetOutput(x, rank_id, wait_flag, msg_queue_id);
 }

 PD_BUILD_STATIC_OP(get_output)