[Feature][MTP] Support cacheKV transfer in per_chunk mode (#2890)

* Support chunk_prefill in both normal and speculative-decoding (MTP) modes * Optimize the PD-disaggregation config * Fix bug
2025-10-05 16:48:03 +08:00 · 2025-07-17 17:58:08 +08:00
parent 67180c1ff9
commit d49f8fb30a
10 changed files with 110 additions and 27 deletions
--- a/custom_ops/gpu_ops/msg_utils.h
+++ b/custom_ops/gpu_ops/msg_utils.h
@@ -35,5 +35,5 @@ struct msgdata {

 struct msgdatakv {
    long mtype;
-    int mtext[MAX_BSZ * 2 + 2];  // encoder_count, layer_id, bid- pair
+    int mtext[MAX_BSZ * 3 + 2];  // encoder_count, layer_id, then per-batch entries. NOTE(review): capacity is now 3 ints per batch slot, so the old "bid- pair" wording looks stale -- confirm the third field with the producer/consumer
 };