[Feature][MTP] Support new MTP (#3656)

* update multi-draft-token strategy

* fix format

* support hybrid MTP with n-gram speculative decoding method
This commit is contained in:
freeliuzc
2025-08-27 19:38:26 +08:00
committed by GitHub
parent 62659a7a73
commit c753f1fc9e
20 changed files with 501 additions and 579 deletions

View File

@@ -23,14 +23,7 @@
#define PD_BUILD_STATIC_OP(name) PD_BUILD_OP(static_op_##name)
#endif
// Capacity constant used to size msgdata::mtext below.
// Presumably the maximum batch size ("bsz") a single message can describe — confirm.
#define MAX_BSZ 256
// Capacity constant used to size msgdata::mtext below.
// Presumably the maximum number of draft tokens carried per batch entry — confirm.
#define MAX_DRAFT_TOKENS 6
// Fixed-size message record; an instance of it (msg_rcv) is used as the
// receive buffer in SpeculateGetOutput.
struct msgdata {
// Message type field placed first in the record.
// NOTE(review): System V msgsnd/msgrcv expect this leading field to be a
// `long`; int64_t only matches where `long` is 64 bits wide — confirm the
// target platforms.
int64_t mtype;
// Integer payload. The capacity breaks down as:
//   2                          -> stop_flag and bsz
//   MAX_BSZ                    -> one accept_num entry per batch slot
//   MAX_BSZ * MAX_DRAFT_TOKENS -> token slots
// The field order is listed in the trailing comment below.
int mtext[MAX_BSZ * MAX_DRAFT_TOKENS + MAX_BSZ +
2]; // stop_flag, bsz, accept_num*bsz, tokens...
};
#include "speculate_msg.h"
void SpeculateGetOutput(const paddle::Tensor& x,
int64_t rank_id,
@@ -54,7 +47,7 @@ void SpeculateGetOutput(const paddle::Tensor& x,
msg_queue_id = inference_msg_queue_id_from_env;
}
static struct msgdata msg_rcv;
static struct speculate_msgdata msg_rcv;
static key_t key = ftok("./", msg_queue_id);