[Feature][MTP] Support new speculative decoding method named hybrid MTP with ngram (#3610)

This commit is contained in:
freeliuzc
2025-08-26 14:29:22 +08:00
committed by GitHub
parent 0a0d2959b9
commit 52eda7fdb3
20 changed files with 454 additions and 571 deletions

View File

@@ -23,14 +23,7 @@
#define PD_BUILD_STATIC_OP(name) PD_BUILD_OP(static_op_##name)
#endif
// Capacity limits used to size the message payload below.
// NOTE(review): presumably the maximum batch size and the maximum number of
// draft tokens per batch entry -- confirm against the producer side.
#define MAX_BSZ 256
#define MAX_DRAFT_TOKENS 6

// Fixed-size message record; SpeculateGetOutput declares its receive buffer
// (msg_rcv) with this type.
//
// Payload layout of mtext, in order:
//   [0]              stop_flag
//   [1]              bsz
//   next MAX_BSZ     accept_num, one slot per batch entry
//   remaining slots  tokens, MAX_DRAFT_TOKENS slots per batch entry
struct msgdata {
// NOTE(review): presumably the message-type field expected by the System V
// message queue calls -- confirm it matches the platform's `long`.
int64_t mtype;
// 2 header ints + per-entry accept counts + per-entry draft tokens.
int mtext[2 + MAX_BSZ + MAX_BSZ * MAX_DRAFT_TOKENS];
};
#include "speculate_msg.h"
void SpeculateGetOutput(const paddle::Tensor& x,
int64_t rank_id,
@@ -54,7 +47,7 @@ void SpeculateGetOutput(const paddle::Tensor& x,
msg_queue_id = inference_msg_queue_id_from_env;
}
static struct msgdata msg_rcv;
static struct speculate_msgdata msg_rcv;
static key_t key = ftok("./", msg_queue_id);