Temporarily change MTP logprob msg size (#5026)

Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
Co-authored-by: gaoziyuan <88373061+gzy19990617@users.noreply.github.com>
This commit is contained in:
GoldPancake
2025-11-15 13:39:40 +08:00
committed by GitHub
parent 936a80962f
commit cbcb5c6e84
3 changed files with 8 additions and 7 deletions

View File

@@ -23,9 +23,9 @@
#define PD_BUILD_STATIC_OP(name) PD_BUILD_OP(static_op_##name)
#endif
-#define MAX_BSZ 512
+#define MAX_BSZ 256
#define K 20
-#define MAX_DRAFT_TOKEN_NUM 6
+#define MAX_DRAFT_TOKEN_NUM 2
struct batch_msgdata {
int tokens[MAX_DRAFT_TOKEN_NUM * (K + 1)];

View File

@@ -23,9 +23,9 @@
#define PD_BUILD_STATIC_OP(name) PD_BUILD_OP(static_op_##name)
#endif
-#define MAX_BSZ 512
+#define MAX_BSZ 256
#define K 20
-#define MAX_DRAFT_TOKEN_NUM 6
+#define MAX_DRAFT_TOKEN_NUM 2
struct batch_msgdata {
int tokens[MAX_DRAFT_TOKEN_NUM * (K + 1)];
@@ -156,7 +156,8 @@ void SpeculateSaveOutMmsgTopK(const paddle::Tensor& sampled_token_ids,
}
}
#ifdef SPECULATE_SAVE_WITH_OUTPUT_DEBUG
-std::cout << "msg data: " << std::endl;
+std::cout << "msg data (size: " << sizeof(msg_sed) - sizeof(long)
+<< "): " << std::endl;
std::cout << "stop_flag: " << msg_sed.meta[0]
<< ", message_flag: " << msg_sed.meta[1]
<< ", bsz: " << msg_sed.meta[2] << std::endl;

View File

@@ -35,9 +35,9 @@ from fastdeploy.utils import llm_logger, spec_logger
from fastdeploy.worker.output import LogprobsLists
RECOVERY_STOP_SIGNAL = -3
-MAX_BSZ = 512
+MAX_BSZ = 256
K = 20
-MAX_DRAFT_TOKENS = 6
+MAX_DRAFT_TOKENS = 2
SPECULATE_MAX_BSZ = 256