mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
temporary change mtp logprob msg size (#5026)
Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> Co-authored-by: gaoziyuan <88373061+gzy19990617@users.noreply.github.com>
This commit is contained in:
@@ -23,9 +23,9 @@
|
||||
#define PD_BUILD_STATIC_OP(name) PD_BUILD_OP(static_op_##name)
|
||||
#endif
|
||||
|
||||
#define MAX_BSZ 512
|
||||
#define MAX_BSZ 256
|
||||
#define K 20
|
||||
#define MAX_DRAFT_TOKEN_NUM 6
|
||||
#define MAX_DRAFT_TOKEN_NUM 2
|
||||
|
||||
struct batch_msgdata {
|
||||
int tokens[MAX_DRAFT_TOKEN_NUM * (K + 1)];
|
||||
|
||||
@@ -23,9 +23,9 @@
|
||||
#define PD_BUILD_STATIC_OP(name) PD_BUILD_OP(static_op_##name)
|
||||
#endif
|
||||
|
||||
#define MAX_BSZ 512
|
||||
#define MAX_BSZ 256
|
||||
#define K 20
|
||||
#define MAX_DRAFT_TOKEN_NUM 6
|
||||
#define MAX_DRAFT_TOKEN_NUM 2
|
||||
|
||||
struct batch_msgdata {
|
||||
int tokens[MAX_DRAFT_TOKEN_NUM * (K + 1)];
|
||||
@@ -156,7 +156,8 @@ void SpeculateSaveOutMmsgTopK(const paddle::Tensor& sampled_token_ids,
|
||||
}
|
||||
}
|
||||
#ifdef SPECULATE_SAVE_WITH_OUTPUT_DEBUG
|
||||
std::cout << "msg data: " << std::endl;
|
||||
std::cout << "msg data (size: " << sizeof(msg_sed) - sizeof(long)
|
||||
<< "): " << std::endl;
|
||||
std::cout << "stop_flag: " << msg_sed.meta[0]
|
||||
<< ", message_flag: " << msg_sed.meta[1]
|
||||
<< ", bsz: " << msg_sed.meta[2] << std::endl;
|
||||
|
||||
@@ -35,9 +35,9 @@ from fastdeploy.utils import llm_logger, spec_logger
|
||||
from fastdeploy.worker.output import LogprobsLists
|
||||
|
||||
RECOVERY_STOP_SIGNAL = -3
|
||||
MAX_BSZ = 512
|
||||
MAX_BSZ = 256
|
||||
K = 20
|
||||
MAX_DRAFT_TOKENS = 6
|
||||
MAX_DRAFT_TOKENS = 2
|
||||
SPECULATE_MAX_BSZ = 256
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user