diff --git a/custom_ops/gpu_ops/speculate_decoding/speculate_msg.h b/custom_ops/gpu_ops/speculate_decoding/speculate_msg.h index dbc177b4c..6279027c8 100644 --- a/custom_ops/gpu_ops/speculate_decoding/speculate_msg.h +++ b/custom_ops/gpu_ops/speculate_decoding/speculate_msg.h @@ -22,7 +22,7 @@ #include "paddle/extension.h" #define MAX_BSZ 256 -#define MAX_DRAFT_TOKENS 6 +#define MAX_DRAFT_TOKENS 2 // TODO: replace all msgdata in speculate-decoding struct speculate_msgdata {