mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
Support limit thinking lengths (#4069)
Co-authored-by: K11OntheBoat <ruianmaidanglao@163.com>
This commit is contained in:
@@ -587,6 +587,7 @@ def parse_args():
|
||||
help="enable expert parallel",
|
||||
)
|
||||
parser.add_argument("--ori_vocab_size", type=int, default=None)
|
||||
parser.add_argument("--think_end_id", type=int, default=-1)
|
||||
|
||||
parser.add_argument(
|
||||
"--quantization",
|
||||
|
Reference in New Issue
Block a user