Files
FastDeploy/tests/ce/deploy/ernie45t_21b_cinn.yaml

9 lines
186 B
YAML

# Deployment settings for the ernie45t_21b_cinn test configuration.
# NOTE(review): the meaning of each key is defined by the consuming tool's
# schema, which is not visible here; confirm against its documentation.
max_model_len: 32768
max_num_seqs: 128
tensor_parallel_size: 1
quantization: wint4
# The graph options must be indented under graph_optimization_config.
# At column 0 they parse as separate top-level keys and this key is null.
graph_optimization_config:
  graph_opt_level: 2
  sot_warmup_sizes: [2, 16, 32, 64]
  use_cudagraph: true