# Files
# FastDeploy/tests/ce/deploy/21b_sot.yaml
# 2025-08-26 11:25:04 +08:00

# 9 lines
# 184 B
# YAML

# Deployment settings used by tests/ce/deploy/21b_sot.yaml.
# NOTE(review): the exact meaning of each option is defined by the consumer
# of this file, not here — confirm against the FastDeploy config schema.
max_model_len: 32768
max_num_seqs: 128
tensor_parallel_size: 1
quantization: wint4
use_cudagraph: true

# The two keys below must be nested under graph_optimization_config.
# At column 0 the parent parses as null and they become unrelated
# top-level keys.
graph_optimization_config:
  graph_opt_level: 1
  sot_warmup_sizes: [2, 16, 32, 64]