Files
FastDeploy/benchmarks/yaml/eb45-128k-wint4-tp1-plas.yaml
2025-09-16 15:55:12 +08:00

7 lines
266 B
YAML

# Benchmark configuration. Going by the file name (eb45-128k-wint4-tp1-plas), this
# variant targets a 128k context, wint4 quantization, tensor parallelism of 1, and
# PLAS attention.
# NOTE(review): the meaning of each key below is inferred from its name only;
# confirm against the FastDeploy parameter documentation before relying on it.

# Matches the "tp1" in the file name.
tensor_parallel_size: 1
# 131072 = 128 * 1024, i.e. the "128k" in the file name.
max_model_len: 131072
# Presumably the cap on concurrently scheduled sequences -- TODO confirm.
max_num_seqs: 32
# Matches the "wint4" in the file name.
quantization: wint4
# Presumably the per-step token budget used when batching -- TODO confirm.
max_num_batched_tokens: 8192
# The value is a single-quoted YAML string that contains a JSON object, not a nested
# YAML mapping. Without the quotes, YAML would parse the braces as a flow mapping.
# NOTE(review): the *_top_k_left / *_top_k_right pairs look like per-stage (encoder /
# decoder) top-k bounds; verify the exact semantics with the PLAS attention docs.
plas_attention_config: '{"plas_encoder_top_k_left": 50, "plas_encoder_top_k_right": 60, "plas_decoder_top_k_left": 100, "plas_decoder_top_k_right": 120}'