mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
7 lines
266 B
YAML
7 lines
266 B
YAML
# Runtime configuration: a flat mapping of six settings.
# NOTE(review): the per-key notes below are inferred from the key names only;
# confirm them against the documentation of the consumer of this file.

# Presumably the number of devices the model is sharded across — TODO confirm.
tensor_parallel_size: 1

# Presumably the maximum context length in tokens (131072 = 128 * 1024) — TODO confirm.
max_model_len: 131072

# Presumably the upper bound on concurrently scheduled sequences — TODO confirm.
max_num_seqs: 32

# Quantization scheme identifier, passed through as a plain string.
quantization: wint4

# Presumably the per-step token budget for batching — TODO confirm.
max_num_batched_tokens: 8192

# The value is a single-quoted YAML string whose content is a JSON object, so a
# YAML parser yields one string here, not a nested mapping. Keep the quoting
# intact; whoever consumes it is expected to decode the JSON — verify against caller.
plas_attention_config: '{"plas_encoder_top_k_left": 50, "plas_encoder_top_k_right": 60, "plas_decoder_top_k_left": 100, "plas_decoder_top_k_right": 120}'