mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Doc]add plas attention config (#4128)
This commit is contained in:
6
benchmarks/yaml/eb45-128k-wint4-tp1-plas.yaml
Normal file
6
benchmarks/yaml/eb45-128k-wint4-tp1-plas.yaml
Normal file
@@ -0,0 +1,6 @@
+tensor_parallel_size: 1
+max_model_len: 131072
+max_num_seqs: 32
+quantization: wint4
+max_num_batched_tokens: 8192
+plas_attention_config: '{"plas_encoder_top_k_left": 50, "plas_encoder_top_k_right": 60, "plas_decoder_top_k_left": 100, "plas_decoder_top_k_right": 120}'
Reference in New Issue
Block a user