mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[CE]add x1 w4a8c8 benchmark config (#3607)
* [CE]add x1 w4a8c8 benchmark config * [CE]add x1 w4a8c8 benchmark config * [CE]add x1 w4a8c8 benchmark config
This commit is contained in:
8
benchmarks/yaml/request_yaml/x1.yaml
Normal file
8
benchmarks/yaml/request_yaml/x1.yaml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
top_p: 0.95
|
||||||
|
temperature: 0.6
|
||||||
|
metadata:
|
||||||
|
min_tokens: 1
|
||||||
|
max_tokens: 65535
|
||||||
|
repetition_penalty: 1.0
|
||||||
|
frequency_penalty: 0
|
||||||
|
presence_penalty: 0
|
||||||
10
benchmarks/yaml/x1-64k-w4a8c8-tp4.yaml
Normal file
10
benchmarks/yaml/x1-64k-w4a8c8-tp4.yaml
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
reasoning-parser: ernie_x1
|
||||||
|
tool_call_parser: ernie_x1
|
||||||
|
tensor_parallel_size: 4
|
||||||
|
max_model_len: 65536
|
||||||
|
max_num_seqs: 128
|
||||||
|
enable_prefix_caching: True
|
||||||
|
enable_chunked_prefill: True
|
||||||
|
gpu_memory_utilization: 0.85
|
||||||
|
use_cudagraph: True
|
||||||
|
enable_custom_all_reduce: True
|
||||||
Reference in New Issue
Block a user