mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-09-26 20:41:53 +08:00
[CE]add x1 w4a8c8 benchmark config (#3607)
* [CE]add x1 w4a8c8 benchmark config * [CE]add x1 w4a8c8 benchmark config * [CE]add x1 w4a8c8 benchmark config
This commit is contained in:
8
benchmarks/yaml/request_yaml/x1.yaml
Normal file
8
benchmarks/yaml/request_yaml/x1.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
top_p: 0.95
|
||||
temperature: 0.6
|
||||
metadata:
|
||||
  min_tokens: 1
|
||||
max_tokens: 65535
|
||||
repetition_penalty: 1.0
|
||||
frequency_penalty: 0
|
||||
presence_penalty: 0
|
10
benchmarks/yaml/x1-64k-w4a8c8-tp4.yaml
Normal file
10
benchmarks/yaml/x1-64k-w4a8c8-tp4.yaml
Normal file
@@ -0,0 +1,10 @@
|
||||
reasoning-parser: ernie_x1
|
||||
tool_call_parser: ernie_x1
|
||||
tensor_parallel_size: 4
|
||||
max_model_len: 65536
|
||||
max_num_seqs: 128
|
||||
enable_prefix_caching: True
|
||||
enable_chunked_prefill: True
|
||||
gpu_memory_utilization: 0.85
|
||||
use_cudagraph: True
|
||||
enable_custom_all_reduce: True
|
Reference in New Issue
Block a user