[CE]add x1 w4a8c8 benchmark config (#3607)

* [CE]add x1 w4a8c8 benchmark config

* [CE]add x1 w4a8c8 benchmark config

* [CE]add x1 w4a8c8 benchmark config
This commit is contained in:
tianlef
2025-08-26 11:27:32 +08:00
committed by GitHub
parent a5b4866ff1
commit 0bc7d076fc
2 changed files with 18 additions and 0 deletions

View File

@@ -0,0 +1,8 @@
# Request-level generation parameters for the x1 w4a8c8 benchmark.
top_p: 0.95
temperature: 0.6
# min_tokens belongs inside the metadata mapping; left unindented, `metadata`
# parses as null and min_tokens becomes an unrelated top-level key.
metadata:
  min_tokens: 1
max_tokens: 65535
repetition_penalty: 1.0
frequency_penalty: 0
presence_penalty: 0

View File

@@ -0,0 +1,10 @@
# Serving configuration for the x1 w4a8c8 benchmark.
# NOTE(review): `reasoning-parser` is hyphenated while every other key in this
# file (including `tool_call_parser`) uses underscores — confirm the config
# loader accepts both spellings, otherwise one of the two parser options may be
# silently ignored.
reasoning-parser: ernie_x1
tool_call_parser: ernie_x1
tensor_parallel_size: 4
# NOTE(review): presumably the total context budget per sequence; the matching
# request config asks for max_tokens: 65535 — verify there is enough headroom
# left for the prompt.
max_model_len: 65536
max_num_seqs: 128
enable_prefix_caching: True
enable_chunked_prefill: True
gpu_memory_utilization: 0.85
use_cudagraph: True
enable_custom_all_reduce: True