From 0bc7d076fc7e94a269f33b4786c22ddd2a314284 Mon Sep 17 00:00:00 2001 From: tianlef <1095012807@qq.com> Date: Tue, 26 Aug 2025 11:27:32 +0800 Subject: [PATCH] [CE]add x1 w4a8c8 benchmark config (#3607) * [CE]add x1 w4a8c8 benchmark config * [CE]add x1 w4a8c8 benchmark config * [CE]add x1 w4a8c8 benchmark config --- benchmarks/yaml/request_yaml/x1.yaml | 8 ++++++++ benchmarks/yaml/x1-64k-w4a8c8-tp4.yaml | 10 ++++++++++ 2 files changed, 18 insertions(+) create mode 100644 benchmarks/yaml/request_yaml/x1.yaml create mode 100644 benchmarks/yaml/x1-64k-w4a8c8-tp4.yaml diff --git a/benchmarks/yaml/request_yaml/x1.yaml b/benchmarks/yaml/request_yaml/x1.yaml new file mode 100644 index 000000000..73dc6a900 --- /dev/null +++ b/benchmarks/yaml/request_yaml/x1.yaml @@ -0,0 +1,8 @@ +top_p: 0.95 +temperature: 0.6 +metadata: + min_tokens: 1 +max_tokens: 65535 +repetition_penalty: 1.0 +frequency_penalty: 0 +presence_penalty: 0 diff --git a/benchmarks/yaml/x1-64k-w4a8c8-tp4.yaml b/benchmarks/yaml/x1-64k-w4a8c8-tp4.yaml new file mode 100644 index 000000000..c77231c8f --- /dev/null +++ b/benchmarks/yaml/x1-64k-w4a8c8-tp4.yaml @@ -0,0 +1,10 @@ +reasoning-parser: ernie_x1 +tool_call_parser: ernie_x1 +tensor_parallel_size: 4 +max_model_len: 65536 +max_num_seqs: 128 +enable_prefix_caching: True +enable_chunked_prefill: True +gpu_memory_utilization: 0.85 +use_cudagraph: True +enable_custom_all_reduce: True