From 153f15db3934793db8f95f39e947b623a3d1f0d1 Mon Sep 17 00:00:00 2001 From: tianlef <1095012807@qq.com> Date: Tue, 21 Oct 2025 16:41:51 +0800 Subject: [PATCH] [Doc]add deepseek wint4 ce (#4517) --- benchmarks/yaml/deepseek-32k-tp8-wint4.yaml | 9 +++++++++ benchmarks/yaml/request_yaml/deepseek-32k.yaml | 10 ++++++++++ 2 files changed, 19 insertions(+) create mode 100644 benchmarks/yaml/deepseek-32k-tp8-wint4.yaml create mode 100644 benchmarks/yaml/request_yaml/deepseek-32k.yaml diff --git a/benchmarks/yaml/deepseek-32k-tp8-wint4.yaml b/benchmarks/yaml/deepseek-32k-tp8-wint4.yaml new file mode 100644 index 000000000..421c8e34d --- /dev/null +++ b/benchmarks/yaml/deepseek-32k-tp8-wint4.yaml @@ -0,0 +1,9 @@ +quantization: wint4 +load_choices: "default_v1" +graph_optimization_config: + use_cudagraph: True + use_unique_memory_pool: True +no_enable_prefix_caching: True +max_num_seqs: 256 +max_model_len: 32768 +tensor_parallel_size: 8 diff --git a/benchmarks/yaml/request_yaml/deepseek-32k.yaml b/benchmarks/yaml/request_yaml/deepseek-32k.yaml new file mode 100644 index 000000000..12d1198a6 --- /dev/null +++ b/benchmarks/yaml/request_yaml/deepseek-32k.yaml @@ -0,0 +1,10 @@ +temperature: 0.8 +top_p: 0.8 +presence_penalty: 0 +repetition_penalty: 1.0 +frequency_penalty: 0 +max_tokens: 12288 +metadata: + min_tokens: 1 +chat_template_kwargs: + enable_thinking: false