# Patch summary (text before the first "diff --git" header is not part of
# the patch payload).
#
# Adds two new YAML files:
#
#   benchmarks/yaml/deepseek-32k-tp8-wint4.yaml
#     quantization wint4, tensor_parallel_size 8, max_model_len 32768
#     (32 * 1024, the "32k" in the file name), max_num_seqs 256, CUDA graph
#     options enabled under graph_optimization_config, and
#     no_enable_prefix_caching set to True.
#     Presumably the serving/launch configuration for the benchmark run --
#     TODO confirm against the benchmark tooling that loads it.
#
#   benchmarks/yaml/request_yaml/deepseek-32k.yaml
#     temperature 0.8, top_p 0.8, all penalties at their neutral values
#     (0 / 1.0 / 0), max_tokens 12288 (12 * 1024), metadata.min_tokens 1,
#     chat_template_kwargs.enable_thinking false.
#     Presumably the per-request parameters sent during the benchmark --
#     TODO confirm.
#
# NOTE(review): the nested keys below are shown with a two-space indent;
# the original whitespace was not recoverable, so re-verify the blob ids on
# the "index" lines before relying on them.
# NOTE(review): the first file spells booleans as "True" while the second
# uses lowercase "false"; consider settling on lowercase true/false.
diff --git a/benchmarks/yaml/deepseek-32k-tp8-wint4.yaml b/benchmarks/yaml/deepseek-32k-tp8-wint4.yaml
new file mode 100644
index 000000000..421c8e34d
--- /dev/null
+++ b/benchmarks/yaml/deepseek-32k-tp8-wint4.yaml
@@ -0,0 +1,9 @@
+quantization: wint4
+load_choices: "default_v1"
+graph_optimization_config:
+  use_cudagraph: True
+  use_unique_memory_pool: True
+no_enable_prefix_caching: True
+max_num_seqs: 256
+max_model_len: 32768
+tensor_parallel_size: 8
diff --git a/benchmarks/yaml/request_yaml/deepseek-32k.yaml b/benchmarks/yaml/request_yaml/deepseek-32k.yaml
new file mode 100644
index 000000000..12d1198a6
--- /dev/null
+++ b/benchmarks/yaml/request_yaml/deepseek-32k.yaml
@@ -0,0 +1,10 @@
+temperature: 0.8
+top_p: 0.8
+presence_penalty: 0
+repetition_penalty: 1.0
+frequency_penalty: 0
+max_tokens: 12288
+metadata:
+  min_tokens: 1
+chat_template_kwargs:
+  enable_thinking: false