[Doc] Add DeepSeek wint4 CE (#4517)

This commit is contained in:
tianlef
2025-10-21 16:41:51 +08:00
committed by GitHub
parent fb76cdfb4f
commit 153f15db39
2 changed files with 19 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
# Deployment settings for the DeepSeek wint4 CE configuration (per the commit
# title).
# NOTE(review): the nesting below was reconstructed from a view in which all
# indentation had been flattened; confirm that exactly these two use_* keys
# belong under graph_optimization_config and that the remaining keys are
# top-level.
quantization: wint4
load_choices: "default_v1"
graph_optimization_config:
  use_cudagraph: true
  use_unique_memory_pool: true
no_enable_prefix_caching: true
max_num_seqs: 256
max_model_len: 32768
tensor_parallel_size: 8

View File

@@ -0,0 +1,10 @@
# Request/sampling parameters accompanying the DeepSeek wint4 CE configuration
# (per the commit title).
# NOTE(review): the nesting of min_tokens under metadata and enable_thinking
# under chat_template_kwargs was reconstructed from a view in which all
# indentation had been flattened; confirm against the consumer's schema.
temperature: 0.8
top_p: 0.8
presence_penalty: 0
repetition_penalty: 1.0
frequency_penalty: 0
max_tokens: 12288
metadata:
  min_tokens: 1
chat_template_kwargs:
  enable_thinking: false