Replace the negated option `no_enable_prefix_caching: True` with the
positive-form option `enable_prefix_caching: False` in the DeepSeek 32k
TP8 wint4 benchmark config. All other settings in the hunk are unchanged context.

NOTE(review): the two-space indentation of the keys nested under
`graph_optimization_config:` was reconstructed from a whitespace-collapsed
copy of this patch; confirm it matches the target file before applying.

diff --git a/benchmarks/yaml/deepseek-32k-tp8-wint4.yaml b/benchmarks/yaml/deepseek-32k-tp8-wint4.yaml
index 421c8e34d..a09349f04 100644
--- a/benchmarks/yaml/deepseek-32k-tp8-wint4.yaml
+++ b/benchmarks/yaml/deepseek-32k-tp8-wint4.yaml
@@ -3,7 +3,7 @@ load_choices: "default_v1"
 graph_optimization_config:
   use_cudagraph: True
   use_unique_memory_pool: True
-no_enable_prefix_caching: True
+enable_prefix_caching: False
 max_num_seqs: 256
 max_model_len: 32768
 tensor_parallel_size: 8