# Patch: adds benchmarks/yaml/eb45-32k-wint4-ep4-tp4.yaml, a flat mapping of
# seven benchmark settings. Text ahead of the "diff --git" header is
# commentary only and is skipped by patch tools.
#
# NOTE(review): the file name says "32k" but max_model_len is 8192 - confirm
#   which one is intended.
# NOTE(review): the file name says "tp4" but tensor_parallel_size is 1 and
#   data_parallel_size is 4 - confirm the name matches the intended layout.
# NOTE(review): "True" is a non-canonical YAML boolean spelling; "true" is the
#   portable form. Left unchanged here so the recorded blob id stays valid.
diff --git a/benchmarks/yaml/eb45-32k-wint4-ep4-tp4.yaml b/benchmarks/yaml/eb45-32k-wint4-ep4-tp4.yaml
new file mode 100644
index 000000000..d05375caa
--- /dev/null
+++ b/benchmarks/yaml/eb45-32k-wint4-ep4-tp4.yaml
@@ -0,0 +1,7 @@
+num_gpu_blocks_override: 1024
+max_model_len: 8192
+max_num_seqs: 64
+data_parallel_size: 4
+tensor_parallel_size: 1
+enable_expert_parallel: True
+quantization: wint4