[benchmark] add quantization for benchmark yaml (#2995)

This commit is contained in:
xiegegege
2025-07-24 13:26:34 +08:00
committed by GitHub
parent 6235ef3881
commit e3a843f2c5
2 changed files with 2 additions and 0 deletions

View File

@@ -3,3 +3,4 @@ max_num_seqs: 96
gpu_memory_utilization: 0.9
kv_cache_ratio: 0.71
tensor_parallel_size: 4
quantization: wint4

View File

@@ -3,3 +3,4 @@ max_num_seqs: 96
gpu_memory_utilization: 0.9
kv_cache_ratio: 0.71
tensor_parallel_size: 8
quantization: wint8