[CE]add wint4 ep (#5355)

This commit is contained in:
tianlef
2025-12-03 15:17:47 +08:00
committed by GitHub
parent d5a9b75b4e
commit 04d35ace5e

View File

@@ -0,0 +1,7 @@
# Launch/serving options for a run that uses "wint4" quantization with expert
# parallelism enabled.
# NOTE(review): the per-key notes below are inferred from the key names only;
# the code that consumes this file is not visible here — confirm against it.

# Fixed block count used in place of an automatically derived one
# (presumably GPU KV-cache blocks — TODO confirm).
num_gpu_blocks_override: 1024
# Upper bound on model length (presumably prompt + generated tokens per
# request — TODO confirm units).
max_model_len: 8192
# Upper bound on the number of sequences (presumably handled concurrently in
# one batch — TODO confirm).
max_num_seqs: 64
# Parallel layout: 4-way data parallel with tensor parallelism set to 1
# (presumably 4 devices in total, no tensor sharding — TODO confirm).
data_parallel_size: 4
tensor_parallel_size: 1
# Turns on expert parallelism (presumably for MoE layers, with experts spread
# across the data-parallel ranks — verify against the loader).
enable_expert_parallel: True
# Quantization scheme identifier (presumably weight-only INT4 — TODO confirm).
quantization: wint4