Files
FastDeploy/benchmarks/yaml/x1-32k-wint8-h800-tp8.yaml
2025-06-29 23:29:37 +00:00

7 lines
139 B
YAML

# Benchmark launch settings. Going by the file name (x1-32k-wint8-h800-tp8),
# this presumably targets an X1 model with a 32k context on 8x H800 using
# tensor parallelism — confirm against the benchmark runner that loads it.
#
# NOTE(review): the file name mentions "wint8", but no quantization key is set
# in this file — verify that quantization is supplied elsewhere (CLI flag or
# model config), otherwise the benchmark may not run in the intended mode.

# Matches the "tp8" suffix of the file name; presumably the number of devices
# the model is sharded across.
tensor_parallel_size: 8
# 32768 = 32 * 1024, matching "32k" in the file name; presumably the maximum
# sequence length in tokens — TODO confirm whether this includes output tokens.
max_model_len: 32768
# Presumably the upper bound on concurrently scheduled sequences — confirm.
max_num_seqs: 32
# Presumably pins the KV-cache block count instead of letting it be derived
# automatically — verify against the engine's argument documentation.
num_gpu_blocks_override: 4096
# Semantics are not derivable from this file — TODO confirm what the ratio
# divides (e.g. how cache blocks are split) before changing it.
kv_cache_ratio: 0.5
# Presumably selects the parser used to extract reasoning content from model
# output; the value must match a parser name registered by the server.
reasoning_parser: ernie-x1