[benchmark]add lite-vl and x1 yaml (#4130)

This commit is contained in:
xiegegege
2025-09-16 16:38:36 +08:00
committed by GitHub
parent 8e49d99009
commit d682c97dd3
15 changed files with 60 additions and 3 deletions

View File

@@ -6,3 +6,4 @@ tensor_parallel_size: 8
max_num_batched_tokens: 4096 max_num_batched_tokens: 4096
max_num_partial_prefills: 3 max_num_partial_prefills: 3
max_long_partial_prefills: 3 max_long_partial_prefills: 3
quantization: wint4

View File

@@ -6,3 +6,4 @@ tensor_parallel_size: 8
max_num_batched_tokens: 4096 max_num_batched_tokens: 4096
max_num_partial_prefills: 3 max_num_partial_prefills: 3
max_long_partial_prefills: 3 max_long_partial_prefills: 3
quantization: wint8

View File

@@ -0,0 +1,5 @@
max_model_len: 32768
max_num_seqs: 256
kv_cache_ratio: 0.75
tensor_parallel_size: 4
gpu_memory_utilization: 0.9

View File

@@ -13,3 +13,4 @@ pd_comm_port: "2334"
max_num_batched_tokens: 384 max_num_batched_tokens: 384
max_num_partial_prefills: 3 max_num_partial_prefills: 3
max_long_partial_prefills: 3 max_long_partial_prefills: 3
quantization: wint4

View File

@@ -10,3 +10,4 @@ engine_worker_queue_port: 6677
cache_transfer_protocol: "rdma,ipc" cache_transfer_protocol: "rdma,ipc"
rdma_comm_ports: "7675,7676,7677,7678" rdma_comm_ports: "7675,7676,7677,7678"
pd_comm_port: "2333" pd_comm_port: "2333"
quantization: wint4

View File

@@ -0,0 +1,11 @@
enable_mm: True
max_model_len: 131072
max_num_seqs: 56
gpu_memory_utilization: 0.8
kv_cache_ratio: 0.8
tensor_parallel_size: 8
quantization: wint4
limit_mm_per_prompt: '{"image": 100, "video": 100}'
enable_chunked_prefill: True
max_num_batched_tokens: 384
reasoning_parser: ernie-45-vl

View File

@@ -1,7 +1,7 @@
enable_mm: True enable_mm: True
max_model_len: 32768 max_model_len: 32768
max_num_seqs: 36 max_num_seqs: 36
gpu_memory_utilization: 0.95 gpu_memory_utilization: 0.9
kv_cache_ratio: 0.8 kv_cache_ratio: 0.8
tensor_parallel_size: 8 tensor_parallel_size: 8
quantization: wint8 quantization: wint8

View File

@@ -1,7 +1,7 @@
enable_mm: True enable_mm: True
max_model_len: 32768 max_model_len: 32768
max_num_seqs: 36 max_num_seqs: 36
gpu_memory_utilization: 0.8 gpu_memory_utilization: 0.85
kv_cache_ratio: 0.8 kv_cache_ratio: 0.8
tensor_parallel_size: 8 tensor_parallel_size: 8
quantization: wint8 quantization: wint8

View File

@@ -0,0 +1,9 @@
enable_mm: True
max_model_len: 32768
max_num_seqs: 128
gpu_memory_utilization: 0.9
kv_cache_ratio: 0.71
tensor_parallel_size: 1
enable_chunked_prefill: True
max_num_batched_tokens: 384
reasoning_parser: ernie-45-vl

View File

@@ -0,0 +1,10 @@
enable_mm: True
max_model_len: 32768
max_num_seqs: 128
gpu_memory_utilization: 0.9
kv_cache_ratio: 0.71
tensor_parallel_size: 1
enable_chunked_prefill: True
max_num_batched_tokens: 384
quantization: wint4
reasoning_parser: ernie-45-vl

View File

@@ -0,0 +1,10 @@
enable_mm: True
max_model_len: 32768
max_num_seqs: 128
gpu_memory_utilization: 0.9
kv_cache_ratio: 0.71
tensor_parallel_size: 1
enable_chunked_prefill: True
max_num_batched_tokens: 384
quantization: wint8
reasoning_parser: ernie-45-vl

View File

@@ -0,0 +1 @@
max_tokens: 131071

View File

@@ -0,0 +1 @@
max_tokens: 12288

View File

@@ -2,7 +2,7 @@ top_p: 0.95
temperature: 0.6 temperature: 0.6
metadata: metadata:
min_tokens: 1 min_tokens: 1
max_tokens: 65535 max_tokens: 131071
repetition_penalty: 1.0 repetition_penalty: 1.0
frequency_penalty: 0 frequency_penalty: 0
presence_penalty: 0 presence_penalty: 0

View File

@@ -0,0 +1,6 @@
tensor_parallel_size: 1
max_model_len: 131072
max_num_seqs: 32
quantization: wint8
reasoning_parser: ernie_x1
tool_call_parser: ernie_x1