mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
polish code with new pre-commit rule (#2923)
This commit is contained in:
@@ -7,4 +7,4 @@ tensor_parallel_size: 1
|
||||
enable_chunked_prefill: True
|
||||
max_num_batched_tokens: 384
|
||||
quantization: wint4
|
||||
reasoning_parser: ernie-45-vl
|
||||
reasoning_parser: ernie-45-vl
|
||||
|
||||
@@ -12,4 +12,4 @@ rdma_comm_ports: "7671,7672,7673,7674"
|
||||
pd_comm_port: "2334"
|
||||
max_num_batched_tokens: 384
|
||||
max_num_partial_prefills: 3
|
||||
max_long_partial_prefills: 3
|
||||
max_long_partial_prefills: 3
|
||||
|
||||
@@ -9,4 +9,4 @@ cache_queue_port: 55664
|
||||
engine_worker_queue_port: 6677
|
||||
cache_transfer_protocol: "rdma,ipc"
|
||||
rdma_comm_ports: "7675,7676,7677,7678"
|
||||
pd_comm_port: "2333"
|
||||
pd_comm_port: "2333"
|
||||
|
||||
@@ -10,4 +10,4 @@ engine_worker_queue_port: 6677
|
||||
num_gpu_blocks_override: 1024
|
||||
cache_transfer_protocol: "rdma"
|
||||
rdma_comm_ports: "7671,7672,7673,7674,7675,7676,7677,7678"
|
||||
pd_comm_port: "2334"
|
||||
pd_comm_port: "2334"
|
||||
|
||||
@@ -10,4 +10,4 @@ splitwise_role: decode
|
||||
engine_worker_queue_port: 6678
|
||||
cache_transfer_protocol: "rdma,ipc"
|
||||
rdma_comm_ports: "7671,7672,7673,7674"
|
||||
pd_comm_port: "2334"
|
||||
pd_comm_port: "2334"
|
||||
|
||||
@@ -9,4 +9,4 @@ cache_queue_port: 55664
|
||||
engine_worker_queue_port: 6677
|
||||
cache_transfer_protocol: "rdma,ipc"
|
||||
rdma_comm_ports: "7675,7676,7677,7678"
|
||||
pd_comm_port: "2333"
|
||||
pd_comm_port: "2333"
|
||||
|
||||
@@ -12,4 +12,4 @@ rdma_comm_ports: "7671,7672,7673,7674"
|
||||
pd_comm_port: "2334"
|
||||
max_num_batched_tokens: 384
|
||||
max_num_partial_prefills: 3
|
||||
max_long_partial_prefills: 3
|
||||
max_long_partial_prefills: 3
|
||||
|
||||
@@ -9,4 +9,4 @@ cache_queue_port: 55664
|
||||
engine_worker_queue_port: 6677
|
||||
cache_transfer_protocol: "rdma,ipc"
|
||||
rdma_comm_ports: "7675,7676,7677,7678"
|
||||
pd_comm_port: "2333"
|
||||
pd_comm_port: "2333"
|
||||
|
||||
@@ -3,4 +3,4 @@ max_num_seqs: 75
|
||||
gpu_memory_utilization: 0.85
|
||||
kv_cache_ratio: 0.75
|
||||
quantization: wint4
|
||||
tensor_parallel_size: 4
|
||||
tensor_parallel_size: 4
|
||||
|
||||
@@ -3,4 +3,4 @@ max_num_seqs: 25
|
||||
gpu_memory_utilization: 0.9
|
||||
kv_cache_ratio: 0.75
|
||||
quantization: wint8
|
||||
tensor_parallel_size: 4
|
||||
tensor_parallel_size: 4
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
metadata:
|
||||
min_tokens: 32
|
||||
max_tokens: 33
|
||||
max_tokens: 33
|
||||
|
||||
@@ -5,4 +5,4 @@ metadata:
|
||||
max_tokens: 12288
|
||||
repetition_penalty: 1.05
|
||||
frequency_penalty: 0
|
||||
presence_penalty: 0
|
||||
presence_penalty: 0
|
||||
|
||||
@@ -5,4 +5,4 @@ metadata:
|
||||
max_tokens: 12288
|
||||
repetition_penalty: 1.0
|
||||
frequency_penalty: 0
|
||||
presence_penalty: 1.5
|
||||
presence_penalty: 1.5
|
||||
|
||||
@@ -8,4 +8,4 @@ frequency_penalty: 0
|
||||
presence_penalty: 0
|
||||
skip_special_tokens: false
|
||||
chat_template_kwargs:
|
||||
enable_thinking: true
|
||||
enable_thinking: true
|
||||
|
||||
@@ -3,4 +3,4 @@ max_num_seqs: 64
|
||||
gpu_memory_utilization: 0.9
|
||||
tensor_parallel_size: 8
|
||||
quantization: wint8
|
||||
reasoning_parser: ernie-x1
|
||||
reasoning_parser: ernie-x1
|
||||
|
||||
Reference in New Issue
Block a user