mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
add glm benchmark yaml (#4289)
This commit is contained in:
5
benchmarks/yaml/GLM45-air-32k-bf16.yaml
Normal file
5
benchmarks/yaml/GLM45-air-32k-bf16.yaml
Normal file
@@ -0,0 +1,5 @@
|
||||
max_model_len: 32768
|
||||
max_num_seqs: 128
|
||||
tensor_parallel_size: 4
|
||||
use_cudagraph: True
|
||||
load_choices: "default_v1"
|
||||
6
benchmarks/yaml/GLM45-air-32k-wfp8afp8.yaml
Normal file
6
benchmarks/yaml/GLM45-air-32k-wfp8afp8.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
max_model_len: 32768
|
||||
max_num_seqs: 128
|
||||
tensor_parallel_size: 4
|
||||
use_cudagraph: True
|
||||
load_choices: "default_v1"
|
||||
quantization: wfp8afp8
|
||||
8
benchmarks/yaml/request_yaml/GLM-32k.yaml
Normal file
8
benchmarks/yaml/request_yaml/GLM-32k.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
top_p: 0.95
|
||||
temperature: 0.6
|
||||
metadata:
|
||||
  min_tokens: 1
|
||||
max_tokens: 12288
|
||||
repetition_penalty: 1.0
|
||||
frequency_penalty: 0
|
||||
presence_penalty: 0
|
||||
Reference in New Issue
Block a user