add glm benchmark yaml (#4289)

This commit is contained in:
tianlef
2025-09-26 14:23:29 +08:00
committed by GitHub
parent 67e693b18b
commit 8a964329f4
3 changed files with 19 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
# GLM benchmark launch settings (added with the "add glm benchmark yaml" commit).
# NOTE(review): the per-key notes below are inferred from the key names only;
# confirm them against the benchmark launcher that consumes this file.
max_model_len: 32768  # presumably the maximum sequence/context length, in tokens
max_num_seqs: 128  # presumably the cap on sequences handled concurrently
tensor_parallel_size: 4  # presumably the number of devices the model is split across
use_cudagraph: True
load_choices: "default_v1"  # loader selector string; accepted values are defined by the consumer

View File

@@ -0,0 +1,6 @@
# GLM benchmark launch settings, quantized variant: the same values as the
# non-quantized GLM config added in this commit, plus a `quantization` entry.
# NOTE(review): the per-key notes below are inferred from the key names only;
# confirm them against the benchmark launcher that consumes this file.
max_model_len: 32768  # presumably the maximum sequence/context length, in tokens
max_num_seqs: 128  # presumably the cap on sequences handled concurrently
tensor_parallel_size: 4  # presumably the number of devices the model is split across
use_cudagraph: True
load_choices: "default_v1"  # loader selector string; accepted values are defined by the consumer
quantization: wfp8afp8  # NOTE(review): name suggests FP8 weights + FP8 activations — confirm

View File

@@ -0,0 +1,8 @@
# Request parameters used for the GLM benchmark (sampling settings, token
# limits and penalties).
# NOTE(review): how each key is applied depends on the benchmark client that
# reads this file; confirm there.
top_p: 0.95
temperature: 0.6
# NOTE(review): as written, `metadata` has no nested keys, so it parses as a
# null value and `min_tokens` below is a top-level key. If `min_tokens` was
# meant to be a child of `metadata`, it must be indented — confirm against the
# original file, since indentation may have been lost when this was copied.
metadata:
min_tokens: 1
max_tokens: 12288
# The three penalty values below are 1.0 / 0 / 0.
# NOTE(review): these are presumably the "no penalty" settings — confirm with the consumer.
repetition_penalty: 1.0
frequency_penalty: 0
presence_penalty: 0