mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Graph Optimization][BugFix][CI] Fix 0size bug && add unittest (#5495)
This commit is contained in:
8
tests/ce/deploy/ernie45t_21b_cinn.yaml
Normal file
8
tests/ce/deploy/ernie45t_21b_cinn.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
max_model_len: 32768
|
||||
max_num_seqs: 128
|
||||
tensor_parallel_size: 1
|
||||
quantization: wint4
|
||||
graph_optimization_config:
|
||||
  graph_opt_level: 2
|
||||
  sot_warmup_sizes: [2,16,32,64]
|
||||
  use_cudagraph: True
|
||||
@@ -91,7 +91,7 @@ def setup_and_run_server():
|
||||
"--reasoning-parser",
|
||||
"ernie-45-vl",
|
||||
"--graph-optimization-config",
|
||||
-'{"graph_opt_level": 1, "use_cudagraph": true, "full_cuda_graph": false}',
|
||||
+'{"graph_opt_level": 2, "use_cudagraph": true, "full_cuda_graph": false}',
|
||||
]
|
||||
|
||||
# Start subprocess in new process group
|
||||
|
||||
Reference in New Issue
Block a user