mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
add paddleocr_vl benchmark (#4833)
* add paddleocr_vl benchmark * fix * fix * fix * fix
This commit is contained in:
97
benchmarks/paddleocr_vl/PaddleOCR-VL.yaml
Normal file
97
benchmarks/paddleocr_vl/PaddleOCR-VL.yaml
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
|
||||||
|
pipeline_name: PaddleOCR-VL
|
||||||
|
|
||||||
|
batch_size: 64
|
||||||
|
|
||||||
|
use_queues: True
|
||||||
|
|
||||||
|
use_doc_preprocessor: False
|
||||||
|
use_layout_detection: True
|
||||||
|
use_chart_recognition: False
|
||||||
|
format_block_content: False
|
||||||
|
|
||||||
|
SubModules:
|
||||||
|
LayoutDetection:
|
||||||
|
module_name: layout_detection
|
||||||
|
model_name: PP-DocLayoutV2
|
||||||
|
model_dir: null
|
||||||
|
batch_size: 8
|
||||||
|
threshold:
|
||||||
|
0: 0.5 # abstract
|
||||||
|
1: 0.5 # algorithm
|
||||||
|
2: 0.5 # aside_text
|
||||||
|
3: 0.5 # chart
|
||||||
|
4: 0.5 # content
|
||||||
|
5: 0.4 # display_formula
|
||||||
|
6: 0.4 # doc_title
|
||||||
|
7: 0.5 # figure_title
|
||||||
|
8: 0.5 # footer
|
||||||
|
9: 0.5 # footer
|
||||||
|
10: 0.5 # footnote
|
||||||
|
11: 0.5 # formula_number
|
||||||
|
12: 0.5 # header
|
||||||
|
13: 0.5 # header
|
||||||
|
14: 0.5 # image
|
||||||
|
15: 0.4 # inline_formula
|
||||||
|
16: 0.5 # number
|
||||||
|
17: 0.4 # paragraph_title
|
||||||
|
18: 0.5 # reference
|
||||||
|
19: 0.5 # reference_content
|
||||||
|
20: 0.45 # seal
|
||||||
|
21: 0.5 # table
|
||||||
|
22: 0.4 # text
|
||||||
|
23: 0.4 # text
|
||||||
|
24: 0.5 # vision_footnote
|
||||||
|
layout_nms: True
|
||||||
|
layout_unclip_ratio: [1.0, 1.0]
|
||||||
|
layout_merge_bboxes_mode:
|
||||||
|
0: "union" # abstract
|
||||||
|
1: "union" # algorithm
|
||||||
|
2: "union" # aside_text
|
||||||
|
3: "large" # chart
|
||||||
|
4: "union" # content
|
||||||
|
5: "large" # display_formula
|
||||||
|
6: "large" # doc_title
|
||||||
|
7: "union" # figure_title
|
||||||
|
8: "union" # footer
|
||||||
|
9: "union" # footer
|
||||||
|
10: "union" # footnote
|
||||||
|
11: "union" # formula_number
|
||||||
|
12: "union" # header
|
||||||
|
13: "union" # header
|
||||||
|
14: "union" # image
|
||||||
|
15: "large" # inline_formula
|
||||||
|
16: "union" # number
|
||||||
|
17: "large" # paragraph_title
|
||||||
|
18: "union" # reference
|
||||||
|
19: "union" # reference_content
|
||||||
|
20: "union" # seal
|
||||||
|
21: "union" # table
|
||||||
|
22: "union" # text
|
||||||
|
23: "union" # text
|
||||||
|
24: "union" # vision_footnote
|
||||||
|
VLRecognition:
|
||||||
|
module_name: vl_recognition
|
||||||
|
model_name: PaddleOCR-VL-0.9B
|
||||||
|
model_dir: null
|
||||||
|
batch_size: 4096
|
||||||
|
genai_config:
|
||||||
|
backend: fastdeploy-server
|
||||||
|
server_url: http://127.0.0.1:8118/v1
|
||||||
|
|
||||||
|
SubPipelines:
|
||||||
|
DocPreprocessor:
|
||||||
|
pipeline_name: doc_preprocessor
|
||||||
|
batch_size: 8
|
||||||
|
use_doc_orientation_classify: True
|
||||||
|
use_doc_unwarping: True
|
||||||
|
SubModules:
|
||||||
|
DocOrientationClassify:
|
||||||
|
module_name: doc_text_orientation
|
||||||
|
model_name: PP-LCNet_x1_0_doc_ori
|
||||||
|
model_dir: null
|
||||||
|
batch_size: 8
|
||||||
|
DocUnwarping:
|
||||||
|
module_name: image_unwarping
|
||||||
|
model_name: UVDoc
|
||||||
|
model_dir: null
|
||||||
139
benchmarks/paddleocr_vl/README.md
Normal file
139
benchmarks/paddleocr_vl/README.md
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
## FastDeploy 服务化性能压测工具(PaddleOCR-VL)
|
||||||
|
|
||||||
|
本文档主要介绍如何对 [PaddleOCR-VL](https://www.paddleocr.ai/latest/version3.x/pipeline_usage/PaddleOCR-VL.html) 进行性能测试。
|
||||||
|
|
||||||
|
### 数据集:
|
||||||
|
|
||||||
|
下载数据集到本地用于性能测试:
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>数据集</th>
|
||||||
|
<th>获取地址</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>OmniDocBench v1 数据集,共 981 个 pdf 文件</td>
|
||||||
|
<td><code>https://github.com/opendatalab/OmniDocBench</code></td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
### 使用方式
|
||||||
|
|
||||||
|
1. 启动 FastDeploy 服务,下面为 A100-80G 测试时使用的参数,可以根据实际情况进行调整:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
python -m fastdeploy.entrypoints.openai.api_server \
|
||||||
|
--model PaddlePaddle/PaddleOCR-VL \
|
||||||
|
--port 8118 \
|
||||||
|
--metrics-port 8471 \
|
||||||
|
--engine-worker-queue-port 8472 \
|
||||||
|
--cache-queue-port 55660 \
|
||||||
|
--max-model-len 16384 \
|
||||||
|
--max-num-batched-tokens 16384 \
|
||||||
|
--gpu-memory-utilization 0.7 \
|
||||||
|
--max-num-seqs 256 \
|
||||||
|
--workers 2 \
|
||||||
|
--graph-optimization-config '{"graph_opt_level":0, "use_cudagraph":true}'
|
||||||
|
```
|
||||||
|
|
||||||
|
2. 在同一环境安装依赖后启动测试脚本:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
# 安装依赖
|
||||||
|
pip install -U paddlex
|
||||||
|
# 启动测试脚本
|
||||||
|
python benchmark.py ./test_data -b 512 -o ./benchmark.json --paddlex_config_path ./PaddleOCR-VL.yaml --gpu_ids 0
|
||||||
|
```
|
||||||
|
|
||||||
|
测试脚本参数说明:
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>参数</th>
|
||||||
|
<th>说明</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td><code>input_dirs</code></td>
|
||||||
|
<td>输入的目录路径,会自动识别到目录下的 pdf 或图片。可以提供一个或多个。</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><code>-b, --batch_size</code></td>
|
||||||
|
<td>推理时使用的批处理大小。</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><code>-o, --output_path</code></td>
|
||||||
|
<td>输出结果文件的路径。</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><code>--paddlex_config_path</code></td>
|
||||||
|
<td>PaddleX 的 YAML 配置文件路径。</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><code>--gpu_ids</code></td>
|
||||||
|
<td>指定要使用的 GPU 设备 ID,可提供一个或多个。</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
3. 测试结束后,会输出类似于下面的结果:
|
||||||
|
|
||||||
|
```text
|
||||||
|
Throughput (file): 1.3961 files per second
|
||||||
|
Average latency (batch): 351.0812 seconds
|
||||||
|
Processed pages: 981
|
||||||
|
Throughput (page): 1.3961 pages per second
|
||||||
|
Generated tokens: 1510337
|
||||||
|
Throughput (token): 2149.5 tokens per second
|
||||||
|
GPU utilization (%): 100.0, 0.0, 68.1
|
||||||
|
GPU memory usage (MB): 77664.8, 58802.8, 74402.7
|
||||||
|
```
|
||||||
|
|
||||||
|
输出结果说明:
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>指标</th>
|
||||||
|
<th>说明</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tr>
|
||||||
|
<td>Throughput (file)</td>
|
||||||
|
<td>每秒处理的文件数量</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Average latency (batch)</td>
|
||||||
|
<td>每批次处理的平均延迟时间,单位为秒</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Processed pages</td>
|
||||||
|
<td>已处理的页面总数</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Throughput (page)</td>
|
||||||
|
<td>每秒处理的页面数量</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Generated tokens</td>
|
||||||
|
<td>生成的 token 总数</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Throughput (token)</td>
|
||||||
|
<td>每秒生成的 token 数量</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>GPU utilization (%)</td>
|
||||||
|
<td>GPU 的最大、最小、平均利用率</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>GPU memory usage (MB)</td>
|
||||||
|
<td>GPU 的最大、最小、平均显存占用,单位为 MB</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
226
benchmarks/paddleocr_vl/benchmark.py
Normal file
226
benchmarks/paddleocr_vl/benchmark.py
Normal file
@@ -0,0 +1,226 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import glob
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from operator import itemgetter
|
||||||
|
from threading import Thread
|
||||||
|
|
||||||
|
import pynvml
|
||||||
|
import tiktoken
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
# Stop flag polled by monitor_device(); the main script sets it to True to end GPU sampling.
shutdown = False
|
||||||
|
|
||||||
|
# Tokenizer used by Predictor.predict() to count the tokens of the generated markdown.
encoding = tiktoken.get_encoding("cl100k_base")
|
||||||
|
|
||||||
|
|
||||||
|
class Predictor:
    """Base predictor that times one batch and records its statistics.

    Subclasses provide the actual inference by overriding ``_predict``.
    """

    def predict(self, task_info, batch_data):
        """Run ``_predict`` on ``batch_data`` and fill ``task_info`` in place.

        Args:
            task_info: Dict that receives "start_time", "end_time" and
                "successful"; on success also "processed_pages" and
                "generated_tokens".
            batch_data: Inputs forwarded unchanged to ``_predict``.

        Returns:
            The markdown text produced by ``_predict``.

        Raises:
            Exception: Whatever ``_predict`` raises is printed and re-raised
                after "successful" is set to False.
        """
        task_info["start_time"] = get_curr_time()
        try:
            text, page_count = self._predict(batch_data)
        except Exception as exc:
            task_info["successful"] = False
            print(exc)
            raise
        finally:
            # Stamped on both the success and the failure path, and before
            # tokenization so token counting is not part of the latency.
            task_info["end_time"] = get_curr_time()

        task_info["successful"] = True
        task_info["processed_pages"] = page_count
        task_info["generated_tokens"] = len(encoding.encode(text))
        return text

    def _predict(self, batch_data):
        """Return ``(markdown, num_pages)`` for ``batch_data``; must be overridden."""
        raise NotImplementedError

    def close(self):
        """Release resources held by the predictor; the base class holds none."""
        pass
|
||||||
|
|
||||||
|
|
||||||
|
class PaddleXPredictor(Predictor):
    """Predictor that delegates inference to a PaddleX pipeline."""

    def __init__(self, config_path):
        """Build the pipeline described by the config file at ``config_path``."""
        # Imported here rather than at module level, so paddlex is only
        # loaded when a PaddleXPredictor is actually constructed.
        from paddlex import create_pipeline

        super().__init__()
        self.pipeline = create_pipeline(config_path)

    def _predict(self, batch_data):
        """Run the pipeline and return ``(joined markdown, number of results)``."""
        outputs = list(self.pipeline.predict(batch_data))
        texts = [out._to_markdown(pretty=False)["markdown_texts"] for out in outputs]
        return "\n\n".join(texts), len(outputs)

    def close(self):
        """Close the underlying pipeline."""
        self.pipeline.close()
|
||||||
|
|
||||||
|
|
||||||
|
def monitor_device(gpu_ids, gpu_metrics_list):
    """Sample GPU utilization and memory until the global ``shutdown`` flag is set.

    Intended to run in a background thread. Errors are printed instead of
    raised so that a monitoring failure never aborts the benchmark.

    Args:
        gpu_ids: NVML device indices to monitor.
        gpu_metrics_list: List that receives one dict per sample with the keys
            "utilization" and "memory" (bytes). Both values are summed over
            all monitored GPUs.
    """
    try:
        pynvml.nvmlInit()
        handles = [pynvml.nvmlDeviceGetHandleByIndex(gpu_id) for gpu_id in gpu_ids]

        # Initial delay before the first sample is taken.
        time.sleep(5)
        while not shutdown:
            try:
                gpu_util = 0
                mem_bytes = 0

                # NOTE(review): utilization is summed, not averaged, so with
                # several GPUs the reported percentage can exceed 100.
                for handle in handles:
                    gpu_util += pynvml.nvmlDeviceGetUtilizationRates(handle).gpu
                    mem_bytes += pynvml.nvmlDeviceGetMemoryInfo(handle).used

                gpu_metrics_list.append(
                    {
                        "utilization": gpu_util,
                        "memory": mem_bytes,
                    }
                )
            except Exception as e:
                print(f"Error monitoring GPUs: {e}")

            time.sleep(0.5)

    except Exception as e:
        print(f"Error initializing the GPU monitor: {e}")
    finally:
        try:
            pynvml.nvmlShutdown()
        except Exception:
            # Best-effort cleanup: shutdown fails when nvmlInit() never
            # succeeded. Only Exception is swallowed so KeyboardInterrupt and
            # SystemExit still propagate.
            pass
|
||||||
|
|
||||||
|
|
||||||
|
def get_curr_time():
    """Return the current ``time.perf_counter()`` reading."""
    now = time.perf_counter()
    return now
|
||||||
|
|
||||||
|
|
||||||
|
def new_task_info():
    """Create a fresh task record holding only a random hex "id"."""
    return {"id": uuid.uuid4().hex}
|
||||||
|
|
||||||
|
|
||||||
|
def create_and_submit_new_task(executor, requestor, task_info_dict, input_path):
    """Submit ``requestor.make_request`` for ``input_path`` and track its task info.

    A new task record is created, passed to the request together with
    ``input_path``, and stored in ``task_info_dict`` under the object returned
    by ``executor.submit``.

    Returns:
        The object returned by ``executor.submit``.
    """
    info = new_task_info()
    future = executor.submit(requestor.make_request, info, input_path)
    task_info_dict[future] = info
    return future
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Benchmark driver: collect inputs, run them through the predictor in
    # batches while a background thread samples the GPUs, then print the
    # statistics and save them as JSON.
    parser = argparse.ArgumentParser()
    parser.add_argument("input_dirs", type=str, nargs="+", metavar="INPUT_DIR")
    parser.add_argument("-b", "--batch_size", type=int, default=1)
    parser.add_argument("-o", "--output_path", type=str, default="benchmark.json")
    parser.add_argument("--paddlex_config_path", type=str, default="PaddleOCR-VL.yaml")
    parser.add_argument("--gpu_ids", type=int, nargs="+", default=[0])
    args = parser.parse_args()

    # Holds the task records of successful batches only.
    task_info_list = []

    all_input_paths = []
    for input_dir in args.input_dirs:
        all_input_paths += glob.glob(os.path.join(input_dir, "*"))
    all_input_paths.sort()
    if len(all_input_paths) == 0:
        print("No valid data")
        sys.exit(1)

    # Validate before the pipeline is built, so an invalid value does not
    # exit with a freshly created predictor left unclosed.
    if args.batch_size < 1:
        print("Invalid batch size")
        sys.exit(2)

    predictor = PaddleXPredictor(args.paddlex_config_path)

    gpu_metrics_list = []
    thread_device_monitor = Thread(
        target=monitor_device,
        args=(args.gpu_ids, gpu_metrics_list),
    )
    thread_device_monitor.start()

    # Number of input files that belong to successfully processed batches.
    successful_files = 0

    try:
        start_time = get_curr_time()
        batch_data = []
        last_index = len(all_input_paths) - 1
        with open("generated_markdown.md", "w", encoding="utf-8") as f:
            for i, input_path in tqdm(enumerate(all_input_paths), total=len(all_input_paths)):
                batch_data.append(input_path)
                # Keep accumulating until the batch is full or the input is exhausted.
                if len(batch_data) < args.batch_size and i != last_index:
                    continue

                task_info = new_task_info()
                try:
                    markdown = predictor.predict(task_info, batch_data)
                    f.write(markdown)
                    f.write("\n\n")
                except Exception as e:
                    print(e)
                else:
                    task_info_list.append(task_info)
                    successful_files += len(batch_data)
                finally:
                    # Always start the next batch empty. Without this, a failed
                    # batch would leak its inputs into every following batch.
                    batch_data.clear()
        end_time = get_curr_time()
    finally:
        # Stop the GPU monitor and release the pipeline even on an interrupt.
        shutdown = True
        thread_device_monitor.join()
        predictor.close()

    elapsed = end_time - start_time

    total_files = len(all_input_paths)
    throughput_file = total_files / elapsed
    print(f"Throughput (file): {throughput_file:.4f} files per second")

    duration_list_batch = [info["end_time"] - info["start_time"] for info in task_info_list]
    if duration_list_batch:
        avg_latency_batch = sum(duration_list_batch) / len(duration_list_batch)
        print(f"Average latency (batch): {avg_latency_batch:.4f} seconds")
    else:
        # Every batch failed: there is no latency to average.
        avg_latency_batch = None

    if successful_files:
        processed_pages = sum(info.get("processed_pages", 0) for info in task_info_list)
        throughput_page = processed_pages / elapsed
        print(f"Processed pages: {processed_pages}")
        print(f"Throughput (page): {throughput_page:.4f} pages per second")
        generated_tokens = sum(info.get("generated_tokens", 0) for info in task_info_list)
        throughput_token = generated_tokens / elapsed
        print(f"Generated tokens: {generated_tokens}")
        print(f"Throughput (token): {throughput_token:.1f} tokens per second")
    else:
        processed_pages = None
        throughput_page = None
        generated_tokens = None
        throughput_token = None

    if gpu_metrics_list:
        # Each line reports the maximum, minimum and mean over all samples.
        gpu_util_list = list(map(itemgetter("utilization"), gpu_metrics_list))
        print(
            f"GPU utilization (%): {max(gpu_util_list):.1f}, {min(gpu_util_list):.1f}, {sum(gpu_util_list) / len(gpu_util_list):.1f}"
        )
        gpu_mem_list = list(map(itemgetter("memory"), gpu_metrics_list))
        print(
            f"GPU memory usage (MB): {max(gpu_mem_list) / 1024**2:.1f}, {min(gpu_mem_list) / 1024**2:.1f}, {sum(gpu_mem_list) / len(gpu_mem_list) / 1024**2:.1f}"
        )

    dic = {
        "input_dirs": args.input_dirs,
        "batch_size": args.batch_size,
        "total_files": total_files,
        "throughput_file": throughput_file,
        "avg_latency_batch": avg_latency_batch,
        "duration_list": duration_list_batch,
        "successful_files": successful_files,
        "processed_pages": processed_pages,
        "throughput_page": throughput_page,
        "generated_tokens": generated_tokens,
        "throughput_token": throughput_token,
        "gpu_metrics_list": gpu_metrics_list,
    }
    with open(args.output_path, "w", encoding="utf-8") as f:
        json.dump(
            dic,
            f,
            ensure_ascii=False,
            indent=2,
        )
    print(f"Config and results saved to {args.output_path}")
|
||||||
Reference in New Issue
Block a user