diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..b9019f3f0
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,6 @@
+[submodule "poros/third_party/googletest"]
+ path = poros/third_party/googletest
+ url = https://github.com/google/googletest.git
+[submodule "poros/third_party/gflags"]
+ path = poros/third_party/gflags
+ url = https://github.com/gflags/gflags.git
diff --git a/README_CN.md b/README_CN.md
index c03ee7b54..c5a7b2633 100755
--- a/README_CN.md
+++ b/README_CN.md
@@ -44,7 +44,7 @@
- **⚡️FastDeploy** is an **all-scenario**, **easy-to-use and flexible**, **extremely efficient** AI inference deployment tool that supports **cloud-edge-device** deployment. It provides an 📦**out-of-the-box** deployment experience for over 🔥160+ **Text**, **Vision**, **Speech** and **cross-modal** models, with 🔚**end-to-end** inference performance optimization. It covers dozens of task scenarios such as [object detection](./examples/vision/detection), [text recognition (OCR)](./examples/vision/ocr), [face detection](./examples/vision/facedet), [portrait matting](./examples/vision/matting), [multi-object tracking](./examples/vision/tracking/pptracking), [NLP](./examples/text), [Stable Difussion text-to-image generation](./examples/multimodal/stable_diffusion) and [TTS](./examples/audio/pp-tts), meeting developers' industrial deployment needs across **multiple scenarios, hardware and platforms**.
+ **⚡️FastDeploy** is an **all-scenario**, **easy-to-use and flexible**, **extremely efficient** AI inference deployment tool that supports **cloud-edge-device** deployment. It provides an 📦**out-of-the-box** deployment experience for over 🔥160+ **Text**, **Vision**, **Speech** and **cross-modal** models, with 🔚**end-to-end** inference performance optimization. It covers dozens of task scenarios such as [object detection](./examples/vision/detection), [text recognition (OCR)](./examples/vision/ocr), [face detection](./examples/vision/facedet), [portrait matting](./examples/vision/matting), [multi-object tracking](./examples/vision/tracking/pptracking), [NLP](./examples/text), [Stable Diffusion text-to-image generation](./examples/multimodal/stable_diffusion) and [TTS](./examples/audio/pp-tts), meeting developers' industrial deployment needs across **multiple scenarios, hardware and platforms**.
diff --git a/README_EN.md b/README_EN.md
index 48e66e506..7761fd895 100644
--- a/README_EN.md
+++ b/README_EN.md
@@ -41,7 +41,7 @@ English | [简体中文](README_CN.md) | [हिन्दी](./docs/docs_i18n/R
**⚡️FastDeploy** is an **Easy-to-use** and **High Performance** AI model deployment toolkit for Cloud, Mobile and Edge with 📦**out-of-the-box and unified experience**, 🔚**end-to-end optimization** for over **🔥160+ Text, Vision, Speech and Cross-modal AI models**.
-Including [image classification](examples/vision/classification), [object detection](examples/vision/detection), [OCR](./examples/vision/ocr), [face detection](./examples/vision/facedet), [matting](./examples/vision/matting), [pp-tracking](./examples/vision/tracking/pptracking), [NLP](./examples/text), [stable difussion](./examples/multimodal/stable_diffusion), [TTS](./examples/audio/pp-tts) and other tasks to meet developers' industrial deployment needs for **multi-scenario**, **multi-hardware** and **multi-platform**.
+Including [image classification](examples/vision/classification), [object detection](examples/vision/detection), [OCR](./examples/vision/ocr), [face detection](./examples/vision/facedet), [matting](./examples/vision/matting), [pp-tracking](./examples/vision/tracking/pptracking), [NLP](./examples/text), [stable diffusion](./examples/multimodal/stable_diffusion), [TTS](./examples/audio/pp-tts) and other tasks to meet developers' industrial deployment needs for **multi-scenario**, **multi-hardware** and **multi-platform**.
diff --git a/benchmark/cpp/CMakeLists.txt b/benchmark/cpp/CMakeLists.txt
index f839eb228..a6f0b87c2 100755
--- a/benchmark/cpp/CMakeLists.txt
+++ b/benchmark/cpp/CMakeLists.txt
@@ -13,7 +13,9 @@ add_executable(benchmark_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov8.cc)
add_executable(benchmark_ppcls ${PROJECT_SOURCE_DIR}/benchmark_ppcls.cc)
add_executable(benchmark_precision_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_precision_ppyolov8.cc)
add_executable(benchmark_ppseg ${PROJECT_SOURCE_DIR}/benchmark_ppseg.cc)
-add_executable(benchmark_ppocr ${PROJECT_SOURCE_DIR}/benchmark_ppocr.cc)
+add_executable(benchmark_ppocr_det ${PROJECT_SOURCE_DIR}/benchmark_ppocr_det.cc)
+add_executable(benchmark_ppocr_cls ${PROJECT_SOURCE_DIR}/benchmark_ppocr_cls.cc)
+add_executable(benchmark_ppocr_rec ${PROJECT_SOURCE_DIR}/benchmark_ppocr_rec.cc)
if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread)
@@ -21,12 +23,16 @@ if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_ppseg ${FASTDEPLOY_LIBS} gflags pthread)
- target_link_libraries(benchmark_ppocr ${FASTDEPLOY_LIBS} gflags pthread)
+ target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags pthread)
+ target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags pthread)
+ target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags pthread)
else()
target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_ppseg ${FASTDEPLOY_LIBS} gflags)
- target_link_libraries(benchmark_ppocr ${FASTDEPLOY_LIBS} gflags)
+ target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags)
+ target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags)
+ target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags)
endif()
diff --git a/benchmark/cpp/README.md b/benchmark/cpp/README.md
new file mode 100644
index 000000000..abdbabaf6
--- /dev/null
+++ b/benchmark/cpp/README.md
@@ -0,0 +1,137 @@
+# FastDeploy C++ Benchmarks
+
+## 1. Build Options
+The following build options are benchmark-related and must be enabled when building the SDK used to run the benchmarks.
+
+|Option|Required Value|Description|
+|---|---|---|
+| ENABLE_BENCHMARK | ON | Defaults to OFF; enables benchmark mode |
+| ENABLE_VISION | ON | Defaults to OFF; builds the vision model deployment module |
+| ENABLE_TEXT | ON | Defaults to OFF; builds the text (NLP) model deployment module |
+
+To run the FastDeploy C++ Benchmarks, first prepare the corresponding environment and build the FastDeploy C++ SDK from source with ENABLE_BENCHMARK=ON. The system requirements are described below by hardware target; for detailed requirements in each environment, see the [FastDeploy environment requirements](../../docs/cn/build_and_install).
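+
+For quick reference, the sketch below shows a minimal configure step with only the benchmark-related switches; combine it with the backend and device options for your platform (the full command is shown in section 3.2).
+
+```bash
+# Minimal sketch: benchmark-related switches only.
+cmake .. -DENABLE_BENCHMARK=ON -DENABLE_VISION=ON -DENABLE_TEXT=ON
+```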
+
+## 2. Benchmark Parameter Description
+
+| Parameter | Description |
+| -------------------- | ------------------------------------------ |
+| --model | Path to the model directory |
+| --image | Path to the test image |
+| --device | Device, one of CPU/GPU/XPU; defaults to CPU |
+| --cpu_thread_nums | Number of CPU threads; defaults to 8 |
+| --device_id | GPU/XPU device id; defaults to 0 |
+| --warmup | Number of warmup runs before benchmarking; defaults to 200 |
+| --repeat | Number of benchmark iterations; defaults to 1000 |
+| --profile_mode | Which stage to profile, one of `[runtime, end2end]`; defaults to runtime |
+| --include_h2d_d2h | Whether to include H2D+D2H time in the statistics; only effective when profile_mode is runtime; defaults to false |
+| --backend | Backend type: default, ort, ov, trt, paddle, paddle_trt, lite, etc. With default, the best backend is chosen automatically; explicitly setting a specific backend is recommended. Defaults to default |
+| --use_fp16 | Whether to enable FP16; currently only effective for the trt, paddle_trt and lite backends; defaults to false |
+| --collect_memory_info | Whether to collect CPU/GPU memory info; defaults to false |
+| --sampling_interval | Sampling interval for collecting CPU/GPU memory info, in ms; defaults to 50 |
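+
+For example, a typical runtime-profiling invocation looks like the sketch below; the model directory and image path are illustrative placeholders, and real model/image downloads are shown in section 3.4.
+
+```bash
+# Illustrative sketch: profile runtime latency of a PaddleClas model on GPU 0
+# with the Paddle Inference backend (paths are placeholders).
+./benchmark_ppcls --model ./ResNet50_vd_infer --image ./test.jpg \
+  --device gpu --device_id 0 --backend paddle \
+  --profile_mode runtime --warmup 100 --repeat 1000
+```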
+
+## 3. Running the Benchmarks on X86_64 CPU and NVIDIA GPU
+
+### 3.1 Environment Setup
+
+Building on Linux requires:
+ - gcc/g++ >= 5.4 (8.2 recommended)
+ - cmake >= 3.18.0
+ - CUDA >= 11.2
+ - cuDNN >= 8.2
+ - TensorRT >= 8.5
+
+Building FastDeploy with GPU support requires the corresponding CUDA environment and TensorRT; for details, see the [GPU build documentation](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md).
+
+### 3.2 Building the FastDeploy C++ SDK
+```bash
+# Build the SDK from source (ENABLE_BENCHMARK=ON enables benchmark mode)
+git clone https://github.com/PaddlePaddle/FastDeploy.git -b develop
+cd FastDeploy
+mkdir build && cd build
+cmake .. -DWITH_GPU=ON \
+ -DENABLE_ORT_BACKEND=ON \
+ -DENABLE_PADDLE_BACKEND=ON \
+ -DENABLE_OPENVINO_BACKEND=ON \
+ -DENABLE_TRT_BACKEND=ON \
+ -DENABLE_VISION=ON \
+ -DENABLE_TEXT=ON \
+ -DENABLE_BENCHMARK=ON \
+ -DTRT_DIRECTORY=/Paddle/TensorRT-8.5.2.2 \
+ -DCUDA_DIRECTORY=/usr/local/cuda \
+ -DCMAKE_INSTALL_PREFIX=${PWD}/compiled_fastdeploy_sdk
+
+make -j12
+make install
+
+# Set the SDK path
+cd ..
+export FD_GPU_SDK=${PWD}/build/compiled_fastdeploy_sdk
+```
+### 3.3 Building the Benchmark Demos
+```bash
+cd benchmark/cpp
+mkdir build && cd build
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${FD_GPU_SDK}
+make -j4
+```
+
+### 3.4 Running the Benchmark Demos
+
+On X86 CPU + NVIDIA GPU machines, FastDeploy currently supports multiple inference backends. The example below uses PaddleYOLOv8 to produce the corresponding benchmark numbers for each backend on CPU/GPU.
+
+- Download the model files and test image
+```bash
+wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_s_500e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+tar -zxvf yolov8_s_500e_coco.tgz
+```
+
+- Run the yolov8 benchmark demo
+
+```bash
+
+# Profile performance
+# CPU
+# Paddle Inference
+./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device cpu --cpu_thread_nums 8 --backend paddle --profile_mode runtime
+
+# ONNX Runtime
+./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device cpu --cpu_thread_nums 8 --backend ort --profile_mode runtime
+
+# OpenVINO
+./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device cpu --cpu_thread_nums 8 --backend ov --profile_mode runtime
+
+# GPU
+# Paddle Inference
+./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend paddle --profile_mode runtime --warmup 200 --repeat 2000
+
+# Paddle Inference + TensorRT
+./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend paddle_trt --profile_mode runtime --warmup 200 --repeat 2000
+
+# Paddle Inference + TensorRT + FP16
+./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend paddle_trt --profile_mode runtime --warmup 200 --repeat 2000 --use_fp16
+
+# ONNX Runtime
+./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend ort --profile_mode runtime --warmup 200 --repeat 2000
+
+# TensorRT
+./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend trt --profile_mode runtime --warmup 200 --repeat 2000
+
+# TensorRT + FP16
+./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend trt --profile_mode runtime --warmup 200 --repeat 2000 --use_fp16
+
+# Profile CPU/GPU memory usage
+# add the --collect_memory_info flag
+./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device cpu --cpu_thread_nums 8 --backend paddle --profile_mode runtime --collect_memory_info
+```
+Note: to avoid affecting the performance statistics, it is best not to enable memory collection while profiling latency; when --collect_memory_info is specified, only the memory numbers are reliable. For more options, see the [parameter description](#2-benchmark-parameter-description) above.
+
+
+## 4. Running the Benchmarks on ARM CPU
+- TODO
+
+## 5. Running the Benchmarks on KunlunXin XPU
+- TODO
diff --git a/benchmark/cpp/benchmark_ppcls.cc b/benchmark/cpp/benchmark_ppcls.cc
index 734a09a48..b8bfcc989 100755
--- a/benchmark/cpp/benchmark_ppcls.cc
+++ b/benchmark/cpp/benchmark_ppcls.cc
@@ -16,6 +16,9 @@
#include "macros.h"
#include "option.h"
+namespace vision = fastdeploy::vision;
+namespace benchmark = fastdeploy::benchmark;
+
int main(int argc, char* argv[]) {
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
// Initialization
@@ -31,9 +34,28 @@ int main(int argc, char* argv[]) {
auto model_file = FLAGS_model + sep + "inference.pdmodel";
auto params_file = FLAGS_model + sep + "inference.pdiparams";
auto config_file = FLAGS_model + sep + "inference_cls.yaml";
- auto model_ppcls = fastdeploy::vision::classification::PaddleClasModel(
+ auto model_ppcls = vision::classification::PaddleClasModel(
model_file, params_file, config_file, option);
- fastdeploy::vision::ClassifyResult res;
+ vision::ClassifyResult res;
+ // Run once at least
+ model_ppcls.Predict(im, &res);
+ // 1. Test result diff
+ std::cout << "=============== Test result diff =================\n";
+ // Save result to -> disk.
+ std::string cls_result_path = "ppcls_result.txt";
+ benchmark::ResultManager::SaveClassifyResult(res, cls_result_path);
+ // Load result from <- disk.
+ vision::ClassifyResult res_loaded;
+ benchmark::ResultManager::LoadClassifyResult(&res_loaded, cls_result_path);
+ // Calculate diff between two results.
+ auto cls_diff =
+ benchmark::ResultManager::CalculateDiffStatis(res, res_loaded);
+ std::cout << "Labels diff: mean=" << cls_diff.labels.mean
+ << ", max=" << cls_diff.labels.max
+ << ", min=" << cls_diff.labels.min << std::endl;
+ std::cout << "Scores diff: mean=" << cls_diff.scores.mean
+ << ", max=" << cls_diff.scores.max
+ << ", min=" << cls_diff.scores.min << std::endl;
BENCHMARK_MODEL(model_ppcls, model_ppcls.Predict(im, &res))
#endif
return 0;
diff --git a/benchmark/cpp/benchmark_ppocr.cc b/benchmark/cpp/benchmark_ppocr.cc
deleted file mode 100755
index 398d0feb0..000000000
--- a/benchmark/cpp/benchmark_ppocr.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "flags.h"
-#include "macros.h"
-#include "option.h"
-
-int main(int argc, char* argv[]) {
-#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
- // Initialization
- auto option = fastdeploy::RuntimeOption();
- if (!CreateRuntimeOption(&option, argc, argv, true)) {
- return -1;
- }
- auto im = cv::imread(FLAGS_image);
- auto im_rec = cv::imread(FLAGS_image_rec);
- // Detection Model
- auto det_model_file =
- FLAGS_model + sep + FLAGS_det_model + sep + "inference.pdmodel";
- auto det_params_file =
- FLAGS_model + sep + FLAGS_det_model + sep + "inference.pdiparams";
- // Classification Model
- auto cls_model_file =
- FLAGS_model + sep + FLAGS_cls_model + sep + "inference.pdmodel";
- auto cls_params_file =
- FLAGS_model + sep + FLAGS_cls_model + sep + "inference.pdiparams";
- // Recognition Model
- auto rec_model_file =
- FLAGS_model + sep + FLAGS_rec_model + sep + "inference.pdmodel";
- auto rec_params_file =
- FLAGS_model + sep + FLAGS_rec_model + sep + "inference.pdiparams";
- auto rec_label_file = FLAGS_rec_label_file;
- if (FLAGS_backend == "paddle_trt") {
- option.paddle_infer_option.collect_trt_shape = true;
- }
- auto det_option = option;
- auto cls_option = option;
- auto rec_option = option;
- if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
- det_option.trt_option.SetShape("x", {1, 3, 64, 64}, {1, 3, 640, 640},
- {1, 3, 960, 960});
- cls_option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
- {8, 3, 48, 1024});
- rec_option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
- {8, 3, 48, 2304});
- }
- auto det_model = fastdeploy::vision::ocr::DBDetector(
- det_model_file, det_params_file, det_option);
- auto cls_model = fastdeploy::vision::ocr::Classifier(
- cls_model_file, cls_params_file, cls_option);
- auto rec_model = fastdeploy::vision::ocr::Recognizer(
- rec_model_file, rec_params_file, rec_label_file, rec_option);
- // Only for runtime
- if (FLAGS_profile_mode == "runtime") {
- std::vector<std::array<int, 8>> boxes_result;
- std::cout << "====Detection model====" << std::endl;
- BENCHMARK_MODEL(det_model, det_model.Predict(im, &boxes_result));
- int32_t cls_label;
- float cls_score;
- std::cout << "====Classification model====" << std::endl;
- BENCHMARK_MODEL(cls_model,
- cls_model.Predict(im_rec, &cls_label, &cls_score));
- std::string text;
- float rec_score;
- std::cout << "====Recognization model====" << std::endl;
- BENCHMARK_MODEL(rec_model, rec_model.Predict(im_rec, &text, &rec_score));
- }
- auto model_ppocrv3 =
- fastdeploy::pipeline::PPOCRv3(&det_model, &cls_model, &rec_model);
- fastdeploy::vision::OCRResult res;
- if (FLAGS_profile_mode == "end2end") {
- BENCHMARK_MODEL(model_ppocrv3, model_ppocrv3.Predict(im, &res))
- }
- auto vis_im = fastdeploy::vision::VisOcr(im, res);
- cv::imwrite("vis_result.jpg", vis_im);
- std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
-#endif
- return 0;
-}
\ No newline at end of file
diff --git a/benchmark/cpp/benchmark_ppocr_cls.cc b/benchmark/cpp/benchmark_ppocr_cls.cc
new file mode 100644
index 000000000..0ddd939bc
--- /dev/null
+++ b/benchmark/cpp/benchmark_ppocr_cls.cc
@@ -0,0 +1,57 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "flags.h"
+#include "macros.h"
+#include "option.h"
+
+int main(int argc, char* argv[]) {
+#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
+ // Initialization
+ auto option = fastdeploy::RuntimeOption();
+ if (!CreateRuntimeOption(&option, argc, argv, true)) {
+ return -1;
+ }
+ auto im = cv::imread(FLAGS_image);
+ // Classification Model
+ auto cls_model_file = FLAGS_model + sep + "inference.pdmodel";
+ auto cls_params_file = FLAGS_model + sep + "inference.pdiparams";
+ if (FLAGS_backend == "paddle_trt") {
+ option.paddle_infer_option.collect_trt_shape = true;
+ }
+ if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
+ option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
+ {8, 3, 48, 1024});
+ }
+ auto model_ppocr_cls = fastdeploy::vision::ocr::Classifier(
+ cls_model_file, cls_params_file, option);
+ int32_t res_label;
+ float res_score;
+ // Run once at least
+ model_ppocr_cls.Predict(im, &res_label, &res_score);
+ // 1. Test result diff
+ std::cout << "=============== Test result diff =================\n";
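+ // The expected values below are a hard-coded baseline (assumed to come from
+ // a known-good run on the demo image); the diff is only meaningful for that
+ // input.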
+ int32_t res_label_expect = 0;
+ float res_score_expect = 1.0;
+ // Calculate diff between two results.
+ auto ppocr_cls_label_diff = res_label - res_label_expect;
+ auto ppocr_cls_score_diff = res_score - res_score_expect;
+ std::cout << "PPOCR Cls label diff: " << ppocr_cls_label_diff << std::endl;
+ std::cout << "PPOCR Cls score diff: " << abs(ppocr_cls_score_diff)
+ << std::endl;
+ BENCHMARK_MODEL(model_ppocr_cls,
+ model_ppocr_cls.Predict(im, &res_label, &res_score));
+#endif
+ return 0;
+}
\ No newline at end of file
diff --git a/benchmark/cpp/benchmark_ppocr_det.cc b/benchmark/cpp/benchmark_ppocr_det.cc
new file mode 100644
index 000000000..f98b1c9f3
--- /dev/null
+++ b/benchmark/cpp/benchmark_ppocr_det.cc
@@ -0,0 +1,63 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "flags.h"
+#include "macros.h"
+#include "option.h"
+
+namespace vision = fastdeploy::vision;
+namespace benchmark = fastdeploy::benchmark;
+
+int main(int argc, char* argv[]) {
+#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
+ // Initialization
+ auto option = fastdeploy::RuntimeOption();
+ if (!CreateRuntimeOption(&option, argc, argv, true)) {
+ return -1;
+ }
+ auto im = cv::imread(FLAGS_image);
+ // Detection Model
+ auto det_model_file = FLAGS_model + sep + "inference.pdmodel";
+ auto det_params_file = FLAGS_model + sep + "inference.pdiparams";
+ if (FLAGS_backend == "paddle_trt") {
+ option.paddle_infer_option.collect_trt_shape = true;
+ }
+ if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
+ option.trt_option.SetShape("x", {1, 3, 64, 64}, {1, 3, 640, 640},
+ {1, 3, 960, 960});
+ }
+ auto model_ppocr_det =
+ vision::ocr::DBDetector(det_model_file, det_params_file, option);
+ std::vector<std::array<int, 8>> res;
+ // Run once at least
+ model_ppocr_det.Predict(im, &res);
+ // 1. Test result diff
+ std::cout << "=============== Test result diff =================\n";
+ // Save result to -> disk.
+ std::string ppocr_det_result_path = "ppocr_det_result.txt";
+ benchmark::ResultManager::SaveOCRDetResult(res, ppocr_det_result_path);
+ // Load result from <- disk.
+ std::vector<std::array<int, 8>> res_loaded;
+ benchmark::ResultManager::LoadOCRDetResult(&res_loaded,
+ ppocr_det_result_path);
+ // Calculate diff between two results.
+ auto ppocr_det_diff =
+ benchmark::ResultManager::CalculateDiffStatis(res, res_loaded);
+ std::cout << "PPOCR Boxes diff: mean=" << ppocr_det_diff.boxes.mean
+ << ", max=" << ppocr_det_diff.boxes.max
+ << ", min=" << ppocr_det_diff.boxes.min << std::endl;
+ BENCHMARK_MODEL(model_ppocr_det, model_ppocr_det.Predict(im, &res));
+#endif
+ return 0;
+}
\ No newline at end of file
diff --git a/benchmark/cpp/benchmark_ppocr_rec.cc b/benchmark/cpp/benchmark_ppocr_rec.cc
new file mode 100644
index 000000000..71bb6b353
--- /dev/null
+++ b/benchmark/cpp/benchmark_ppocr_rec.cc
@@ -0,0 +1,59 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "flags.h"
+#include "macros.h"
+#include "option.h"
+
+DEFINE_string(rec_label_file, "", "Path of Recognition label file of PPOCR.");
+
+int main(int argc, char* argv[]) {
+#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
+ // Initialization
+ auto option = fastdeploy::RuntimeOption();
+ if (!CreateRuntimeOption(&option, argc, argv, true)) {
+ return -1;
+ }
+ auto im = cv::imread(FLAGS_image);
+ // Recognition Model
+ auto rec_model_file = FLAGS_model + sep + "inference.pdmodel";
+ auto rec_params_file = FLAGS_model + sep + "inference.pdiparams";
+ if (FLAGS_backend == "paddle_trt") {
+ option.paddle_infer_option.collect_trt_shape = true;
+ }
+ if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
+ option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
+ {8, 3, 48, 2304});
+ }
+ auto model_ppocr_rec = fastdeploy::vision::ocr::Recognizer(
+ rec_model_file, rec_params_file, FLAGS_rec_label_file, option);
+ std::string text;
+ float rec_score;
+ // Run once at least
+ model_ppocr_rec.Predict(im, &text, &rec_score);
+ // 1. Test result diff
+ std::cout << "=============== Test result diff =================\n";
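+ // The expected text and score below are a hard-coded baseline (assumed to
+ // come from a known-good run on the demo image); the diff is only meaningful
+ // for that input.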
+ std::string text_expect = "上海斯格威铂尔大酒店";
+ float res_score_expect = 0.993308;
+ // Calculate diff between two results.
+ auto ppocr_rec_text_diff = text.compare(text_expect);
+ auto ppocr_rec_score_diff = rec_score - res_score_expect;
+ std::cout << "PPOCR Rec text diff: " << ppocr_rec_text_diff << std::endl;
+ std::cout << "PPOCR Rec score diff: " << abs(ppocr_rec_score_diff)
+ << std::endl;
+ BENCHMARK_MODEL(model_ppocr_rec,
+ model_ppocr_rec.Predict(im, &text, &rec_score));
+#endif
+ return 0;
+}
\ No newline at end of file
diff --git a/benchmark/cpp/benchmark_ppseg.cc b/benchmark/cpp/benchmark_ppseg.cc
index 23b98b3f5..02968cf2a 100755
--- a/benchmark/cpp/benchmark_ppseg.cc
+++ b/benchmark/cpp/benchmark_ppseg.cc
@@ -16,6 +16,9 @@
#include "macros.h"
#include "option.h"
+namespace vision = fastdeploy::vision;
+namespace benchmark = fastdeploy::benchmark;
+
int main(int argc, char* argv[]) {
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
// Initialization
@@ -34,11 +37,33 @@ int main(int argc, char* argv[]) {
option.trt_option.SetShape("x", {1, 3, 192, 192}, {1, 3, 192, 192},
{1, 3, 192, 192});
}
- auto model_ppseg = fastdeploy::vision::segmentation::PaddleSegModel(
+ auto model_ppseg = vision::segmentation::PaddleSegModel(
model_file, params_file, config_file, option);
- fastdeploy::vision::SegmentationResult res;
+ vision::SegmentationResult res;
+ // Run once at least
+ model_ppseg.Predict(im, &res);
+ // 1. Test result diff
+ std::cout << "=============== Test result diff =================\n";
+ // Save result to -> disk.
+ std::string seg_result_path = "ppseg_result.txt";
+ benchmark::ResultManager::SaveSegmentationResult(res, seg_result_path);
+ // Load result from <- disk.
+ vision::SegmentationResult res_loaded;
+ benchmark::ResultManager::LoadSegmentationResult(&res_loaded,
+ seg_result_path);
+ // Calculate diff between two results.
+ auto seg_diff =
+ benchmark::ResultManager::CalculateDiffStatis(res, res_loaded);
+ std::cout << "Labels diff: mean=" << seg_diff.labels.mean
+ << ", max=" << seg_diff.labels.max
+ << ", min=" << seg_diff.labels.min << std::endl;
+ if (res_loaded.contain_score_map) {
+ std::cout << "Scores diff: mean=" << seg_diff.scores.mean
+ << ", max=" << seg_diff.scores.max
+ << ", min=" << seg_diff.scores.min << std::endl;
+ }
BENCHMARK_MODEL(model_ppseg, model_ppseg.Predict(im, &res))
- auto vis_im = fastdeploy::vision::VisSegmentation(im, res, 0.5);
+ auto vis_im = vision::VisSegmentation(im, res, 0.5);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
#endif
diff --git a/benchmark/cpp/benchmark_precision_ppyolov8.cc b/benchmark/cpp/benchmark_precision_ppyolov8.cc
index caea3be19..7792d98c6 100644
--- a/benchmark/cpp/benchmark_precision_ppyolov8.cc
+++ b/benchmark/cpp/benchmark_precision_ppyolov8.cc
@@ -45,12 +45,16 @@ int main(int argc, char* argv[]) {
benchmark::ResultManager::LoadDetectionResult(&res_loaded, det_result_path);
// Calculate diff between two results.
auto det_diff =
- benchmark::ResultManager::CalculateDiffStatis(&res, &res_loaded);
- std::cout << "diff: mean=" << det_diff.mean << ",max=" << det_diff.max
- << ",min=" << det_diff.min << std::endl;
+ benchmark::ResultManager::CalculateDiffStatis(res, res_loaded);
+ std::cout << "Boxes diff: mean=" << det_diff.boxes.mean
+ << ", max=" << det_diff.boxes.max << ", min=" << det_diff.boxes.min
+ << std::endl;
+ std::cout << "Label_ids diff: mean=" << det_diff.labels.mean
+ << ", max=" << det_diff.labels.max
+ << ", min=" << det_diff.labels.min << std::endl;
// 2. Test tensor diff
std::cout << "=============== Test tensor diff =================\n";
- std::vector<vision::DetectionResult> bacth_res;
+ std::vector<vision::DetectionResult> batch_res;
 std::vector<fastdeploy::FDTensor> input_tensors, output_tensors;
 std::vector<cv::Mat> imgs;
imgs.push_back(im);
@@ -62,7 +66,7 @@ int main(int argc, char* argv[]) {
input_tensors[2].name = "im_shape";
input_tensors.pop_back();
model_ppyolov8.Infer(input_tensors, &output_tensors);
- model_ppyolov8.GetPostprocessor().Run(output_tensors, &bacth_res);
+ model_ppyolov8.GetPostprocessor().Run(output_tensors, &batch_res);
// Save tensor to -> disk.
auto& tensor_dump = output_tensors[0];
std::string det_tensor_path = "ppyolov8_tensor.txt";
@@ -71,11 +75,11 @@ int main(int argc, char* argv[]) {
fastdeploy::FDTensor tensor_loaded;
benchmark::ResultManager::LoadFDTensor(&tensor_loaded, det_tensor_path);
// Calculate diff between two tensors.
- auto det_tensor_diff = benchmark::ResultManager::CalculateDiffStatis(
- &tensor_dump, &tensor_loaded);
- std::cout << "diff: mean=" << det_tensor_diff.mean
- << ",max=" << det_tensor_diff.max << ",min=" << det_tensor_diff.min
- << std::endl;
+ auto det_tensor_diff =
+ benchmark::ResultManager::CalculateDiffStatis(tensor_dump, tensor_loaded);
+ std::cout << "Tensor diff: mean=" << det_tensor_diff.data.mean
+ << ", max=" << det_tensor_diff.data.max
+ << ", min=" << det_tensor_diff.data.min << std::endl;
// 3. Run profiling
BENCHMARK_MODEL(model_ppyolov8, model_ppyolov8.Predict(im, &res))
auto vis_im = vision::VisDetection(im, res);
diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc
old mode 100755
new mode 100644
index 07c36e31e..848851de9
--- a/benchmark/cpp/benchmark_yolov5.cc
+++ b/benchmark/cpp/benchmark_yolov5.cc
@@ -16,6 +16,9 @@
#include "macros.h"
#include "option.h"
+namespace vision = fastdeploy::vision;
+namespace benchmark = fastdeploy::benchmark;
+
int main(int argc, char* argv[]) {
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
// Initialization
@@ -24,11 +27,29 @@ int main(int argc, char* argv[]) {
return -1;
}
auto im = cv::imread(FLAGS_image);
- auto model_yolov5 =
- fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option);
- fastdeploy::vision::DetectionResult res;
+ auto model_yolov5 = vision::detection::YOLOv5(FLAGS_model, "", option);
+ vision::DetectionResult res;
+ // Run once at least
+ model_yolov5.Predict(im, &res);
+ // 1. Test result diff
+ std::cout << "=============== Test result diff =================\n";
+ // Save result to -> disk.
+ std::string det_result_path = "yolov5_result.txt";
+ benchmark::ResultManager::SaveDetectionResult(res, det_result_path);
+ // Load result from <- disk.
+ vision::DetectionResult res_loaded;
+ benchmark::ResultManager::LoadDetectionResult(&res_loaded, det_result_path);
+ // Calculate diff between two results.
+ auto det_diff =
+ benchmark::ResultManager::CalculateDiffStatis(res, res_loaded);
+ std::cout << "Boxes diff: mean=" << det_diff.boxes.mean
+ << ", max=" << det_diff.boxes.max << ", min=" << det_diff.boxes.min
+ << std::endl;
+ std::cout << "Label_ids diff: mean=" << det_diff.labels.mean
+ << ", max=" << det_diff.labels.max
+ << ", min=" << det_diff.labels.min << std::endl;
BENCHMARK_MODEL(model_yolov5, model_yolov5.Predict(im, &res))
- auto vis_im = fastdeploy::vision::VisDetection(im, res);
+ auto vis_im = vision::VisDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
#endif
diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h
index 4802abe8a..e32e39eab 100755
--- a/benchmark/cpp/flags.h
+++ b/benchmark/cpp/flags.h
@@ -44,12 +44,6 @@ DEFINE_bool(
DEFINE_bool(
collect_memory_info, false, "Whether to collect memory info");
DEFINE_int32(sampling_interval, 50, "How often to collect memory info(ms).");
-// Only for ppocr
-DEFINE_string(det_model, "", "Path of Detection model of PPOCR.");
-DEFINE_string(cls_model, "", "Path of Classification model of PPOCR.");
-DEFINE_string(rec_model, "", "Path of Recognization model of PPOCR.");
-DEFINE_string(rec_label_file, "", "Path of Recognization label file of PPOCR.");
-DEFINE_string(image_rec, "", "Path of Recognization img file of PPOCR.");
static void PrintUsage() {
std::cout << "Usage: infer_demo --model model_path --image img_path --device "
@@ -63,6 +57,7 @@ static void PrintUsage() {
}
static void PrintBenchmarkInfo() {
+#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
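+ // The body is compiled out in non-benchmark builds: the helpers used below
+ // (e.g. fastdeploy::benchmark::Split) assume ENABLE_BENCHMARK and
+ // ENABLE_VISION.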
// Get model name
 std::vector<std::string> model_names;
fastdeploy::benchmark::Split(FLAGS_model, model_names, sep);
@@ -97,5 +92,6 @@ static void PrintBenchmarkInfo() {
<< "ms" << std::endl;
}
std::cout << ss.str() << std::endl;
+#endif
return;
}
diff --git a/benchmark/python/README.md b/benchmark/python/README.md
index b1f96c1be..eef8d4fc0 100644
--- a/benchmark/python/README.md
+++ b/benchmark/python/README.md
@@ -2,8 +2,8 @@
 Before running the benchmarks, confirm the following two steps
-* 1. The hardware and software environment meets the requirements; see [FastDeploy environment requirements](..//docs/cn/build_and_install/download_prebuilt_libraries.md)
-* 2. The FastDeploy Python wheel is installed; see [FastDeploy Python installation](../docs/cn/build_and_install/download_prebuilt_libraries.md)
+* 1. The hardware and software environment meets the requirements; see [FastDeploy environment requirements](../../docs/cn/build_and_install/download_prebuilt_libraries.md)
+* 2. The FastDeploy Python wheel is installed; see [FastDeploy Python installation](../../docs/cn/build_and_install/download_prebuilt_libraries.md)
 FastDeploy currently supports multiple inference backends; the example below uses PaddleClas MobileNetV1 to produce benchmark numbers for each backend on CPU/GPU
diff --git a/docs/cn/build_and_install/download_prebuilt_libraries.md b/docs/cn/build_and_install/download_prebuilt_libraries.md
index 903b25045..afbc6b2af 100755
--- a/docs/cn/build_and_install/download_prebuilt_libraries.md
+++ b/docs/cn/build_and_install/download_prebuilt_libraries.md
@@ -20,9 +20,11 @@ FastDeploy提供各平台预编译库,供开发者直接下载安装使用。
 Supports deployment on CPU and Nvidia GPU; the Paddle Inference, ONNX Runtime, OpenVINO and TensorRT inference backends, the Vision model module and the Text NLP model module are integrated by default
+Version information: Paddle Inference==2.4-dev5, ONNXRuntime==1.12.0, OpenVINO==2022.2.0.dev20220829, TensorRT==8.5.2.2
+
 ### Python Installation
-Release version installation (latest: 1.0.3)
+Release version installation (latest: 1.0.4)
```bash
pip install fastdeploy-gpu-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html
```
@@ -43,8 +45,8 @@ Release版本
 | Platform | File | Description |
 | :--- | :--- | :---- |
-| Linux x64 | [fastdeploy-linux-x64-gpu-1.0.3.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-1.0.3.tgz) | Built with g++ 8.2, CUDA 11.2, cuDNN 8.2 |
-| Windows x64 | [fastdeploy-win-x64-gpu-1.0.3.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-1.0.3.zip) | Built with Visual Studio 16 2019, CUDA 11.2, cuDNN 8.2 |
+| Linux x64 | [fastdeploy-linux-x64-gpu-1.0.4.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-1.0.4.tgz) | Built with g++ 8.2, CUDA 11.2, cuDNN 8.2 |
+| Windows x64 | [fastdeploy-win-x64-gpu-1.0.4.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-1.0.4.zip) | Built with Visual Studio 16 2019, CUDA 11.2, cuDNN 8.2 |
 Develop version (Nightly build)
@@ -63,9 +65,11 @@ Develop版本(Nightly build)
 Supports CPU-only deployment; the Paddle Inference, ONNX Runtime and OpenVINO backends, the Vision model module (only ONNX Runtime on Linux aarch64 and Mac OSX) and the Text NLP model module are integrated by default.
+Version information: Paddle Inference==2.4-dev5, ONNXRuntime==1.12.0, OpenVINO==2022.2.0.dev20220829
+
 ### Python Installation
-Release version installation (latest: 1.0.3)
+Release version installation (latest: 1.0.4)
```bash
pip install fastdeploy-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html
```
@@ -81,23 +85,23 @@ Release版本
 | Platform | File | Description |
 | :--- | :--- | :---- |
-| Linux x64 | [fastdeploy-linux-x64-1.0.3.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.3.tgz) | Built with g++ 8.2 |
-| Windows x64 | [fastdeploy-win-x64-1.0.3.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-1.0.3.zip) | Built with Visual Studio 16 2019 |
-| Mac OSX x64 | [fastdeploy-osx-x86_64-1.0.3.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-x86_64-1.0.3.tgz) | Built with clang++ 10.0.0 |
-| Mac OSX arm64 | [fastdeploy-osx-arm64-1.0.3.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-arm64-1.0.3.tgz) | Built with clang++ 13.0.0 |
-| Linux aarch64 | [fastdeploy-linux-aarch64-1.0.3.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-1.0.3.tgz) | Built with gcc 6.3 |
-| Android armv7&v8 | [fastdeploy-android-1.0.3-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.3-shared.tgz) | CV API; built with NDK 25 and clang++; supports arm64-v8a and armeabi-v7a |
-| Android armv7&v8 | [fastdeploy-android-with-text-1.0.3-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-with-text-1.0.3-shared.tgz) | Includes Text APIs such as FastTokenizer and UIE plus CV APIs; built with NDK 25 and clang++; supports arm64-v8a and armeabi-v7a |
-| Android armv7&v8 | [fastdeploy-android-with-text-only-1.0.3-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-with-text-only-1.0.3-shared.tgz) | Only includes Text APIs such as FastTokenizer and UIE; built with NDK 25 and clang++; does not include CV APIs such as OpenCV; supports arm64-v8a and armeabi-v7a |
+| Linux x64 | [fastdeploy-linux-x64-1.0.4.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.4.tgz) | Built with g++ 8.2 |
+| Windows x64 | [fastdeploy-win-x64-1.0.4.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-1.0.4.zip) | Built with Visual Studio 16 2019 |
+| Mac OSX x64 | [fastdeploy-osx-x86_64-1.0.4.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-x86_64-1.0.4.tgz) | Built with clang++ 10.0.0 |
+| Mac OSX arm64 | [fastdeploy-osx-arm64-1.0.4.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-arm64-1.0.4.tgz) | Built with clang++ 13.0.0 |
+| Linux aarch64 | [fastdeploy-linux-aarch64-1.0.4.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-1.0.4.tgz) | Built with gcc 6.3 |
+| Android armv7&v8 | [fastdeploy-android-1.0.4-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.4-shared.tgz) | CV API; built with NDK 25 and clang++; supports arm64-v8a and armeabi-v7a |
+| Android armv7&v8 | [fastdeploy-android-with-text-1.0.4-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-with-text-1.0.4-shared.tgz) | Includes Text APIs such as FastTokenizer and UIE plus CV APIs; built with NDK 25 and clang++; supports arm64-v8a and armeabi-v7a |
+| Android armv7&v8 | [fastdeploy-android-with-text-only-1.0.4-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-with-text-only-1.0.4-shared.tgz) | Only includes Text APIs such as FastTokenizer and UIE; built with NDK 25 and clang++; does not include CV APIs such as OpenCV; supports arm64-v8a and armeabi-v7a |
 ## Java SDK Installation
-Release version (the Java SDK currently supports Android only; version 1.0.3)
+Release version (the Java SDK currently supports Android only; version 1.0.4)
 | Platform | File | Description |
 | :--- | :--- | :---- |
-| Android Java SDK | [fastdeploy-android-sdk-1.0.3.aar](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-sdk-1.0.3.aar) | CV API; built with NDK 20; minSdkVersion 15, targetSdkVersion 28 |
-| Android Java SDK | [fastdeploy-android-sdk-with-text-1.0.3.aar](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-sdk-with-text-1.0.3.aar) | Includes Text APIs such as FastTokenizer and UIE plus CV APIs; built with NDK 20; minSdkVersion 15, targetSdkVersion 28 |
+| Android Java SDK | [fastdeploy-android-sdk-1.0.4.aar](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-sdk-1.0.4.aar) | CV API; built with NDK 20; minSdkVersion 15, targetSdkVersion 28 |
+| Android Java SDK | [fastdeploy-android-sdk-with-text-1.0.4.aar](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-sdk-with-text-1.0.4.aar) | Includes Text APIs such as FastTokenizer and UIE plus CV APIs; built with NDK 20; minSdkVersion 15, targetSdkVersion 28 |
 Develop version (Nightly build)
diff --git a/docs/cn/build_and_install/jetson.md b/docs/cn/build_and_install/jetson.md
index f2579be5b..7fa221727 100644
--- a/docs/cn/build_and_install/jetson.md
+++ b/docs/cn/build_and_install/jetson.md
@@ -4,6 +4,8 @@
 On Jetson, FastDeploy currently supports only three inference backends: ONNX Runtime on CPU, and TensorRT / Paddle Inference on GPU
+- If the build fails with the error `Could not find a package configuration file provided by "Python" with any of the following names: PythonConfig.cmake python-config.cmake`, try [upgrading cmake to 3.25 or the latest version](https://cmake.org/download/) to resolve it.
+
 ## Building and Installing the C++ SDK
 The build requires:
diff --git a/docs/cn/faq/rknpu2/build.md b/docs/cn/faq/rknpu2/build.md
index 3334387b7..bd6a636a8 100644
--- a/docs/cn/faq/rknpu2/build.md
+++ b/docs/cn/faq/rknpu2/build.md
@@ -11,9 +11,11 @@ FastDeploy当前在RK平台上支持后端引擎如下:
 ## Building the FastDeploy SDK
+Because the RK356X and RK3588 differ in performance, we provide two ways to build FastDeploy.
+
 ### Building the FastDeploy C++ SDK on the Board
-RKNPU2 currently supports Linux only; the following tutorial was completed in RK3568 (debian 10) and RK3588 (debian 11) environments.
+The RK3588 CPU is relatively strong and on-board build times are acceptable, so we recommend building on the board. The following tutorial was completed in RK356X (debian 10) and RK3588 (debian 11) environments.
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
@@ -33,6 +35,9 @@ make install
```
 ### Cross-compiling the FastDeploy C++ SDK
+
+The RK356X CPU is relatively weak, so we recommend cross-compilation instead. The following tutorial was completed in an Ubuntu 22.04 environment.
+
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
@@ -54,9 +59,11 @@ make -j8
make install
```
+If you cannot find a cross-compilation toolchain, you can download one from [this link](https://bj.bcebos.com/paddle2onnx/libs/gcc-linaro-6.3.1-2017.zip).
+
 ### Building the Python SDK on the Board
-RKNPU2 currently supports Linux only; the following tutorial was completed in RK3568 (debian 10) and RK3588 (debian 11) environments. Python packaging depends on `wheel`, so run `pip install wheel` before building
+Building the Python SDK is currently supported on the board only; the following tutorial was completed in RK3568 (debian 10) and RK3588 (debian 11) environments. Python packaging depends on `wheel`, so run `pip install wheel` before building
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
@@ -69,8 +76,15 @@ cd python
export ENABLE_ORT_BACKEND=ON
export ENABLE_RKNPU2_BACKEND=ON
export ENABLE_VISION=ON
+
+# Choose RK3588 or RK356X according to your development board
export RKNN2_TARGET_SOC=RK3588
+
+# If your board has 8 GB of RAM or more, we recommend building with the following command.
python3 setup.py build
+# Note: if your board has less than 8 GB of RAM, we recommend building with the following command instead.
+python3 setup.py build -j1
+
python3 setup.py bdist_wheel
cd dist
pip3 install fastdeploy_python-0.0.0-cp39-cp39-linux_aarch64.whl
diff --git a/docs/en/build_and_install/download_prebuilt_libraries.md b/docs/en/build_and_install/download_prebuilt_libraries.md
index 56bc864a0..db9ca1230 100644
--- a/docs/en/build_and_install/download_prebuilt_libraries.md
+++ b/docs/en/build_and_install/download_prebuilt_libraries.md
@@ -21,9 +21,11 @@ This document is divided into two parts:
FastDeploy supports Computer Vision, Text and NLP model deployment on CPU and Nvidia GPU with Paddle Inference, ONNX Runtime, OpenVINO and TensorRT inference backends.
+Version information: Paddle Inference==2.4-dev5, ONNXRuntime==1.12.0, OpenVINO==2022.2.0.dev20220829, TensorRT==8.5.2.2
+
### Python SDK
-Install the released version(the newest 1.0.3 for now)
+Install the released version (the newest 1.0.4 for now)
```
pip install fastdeploy-gpu-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html
@@ -43,12 +45,12 @@ conda config --add channels conda-forge && conda install cudatoolkit=11.2 cudnn=
### C++ SDK
-Install the released version(Latest 1.0.3)
+Install the released version (Latest 1.0.4)
| Platform | File | Description |
|:----------- |:--------------------------------------------------------------------------------------------------------------------- |:--------------------------------------------------------- |
-| Linux x64 | [fastdeploy-linux-x64-gpu-1.0.3.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-1.0.3.tgz) | g++ 8.2, CUDA 11.2, cuDNN 8.2 |
-| Windows x64 | [fastdeploy-win-x64-gpu-1.0.3.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-1.0.3.zip) | Visual Studio 16 2019, CUDA 11.2, cuDNN 8.2 |
+| Linux x64 | [fastdeploy-linux-x64-gpu-1.0.4.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-1.0.4.tgz) | g++ 8.2, CUDA 11.2, cuDNN 8.2 |
+| Windows x64 | [fastdeploy-win-x64-gpu-1.0.4.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-gpu-1.0.4.zip) | Visual Studio 16 2019, CUDA 11.2, cuDNN 8.2 |
Install the Develop version(Nightly build)
@@ -68,9 +70,11 @@ Install the Develop version(Nightly build)
FastDeploy supports computer vision, text and NLP model deployment on CPU with Paddle Inference, ONNX Runtime, OpenVINO inference backends. It should be noted that under Linux aarch64 and Mac OSX, only the ONNX Runtime is supported for now.
+Version information: Paddle Inference==2.4-dev5, ONNXRuntime==1.12.0, OpenVINO==2022.2.0.dev20220829
+
### Python SDK
-Install the released version(Latest 1.0.3 for now)
+Install the released version (Latest 1.0.4 for now)
```
pip install fastdeploy-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html
@@ -84,27 +88,27 @@ pip install fastdeploy-python==0.0.0 -f https://www.paddlepaddle.org.cn/whl/fast
### C++ SDK
-Install the released version(Latest 1.0.3 for now, Android is 1.0.3)
+Install the released version (Latest 1.0.4 for now; Android is 1.0.4)
| Platform | File | Description |
|:------------- |:--------------------------------------------------------------------------------------------------------------------- |:------------------------------ |
-| Linux x64 | [fastdeploy-linux-x64-1.0.3.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.3.tgz) | g++ 8.2 |
-| Windows x64 | [fastdeploy-win-x64-1.0.3.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-1.0.3.zip) | Visual Studio 16 2019 |
-| Mac OSX x64 | [fastdeploy-osx-x86_64-1.0.3.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-x86_64-1.0.3.tgz) | clang++ 10.0.0|
-| Mac OSX arm64 | [fastdeploy-osx-arm64-1.0.3.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-arm64-1.0.3.tgz) | clang++ 13.0.0 |
-| Linux aarch64 | [fastdeploy-osx-arm64-1.0.3.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-1.0.3.tgz) | gcc 6.3 |
-| Android armv7&v8 | [fastdeploy-android-1.0.3-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.3-shared.tgz) | CV API, NDK 25, clang++, support arm64-v8a and armeabi-v7a |
-| Android armv7&v8 | [fastdeploy-android-with-text-1.0.3-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-with-text-1.0.3-shared.tgz) | contains Text API, such as FastTokenizer and UIE, CV API, NDK 25, clang++, support arm64-v8a and armeabi-v7a |
-| Android armv7&v8 | [fastdeploy-android-with-text-only-1.0.3-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-with-text-only-1.0.3-shared.tgz) | only contains Text API, such as FastTokenizer and UIE, NDK 25, clang++, does not contain CV API, support arm64-v8a and armeabi-v7a |
+| Linux x64 | [fastdeploy-linux-x64-1.0.4.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.4.tgz) | g++ 8.2 |
+| Windows x64 | [fastdeploy-win-x64-1.0.4.zip](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-win-x64-1.0.4.zip) | Visual Studio 16 2019 |
+| Mac OSX x64 | [fastdeploy-osx-x86_64-1.0.4.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-x86_64-1.0.4.tgz) | clang++ 10.0.0|
+| Mac OSX arm64 | [fastdeploy-osx-arm64-1.0.4.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-osx-arm64-1.0.4.tgz) | clang++ 13.0.0 |
+| Linux aarch64 | [fastdeploy-linux-aarch64-1.0.4.tgz](https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-aarch64-1.0.4.tgz) | gcc 6.3 |
+| Android armv7&v8 | [fastdeploy-android-1.0.4-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.4-shared.tgz) | CV API, NDK 25, clang++, support arm64-v8a and armeabi-v7a |
+| Android armv7&v8 | [fastdeploy-android-with-text-1.0.4-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-with-text-1.0.4-shared.tgz) | contains Text API, such as FastTokenizer and UIE, CV API, NDK 25, clang++, support arm64-v8a and armeabi-v7a |
+| Android armv7&v8 | [fastdeploy-android-with-text-only-1.0.4-shared.tgz](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-with-text-only-1.0.4-shared.tgz) | only contains Text API, such as FastTokenizer and UIE, NDK 25, clang++, does not contain CV API, support arm64-v8a and armeabi-v7a |
## Java SDK
-Install the released version(Android is 1.0.3 pre-release)
+Install the released version (Android is 1.0.4 pre-release)
| Platform | File | Description |
| :--- | :--- | :---- |
-| Android Java SDK | [fastdeploy-android-sdk-1.0.3.aar](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-sdk-1.0.3.aar) | CV API, NDK 20, minSdkVersion 15, targetSdkVersion 28 |
-| Android Java SDK | [fastdeploy-android-sdk-with-text-1.0.3.aar](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-sdk-with-text-1.0.3.aar) | contains Text API, such as FastTokenizer and UIE, CV API, NDK 20, minSdkVersion 15, targetSdkVersion 28 |
+| Android Java SDK | [fastdeploy-android-sdk-1.0.4.aar](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-sdk-1.0.4.aar) | CV API, NDK 20, minSdkVersion 15, targetSdkVersion 28 |
+| Android Java SDK | [fastdeploy-android-sdk-with-text-1.0.4.aar](https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-sdk-with-text-1.0.4.aar) | contains Text API, such as FastTokenizer and UIE, CV API, NDK 20, minSdkVersion 15, targetSdkVersion 28 |
Install the Develop version(Nightly build)
diff --git a/docs/en/build_and_install/jetson.md b/docs/en/build_and_install/jetson.md
index ddbb67f22..0826ecc03 100644
--- a/docs/en/build_and_install/jetson.md
+++ b/docs/en/build_and_install/jetson.md
@@ -4,6 +4,8 @@ English | [中文](../../cn/build_and_install/jetson.md)
FastDeploy supports CPU inference with ONNX Runtime and GPU inference with Nvidia TensorRT/Paddle Inference on Nvidia Jetson platform
+- If an error occurs during the build showing `Could not find a package configuration file provided by "Python" with any of the following names: PythonConfig.cmake python-config.cmake`, please try [upgrading cmake to 3.25 or a newer version](https://cmake.org/download/) to solve the problem.
+
## How to Build and Install FastDeploy C++ Library
Prerequisite for Compiling on NVIDIA Jetson:
diff --git a/fastdeploy/benchmark/utils.cc b/fastdeploy/benchmark/utils.cc
old mode 100644
new mode 100755
index 825cb5977..a66bdb6c0
--- a/fastdeploy/benchmark/utils.cc
+++ b/fastdeploy/benchmark/utils.cc
@@ -298,16 +298,17 @@ bool ResultManager::LoadFDTensor(FDTensor* tensor, const std::string& path) {
return true;
}
-TensorDiff ResultManager::CalculateDiffStatis(FDTensor* lhs, FDTensor* rhs) {
- if (lhs->Numel() != rhs->Numel() || lhs->Dtype() != rhs->Dtype()) {
+TensorDiff ResultManager::CalculateDiffStatis(const FDTensor& lhs,
+ const FDTensor& rhs) {
+ if (lhs.Numel() != rhs.Numel() || lhs.Dtype() != rhs.Dtype()) {
FDASSERT(false,
"The size and dtype of input FDTensor must be equal!"
" But got size %d, %d, dtype %s, %s",
- lhs->Numel(), rhs->Numel(), Str(lhs->Dtype()).c_str(),
- Str(rhs->Dtype()).c_str())
+ lhs.Numel(), rhs.Numel(), Str(lhs.Dtype()).c_str(),
+ Str(rhs.Dtype()).c_str())
}
- FDDataType dtype = lhs->Dtype();
- int numel = lhs->Numel();
+ FDDataType dtype = lhs.Dtype();
+ int numel = lhs.Numel();
if (dtype != FDDataType::FP32 && dtype != FDDataType::INT64 &&
dtype != FDDataType::INT32) {
FDASSERT(false, "Only support FP32/INT64/INT32 now, but got %s",
@@ -315,36 +316,36 @@ TensorDiff ResultManager::CalculateDiffStatis(FDTensor* lhs, FDTensor* rhs) {
}
if (dtype == FDDataType::INT64) {
 std::vector<int64_t> tensor_diff(numel);
- const int64_t* lhs_data_ptr = static_cast<const int64_t*>(lhs->CpuData());
- const int64_t* rhs_data_ptr = static_cast<const int64_t*>(rhs->CpuData());
+ const int64_t* lhs_data_ptr = static_cast<const int64_t*>(lhs.CpuData());
+ const int64_t* rhs_data_ptr = static_cast<const int64_t*>(rhs.CpuData());
for (int i = 0; i < numel; ++i) {
tensor_diff[i] = lhs_data_ptr[i] - rhs_data_ptr[i];
}
TensorDiff diff;
- CalculateStatisInfo<int64_t>(tensor_diff.data(), numel, &(diff.mean),
- &(diff.max), &(diff.min));
+ CalculateStatisInfo<int64_t>(tensor_diff.data(), numel, &(diff.data.mean),
+ &(diff.data.max), &(diff.data.min));
return diff;
} else if (dtype == FDDataType::INT32) {
 std::vector<int32_t> tensor_diff(numel);
- const int32_t* lhs_data_ptr = static_cast<const int32_t*>(lhs->CpuData());
- const int32_t* rhs_data_ptr = static_cast<const int32_t*>(rhs->CpuData());
+ const int32_t* lhs_data_ptr = static_cast<const int32_t*>(lhs.CpuData());
+ const int32_t* rhs_data_ptr = static_cast<const int32_t*>(rhs.CpuData());
for (int i = 0; i < numel; ++i) {
tensor_diff[i] = lhs_data_ptr[i] - rhs_data_ptr[i];
}
TensorDiff diff;
- CalculateStatisInfo<int32_t>(tensor_diff.data(), numel, &(diff.mean),
- &(diff.max), &(diff.min));
+ CalculateStatisInfo<int32_t>(tensor_diff.data(), numel, &(diff.data.mean),
+ &(diff.data.max), &(diff.data.min));
return diff;
} else { // FP32
 std::vector<float> tensor_diff(numel);
- const float* lhs_data_ptr = static_cast<const float*>(lhs->CpuData());
- const float* rhs_data_ptr = static_cast<const float*>(rhs->CpuData());
+ const float* lhs_data_ptr = static_cast<const float*>(lhs.CpuData());
+ const float* rhs_data_ptr = static_cast<const float*>(rhs.CpuData());
for (int i = 0; i < numel; ++i) {
tensor_diff[i] = lhs_data_ptr[i] - rhs_data_ptr[i];
}
TensorDiff diff;
- CalculateStatisInfo<float>(tensor_diff.data(), numel, &(diff.mean),
- &(diff.max), &(diff.min));
+ CalculateStatisInfo<float>(tensor_diff.data(), numel, &(diff.data.mean),
+ &(diff.data.max), &(diff.data.min));
return diff;
}
}
@@ -399,6 +400,108 @@ bool ResultManager::SaveDetectionResult(const vision::DetectionResult& res,
return true;
}
+bool ResultManager::SaveClassifyResult(const vision::ClassifyResult& res,
+ const std::string& path) {
+ if (res.label_ids.empty()) {
+ FDERROR << "ClassifyResult can not be empty!" << std::endl;
+ return false;
+ }
+ std::ofstream fs(path, std::ios::out);
+ if (!fs.is_open()) {
+ FDERROR << "Fail to open file:" << path << std::endl;
+ return false;
+ }
+ fs.precision(20);
+ // label_ids
+ fs << "label_ids" << KEY_VALUE_SEP;
+ for (int i = 0; i < res.label_ids.size(); ++i) {
+ if (i < res.label_ids.size() - 1) {
+ fs << res.label_ids[i] << VALUE_SEP;
+ } else {
+ fs << res.label_ids[i];
+ }
+ }
+ fs << "\n";
+ // scores
+ fs << "scores" << KEY_VALUE_SEP;
+ for (int i = 0; i < res.scores.size(); ++i) {
+ if (i < res.scores.size() - 1) {
+ fs << res.scores[i] << VALUE_SEP;
+ } else {
+ fs << res.scores[i];
+ }
+ }
+ fs << "\n";
+ fs.close();
+ return true;
+}
+
+bool ResultManager::SaveSegmentationResult(
+ const vision::SegmentationResult& res, const std::string& path) {
+ if (res.label_map.empty()) {
+ FDERROR << "SegmentationResult can not be empty!" << std::endl;
+ return false;
+ }
+ std::ofstream fs(path, std::ios::out);
+ if (!fs.is_open()) {
+ FDERROR << "Fail to open file:" << path << std::endl;
+ return false;
+ }
+ fs.precision(20);
+ // label_map
+ fs << "label_map" << KEY_VALUE_SEP;
+ for (int i = 0; i < res.label_map.size(); ++i) {
+ if (i < res.label_map.size() - 1) {
+ fs << static_cast<int32_t>(res.label_map[i]) << VALUE_SEP;
+ } else {
+ fs << static_cast<int32_t>(res.label_map[i]);
+ }
+ }
+ fs << "\n";
+ // score_map
+ if (res.contain_score_map) {
+ fs << "score_map" << KEY_VALUE_SEP;
+ for (int i = 0; i < res.score_map.size(); ++i) {
+ if (i < res.score_map.size() - 1) {
+ fs << res.score_map[i] << VALUE_SEP;
+ } else {
+ fs << res.score_map[i];
+ }
+ }
+ fs << "\n";
+ }
+ fs.close();
+ return true;
+}
+
+bool ResultManager::SaveOCRDetResult(const std::vector<std::array<int, 8>>& res,
+ const std::string& path) {
+ if (res.empty()) {
+ FDERROR << "OCRDetResult can not be empty!" << std::endl;
+ return false;
+ }
+ std::ofstream fs(path, std::ios::out);
+ if (!fs.is_open()) {
+ FDERROR << "Fail to open file:" << path << std::endl;
+ return false;
+ }
+ fs.precision(20);
+ // boxes
+ fs << "boxes" << KEY_VALUE_SEP;
+ for (int i = 0; i < res.size(); ++i) {
+ for (int j = 0; j < 8; ++j) {
+ if ((i == res.size() - 1) && (j == 7)) {
+ fs << res[i][j];
+ } else {
+ fs << res[i][j] << VALUE_SEP;
+ }
+ }
+ }
+ fs << "\n";
+ fs.close();
+ return true;
+}
+
bool ResultManager::LoadDetectionResult(vision::DetectionResult* res,
const std::string& path) {
if (!CheckFileExists(path)) {
@@ -432,32 +535,104 @@ bool ResultManager::LoadDetectionResult(vision::DetectionResult* res,
return true;
}
-DetectionDiff ResultManager::CalculateDiffStatis(vision::DetectionResult* lhs,
- vision::DetectionResult* rhs,
- float score_threshold) {
+bool ResultManager::LoadClassifyResult(vision::ClassifyResult* res,
+ const std::string& path) {
+ if (!CheckFileExists(path)) {
+ FDERROR << "Can't found file from" << path << std::endl;
+ return false;
+ }
+ auto lines = ReadLines(path);
+ std::map<std::string, std::vector<std::string>> data;
+ // label_ids
+ data = SplitDataLine(lines[0]);
+ res->Resize(data.begin()->second.size());
+ for (int i = 0; i < data.begin()->second.size(); ++i) {
+ res->label_ids[i] = std::stoi(data.begin()->second[i]);
+ }
+ // scores
+ data = SplitDataLine(lines[1]);
+ for (int i = 0; i < data.begin()->second.size(); ++i) {
+ res->scores[i] = std::stof(data.begin()->second[i]);
+ }
+ return true;
+}
+
+bool ResultManager::LoadSegmentationResult(vision::SegmentationResult* res,
+ const std::string& path) {
+ if (!CheckFileExists(path)) {
+ FDERROR << "Can't found file from" << path << std::endl;
+ return false;
+ }
+ auto lines = ReadLines(path);
+ if (lines.size() > 1) {
+ res->contain_score_map = true;
+ }
+ std::map<std::string, std::vector<std::string>> data;
+ // label_map
+ data = SplitDataLine(lines[0]);
+ res->Resize(data.begin()->second.size());
+ for (int i = 0; i < data.begin()->second.size(); ++i) {
+ res->label_map[i] = std::stoi(data.begin()->second[i]);
+ }
+ // score_map
+ if (lines.size() > 1) {
+ data = SplitDataLine(lines[1]);
+ for (int i = 0; i < data.begin()->second.size(); ++i) {
+ res->score_map[i] = std::stof(data.begin()->second[i]);
+ }
+ }
+ return true;
+}
+
+bool ResultManager::LoadOCRDetResult(std::vector<std::array<int, 8>>* res,
+ const std::string& path) {
+ if (!CheckFileExists(path)) {
+ FDERROR << "Can't found file from" << path << std::endl;
+ return false;
+ }
+ auto lines = ReadLines(path);
+ std::map<std::string, std::vector<std::string>> data;
+ // boxes
+ data = SplitDataLine(lines[0]);
+ int boxes_num = data.begin()->second.size() / 8;
+ res->resize(boxes_num);
+ for (int i = 0; i < boxes_num; ++i) {
+ for (int j = 0; j < 8; ++j) {
+ (*res)[i][j] = std::stoi(data.begin()->second[i * 8 + j]);
+ }
+ }
+ return true;
+}
+
+DetectionDiff ResultManager::CalculateDiffStatis(
+ const vision::DetectionResult& lhs, const vision::DetectionResult& rhs,
+ const float& score_threshold) {
+ vision::DetectionResult lhs_sort = lhs;
+ vision::DetectionResult rhs_sort = rhs;
// lex sort by x(w) & y(h)
- vision::utils::LexSortDetectionResultByXY(lhs);
- vision::utils::LexSortDetectionResultByXY(rhs);
+ vision::utils::LexSortDetectionResultByXY(&lhs_sort);
+ vision::utils::LexSortDetectionResultByXY(&rhs_sort);
// get value diff & trunc it by score_threshold
- const int boxes_num = std::min(lhs->boxes.size(), rhs->boxes.size());
+ const int boxes_num = std::min(lhs_sort.boxes.size(), rhs_sort.boxes.size());
 std::vector<float> boxes_diff;
 std::vector<float> scores_diff;
 std::vector<int32_t> labels_diff;
// TODO(qiuyanjun): process the diff of masks.
for (int i = 0; i < boxes_num; ++i) {
- if (lhs->scores[i] > score_threshold && rhs->scores[i] > score_threshold) {
- scores_diff.push_back(lhs->scores[i] - rhs->scores[i]);
- labels_diff.push_back(lhs->label_ids[i] - rhs->label_ids[i]);
- boxes_diff.push_back(lhs->boxes[i][0] - rhs->boxes[i][0]);
- boxes_diff.push_back(lhs->boxes[i][1] - rhs->boxes[i][1]);
- boxes_diff.push_back(lhs->boxes[i][2] - rhs->boxes[i][2]);
- boxes_diff.push_back(lhs->boxes[i][3] - rhs->boxes[i][3]);
+ if (lhs_sort.scores[i] > score_threshold &&
+ rhs_sort.scores[i] > score_threshold) {
+ scores_diff.push_back(lhs_sort.scores[i] - rhs_sort.scores[i]);
+ labels_diff.push_back(lhs_sort.label_ids[i] - rhs_sort.label_ids[i]);
+ boxes_diff.push_back(lhs_sort.boxes[i][0] - rhs_sort.boxes[i][0]);
+ boxes_diff.push_back(lhs_sort.boxes[i][1] - rhs_sort.boxes[i][1]);
+ boxes_diff.push_back(lhs_sort.boxes[i][2] - rhs_sort.boxes[i][2]);
+ boxes_diff.push_back(lhs_sort.boxes[i][3] - rhs_sort.boxes[i][3]);
}
}
FDASSERT(boxes_diff.size() > 0,
"Can't get any valid boxes while score_threshold is %f, "
"The boxes.size of lhs is %d, the boxes.size of rhs is %d",
- score_threshold, lhs->boxes.size(), rhs->boxes.size())
+ score_threshold, lhs_sort.boxes.size(), rhs_sort.boxes.size())
DetectionDiff diff;
CalculateStatisInfo(boxes_diff.data(), boxes_diff.size(),
@@ -469,11 +644,78 @@ DetectionDiff ResultManager::CalculateDiffStatis(vision::DetectionResult* lhs,
CalculateStatisInfo(labels_diff.data(), labels_diff.size(),
&(diff.labels.mean), &(diff.labels.max),
&(diff.labels.min));
- diff.mean = diff.boxes.mean;
- diff.max = diff.boxes.max;
- diff.min = diff.boxes.min;
return diff;
}
+
+ClassifyDiff ResultManager::CalculateDiffStatis(
+ const vision::ClassifyResult& lhs, const vision::ClassifyResult& rhs) {
+ const int class_nums = std::min(lhs.label_ids.size(), rhs.label_ids.size());
+ std::vector<float> scores_diff;
+ std::vector<int32_t> labels_diff;
+ for (int i = 0; i < class_nums; ++i) {
+ scores_diff.push_back(lhs.scores[i] - rhs.scores[i]);
+ labels_diff.push_back(lhs.label_ids[i] - rhs.label_ids[i]);
+ }
+
+ ClassifyDiff diff;
+ CalculateStatisInfo(scores_diff.data(), scores_diff.size(),
+ &(diff.scores.mean), &(diff.scores.max),
+ &(diff.scores.min));
+ CalculateStatisInfo(labels_diff.data(), labels_diff.size(),
+ &(diff.labels.mean), &(diff.labels.max),
+ &(diff.labels.min));
+ return diff;
+}
+
+SegmentationDiff ResultManager::CalculateDiffStatis(
+ const vision::SegmentationResult& lhs,
+ const vision::SegmentationResult& rhs) {
+ const int pixel_nums = std::min(lhs.label_map.size(), rhs.label_map.size());
+ std::vector<int32_t> labels_diff;
+ std::vector<float> scores_diff;
+ for (int i = 0; i < pixel_nums; ++i) {
+ labels_diff.push_back(lhs.label_map[i] - rhs.label_map[i]);
+ if (lhs.contain_score_map && rhs.contain_score_map) {
+ scores_diff.push_back(lhs.score_map[i] - rhs.score_map[i]);
+ }
+ }
+ SegmentationDiff diff;
+ CalculateStatisInfo(labels_diff.data(), labels_diff.size(),
+ &(diff.labels.mean), &(diff.labels.max),
+ &(diff.labels.min));
+ if (lhs.contain_score_map && rhs.contain_score_map) {
+ CalculateStatisInfo(scores_diff.data(), scores_diff.size(),
+ &(diff.scores.mean), &(diff.scores.max),
+ &(diff.scores.min));
+ }
+ return diff;
+}
+
+OCRDetDiff ResultManager::CalculateDiffStatis(
+ const std::vector<std::array<int, 8>>& lhs,
+ const std::vector<std::array<int, 8>>& rhs) {
+ std::vector<std::array<int, 8>> lhs_sort = lhs;
+ std::vector<std::array<int, 8>> rhs_sort = rhs;
+ // lex sort by x(w) & y(h)
+ vision::utils::LexSortOCRDetResultByXY(&lhs_sort);
+ vision::utils::LexSortOCRDetResultByXY(&rhs_sort);
+ // get value diff
+ const int boxes_num = std::min(lhs_sort.size(), rhs_sort.size());
+ std::vector<int> boxes_diff;
+ for (int i = 0; i < boxes_num; ++i) {
+ for (int j = 0; j < 8; ++j) {
+ boxes_diff.push_back(lhs_sort[i][j] - rhs_sort[i][j]);
+ }
+ }
+
+ OCRDetDiff diff;
+ CalculateStatisInfo(boxes_diff.data(), boxes_diff.size(),
+ &(diff.boxes.mean), &(diff.boxes.max),
+ &(diff.boxes.min));
+ return diff;
+}
+
#endif // ENABLE_VISION
#endif // ENABLE_BENCHMARK
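
A minimal sketch of how the new save/load/diff helpers compose in a benchmark
run; RunOcrDet() and the baseline path are hypothetical, and an
ENABLE_BENCHMARK + ENABLE_VISION build is assumed:

    // Persist a baseline OCR-det result, reload it later, diff a new run.
    std::vector<std::array<int, 8>> res = RunOcrDet();  // hypothetical helper
    fastdeploy::benchmark::ResultManager::SaveOCRDetResult(res, "ocr_base.txt");

    std::vector<std::array<int, 8>> baseline;
    fastdeploy::benchmark::ResultManager::LoadOCRDetResult(&baseline,
                                                           "ocr_base.txt");
    auto diff =
        fastdeploy::benchmark::ResultManager::CalculateDiffStatis(res, baseline);
    std::cout << "boxes diff mean/max/min: " << diff.boxes.mean << "/"
              << diff.boxes.max << "/" << diff.boxes.min << std::endl;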
diff --git a/fastdeploy/benchmark/utils.h b/fastdeploy/benchmark/utils.h
index fc7835745..2ad0ae4aa 100755
--- a/fastdeploy/benchmark/utils.h
+++ b/fastdeploy/benchmark/utils.h
@@ -101,14 +101,31 @@ struct FASTDEPLOY_DECL EvalStatis {
double max = -1.0;
};
-struct FASTDEPLOY_DECL TensorDiff: public BaseDiff, public EvalStatis {};
+struct FASTDEPLOY_DECL TensorDiff: public BaseDiff {
+ EvalStatis data;
+};
#if defined(ENABLE_VISION)
-struct FASTDEPLOY_DECL DetectionDiff: public BaseDiff, public EvalStatis {
+struct FASTDEPLOY_DECL DetectionDiff: public BaseDiff {
EvalStatis boxes;
EvalStatis scores;
EvalStatis labels;
};
+
+struct FASTDEPLOY_DECL ClassifyDiff: public BaseDiff {
+ EvalStatis scores;
+ EvalStatis labels;
+};
+
+struct FASTDEPLOY_DECL SegmentationDiff: public BaseDiff {
+ EvalStatis scores;
+ EvalStatis labels;
+};
+
+struct FASTDEPLOY_DECL OCRDetDiff: public BaseDiff {
+ EvalStatis boxes;
+};
+
#endif // ENABLE_VISION
#endif // ENABLE_BENCHMARK
@@ -119,18 +136,38 @@ struct FASTDEPLOY_DECL ResultManager {
static bool SaveFDTensor(const FDTensor& tensor, const std::string& path);
static bool LoadFDTensor(FDTensor* tensor, const std::string& path);
/// Calculate diff value between two FDTensor results.
- static TensorDiff CalculateDiffStatis(FDTensor* lhs,
- FDTensor* rhs);
+ static TensorDiff CalculateDiffStatis(const FDTensor& lhs,
+ const FDTensor& rhs);
#if defined(ENABLE_VISION)
/// Save & Load functions for basic results.
static bool SaveDetectionResult(const vision::DetectionResult& res,
const std::string& path);
static bool LoadDetectionResult(vision::DetectionResult* res,
const std::string& path);
+ static bool SaveClassifyResult(const vision::ClassifyResult& res,
+ const std::string& path);
+ static bool LoadClassifyResult(vision::ClassifyResult* res,
+ const std::string& path);
+ static bool SaveSegmentationResult(const vision::SegmentationResult& res,
+ const std::string& path);
+ static bool LoadSegmentationResult(vision::SegmentationResult* res,
+ const std::string& path);
+ static bool SaveOCRDetResult(const std::vector<std::array<int, 8>>& res,
+ const std::string& path);
+ static bool LoadOCRDetResult(std::vector<std::array<int, 8>>* res,
+ const std::string& path);
/// Calculate diff value between two basic results.
- static DetectionDiff CalculateDiffStatis(vision::DetectionResult* lhs,
- vision::DetectionResult* rhs,
- float score_threshold = 0.3f);
+ static DetectionDiff CalculateDiffStatis(const vision::DetectionResult& lhs,
+ const vision::DetectionResult& rhs,
+ const float& score_threshold = 0.3f);
+ static ClassifyDiff CalculateDiffStatis(const vision::ClassifyResult& lhs,
+ const vision::ClassifyResult& rhs);
+ static SegmentationDiff CalculateDiffStatis(
+ const vision::SegmentationResult& lhs,
+ const vision::SegmentationResult& rhs);
+ static OCRDetDiff CalculateDiffStatis(
+ const std::vector<std::array<int, 8>>& lhs,
+ const std::vector<std::array<int, 8>>& rhs);
#endif // ENABLE_VISION
#endif // ENABLE_BENCHMARK
};
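
Since TensorDiff no longer inherits EvalStatis (and DetectionDiff no longer
mirrors its box statistics into top-level mean/max/min), callers must read the
nested fields. A before/after sketch, assuming two FDTensor results already
obtained via ResultManager:

    TensorDiff diff = ResultManager::CalculateDiffStatis(lhs_tensor, rhs_tensor);
    // old (no longer compiles): double m = diff.mean;
    double m = diff.data.mean;  // statistics now live in the nested EvalStatis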
diff --git a/fastdeploy/function/gather_scatter_along_axis.h b/fastdeploy/function/gather_scatter_along_axis.h
index bd1093af1..fa627a411 100644
--- a/fastdeploy/function/gather_scatter_along_axis.h
+++ b/fastdeploy/function/gather_scatter_along_axis.h
@@ -26,7 +26,8 @@ namespace function {
@param out The output tensor which stores the result.
@param axis Axis which will be gathered.
*/
-void GatherAlongAxis(const FDTensor& x, const FDTensor& index, FDTensor* result,
+FASTDEPLOY_DECL void GatherAlongAxis(const FDTensor& x,
+ const FDTensor& index, FDTensor* result,
int axis);
} // namespace function
diff --git a/fastdeploy/pybind/main.cc.in b/fastdeploy/pybind/main.cc.in
index 5da3ef9fc..5e11ee808 100755
--- a/fastdeploy/pybind/main.cc.in
+++ b/fastdeploy/pybind/main.cc.in
@@ -156,6 +156,8 @@ PYBIND11_MODULE(@PY_LIBRARY_NAME@, m) {
"Make programer easier to deploy deeplearning model, save time to save "
"the world!";
+ m.def("set_logger", &SetLogger);
+
BindFDTensor(m);
BindRuntime(m);
BindFDModel(m);
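
The binding above exposes the new C++ SetLogger (added in
fastdeploy/utils/utils.cc below); the equivalent native call is simply:

    // Suppress FDINFO output while keeping FDWARNING enabled.
    fastdeploy::SetLogger(/*enable_info=*/false, /*enable_warning=*/true);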
diff --git a/fastdeploy/runtime/backends/paddle/option.h b/fastdeploy/runtime/backends/paddle/option.h
index fd2975cc9..652b72401 100755
--- a/fastdeploy/runtime/backends/paddle/option.h
+++ b/fastdeploy/runtime/backends/paddle/option.h
@@ -67,10 +67,12 @@ struct PaddleBackendOption {
/// initialize memory size(MB) for GPU
int gpu_mem_init_size = 100;
+ /// Disable the specified types of operators so they will not run on TensorRT
 void DisableTrtOps(const std::vector<std::string>& ops) {
trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end());
}
+ /// Delete pass by name
void DeletePass(const std::string& pass_name) {
delete_pass_names.push_back(pass_name);
}
diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.cc b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
index dc804e926..1fc45e990 100755
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -98,6 +98,33 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
}
}
+bool PaddleBackend::Init(const RuntimeOption& runtime_option) {
+ if (!(Supported(runtime_option.model_format, Backend::PDINFER) && Supported(runtime_option.device, Backend::PDINFER))) {
+ return false;
+ }
+
+ auto option = runtime_option;
+ option.paddle_infer_option.model_file = runtime_option.model_file;
+ option.paddle_infer_option.params_file = runtime_option.params_file;
+ option.paddle_infer_option.model_from_memory_ = runtime_option.model_from_memory_;
+ option.paddle_infer_option.device = runtime_option.device;
+ option.paddle_infer_option.device_id = runtime_option.device_id;
+ option.paddle_infer_option.enable_pinned_memory = runtime_option.enable_pinned_memory;
+ option.paddle_infer_option.external_stream_ = runtime_option.external_stream_;
+ option.paddle_infer_option.trt_option = runtime_option.trt_option;
+ option.paddle_infer_option.trt_option.gpu_id = runtime_option.device_id;
+ if (option.model_from_memory_) {
+ return InitFromPaddle(option.model_file, option.params_file, option.paddle_infer_option);
+ } else {
+ std::string model_buffer = "";
+ std::string params_buffer = "";
+ FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), "Failed to read model file from %s.", option.model_file.c_str());
+ FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer), "Failed to read parameters file from %s.", option.params_file.c_str());
+ return InitFromPaddle(model_buffer, params_buffer, option.paddle_infer_option);
+ }
+ return false;
+}
+
bool PaddleBackend::InitFromPaddle(const std::string& model_buffer,
const std::string& params_buffer,
const PaddleBackendOption& option) {
diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.h b/fastdeploy/runtime/backends/paddle/paddle_backend.h
index 60079fed6..f662ca2b6 100755
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.h
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.h
@@ -54,12 +54,7 @@ class PaddleBackend : public BaseBackend {
public:
PaddleBackend() {}
virtual ~PaddleBackend() = default;
- void BuildOption(const PaddleBackendOption& option);
-
- bool InitFromPaddle(const std::string& model_buffer,
- const std::string& params_buffer,
- const PaddleBackendOption& option = PaddleBackendOption());
-
+ bool Init(const RuntimeOption& option);
 bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
bool copy_to_fd = true) override;
@@ -77,6 +72,12 @@ class PaddleBackend : public BaseBackend {
 std::vector<TensorInfo> GetOutputInfos() override;
private:
+ void BuildOption(const PaddleBackendOption& option);
+
+ bool InitFromPaddle(const std::string& model_buffer,
+ const std::string& params_buffer,
+ const PaddleBackendOption& option = PaddleBackendOption());
+
void
CollectShapeRun(paddle_infer::Predictor* predictor,
 const std::map<std::string, std::vector<int>>& shape) const;
diff --git a/fastdeploy/runtime/backends/tensorrt/option.h b/fastdeploy/runtime/backends/tensorrt/option.h
index 5cee0a7e3..ff28e3e3b 100755
--- a/fastdeploy/runtime/backends/tensorrt/option.h
+++ b/fastdeploy/runtime/backends/tensorrt/option.h
@@ -30,6 +30,9 @@ struct TrtBackendOption {
/// `max_workspace_size` for TensorRT
size_t max_workspace_size = 1 << 30;
+ /// Enable logging while converting an ONNX model to TensorRT
+ bool enable_log_info = false;
+
/*
* @brief Enable half precison inference, on some device not support half precision, it will fallback to float32 mode
*/
diff --git a/fastdeploy/runtime/backends/tensorrt/option_pybind.cc b/fastdeploy/runtime/backends/tensorrt/option_pybind.cc
index d781256a5..f46f27f95 100644
--- a/fastdeploy/runtime/backends/tensorrt/option_pybind.cc
+++ b/fastdeploy/runtime/backends/tensorrt/option_pybind.cc
@@ -21,6 +21,7 @@ void BindTrtOption(pybind11::module& m) {
 pybind11::class_<TrtBackendOption>(m, "TrtBackendOption")
 .def(pybind11::init<>())
.def_readwrite("enable_fp16", &TrtBackendOption::enable_fp16)
+ .def_readwrite("enable_log_info", &TrtBackendOption::enable_log_info)
.def_readwrite("max_batch_size", &TrtBackendOption::max_batch_size)
.def_readwrite("max_workspace_size",
&TrtBackendOption::max_workspace_size)
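
A sketch of switching on the new TensorRT conversion logging from a
RuntimeOption; the device id is illustrative and the rest of the runtime setup
is omitted:

    fastdeploy::RuntimeOption option;
    option.UseGpu(0);
    option.UseTrtBackend();
    option.trt_option.enable_log_info = true;  // surface TRT kINFO/kWARNING during engine build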
diff --git a/fastdeploy/runtime/backends/tensorrt/trt_backend.cc b/fastdeploy/runtime/backends/tensorrt/trt_backend.cc
index 74bd3ae4f..99ccbe4c7 100644
--- a/fastdeploy/runtime/backends/tensorrt/trt_backend.cc
+++ b/fastdeploy/runtime/backends/tensorrt/trt_backend.cc
@@ -114,6 +114,13 @@ bool TrtBackend::LoadTrtCache(const std::string& trt_engine_file) {
}
bool TrtBackend::Init(const RuntimeOption& runtime_option) {
+ auto trt_option = runtime_option.trt_option;
+ trt_option.model_file = runtime_option.model_file;
+ trt_option.params_file = runtime_option.params_file;
+ trt_option.model_format = runtime_option.model_format;
+ trt_option.gpu_id = runtime_option.device_id;
+ trt_option.enable_pinned_memory = runtime_option.enable_pinned_memory;
+ trt_option.external_stream_ = runtime_option.external_stream_;
if (runtime_option.device != Device::GPU) {
FDERROR << "TrtBackend only supports Device::GPU, but now it's "
<< runtime_option.device << "." << std::endl;
@@ -130,7 +137,7 @@ bool TrtBackend::Init(const RuntimeOption& runtime_option) {
if (runtime_option.model_from_memory_) {
return InitFromPaddle(runtime_option.model_file,
runtime_option.params_file,
- runtime_option.trt_option);
+ trt_option);
} else {
std::string model_buffer;
std::string params_buffer;
@@ -141,17 +148,17 @@ bool TrtBackend::Init(const RuntimeOption& runtime_option) {
"Failed to read parameters file %s.",
runtime_option.params_file.c_str());
return InitFromPaddle(model_buffer, params_buffer,
- runtime_option.trt_option);
+ trt_option);
}
} else {
if (runtime_option.model_from_memory_) {
- return InitFromOnnx(runtime_option.model_file, runtime_option.trt_option);
+ return InitFromOnnx(runtime_option.model_file, trt_option);
} else {
std::string model_buffer;
FDASSERT(ReadBinaryFromFile(runtime_option.model_file, &model_buffer),
"Failed to read model file %s.",
runtime_option.model_file.c_str());
- return InitFromOnnx(model_buffer, runtime_option.trt_option);
+ return InitFromOnnx(model_buffer, trt_option);
}
}
return true;
@@ -525,6 +532,9 @@ void TrtBackend::AllocateOutputsBuffer(std::vector* outputs,
}
bool TrtBackend::BuildTrtEngine() {
+ if (option_.enable_log_info) {
+ FDTrtLogger::Get()->SetLog(true, true);
+ }
auto config =
 FDUniquePtr<nvinfer1::IBuilderConfig>(builder_->createBuilderConfig());
if (!config) {
diff --git a/fastdeploy/runtime/backends/tensorrt/utils.h b/fastdeploy/runtime/backends/tensorrt/utils.h
old mode 100644
new mode 100755
index 3d4c11f31..91402b305
--- a/fastdeploy/runtime/backends/tensorrt/utils.h
+++ b/fastdeploy/runtime/backends/tensorrt/utils.h
@@ -220,20 +220,31 @@ class FDTrtLogger : public nvinfer1::ILogger {
logger = new FDTrtLogger();
return logger;
}
+ void SetLog(bool enable_info = false, bool enable_warning = false) {
+ enable_info_ = enable_info;
+ enable_warning_ = enable_warning;
+ }
+
void log(nvinfer1::ILogger::Severity severity,
const char* msg) noexcept override {
if (severity == nvinfer1::ILogger::Severity::kINFO) {
- // Disable this log
- // FDINFO << msg << std::endl;
+ if (enable_info_) {
+ FDINFO << msg << std::endl;
+ }
} else if (severity == nvinfer1::ILogger::Severity::kWARNING) {
- // Disable this log
- // FDWARNING << msg << std::endl;
+ if (enable_warning_) {
+ FDWARNING << msg << std::endl;
+ }
} else if (severity == nvinfer1::ILogger::Severity::kERROR) {
FDERROR << msg << std::endl;
} else if (severity == nvinfer1::ILogger::Severity::kINTERNAL_ERROR) {
FDASSERT(false, "%s", msg);
}
}
+
+ private:
+ bool enable_info_ = false;
+ bool enable_warning_ = false;
};
struct ShapeRangeInfo {
diff --git a/fastdeploy/runtime/runtime.cc b/fastdeploy/runtime/runtime.cc
index 0e6eecf32..df000c9ac 100644
--- a/fastdeploy/runtime/runtime.cc
+++ b/fastdeploy/runtime/runtime.cc
@@ -154,19 +154,10 @@ bool Runtime::Init(const RuntimeOption& _option) {
} else if (option.backend == Backend::SOPHGOTPU) {
CreateSophgoNPUBackend();
} else if (option.backend == Backend::POROS) {
- FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
- "Backend::POROS only supports Device::CPU/Device::GPU.");
- FDASSERT(option.model_format == ModelFormat::TORCHSCRIPT,
- "Backend::POROS only supports model format of "
- "ModelFormat::TORCHSCRIPT.");
- FDINFO << "Runtime initialized with Backend::POROS in " << option.device
- << "." << std::endl;
- return true;
+ CreatePorosBackend();
} else {
- FDERROR << "Runtime only support "
- "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
- "backend now."
- << std::endl;
+ std::string msg = Str(GetAvailableBackends());
+ FDERROR << "The compiled FastDeploy only supports " << msg << ", " << option.backend << " is not supported now." << std::endl;
return false;
}
backend_->benchmark_option_ = option.benchmark_option;
@@ -264,43 +255,9 @@ void Runtime::ReleaseModelMemoryBuffer() {
}
void Runtime::CreatePaddleBackend() {
- FDASSERT(
- option.device == Device::CPU || option.device == Device::GPU ||
- option.device == Device::IPU,
- "Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU.");
- FDASSERT(
- option.model_format == ModelFormat::PADDLE,
- "Backend::PDINFER only supports model format of ModelFormat::PADDLE.");
#ifdef ENABLE_PADDLE_BACKEND
- option.paddle_infer_option.model_file = option.model_file;
- option.paddle_infer_option.params_file = option.params_file;
- option.paddle_infer_option.model_from_memory_ = option.model_from_memory_;
- option.paddle_infer_option.device = option.device;
- option.paddle_infer_option.device_id = option.device_id;
- option.paddle_infer_option.enable_pinned_memory = option.enable_pinned_memory;
- option.paddle_infer_option.external_stream_ = option.external_stream_;
- option.paddle_infer_option.trt_option = option.trt_option;
- option.paddle_infer_option.trt_option.gpu_id = option.device_id;
 backend_ = utils::make_unique<PaddleBackend>();
- auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
-
- if (option.model_from_memory_) {
- FDASSERT(
- casted_backend->InitFromPaddle(option.model_file, option.params_file,
- option.paddle_infer_option),
- "Load model from Paddle failed while initliazing PaddleBackend.");
- ReleaseModelMemoryBuffer();
- } else {
- std::string model_buffer = "";
- std::string params_buffer = "";
- FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
- "Fail to read binary from model file");
- FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
- "Fail to read binary from parameter file");
- FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
- option.paddle_infer_option),
- "Load model from Paddle failed while initliazing PaddleBackend.");
- }
+ FDASSERT(backend_->Init(option), "Failed to initialized Paddle Inference backend.");
#else
FDASSERT(false,
"PaddleBackend is not available, please compiled with "
@@ -339,12 +296,6 @@ void Runtime::CreateOrtBackend() {
void Runtime::CreateTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
- option.trt_option.model_file = option.model_file;
- option.trt_option.params_file = option.params_file;
- option.trt_option.model_format = option.model_format;
- option.trt_option.gpu_id = option.device_id;
- option.trt_option.enable_pinned_memory = option.enable_pinned_memory;
- option.trt_option.external_stream_ = option.external_stream_;
 backend_ = utils::make_unique<TrtBackend>();
FDASSERT(backend_->Init(option), "Failed to initialize TensorRT backend.");
#else
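
With the option plumbing moved into PaddleBackend::Init() and
TrtBackend::Init(), both Create*Backend() helpers reduce to the Init() calls
above; nothing changes for callers. A sketch with hypothetical model paths:

    fastdeploy::RuntimeOption option;
    option.SetModelPath("model.pdmodel", "model.pdiparams");
    option.UsePaddleInferBackend();
    fastdeploy::Runtime runtime;
    runtime.Init(option);  // internally forwards to backend_->Init(option)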
diff --git a/fastdeploy/utils/utils.cc b/fastdeploy/utils/utils.cc
index 760c10406..c39b6adab 100644
--- a/fastdeploy/utils/utils.cc
+++ b/fastdeploy/utils/utils.cc
@@ -13,18 +13,27 @@
// limitations under the License.
#include "fastdeploy/utils/utils.h"
+
 #include <sstream>
namespace fastdeploy {
+bool FDLogger::enable_info = true;
+bool FDLogger::enable_warning = true;
+
+void SetLogger(bool enable_info, bool enable_warning) {
+ FDLogger::enable_info = enable_info;
+ FDLogger::enable_warning = enable_warning;
+}
+
FDLogger::FDLogger(bool verbose, const std::string& prefix) {
verbose_ = verbose;
line_ = "";
-#ifdef __ANDROID__
+#ifdef __ANDROID__
prefix_ = std::string("[FastDeploy]") + prefix;
#else
prefix_ = prefix;
-#endif
+#endif
}
FDLogger& FDLogger::operator<<(std::ostream& (*os)(std::ostream&)) {
diff --git a/fastdeploy/utils/utils.h b/fastdeploy/utils/utils.h
index d44b7f187..cfc666bb2 100644
--- a/fastdeploy/utils/utils.h
+++ b/fastdeploy/utils/utils.h
@@ -43,6 +43,9 @@ namespace fastdeploy {
class FASTDEPLOY_DECL FDLogger {
public:
+ static bool enable_info;
+ static bool enable_warning;
+
FDLogger() {
line_ = "";
prefix_ = "[FastDeploy]";
@@ -90,11 +93,12 @@ FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file,
<< __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t"
#define FDWARNING \
- FDLogger(true, "[WARNING]") \
+ FDLogger(fastdeploy::FDLogger::enable_warning, "[WARNING]") \
<< __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t"
#define FDINFO \
- FDLogger(true, "[INFO]") << __REL_FILE__ << "(" << __LINE__ \
+ FDLogger(fastdeploy::FDLogger::enable_info, "[INFO]") \
+ << __REL_FILE__ << "(" << __LINE__ \
<< ")::" << __FUNCTION__ << "\t"
#define FDASSERT(condition, format, ...) \
@@ -214,6 +218,10 @@ std::string Str(const std::vector& shape) {
return oss.str();
}
+/// Set behaviour of logging while using FastDeploy
+FASTDEPLOY_DECL void SetLogger(bool enable_info = true,
+ bool enable_warning = true);
+
 template <typename T>
void CalculateStatisInfo(const void* src_ptr, int size, double* mean,
double* max, double* min) {
diff --git a/fastdeploy/vision/classification/contrib/resnet.cc b/fastdeploy/vision/classification/contrib/resnet.cc
index 2eed67992..fffbeada6 100644
--- a/fastdeploy/vision/classification/contrib/resnet.cc
+++ b/fastdeploy/vision/classification/contrib/resnet.cc
@@ -13,23 +13,22 @@
// limitations under the License.
#include "fastdeploy/vision/classification/contrib/resnet.h"
-#include "fastdeploy/vision/utils/utils.h"
#include "fastdeploy/utils/perf.h"
+#include "fastdeploy/vision/utils/utils.h"
namespace fastdeploy {
namespace vision {
namespace classification {
-ResNet::ResNet(const std::string& model_file,
- const std::string& params_file,
+ResNet::ResNet(const std::string& model_file, const std::string& params_file,
const RuntimeOption& custom_option,
const ModelFormat& model_format) {
// In constructor, the 3 steps below are necessary.
// 1. set the Backend 2. set RuntimeOption 3. call Initialize()
if (model_format == ModelFormat::ONNX) {
- valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
- valid_gpu_backends = {Backend::ORT, Backend::TRT};
+ valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
+ valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else {
valid_cpu_backends = {Backend::PDINFER};
valid_gpu_backends = {Backend::PDINFER};
@@ -42,7 +41,6 @@ ResNet::ResNet(const std::string& model_file,
}
bool ResNet::Initialize() {
-
// In this function, the 3 steps below are necessary.
// 1. assign values to the global variables 2. call InitRuntime()
@@ -57,14 +55,15 @@ bool ResNet::Initialize() {
return true;
}
-
bool ResNet::Preprocess(Mat* mat, FDTensor* output) {
+ // In this function, the preprocess needs to be implemented according to the
+ // original repo.
+ // The result of preprocess has to be saved in an FDTensor variable, because
+ // the input of Infer() needs to be std::vector<FDTensor>.
+ // 1. Resize 2. BGR2RGB 3. Normalize 4. HWC2CHW 5. Put the result into an
+ // FDTensor variable.
-// In this function, the preprocess need be implemented according to the original Repos,
-// The result of preprocess has to be saved in FDTensor variable, because the input of Infer() need to be std::vector.
-// 1. Resize 2. BGR2RGB 3. Normalize 4. HWC2CHW 5. Put the result into FDTensor variable.
-
- if (mat->Height()!=size[0] || mat->Width()!=size[1]){
+ if (mat->Height() != size[0] || mat->Width() != size[1]) {
int interp = cv::INTER_LINEAR;
Resize::Run(mat, size[1], size[0], -1, -1, interp);
}
@@ -75,20 +74,23 @@ bool ResNet::Preprocess(Mat* mat, FDTensor* output) {
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
}
-bool ResNet::Postprocess(FDTensor& infer_result,
- ClassifyResult* result, int topk) {
-
- // In this function, the postprocess need be implemented according to the original Repos,
- // Finally the reslut of postprocess should be saved in ClassifyResult variable.
- // 1. Softmax 2. Choose topk labels 3. Put the result into ClassifyResult variable.
+bool ResNet::Postprocess(FDTensor& infer_result, ClassifyResult* result,
+ int topk) {
+ // In this function, the postprocess needs to be implemented according to the
+ // original repo.
+ // Finally the result of postprocess should be saved in a ClassifyResult
+ // variable.
+ // 1. Softmax 2. Choose topk labels 3. Put the result into the ClassifyResult
+ // variable.
int num_classes = infer_result.shape[1];
function::Softmax(infer_result, &infer_result);
- const float* infer_result_buffer = reinterpret_cast<const float*>(infer_result.Data());
+ const float* infer_result_buffer =
+ reinterpret_cast<const float*>(infer_result.Data());
topk = std::min(num_classes, topk);
result->label_ids =
utils::TopKIndices(infer_result_buffer, num_classes, topk);
@@ -100,8 +102,8 @@ bool ResNet::Postprocess(FDTensor& infer_result,
}
bool ResNet::Predict(cv::Mat* im, ClassifyResult* result, int topk) {
-
- // In this function, the Preprocess(), Infer(), and Postprocess() are called sequentially.
+ // In this function, the Preprocess(), Infer(), and Postprocess() are called
+ // sequentially.
Mat mat(*im);
std::vector processed_data(1);
@@ -128,7 +130,6 @@ bool ResNet::Predict(cv::Mat* im, ClassifyResult* result, int topk) {
return true;
}
-
} // namespace classification
} // namespace vision
} // namespace fastdeploy
diff --git a/fastdeploy/vision/classification/contrib/yolov5cls/preprocessor.cc b/fastdeploy/vision/classification/contrib/yolov5cls/preprocessor.cc
index e252ba0ee..35b3e17bb 100644
--- a/fastdeploy/vision/classification/contrib/yolov5cls/preprocessor.cc
+++ b/fastdeploy/vision/classification/contrib/yolov5cls/preprocessor.cc
@@ -20,18 +20,19 @@ namespace vision {
namespace classification {
YOLOv5ClsPreprocessor::YOLOv5ClsPreprocessor() {
- size_ = {224, 224}; //{h,w}
+ size_ = {224, 224}; //{h,w}
}
-bool YOLOv5ClsPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
- std::map<std::string, std::array<float, 2>>* im_info) {
+bool YOLOv5ClsPreprocessor::Preprocess(
+ FDMat* mat, FDTensor* output,
+ std::map<std::string, std::array<float, 2>>* im_info) {
// Record the shape of image and the shape of preprocessed image
(*im_info)["input_shape"] = {static_cast(mat->Height()),
static_cast(mat->Width())};
// process after image load
 double ratio = (size_[0] * 1.0) / std::max(static_cast<double>(mat->Height()),
- static_cast<double>(mat->Width()));
+ static_cast<double>(mat->Width()));
// yolov5cls's preprocess steps
// 1. CenterCrop
@@ -54,20 +55,22 @@ bool YOLOv5ClsPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
 static_cast<float>(mat->Width())};
mat->ShareWithTensor(output);
- output->ExpandDim(0); // reshape to n, h, w, c
+ output->ExpandDim(0); // reshape to n, c, h, w
return true;
}
-bool YOLOv5ClsPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
- std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
+bool YOLOv5ClsPreprocessor::Run(
+ std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+ std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
if (images->size() == 0) {
- FDERROR << "The size of input images should be greater than 0." << std::endl;
+ FDERROR << "The size of input images should be greater than 0."
+ << std::endl;
return false;
}
ims_info->resize(images->size());
outputs->resize(1);
// Concat all the preprocessed data to a batch tensor
- std::vector<FDTensor> tensors(images->size());
+ std::vector<FDTensor> tensors(images->size());
for (size_t i = 0; i < images->size(); ++i) {
if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) {
FDERROR << "Failed to preprocess input image." << std::endl;
diff --git a/fastdeploy/vision/classification/ppcls/preprocessor.cc b/fastdeploy/vision/classification/ppcls/preprocessor.cc
index ef0da9ce5..619ba87fd 100644
--- a/fastdeploy/vision/classification/ppcls/preprocessor.cc
+++ b/fastdeploy/vision/classification/ppcls/preprocessor.cc
@@ -14,7 +14,6 @@
#include "fastdeploy/vision/classification/ppcls/preprocessor.h"
-#include "fastdeploy/function/concat.h"
#include "yaml-cpp/yaml.h"
namespace fastdeploy {
@@ -102,13 +101,17 @@ void PaddleClasPreprocessor::DisablePermute() {
bool PaddleClasPreprocessor::Apply(FDMatBatch* image_batch,
 std::vector<FDTensor>* outputs) {
+ if (!initialized_) {
+ FDERROR << "The preprocessor is not initialized." << std::endl;
+ return false;
+ }
for (size_t j = 0; j < processors_.size(); ++j) {
- ProcLib lib = ProcLib::DEFAULT;
+ image_batch->proc_lib = proc_lib_;
if (initial_resize_on_cpu_ && j == 0 &&
processors_[j]->Name().find("Resize") == 0) {
- lib = ProcLib::OPENCV;
+ image_batch->proc_lib = ProcLib::OPENCV;
}
- if (!(*(processors_[j].get()))(image_batch, lib)) {
+ if (!(*(processors_[j].get()))(image_batch)) {
FDERROR << "Failed to processs image in " << processors_[j]->Name() << "."
<< std::endl;
return false;
diff --git a/fastdeploy/vision/classification/ppcls/preprocessor.h b/fastdeploy/vision/classification/ppcls/preprocessor.h
index fc347fc3d..ac2e82ef1 100644
--- a/fastdeploy/vision/classification/ppcls/preprocessor.h
+++ b/fastdeploy/vision/classification/ppcls/preprocessor.h
@@ -55,6 +55,7 @@ class FASTDEPLOY_DECL PaddleClasPreprocessor : public ProcessorManager {
private:
bool BuildPreprocessPipelineFromConfig();
+ bool initialized_ = false;
 std::vector<std::shared_ptr<Processor>> processors_;
// for recording the switch of hwc2chw
bool disable_permute_ = false;
diff --git a/fastdeploy/vision/common/processors/base.cc b/fastdeploy/vision/common/processors/base.cc
index 9c4a0177e..7e34d07bf 100644
--- a/fastdeploy/vision/common/processors/base.cc
+++ b/fastdeploy/vision/common/processors/base.cc
@@ -20,9 +20,9 @@
namespace fastdeploy {
namespace vision {
-bool Processor::operator()(FDMat* mat, ProcLib lib) {
- ProcLib target = lib;
- if (lib == ProcLib::DEFAULT) {
+bool Processor::operator()(FDMat* mat) {
+ ProcLib target = mat->proc_lib;
+ if (mat->proc_lib == ProcLib::DEFAULT) {
target = DefaultProcLib::default_lib;
}
if (target == ProcLib::FLYCV) {
@@ -52,9 +52,14 @@ bool Processor::operator()(FDMat* mat, ProcLib lib) {
return ImplByOpenCV(mat);
}
-bool Processor::operator()(FDMatBatch* mat_batch, ProcLib lib) {
- ProcLib target = lib;
- if (lib == ProcLib::DEFAULT) {
+bool Processor::operator()(FDMat* mat, ProcLib lib) {
+ mat->proc_lib = lib;
+ return operator()(mat);
+}
+
+bool Processor::operator()(FDMatBatch* mat_batch) {
+ ProcLib target = mat_batch->proc_lib;
+ if (mat_batch->proc_lib == ProcLib::DEFAULT) {
target = DefaultProcLib::default_lib;
}
if (target == ProcLib::FLYCV) {
diff --git a/fastdeploy/vision/common/processors/base.h b/fastdeploy/vision/common/processors/base.h
index 786e88672..a1c64a2c1 100644
--- a/fastdeploy/vision/common/processors/base.h
+++ b/fastdeploy/vision/common/processors/base.h
@@ -100,10 +100,13 @@ class FASTDEPLOY_DECL Processor {
return true;
}
- virtual bool operator()(FDMat* mat, ProcLib lib = ProcLib::DEFAULT);
+ virtual bool operator()(FDMat* mat);
- virtual bool operator()(FDMatBatch* mat_batch,
- ProcLib lib = ProcLib::DEFAULT);
+ // This function is kept for backward compatibility and will be removed in the
+ // near future; please use operator()(FDMat* mat) instead and set proc_lib in mat.
+ virtual bool operator()(FDMat* mat, ProcLib lib);
+
+ virtual bool operator()(FDMatBatch* mat_batch);
};
} // namespace vision
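
A sketch of the new calling convention: the processing library now travels
with the FDMat instead of being passed per call (the cv::Mat source and the
Resize arguments are illustrative):

    fastdeploy::vision::FDMat mat(cv_image);             // wrap an existing cv::Mat
    mat.proc_lib = fastdeploy::vision::ProcLib::OPENCV;  // was an operator() argument
    fastdeploy::vision::Resize resize_op(320, 320);
    resize_op(&mat);  // dispatches based on mat.proc_lib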
diff --git a/fastdeploy/vision/common/processors/center_crop.cc b/fastdeploy/vision/common/processors/center_crop.cc
index 1857f7a81..f220ac376 100644
--- a/fastdeploy/vision/common/processors/center_crop.cc
+++ b/fastdeploy/vision/common/processors/center_crop.cc
@@ -14,12 +14,6 @@
#include "fastdeploy/vision/common/processors/center_crop.h"
-#ifdef ENABLE_CVCUDA
-#include <cvcuda/OpCustomCrop.hpp>
-
-#include "fastdeploy/vision/common/processors/cvcuda_utils.h"
-#endif
-
namespace fastdeploy {
namespace vision {
@@ -75,9 +69,8 @@ bool CenterCrop::ImplByCvCuda(FDMat* mat) {
int offset_x = static_cast((mat->Width() - width_) / 2);
int offset_y = static_cast((mat->Height() - height_) / 2);
- cvcuda::CustomCrop crop_op;
NVCVRectI crop_roi = {offset_x, offset_y, width_, height_};
- crop_op(mat->Stream(), src_tensor, dst_tensor, crop_roi);
+ cvcuda_crop_op_(mat->Stream(), src_tensor, dst_tensor, crop_roi);
mat->SetTensor(mat->output_cache);
mat->SetWidth(width_);
diff --git a/fastdeploy/vision/common/processors/center_crop.h b/fastdeploy/vision/common/processors/center_crop.h
index 3ca3a7391..0eddde0ed 100644
--- a/fastdeploy/vision/common/processors/center_crop.h
+++ b/fastdeploy/vision/common/processors/center_crop.h
@@ -15,6 +15,11 @@
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
+#ifdef ENABLE_CVCUDA
+#include <cvcuda/OpCustomCrop.hpp>
+
+#include "fastdeploy/vision/common/processors/cvcuda_utils.h"
+#endif
namespace fastdeploy {
namespace vision {
@@ -38,6 +43,9 @@ class FASTDEPLOY_DECL CenterCrop : public Processor {
private:
int height_;
int width_;
+#ifdef ENABLE_CVCUDA
+ cvcuda::CustomCrop cvcuda_crop_op_;
+#endif
};
} // namespace vision
diff --git a/fastdeploy/vision/common/processors/manager.cc b/fastdeploy/vision/common/processors/manager.cc
index 070354da1..2f751ab80 100644
--- a/fastdeploy/vision/common/processors/manager.cc
+++ b/fastdeploy/vision/common/processors/manager.cc
@@ -31,14 +31,14 @@ void ProcessorManager::UseCuda(bool enable_cv_cuda, int gpu_id) {
}
FDASSERT(cudaStreamCreate(&stream_) == cudaSuccess,
"[ERROR] Error occurs while creating cuda stream.");
- DefaultProcLib::default_lib = ProcLib::CUDA;
+ proc_lib_ = ProcLib::CUDA;
#else
FDASSERT(false, "FastDeploy didn't compile with WITH_GPU.");
#endif
if (enable_cv_cuda) {
#ifdef ENABLE_CVCUDA
- DefaultProcLib::default_lib = ProcLib::CVCUDA;
+ proc_lib_ = ProcLib::CVCUDA;
#else
FDASSERT(false, "FastDeploy didn't compile with CV-CUDA.");
#endif
@@ -46,16 +46,11 @@ void ProcessorManager::UseCuda(bool enable_cv_cuda, int gpu_id) {
}
bool ProcessorManager::CudaUsed() {
- return (DefaultProcLib::default_lib == ProcLib::CUDA ||
- DefaultProcLib::default_lib == ProcLib::CVCUDA);
+ return (proc_lib_ == ProcLib::CUDA || proc_lib_ == ProcLib::CVCUDA);
}
 bool ProcessorManager::Run(std::vector<FDMat>* images,
 std::vector<FDTensor>* outputs) {
- if (!initialized_) {
- FDERROR << "The preprocessor is not initialized." << std::endl;
- return false;
- }
if (images->size() == 0) {
FDERROR << "The size of input images should be greater than 0."
<< std::endl;
@@ -70,6 +65,7 @@ bool ProcessorManager::Run(std::vector* images,
FDMatBatch image_batch(images);
image_batch.input_cache = &batch_input_cache_;
image_batch.output_cache = &batch_output_cache_;
+ image_batch.proc_lib = proc_lib_;
for (size_t i = 0; i < images->size(); ++i) {
if (CudaUsed()) {
diff --git a/fastdeploy/vision/common/processors/manager.h b/fastdeploy/vision/common/processors/manager.h
index 48b5575c4..aa6dde56a 100644
--- a/fastdeploy/vision/common/processors/manager.h
+++ b/fastdeploy/vision/common/processors/manager.h
@@ -17,6 +17,7 @@
#include "fastdeploy/utils/utils.h"
#include "fastdeploy/vision/common/processors/mat.h"
#include "fastdeploy/vision/common/processors/mat_batch.h"
+#include "fastdeploy/vision/common/processors/base.h"
namespace fastdeploy {
namespace vision {
@@ -78,7 +79,7 @@ class FASTDEPLOY_DECL ProcessorManager {
 std::vector<FDTensor>* outputs) = 0;
protected:
- bool initialized_ = false;
+ ProcLib proc_lib_ = ProcLib::DEFAULT;
private:
#ifdef WITH_GPU
diff --git a/fastdeploy/vision/common/processors/mat.h b/fastdeploy/vision/common/processors/mat.h
index 13ae76abd..85f121b90 100644
--- a/fastdeploy/vision/common/processors/mat.h
+++ b/fastdeploy/vision/common/processors/mat.h
@@ -145,6 +145,7 @@ struct FASTDEPLOY_DECL Mat {
ProcLib mat_type = ProcLib::OPENCV;
Layout layout = Layout::HWC;
Device device = Device::CPU;
+ ProcLib proc_lib = ProcLib::DEFAULT;
// Create FD Mat from FD Tensor. This method only create a
// new FD Mat with zero copy and it's data pointer is reference
diff --git a/fastdeploy/vision/common/processors/mat_batch.cc b/fastdeploy/vision/common/processors/mat_batch.cc
index f625d6d4d..aa154f334 100644
--- a/fastdeploy/vision/common/processors/mat_batch.cc
+++ b/fastdeploy/vision/common/processors/mat_batch.cc
@@ -67,6 +67,7 @@ FDTensor* CreateCachedGpuInputTensor(FDMatBatch* mat_batch) {
FDTensor* tensor = CreateCachedGpuInputTensor(&(*mats)[i]);
(*mats)[i].SetTensor(tensor);
}
+ mat_batch->device = Device::GPU;
return mat_batch->Tensor();
} else {
FDASSERT(false, "FDMat is on unsupported device: %d", src->device);
diff --git a/fastdeploy/vision/common/processors/mat_batch.h b/fastdeploy/vision/common/processors/mat_batch.h
index 090d8bb59..9d876a911 100644
--- a/fastdeploy/vision/common/processors/mat_batch.h
+++ b/fastdeploy/vision/common/processors/mat_batch.h
@@ -60,6 +60,7 @@ struct FASTDEPLOY_DECL FDMatBatch {
ProcLib mat_type = ProcLib::OPENCV;
FDMatBatchLayout layout = FDMatBatchLayout::NHWC;
Device device = Device::CPU;
+ ProcLib proc_lib = ProcLib::DEFAULT;
// False: the data is stored in the mats separately
// True: the data is stored in the fd_tensor continuously in 4 dimensions
diff --git a/fastdeploy/vision/common/processors/normalize_and_permute.cu b/fastdeploy/vision/common/processors/normalize_and_permute.cu
index 7f6320ba4..da3f4ffb1 100644
--- a/fastdeploy/vision/common/processors/normalize_and_permute.cu
+++ b/fastdeploy/vision/common/processors/normalize_and_permute.cu
@@ -85,6 +85,8 @@ bool NormalizeAndPermute::ImplByCuda(FDMatBatch* mat_batch) {
// NHWC -> NCHW
std::swap(mat_batch->output_cache->shape[1],
mat_batch->output_cache->shape[3]);
+ std::swap(mat_batch->output_cache->shape[2],
+ mat_batch->output_cache->shape[3]);
// Copy alpha and beta to GPU
 gpu_alpha_.Resize({1, 1, static_cast<int>(alpha_.size())}, FDDataType::FP32,
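
The added swap fixes the recorded output shape: swapping dims 1 and 3 alone
turns {N, H, W, C} into {N, C, W, H}, not {N, C, H, W}; the second swap
restores H and W. Worked through for a concrete shape:

    // shape = {1, 224, 112, 3}                       (N, H, W, C)
    // swap(shape[1], shape[3]) -> {1, 3, 112, 224}   (N, C, W, H)  -- still wrong
    // swap(shape[2], shape[3]) -> {1, 3, 224, 112}   (N, C, H, W)  -- correct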
diff --git a/fastdeploy/vision/common/processors/pad.cc b/fastdeploy/vision/common/processors/pad.cc
index 278e8d4b7..2db1fba20 100644
--- a/fastdeploy/vision/common/processors/pad.cc
+++ b/fastdeploy/vision/common/processors/pad.cc
@@ -91,6 +91,60 @@ bool Pad::ImplByFlyCV(Mat* mat) {
}
#endif
+#ifdef ENABLE_CVCUDA
+bool Pad::ImplByCvCuda(FDMat* mat) {
+ if (mat->layout != Layout::HWC) {
+ FDERROR << "Pad: The input data must be Layout::HWC format!" << std::endl;
+ return false;
+ }
+ if (mat->Channels() > 4) {
+ FDERROR << "Pad: Only support channels <= 4." << std::endl;
+ return false;
+ }
+ if (mat->Channels() != value_.size()) {
+ FDERROR << "Pad: Require input channels equals to size of padding value, "
+ "but now channels = "
+ << mat->Channels()
+ << ", the size of padding values = " << value_.size() << "."
+ << std::endl;
+ return false;
+ }
+
+ float4 value;
+ if (value_.size() == 1) {
+ value = make_float4(value_[0], 0.0f, 0.0f, 0.0f);
+ } else if (value_.size() == 2) {
+ value = make_float4(value_[0], value_[1], 0.0f, 0.0f);
+ } else if (value_.size() == 3) {
+ value = make_float4(value_[0], value_[1], value_[2], 0.0f);
+ } else {
+ value = make_float4(value_[0], value_[1], value_[2], value_[3]);
+ }
+
+ // Prepare input tensor
+ FDTensor* src = CreateCachedGpuInputTensor(mat);
+ auto src_tensor = CreateCvCudaTensorWrapData(*src);
+
+ int height = mat->Height() + top_ + bottom_;
+ int width = mat->Width() + left_ + right_;
+
+ // Prepare output tensor
+ mat->output_cache->Resize({height, width, mat->Channels()}, mat->Type(),
+ "output_cache", Device::GPU);
+ auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache));
+
+ cvcuda_pad_op_(mat->Stream(), src_tensor, dst_tensor, top_, left_,
+ NVCV_BORDER_CONSTANT, value);
+
+ mat->SetTensor(mat->output_cache);
+ mat->SetWidth(width);
+ mat->SetHeight(height);
+ mat->device = Device::GPU;
+ mat->mat_type = ProcLib::CVCUDA;
+ return true;
+}
+#endif
+
bool Pad::Run(Mat* mat, const int& top, const int& bottom, const int& left,
 const int& right, const std::vector<float>& value, ProcLib lib) {
auto p = Pad(top, bottom, left, right, value);
diff --git a/fastdeploy/vision/common/processors/pad.h b/fastdeploy/vision/common/processors/pad.h
index 661632e77..5d025c720 100644
--- a/fastdeploy/vision/common/processors/pad.h
+++ b/fastdeploy/vision/common/processors/pad.h
@@ -15,6 +15,11 @@
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
+#ifdef ENABLE_CVCUDA
+#include <cvcuda/OpCopyMakeBorder.hpp>
+
+#include "fastdeploy/vision/common/processors/cvcuda_utils.h"
+#endif
namespace fastdeploy {
namespace vision {
@@ -32,6 +37,9 @@ class FASTDEPLOY_DECL Pad : public Processor {
bool ImplByOpenCV(Mat* mat);
#ifdef ENABLE_FLYCV
bool ImplByFlyCV(Mat* mat);
+#endif
+#ifdef ENABLE_CVCUDA
+ bool ImplByCvCuda(FDMat* mat);
#endif
std::string Name() { return "Pad"; }
@@ -39,12 +47,23 @@ class FASTDEPLOY_DECL Pad : public Processor {
 const int& right, const std::vector<float>& value,
ProcLib lib = ProcLib::DEFAULT);
+ bool SetPaddingSize(int top, int bottom, int left, int right) {
+ top_ = top;
+ bottom_ = bottom;
+ left_ = left;
+ right_ = right;
+ return true;
+ }
+
private:
int top_;
int bottom_;
int left_;
int right_;
 std::vector<float> value_;
+#ifdef ENABLE_CVCUDA
+ cvcuda::CopyMakeBorder cvcuda_pad_op_;
+#endif
};
} // namespace vision
} // namespace fastdeploy
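
A sketch of exercising the new CV-CUDA padding path (requires an ENABLE_CVCUDA
build; the image, border sizes, and per-channel padding values are
illustrative):

    fastdeploy::vision::FDMat mat(cv_image);              // HWC, at most 4 channels
    std::vector<float> value = {114.0f, 114.0f, 114.0f};  // one entry per channel
    fastdeploy::vision::Pad::Run(&mat, 10, 10, 10, 10, value,
                                 fastdeploy::vision::ProcLib::CVCUDA);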
diff --git a/fastdeploy/vision/common/processors/resize.cc b/fastdeploy/vision/common/processors/resize.cc
index 0de6ddfc7..806eab643 100644
--- a/fastdeploy/vision/common/processors/resize.cc
+++ b/fastdeploy/vision/common/processors/resize.cc
@@ -14,12 +14,6 @@
#include "fastdeploy/vision/common/processors/resize.h"
-#ifdef ENABLE_CVCUDA
-#include <cvcuda/OpResize.hpp>
-
-#include "fastdeploy/vision/common/processors/cvcuda_utils.h"
-#endif
-
namespace fastdeploy {
namespace vision {
@@ -152,9 +146,8 @@ bool Resize::ImplByCvCuda(FDMat* mat) {
auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache));
// CV-CUDA Interp value is compatible with OpenCV
- cvcuda::Resize resize_op;
- resize_op(mat->Stream(), src_tensor, dst_tensor,
- NVCVInterpolationType(interp_));
+ cvcuda_resize_op_(mat->Stream(), src_tensor, dst_tensor,
+ NVCVInterpolationType(interp_));
mat->SetTensor(mat->output_cache);
mat->SetWidth(width_);
diff --git a/fastdeploy/vision/common/processors/resize.h b/fastdeploy/vision/common/processors/resize.h
index 2b4f88a35..607287d80 100644
--- a/fastdeploy/vision/common/processors/resize.h
+++ b/fastdeploy/vision/common/processors/resize.h
@@ -15,6 +15,11 @@
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
+#ifdef ENABLE_CVCUDA
+#include <cvcuda/OpResize.hpp>
+
+#include "fastdeploy/vision/common/processors/cvcuda_utils.h"
+#endif
namespace fastdeploy {
namespace vision {
@@ -61,6 +66,9 @@ class FASTDEPLOY_DECL Resize : public Processor {
float scale_h_ = -1.0;
int interp_ = 1;
bool use_scale_ = false;
+#ifdef ENABLE_CVCUDA
+ cvcuda::Resize cvcuda_resize_op_;
+#endif
};
} // namespace vision
} // namespace fastdeploy
diff --git a/fastdeploy/vision/common/processors/resize_by_short.cc b/fastdeploy/vision/common/processors/resize_by_short.cc
index 535652fc7..7fe644e0d 100644
--- a/fastdeploy/vision/common/processors/resize_by_short.cc
+++ b/fastdeploy/vision/common/processors/resize_by_short.cc
@@ -14,12 +14,6 @@
#include "fastdeploy/vision/common/processors/resize_by_short.h"
-#ifdef ENABLE_CVCUDA
-#include <cvcuda/OpResize.hpp>
-
-#include "fastdeploy/vision/common/processors/cvcuda_utils.h"
-#endif
-
namespace fastdeploy {
namespace vision {
@@ -102,9 +96,8 @@ bool ResizeByShort::ImplByCvCuda(FDMat* mat) {
auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache));
// CV-CUDA Interp value is compatible with OpenCV
- cvcuda::Resize resize_op;
- resize_op(mat->Stream(), src_tensor, dst_tensor,
- NVCVInterpolationType(interp_));
+ cvcuda_resize_op_(mat->Stream(), src_tensor, dst_tensor,
+ NVCVInterpolationType(interp_));
mat->SetTensor(mat->output_cache);
mat->SetWidth(width);
@@ -144,9 +137,8 @@ bool ResizeByShort::ImplByCvCuda(FDMatBatch* mat_batch) {
CreateCvCudaImageBatchVarShape(dst_tensors, dst_batch);
// CV-CUDA Interp value is compatible with OpenCV
- cvcuda::Resize resize_op;
- resize_op(mat_batch->Stream(), src_batch, dst_batch,
- NVCVInterpolationType(interp_));
+ cvcuda_resize_op_(mat_batch->Stream(), src_batch, dst_batch,
+ NVCVInterpolationType(interp_));
for (size_t i = 0; i < mat_batch->mats->size(); ++i) {
FDMat* mat = &(*(mat_batch->mats))[i];
diff --git a/fastdeploy/vision/common/processors/resize_by_short.h b/fastdeploy/vision/common/processors/resize_by_short.h
index 99078c708..08bec6438 100644
--- a/fastdeploy/vision/common/processors/resize_by_short.h
+++ b/fastdeploy/vision/common/processors/resize_by_short.h
@@ -15,6 +15,11 @@
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
+#ifdef ENABLE_CVCUDA
+#include <cvcuda/OpResize.hpp>
+
+#include "fastdeploy/vision/common/processors/cvcuda_utils.h"
+#endif
namespace fastdeploy {
namespace vision {
@@ -49,6 +54,9 @@ class FASTDEPLOY_DECL ResizeByShort : public Processor {
 std::vector<int> max_hw_;
int interp_;
bool use_scale_;
+#ifdef ENABLE_CVCUDA
+ cvcuda::Resize cvcuda_resize_op_;
+#endif
};
} // namespace vision
} // namespace fastdeploy
diff --git a/fastdeploy/vision/common/result.cc b/fastdeploy/vision/common/result.cc
index d48d9ddc4..1c8d20c0f 100755
--- a/fastdeploy/vision/common/result.cc
+++ b/fastdeploy/vision/common/result.cc
@@ -26,6 +26,11 @@ void ClassifyResult::Clear() {
scores.clear();
}
+void ClassifyResult::Resize(int size) {
+ label_ids.resize(size);
+ scores.resize(size);
+}
+
std::string ClassifyResult::Str() {
std::string out;
out = "ClassifyResult(\nlabel_ids: ";
diff --git a/fastdeploy/vision/common/result.h b/fastdeploy/vision/common/result.h
index 7c4efde23..6b40bf314 100755
--- a/fastdeploy/vision/common/result.h
+++ b/fastdeploy/vision/common/result.h
@@ -51,6 +51,9 @@ struct FASTDEPLOY_DECL ClassifyResult : public BaseResult {
std::vector scores;
ResultType type = ResultType::CLASSIFY;
+ /// Resize ClassifyResult data buffer
+ void Resize(int size);
+
/// Clear ClassifyResult
void Clear();
diff --git a/fastdeploy/vision/detection/contrib/fastestdet/preprocessor.cc b/fastdeploy/vision/detection/contrib/fastestdet/preprocessor.cc
index f4ff11e8f..7b8fcc399 100644
--- a/fastdeploy/vision/detection/contrib/fastestdet/preprocessor.cc
+++ b/fastdeploy/vision/detection/contrib/fastestdet/preprocessor.cc
@@ -20,26 +20,27 @@ namespace vision {
namespace detection {
FastestDetPreprocessor::FastestDetPreprocessor() {
- size_ = {352, 352}; //{h,w}
+ size_ = {352, 352}; //{h,w}
}
-bool FastestDetPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
- std::map<std::string, std::array<float, 2>>* im_info) {
+bool FastestDetPreprocessor::Preprocess(
+ FDMat* mat, FDTensor* output,
+ std::map<std::string, std::array<float, 2>>* im_info) {
// Record the shape of image and the shape of preprocessed image
(*im_info)["input_shape"] = {static_cast(mat->Height()),
static_cast(mat->Width())};
// process after image load
 double ratio = (size_[0] * 1.0) / std::max(static_cast<double>(mat->Height()),
- static_cast<double>(mat->Width()));
+ static_cast<double>(mat->Width()));
// fastestdet's preprocess steps
// 1. resize
// 2. convert_and_permute(swap_rb=false)
- Resize::Run(mat, size_[0], size_[1]); //resize
+ Resize::Run(mat, size_[0], size_[1]); // resize
 std::vector<float> alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
 std::vector<float> beta = {0.0f, 0.0f, 0.0f};
- //convert to float and HWC2CHW
+ // convert to float and HWC2CHW
ConvertAndPermute::Run(mat, alpha, beta, false);
// Record output shape of preprocessed image
@@ -47,20 +48,22 @@ bool FastestDetPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
 static_cast<float>(mat->Width())};
mat->ShareWithTensor(output);
- output->ExpandDim(0); // reshape to n, h, w, c
+ output->ExpandDim(0); // reshape to n, c, h, w
return true;
}
-bool FastestDetPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
- std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
+bool FastestDetPreprocessor::Run(
+ std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+ std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
if (images->size() == 0) {
- FDERROR << "The size of input images should be greater than 0." << std::endl;
+ FDERROR << "The size of input images should be greater than 0."
+ << std::endl;
return false;
}
ims_info->resize(images->size());
outputs->resize(1);
// Concat all the preprocessed data to a batch tensor
- std::vector<FDTensor> tensors(images->size());
+ std::vector<FDTensor> tensors(images->size());
for (size_t i = 0; i < images->size(); ++i) {
if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) {
FDERROR << "Failed to preprocess input image." << std::endl;
diff --git a/fastdeploy/vision/detection/contrib/nanodet_plus.cc b/fastdeploy/vision/detection/contrib/nanodet_plus.cc
index 2babae49c..0b89cdbe2 100644
--- a/fastdeploy/vision/detection/contrib/nanodet_plus.cc
+++ b/fastdeploy/vision/detection/contrib/nanodet_plus.cc
@@ -117,8 +117,8 @@ NanoDetPlus::NanoDetPlus(const std::string& model_file,
const RuntimeOption& custom_option,
const ModelFormat& model_format) {
if (model_format == ModelFormat::ONNX) {
- valid_cpu_backends = {Backend::ORT};
- valid_gpu_backends = {Backend::ORT, Backend::TRT};
+ valid_cpu_backends = {Backend::ORT};
+ valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else {
valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
@@ -182,7 +182,7 @@ bool NanoDetPlus::Preprocess(
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
}
diff --git a/fastdeploy/vision/detection/contrib/scaledyolov4.cc b/fastdeploy/vision/detection/contrib/scaledyolov4.cc
old mode 100755
new mode 100644
index 8678ea181..88c34352b
--- a/fastdeploy/vision/detection/contrib/scaledyolov4.cc
+++ b/fastdeploy/vision/detection/contrib/scaledyolov4.cc
@@ -62,8 +62,8 @@ ScaledYOLOv4::ScaledYOLOv4(const std::string& model_file,
const RuntimeOption& custom_option,
const ModelFormat& model_format) {
if (model_format == ModelFormat::ONNX) {
- valid_cpu_backends = {Backend::ORT};
- valid_gpu_backends = {Backend::ORT, Backend::TRT};
+ valid_cpu_backends = {Backend::ORT};
+ valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else {
valid_cpu_backends = {Backend::PDINFER};
valid_gpu_backends = {Backend::PDINFER};
@@ -144,7 +144,7 @@ bool ScaledYOLOv4::Preprocess(
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
}
diff --git a/fastdeploy/vision/detection/contrib/yolor.cc b/fastdeploy/vision/detection/contrib/yolor.cc
old mode 100755
new mode 100644
index dd4ef728a..cad66eb08
--- a/fastdeploy/vision/detection/contrib/yolor.cc
+++ b/fastdeploy/vision/detection/contrib/yolor.cc
@@ -61,8 +61,8 @@ YOLOR::YOLOR(const std::string& model_file, const std::string& params_file,
const RuntimeOption& custom_option,
const ModelFormat& model_format) {
if (model_format == ModelFormat::ONNX) {
- valid_cpu_backends = {Backend::ORT};
- valid_gpu_backends = {Backend::ORT, Backend::TRT};
+ valid_cpu_backends = {Backend::ORT};
+ valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else {
valid_cpu_backends = {Backend::PDINFER};
valid_gpu_backends = {Backend::PDINFER};
@@ -142,7 +142,7 @@ bool YOLOR::Preprocess(Mat* mat, FDTensor* output,
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
}
diff --git a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc
old mode 100755
new mode 100644
index 846e25131..658987b75
--- a/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc
+++ b/fastdeploy/vision/detection/contrib/yolov5/preprocessor.cc
@@ -64,8 +64,9 @@ void YOLOv5Preprocessor::LetterBox(FDMat* mat) {
}
}
-bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
- std::map<std::string, std::array<float, 2>>* im_info) {
+bool YOLOv5Preprocessor::Preprocess(
+ FDMat* mat, FDTensor* output,
+ std::map<std::string, std::array<float, 2>>* im_info) {
// Record the shape of image and the shape of preprocessed image
(*im_info)["input_shape"] = {static_cast(mat->Height()),
static_cast(mat->Width())};
@@ -82,20 +83,22 @@ bool YOLOv5Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
 static_cast<float>(mat->Width())};
mat->ShareWithTensor(output);
- output->ExpandDim(0); // reshape to n, h, w, c
+ output->ExpandDim(0); // reshape to n, c, h, w
return true;
}
-bool YOLOv5Preprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
- std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
+bool YOLOv5Preprocessor::Run(
+ std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+ std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
if (images->size() == 0) {
- FDERROR << "The size of input images should be greater than 0." << std::endl;
+ FDERROR << "The size of input images should be greater than 0."
+ << std::endl;
return false;
}
ims_info->resize(images->size());
outputs->resize(1);
// Concat all the preprocessed data to a batch tensor
- std::vector<FDTensor> tensors(images->size());
+ std::vector<FDTensor> tensors(images->size());
for (size_t i = 0; i < images->size(); ++i) {
if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) {
FDERROR << "Failed to preprocess input image." << std::endl;
diff --git a/fastdeploy/vision/detection/contrib/yolov5lite.cc b/fastdeploy/vision/detection/contrib/yolov5lite.cc
index be4116eed..8d8f325dc 100644
--- a/fastdeploy/vision/detection/contrib/yolov5lite.cc
+++ b/fastdeploy/vision/detection/contrib/yolov5lite.cc
@@ -195,7 +195,7 @@ bool YOLOv5Lite::Preprocess(
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
}
@@ -253,7 +253,7 @@ bool YOLOv5Lite::CudaPreprocess(
output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32,
input_tensor_cuda_buffer_device_);
output->device = Device::GPU;
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
#else
FDERROR << "CUDA src code was not enabled." << std::endl;
diff --git a/fastdeploy/vision/detection/contrib/yolov5seg/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov5seg/preprocessor.cc
index b880ed337..e5bd82630 100644
--- a/fastdeploy/vision/detection/contrib/yolov5seg/preprocessor.cc
+++ b/fastdeploy/vision/detection/contrib/yolov5seg/preprocessor.cc
@@ -64,8 +64,9 @@ void YOLOv5SegPreprocessor::LetterBox(FDMat* mat) {
}
}
-bool YOLOv5SegPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
- std::map<std::string, std::array<float, 2>>* im_info) {
+bool YOLOv5SegPreprocessor::Preprocess(
+ FDMat* mat, FDTensor* output,
+ std::map<std::string, std::array<float, 2>>* im_info) {
// Record the shape of image and the shape of preprocessed image
(*im_info)["input_shape"] = {static_cast(mat->Height()),
static_cast(mat->Width())};
@@ -82,20 +83,22 @@ bool YOLOv5SegPreprocessor::Preprocess(FDMat* mat, FDTensor* output,
 static_cast<float>(mat->Width())};
mat->ShareWithTensor(output);
- output->ExpandDim(0); // reshape to n, h, w, c
+ output->ExpandDim(0); // reshape to n, c, h, w
return true;
}
-bool YOLOv5SegPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
- std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
+bool YOLOv5SegPreprocessor::Run(
+ std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+ std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
if (images->size() == 0) {
- FDERROR << "The size of input images should be greater than 0." << std::endl;
+ FDERROR << "The size of input images should be greater than 0."
+ << std::endl;
return false;
}
ims_info->resize(images->size());
outputs->resize(1);
// Concat all the preprocessed data to a batch tensor
- std::vector<FDTensor> tensors(images->size());
+ std::vector<FDTensor> tensors(images->size());
for (size_t i = 0; i < images->size(); ++i) {
if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) {
FDERROR << "Failed to preprocess input image." << std::endl;
diff --git a/fastdeploy/vision/detection/contrib/yolov6.cc b/fastdeploy/vision/detection/contrib/yolov6.cc
index cae9ce3a6..bf3368242 100644
--- a/fastdeploy/vision/detection/contrib/yolov6.cc
+++ b/fastdeploy/vision/detection/contrib/yolov6.cc
@@ -168,7 +168,7 @@ bool YOLOv6::Preprocess(Mat* mat, FDTensor* output,
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
}
@@ -226,7 +226,7 @@ bool YOLOv6::CudaPreprocess(
output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32,
input_tensor_cuda_buffer_device_);
output->device = Device::GPU;
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
#else
FDERROR << "CUDA src code was not enabled." << std::endl;
diff --git a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc
old mode 100755
new mode 100644
index 91e22f32b..3374e16bb
--- a/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc
+++ b/fastdeploy/vision/detection/contrib/yolov7/preprocessor.cc
@@ -64,8 +64,9 @@ void YOLOv7Preprocessor::LetterBox(FDMat* mat) {
}
}
-bool YOLOv7Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
- std::map<std::string, std::array<float, 2>>* im_info) {
+bool YOLOv7Preprocessor::Preprocess(
+ FDMat* mat, FDTensor* output,
+ std::map<std::string, std::array<float, 2>>* im_info) {
// Record the shape of image and the shape of preprocessed image
(*im_info)["input_shape"] = {static_cast(mat->Height()),
static_cast(mat->Width())};
@@ -82,20 +83,22 @@ bool YOLOv7Preprocessor::Preprocess(FDMat* mat, FDTensor* output,
static_cast<float>(mat->Width())};
mat->ShareWithTensor(output);
- output->ExpandDim(0); // reshape to n, h, w, c
+ output->ExpandDim(0); // reshape to n, c, h, w
return true;
}
-bool YOLOv7Preprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
- std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
+bool YOLOv7Preprocessor::Run(
+ std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+ std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
if (images->size() == 0) {
- FDERROR << "The size of input images should be greater than 0." << std::endl;
+ FDERROR << "The size of input images should be greater than 0."
+ << std::endl;
return false;
}
ims_info->resize(images->size());
outputs->resize(1);
// Concat all the preprocessed data to a batch tensor
- std::vector<FDTensor> tensors(images->size());
+ std::vector<FDTensor> tensors(images->size());
for (size_t i = 0; i < images->size(); ++i) {
if (!Preprocess(&(*images)[i], &tensors[i], &(*ims_info)[i])) {
FDERROR << "Failed to preprocess input image." << std::endl;
diff --git a/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc b/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc
old mode 100755
new mode 100644
index daf4ee66b..af7ff0e5c
--- a/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc
+++ b/fastdeploy/vision/detection/contrib/yolov7end2end_ort.cc
@@ -137,7 +137,7 @@ bool YOLOv7End2EndORT::Preprocess(
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
}
@@ -235,7 +235,8 @@ bool YOLOv7End2EndORT::Predict(cv::Mat* im, DetectionResult* result,
return false;
}
- if (!Postprocess(reused_output_tensors_[0], result, im_info, conf_threshold)) {
+ if (!Postprocess(reused_output_tensors_[0], result, im_info,
+ conf_threshold)) {
FDERROR << "Failed to post process." << std::endl;
return false;
}
diff --git a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc
index 49961df65..e969771a2 100644
--- a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc
+++ b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc
@@ -169,7 +169,7 @@ bool YOLOv7End2EndTRT::Preprocess(
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
}
@@ -227,7 +227,7 @@ bool YOLOv7End2EndTRT::CudaPreprocess(
output->SetExternalData({mat->Channels(), size[0], size[1]}, FDDataType::FP32,
input_tensor_cuda_buffer_device_);
output->device = Device::GPU;
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
#else
FDERROR << "CUDA src code was not enabled." << std::endl;
diff --git a/fastdeploy/vision/detection/contrib/yolov8/preprocessor.cc b/fastdeploy/vision/detection/contrib/yolov8/preprocessor.cc
index 1c6d9f62c..ebb8b28cd 100644
--- a/fastdeploy/vision/detection/contrib/yolov8/preprocessor.cc
+++ b/fastdeploy/vision/detection/contrib/yolov8/preprocessor.cc
@@ -83,7 +83,7 @@ bool YOLOv8Preprocessor::Preprocess(
static_cast<float>(mat->Width())};
mat->ShareWithTensor(output);
- output->ExpandDim(0); // reshape to n, h, w, c
+ output->ExpandDim(0); // reshape to n, c, h, w
return true;
}
diff --git a/fastdeploy/vision/detection/contrib/yolox.cc b/fastdeploy/vision/detection/contrib/yolox.cc
index c1c071826..e7d931c42 100755
--- a/fastdeploy/vision/detection/contrib/yolox.cc
+++ b/fastdeploy/vision/detection/contrib/yolox.cc
@@ -129,7 +129,7 @@ bool YOLOX::Preprocess(Mat* mat, FDTensor* output,
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
}
diff --git a/fastdeploy/vision/facealign/contrib/face_landmark_1000.cc b/fastdeploy/vision/facealign/contrib/face_landmark_1000.cc
index f7b689575..0b914fb05 100644
--- a/fastdeploy/vision/facealign/contrib/face_landmark_1000.cc
+++ b/fastdeploy/vision/facealign/contrib/face_landmark_1000.cc
@@ -70,7 +70,7 @@ bool FaceLandmark1000::Preprocess(
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
}
diff --git a/fastdeploy/vision/facealign/contrib/pfld.cc b/fastdeploy/vision/facealign/contrib/pfld.cc
index 5978f10b7..d57427090 100644
--- a/fastdeploy/vision/facealign/contrib/pfld.cc
+++ b/fastdeploy/vision/facealign/contrib/pfld.cc
@@ -22,13 +22,12 @@ namespace vision {
namespace facealign {
-PFLD::PFLD(const std::string& model_file,
- const std::string& params_file,
+PFLD::PFLD(const std::string& model_file, const std::string& params_file,
const RuntimeOption& custom_option,
const ModelFormat& model_format) {
if (model_format == ModelFormat::ONNX) {
- valid_cpu_backends = {Backend::OPENVINO, Backend::ORT};
- valid_gpu_backends = {Backend::ORT, Backend::TRT};
+ valid_cpu_backends = {Backend::OPENVINO, Backend::ORT};
+ valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else {
valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
@@ -71,12 +70,13 @@ bool PFLD::Preprocess(Mat* mat, FDTensor* output,
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
}
-bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result,
- const std::map<std::string, std::array<int, 2>>& im_info) {
+bool PFLD::Postprocess(
+ FDTensor& infer_result, FaceAlignmentResult* result,
+ const std::map<std::string, std::array<int, 2>>& im_info) {
FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now.");
if (infer_result.dtype != FDDataType::FP32) {
FDERROR << "Only support post process with float32 data." << std::endl;
@@ -84,8 +84,7 @@ bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result,
}
auto iter_in = im_info.find("input_shape");
- FDASSERT(iter_in != im_info.end(),
- "Cannot find input_shape from im_info.");
+ FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info.");
int in_h = iter_in->second[0];
int in_w = iter_in->second[1];
@@ -97,8 +96,7 @@ bool PFLD::Postprocess(FDTensor& infer_result, FaceAlignmentResult* result,
x = std::min(std::max(0.f, x), 1.0f);
y = std::min(std::max(0.f, y), 1.0f);
// decode landmarks (default 106 landmarks)
- result->landmarks.emplace_back(
- std::array<float, 2>{x * in_w, y * in_h});
+ result->landmarks.emplace_back(std::array<float, 2>{x * in_w, y * in_h});
}
return true;
diff --git a/fastdeploy/vision/facealign/contrib/pipnet.cc b/fastdeploy/vision/facealign/contrib/pipnet.cc
index 27ec35c0d..3af16fa91 100644
--- a/fastdeploy/vision/facealign/contrib/pipnet.cc
+++ b/fastdeploy/vision/facealign/contrib/pipnet.cc
@@ -632,7 +632,7 @@ bool PIPNet::Preprocess(Mat* mat, FDTensor* output,
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
}
diff --git a/fastdeploy/vision/facedet/contrib/retinaface.cc b/fastdeploy/vision/facedet/contrib/retinaface.cc
index 6f38f5636..cd5f93ab9 100644
--- a/fastdeploy/vision/facedet/contrib/retinaface.cc
+++ b/fastdeploy/vision/facedet/contrib/retinaface.cc
@@ -81,8 +81,8 @@ RetinaFace::RetinaFace(const std::string& model_file,
const RuntimeOption& custom_option,
const ModelFormat& model_format) {
if (model_format == ModelFormat::ONNX) {
- valid_cpu_backends = {Backend::ORT};
- valid_gpu_backends = {Backend::ORT, Backend::TRT};
+ valid_cpu_backends = {Backend::ORT};
+ valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else {
valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
@@ -145,7 +145,7 @@ bool RetinaFace::Preprocess(
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
}
diff --git a/fastdeploy/vision/facedet/contrib/ultraface.cc b/fastdeploy/vision/facedet/contrib/ultraface.cc
index e7dd99dc4..cf398b2e4 100644
--- a/fastdeploy/vision/facedet/contrib/ultraface.cc
+++ b/fastdeploy/vision/facedet/contrib/ultraface.cc
@@ -27,7 +27,7 @@ UltraFace::UltraFace(const std::string& model_file,
const RuntimeOption& custom_option,
const ModelFormat& model_format) {
if (model_format == ModelFormat::ONNX) {
- valid_cpu_backends = {Backend::ORT};
+ valid_cpu_backends = {Backend::ORT};
valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else {
valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
@@ -90,7 +90,7 @@ bool UltraFace::Preprocess(
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
}
diff --git a/fastdeploy/vision/facedet/contrib/yolov5face.cc b/fastdeploy/vision/facedet/contrib/yolov5face.cc
index d508e905a..3fb309bea 100644
--- a/fastdeploy/vision/facedet/contrib/yolov5face.cc
+++ b/fastdeploy/vision/facedet/contrib/yolov5face.cc
@@ -64,8 +64,8 @@ YOLOv5Face::YOLOv5Face(const std::string& model_file,
const RuntimeOption& custom_option,
const ModelFormat& model_format) {
if (model_format == ModelFormat::ONNX) {
- valid_cpu_backends = {Backend::ORT};
- valid_gpu_backends = {Backend::ORT, Backend::TRT};
+ valid_cpu_backends = {Backend::ORT};
+ valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else {
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
@@ -115,11 +115,11 @@ bool YOLOv5Face::Preprocess(
// process after image load
float ratio = std::min(size[1] * 1.0f / static_cast<float>(mat->Height()),
size[0] * 1.0f / static_cast<float>(mat->Width()));
-#ifndef __ANDROID__
- // Because of the low CPU performance on the Android device,
- // we decided to hide this extra resize. It won't make much
+#ifndef __ANDROID__
+ // Because of the low CPU performance on the Android device,
+ // we decided to hide this extra resize. It won't make much
// difference to the final result.
- if (std::fabs(ratio - 1.0f) > 1e-06) {
+ if (std::fabs(ratio - 1.0f) > 1e-06) {
int interp = cv::INTER_LINEAR;
if (ratio > 1.0) {
interp = cv::INTER_LINEAR;
@@ -128,7 +128,7 @@ bool YOLOv5Face::Preprocess(
int resize_w = int(round(static_cast<float>(mat->Width()) * ratio));
Resize::Run(mat, resize_w, resize_h, -1, -1, interp);
}
-#endif
+#endif
// yolov5face's preprocess steps
// 1. letterbox
// 2. BGR->RGB
@@ -149,9 +149,9 @@ bool YOLOv5Face::Preprocess(
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
-
+
mat->ShareWithTensor(output);
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
}
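// The #ifndef __ANDROID__ guard above skips the extra pre-letterbox resize on
// Android only. The trigger condition is plain arithmetic; a standalone
// sketch of it (no FastDeploy types, function name is illustrative):
#include <algorithm>
#include <cmath>

bool NeedsPreResize(int height, int width, const int size[2]) {
  // Same convention as above: size[1] pairs with Height(), size[0] with Width().
  float ratio = std::min(size[1] * 1.0f / static_cast<float>(height),
                         size[0] * 1.0f / static_cast<float>(width));
  return std::fabs(ratio - 1.0f) > 1e-06f;
}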
diff --git a/fastdeploy/vision/facedet/contrib/yolov7face/preprocessor.cc b/fastdeploy/vision/facedet/contrib/yolov7face/preprocessor.cc
index ad5dd7e33..7af63f585 100644
--- a/fastdeploy/vision/facedet/contrib/yolov7face/preprocessor.cc
+++ b/fastdeploy/vision/facedet/contrib/yolov7face/preprocessor.cc
@@ -32,10 +32,12 @@ Yolov7FacePreprocessor::Yolov7FacePreprocessor() {
max_wh_ = 7680.0;
}
-bool Yolov7FacePreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
- std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
+bool Yolov7FacePreprocessor::Run(
+ std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+ std::vector<std::map<std::string, std::array<float, 2>>>* ims_info) {
if (images->size() == 0) {
- FDERROR << "The size of input images should be greater than 0." << std::endl;
+ FDERROR << "The size of input images should be greater than 0."
+ << std::endl;
return false;
}
ims_info->resize(images->size());
@@ -56,8 +58,9 @@ bool Yolov7FacePreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
-bool Yolov7FacePreprocessor::Preprocess(FDMat* mat, FDTensor* output,
- std::map<std::string, std::array<float, 2>>* im_info){
+bool Yolov7FacePreprocessor::Preprocess(
+ FDMat* mat, FDTensor* output,
+ std::map<std::string, std::array<float, 2>>* im_info) {
// Record the shape of image and the shape of preprocessed image
(*im_info)["input_shape"] = {static_cast(mat->Height()),
static_cast(mat->Width())};
@@ -75,13 +78,13 @@ bool Yolov7FacePreprocessor::Preprocess(FDMat* mat, FDTensor* output,
static_cast(mat->Width())};
mat->ShareWithTensor(output);
- output->ExpandDim(0); // reshape to n, h, w, c
+ output->ExpandDim(0); // reshape to n, c, h, w
return true;
}
void Yolov7FacePreprocessor::LetterBox(FDMat* mat) {
float scale =
- std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width());
+ std::min(size_[1] * 1.0 / mat->Height(), size_[0] * 1.0 / mat->Width());
if (!is_scale_up_) {
scale = std::min(scale, 1.0f);
}
diff --git a/fastdeploy/vision/faceid/contrib/adaface/preprocessor.cc b/fastdeploy/vision/faceid/contrib/adaface/preprocessor.cc
old mode 100755
new mode 100644
index 8e8f95950..cb0d90310
--- a/fastdeploy/vision/faceid/contrib/adaface/preprocessor.cc
+++ b/fastdeploy/vision/faceid/contrib/adaface/preprocessor.cc
@@ -26,8 +26,7 @@ AdaFacePreprocessor::AdaFacePreprocessor() {
permute_ = true;
}
-bool AdaFacePreprocessor::Preprocess(FDMat * mat, FDTensor* output) {
-
+bool AdaFacePreprocessor::Preprocess(FDMat* mat, FDTensor* output) {
// face recognition model's preprocess steps in insightface
// reference: insightface/recognition/arcface_torch/inference.py
// 1. Resize
@@ -48,14 +47,15 @@ bool AdaFacePreprocessor::Preprocess(FDMat * mat, FDTensor* output) {
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
- output->ExpandDim(0); // reshape to n, h, w, c
+ output->ExpandDim(0); // reshape to n, c, h, w
return true;
}
bool AdaFacePreprocessor::Run(std::vector<FDMat>* images,
std::vector<FDTensor>* outputs) {
if (images->empty()) {
- FDERROR << "The size of input images should be greater than 0." << std::endl;
+ FDERROR << "The size of input images should be greater than 0."
+ << std::endl;
return false;
}
FDASSERT(images->size() == 1, "Only support batch = 1 now.");
diff --git a/fastdeploy/vision/faceid/contrib/insightface/preprocessor.cc b/fastdeploy/vision/faceid/contrib/insightface/preprocessor.cc
index 398a7016e..e7f55cf65 100644
--- a/fastdeploy/vision/faceid/contrib/insightface/preprocessor.cc
+++ b/fastdeploy/vision/faceid/contrib/insightface/preprocessor.cc
@@ -50,7 +50,7 @@ bool InsightFaceRecognitionPreprocessor::Preprocess(FDMat* mat,
}
mat->ShareWithTensor(output);
- output->ExpandDim(0); // reshape to n, h, w, c
+ output->ExpandDim(0); // reshape to n, c, h, w
return true;
}
diff --git a/fastdeploy/vision/headpose/contrib/fsanet.cc b/fastdeploy/vision/headpose/contrib/fsanet.cc
index 59f25ac5a..c22909134 100644
--- a/fastdeploy/vision/headpose/contrib/fsanet.cc
+++ b/fastdeploy/vision/headpose/contrib/fsanet.cc
@@ -22,13 +22,12 @@ namespace vision {
namespace headpose {
-FSANet::FSANet(const std::string& model_file,
- const std::string& params_file,
+FSANet::FSANet(const std::string& model_file, const std::string& params_file,
const RuntimeOption& custom_option,
const ModelFormat& model_format) {
if (model_format == ModelFormat::ONNX) {
- valid_cpu_backends = {Backend::OPENVINO, Backend::ORT};
- valid_gpu_backends = {Backend::ORT, Backend::TRT};
+ valid_cpu_backends = {Backend::OPENVINO, Backend::ORT};
+ valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else {
valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
@@ -52,7 +51,7 @@ bool FSANet::Initialize() {
}
bool FSANet::Preprocess(Mat* mat, FDTensor* output,
- std::map<std::string, std::array<int, 2>>* im_info) {
+ std::map<std::string, std::array<int, 2>>* im_info) {
// Resize
int resize_w = size[0];
int resize_h = size[1];
@@ -62,7 +61,8 @@ bool FSANet::Preprocess(Mat* mat, FDTensor* output,
// Normalize
std::vector<float> alpha = {1.0f / 128.0f, 1.0f / 128.0f, 1.0f / 128.0f};
- std::vector<float> beta = {-127.5f / 128.0f, -127.5f / 128.0f, -127.5f / 128.0f};
+ std::vector<float> beta = {-127.5f / 128.0f, -127.5f / 128.0f,
+ -127.5f / 128.0f};
Convert::Run(mat, alpha, beta);
// Record output shape of preprocessed image
@@ -72,12 +72,13 @@ bool FSANet::Preprocess(Mat* mat, FDTensor* output,
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
}
-bool FSANet::Postprocess(FDTensor& infer_result, HeadPoseResult* result,
- const std::map<std::string, std::array<int, 2>>& im_info) {
+bool FSANet::Postprocess(
+ FDTensor& infer_result, HeadPoseResult* result,
+ const std::map<std::string, std::array<int, 2>>& im_info) {
FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now.");
if (infer_result.dtype != FDDataType::FP32) {
FDERROR << "Only support post process with float32 data." << std::endl;
@@ -85,8 +86,7 @@ bool FSANet::Postprocess(FDTensor& infer_result, HeadPoseResult* result,
}
auto iter_in = im_info.find("input_shape");
- FDASSERT(iter_in != im_info.end(),
- "Cannot find input_shape from im_info.");
+ FDASSERT(iter_in != im_info.end(), "Cannot find input_shape from im_info.");
int in_h = iter_in->second[0];
int in_w = iter_in->second[1];
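// The Convert step above is the whole normalization: as its alpha/beta
// arguments suggest, it computes value * alpha + beta per channel, so
// alpha = 1/128 and beta = -127.5/128 map the 8-bit range [0, 255] onto
// roughly [-1, 1]. A quick endpoint check in plain C++:
#include <cstdio>

int main() {
  const float alpha = 1.0f / 128.0f;
  const float beta = -127.5f / 128.0f;
  std::printf("%f\n", 0.0f * alpha + beta);    // -0.996094
  std::printf("%f\n", 255.0f * alpha + beta);  //  0.996094
  return 0;
}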
diff --git a/fastdeploy/vision/matting/contrib/modnet.cc b/fastdeploy/vision/matting/contrib/modnet.cc
index c3a89733d..05141a926 100644
--- a/fastdeploy/vision/matting/contrib/modnet.cc
+++ b/fastdeploy/vision/matting/contrib/modnet.cc
@@ -77,7 +77,7 @@ bool MODNet::Preprocess(Mat* mat, FDTensor* output,
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
- output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c
+ output->shape.insert(output->shape.begin(), 1); // reshape to n, c, h, w
return true;
}
@@ -106,8 +106,8 @@ bool MODNet::Postprocess(
float* alpha_ptr = static_cast<float*>(alpha_tensor.Data());
// cv::Mat alpha_zero_copy_ref(out_h, out_w, CV_32FC1, alpha_ptr);
// Mat alpha_resized(alpha_zero_copy_ref); // ref-only, zero copy.
- Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32,
- alpha_ptr); // ref-only, zero copy.
+ Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32,
+ alpha_ptr); // ref-only, zero copy.
if ((out_h != ipt_h) || (out_w != ipt_w)) {
Resize::Run(&alpha_resized, ipt_w, ipt_h, -1, -1);
}
diff --git a/fastdeploy/vision/matting/contrib/rvm.cc b/fastdeploy/vision/matting/contrib/rvm.cc
old mode 100755
new mode 100644
index 258205cf8..2b16aab83
--- a/fastdeploy/vision/matting/contrib/rvm.cc
+++ b/fastdeploy/vision/matting/contrib/rvm.cc
@@ -74,7 +74,7 @@ bool RobustVideoMatting::Preprocess(
(*im_info)["output_shape"] = {mat->Height(), mat->Width()};
mat->ShareWithTensor(output);
- output->ExpandDim(0); // reshape to n, h, w, c
+ output->ExpandDim(0); // reshape to n, c, h, w
return true;
}
@@ -118,16 +118,16 @@ bool RobustVideoMatting::Postprocess(
// for alpha
float* alpha_ptr = static_cast<float*>(alpha.Data());
- Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32,
- alpha_ptr); // ref-only, zero copy.
+ Mat alpha_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32,
+ alpha_ptr); // ref-only, zero copy.
if ((out_h != in_h) || (out_w != in_w)) {
Resize::Run(&alpha_resized, in_w, in_h, -1, -1);
}
// for foreground
float* fgr_ptr = static_cast<float*>(fgr.Data());
- Mat fgr_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32,
- fgr_ptr); // ref-only, zero copy.
+ Mat fgr_resized = Mat::Create(out_h, out_w, 1, FDDataType::FP32,
+ fgr_ptr); // ref-only, zero copy.
if ((out_h != in_h) || (out_w != in_w)) {
Resize::Run(&fgr_resized, in_w, in_h, -1, -1);
}
diff --git a/fastdeploy/vision/matting/ppmatting/ppmatting_pybind.cc b/fastdeploy/vision/matting/ppmatting/ppmatting_pybind.cc
index 97837fa6f..a16d36f72 100644
--- a/fastdeploy/vision/matting/ppmatting/ppmatting_pybind.cc
+++ b/fastdeploy/vision/matting/ppmatting/ppmatting_pybind.cc
@@ -21,8 +21,8 @@ void BindPPMatting(pybind11::module& m) {
.def("predict",
[](vision::matting::PPMatting& self, pybind11::array& data) {
auto mat = PyArrayToCvMat(data);
- vision::MattingResult* res = new vision::MattingResult();
- self.Predict(&mat, res);
+ vision::MattingResult res;
+ self.Predict(&mat, &res);
return res;
});
}
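// The binding now returns MattingResult by value instead of a raw new-ed
// pointer, so pybind11 moves the result into a Python-owned object and
// ownership is unambiguous. A generic sketch of the pattern with toy types
// (not FastDeploy code):
#include <pybind11/pybind11.h>

struct Result {
  int value = 0;
};

PYBIND11_MODULE(toy, m) {
  pybind11::class_<Result>(m, "Result").def_readonly("value", &Result::value);
  m.def("predict", []() {
    Result res;     // stack object, no manual allocation
    res.value = 42;
    return res;     // copied/moved into a pybind11-managed holder
  });
}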
diff --git a/fastdeploy/vision/ocr/ppocr/dbdetector.cc b/fastdeploy/vision/ocr/ppocr/dbdetector.cc
old mode 100755
new mode 100644
index cd07cc262..7dd0ac84a
--- a/fastdeploy/vision/ocr/ppocr/dbdetector.cc
+++ b/fastdeploy/vision/ocr/ppocr/dbdetector.cc
@@ -13,6 +13,7 @@
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/dbdetector.h"
+
#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
@@ -26,11 +27,11 @@ DBDetector::DBDetector(const std::string& model_file,
const RuntimeOption& custom_option,
const ModelFormat& model_format) {
if (model_format == ModelFormat::ONNX) {
- valid_cpu_backends = {Backend::ORT,
- Backend::OPENVINO};
- valid_gpu_backends = {Backend::ORT, Backend::TRT};
+ valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
+ valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else {
- valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE};
+ valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO,
+ Backend::LITE};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
valid_kunlunxin_backends = {Backend::LITE};
valid_ascend_backends = {Backend::LITE};
@@ -54,7 +55,8 @@ bool DBDetector::Initialize() {
}
std::unique_ptr<DBDetector> DBDetector::Clone() const {
- std::unique_ptr<DBDetector> clone_model = utils::make_unique<DBDetector>(DBDetector(*this));
+ std::unique_ptr<DBDetector> clone_model =
+ utils::make_unique<DBDetector>(DBDetector(*this));
clone_model->SetRuntime(clone_model->CloneRuntime());
return clone_model;
}
@@ -69,14 +71,15 @@ bool DBDetector::Predict(const cv::Mat& img,
return true;
}
-bool DBDetector::BatchPredict(const std::vector<cv::Mat>& images,
- std::vector<std::vector<std::array<int, 8>>>* det_results) {
+bool DBDetector::BatchPredict(
+ const std::vector<cv::Mat>& images,
+ std::vector<std::vector<std::array<int, 8>>>* det_results) {
std::vector<FDMat> fd_images = WrapMat(images);
- std::vector<std::array<int, 4>> batch_det_img_info;
- if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, &batch_det_img_info)) {
+ if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) {
FDERROR << "Failed to preprocess input image." << std::endl;
return false;
}
+ auto batch_det_img_info = preprocessor_.GetBatchImgInfo();
reused_input_tensors_[0].name = InputInfoOfRuntime(0).name;
if (!Infer(reused_input_tensors_, &reused_output_tensors_)) {
@@ -84,13 +87,15 @@ bool DBDetector::BatchPredict(const std::vector<cv::Mat>& images,
return false;
}
- if (!postprocessor_.Run(reused_output_tensors_, det_results, batch_det_img_info)) {
- FDERROR << "Failed to postprocess the inference cls_results by runtime." << std::endl;
+ if (!postprocessor_.Run(reused_output_tensors_, det_results,
+ *batch_det_img_info)) {
+ FDERROR << "Failed to postprocess the inference cls_results by runtime."
+ << std::endl;
return false;
}
return true;
}
-} // namesapce ocr
+} // namespace ocr
} // namespace vision
} // namespace fastdeploy
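// With the per-image info now cached inside the preprocessor, BatchPredict
// takes only images in and quads out. A hedged usage sketch; the model and
// image paths are placeholders, not from this patch:
#include <array>
#include <iostream>
#include <vector>
#include <opencv2/opencv.hpp>
#include "fastdeploy/vision.h"

int main() {
  fastdeploy::vision::ocr::DBDetector detector("inference.pdmodel",
                                               "inference.pdiparams");
  std::vector<cv::Mat> images = {cv::imread("a.jpg"), cv::imread("b.jpg")};
  std::vector<std::vector<std::array<int, 8>>> det_results;
  if (!detector.BatchPredict(images, &det_results)) {
    std::cerr << "Failed to detect." << std::endl;
    return -1;
  }
  // det_results[i] holds one 8-int quad (x1, y1, ..., x4, y4) per text box.
  std::cout << "Processed " << det_results.size() << " images." << std::endl;
  return 0;
}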
diff --git a/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc b/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc
index 28b7e47af..69687d5cd 100644
--- a/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc
+++ b/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc
@@ -13,9 +13,8 @@
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/det_preprocessor.h"
-#include "fastdeploy/utils/perf.h"
+
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
-#include "fastdeploy/function/concat.h"
namespace fastdeploy {
namespace vision {
@@ -39,64 +38,61 @@ std::array<int, 4> OcrDetectorGetInfo(FDMat* img, int max_size_len) {
resize_h = std::max(int(std::round(float(resize_h) / 32) * 32), 32);
resize_w = std::max(int(std::round(float(resize_w) / 32) * 32), 32);
- return {w,h,resize_w,resize_h};
+ return {w, h, resize_w, resize_h};
/*
- *ratio_h = float(resize_h) / float(h);
- *ratio_w = float(resize_w) / float(w);
- */
+ *ratio_h = float(resize_h) / float(h);
+ *ratio_w = float(resize_w) / float(w);
+ */
}
-bool OcrDetectorResizeImage(FDMat* img,
- int resize_w,
- int resize_h,
- int max_resize_w,
- int max_resize_h) {
- Resize::Run(img, resize_w, resize_h);
+
+DBDetectorPreprocessor::DBDetectorPreprocessor() {
+ resize_op_ = std::make_shared<Resize>(-1, -1);
+
std::vector<float> value = {0, 0, 0};
- Pad::Run(img, 0, max_resize_h-resize_h, 0, max_resize_w - resize_w, value);
+ pad_op_ = std::make_shared<Pad>(0, 0, 0, 0, value);
+
+ std::vector<float> mean = {0.485f, 0.456f, 0.406f};
+ std::vector<float> std = {0.229f, 0.224f, 0.225f};
+ bool is_scale = true;
+ normalize_permute_op_ =
+ std::make_shared<NormalizeAndPermute>(mean, std, is_scale);
+}
+
+bool DBDetectorPreprocessor::ResizeImage(FDMat* img, int resize_w, int resize_h,
+ int max_resize_w, int max_resize_h) {
+ resize_op_->SetWidthAndHeight(resize_w, resize_h);
+ (*resize_op_)(img);
+
+ pad_op_->SetPaddingSize(0, max_resize_h - resize_h, 0,
+ max_resize_w - resize_w);
+ (*pad_op_)(img);
return true;
}
-bool DBDetectorPreprocessor::Run(std::vector<FDMat>* images,
- std::vector<FDTensor>* outputs,
- std::vector<std::array<int, 4>>* batch_det_img_info_ptr) {
- if (images->size() == 0) {
- FDERROR << "The size of input images should be greater than 0." << std::endl;
- return false;
- }
+bool DBDetectorPreprocessor::Apply(FDMatBatch* image_batch,
+ std::vector<FDTensor>* outputs) {
int max_resize_w = 0;
int max_resize_h = 0;
- std::vector<std::array<int, 4>>& batch_det_img_info = *batch_det_img_info_ptr;
- batch_det_img_info.clear();
- batch_det_img_info.resize(images->size());
- for (size_t i = 0; i < images->size(); ++i) {
- FDMat* mat = &(images->at(i));
- batch_det_img_info[i] = OcrDetectorGetInfo(mat,max_side_len_);
- max_resize_w = std::max(max_resize_w,batch_det_img_info[i][2]);
- max_resize_h = std::max(max_resize_h,batch_det_img_info[i][3]);
+ batch_det_img_info_.clear();
+ batch_det_img_info_.resize(image_batch->mats->size());
+ for (size_t i = 0; i < image_batch->mats->size(); ++i) {
+ FDMat* mat = &(image_batch->mats->at(i));
+ batch_det_img_info_[i] = OcrDetectorGetInfo(mat, max_side_len_);
+ max_resize_w = std::max(max_resize_w, batch_det_img_info_[i][2]);
+ max_resize_h = std::max(max_resize_h, batch_det_img_info_[i][3]);
}
- for (size_t i = 0; i < images->size(); ++i) {
- FDMat* mat = &(images->at(i));
- OcrDetectorResizeImage(mat, batch_det_img_info[i][2],batch_det_img_info[i][3],max_resize_w,max_resize_h);
- NormalizeAndPermute::Run(mat, mean_, scale_, is_scale_);
- /*
- Normalize::Run(mat, mean_, scale_, is_scale_);
- HWC2CHW::Run(mat);
- Cast::Run(mat, "float");
- */
+ for (size_t i = 0; i < image_batch->mats->size(); ++i) {
+ FDMat* mat = &(image_batch->mats->at(i));
+ ResizeImage(mat, batch_det_img_info_[i][2], batch_det_img_info_[i][3],
+ max_resize_w, max_resize_h);
}
- // Only have 1 output Tensor.
+ (*normalize_permute_op_)(image_batch);
+
outputs->resize(1);
- // Concat all the preprocessed data to a batch tensor
- std::vector<FDTensor> tensors(images->size());
- for (size_t i = 0; i < images->size(); ++i) {
- (*images)[i].ShareWithTensor(&(tensors[i]));
- tensors[i].ExpandDim(0);
- }
- if (tensors.size() == 1) {
- (*outputs)[0] = std::move(tensors[0]);
- } else {
- function::Concat(tensors, &((*outputs)[0]), 0);
- }
+ FDTensor* tensor = image_batch->Tensor();
+ (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(),
+ tensor->Data(), tensor->device,
+ tensor->device_id);
return true;
}
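// The rewritten Apply hands its result out zero-copy: SetExternalData only
// records shape and dtype and borrows the batch tensor's buffer, so nothing
// is reallocated or copied, and the FDMatBatch must outlive the output
// tensor. A minimal sketch of that borrowing behavior:
#include "fastdeploy/core/fd_tensor.h"

void WrapExisting(float* data, int64_t n, int64_t c, int64_t h, int64_t w) {
  fastdeploy::FDTensor out;
  out.SetExternalData({n, c, h, w}, fastdeploy::FDDataType::FP32, data);
  // out.Data() == data: the tensor is a view, not an owner.
}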
diff --git a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h
index 552d0628a..fd7b77de1 100644
--- a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h
+++ b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h
@@ -13,7 +13,10 @@
// limitations under the License.
#pragma once
-#include "fastdeploy/vision/common/processors/transform.h"
+#include "fastdeploy/vision/common/processors/manager.h"
+#include "fastdeploy/vision/common/processors/resize.h"
+#include "fastdeploy/vision/common/processors/pad.h"
+#include "fastdeploy/vision/common/processors/normalize_and_permute.h"
#include "fastdeploy/vision/common/result.h"
namespace fastdeploy {
@@ -22,43 +25,48 @@ namespace vision {
namespace ocr {
/*! @brief Preprocessor object for DBDetector serials model.
*/
-class FASTDEPLOY_DECL DBDetectorPreprocessor {
+class FASTDEPLOY_DECL DBDetectorPreprocessor : public ProcessorManager {
public:
+ DBDetectorPreprocessor();
+
/** \brief Process the input image and prepare input tensors for runtime
*
- * \param[in] images The input data list, all the elements are FDMat
+ * \param[in] image_batch The input image batch
* \param[in] outputs The output tensors which will feed in runtime
- * \param[in] batch_det_img_info_ptr The output of preprocess
* \return true if the preprocess succeeded, otherwise false
*/
- bool Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
- std::vector<std::array<int, 4>>* batch_det_img_info_ptr);
+ virtual bool Apply(FDMatBatch* image_batch, std::vector<FDTensor>* outputs);
/// Set max_side_len for the detection preprocess, default is 960
void SetMaxSideLen(int max_side_len) { max_side_len_ = max_side_len; }
+
/// Get max_side_len of the detection preprocess
int GetMaxSideLen() const { return max_side_len_; }
- /// Set mean value for the image normalization in detection preprocess
- void SetMean(const std::vector<float>& mean) { mean_ = mean; }
- /// Get mean value of the image normalization in detection preprocess
- std::vector<float> GetMean() const { return mean_; }
+ /// Set preprocess normalize parameters, please call this API to customize
+ /// the normalize parameters, otherwise it will use the default normalize
+ /// parameters.
+ void SetNormalize(const std::vector<float>& mean = {0.485f, 0.456f, 0.406f},
+ const std::vector<float>& std = {0.229f, 0.224f, 0.225f},
+ bool is_scale = true) {
+ normalize_permute_op_ =
+ std::make_shared<NormalizeAndPermute>(mean, std, is_scale);
+ }
- /// Set scale value for the image normalization in detection preprocess
- void SetScale(const std::vector<float>& scale) { scale_ = scale; }
- /// Get scale value of the image normalization in detection preprocess
- std::vector<float> GetScale() const { return scale_; }
-
- /// Set is_scale for the image normalization in detection preprocess
- void SetIsScale(bool is_scale) { is_scale_ = is_scale; }
- /// Get is_scale of the image normalization in detection preprocess
- bool GetIsScale() const { return is_scale_; }
+ /// Get the image info of the last batch, return a list of array
+ /// {image width, image height, resize width, resize height}
+ const std::vector<std::array<int, 4>>* GetBatchImgInfo() {
+ return &batch_det_img_info_;
+ }
private:
+ bool ResizeImage(FDMat* img, int resize_w, int resize_h, int max_resize_w,
+ int max_resize_h);
int max_side_len_ = 960;
- std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
- std::vector<float> scale_ = {0.229f, 0.224f, 0.225f};
- bool is_scale_ = true;
+ std::vector<std::array<int, 4>> batch_det_img_info_;
+ std::shared_ptr<Resize> resize_op_;
+ std::shared_ptr<Pad> pad_op_;
+ std::shared_ptr<NormalizeAndPermute> normalize_permute_op_;
};
} // namespace ocr
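// The new surface in this header: SetMaxSideLen/GetMaxSideLen, SetNormalize
// to override the default mean/std, and GetBatchImgInfo to read back the
// per-image {w, h, resize_w, resize_h} after a run. A hedged usage sketch
// (the values shown are just the defaults restated):
#include <array>
#include <vector>
#include "fastdeploy/vision.h"

void Configure(fastdeploy::vision::ocr::DBDetectorPreprocessor* p) {
  p->SetMaxSideLen(960);
  p->SetNormalize({0.485f, 0.456f, 0.406f},  // mean
                  {0.229f, 0.224f, 0.225f},  // std
                  true);                     // is_scale
  const std::vector<std::array<int, 4>>* info = p->GetBatchImgInfo();
  (void)info;  // populated once the preprocessor has processed a batch
}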
diff --git a/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc b/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc
old mode 100755
new mode 100644
index 2bcb697a8..aa77542af
--- a/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc
+++ b/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc
@@ -12,80 +12,106 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include
+
#include "fastdeploy/pybind/main.h"
namespace fastdeploy {
void BindPPOCRModel(pybind11::module& m) {
m.def("sort_boxes", [](std::vector>& boxes) {
- vision::ocr::SortBoxes(&boxes);
- return boxes;
+ vision::ocr::SortBoxes(&boxes);
+ return boxes;
});
-
+
// DBDetector
- pybind11::class_<vision::ocr::DBDetectorPreprocessor>(m, "DBDetectorPreprocessor")
+ pybind11::class_<vision::ocr::DBDetectorPreprocessor>(
+ m, "DBDetectorPreprocessor")
.def(pybind11::init<>())
- .def_property("max_side_len", &vision::ocr::DBDetectorPreprocessor::GetMaxSideLen, &vision::ocr::DBDetectorPreprocessor::SetMaxSideLen)
- .def_property("mean", &vision::ocr::DBDetectorPreprocessor::GetMean, &vision::ocr::DBDetectorPreprocessor::SetMean)
- .def_property("scale", &vision::ocr::DBDetectorPreprocessor::GetScale, &vision::ocr::DBDetectorPreprocessor::SetScale)
- .def_property("is_scale", &vision::ocr::DBDetectorPreprocessor::GetIsScale, &vision::ocr::DBDetectorPreprocessor::SetIsScale)
- .def("run", [](vision::ocr::DBDetectorPreprocessor& self, std::vector& im_list) {
+ .def_property("max_side_len",
+ &vision::ocr::DBDetectorPreprocessor::GetMaxSideLen,
+ &vision::ocr::DBDetectorPreprocessor::SetMaxSideLen)
+ .def("set_normalize",
+ [](vision::ocr::DBDetectorPreprocessor& self,
+ const std::vector<float>& mean, const std::vector<float>