Optimize ocr system code (#209)

* Support PPYOLOE plus model

* Optimize ocr system code

* modify example code

* fix patchelf of openvino

* optimize demo code of ocr

* remove debug code

* update demo code of ocr

Co-authored-by: Jack Zhou <zhoushunjie@baidu.com>
This commit is contained in:
Jason
2022-09-14 09:46:03 +08:00
committed by GitHub
parent 1452275efe
commit 0dd9ecee65
39 changed files with 1041 additions and 909 deletions

View File

@@ -199,13 +199,8 @@ if(ENABLE_TRT_BACKEND)
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TRT_SRCS})
find_library(TRT_INFER_LIB nvinfer ${TRT_DIRECTORY}/lib)
find_library(TRT_ONNX_LIB nvonnxparser ${TRT_DIRECTORY}/lib)
if(NOT WIN32)
find_library(TRT_CAFFE_LIB nvcaffe_parser ${TRT_DIRECTORY}/lib)
else()
set(TRT_CAFFE_LIB "")
endif()
find_library(TRT_PLUGIN_LIB nvinfer_plugin ${TRT_DIRECTORY}/lib)
list(APPEND DEPEND_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_CAFFE_LIB} ${TRT_PLUGIN_LIB})
list(APPEND DEPEND_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_PLUGIN_LIB})
if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt")
file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt")

View File

@@ -70,12 +70,7 @@ if(WITH_GPU)
find_library(TRT_INFER_LIB nvinfer ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib)
find_library(TRT_ONNX_LIB nvonnxparser ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib)
find_library(TRT_PLUGIN_LIB nvinfer_plugin ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib)
if (NOT WIN32)
find_library(TRT_CAFFE_LIB nvcaffe_parser ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/tensorrt/lib)
list(APPEND FASTDEPLOY_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_CAFFE_LIB} ${TRT_PLUGIN_LIB})
else()
list(APPEND FASTDEPLOY_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_PLUGIN_LIB})
endif()
list(APPEND FASTDEPLOY_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_PLUGIN_LIB})
endif()
endif()

build_scripts/patch_lib.sh Executable file
View File

@@ -0,0 +1,15 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
patchelf --set-rpath '${ORIGIN}' $1/*.so*

View File

@@ -144,6 +144,9 @@ def process_libraries(current_dir):
for flt in filters:
if f.count(flt) > 0:
remain = False
filename = os.path.split(f)[-1]
if filename in ["libnvinfer_plugin.so", "libnvinfer_plugin.so.8.4.1", "libnvinfer.so", "libnvinfer.so.8.4.1", "libnvonnxparser.so", "libnvonnxparser.so.8.4.1", "libnvparsers.so", "libnvparsers.so.8.4.1"]:
continue
if remain:
package_data.append(
os.path.relpath(f, os.path.join(current_dir, "fastdeploy")))

View File

@@ -261,12 +261,12 @@ bool Runtime::Init(const RuntimeOption& _option) {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
"Backend::ORT only supports Device::CPU/Device::GPU.");
CreateOrtBackend();
FDINFO << "Runtime initialized with Backend::ORT." << std::endl;
FDINFO << "Runtime initialized with Backend::ORT in device " << Str(option.device) << "." << std::endl;
} else if (option.backend == Backend::TRT) {
FDASSERT(option.device == Device::GPU,
"Backend::TRT only supports Device::GPU.");
CreateTrtBackend();
FDINFO << "Runtime initialized with Backend::TRT." << std::endl;
FDINFO << "Runtime initialized with Backend::TRT in device " << Str(option.device) << "." << std::endl;
} else if (option.backend == Backend::PDINFER) {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
"Backend::TRT only supports Device::CPU/Device::GPU.");
@@ -274,12 +274,12 @@ bool Runtime::Init(const RuntimeOption& _option) {
option.model_format == Frontend::PADDLE,
"Backend::PDINFER only supports model format of Frontend::PADDLE.");
CreatePaddleBackend();
FDINFO << "Runtime initialized with Backend::PDINFER." << std::endl;
FDINFO << "Runtime initialized with Backend::PDINFER in device " << Str(option.device) << "." << std::endl;
} else if (option.backend == Backend::OPENVINO) {
FDASSERT(option.device == Device::CPU,
"Backend::OPENVINO only supports Device::CPU");
CreateOpenVINOBackend();
FDINFO << "Runtime initialized with Backend::OPENVINO." << std::endl;
FDINFO << "Runtime initialized with Backend::OPENVINO in device " << Str(option.device) << "." << std::endl;
} else {
FDERROR << "Runtime only support "
"Backend::ORT/Backend::TRT/Backend::PDINFER as backend now."

View File

@@ -72,6 +72,14 @@ std::string DetectionResult::Str() {
return out;
}
void OCRResult::Clear() {
boxes.clear();
text.clear();
rec_scores.clear();
cls_scores.clear();
cls_labels.clear();
}
FaceDetectionResult::FaceDetectionResult(const FaceDetectionResult& res) {
boxes.assign(res.boxes.begin(), res.boxes.end());
landmarks.assign(res.landmarks.begin(), res.landmarks.end());

View File

@@ -71,6 +71,8 @@ struct FASTDEPLOY_DECL OCRResult : public BaseResult {
ResultType type = ResultType::OCR;
void Clear();
std::string Str();
};

View File

@@ -26,10 +26,10 @@ Classifier::Classifier(const std::string& model_file,
const RuntimeOption& custom_option,
const Frontend& model_format) {
if (model_format == Frontend::ONNX) {
valid_cpu_backends = {Backend::ORT}; // Available CPU backends
valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; // Available CPU backends
valid_gpu_backends = {Backend::ORT, Backend::TRT}; // Available GPU backends
} else {
valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO};
valid_gpu_backends = {Backend::PDINFER, Backend::TRT, Backend::ORT};
}
runtime_option = custom_option;
@@ -145,4 +145,4 @@ bool Classifier::Predict(cv::Mat* img, std::tuple<int, float>* cls_result) {
} // namespace ocr
} // namespace vision
} // namespace fastdeploy
} // namespace fastdeploy
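With OPENVINO added to the classifier's valid CPU backends, the backend can also be requested explicitly from Python. A minimal sketch, assuming a FastDeploy build with ENABLE_OPENVINO_BACKEND and the ch_ppocr_mobile_v2.0_cls_infer model used in the examples further below (paths are placeholders):
```
import fastdeploy as fd

# Request OpenVINO on CPU; the build must have ENABLE_OPENVINO_BACKEND enabled.
option = fd.RuntimeOption()
option.use_cpu()
option.use_openvino_backend()

cls_model = fd.vision.ocr.Classifier(
    "ch_ppocr_mobile_v2.0_cls_infer/inference.pdmodel",
    "ch_ppocr_mobile_v2.0_cls_infer/inference.pdiparams",
    runtime_option=option)
```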

View File

@@ -26,10 +26,10 @@ DBDetector::DBDetector(const std::string& model_file,
const RuntimeOption& custom_option,
const Frontend& model_format) {
if (model_format == Frontend::ONNX) {
valid_cpu_backends = {Backend::ORT}; // Available CPU backends
valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; // Available CPU backends
valid_gpu_backends = {Backend::ORT, Backend::TRT}; // Available GPU backends
} else {
valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
}
@@ -202,4 +202,4 @@ bool DBDetector::Predict(cv::Mat* img,
} // namespace ocr
} // namespace vision
} // namespace fastdeploy
} // namespace fastdeploy

View File

@@ -23,10 +23,10 @@ void BindPPOCRSystemv3(pybind11::module& m) {
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
fastdeploy::vision::ocr::Classifier*,
fastdeploy::vision::ocr::Recognizer*>())
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
fastdeploy::vision::ocr::Recognizer*>())
.def("predict", [](application::ocrsystem::PPOCRSystemv3& self,
pybind11::array& data) {
auto mat = PyArrayToCvMat(data);
vision::OCRResult res;
self.Predict(&mat, &res);
@@ -38,14 +38,13 @@ void BindPPOCRSystemv2(pybind11::module& m) {
// OCRSys
pybind11::class_<application::ocrsystem::PPOCRSystemv2, FastDeployModel>(
m, "PPOCRSystemv2")
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
fastdeploy::vision::ocr::Classifier*,
fastdeploy::vision::ocr::Recognizer*>())
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
fastdeploy::vision::ocr::Recognizer*>())
.def("predict", [](application::ocrsystem::PPOCRSystemv2& self,
pybind11::array& data) {
auto mat = PyArrayToCvMat(data);
vision::OCRResult res;
self.Predict(&mat, &res);

View File

@@ -19,92 +19,88 @@
namespace fastdeploy {
namespace application {
namespace ocrsystem {
PPOCRSystemv2::PPOCRSystemv2(fastdeploy::vision::ocr::DBDetector* ocr_det,
fastdeploy::vision::ocr::Classifier* ocr_cls,
fastdeploy::vision::ocr::Recognizer* ocr_rec)
: detector(ocr_det), classifier(ocr_cls), recognizer(ocr_rec) {}
void PPOCRSystemv2::Detect(cv::Mat* img,
fastdeploy::vision::OCRResult* result) {
std::vector<std::array<int, 8>> boxes;
this->detector->Predict(img, &boxes);
result->boxes = boxes;
PPOCRSystemv2::PPOCRSystemv2(fastdeploy::vision::ocr::DBDetector* det_model,
fastdeploy::vision::ocr::Classifier* cls_model,
fastdeploy::vision::ocr::Recognizer* rec_model)
: detector_(det_model), classifier_(cls_model), recognizer_(rec_model) {
recognizer_->rec_image_shape[1] = 32;
}
void PPOCRSystemv2::Recognize(cv::Mat* img,
PPOCRSystemv2::PPOCRSystemv2(fastdeploy::vision::ocr::DBDetector* det_model,
fastdeploy::vision::ocr::Recognizer* rec_model)
: detector_(det_model), recognizer_(rec_model) {
recognizer_->rec_image_shape[1] = 32;
}
bool PPOCRSystemv2::Detect(cv::Mat* img,
fastdeploy::vision::OCRResult* result) {
if (!detector_->Predict(img, &(result->boxes))) {
FDERROR << "There's error while detecting image in PPOCRSystem." << std::endl;
return false;
}
vision::ocr::SortBoxes(result);
return true;
}
bool PPOCRSystemv2::Recognize(cv::Mat* img,
fastdeploy::vision::OCRResult* result) {
std::tuple<std::string, float> rec_result;
this->recognizer->rec_image_shape[1] = 32;
this->recognizer->Predict(img, &rec_result);
if (!recognizer_->Predict(img, &rec_result)) {
FDERROR << "There's error while recognizing image in PPOCRSystem." << std::endl;
return false;
}
result->text.push_back(std::get<0>(rec_result));
result->rec_scores.push_back(std::get<1>(rec_result));
return true;
}
void PPOCRSystemv2::Classify(cv::Mat* img,
bool PPOCRSystemv2::Classify(cv::Mat* img,
fastdeploy::vision::OCRResult* result) {
std::tuple<int, float> cls_result;
this->classifier->Predict(img, &cls_result);
if (!classifier_->Predict(img, &cls_result)) {
FDERROR << "There's error while classifying image in PPOCRSystem." << std::endl;
return false;
}
result->cls_labels.push_back(std::get<0>(cls_result));
result->cls_scores.push_back(std::get<1>(cls_result));
return true;
}
bool PPOCRSystemv2::Predict(cv::Mat* img,
fastdeploy::vision::OCRResult* result) {
if (this->detector->initialized == 0) { // no detection model
// Feed the single cropped image to the classifier
if (this->classifier->initialized != 0) {
this->Classify(img, result);
// Rotate the single image upright
if ((result->cls_labels)[0] % 2 == 1 &&
(result->cls_scores)[0] > this->classifier->cls_thresh) {
cv::rotate(*img, *img, 1);
}
}
// Feed the single cropped image to the recognizer
if (this->recognizer->initialized != 0) {
this->Recognize(img, result);
}
} else {
// Start from the DET model
// One image may produce multiple cropped images, which are fed to the downstream models
this->Detect(img, result);
// crop image
std::vector<cv::Mat> img_list;
for (int j = 0; j < (result->boxes).size(); j++) {
cv::Mat crop_img;
crop_img =
fastdeploy::vision::ocr::GetRotateCropImage(*img, (result->boxes)[j]);
img_list.push_back(crop_img);
}
// cls
if (this->classifier->initialized != 0) {
for (int i = 0; i < img_list.size(); i++) {
this->Classify(&img_list[0], result);
}
for (int i = 0; i < img_list.size(); i++) {
if ((result->cls_labels)[i] % 2 == 1 &&
(result->cls_scores)[i] > this->classifier->cls_thresh) {
std::cout << "Rotate this image " << std::endl;
cv::rotate(img_list[i], img_list[i], 1);
}
}
}
// rec
if (this->recognizer->initialized != 0) {
for (int i = 0; i < img_list.size(); i++) {
this->Recognize(&img_list[i], result);
}
}
result->Clear();
if (nullptr != detector_ && !Detect(img, result)) {
FDERROR << "Failed to detect image." << std::endl;
return false;
}
// Get cropped images from the detection result
std::vector<cv::Mat> image_list;
for (size_t i = 0; i < result->boxes.size(); ++i) {
auto crop_im = vision::ocr::GetRotateCropImage(*img, (result->boxes)[i]);
image_list.push_back(crop_im);
}
if (result->boxes.size() == 0) {
image_list.push_back(*img);
}
for (size_t i = 0; i < image_list.size(); ++i) {
if (nullptr != classifier_ && !Classify(&(image_list[i]), result)) {
FDERROR << "Failed to classify croped image of index " << i << "." << std::endl;
return false;
}
if (nullptr != classifier_ && result->cls_labels[i] % 2 == 1 && result->cls_scores[i] > classifier_->cls_thresh) {
cv::rotate(image_list[i], image_list[i], 1);
}
if (nullptr != recognizer_ && !Recognize(&(image_list[i]), result)) {
FDERROR << "Failed to recgnize croped image of index " << i << "." << std::endl;
return false;
}
}
return true;
};
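The refactored `Predict` above clears any previous result, runs detection when a detector is present, and then classifies and recognizes each cropped box in turn. Driven from Python through the `PPOCRSystemv2` wrapper, the pipeline looks roughly like the sketch below; model directories are placeholders, and passing `cls_model=None` to skip the classifier is an assumption based on the new detector+recognizer constructor:
```
import cv2
import fastdeploy as fd

option = fd.RuntimeOption()  # CPU by default

det_model = fd.vision.ocr.DBDetector(
    "ch_PP-OCRv2_det_infer/inference.pdmodel",
    "ch_PP-OCRv2_det_infer/inference.pdiparams",
    runtime_option=option)
rec_model = fd.vision.ocr.Recognizer(
    "ch_PP-OCRv2_rec_infer/inference.pdmodel",
    "ch_PP-OCRv2_rec_infer/inference.pdiparams",
    "ppocr_keys_v1.txt",
    runtime_option=option)

# The classifier is optional in the refactored system.
ocr_system = fd.vision.ocr.PPOCRSystemv2(
    det_model=det_model, cls_model=None, rec_model=rec_model)

result = ocr_system.predict(cv2.imread("12.jpg"))
print(result)  # OCRResult with boxes, text, rec_scores, cls_labels, cls_scores
```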

View File

@@ -31,20 +31,23 @@ namespace ocrsystem {
class FASTDEPLOY_DECL PPOCRSystemv2 : public FastDeployModel {
public:
PPOCRSystemv2(fastdeploy::vision::ocr::DBDetector* ocr_det = nullptr,
fastdeploy::vision::ocr::Classifier* ocr_cls = nullptr,
fastdeploy::vision::ocr::Recognizer* ocr_rec = nullptr);
PPOCRSystemv2(fastdeploy::vision::ocr::DBDetector* det_model,
fastdeploy::vision::ocr::Classifier* cls_model,
fastdeploy::vision::ocr::Recognizer* rec_model);
fastdeploy::vision::ocr::DBDetector* detector = nullptr;
fastdeploy::vision::ocr::Classifier* classifier = nullptr;
fastdeploy::vision::ocr::Recognizer* recognizer = nullptr;
PPOCRSystemv2(fastdeploy::vision::ocr::DBDetector* det_model,
fastdeploy::vision::ocr::Recognizer* rec_model);
bool Predict(cv::Mat* img, fastdeploy::vision::OCRResult* result);
virtual bool Predict(cv::Mat* img, fastdeploy::vision::OCRResult* result);
private:
void Detect(cv::Mat* img, fastdeploy::vision::OCRResult* result);
void Recognize(cv::Mat* img, fastdeploy::vision::OCRResult* result);
void Classify(cv::Mat* img, fastdeploy::vision::OCRResult* result);
protected:
fastdeploy::vision::ocr::DBDetector* detector_ = nullptr;
fastdeploy::vision::ocr::Classifier* classifier_ = nullptr;
fastdeploy::vision::ocr::Recognizer* recognizer_ = nullptr;
virtual bool Detect(cv::Mat* img, fastdeploy::vision::OCRResult* result);
virtual bool Recognize(cv::Mat* img, fastdeploy::vision::OCRResult* result);
virtual bool Classify(cv::Mat* img, fastdeploy::vision::OCRResult* result);
};
} // namespace ocrsystem

View File

@@ -1,114 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/ppocr_system_v3.h"
#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
namespace fastdeploy {
namespace application {
namespace ocrsystem {
PPOCRSystemv3::PPOCRSystemv3(fastdeploy::vision::ocr::DBDetector* ocr_det,
fastdeploy::vision::ocr::Classifier* ocr_cls,
fastdeploy::vision::ocr::Recognizer* ocr_rec)
: detector(ocr_det), classifier(ocr_cls), recognizer(ocr_rec) {}
void PPOCRSystemv3::Detect(cv::Mat* img,
fastdeploy::vision::OCRResult* result) {
std::vector<std::array<int, 8>> boxes_result;
this->detector->Predict(img, &boxes_result);
result->boxes = boxes_result;
fastdeploy::vision::ocr::SortBoxes(result);
}
void PPOCRSystemv3::Recognize(cv::Mat* img,
fastdeploy::vision::OCRResult* result) {
std::tuple<std::string, float> rec_result;
this->recognizer->Predict(img, &rec_result);
result->text.push_back(std::get<0>(rec_result));
result->rec_scores.push_back(std::get<1>(rec_result));
}
void PPOCRSystemv3::Classify(cv::Mat* img,
fastdeploy::vision::OCRResult* result) {
std::tuple<int, float> cls_result;
this->classifier->Predict(img, &cls_result);
result->cls_labels.push_back(std::get<0>(cls_result));
result->cls_scores.push_back(std::get<1>(cls_result));
}
bool PPOCRSystemv3::Predict(cv::Mat* img,
fastdeploy::vision::OCRResult* result) {
if (this->detector->initialized == 0) { // no detection model
// Feed the single cropped image to the classifier
if (this->classifier->initialized != 0) {
this->Classify(img, result);
// Rotate the single image upright
if ((result->cls_labels)[0] % 2 == 1 &&
(result->cls_scores)[0] > this->classifier->cls_thresh) {
cv::rotate(*img, *img, 1);
}
}
// Feed the single cropped image to the recognizer
if (this->recognizer->initialized != 0) {
this->Recognize(img, result);
}
} else {
// Start from the DET model
// One image may produce multiple cropped images, which are fed to the downstream models
this->Detect(img, result);
// crop image
std::vector<cv::Mat> img_list;
for (int j = 0; j < (result->boxes).size(); j++) {
cv::Mat crop_img;
crop_img =
fastdeploy::vision::ocr::GetRotateCropImage(*img, (result->boxes)[j]);
img_list.push_back(crop_img);
}
// cls
if (this->classifier->initialized != 0) {
for (int i = 0; i < img_list.size(); i++) {
this->Classify(&img_list[i], result);
}
for (int i = 0; i < img_list.size(); i++) {
if ((result->cls_labels)[i] % 2 == 1 &&
(result->cls_scores)[i] > this->classifier->cls_thresh) {
cv::rotate(img_list[i], img_list[i], 1);
}
}
}
// rec
if (this->recognizer->initialized != 0) {
for (int i = 0; i < img_list.size(); i++) {
this->Recognize(&img_list[i], result);
}
}
}
return true;
};
} // namespace ocrsystem
} // namespace application
} // namespace fastdeploy

View File

@@ -14,37 +14,25 @@
#pragma once
#include <vector>
#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/vision/common/processors/transform.h"
#include "fastdeploy/vision/common/result.h"
#include "fastdeploy/vision/ocr/ppocr/classifier.h"
#include "fastdeploy/vision/ocr/ppocr/dbdetector.h"
#include "fastdeploy/vision/ocr/ppocr/recognizer.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.h"
#include "fastdeploy/vision/ocr/ppocr/ppocr_system_v2.h"
namespace fastdeploy {
namespace application {
namespace ocrsystem {
class FASTDEPLOY_DECL PPOCRSystemv3 : public FastDeployModel {
class FASTDEPLOY_DECL PPOCRSystemv3 : public PPOCRSystemv2 {
public:
PPOCRSystemv3(fastdeploy::vision::ocr::DBDetector* ocr_det = nullptr,
fastdeploy::vision::ocr::Classifier* ocr_cls = nullptr,
fastdeploy::vision::ocr::Recognizer* ocr_rec = nullptr);
PPOCRSystemv3(fastdeploy::vision::ocr::DBDetector* det_model,
fastdeploy::vision::ocr::Classifier* cls_model,
fastdeploy::vision::ocr::Recognizer* rec_model) : PPOCRSystemv2(det_model, cls_model, rec_model) {
// The only difference between v2 and v3
recognizer_->rec_image_shape[1] = 48;
}
fastdeploy::vision::ocr::DBDetector* detector = nullptr;
fastdeploy::vision::ocr::Classifier* classifier = nullptr;
fastdeploy::vision::ocr::Recognizer* recognizer = nullptr;
bool Predict(cv::Mat* img, fastdeploy::vision::OCRResult* result);
private:
void Detect(cv::Mat* img, fastdeploy::vision::OCRResult* result);
void Recognize(cv::Mat* img, fastdeploy::vision::OCRResult* result);
void Classify(cv::Mat* img, fastdeploy::vision::OCRResult* result);
PPOCRSystemv3(fastdeploy::vision::ocr::DBDetector* det_model,
fastdeploy::vision::ocr::Recognizer* rec_model) : PPOCRSystemv2(det_model, rec_model) {
recognizer_->rec_image_shape[1] = 48;
}
};
} // namespace ocrsystem
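Since v3 only changes the recognizer input height (48 instead of 32) and inherits everything else from v2, switching versions from Python is just a matter of constructing the other wrapper. A short sketch; the keyword names mirror the v2 Python example further below and are an assumption for the v3 binding:
```
# det_model, cls_model and rec_model are built from the PP-OCRv3 model files as in the examples.
ocr_system_v3 = fd.vision.ocr.PPOCRSystemv3(
    det_model=det_model, cls_model=cls_model, rec_model=rec_model)
result = ocr_system_v3.predict(im)
```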

View File

@@ -44,11 +44,11 @@ Recognizer::Recognizer(const std::string& model_file,
const RuntimeOption& custom_option,
const Frontend& model_format) {
if (model_format == Frontend::ONNX) {
valid_cpu_backends = {Backend::ORT}; // Available CPU backends
valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; // Available CPU backends
valid_gpu_backends = {Backend::ORT, Backend::TRT}; // Available GPU backends
} else {
// NOTE: This model does not yet support GPU inference with Paddle Inference
valid_cpu_backends = {Backend::ORT, Backend::PDINFER};
valid_cpu_backends = {Backend::ORT, Backend::PDINFER, Backend::OPENVINO};
valid_gpu_backends = {Backend::ORT, Backend::TRT};
}
@@ -204,4 +204,4 @@ bool Recognizer::Predict(cv::Mat* img,
} // namespace ocr
} // namespace vision
} // namespace fastdeploy
} // namespace fastdeploy

View File

@@ -32,6 +32,10 @@ bool CompareBox(const std::array<int, 8>& result1,
void SortBoxes(OCRResult* result) {
std::sort(result->boxes.begin(), result->boxes.end(), CompareBox);
if (result->boxes.size() == 0) {
return;
}
for (int i = 0; i < result->boxes.size() - 1; i++) {
if (abs(result->boxes[i + 1][1] - result->boxes[i][1]) < 10 &&
(result->boxes[i + 1][0] < result->boxes[i][0])) {
@@ -42,4 +46,4 @@ void SortBoxes(OCRResult* result) {
} // namespace ocr
} // namespace vision
} // namespace fastdeploy
} // namespace fastdeploy
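The added early return protects the `size() - 1` loop from an empty box list (an unsigned underflow in C++). For reference, the reading-order sort can be sketched in Python: boxes are ordered by top-left y and then x, and adjacent boxes on roughly the same text line (vertical gap below 10 pixels) are swapped back into left-to-right order:
```
def sort_boxes(boxes):
    """Sort quadrilateral boxes [x1, y1, ..., x4, y4] into reading order (sketch)."""
    boxes.sort(key=lambda b: (b[1], b[0]))  # top-left y first, then x
    for i in range(len(boxes) - 1):  # empty or single-box lists skip the loop
        if abs(boxes[i + 1][1] - boxes[i][1]) < 10 and boxes[i + 1][0] < boxes[i][0]:
            boxes[i], boxes[i + 1] = boxes[i + 1], boxes[i]
    return boxes
```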

View File

@@ -0,0 +1,84 @@
# Runtime
Once a `RuntimeOption` has been configured, a Runtime can be created on top of different backends and hardware to run model inference.
## Python class
```
class Runtime(runtime_option)
```
**Parameters**
> * **runtime_option**(fastdeploy.RuntimeOption): a configured RuntimeOption instance
### Member functions
```
infer(data)
```
Runs model inference on the input data.
**Parameters**
> * **data**(dict({str: np.ndarray})): the input data, a dict whose keys are input names and whose values are np.ndarray
**Return value**
Returns a list whose length equals the number of model outputs; each element is an np.ndarray.
```
num_inputs()
```
Returns the number of model inputs.
```
num_outputs()
```
Returns the number of model outputs.
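A minimal end-to-end sketch of the Python class above; the model path and input shape are placeholders:
```
import numpy as np
import fastdeploy as fd

option = fd.RuntimeOption()
option.set_model_path("resnet50/inference.pdmodel", "resnet50/inference.pdiparams")
option.use_cpu()

runtime = fd.Runtime(option)
print(runtime.num_inputs(), runtime.num_outputs())

input_name = runtime.get_input_info(0).name
outputs = runtime.infer({input_name: np.random.rand(1, 3, 224, 224).astype("float32")})
print(outputs[0].shape)
```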
## C++ class
```
class Runtime
```
### Member functions
```
bool Init(const RuntimeOption& runtime_option)
```
Loads the model and initializes the runtime.
**Parameters**
> * **runtime_option**: a configured RuntimeOption instance
**Return value**
Returns true if initialization succeeds, false otherwise.
```
bool Infer(vector<FDTensor>& inputs, vector<FDTensor>* outputs)
```
Runs inference on the inputs and writes the results to outputs.
**Parameters**
> * **inputs**: input data
> * **outputs**: output data
**Return value**
Returns true if inference succeeds, false otherwise.
```
int NumInputs()
```
Returns the number of model inputs.
```
int NumOutputs()
```
Returns the number of model outputs.

View File

@@ -0,0 +1,233 @@
# RuntimeOption
`RuntimeOption` configures the inference parameters of a model on different backends and hardware.
## Python class
```
class RuntimeOption()
```
### Member functions
```
set_model_path(model_file, params_file="", model_format="paddle")
```
Sets the paths of the model to load.
**Parameters**
> * **model_file**(str): path of the model file
> * **params_file**(str): path of the parameters file; not required for ONNX models
> * **model_format**(str): model format, "paddle" or "onnx", default "paddle"
```
use_gpu(device_id=0)
```
Runs inference on GPU.
**Parameters**
> * **device_id**(int): when several GPU cards are available, selects the card used for inference, default 0
```
use_cpu()
```
Runs inference on CPU.
```
set_cpu_thread_num(thread_num=-1)
```
Sets the number of threads used for inference on CPU.
**Parameters**
> * **thread_num**(int): number of threads; when less than or equal to 0, the backend allocates it automatically, default -1
```
use_paddle_backend()
```
Uses the Paddle Inference backend; supports CPU/GPU and the Paddle model format.
```
use_ort_backend()
```
Uses the ONNX Runtime backend; supports CPU/GPU and the Paddle/ONNX model formats.
```
use_trt_backend()
```
Uses the TensorRT backend; supports GPU and the Paddle/ONNX model formats.
```
use_openvino_backend()
```
Uses the OpenVINO backend; supports CPU and the Paddle/ONNX model formats.
```
enable_paddle_mkldnn()
disable_paddle_mkldnn()
```
When using the Paddle Inference backend, these switches enable or disable MKLDNN acceleration on CPU; enabled by default.
```
enable_paddle_log_info()
disable_paddle_log_info()
```
When using the Paddle Inference backend, these switches enable or disable the optimization logs printed while the model is loaded; disabled by default.
```
set_paddle_mkldnn_cache_size(cache_size)
```
When using the Paddle Inference backend, controls the size of the shape cache used by MKLDNN acceleration.
**Parameters**
> * **cache_size**(int): cache size
```
set_trt_input_shape(tensor_name, min_shape, opt_shape=None, max_shape=None)
```
When using the TensorRT backend, sets the shape range of each model input. If only min_shape is given, opt_shape and max_shape are set to the same value automatically.
Calling this interface is optional: FastDeploy updates the shape range automatically from the real data seen during inference, but every time a new shape extends the range the backend engine is rebuilt, which costs time. Configuring the range in advance through this interface avoids engine rebuilds during inference.
**Parameters**
> * **tensor_name**(str): name of the tensor whose shape range is set
> * **min_shape**(list of int): minimum shape of the tensor, e.g. [1, 3, 224, 224]
> * **opt_shape**(list of int): most common shape of the tensor, e.g. [2, 3, 224, 224]; when None, it is kept identical to min_shape, default None
> * **max_shape**(list of int): maximum shape of the tensor, e.g. [8, 3, 224, 224]; when None, it is kept identical to min_shape, default None
```
set_trt_cache_file(cache_file_path)
```
When using the TensorRT backend, caches the built TensorRT engine to the given path, or skips engine building entirely and loads the locally cached TensorRT engine.
- When this interface is called and `cache_file_path` does not exist, FastDeploy builds the TensorRT engine and saves it to `cache_file_path`.
- When this interface is called and `cache_file_path` exists, FastDeploy directly loads the already built TensorRT engine stored in `cache_file_path`, which greatly reduces model loading and initialization time.
This interface speeds up model loading and initialization from the second run onward. Note, however, that if you change the loading configuration (for example TensorRT's max_workspace_size), call `set_trt_input_shape` with different values, or replace the original Paddle or ONNX model, you must delete the locally cached `cache_file_path` file first so that a stale cache is not reloaded, which would affect correctness.
**Parameters**
> * **cache_file_path**(str): cache file path, e.g. `/Downloads/resnet50.trt`
```
enable_trt_fp16()
disable_trt_fp16()
```
When using the TensorRT backend, these switches enable or disable half-precision inference, which brings a clear performance gain; note that not every GPU supports half precision. On GPUs without FP16 support, inference falls back to FP32 and prints `Detected FP16 is not supported in the current GPU, will use FP32 instead.`
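A short sketch combining the TensorRT-related options above; the tensor name and shapes are illustrative only:
```
import fastdeploy as fd

option = fd.RuntimeOption()
option.set_model_path("resnet50/inference.pdmodel", "resnet50/inference.pdiparams")
option.use_gpu(0)
option.use_trt_backend()
# Fix the shape range up front to avoid engine rebuilds during inference.
option.set_trt_input_shape("x", [1, 3, 224, 224], [4, 3, 224, 224], [8, 3, 224, 224])
# Cache the built engine so later runs skip engine construction.
option.set_trt_cache_file("./resnet50.trt")
option.enable_trt_fp16()
```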
## C++ struct
```
struct RuntimeOption
```
### Member functions
```
void SetModelPath(const string& model_file, const string& params_file = "", const string& model_format = "paddle")
```
Sets the paths of the model to load.
**Parameters**
> * **model_file**: path of the model file
> * **params_file**: path of the parameters file; pass "" for ONNX models
> * **model_format**: model format, "paddle" or "onnx", default "paddle"
```
void UseGpu(int device_id = 0)
```
Runs inference on GPU.
**Parameters**
> * **device_id**: when several GPU cards are available, selects the card used for inference, default 0
```
void UseCpu()
```
Runs inference on CPU.
```
void SetCpuThreadNum(int thread_num=-1)
```
Sets the number of threads used for inference on CPU.
**Parameters**
> * **thread_num**: number of threads; when less than or equal to 0, the backend allocates it automatically, default -1
```
void UsePaddleBackend()
```
Uses the Paddle Inference backend; supports CPU/GPU and the Paddle model format.
```
void UseOrtBackend()
```
Uses the ONNX Runtime backend; supports CPU/GPU and the Paddle/ONNX model formats.
```
void UseTrtBackend()
```
Uses the TensorRT backend; supports GPU and the Paddle/ONNX model formats.
```
void UseOpenVINOBackend()
```
Uses the OpenVINO backend; supports CPU and the Paddle/ONNX model formats.
```
void EnablePaddleMKLDNN()
void DisablePaddleMKLDNN()
```
When using the Paddle Inference backend, these switches enable or disable MKLDNN acceleration on CPU; enabled by default.
```
void EnablePaddleLogInfo()
void DisablePaddleLogInfo()
```
When using the Paddle Inference backend, these switches enable or disable the optimization logs printed while the model is loaded; disabled by default.
```
void SetPaddleMKLDNNCacheSize(int cache_size)
```
When using the Paddle Inference backend, controls the size of the shape cache used by MKLDNN acceleration.
**Parameters**
> * **cache_size**: cache size
```
void SetTrtInputShape(const string& tensor_name, const vector<int32_t>& min_shape,
                      const vector<int32_t>& opt_shape = vector<int32_t>(),
                      const vector<int32_t>& max_shape = vector<int32_t>())
```
When using the TensorRT backend, sets the shape range of each model input. If only min_shape is given, opt_shape and max_shape are set to the same value automatically.
Calling this interface is optional: FastDeploy updates the shape range automatically from the real data seen during inference, but every time a new shape extends the range the backend engine is rebuilt, which costs time. Configuring the range in advance through this interface avoids engine rebuilds during inference.
**Parameters**
> * **tensor_name**: name of the tensor whose shape range is set
> * **min_shape**: minimum shape of the tensor, e.g. [1, 3, 224, 224]
> * **opt_shape**: most common shape of the tensor, e.g. [2, 3, 224, 224]; when left as the default empty vector it is kept identical to min_shape
> * **max_shape**: maximum shape of the tensor, e.g. [8, 3, 224, 224]; when left as the default empty vector it is kept identical to min_shape
```
void SetTrtCacheFile(const string& cache_file_path)
```
When using the TensorRT backend, caches the built TensorRT engine to the given path, or skips engine building entirely and loads the locally cached TensorRT engine.
- When this interface is called and `cache_file_path` does not exist, FastDeploy builds the TensorRT engine and saves it to `cache_file_path`.
- When this interface is called and `cache_file_path` exists, FastDeploy directly loads the already built TensorRT engine stored in `cache_file_path`, which greatly reduces model loading and initialization time.
This interface speeds up model loading and initialization from the second run onward. Note, however, that if you change the loading configuration (for example TensorRT's max_workspace_size), call `SetTrtInputShape` with different values, or replace the original Paddle or ONNX model, you must delete the locally cached `cache_file_path` file first so that a stale cache is not reloaded, which would affect correctness.
**Parameters**
> * **cache_file_path**: cache file path, e.g. `/Downloads/resnet50.trt`
```
void EnableTrtFp16()
void DisableTrtFp16()
```
When using the TensorRT backend, these switches enable or disable half-precision inference, which brings a clear performance gain; note that not every GPU supports half precision. On GPUs without FP16 support, inference falls back to FP32 and prints `Detected FP16 is not supported in the current GPU, will use FP32 instead.`

View File

@@ -11,11 +11,13 @@
|:---- | :--- | :--- |
| ENABLE_ORT_BACKEND | Enables the ONNX Runtime inference backend, default ON | Supports CPU by default; also supports GPU when WITH_GPU is enabled |
| ENABLE_PADDLE_BACKEND | Enables the Paddle Inference backend, default OFF | Supports CPU by default; also supports GPU when WITH_GPU is enabled |
| ENABLE_OPENVINO_BACKEND | Enables the OpenVINO inference backend, default OFF | CPU only |
| ENABLE_TRT_BACKEND | Enables the TensorRT inference backend, default OFF | GPU only |
| WITH_GPU | Whether to enable GPU support, default OFF | When set to TRUE, the build supports deployment on Nvidia GPUs |
| CUDA_DIRECTORY | CUDA path used at build time, default /usr/local/cuda | CUDA 11.2 or later |
| TRT_DIRECTORY | TensorRT path, required when the TensorRT backend is enabled | TensorRT 8.4 or later |
| ENABLE_VISION | Enables the vision model module, default ON | |
| ENABLE_TEXT | Enables the text model module, default ON | |
FastDeploy lets users choose which backends to enable at build time; Paddle Inference, ONNX Runtime and TensorRT (loading the ONNX format) are currently supported. The models supported by FastDeploy have been validated on the different backends, and an available backend is selected automatically among those enabled at build time; if no backend is available, a corresponding message is printed (for example, YOLOv7 currently only supports the ONNX Runtime/TensorRT backends, so if neither was enabled at build time, inference reports that no backend is available).

docs/runtime/README.md Normal file
View File

@@ -0,0 +1,18 @@
# FastDeploy Inference Backends
FastDeploy has integrated multiple inference backends. The table below lists each integrated backend together with the platforms, hardware and model formats it supports in FastDeploy.
| Inference backend | Supported platforms | Supported hardware | Supported model formats |
| :------- | :------- | :------- | :---- |
| Paddle Inference | Windows(x64)/Linux(x64) | GPU/CPU | Paddle |
| ONNX Runtime | Windows(x64)/Linux(x64/aarch64) | GPU/CPU | Paddle/ONNX |
| TensorRT | Windows(x64)/Linux(x64/jetson) | GPU | Paddle/ONNX |
| OpenVINO | Windows(x64)/Linux(x64) | CPU | Paddle/ONNX |
| Poros [in progress] | Linux(x64) | CPU/GPU | TorchScript |
The backends in FastDeploy are independent; users can enable one or more of them when building from source. The `Runtime` module in FastDeploy provides a unified API over all backends; see the [FastDeploy Runtime usage document](usage.md) for how to use it.
## Other documents
- [Building FastDeploy](../compile)

docs/runtime/usage.md Normal file
View File

@@ -0,0 +1,44 @@
# FastDeploy Runtime Usage
`Runtime` is the model inference module in FastDeploy. It currently integrates multiple backends, and through a unified API users can quickly run models of different formats on various hardware, platforms and backends. This document shows inference on different hardware and backends through the examples below.
## CPU inference
Python example
```
import fastdeploy as fd
import numpy as np
option = fd.RuntimeOption()
# Set the model paths
option.set_model_path("resnet50/inference.pdmodel", "resnet50/inference.pdiparams")
# Use the OpenVINO backend
option.use_openvino_backend()
# Create the runtime
runtime = fd.Runtime(option)
# Get the input name
input_name = runtime.get_input_info(0).name
# Build input data and run inference
results = runtime.infer({input_name: np.random.rand(1, 3, 224, 224).astype("float32")})
```
## GPU inference
```
import fastdeploy as fd
import numpy as np
option = fd.RuntimeOption()
# Set the model paths
option.set_model_path("resnet50/inference.pdmodel", "resnet50/inference.pdiparams")
# Use GPU card 0
option.use_gpu(0)
# Use the Paddle Inference backend
option.use_paddle_backend()
# Create the runtime
runtime = fd.Runtime(option)
# Get the input name
input_name = runtime.get_input_info(0).name
# Build input data and run inference
results = runtime.infer({input_name: np.random.rand(1, 3, 224, 224).astype("float32")})
```
For more Python/C++ inference examples, see [FastDeploy/examples/runtime](../../examples/runtime)

View File

@@ -0,0 +1,16 @@
# FastDeploy Runtime Inference Examples
| Example code | Language | Description |
| :------- | :------- | :---- |
| python/infer_paddle_paddle_inference.py | Python | Inference of a Paddle model with Paddle Inference on CPU/GPU |
| python/infer_paddle_tensorrt.py | Python | Inference of a Paddle model with TensorRT on GPU |
| python/infer_paddle_openvino.py | Python | Inference of a Paddle model with OpenVINO on CPU |
| python/infer_paddle_onnxruntime.py | Python | Inference of a Paddle model with ONNX Runtime on CPU/GPU |
| python/infer_onnx_openvino.py | Python | Inference of an ONNX model with OpenVINO on CPU |
| python/infer_onnx_tensorrt.py | Python | Inference of an ONNX model with TensorRT on GPU |
| cpp/infer_paddle_paddle_inference.cc | C++ | Inference of a Paddle model with Paddle Inference on CPU/GPU |
| cpp/infer_paddle_tensorrt.cc | C++ | Inference of a Paddle model with TensorRT on GPU |
| cpp/infer_paddle_openvino.cc | C++ | Inference of a Paddle model with OpenVINO on CPU |
| cpp/infer_paddle_onnxruntime.cc | C++ | Inference of a Paddle model with ONNX Runtime on CPU/GPU |
| cpp/infer_onnx_openvino.cc | C++ | Inference of an ONNX model with OpenVINO on CPU |
| cpp/infer_onnx_tensorrt.cc | C++ | Inference of an ONNX model with TensorRT on GPU |

View File

@@ -0,0 +1,39 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import fastdeploy as fd
import numpy as np
# Download the model
model_url = "https://bj.bcebos.com/fastdeploy/models/mobilenetv2.onnx"
fd.download(model_url, path=".")
option = fd.RuntimeOption()
option.set_model_path("mobilenetv2.onnx", model_format="onnx")
option.use_openvino_backend()
# Create the runtime
runtime = fd.Runtime(option)
# Get the model input name
input_name = runtime.get_input_info(0).name
# Run inference on random data
results = runtime.infer({
input_name: np.random.rand(1, 3, 224, 224).astype("float32")
})
print(results[0].shape)

View File

@@ -0,0 +1,41 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import fastdeploy as fd
import numpy as np
# Download the model
model_url = "https://bj.bcebos.com/fastdeploy/models/mobilenetv2.onnx"
fd.download(model_url, path=".")
option = fd.RuntimeOption()
option.set_model_path("mobilenetv2.onnx", model_format="onnx")
# **** GPU configuration ****
option.use_gpu(0)
option.use_trt_backend()
# Create the runtime
runtime = fd.Runtime(option)
# Get the model input name
input_name = runtime.get_input_info(0).name
# Run inference on random data
results = runtime.infer({
input_name: np.random.rand(1, 3, 224, 224).astype("float32")
})
print(results[0].shape)

View File

@@ -0,0 +1,47 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import fastdeploy as fd
import numpy as np
# Download and decompress the model
model_url = "https://bj.bcebos.com/fastdeploy/models/mobilenetv2.tgz"
fd.download_and_decompress(model_url)
option = fd.RuntimeOption()
option.set_model_path("mobilenetv2/inference.pdmodel",
"mobilenetv2/inference.pdiparams")
# **** CPU configuration ****
option.use_cpu()
option.use_ort_backend()
option.set_cpu_thread_num(12)
# **** GPU configuration ****
# To use GPU, uncomment the line below
# option.use_gpu(0)
# Create the runtime
runtime = fd.Runtime(option)
# Get the model input name
input_name = runtime.get_input_info(0).name
# Run inference on random data
results = runtime.infer({
input_name: np.random.rand(1, 3, 224, 224).astype("float32")
})
print(results[0].shape)

View File

@@ -0,0 +1,42 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import fastdeploy as fd
import numpy as np
# Download and decompress the model
model_url = "https://bj.bcebos.com/fastdeploy/models/mobilenetv2.tgz"
fd.download_and_decompress(model_url)
option = fd.RuntimeOption()
option.set_model_path("mobilenetv2/inference.pdmodel",
"mobilenetv2/inference.pdiparams")
option.use_cpu()
option.use_openvino_backend()
option.set_cpu_thread_num(12)
# Create the runtime
runtime = fd.Runtime(option)
# Get the model input name
input_name = runtime.get_input_info(0).name
# Run inference on random data
results = runtime.infer({
input_name: np.random.rand(1, 3, 224, 224).astype("float32")
})
print(results[0].shape)

View File

@@ -0,0 +1,47 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import fastdeploy as fd
import numpy as np
# Download and decompress the model
model_url = "https://bj.bcebos.com/fastdeploy/models/mobilenetv2.tgz"
fd.download_and_decompress(model_url)
option = fd.RuntimeOption()
option.set_model_path("mobilenetv2/inference.pdmodel",
"mobilenetv2/inference.pdiparams")
# **** CPU configuration ****
option.use_cpu()
option.use_paddle_backend()
option.set_cpu_thread_num(12)
# **** GPU configuration ****
# To use GPU, uncomment the line below
# option.use_gpu(0)
# Create the runtime
runtime = fd.Runtime(option)
# Get the model input name
input_name = runtime.get_input_info(0).name
# Run inference on random data
results = runtime.infer({
input_name: np.random.rand(1, 3, 224, 224).astype("float32")
})
print(results[0].shape)

View File

@@ -0,0 +1,42 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import fastdeploy as fd
import numpy as np
# Download and decompress the model
model_url = "https://bj.bcebos.com/fastdeploy/models/mobilenetv2.tgz"
fd.download_and_decompress(model_url)
option = fd.RuntimeOption()
option.set_model_path("mobilenetv2/inference.pdmodel",
"mobilenetv2/inference.pdiparams")
# **** GPU configuration ****
option.use_gpu(0)
option.use_trt_backend()
# Create the runtime
runtime = fd.Runtime(option)
# Get the model input name
input_name = runtime.get_input_info(0).name
# Run inference on random data
results = runtime.infer({
input_name: np.random.rand(1, 3, 224, 224).astype("float32")
})
print(results[0].shape)

View File

@@ -28,10 +28,9 @@ tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar.gz
wget https://bj.bcebos.com/paddlehub/fastdeploy/ch_PP-OCRv2_rec_infer.tar.gz
tar -xvf ch_PP-OCRv2_rec_infer.tar.gz
wget https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.6/doc/imgs/12.jpg
wget https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.6/ppocr/utils/ppocr_keys_v1.txt
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
# CPU inference
./infer_demo ./ch_PP-OCRv2_det_infer ./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv2_rec_infer ./ppocr_keys_v1.txt ./12.jpg 0
@@ -39,8 +38,6 @@ wget https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.6/ppocr/
./infer_demo ./ch_PP-OCRv2_det_infer ./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv2_rec_infer ./ppocr_keys_v1.txt ./12.jpg 1
# TensorRT inference on GPU
./infer_demo ./ch_PP-OCRv2_det_infer ./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv2_rec_infer ./ppocr_keys_v1.txt ./12.jpg 2
# OCR also supports combining the det/cls/rec models; for example, to skip the cls model, just pass an empty string at the cls model path position:
./infer_demo ./ch_PP-OCRv2_det_infer "" ./ch_PP-OCRv2_rec_infer ./ppocr_keys_v1.txt ./12.jpg 0
```
After the run finishes, the visualized result is shown in the figure below
@@ -53,9 +50,9 @@ wget https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.6/ppocr/
### PPOCRSystemv2 class
```
fastdeploy::application::ocrsystem::PPOCRSystemv2(fastdeploy::vision::ocr::DBDetector* ocr_det = nullptr,
fastdeploy::vision::ocr::Classifier* ocr_cls = nullptr,
fastdeploy::vision::ocr::Recognizer* ocr_rec = nullptr);
fastdeploy::application::ocrsystem::PPOCRSystemv2(fastdeploy::vision::ocr::DBDetector* det_model,
fastdeploy::vision::ocr::Classifier* cls_model,
fastdeploy::vision::ocr::Recognizer* rec_model);
```
Initializes PPOCRSystemv2 by chaining the detection, classification and recognition models
@@ -66,6 +63,18 @@ Initializes PPOCRSystemv2 by chaining the detection, classification and recognition models
> * **Classifier**(model): the OCR classification model
> * **Recognizer**(model): the OCR recognition model
```
fastdeploy::application::ocrsystem::PPOCRSystemv2(fastdeploy::vision::ocr::DBDetector* det_model,
fastdeploy::vision::ocr::Recognizer* rec_model);
```
Initializes PPOCRSystemv2 by chaining the detection and recognition models (no classifier)
**Parameters**
> * **DBDetector**(model): the OCR detection model
> * **Recognizer**(model): the OCR recognition model
#### Predict function
> ```

View File

@@ -19,11 +19,7 @@ const char sep = '\\';
const char sep = '/';
#endif
void CpuInfer(const std::string& det_model_dir,
const std::string& cls_model_dir,
const std::string& rec_model_dir,
const std::string& rec_label_file,
const std::string& image_file) {
void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model_dir, const std::string& rec_model_dir, const std::string& rec_label_file, const std::string& image_file, const fastdeploy::RuntimeOption& option) {
auto det_model_file = det_model_dir + sep + "inference.pdmodel";
auto det_params_file = det_model_dir + sep + "inference.pdiparams";
@@ -32,238 +28,32 @@ void CpuInfer(const std::string& det_model_dir,
auto rec_model_file = rec_model_dir + sep + "inference.pdmodel";
auto rec_params_file = rec_model_dir + sep + "inference.pdiparams";
auto rec_label = rec_label_file;
fastdeploy::vision::ocr::DBDetector det_model;
fastdeploy::vision::ocr::Classifier cls_model;
fastdeploy::vision::ocr::Recognizer rec_model;
auto det_model = fastdeploy::vision::ocr::DBDetector(det_model_file, det_params_file, option);
auto cls_model = fastdeploy::vision::ocr::Classifier(cls_model_file, cls_params_file, option);
auto rec_model = fastdeploy::vision::ocr::Recognizer(rec_model_file, rec_params_file, rec_label_file, option);
if (!det_model_dir.empty()) {
auto det_option = fastdeploy::RuntimeOption();
det_option.UseCpu();
det_model = fastdeploy::vision::ocr::DBDetector(
det_model_file, det_params_file, det_option);
assert(det_model.Initialized());
assert(cls_model.Initialized());
assert(rec_model.Initialized());
if (!det_model.Initialized()) {
std::cerr << "Failed to initialize det_model." << std::endl;
return;
}
}
if (!cls_model_dir.empty()) {
auto cls_option = fastdeploy::RuntimeOption();
cls_option.UseCpu();
cls_model = fastdeploy::vision::ocr::Classifier(
cls_model_file, cls_params_file, cls_option);
if (!cls_model.Initialized()) {
std::cerr << "Failed to initialize cls_model." << std::endl;
return;
}
}
if (!rec_model_dir.empty()) {
auto rec_option = fastdeploy::RuntimeOption();
rec_option.UseCpu();
rec_model = fastdeploy::vision::ocr::Recognizer(
rec_model_file, rec_params_file, rec_label, rec_option);
if (!rec_model.Initialized()) {
std::cerr << "Failed to initialize rec_model." << std::endl;
return;
}
}
auto ocrv2_app = fastdeploy::application::ocrsystem::PPOCRSystemv2(
&det_model, &cls_model, &rec_model);
// The classification model is optional, so the OCR system can also be chained as follows
// auto ocr_system_v2 = fastdeploy::application::ocrsystem::PPOCRSystemv2(&det_model, &rec_model);
auto ocr_system_v2 = fastdeploy::application::ocrsystem::PPOCRSystemv2(&det_model, &cls_model, &rec_model);
auto im = cv::imread(image_file);
auto im_bak = im.clone();
fastdeploy::vision::OCRResult res;
// Run prediction
if (!ocrv2_app.Predict(&im, &res)) {
fastdeploy::vision::OCRResult result;
if (!ocr_system_v2.Predict(&im, &result)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
// Print the prediction result
std::cout << res.Str() << std::endl;
std::cout << result.Str() << std::endl;
// Visualize the result
auto vis_img = fastdeploy::vision::Visualize::VisOcr(im_bak, res);
cv::imwrite("vis_result.jpg", vis_img);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void GpuInfer(const std::string& det_model_dir,
const std::string& cls_model_dir,
const std::string& rec_model_dir,
const std::string& rec_label_file,
const std::string& image_file) {
auto det_model_file = det_model_dir + sep + "inference.pdmodel";
auto det_params_file = det_model_dir + sep + "inference.pdiparams";
auto cls_model_file = cls_model_dir + sep + "inference.pdmodel";
auto cls_params_file = cls_model_dir + sep + "inference.pdiparams";
auto rec_model_file = rec_model_dir + sep + "inference.pdmodel";
auto rec_params_file = rec_model_dir + sep + "inference.pdiparams";
auto rec_label = rec_label_file;
fastdeploy::vision::ocr::DBDetector det_model;
fastdeploy::vision::ocr::Classifier cls_model;
fastdeploy::vision::ocr::Recognizer rec_model;
// Prepare the models
if (!det_model_dir.empty()) {
auto det_option = fastdeploy::RuntimeOption();
det_option.UseGpu();
det_model = fastdeploy::vision::ocr::DBDetector(
det_model_file, det_params_file, det_option);
if (!det_model.Initialized()) {
std::cerr << "Failed to initialize det_model." << std::endl;
return;
}
}
if (!cls_model_dir.empty()) {
auto cls_option = fastdeploy::RuntimeOption();
cls_option.UseGpu();
cls_model = fastdeploy::vision::ocr::Classifier(
cls_model_file, cls_params_file, cls_option);
if (!cls_model.Initialized()) {
std::cerr << "Failed to initialize cls_model." << std::endl;
return;
}
}
if (!rec_model_dir.empty()) {
auto rec_option = fastdeploy::RuntimeOption();
rec_option.UseGpu();
rec_option
.UsePaddleBackend(); // The OCRv2 rec model is not yet supported by the ORT backend or Paddle Inference
// v2.3.2
rec_model = fastdeploy::vision::ocr::Recognizer(
rec_model_file, rec_params_file, rec_label, rec_option);
if (!rec_model.Initialized()) {
std::cerr << "Failed to initialize rec_model." << std::endl;
return;
}
}
auto ocrv2_app = fastdeploy::application::ocrsystem::PPOCRSystemv2(
&det_model, &cls_model, &rec_model);
auto im = cv::imread(image_file);
auto im_bak = im.clone();
fastdeploy::vision::OCRResult res;
// Run prediction
if (!ocrv2_app.Predict(&im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
// Print the prediction result
std::cout << res.Str() << std::endl;
// Visualize the result
auto vis_img = fastdeploy::vision::Visualize::VisOcr(im_bak, res);
cv::imwrite("vis_result.jpg", vis_img);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void TrtInfer(const std::string& det_model_dir,
const std::string& cls_model_dir,
const std::string& rec_model_dir,
const std::string& rec_label_file,
const std::string& image_file) {
auto det_model_file = det_model_dir + sep + "inference.pdmodel";
auto det_params_file = det_model_dir + sep + "inference.pdiparams";
auto cls_model_file = cls_model_dir + sep + "inference.pdmodel";
auto cls_params_file = cls_model_dir + sep + "inference.pdiparams";
auto rec_model_file = rec_model_dir + sep + "inference.pdmodel";
auto rec_params_file = rec_model_dir + sep + "inference.pdiparams";
auto rec_label = rec_label_file;
fastdeploy::vision::ocr::DBDetector det_model;
fastdeploy::vision::ocr::Classifier cls_model;
fastdeploy::vision::ocr::Recognizer rec_model;
// Prepare the models
if (!det_model_dir.empty()) {
auto det_option = fastdeploy::RuntimeOption();
det_option.UseGpu();
det_option.UseTrtBackend();
det_option.SetTrtInputShape("x", {1, 3, 50, 50}, {1, 3, 640, 640},
{1, 3, 960, 960});
det_model = fastdeploy::vision::ocr::DBDetector(
det_model_file, det_params_file, det_option);
if (!det_model.Initialized()) {
std::cerr << "Failed to initialize det_model." << std::endl;
return;
}
}
if (!cls_model_dir.empty()) {
auto cls_option = fastdeploy::RuntimeOption();
cls_option.UseGpu();
cls_option.UseTrtBackend();
cls_option.SetTrtInputShape("x", {1, 3, 48, 192});
cls_model = fastdeploy::vision::ocr::Classifier(
cls_model_file, cls_params_file, cls_option);
if (!cls_model.Initialized()) {
std::cerr << "Failed to initialize cls_model." << std::endl;
return;
}
}
if (!rec_model_dir.empty()) {
auto rec_option = fastdeploy::RuntimeOption();
rec_option.UseGpu();
rec_option.UseTrtBackend();
rec_option.SetTrtInputShape("x", {1, 3, 48, 10}, {1, 3, 48, 320},
{1, 3, 48, 2000});
rec_model = fastdeploy::vision::ocr::Recognizer(
rec_model_file, rec_params_file, rec_label, rec_option);
if (!rec_model.Initialized()) {
std::cerr << "Failed to initialize rec_model." << std::endl;
return;
}
}
auto ocrv2_app = fastdeploy::application::ocrsystem::PPOCRSystemv2(
&det_model, &cls_model, &rec_model);
auto im = cv::imread(image_file);
auto im_bak = im.clone();
fastdeploy::vision::OCRResult res;
// Run prediction
if (!ocrv2_app.Predict(&im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
// Print the prediction result
std::cout << res.Str() << std::endl;
// Visualize the result
auto vis_img = fastdeploy::vision::Visualize::VisOcr(im_bak, res);
cv::imwrite("vis_result.jpg", vis_img);
auto vis_im = fastdeploy::vision::Visualize::VisOcr(im_bak, result);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
@@ -282,12 +72,23 @@ int main(int argc, char* argv[]) {
return -1;
}
if (std::atoi(argv[6]) == 0) {
CpuInfer(argv[1], argv[2], argv[3], argv[4], argv[5]);
} else if (std::atoi(argv[6]) == 1) {
GpuInfer(argv[1], argv[2], argv[3], argv[4], argv[5]);
} else if (std::atoi(argv[6]) == 2) {
TrtInfer(argv[1], argv[2], argv[3], argv[4], argv[5]);
fastdeploy::RuntimeOption option;
int flag = std::atoi(argv[6]);
if (flag == 0) {
option.UseCpu();
} else if (flag == 1) {
option.UseGpu();
} else if (flag == 2) {
option.UseGpu();
option.UseTrtBackend();
}
std::string det_model_dir = argv[1];
std::string cls_model_dir = argv[2];
std::string rec_model_dir = argv[3];
std::string rec_label_file = argv[4];
std::string test_image = argv[5];
InitAndInfer(det_model_dir, cls_model_dir, rec_model_dir, rec_label_file, test_image, option);
return 0;
}
}

View File

@@ -19,9 +19,9 @@ tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar.gz
wget https://bj.bcebos.com/paddlehub/fastdeploy/ch_PP-OCRv2_rec_infer.tar.gz
tar -xvf ch_PP-OCRv2_rec_infer.tar.gz
wget https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.6/doc/imgs/12.jpg
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
wget https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.6/ppocr/utils/ppocr_keys_v1.txt
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
# Download the deployment example code
@@ -33,9 +33,7 @@ python infer.py --det_model ch_PP-OCRv2_det_infer --cls_model ch_ppocr_mobile_v2
# GPU inference
python infer.py --det_model ch_PP-OCRv2_det_infer --cls_model ch_ppocr_mobile_v2.0_cls_infer --rec_model ch_PP-OCRv2_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device gpu
# TensorRT inference on GPU
python infer.py --det_model ch_PP-OCRv2_det_infer --cls_model ch_ppocr_mobile_v2.0_cls_infer --rec_model ch_PP-OCRv2_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device gpu --det_use_trt True --cls_use_trt True --rec_use_trt True
# OCR also supports combining the det/cls/rec models; for example, to skip the cls model, just pass an empty string to --cls_model:
python infer.py --det_model ch_PP-OCRv2_det_infer --cls_model "" --rec_model ch_PP-OCRv2_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device cpu
python infer.py --det_model ch_PP-OCRv2_det_infer --cls_model ch_ppocr_mobile_v2.0_cls_infer --rec_model ch_PP-OCRv2_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device gpu --backend trt
```
After the run finishes, the visualized result is shown in the figure below

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import fastdeploy as fd
import cv2
import os
@@ -21,7 +35,6 @@ def parse_arguments():
"--rec_label_file",
required=True,
help="Path of Recognization model of PPOCR.")
parser.add_argument(
"--image", type=str, required=True, help="Path of test image file.")
parser.add_argument(
@@ -30,114 +43,83 @@ def parse_arguments():
default='cpu',
help="Type of inference device, support 'cpu' or 'gpu'.")
parser.add_argument(
"--det_use_trt",
type=ast.literal_eval,
default=False,
help="Wether to use tensorrt.")
"--backend",
type=str,
default="default",
help="Type of inference backend, support ort/trt/paddle/openvino, default 'openvino' for cpu, 'tensorrt' for gpu"
)
parser.add_argument(
"--cls_use_trt",
type=ast.literal_eval,
default=False,
help="Wether to use tensorrt.")
"--device_id",
type=int,
default=0,
help="Define which GPU card used to run model.")
parser.add_argument(
"--rec_use_trt",
type=ast.literal_eval,
default=False,
help="Wether to use tensorrt.")
"--cpu_thread_num",
type=int,
default=9,
help="Number of threads while inference on CPU.")
return parser.parse_args()
def build_det_option(args):
def build_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "gpu":
option.use_gpu()
option.use_gpu(0)
if args.det_use_trt:
option.set_cpu_thread_num(args.cpu_thread_num)
if args.backend.lower() == "trt":
assert args.device.lower(
) == "gpu", "TensorRT backend require inference on device GPU."
option.use_trt_backend()
# det_max_side_len defaults to 960; if you change the DET model's max_side_len parameter, change this value accordingly
det_max_side_len = 960
option.set_trt_input_shape("x", [1, 3, 50, 50], [1, 3, 640, 640],
[1, 3, det_max_side_len, det_max_side_len])
return option
def build_cls_option(args):
option = fd.RuntimeOption()
option.use_paddle_backend()
if args.device.lower() == "gpu":
option.use_gpu()
if args.cls_use_trt:
option.use_trt_backend()
option.set_trt_input_shape("x", [1, 3, 32, 100])
return option
def build_rec_option(args):
option = fd.RuntimeOption()
option.use_paddle_backend()
if args.device.lower() == "gpu":
option.use_gpu()
if args.rec_use_trt:
option.use_trt_backend()
option.set_trt_input_shape("x", [1, 3, 48, 10], [1, 3, 48, 320],
[1, 3, 48, 2000])
elif args.backend.lower() == "ort":
option.use_ort_backend()
elif args.backend.lower() == "paddle":
option.use_paddle_backend()
elif args.backend.lower() == "openvino":
assert args.device.lower(
) == "cpu", "OpenVINO backend require inference on device CPU."
option.use_openvino_backend()
return option
args = parse_arguments()
# Det model
# Detection model, detects text boxes
det_model_file = os.path.join(args.det_model, "inference.pdmodel")
det_params_file = os.path.join(args.det_model, "inference.pdiparams")
# Cls model
# Classification model, text direction classification (optional)
cls_model_file = os.path.join(args.cls_model, "inference.pdmodel")
cls_params_file = os.path.join(args.cls_model, "inference.pdiparams")
# Rec model
# Recognition model, recognizes the text content
rec_model_file = os.path.join(args.rec_model, "inference.pdmodel")
rec_params_file = os.path.join(args.rec_model, "inference.pdiparams")
rec_label_file = args.rec_label_file
# Defaults
det_model = fd.vision.ocr.DBDetector()
cls_model = fd.vision.ocr.Classifier()
rec_model = fd.vision.ocr.Recognizer()
# The three models share the same deployment configuration here
# Users can also configure each model separately as needed
runtime_option = build_option(args)
# Model initialization
if (len(args.det_model) != 0):
det_runtime_option = build_det_option(args)
det_model = fd.vision.ocr.DBDetector(
det_model_file, det_params_file, runtime_option=det_runtime_option)
det_model = fd.vision.ocr.DBDetector(
det_model_file, det_params_file, runtime_option=runtime_option)
cls_model = fd.vision.ocr.Classifier(
cls_model_file, cls_params_file, runtime_option=runtime_option)
rec_model = fd.vision.ocr.Recognizer(
rec_model_file,
rec_params_file,
rec_label_file,
runtime_option=runtime_option)
if (len(args.cls_model) != 0):
cls_runtime_option = build_cls_option(args)
cls_model = fd.vision.ocr.Classifier(
cls_model_file, cls_params_file, runtime_option=cls_runtime_option)
if (len(args.rec_model) != 0):
rec_runtime_option = build_rec_option(args)
rec_model = fd.vision.ocr.Recognizer(
rec_model_file,
rec_params_file,
rec_label_file,
runtime_option=rec_runtime_option)
ppocrsysv2 = fd.vision.ocr.PPOCRSystemv2(
ocr_det=det_model._model,
ocr_cls=cls_model._model,
ocr_rec=rec_model._model)
# Create the OCR system by chaining the three models; cls_model is optional and can be set to None if not needed
ocr_system = fd.vision.ocr.PPOCRSystemv2(
det_model=det_model, cls_model=cls_model, rec_model=rec_model)
# Prepare the test image
im = cv2.imread(args.image)
# Predict and print the result
result = ppocrsysv2.predict(im)
result = ocr_system.predict(im)
print(result)
# Visualize the result

View File

@@ -28,10 +28,9 @@ tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar.gz
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
tar -xvf ch_PP-OCRv3_rec_infer.tar
wget https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.6/doc/imgs/12.jpg
wget https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.6/ppocr/utils/ppocr_keys_v1.txt
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
# CPU inference
./infer_demo ./ch_PP-OCRv3_det_infer ./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer ./ppocr_keys_v1.txt ./12.jpg 0
@@ -39,8 +38,6 @@ wget https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.6/ppocr/
./infer_demo ./ch_PP-OCRv3_det_infer ./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer ./ppocr_keys_v1.txt ./12.jpg 1
# TensorRT inference on GPU
./infer_demo ./ch_PP-OCRv3_det_infer ./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer ./ppocr_keys_v1.txt ./12.jpg 2
# OCR also supports combining the det/cls/rec models; for example, to skip the cls model, just pass an empty string at the cls model path position:
./infer_demo ./ch_PP-OCRv3_det_infer "" ./ch_PP-OCRv3_rec_infer ./ppocr_keys_v1.txt ./12.jpg 0
```
After the run finishes, the visualized result is shown in the figure below
@@ -53,12 +50,12 @@ wget https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.6/ppocr/
### PPOCRSystemv3 class
```
fastdeploy::application::ocrsystem::PPOCRSystemv3(fastdeploy::vision::ocr::DBDetector* ocr_det = nullptr,
fastdeploy::vision::ocr::Classifier* ocr_cls = nullptr,
fastdeploy::vision::ocr::Recognizer* ocr_rec = nullptr);
fastdeploy::application::ocrsystem::PPOCRSystemv3(fastdeploy::vision::ocr::DBDetector* det_model,
fastdeploy::vision::ocr::Classifier* cls_model,
fastdeploy::vision::ocr::Recognizer* rec_model);
```
Initialization of PPOCRSystemv3, chaining the detection, classification, and recognition models
**Parameters**
@@ -66,6 +63,17 @@ Initialization of PPOCRSystemv3, chaining the detection, classification, and recognition models
> * **Classifier**(model): The classification model in OCR
> * **Recognizer**(model): The recognition model in OCR
```
fastdeploy::application::ocrsystem::PPOCRSystemv3(fastdeploy::vision::ocr::DBDetector* det_model,
fastdeploy::vision::ocr::Recognizer* rec_model);
```
Initialization of PPOCRSystemv3, chaining the detection and recognition models (no classifier)
**Parameters**
> * **DBDetector**(model): The detection model in OCR
> * **Recognizer**(model): The recognition model in OCR
#### Predict function
> ```

View File

@@ -19,11 +19,7 @@ const char sep = '\\';
const char sep = '/';
#endif
void CpuInfer(const std::string& det_model_dir,
const std::string& cls_model_dir,
const std::string& rec_model_dir,
const std::string& rec_label_file,
const std::string& image_file) {
void InitAndInfer(const std::string& det_model_dir,
                  const std::string& cls_model_dir,
                  const std::string& rec_model_dir,
                  const std::string& rec_label_file,
                  const std::string& image_file,
                  const fastdeploy::RuntimeOption& option) {
auto det_model_file = det_model_dir + sep + "inference.pdmodel";
auto det_params_file = det_model_dir + sep + "inference.pdiparams";
@@ -32,235 +28,32 @@ void CpuInfer(const std::string& det_model_dir,
auto rec_model_file = rec_model_dir + sep + "inference.pdmodel";
auto rec_params_file = rec_model_dir + sep + "inference.pdiparams";
auto rec_label = rec_label_file;
fastdeploy::vision::ocr::DBDetector det_model;
fastdeploy::vision::ocr::Classifier cls_model;
fastdeploy::vision::ocr::Recognizer rec_model;
auto det_model = fastdeploy::vision::ocr::DBDetector(det_model_file, det_params_file, option);
auto cls_model = fastdeploy::vision::ocr::Classifier(cls_model_file, cls_params_file, option);
auto rec_model = fastdeploy::vision::ocr::Recognizer(rec_model_file, rec_params_file, rec_label_file, option);
if (!det_model_dir.empty()) {
auto det_option = fastdeploy::RuntimeOption();
det_option.UseCpu();
det_model = fastdeploy::vision::ocr::DBDetector(
det_model_file, det_params_file, det_option);
assert(det_model.Initialized());
assert(cls_model.Initialized());
assert(rec_model.Initialized());
if (!det_model.Initialized()) {
std::cerr << "Failed to initialize det_model." << std::endl;
return;
}
}
if (!cls_model_dir.empty()) {
auto cls_option = fastdeploy::RuntimeOption();
cls_option.UseCpu();
cls_model = fastdeploy::vision::ocr::Classifier(
cls_model_file, cls_params_file, cls_option);
if (!cls_model.Initialized()) {
std::cerr << "Failed to initialize cls_model." << std::endl;
return;
}
}
if (!rec_model_dir.empty()) {
auto rec_option = fastdeploy::RuntimeOption();
rec_option.UseCpu();
rec_model = fastdeploy::vision::ocr::Recognizer(
rec_model_file, rec_params_file, rec_label, rec_option);
if (!rec_model.Initialized()) {
std::cerr << "Failed to initialize rec_model." << std::endl;
return;
}
}
auto ocrv3_app = fastdeploy::application::ocrsystem::PPOCRSystemv3(
&det_model, &cls_model, &rec_model);
// The classification model is optional, so the OCR system can also be chained as follows:
// auto ocr_system_v3 = fastdeploy::application::ocrsystem::PPOCRSystemv3(&det_model, &rec_model);
auto ocr_system_v3 = fastdeploy::application::ocrsystem::PPOCRSystemv3(&det_model, &cls_model, &rec_model);
auto im = cv::imread(image_file);
auto im_bak = im.clone();
fastdeploy::vision::OCRResult res;
// Run prediction
if (!ocrv3_app.Predict(&im, &res)) {
fastdeploy::vision::OCRResult result;
if (!ocr_system_v3.Predict(&im, &result)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
// Print the prediction result
std::cout << res.Str() << std::endl;
std::cout << result.Str() << std::endl;
// Visualization
auto vis_img = fastdeploy::vision::Visualize::VisOcr(im_bak, res);
cv::imwrite("vis_result.jpg", vis_img);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void GpuInfer(const std::string& det_model_dir,
const std::string& cls_model_dir,
const std::string& rec_model_dir,
const std::string& rec_label_file,
const std::string& image_file) {
auto det_model_file = det_model_dir + sep + "inference.pdmodel";
auto det_params_file = det_model_dir + sep + "inference.pdiparams";
auto cls_model_file = cls_model_dir + sep + "inference.pdmodel";
auto cls_params_file = cls_model_dir + sep + "inference.pdiparams";
auto rec_model_file = rec_model_dir + sep + "inference.pdmodel";
auto rec_params_file = rec_model_dir + sep + "inference.pdiparams";
auto rec_label = rec_label_file;
fastdeploy::vision::ocr::DBDetector det_model;
fastdeploy::vision::ocr::Classifier cls_model;
fastdeploy::vision::ocr::Recognizer rec_model;
// Prepare the models
if (!det_model_dir.empty()) {
auto det_option = fastdeploy::RuntimeOption();
det_option.UseGpu();
det_model = fastdeploy::vision::ocr::DBDetector(
det_model_file, det_params_file, det_option);
if (!det_model.Initialized()) {
std::cerr << "Failed to initialize det_model." << std::endl;
return;
}
}
if (!cls_model_dir.empty()) {
auto cls_option = fastdeploy::RuntimeOption();
cls_option.UseGpu();
cls_model = fastdeploy::vision::ocr::Classifier(
cls_model_file, cls_params_file, cls_option);
if (!cls_model.Initialized()) {
std::cerr << "Failed to initialize cls_model." << std::endl;
return;
}
}
if (!rec_model_dir.empty()) {
auto rec_option = fastdeploy::RuntimeOption();
rec_option.UseGpu();
rec_model = fastdeploy::vision::ocr::Recognizer(
rec_model_file, rec_params_file, rec_label, rec_option);
if (!rec_model.Initialized()) {
std::cerr << "Failed to initialize rec_model." << std::endl;
return;
}
}
auto ocrv3_app = fastdeploy::application::ocrsystem::PPOCRSystemv3(
&det_model, &cls_model, &rec_model);
auto im = cv::imread(image_file);
auto im_bak = im.clone();
fastdeploy::vision::OCRResult res;
// Run prediction
if (!ocrv3_app.Predict(&im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
// Print the prediction result
std::cout << res.Str() << std::endl;
// Visualization
auto vis_img = fastdeploy::vision::Visualize::VisOcr(im_bak, res);
cv::imwrite("vis_result.jpg", vis_img);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void TrtInfer(const std::string& det_model_dir,
const std::string& cls_model_dir,
const std::string& rec_model_dir,
const std::string& rec_label_file,
const std::string& image_file) {
auto det_model_file = det_model_dir + sep + "inference.pdmodel";
auto det_params_file = det_model_dir + sep + "inference.pdiparams";
auto cls_model_file = cls_model_dir + sep + "inference.pdmodel";
auto cls_params_file = cls_model_dir + sep + "inference.pdiparams";
auto rec_model_file = rec_model_dir + sep + "inference.pdmodel";
auto rec_params_file = rec_model_dir + sep + "inference.pdiparams";
auto rec_label = rec_label_file;
fastdeploy::vision::ocr::DBDetector det_model;
fastdeploy::vision::ocr::Classifier cls_model;
fastdeploy::vision::ocr::Recognizer rec_model;
// Prepare the models
if (!det_model_dir.empty()) {
auto det_option = fastdeploy::RuntimeOption();
det_option.UseGpu();
det_option.UseTrtBackend();
det_option.SetTrtInputShape("x", {1, 3, 50, 50}, {1, 3, 640, 640},
{1, 3, 960, 960});
det_model = fastdeploy::vision::ocr::DBDetector(
det_model_file, det_params_file, det_option);
if (!det_model.Initialized()) {
std::cerr << "Failed to initialize det_model." << std::endl;
return;
}
}
if (!cls_model_dir.empty()) {
auto cls_option = fastdeploy::RuntimeOption();
cls_option.UseGpu();
cls_option.UseTrtBackend();
cls_option.SetTrtInputShape("x", {1, 3, 48, 192});
cls_model = fastdeploy::vision::ocr::Classifier(
cls_model_file, cls_params_file, cls_option);
if (!cls_model.Initialized()) {
std::cerr << "Failed to initialize cls_model." << std::endl;
return;
}
}
if (!rec_model_dir.empty()) {
auto rec_option = fastdeploy::RuntimeOption();
rec_option.UseGpu();
rec_option.UseTrtBackend();
rec_option.SetTrtInputShape("x", {1, 3, 48, 10}, {1, 3, 48, 320},
{1, 3, 48, 2000});
rec_model = fastdeploy::vision::ocr::Recognizer(
rec_model_file, rec_params_file, rec_label, rec_option);
if (!rec_model.Initialized()) {
std::cerr << "Failed to initialize rec_model." << std::endl;
return;
}
}
auto ocrv3_app = fastdeploy::application::ocrsystem::PPOCRSystemv3(
&det_model, &cls_model, &rec_model);
auto im = cv::imread(image_file);
auto im_bak = im.clone();
fastdeploy::vision::OCRResult res;
// Run prediction
if (!ocrv3_app.Predict(&im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
// Print the prediction result
std::cout << res.Str() << std::endl;
// Visualization
auto vis_img = fastdeploy::vision::Visualize::VisOcr(im_bak, res);
cv::imwrite("vis_result.jpg", vis_img);
auto vis_im = fastdeploy::vision::Visualize::VisOcr(im_bak, result);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
@@ -279,12 +72,23 @@ int main(int argc, char* argv[]) {
return -1;
}
if (std::atoi(argv[6]) == 0) {
CpuInfer(argv[1], argv[2], argv[3], argv[4], argv[5]);
} else if (std::atoi(argv[6]) == 1) {
GpuInfer(argv[1], argv[2], argv[3], argv[4], argv[5]);
} else if (std::atoi(argv[6]) == 2) {
TrtInfer(argv[1], argv[2], argv[3], argv[4], argv[5]);
fastdeploy::RuntimeOption option;
int flag = std::atoi(argv[6]);
if (flag == 0) {
option.UseCpu();
} else if (flag == 1) {
option.UseGpu();
} else if (flag == 2) {
option.UseGpu();
option.UseTrtBackend();
}
std::string det_model_dir = argv[1];
std::string cls_model_dir = argv[2];
std::string rec_model_dir = argv[3];
std::string rec_label_file = argv[4];
std::string test_image = argv[5];
InitAndInfer(det_model_dir, cls_model_dir, rec_model_dir, rec_label_file, test_image, option);
return 0;
}
}

View File

@@ -19,10 +19,9 @@ tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar.gz
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
tar xvf ch_PP-OCRv3_rec_infer.tar
wget https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.6/doc/imgs/12.jpg
wget https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.6/ppocr/utils/ppocr_keys_v1.txt
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
# Download the deployment example code
git clone https://github.com/PaddlePaddle/FastDeploy.git
@@ -33,9 +32,7 @@ python infer.py --det_model ch_PP-OCRv3_det_infer --cls_model ch_ppocr_mobile_v2
# GPU inference
python infer.py --det_model ch_PP-OCRv3_det_infer --cls_model ch_ppocr_mobile_v2.0_cls_infer --rec_model ch_PP-OCRv3_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device gpu
# TensorRT inference on GPU
python infer.py --det_model ch_PP-OCRv3_det_infer --cls_model ch_ppocr_mobile_v2.0_cls_infer --rec_model ch_PP-OCRv3_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device gpu --det_use_trt True --cls_use_trt True --rec_use_trt True
# OCR also supports any combination of the det/cls/rec models. For example, to skip the cls model, simply pass an empty string to --cls_model, as shown below:
python infer.py --det_model ch_PP-OCRv3_det_infer --cls_model "" --rec_model ch_PP-OCRv3_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device cpu
python infer.py --det_model ch_PP-OCRv3_det_infer --cls_model ch_ppocr_mobile_v2.0_cls_infer --rec_model ch_PP-OCRv3_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device gpu --backend trt
```
After running, the visualized result is shown in the figure below

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import fastdeploy as fd
import cv2
import os
@@ -21,7 +35,6 @@ def parse_arguments():
"--rec_label_file",
required=True,
help="Path of Recognization model of PPOCR.")
parser.add_argument(
"--image", type=str, required=True, help="Path of test image file.")
parser.add_argument(
@@ -30,112 +43,82 @@ def parse_arguments():
default='cpu',
help="Type of inference device, support 'cpu' or 'gpu'.")
parser.add_argument(
"--det_use_trt",
type=ast.literal_eval,
default=False,
help="Wether to use tensorrt.")
"--backend",
type=str,
default="default",
help="Type of inference backend, support ort/trt/paddle/openvino, default 'openvino' for cpu, 'tensorrt' for gpu"
)
parser.add_argument(
"--cls_use_trt",
type=ast.literal_eval,
default=False,
help="Wether to use tensorrt.")
"--device_id",
type=int,
default=0,
help="Define which GPU card used to run model.")
parser.add_argument(
"--rec_use_trt",
type=ast.literal_eval,
default=False,
help="Wether to use tensorrt.")
"--cpu_thread_num",
type=int,
default=9,
help="Number of threads while inference on CPU.")
return parser.parse_args()
def build_det_option(args):
def build_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "gpu":
option.use_gpu()
option.use_gpu(0)
if args.det_use_trt:
option.set_cpu_thread_num(args.cpu_thread_num)
if args.backend.lower() == "trt":
assert args.device.lower(
) == "gpu", "TensorRT backend require inference on device GPU."
option.use_trt_backend()
# det_max_side_len defaults to 960; if you change the DET model's max_side_len parameter, update this value accordingly
det_max_side_len = 960
option.set_trt_input_shape("x", [1, 3, 50, 50], [1, 3, 640, 640],
[1, 3, det_max_side_len, det_max_side_len])
return option
def build_cls_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "gpu":
option.use_gpu()
if args.cls_use_trt:
option.use_trt_backend()
option.set_trt_input_shape("x", [1, 3, 32, 100])
return option
def build_rec_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "gpu":
option.use_gpu()
if args.rec_use_trt:
option.use_trt_backend()
option.set_trt_input_shape("x", [1, 3, 48, 10], [1, 3, 48, 320],
[1, 3, 48, 2000])
elif args.backend.lower() == "ort":
option.use_ort_backend()
elif args.backend.lower() == "paddle":
option.use_paddle_backend()
elif args.backend.lower() == "openvino":
assert args.device.lower(
) == "cpu", "OpenVINO backend require inference on device CPU."
option.use_openvino_backend()
return option
args = parse_arguments()
# Det model
# Detection model: detects text boxes
det_model_file = os.path.join(args.det_model, "inference.pdmodel")
det_params_file = os.path.join(args.det_model, "inference.pdiparams")
# Cls model
# Classification model: text direction classification (optional)
cls_model_file = os.path.join(args.cls_model, "inference.pdmodel")
cls_params_file = os.path.join(args.cls_model, "inference.pdiparams")
# Rec model
# Recognition model: text recognition
rec_model_file = os.path.join(args.rec_model, "inference.pdmodel")
rec_params_file = os.path.join(args.rec_model, "inference.pdiparams")
rec_label_file = args.rec_label_file
# Defaults
det_model = fd.vision.ocr.DBDetector()
cls_model = fd.vision.ocr.Classifier()
rec_model = fd.vision.ocr.Recognizer()
# The same deployment configuration is used for all three models
# Users can also configure each model separately as needed
runtime_option = build_option(args)
# Model initialization
if (len(args.det_model) != 0):
det_runtime_option = build_det_option(args)
det_model = fd.vision.ocr.DBDetector(
det_model_file, det_params_file, runtime_option=det_runtime_option)
det_model = fd.vision.ocr.DBDetector(
det_model_file, det_params_file, runtime_option=runtime_option)
cls_model = fd.vision.ocr.Classifier(
cls_model_file, cls_params_file, runtime_option=runtime_option)
rec_model = fd.vision.ocr.Recognizer(
rec_model_file,
rec_params_file,
rec_label_file,
runtime_option=runtime_option)
if (len(args.cls_model) != 0):
cls_runtime_option = build_cls_option(args)
cls_model = fd.vision.ocr.Classifier(
cls_model_file, cls_params_file, runtime_option=cls_runtime_option)
if (len(args.rec_model) != 0):
rec_runtime_option = build_rec_option(args)
rec_model = fd.vision.ocr.Recognizer(
rec_model_file,
rec_params_file,
rec_label_file,
runtime_option=rec_runtime_option)
ppocrsysv3 = fd.vision.ocr.PPOCRSystemv3(
ocr_det=det_model._model,
ocr_cls=cls_model._model,
ocr_rec=rec_model._model)
# Create the OCR system by chaining the 3 models; cls_model is optional and can be set to None if not needed
ocr_system = fd.vision.ocr.PPOCRSystemv3(
det_model=det_model, cls_model=cls_model, rec_model=rec_model)
# Prepare the input image
im = cv2.imread(args.image)
# Run prediction and print the result
result = ppocrsysv3.predict(im)
result = ocr_system.predict(im)
print(result)
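The removed build_det_option()/build_rec_option() helpers above set TensorRT dynamic input shapes per model; with the unified build_option() those calls are gone. Below is a hedged sketch of restoring per-model TensorRT shape ranges when --backend trt is selected. The helper name build_trt_option is hypothetical; the shape values and API calls (use_gpu, use_trt_backend, set_trt_input_shape) are copied from the removed code.
```
import fastdeploy as fd

def build_trt_option(min_shape, opt_shape, max_shape):
    # Hypothetical helper mirroring the removed per-model option builders.
    option = fd.RuntimeOption()
    option.use_gpu(0)
    option.use_trt_backend()
    option.set_trt_input_shape("x", min_shape, opt_shape, max_shape)
    return option

# Shape ranges taken from the removed build_det_option()/build_rec_option().
det_option = build_trt_option([1, 3, 50, 50], [1, 3, 640, 640], [1, 3, 960, 960])
rec_option = build_trt_option([1, 3, 48, 10], [1, 3, 48, 320], [1, 3, 48, 2000])
```
These options would then be passed as runtime_option= to DBDetector and Recognizer respectively, just as the unified runtime_option is passed above.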

View File

@@ -124,12 +124,7 @@ else()
endif()
if(UNIX)
execute_process(COMMAND sh -c "ls *.so*" WORKING_DIRECTORY ${OPENVINO_INSTALL_DIR}/lib
COMMAND sh -c "xargs patchelf --set-rpath '$ORIGIN'" WORKING_DIRECTORY ${OPENVINO_INSTALL_DIR}/lib
RESULT_VARIABLE result
OUTPUT_VARIABLE curr_out
ERROR_VARIABLE curr_out)
message(STATUS "result:${result} out:${curr_out}")
add_custom_target(patchelf_openvino ALL COMMAND bash -c "sh ${PROJECT_SOURCE_DIR}/build_scripts/patch_lib.sh ${OPENVINO_INSTALL_DIR}/lib" DEPENDS ${LIBRARY_NAME})
endif()
add_library(external_openvino STATIC IMPORTED GLOBAL)

View File

@@ -64,7 +64,7 @@ class RuntimeOption:
def use_cpu(self):
return self._option.use_cpu()
def set_cpu_thread_num(self, thread_num=8):
def set_cpu_thread_num(self, thread_num=-1):
return self._option.set_cpu_thread_num(thread_num)
def use_paddle_backend(self):
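The default of set_cpu_thread_num changes from 8 to -1 here. A small sketch of setting the value explicitly; interpreting -1 as "let the backend decide" is an assumption, since the diff itself does not document the new semantics.
```
import fastdeploy as fd

option = fd.RuntimeOption()
option.use_cpu()
# Explicit value; omitting the argument now passes -1, which presumably
# defers the choice to the backend (assumption, not documented in this diff).
option.set_cpu_thread_num(8)
```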

View File

@@ -217,18 +217,24 @@ class Recognizer(FastDeployModel):
class PPOCRSystemv3(FastDeployModel):
def __init__(self, ocr_det=None, ocr_cls=None, ocr_rec=None):
self._model = C.vision.ocr.PPOCRSystemv3(ocr_det, ocr_cls, ocr_rec)
def __init__(self, det_model=None, cls_model=None, rec_model=None):
assert det_model is not None and rec_model is not None, "The det_model and rec_model cannot be None."
if cls_model is None:
self.system = C.vision.ocr.PPOCRSystemv3(det_model._model, rec_model._model)
else:
self.system = C.vision.ocr.PPOCRSystemv3(det_model._model, cls_model._model, rec_model._model)
def predict(self, input_image):
return self._model.predict(input_image)
return self.system.predict(input_image)
class PPOCRSystemv2(FastDeployModel):
def __init__(self, ocr_det=None, ocr_cls=None, ocr_rec=None):
self._model = C.vision.ocr.PPOCRSystemv2(ocr_det, ocr_cls, ocr_rec)
def __init__(self, det_model=None, cls_model=None, rec_model=None):
assert det_model is not None and rec_model is not None, "The det_model and rec_model cannot be None."
if cls_model is None:
self.system = C.vision.ocr.PPOCRSystemv2(det_model._model, rec_model._model)
else:
self.system = C.vision.ocr.PPOCRSystemv2(det_model._model, cls_model._model, rec_model._model)
def predict(self, input_image):
return self._model.predict(input_image)
return self.system.predict(input_image)
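A brief usage sketch of the refactored Python wrappers; det_model, cls_model, and rec_model are assumed to be already-initialized DBDetector/Classifier/Recognizer instances as constructed in the examples above, and the image path is a placeholder.
```
import cv2
import fastdeploy as fd

# With the optional classifier:
system = fd.vision.ocr.PPOCRSystemv3(
    det_model=det_model, cls_model=cls_model, rec_model=rec_model)

# Without it, __init__ falls back to the two-model C++ binding:
system_no_cls = fd.vision.ocr.PPOCRSystemv3(
    det_model=det_model, rec_model=rec_model)

# Omitting det_model or rec_model triggers the assertion above.
result = system.predict(cv2.imread("12.jpg"))
print(result)
```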