mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 09:07:10 +08:00
Add PaddleOCRv3 & PaddleOCRv2 Support (#139)
* Add PaddleOCR Support * Add PaddleOCR Support * Add PaddleOCRv3 Support * Add PaddleOCRv3 Support * Update README.md * Update README.md * Update README.md * Update README.md * Add PaddleOCRv3 Support * Add PaddleOCRv3 Supports * Add PaddleOCRv3 Suport * Fix Rec diff * Remove useless functions * Remove useless comments * Add PaddleOCRv2 Support
This commit is contained in:
207
csrc/fastdeploy/vision/ocr/ppocr/recognizer.cc
Normal file
207
csrc/fastdeploy/vision/ocr/ppocr/recognizer.cc
Normal file
@@ -0,0 +1,207 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "fastdeploy/vision/ocr/ppocr/recognizer.h"
|
||||
#include "fastdeploy/utils/perf.h"
|
||||
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
|
||||
|
||||
namespace fastdeploy {
|
||||
namespace vision {
|
||||
namespace ocr {
|
||||
|
||||
// Reads a label dictionary from `path` and returns one entry per line, in
// file order.
//
// A trailing '\r' is removed from every line so that dictionaries saved with
// CRLF line endings produce the same labels as LF-terminated ones (otherwise
// the carriage return would become part of each label).
//
// If the file cannot be opened, an error is written to stderr and the process
// terminates with exit status 1, so callers never observe a failed read.
std::vector<std::string> ReadDict(const std::string& path) {
  std::ifstream in(path);
  if (!in) {
    std::cerr << "no such label file: " << path << ", exit the program..."
              << std::endl;
    exit(1);
  }

  std::vector<std::string> labels;
  std::string line;
  while (std::getline(in, line)) {
    // std::getline only consumes '\n'; drop the '\r' left behind by CRLF.
    if (!line.empty() && line.back() == '\r') {
      line.pop_back();
    }
    labels.push_back(line);
  }
  return labels;
}
|
||||
|
||||
// Default constructor: performs no setup of its own (no model is loaded and
// no label dictionary is read).
Recognizer::Recognizer() = default;
|
||||
|
||||
// Constructs a recognizer and immediately initializes it.
//
// model_file / params_file: stored into runtime_option.
// label_path:     dictionary file loaded with ReadDict().
// custom_option:  copied into runtime_option before the model fields above
//                 are written on top of it.
// model_format:   selects which CPU backends are considered valid and is
//                 stored into runtime_option.
//
// The result of Initialize() is recorded in `initialized`.
Recognizer::Recognizer(const std::string& model_file,
                       const std::string& params_file,
                       const std::string& label_path,
                       const RuntimeOption& custom_option,
                       const Frontend& model_format) {
  // The usable GPU backends are the same for every model format.
  // NOTE: this model does not yet support Paddle Inference on GPU.
  valid_gpu_backends = {Backend::ORT, Backend::TRT};

  // Usable CPU backends depend on the model format.
  if (model_format == Frontend::ONNX) {
    valid_cpu_backends = {Backend::ORT};
  } else {
    valid_cpu_backends = {Backend::ORT, Backend::PDINFER};
  }

  runtime_option = custom_option;
  runtime_option.model_format = model_format;
  runtime_option.model_file = model_file;
  runtime_option.params_file = params_file;

  // When the recognizer runs on CPU with Paddle Inference as the backend,
  // the following two passes have to be removed.
  runtime_option.DeletePaddleBackendPass("matmul_transpose_reshape_fuse_pass");
  runtime_option.DeletePaddleBackendPass(
      "matmul_transpose_reshape_mkldnn_fuse_pass");

  initialized = Initialize();

  // Build label_list: the dictionary entries, with a blank symbol for CTC
  // placed at index 0 and a space character appended at the end.
  label_list = ReadDict(label_path);
  label_list.insert(label_list.begin(), "#");
  label_list.push_back(" ");
}
|
||||
|
||||
// Init
|
||||
bool Recognizer::Initialize() {
|
||||
// pre&post process parameters
|
||||
rec_batch_num = 1;
|
||||
rec_img_h = 48;
|
||||
rec_img_w = 320;
|
||||
rec_image_shape = {3, rec_img_h, rec_img_w};
|
||||
|
||||
mean = {0.5f, 0.5f, 0.5f};
|
||||
scale = {0.5f, 0.5f, 0.5f};
|
||||
is_scale = true;
|
||||
|
||||
if (!InitRuntime()) {
|
||||
FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Resizes `mat` in place to the height given by rec_image_shape[1] and pads
// it on the right up to a width of int(height * wh_ratio).
//
// mat:             image to transform (modified in place).
// wh_ratio:        width/height ratio used to derive the padded width.
// rec_image_shape: only index 1 (the target height) is used here.
//
// The resized width follows the image's own aspect ratio but is capped at
// the padded width; the remaining columns are filled with the value 127 in
// each of the three channels.
void OcrRecognizerResizeImage(Mat* mat, float wh_ratio,
                              const std::vector<int>& rec_image_shape) {
  const int target_h = rec_image_shape[1];
  const int target_w = static_cast<int>(target_h * wh_ratio);

  const float aspect =
      static_cast<float>(mat->Width()) / static_cast<float>(mat->Height());
  const float scaled_w = ceilf(target_h * aspect);
  const int resize_w =
      (scaled_w > target_w) ? target_w : static_cast<int>(scaled_w);

  Resize::Run(mat, resize_w, target_h);

  // Pad only on the right side so the final width equals target_w.
  std::vector<float> value = {127, 127, 127};
  Pad::Run(mat, 0, 0, 0, static_cast<int>(target_w - mat->Width()), value);
}
|
||||
|
||||
//预处理
|
||||
bool Recognizer::Preprocess(Mat* mat, FDTensor* output,
|
||||
const std::vector<int>& rec_image_shape) {
|
||||
int imgH = rec_image_shape[1];
|
||||
int imgW = rec_image_shape[2];
|
||||
float wh_ratio = imgW * 1.0 / imgH;
|
||||
|
||||
float ori_wh_ratio = mat->Width() * 1.0 / mat->Height();
|
||||
wh_ratio = std::max(wh_ratio, ori_wh_ratio);
|
||||
|
||||
OcrRecognizerResizeImage(mat, wh_ratio, rec_image_shape);
|
||||
|
||||
Normalize::Run(mat, mean, scale, true);
|
||||
|
||||
HWC2CHW::Run(mat);
|
||||
Cast::Run(mat, "float");
|
||||
|
||||
mat->ShareWithTensor(output);
|
||||
output->shape.insert(output->shape.begin(), 1);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//后处理
|
||||
bool Recognizer::Postprocess(FDTensor& infer_result, std::string& rec_texts,
|
||||
float& rec_text_scores) {
|
||||
std::vector<int64_t> output_shape = infer_result.shape;
|
||||
FDASSERT(output_shape[0] == 1, "Only support batch =1 now.");
|
||||
|
||||
float* out_data = static_cast<float*>(infer_result.Data());
|
||||
|
||||
std::string str_res;
|
||||
int argmax_idx;
|
||||
int last_index = 0;
|
||||
float score = 0.f;
|
||||
int count = 0;
|
||||
float max_value = 0.0f;
|
||||
|
||||
for (int n = 0; n < output_shape[1]; n++) {
|
||||
argmax_idx = int(
|
||||
std::distance(&out_data[n * output_shape[2]],
|
||||
std::max_element(&out_data[n * output_shape[2]],
|
||||
&out_data[(n + 1) * output_shape[2]])));
|
||||
|
||||
max_value = float(*std::max_element(&out_data[n * output_shape[2]],
|
||||
&out_data[(n + 1) * output_shape[2]]));
|
||||
|
||||
if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) {
|
||||
score += max_value;
|
||||
count += 1;
|
||||
str_res += label_list[argmax_idx];
|
||||
}
|
||||
last_index = argmax_idx;
|
||||
}
|
||||
|
||||
score /= count;
|
||||
|
||||
rec_texts = str_res;
|
||||
rec_text_scores = score;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//预测
|
||||
bool Recognizer::Predict(cv::Mat* img, std::string& rec_texts,
|
||||
float& rec_text_scores) {
|
||||
Mat mat(*img);
|
||||
|
||||
std::vector<FDTensor> input_tensors(1);
|
||||
|
||||
if (!Preprocess(&mat, &input_tensors[0], rec_image_shape)) {
|
||||
FDERROR << "Failed to preprocess input image." << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
input_tensors[0].name = InputInfoOfRuntime(0).name;
|
||||
std::vector<FDTensor> output_tensors;
|
||||
|
||||
if (!Infer(input_tensors, &output_tensors)) {
|
||||
FDERROR << "Failed to inference." << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!Postprocess(output_tensors[0], rec_texts, rec_text_scores)) {
|
||||
FDERROR << "Failed to post process." << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace ocr
|
||||
} // namespace vision
|
||||
} // namespace fastdeploy
|
Reference in New Issue
Block a user