Files
FastDeploy/fastdeploy/vision/ocr/ppocr/dbdetector.cc
DefTruth a51e5a6e55 [Android] Add android aar package (#416)
* [Android] Add Android build docs and demo (#26)

* [Backend] Add override flag to lite backend

* [Docs] Add Android C++ SDK build docs

* [Doc] fix android_build_docs typos

* Update CMakeLists.txt

* Update android.md

* [Doc] Add PicoDet Android demo docs

* [Doc] Update PicoDet Andorid demo docs

* [Doc] Update PaddleClasModel Android demo docs

* [Doc] Update fastdeploy android jni docs

* [Doc] Update fastdeploy android jni usage docs

* [Android] init fastdeploy android jar package

* [Backend] support int8 option for lite backend

* [Model] add Backend::Lite to paddle model

* [Backend] use CopyFromCpu for lite backend.

* [Android] package jni srcs and java api into aar

* Update infer.cc

* Update infer.cc

* [Android] Update package build.gradle

* [Android] Update android app examples

* [Android] update android detection app
2022-10-26 17:01:14 +08:00

204 lines
6.1 KiB
C++

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/dbdetector.h"
#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
namespace fastdeploy {
namespace vision {
namespace ocr {
DBDetector::DBDetector() {}
DBDetector::DBDetector(const std::string& model_file,
const std::string& params_file,
const RuntimeOption& custom_option,
const ModelFormat& model_format) {
if (model_format == ModelFormat::ONNX) {
valid_cpu_backends = {Backend::ORT,
Backend::OPENVINO};
valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else {
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
}
runtime_option = custom_option;
runtime_option.model_format = model_format;
runtime_option.model_file = model_file;
runtime_option.params_file = params_file;
initialized = Initialize();
}
// Init
bool DBDetector::Initialize() {
// pre&post process parameters
max_side_len = 960;
det_db_thresh = 0.3;
det_db_box_thresh = 0.6;
det_db_unclip_ratio = 1.5;
det_db_score_mode = "slow";
use_dilation = false;
mean = {0.485f, 0.456f, 0.406f};
scale = {0.229f, 0.224f, 0.225f};
is_scale = true;
if (!InitRuntime()) {
FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
return false;
}
return true;
}
void OcrDetectorResizeImage(Mat* img, int max_size_len, float* ratio_h,
float* ratio_w) {
int w = img->Width();
int h = img->Height();
float ratio = 1.f;
int max_wh = w >= h ? w : h;
if (max_wh > max_size_len) {
if (h > w) {
ratio = float(max_size_len) / float(h);
} else {
ratio = float(max_size_len) / float(w);
}
}
int resize_h = int(float(h) * ratio);
int resize_w = int(float(w) * ratio);
resize_h = std::max(int(std::round(float(resize_h) / 32) * 32), 32);
resize_w = std::max(int(std::round(float(resize_w) / 32) * 32), 32);
Resize::Run(img, resize_w, resize_h);
*ratio_h = float(resize_h) / float(h);
*ratio_w = float(resize_w) / float(w);
}
bool DBDetector::Preprocess(
Mat* mat, FDTensor* output,
std::map<std::string, std::array<float, 2>>* im_info) {
// Resize
OcrDetectorResizeImage(mat, max_side_len, &ratio_h, &ratio_w);
// Normalize
Normalize::Run(mat, mean, scale, true);
(*im_info)["output_shape"] = {static_cast<float>(mat->Height()),
static_cast<float>(mat->Width())};
//-CHW
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
mat->ShareWithTensor(output);
output->shape.insert(output->shape.begin(), 1);
return true;
}
bool DBDetector::Postprocess(
FDTensor& infer_result, std::vector<std::array<int, 8>>* boxes_result,
const std::map<std::string, std::array<float, 2>>& im_info) {
std::vector<int64_t> output_shape = infer_result.shape;
FDASSERT(output_shape[0] == 1, "Only support batch =1 now.");
int n2 = output_shape[2];
int n3 = output_shape[3];
int n = n2 * n3;
float* out_data = static_cast<float*>(infer_result.Data());
// prepare bitmap
std::vector<float> pred(n, 0.0);
std::vector<unsigned char> cbuf(n, ' ');
for (int i = 0; i < n; i++) {
pred[i] = float(out_data[i]);
cbuf[i] = (unsigned char)((out_data[i]) * 255);
}
cv::Mat cbuf_map(n2, n3, CV_8UC1, (unsigned char*)cbuf.data());
cv::Mat pred_map(n2, n3, CV_32F, (float*)pred.data());
const double threshold = det_db_thresh * 255;
const double maxvalue = 255;
cv::Mat bit_map;
cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY);
if (use_dilation) {
cv::Mat dila_ele =
cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2));
cv::dilate(bit_map, bit_map, dila_ele);
}
std::vector<std::vector<std::vector<int>>> boxes;
boxes =
post_processor_.BoxesFromBitmap(pred_map, bit_map, det_db_box_thresh,
det_db_unclip_ratio, det_db_score_mode);
boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, im_info);
// boxes to boxes_result
for (int i = 0; i < boxes.size(); i++) {
std::array<int, 8> new_box;
int k = 0;
for (auto& vec : boxes[i]) {
for (auto& e : vec) {
new_box[k++] = e;
}
}
boxes_result->push_back(new_box);
}
return true;
}
bool DBDetector::Predict(cv::Mat* img,
std::vector<std::array<int, 8>>* boxes_result) {
Mat mat(*img);
std::vector<FDTensor> input_tensors(1);
std::map<std::string, std::array<float, 2>> im_info;
// Record the shape of image and the shape of preprocessed image
im_info["input_shape"] = {static_cast<float>(mat.Height()),
static_cast<float>(mat.Width())};
im_info["output_shape"] = {static_cast<float>(mat.Height()),
static_cast<float>(mat.Width())};
if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
FDERROR << "Failed to preprocess input image." << std::endl;
return false;
}
input_tensors[0].name = InputInfoOfRuntime(0).name;
std::vector<FDTensor> output_tensors;
if (!Infer(input_tensors, &output_tensors)) {
FDERROR << "Failed to inference." << std::endl;
return false;
}
if (!Postprocess(output_tensors[0], boxes_result, im_info)) {
FDERROR << "Failed to post process." << std::endl;
return false;
}
return true;
}
} // namesapce ocr
} // namespace vision
} // namespace fastdeploy