// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "fastdeploy/vision/ocr/ppocr/dbdetector.h" #include "fastdeploy/utils/perf.h" #include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" namespace fastdeploy { namespace vision { namespace ocr { DBDetector::DBDetector() {} DBDetector::DBDetector(const std::string& model_file, const std::string& params_file, const RuntimeOption& custom_option, const Frontend& model_format) { if (model_format == Frontend::ONNX) { valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; // 指定可用的CPU后端 valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 } else { valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; } runtime_option = custom_option; runtime_option.model_format = model_format; runtime_option.model_file = model_file; runtime_option.params_file = params_file; initialized = Initialize(); } // Init bool DBDetector::Initialize() { // pre&post process parameters max_side_len = 960; det_db_thresh = 0.3; det_db_box_thresh = 0.6; det_db_unclip_ratio = 1.5; det_db_score_mode = "slow"; use_dilation = false; mean = {0.485f, 0.456f, 0.406f}; scale = {0.229f, 0.224f, 0.225f}; is_scale = true; if (!InitRuntime()) { FDERROR << "Failed to initialize fastdeploy backend." << std::endl; return false; } return true; } void OcrDetectorResizeImage(Mat* img, int max_size_len, float* ratio_h, float* ratio_w) { int w = img->Width(); int h = img->Height(); float ratio = 1.f; int max_wh = w >= h ? w : h; if (max_wh > max_size_len) { if (h > w) { ratio = float(max_size_len) / float(h); } else { ratio = float(max_size_len) / float(w); } } int resize_h = int(float(h) * ratio); int resize_w = int(float(w) * ratio); resize_h = std::max(int(std::round(float(resize_h) / 32) * 32), 32); resize_w = std::max(int(std::round(float(resize_w) / 32) * 32), 32); Resize::Run(img, resize_w, resize_h); *ratio_h = float(resize_h) / float(h); *ratio_w = float(resize_w) / float(w); } //预处理 bool DBDetector::Preprocess( Mat* mat, FDTensor* output, std::map>* im_info) { // Resize OcrDetectorResizeImage(mat, max_side_len, &ratio_h, &ratio_w); // Normalize Normalize::Run(mat, mean, scale, true); (*im_info)["output_shape"] = {static_cast(mat->Height()), static_cast(mat->Width())}; //-CHW HWC2CHW::Run(mat); Cast::Run(mat, "float"); mat->ShareWithTensor(output); output->shape.insert(output->shape.begin(), 1); return true; } //后处理 bool DBDetector::Postprocess( FDTensor& infer_result, std::vector>* boxes_result, const std::map>& im_info) { std::vector output_shape = infer_result.shape; FDASSERT(output_shape[0] == 1, "Only support batch =1 now."); int n2 = output_shape[2]; int n3 = output_shape[3]; int n = n2 * n3; float* out_data = static_cast(infer_result.Data()); // prepare bitmap std::vector pred(n, 0.0); std::vector cbuf(n, ' '); for (int i = 0; i < n; i++) { pred[i] = float(out_data[i]); cbuf[i] = (unsigned char)((out_data[i]) * 255); } cv::Mat cbuf_map(n2, n3, CV_8UC1, (unsigned char*)cbuf.data()); cv::Mat pred_map(n2, n3, CV_32F, (float*)pred.data()); const double threshold = det_db_thresh * 255; const double maxvalue = 255; cv::Mat bit_map; cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY); if (use_dilation) { cv::Mat dila_ele = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2)); cv::dilate(bit_map, bit_map, dila_ele); } std::vector>> boxes; boxes = post_processor_.BoxesFromBitmap(pred_map, bit_map, det_db_box_thresh, det_db_unclip_ratio, det_db_score_mode); boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, im_info); // boxes to boxes_result for (int i = 0; i < boxes.size(); i++) { std::array new_box; int k = 0; for (auto& vec : boxes[i]) { for (auto& e : vec) { new_box[k++] = e; } } boxes_result->push_back(new_box); } return true; } //预测 bool DBDetector::Predict(cv::Mat* img, std::vector>* boxes_result) { Mat mat(*img); std::vector input_tensors(1); std::map> im_info; // Record the shape of image and the shape of preprocessed image im_info["input_shape"] = {static_cast(mat.Height()), static_cast(mat.Width())}; im_info["output_shape"] = {static_cast(mat.Height()), static_cast(mat.Width())}; if (!Preprocess(&mat, &input_tensors[0], &im_info)) { FDERROR << "Failed to preprocess input image." << std::endl; return false; } input_tensors[0].name = InputInfoOfRuntime(0).name; std::vector output_tensors; if (!Infer(input_tensors, &output_tensors)) { FDERROR << "Failed to inference." << std::endl; return false; } if (!Postprocess(output_tensors[0], boxes_result, im_info)) { FDERROR << "Failed to post process." << std::endl; return false; } return true; } } // namesapce ocr } // namespace vision } // namespace fastdeploy