// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/vision/ocr/ppocr/recognizer.h"

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iostream>

#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"

namespace fastdeploy {
namespace vision {
namespace ocr {

std::vector<std::string> ReadDict(const std::string& path) {
  std::ifstream in(path);
  std::string line;
  std::vector<std::string> m_vec;
  if (in) {
    while (std::getline(in, line)) {
      m_vec.push_back(line);
    }
  } else {
    std::cout << "no such label file: " << path << ", exit the program..."
              << std::endl;
    exit(1);
  }
  return m_vec;
}

Recognizer::Recognizer() {}

Recognizer::Recognizer(const std::string& model_file,
                       const std::string& params_file,
                       const std::string& label_path,
                       const RuntimeOption& custom_option,
                       const ModelFormat& model_format) {
  if (model_format == ModelFormat::ONNX) {
    valid_cpu_backends = {Backend::ORT,
                          Backend::OPENVINO};  // Available CPU backends
    valid_gpu_backends = {Backend::ORT,
                          Backend::TRT};  // Available GPU backends
  } else {
    // NOTE: This model does not support Paddle Inference GPU inference yet.
    valid_cpu_backends = {Backend::ORT, Backend::PDINFER, Backend::OPENVINO};
    valid_gpu_backends = {Backend::ORT, Backend::TRT};
  }
  runtime_option = custom_option;
  runtime_option.model_format = model_format;
  runtime_option.model_file = model_file;
  runtime_option.params_file = params_file;
  // When the Recognizer runs on CPU with Paddle Inference as the backend,
  // the following two fusion passes need to be deleted.
  runtime_option.DeletePaddleBackendPass("matmul_transpose_reshape_fuse_pass");
  runtime_option.DeletePaddleBackendPass(
      "matmul_transpose_reshape_mkldnn_fuse_pass");
  initialized = Initialize();

  // Init label_list: index 0 is reserved for the CTC blank, and a trailing
  // space is appended for the space character.
  label_list = ReadDict(label_path);
  label_list.insert(label_list.begin(), "#");  // blank char for ctc
  label_list.push_back(" ");
}

// Init
bool Recognizer::Initialize() {
  // Pre & post processing parameters
  rec_batch_num = 1;
  rec_img_h = 48;
  rec_img_w = 320;
  rec_image_shape = {3, rec_img_h, rec_img_w};
  mean = {0.5f, 0.5f, 0.5f};
  scale = {0.5f, 0.5f, 0.5f};
  is_scale = true;

  if (!InitRuntime()) {
    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
    return false;
  }
  return true;
}
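// NOTE: The resize-and-pad scheme below follows the PP-OCR CRNN input
// convention: keep the height fixed at imgH, scale the width by the
// (clamped) aspect ratio, then right-pad with gray (127) up to imgW.
// A minimal sketch of the width computation, for illustration only
// (`src_w`/`src_h` stand for the source image size and are not variables
// in this file):
//
//   int target_w = int(imgH * wh_ratio);  // final padded width
//   int resize_w =
//       std::min(target_w, int(ceilf(imgH * float(src_w) / src_h)));
//   // then pad (target_w - resize_w) pixels on the right with value 127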
void OcrRecognizerResizeImage(Mat* mat, const float& wh_ratio,
                              const std::vector<int>& rec_image_shape) {
  int imgC, imgH, imgW;
  imgC = rec_image_shape[0];
  imgH = rec_image_shape[1];
  imgW = rec_image_shape[2];

  imgW = int(imgH * wh_ratio);

  float ratio = float(mat->Width()) / float(mat->Height());
  int resize_w;
  if (ceilf(imgH * ratio) > imgW)
    resize_w = imgW;
  else
    resize_w = int(ceilf(imgH * ratio));

  Resize::Run(mat, resize_w, imgH);
  std::vector<float> value = {127, 127, 127};
  Pad::Run(mat, 0, 0, 0, int(imgW - mat->Width()), value);
}

// Preprocess
bool Recognizer::Preprocess(Mat* mat, FDTensor* output,
                            const std::vector<int>& rec_image_shape) {
  int imgH = rec_image_shape[1];
  int imgW = rec_image_shape[2];
  float wh_ratio = imgW * 1.0 / imgH;

  float ori_wh_ratio = mat->Width() * 1.0 / mat->Height();
  wh_ratio = std::max(wh_ratio, ori_wh_ratio);

  OcrRecognizerResizeImage(mat, wh_ratio, rec_image_shape);
  Normalize::Run(mat, mean, scale, true);
  HWC2CHW::Run(mat);
  Cast::Run(mat, "float");

  mat->ShareWithTensor(output);
  output->shape.insert(output->shape.begin(), 1);  // reshape to [1, c, h, w]
  return true;
}

// Postprocess: greedy CTC decoding over the [1, seq_len, num_classes] output.
bool Recognizer::Postprocess(FDTensor& infer_result,
                             std::tuple<std::string, float>* rec_result) {
  std::vector<int64_t> output_shape = infer_result.shape;
  FDASSERT(output_shape[0] == 1, "Only support batch = 1 now.");

  float* out_data = static_cast<float*>(infer_result.Data());
  std::string str_res;
  int argmax_idx;
  int last_index = 0;
  float score = 0.f;
  int count = 0;
  float max_value = 0.0f;

  for (int n = 0; n < output_shape[1]; n++) {
    argmax_idx = int(
        std::distance(&out_data[n * output_shape[2]],
                      std::max_element(&out_data[n * output_shape[2]],
                                       &out_data[(n + 1) * output_shape[2]])));
    max_value = float(*std::max_element(&out_data[n * output_shape[2]],
                                        &out_data[(n + 1) * output_shape[2]]));
    // Skip the CTC blank (index 0) and merge repeated indices.
    if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) {
      score += max_value;
      count += 1;
      str_res += label_list[argmax_idx];
    }
    last_index = argmax_idx;
  }
  score /= count;
  std::get<0>(*rec_result) = str_res;
  std::get<1>(*rec_result) = score;
  return true;
}

// Predict
bool Recognizer::Predict(cv::Mat* img,
                         std::tuple<std::string, float>* rec_result) {
  Mat mat(*img);
  std::vector<FDTensor> input_tensors(1);

  if (!Preprocess(&mat, &input_tensors[0], rec_image_shape)) {
    FDERROR << "Failed to preprocess input image." << std::endl;
    return false;
  }

  input_tensors[0].name = InputInfoOfRuntime(0).name;
  std::vector<FDTensor> output_tensors;
  if (!Infer(input_tensors, &output_tensors)) {
    FDERROR << "Failed to run inference." << std::endl;
    return false;
  }

  if (!Postprocess(output_tensors[0], rec_result)) {
    FDERROR << "Failed to postprocess." << std::endl;
    return false;
  }
  return true;
}

}  // namespace ocr
}  // namespace vision
}  // namespace fastdeploy
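// A minimal usage sketch, for illustration only. Paths are placeholders, and
// it assumes the header declares default arguments for `custom_option` and
// `model_format` (as with other FastDeploy models):
//
//   #include "fastdeploy/vision.h"
//
//   int main() {
//     fastdeploy::vision::ocr::Recognizer rec(
//         "rec_model/inference.pdmodel", "rec_model/inference.pdiparams",
//         "ppocr_keys_v1.txt");
//     cv::Mat img = cv::imread("word_crop.jpg");
//     std::tuple<std::string, float> result;  // (text, average CTC score)
//     if (rec.Predict(&img, &result)) {
//       std::cout << std::get<0>(result) << " : " << std::get<1>(result)
//                 << std::endl;
//     }
//     return 0;
//   }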