Files
FastDeploy/fastdeploy/vision/detection/ppdet/mask_rcnn.cc
2022-09-22 13:24:05 +08:00

121 lines
4.8 KiB
C++

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/detection/ppdet/mask_rcnn.h"
namespace fastdeploy {
namespace vision {
namespace detection {
MaskRCNN::MaskRCNN(const std::string& model_file,
const std::string& params_file,
const std::string& config_file,
const RuntimeOption& custom_option,
const ModelFormat& model_format) {
config_file_ = config_file;
valid_cpu_backends = {Backend::PDINFER};
valid_gpu_backends = {Backend::PDINFER};
runtime_option = custom_option;
runtime_option.model_format = model_format;
runtime_option.model_file = model_file;
runtime_option.params_file = params_file;
initialized = Initialize();
}
bool MaskRCNN::Postprocess(std::vector<FDTensor>& infer_result,
DetectionResult* result) {
// index 0: bbox_data [N, 6] float32
// index 1: bbox_num [B=1] int32
// index 2: mask_data [N, h, w] int32
FDASSERT(infer_result[1].shape[0] == 1,
"Only support batch = 1 in FastDeploy now.");
FDASSERT(infer_result.size() == 3,
"The infer_result must contains 3 otuput Tensors, but found %lu",
infer_result.size());
FDTensor& box_tensor = infer_result[0];
FDTensor& box_num_tensor = infer_result[1];
FDTensor& mask_tensor = infer_result[2];
int box_num = 0;
if (box_num_tensor.dtype == FDDataType::INT32) {
box_num = *(static_cast<int32_t*>(box_num_tensor.Data()));
} else if (box_num_tensor.dtype == FDDataType::INT64) {
box_num = *(static_cast<int64_t*>(box_num_tensor.Data()));
} else {
FDASSERT(false,
"The output box_num of PaddleDetection/MaskRCNN model should be "
"type of int32/int64.");
}
if (box_num <= 0) {
return true; // no object detected.
}
result->Resize(box_num);
float* box_data = static_cast<float*>(box_tensor.Data());
for (size_t i = 0; i < box_num; ++i) {
result->label_ids[i] = static_cast<int>(box_data[i * 6]);
result->scores[i] = box_data[i * 6 + 1];
result->boxes[i] =
std::array<float, 4>{box_data[i * 6 + 2], box_data[i * 6 + 3],
box_data[i * 6 + 4], box_data[i * 6 + 5]};
}
result->contain_masks = true;
// TODO(qiuyanjun): Cast int64/int8 to int32.
FDASSERT(mask_tensor.dtype == FDDataType::INT32,
"The dtype of mask Tensor must be int32 now!");
// In PaddleDetection/MaskRCNN, the mask_h and mask_w
// are already aligned with original input image. So,
// we need to crop it from output mask according to
// the detected bounding box.
// +-----------------------+
// | x1,y1 |
// | +---------------+ |
// | | | |
// | | Crop | |
// | | | |
// | | | |
// | +---------------+ |
// | x2,y2 |
// +-----------------------+
int64_t out_mask_h = mask_tensor.shape[1];
int64_t out_mask_w = mask_tensor.shape[2];
int64_t out_mask_numel = out_mask_h * out_mask_w;
int32_t* out_mask_data = static_cast<int32_t*>(mask_tensor.Data());
for (size_t i = 0; i < box_num; ++i) {
// crop instance mask according to box
int64_t x1 = static_cast<int64_t>(result->boxes[i][0]);
int64_t y1 = static_cast<int64_t>(result->boxes[i][1]);
int64_t x2 = static_cast<int64_t>(result->boxes[i][2]);
int64_t y2 = static_cast<int64_t>(result->boxes[i][3]);
int64_t keep_mask_h = y2 - y1;
int64_t keep_mask_w = x2 - x1;
int64_t keep_mask_numel = keep_mask_h * keep_mask_w;
result->masks[i].Resize(keep_mask_numel); // int32
result->masks[i].shape = {keep_mask_h, keep_mask_w};
int32_t* mask_start_ptr = out_mask_data + i * out_mask_numel;
int32_t* keep_mask_ptr = static_cast<int32_t*>(result->masks[i].Data());
for (size_t row = y1; row < y2; ++row) {
size_t keep_nbytes_in_col = keep_mask_w * sizeof(int32_t);
int32_t* out_row_start_ptr = mask_start_ptr + row * out_mask_w + x1;
int32_t* keep_row_start_ptr = keep_mask_ptr + (row - y1) * keep_mask_w;
std::memcpy(keep_row_start_ptr, out_row_start_ptr, keep_nbytes_in_col);
}
}
return true;
}
} // namespace detection
} // namespace vision
} // namespace fastdeploy