Files
FastDeploy/paddle2onnx/mapper/detection/multiclass_nms.cc
Jason 6343b0db47 [Build] Support build with source code of Paddle2ONNX (#1559)
* Add notes for tensors

* Optimize some apis

* move some warnings

* Support build with Paddle2ONNX

* Add protobuf support

* Fix compile on mac

* add clean package script

* Add paddle2onnx code

* remove submodule

* Add onnx code

* remove softlink

* add onnx code

* fix error

* Add cmake file

* fix patchelf

* update paddle2onnx

* Delete .gitmodules

---------

Co-authored-by: PaddleCI <paddle_ci@example.com>
Co-authored-by: pangyoki <pangyoki@126.com>
Co-authored-by: jiangjiajun <jiangjiajun@baidu.lcom>
2023-03-17 10:03:22 +08:00

361 lines
15 KiB
C++

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle2onnx/mapper/detection/multiclass_nms.h"
namespace paddle2onnx {
// Registers NMSMapper as the mapper for the `multiclass_nms3` operator.
REGISTER_MAPPER(multiclass_nms3, NMSMapper);
// Validates the inputs of multiclass_nms3 and reports the minimum ONNX opset
// required to export it.
//
// Returns -1 (after logging an error) when:
//   - `Scores` is not a rank-3 tensor,
//   - `BBoxes` is not a rank-3 tensor, or
//   - dimension 1 of `Scores` (the number of classes) is not a fixed,
//     positive value.
// Returns 7 when the op is exported as a custom op or the deploy backend is
// "tensorrt"; otherwise logs the opset requirement and returns 10.
int32_t NMSMapper::GetMinOpset(bool verbose) {
  auto bbox_inputs = GetInput("BBoxes");
  auto score_inputs = GetInput("Scores");

  auto& scores = score_inputs[0];
  if (scores.Rank() != 3) {
    Error() << "Lod Tensor input is not supported, which means the shape of "
               "input(scores) is [M, C] now, but Paddle2ONNX only support [N, "
               "C, M]."
            << std::endl;
    return -1;
  }

  auto& bboxes = bbox_inputs[0];
  if (bboxes.Rank() != 3) {
    Error() << "Only support input boxes as 3-D Tensor, but now it's rank is "
            << bboxes.Rank() << "." << std::endl;
    return -1;
  }

  // The class count is read as a static value when the graph is built, so it
  // has to be known at export time.
  if (scores.shape[1] <= 0) {
    Error() << "The 2nd-dimension of score should be fixed(means the number of "
               "classes), but now it's "
            << scores.shape[1] << "." << std::endl;
    return -1;
  }

  // Custom-op and TensorRT exports do not go through the opset-10 path.
  const bool exported_without_onnx_nms =
      export_as_custom_op || this->deploy_backend == "tensorrt";
  if (exported_without_onnx_nms) {
    return 7;
  }

  Logger(verbose, 10) << RequireOpset(10) << std::endl;
  return 10;
}
// Post-processes the raw NonMaxSuppression result into the three outputs of
// multiclass_nms3 (`Out`, `Index`, `NmsRoisNum`).
//
// `selected_indices` is the name of the NonMaxSuppression output: a tensor of
// shape [num_selected_indices, 3] whose rows are [batch, class_id, box_id].
//
// Steps emitted into the graph:
//   1. split the class ids and box ids out of `selected_indices`;
//   2. drop detections whose class equals `background_label_` (when >= 0);
//   3. gather the score of every remaining detection;
//   4. keep at most `keep_top_k_` detections by score (when > 0);
//   5. build `Out` by concatenating class, score and box along the last
//      axis, then derive `Index` and `NmsRoisNum` from the kept detections.
//
// The batch index is ignored throughout, i.e. batch size is treated as 1.
void NMSMapper::KeepTopK(const std::string& selected_indices) {
  auto boxes_info = GetInput("BBoxes");
  auto score_info = GetInput("Scores");
  auto out_info = GetOutput("Out");
  auto index_info = GetOutput("Index");
  auto num_rois_info = GetOutput("NmsRoisNum");

  // Column positions of class_id / box_id inside each `selected_indices` row.
  auto value_1 =
      helper_->Constant({1}, ONNX_NAMESPACE::TensorProto::INT64, int64_t(1));
  auto value_2 =
      helper_->Constant({1}, ONNX_NAMESPACE::TensorProto::INT64, int64_t(2));

  auto class_id = helper_->MakeNode("Gather", {selected_indices, value_1});
  AddAttribute(class_id, "axis", int64_t(1));
  auto box_id = helper_->MakeNode("Gather", {selected_indices, value_2});
  AddAttribute(box_id, "axis", int64_t(1));

  auto filtered_class_id = class_id->output(0);
  auto filtered_box_id = box_id->output(0);
  if (background_label_ >= 0) {
    // NonZero yields the positions whose class differs from the background
    // label; those positions are then used to gather the surviving entries.
    auto filter_indices = MapperHelper::Get()->GenName("nms.filter_background");
    auto squeezed_class_id =
        helper_->Squeeze(class_id->output(0), std::vector<int64_t>(1, 1));
    if (background_label_ > 0) {
      auto background = helper_->Constant(
          {1}, ONNX_NAMESPACE::TensorProto::INT64, background_label_);
      auto diff = helper_->MakeNode("Sub", {squeezed_class_id, background});
      helper_->MakeNode("NonZero", {diff->output(0)}, {filter_indices});
    } else {
      // background_label_ == 0: the class id itself is already "class - 0".
      helper_->MakeNode("NonZero", {squeezed_class_id}, {filter_indices});
    }
    auto new_class_id =
        helper_->MakeNode("Gather", {filtered_class_id, filter_indices});
    AddAttribute(new_class_id, "axis", int64_t(0));
    auto new_box_id =
        helper_->MakeNode("Gather", {box_id->output(0), filter_indices});
    AddAttribute(new_box_id, "axis", int64_t(0));
    filtered_class_id = new_class_id->output(0);
    filtered_box_id = new_box_id->output(0);
  }

  // Gather the score of every remaining detection.
  //   - scores: [N, C, M]; N is the batch size (regarded as 1 here), C the
  //     number of classes, M the number of boxes per class.
  //   - `scores` is flattened to [1 * C * M] and each score is looked up at
  //       gather_index = class_id * M + box_id
  auto flatten_score = helper_->Flatten(score_info[0].name);
  std::string num_boxes_each_class;
  if (score_info[0].shape[2] > 0) {
    num_boxes_each_class = helper_->Constant(
        {1}, ONNX_NAMESPACE::TensorProto::INT64, score_info[0].shape[2]);
  } else {
    // M is dynamic (e.g. -1): baking it in as a constant would corrupt the
    // flat index, so read it from the runtime shape of `scores` instead.
    // NOTE(review): Slice arguments are assumed to be (axes, starts, ends);
    // the other Slice calls in this function cannot confirm it.
    auto shape_of_input_scores =
        helper_->MakeNode("Shape", {score_info[0].name});
    num_boxes_each_class = helper_->Slice(
        shape_of_input_scores->output(0), std::vector<int64_t>(1, 0),
        std::vector<int64_t>(1, 2), std::vector<int64_t>(1, 3));
  }
  auto gather_indices_0 =
      helper_->MakeNode("Mul", {filtered_class_id, num_boxes_each_class});
  auto gather_indices_1 =
      helper_->MakeNode("Add", {gather_indices_0->output(0), filtered_box_id});
  auto gather_indices = helper_->Flatten(gather_indices_1->output(0));
  auto gathered_scores =
      helper_->MakeNode("Gather", {flatten_score, gather_indices});
  AddAttribute(gathered_scores, "axis", int64_t(0));

  // keep_top_k: the requested k may exceed the number of remaining boxes, so
  // the effective k is min(number of remaining boxes, keep_top_k_).
  auto final_classes = filtered_class_id;
  auto final_boxes_id = filtered_box_id;
  auto final_scores = gathered_scores->output(0);
  if (keep_top_k_ > 0) {
    auto shape_of_scores = helper_->MakeNode("Shape", {final_scores});
    auto num_of_boxes =
        helper_->Slice(shape_of_scores->output(0), std::vector<int64_t>(1, 0),
                       std::vector<int64_t>(1, 0), std::vector<int64_t>(1, 1));
    auto top_k =
        helper_->Constant({1}, ONNX_NAMESPACE::TensorProto::INT64, keep_top_k_);
    auto ensemble_value = helper_->MakeNode("Concat", {num_of_boxes, top_k});
    AddAttribute(ensemble_value, "axis", int64_t(0));
    auto new_top_k =
        helper_->MakeNode("ReduceMin", {ensemble_value->output(0)});
    AddAttribute(new_top_k, "axes", std::vector<int64_t>(1, 0));
    AddAttribute(new_top_k, "keepdims", int64_t(1));

    // TopK outputs: (0) top-k scores, (1) their indices into `final_scores`.
    auto topk_node =
        helper_->MakeNode("TopK", {final_scores, new_top_k->output(0)}, 2);
    auto topk_scores =
        helper_->MakeNode("Gather", {final_scores, topk_node->output(1)});
    AddAttribute(topk_scores, "axis", int64_t(0));

    filtered_class_id =
        helper_->MakeNode("Squeeze", {filtered_class_id})->output(0);
    auto topk_classes =
        helper_->MakeNode("Gather", {filtered_class_id, topk_node->output(1)});
    AddAttribute(topk_classes, "axis", int64_t(0));

    filtered_box_id =
        helper_->MakeNode("Squeeze", {filtered_box_id})->output(0);
    auto topk_boxes_id =
        helper_->MakeNode("Gather", {filtered_box_id, topk_node->output(1)});
    AddAttribute(topk_boxes_id, "axis", int64_t(0));

    final_boxes_id = topk_boxes_id->output(0);
    final_scores = topk_scores->output(0);
    final_classes = topk_classes->output(0);
  }

  // Assemble `Out`: class, score and box are concatenated along axis 2, then
  // the leading axis is squeezed away.
  auto flatten_boxes_id = helper_->Flatten({final_boxes_id});
  auto gathered_selected_boxes =
      helper_->MakeNode("Gather", {boxes_info[0].name, flatten_boxes_id});
  AddAttribute(gathered_selected_boxes, "axis", int64_t(1));

  auto float_classes = helper_->MakeNode("Cast", {final_classes});
  AddAttribute(float_classes, "to", ONNX_NAMESPACE::TensorProto::FLOAT);

  std::vector<int64_t> shape{1, -1, 1};
  auto unsqueezed_scores = helper_->Reshape({final_scores}, shape);
  auto unsqueezed_class = helper_->Reshape({float_classes->output(0)}, shape);

  auto box_result =
      helper_->MakeNode("Concat", {unsqueezed_class, unsqueezed_scores,
                                   gathered_selected_boxes->output(0)});
  AddAttribute(box_result, "axis", int64_t(2));
  helper_->Squeeze({box_result->output(0)}, {out_info[0].name},
                   std::vector<int64_t>(1, 0));

  // Secondary outputs (`Index`, `NmsRoisNum`); they are not always consumed.
  // TODO(jiangjiajun) A pass to eliminate all the useless Cast is needed
  auto reshaped_index_result =
      helper_->Reshape({flatten_boxes_id}, {int64_t(-1), int64_t(1)});
  auto index_result =
      helper_->MakeNode("Cast", {reshaped_index_result}, {index_info[0].name});
  AddAttribute(index_result, "to", GetOnnxDtype(index_info[0].dtype));

  // NmsRoisNum is the first dimension of `Out`, cast to the output's dtype.
  auto out_box_shape = helper_->MakeNode("Shape", {out_info[0].name});
  auto num_rois_result =
      helper_->Slice({out_box_shape->output(0)}, std::vector<int64_t>(1, 0),
                     std::vector<int64_t>(1, 0), std::vector<int64_t>(1, 1));
  helper_->AutoCast(num_rois_result, num_rois_info[0].name,
                    P2ODataType::INT64, num_rois_info[0].dtype);
}
void NMSMapper::Opset10() {
if (this->deploy_backend == "tensorrt") {
return ExportForTensorRT();
}
auto boxes_info = GetInput("BBoxes");
auto score_info = GetInput("Scores");
if (boxes_info[0].shape[0] != 1) {
Warn()
<< "[WARNING] Due to the operator multiclass_nms3, the exported ONNX "
"model will only supports inference with input batch_size == 1."
<< std::endl;
}
int64_t num_classes = score_info[0].shape[1];
auto score_threshold = helper_->Constant(
{1}, ONNX_NAMESPACE::TensorProto::FLOAT, score_threshold_);
auto nms_threshold = helper_->Constant(
{1}, ONNX_NAMESPACE::TensorProto::FLOAT, nms_threshold_);
auto nms_top_k =
helper_->Constant({1}, ONNX_NAMESPACE::TensorProto::INT64, nms_top_k_);
auto selected_box_index = MapperHelper::Get()->GenName("nms.selected_index");
if (normalized_) {
helper_->MakeNode("NonMaxSuppression",
{boxes_info[0].name, score_info[0].name, nms_top_k,
nms_threshold, score_threshold},
{selected_box_index});
} else {
auto value_1 =
helper_->Constant({1}, GetOnnxDtype(boxes_info[0].dtype), float(1.0));
auto split_boxes = helper_->Split(boxes_info[0].name,
std::vector<int64_t>(4, 1), int64_t(2));
auto xmax = helper_->MakeNode("Add", {split_boxes[2], value_1});
auto ymax = helper_->MakeNode("Add", {split_boxes[3], value_1});
auto new_boxes = helper_->MakeNode(
"Concat",
{split_boxes[0], split_boxes[1], xmax->output(0), ymax->output(0)});
AddAttribute(new_boxes, "axis", int64_t(2));
helper_->MakeNode("NonMaxSuppression",
{new_boxes->output(0), score_info[0].name, nms_top_k,
nms_threshold, score_threshold},
{selected_box_index});
}
KeepTopK(selected_box_index);
}
void NMSMapper::ExportAsCustomOp() {
auto boxes_info = GetInput("BBoxes");
auto score_info = GetInput("Scores");
auto out_info = GetOutput("Out");
auto index_info = GetOutput("Index");
auto num_rois_info = GetOutput("NmsRoisNum");
auto node = helper_->MakeNode(
custom_op_name, {boxes_info[0].name, score_info[0].name},
{out_info[0].name, index_info[0].name, num_rois_info[0].name});
node->set_domain("Paddle");
int64_t normalized = normalized_ ? 1 : 0;
AddAttribute(node, "normalized", normalized);
AddAttribute(node, "nms_threshold", nms_threshold_);
AddAttribute(node, "score_threshold", score_threshold_);
AddAttribute(node, "nms_eta", nms_eta_);
AddAttribute(node, "nms_top_k", nms_top_k_);
AddAttribute(node, "background_label", background_label_);
AddAttribute(node, "keep_top_k", keep_top_k_);
helper_->MakeValueInfo(boxes_info[0].name, boxes_info[0].dtype,
boxes_info[0].shape);
helper_->MakeValueInfo(score_info[0].name, score_info[0].dtype,
score_info[0].shape);
helper_->MakeValueInfo(out_info[0].name, out_info[0].dtype,
out_info[0].shape);
helper_->MakeValueInfo(index_info[0].name, index_info[0].dtype,
index_info[0].shape);
helper_->MakeValueInfo(num_rois_info[0].name, num_rois_info[0].dtype,
num_rois_info[0].shape);
}
void NMSMapper::ExportForTensorRT() {
auto boxes_info = GetInput("BBoxes");
auto score_info = GetInput("Scores");
auto out_info = GetOutput("Out");
auto index_info = GetOutput("Index");
auto num_rois_info = GetOutput("NmsRoisNum");
auto scores = helper_->Transpose(score_info[0].name, {0, 2, 1});
auto boxes = helper_->Unsqueeze(boxes_info[0].name, {2});
int64_t num_classes = score_info[0].shape[1];
auto repeats =
helper_->Constant(GetOnnxDtype(P2ODataType::INT64),
std::vector<int64_t>({1, 1, num_classes, 1}));
boxes = helper_->MakeNode("Tile", {boxes, repeats})->output(0);
auto nms_node =
helper_->MakeNode("BatchedNMSDynamic_TRT", {boxes, scores}, 4);
AddAttribute(nms_node, "shareLocation", int64_t(0));
AddAttribute(nms_node, "backgroundLabelId", background_label_);
AddAttribute(nms_node, "numClasses", num_classes);
int64_t nms_top_k = nms_top_k_;
int64_t keep_top_k = keep_top_k_;
if (nms_top_k > 4096) {
Warn()
<< "Paramter nms_top_k:" << nms_top_k
<< " is exceed limit in TensorRT BatchedNMS plugin, will force to 4096."
<< std::endl;
nms_top_k = 4096;
}
if (keep_top_k > 4096) {
Warn()
<< "Parameter keep_top_k:" << keep_top_k
<< " is exceed limit in TensorRT BatchedNMS plugin, will force to 4096."
<< std::endl;
keep_top_k = 4096;
}
AddAttribute(nms_node, "topK", nms_top_k);
AddAttribute(nms_node, "keepTopK", keep_top_k);
AddAttribute(nms_node, "scoreThreshold", score_threshold_);
AddAttribute(nms_node, "iouThreshold", nms_threshold_);
if (normalized_) {
AddAttribute(nms_node, "isNormalized", int64_t(1));
} else {
AddAttribute(nms_node, "isNormalized", int64_t(0));
}
AddAttribute(nms_node, "clipBoxes", int64_t(0));
nms_node->set_domain("Paddle");
auto num_rois = helper_->Reshape(nms_node->output(0), {-1});
helper_->AutoCast(num_rois, num_rois_info[0].name, P2ODataType::INT32,
num_rois_info[0].dtype);
auto out_classes = helper_->Reshape(nms_node->output(3), {-1, 1});
auto out_scores = helper_->Reshape(nms_node->output(2), {-1, 1});
auto out_boxes = helper_->Reshape(nms_node->output(1), {-1, 4});
out_classes =
helper_->AutoCast(out_classes, P2ODataType::INT32, P2ODataType::FP32);
helper_->Concat({out_classes, out_scores, out_boxes}, {out_info[0].name}, 1);
// EfficientNMS_TRT cannot get the same result, so disable now
// auto nms_node = helper_->MakeNode("EfficientNMS_TRT", {boxes_info[0].name,
// score}, 4);
// AddAttribute(nms_node, "plugin_version", "1");
// AddAttribute(nms_node, "background_class", background_label_);
// AddAttribute(nms_node, "max_output_boxes", nms_top_k_);
// AddAttribute(nms_node, "score_threshold", score_threshold_);
// AddAttribute(nms_node, "iou_threshold", nms_threshold_);
// AddAttribute(nms_node, "score_activation", int64_t(0));
// AddAttribute(nms_node, "box_coding", int64_t(0));
// nms_node->set_domain("Paddle");
//
// auto num_rois = helper_->Reshape(nms_node->output(0), {-1});
// helper_->AutoCast(num_rois, num_rois_info[0].name, P2ODataType::INT32,
// num_rois_info[0].dtype);
//
// auto out_classes = helper_->Reshape(nms_node->output(3), {-1, 1});
// auto out_scores = helper_->Reshape(nms_node->output(2), {-1, 1});
// auto out_boxes = helper_->Reshape(nms_node->output(1), {-1, 4});
// out_classes = helper_->AutoCast(out_classes, P2ODataType::INT32,
// P2ODataType::FP32);
// helper_->Concat({out_classes, out_scores, out_boxes}, {out_info[0].name},
// 1);
}
} // namespace paddle2onnx