mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 09:07:10 +08:00

* Add notes for tensors * Optimize some apis * move some warnings * Support build with Paddle2ONNX * Add protobuf support * Fix compile on mac * add clearn package script * Add paddle2onnx code * remove submodule * Add onnx ocde * remove softlink * add onnx code * fix error * Add cmake file * fix patchelf * update paddle2onnx * Delete .gitmodules --------- Co-authored-by: PaddleCI <paddle_ci@example.com> Co-authored-by: pangyoki <pangyoki@126.com> Co-authored-by: jiangjiajun <jiangjiajun@baidu.lcom>
361 lines
15 KiB
C++
361 lines
15 KiB
C++
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include "paddle2onnx/mapper/detection/multiclass_nms.h"
|
|
|
|
namespace paddle2onnx {
|
|
|
|
// Associates the op type `multiclass_nms3` with NMSMapper. REGISTER_MAPPER is
// defined outside this file; presumably it adds the mapper to the global
// registry used during export -- confirm against the macro definition.
REGISTER_MAPPER(multiclass_nms3, NMSMapper);
|
|
|
|
int32_t NMSMapper::GetMinOpset(bool verbose) {
|
|
auto boxes_info = GetInput("BBoxes");
|
|
auto score_info = GetInput("Scores");
|
|
if (score_info[0].Rank() != 3) {
|
|
Error() << "Lod Tensor input is not supported, which means the shape of "
|
|
"input(scores) is [M, C] now, but Paddle2ONNX only support [N, "
|
|
"C, M]."
|
|
<< std::endl;
|
|
return -1;
|
|
}
|
|
if (boxes_info[0].Rank() != 3) {
|
|
Error() << "Only support input boxes as 3-D Tensor, but now it's rank is "
|
|
<< boxes_info[0].Rank() << "." << std::endl;
|
|
return -1;
|
|
}
|
|
if (score_info[0].shape[1] <= 0) {
|
|
Error() << "The 2nd-dimension of score should be fixed(means the number of "
|
|
"classes), but now it's "
|
|
<< score_info[0].shape[1] << "." << std::endl;
|
|
return -1;
|
|
}
|
|
|
|
if (export_as_custom_op || this->deploy_backend == "tensorrt") {
|
|
return 7;
|
|
}
|
|
|
|
Logger(verbose, 10) << RequireOpset(10) << std::endl;
|
|
return 10;
|
|
}
|
|
|
|
void NMSMapper::KeepTopK(const std::string& selected_indices) {
|
|
auto boxes_info = GetInput("BBoxes");
|
|
auto score_info = GetInput("Scores");
|
|
auto out_info = GetOutput("Out");
|
|
auto index_info = GetOutput("Index");
|
|
auto num_rois_info = GetOutput("NmsRoisNum");
|
|
auto value_0 =
|
|
helper_->Constant({1}, ONNX_NAMESPACE::TensorProto::INT64, int64_t(0));
|
|
auto value_1 =
|
|
helper_->Constant({1}, ONNX_NAMESPACE::TensorProto::INT64, int64_t(1));
|
|
auto value_2 =
|
|
helper_->Constant({1}, ONNX_NAMESPACE::TensorProto::INT64, int64_t(2));
|
|
auto value_neg_1 =
|
|
helper_->Constant({1}, ONNX_NAMESPACE::TensorProto::INT64, int64_t(-1));
|
|
|
|
auto class_id = helper_->MakeNode("Gather", {selected_indices, value_1});
|
|
AddAttribute(class_id, "axis", int64_t(1));
|
|
|
|
auto box_id = helper_->MakeNode("Gather", {selected_indices, value_2});
|
|
AddAttribute(box_id, "axis", int64_t(1));
|
|
|
|
auto filtered_class_id = class_id->output(0);
|
|
auto filtered_box_id = box_id->output(0);
|
|
if (background_label_ >= 0) {
|
|
auto filter_indices = MapperHelper::Get()->GenName("nms.filter_background");
|
|
auto squeezed_class_id =
|
|
helper_->Squeeze(class_id->output(0), std::vector<int64_t>(1, 1));
|
|
if (background_label_ > 0) {
|
|
auto background = helper_->Constant(
|
|
{1}, ONNX_NAMESPACE::TensorProto::INT64, background_label_);
|
|
auto diff = helper_->MakeNode("Sub", {squeezed_class_id, background});
|
|
helper_->MakeNode("NonZero", {diff->output(0)}, {filter_indices});
|
|
} else if (background_label_ == 0) {
|
|
helper_->MakeNode("NonZero", {squeezed_class_id}, {filter_indices});
|
|
}
|
|
auto new_class_id =
|
|
helper_->MakeNode("Gather", {filtered_class_id, filter_indices});
|
|
AddAttribute(new_class_id, "axis", int64_t(0));
|
|
auto new_box_id =
|
|
helper_->MakeNode("Gather", {box_id->output(0), filter_indices});
|
|
AddAttribute(new_box_id, "axis", int64_t(0));
|
|
filtered_class_id = new_class_id->output(0);
|
|
filtered_box_id = new_box_id->output(0);
|
|
}
|
|
|
|
// Here is a little complicated
|
|
// Since we need to gather all the scores for the final boxes to filter the
|
|
// top-k boxes Now we have the follow inputs
|
|
// - scores: [N, C, M] N means batch size(but now it will be regarded as
|
|
// 1); C means number of classes; M means number of boxes for each classes
|
|
// - selected_indices: [num_selected_indices, 3], and 3 means [batch,
|
|
// class_id, box_id]. We will use this inputs to gather score
|
|
// So now we will first flatten `scores` to shape of [1 * C * M], then we
|
|
// gather scores by each elements in `selected_indices` The index need be
|
|
// calculated as
|
|
// `gather_index = class_id * M + box_id`
|
|
auto flatten_score = helper_->Flatten(score_info[0].name);
|
|
auto num_boxes_each_class = helper_->Constant(
|
|
{1}, ONNX_NAMESPACE::TensorProto::INT64, score_info[0].shape[2]);
|
|
auto gather_indices_0 =
|
|
helper_->MakeNode("Mul", {filtered_class_id, num_boxes_each_class});
|
|
auto gather_indices_1 =
|
|
helper_->MakeNode("Add", {gather_indices_0->output(0), filtered_box_id});
|
|
auto gather_indices = helper_->Flatten(gather_indices_1->output(0));
|
|
auto gathered_scores =
|
|
helper_->MakeNode("Gather", {flatten_score, gather_indices});
|
|
AddAttribute(gathered_scores, "axis", int64_t(0));
|
|
|
|
// Now we will perform keep_top_k process
|
|
// First we need to check if the number of remaining boxes is greater than
|
|
// keep_top_k Otherwise, we will downgrade the keep_top_k to number of
|
|
// remaining boxes
|
|
auto final_classes = filtered_class_id;
|
|
auto final_boxes_id = filtered_box_id;
|
|
auto final_scores = gathered_scores->output(0);
|
|
if (keep_top_k_ > 0) {
|
|
// get proper topk
|
|
auto shape_of_scores = helper_->MakeNode("Shape", {final_scores});
|
|
auto num_of_boxes =
|
|
helper_->Slice(shape_of_scores->output(0), std::vector<int64_t>(1, 0),
|
|
std::vector<int64_t>(1, 0), std::vector<int64_t>(1, 1));
|
|
auto top_k =
|
|
helper_->Constant({1}, ONNX_NAMESPACE::TensorProto::INT64, keep_top_k_);
|
|
auto ensemble_value = helper_->MakeNode("Concat", {num_of_boxes, top_k});
|
|
AddAttribute(ensemble_value, "axis", int64_t(0));
|
|
auto new_top_k =
|
|
helper_->MakeNode("ReduceMin", {ensemble_value->output(0)});
|
|
AddAttribute(new_top_k, "axes", std::vector<int64_t>(1, 0));
|
|
AddAttribute(new_top_k, "keepdims", int64_t(1));
|
|
|
|
// the output is topk_scores, topk_score_indices
|
|
auto topk_node =
|
|
helper_->MakeNode("TopK", {final_scores, new_top_k->output(0)}, 2);
|
|
auto topk_scores =
|
|
helper_->MakeNode("Gather", {final_scores, topk_node->output(1)});
|
|
AddAttribute(topk_scores, "axis", int64_t(0));
|
|
filtered_class_id =
|
|
helper_->MakeNode("Squeeze", {filtered_class_id})->output(0);
|
|
auto topk_classes =
|
|
helper_->MakeNode("Gather", {filtered_class_id, topk_node->output(1)});
|
|
AddAttribute(topk_classes, "axis", int64_t(0));
|
|
filtered_box_id =
|
|
helper_->MakeNode("Squeeze", {filtered_box_id})->output(0);
|
|
auto topk_boxes_id =
|
|
helper_->MakeNode("Gather", {filtered_box_id, topk_node->output(1)});
|
|
AddAttribute(topk_boxes_id, "axis", int64_t(0));
|
|
|
|
final_boxes_id = topk_boxes_id->output(0);
|
|
final_scores = topk_scores->output(0);
|
|
final_classes = topk_classes->output(0);
|
|
}
|
|
|
|
auto flatten_boxes_id = helper_->Flatten({final_boxes_id});
|
|
auto gathered_selected_boxes =
|
|
helper_->MakeNode("Gather", {boxes_info[0].name, flatten_boxes_id});
|
|
AddAttribute(gathered_selected_boxes, "axis", int64_t(1));
|
|
|
|
auto float_classes = helper_->MakeNode("Cast", {final_classes});
|
|
AddAttribute(float_classes, "to", ONNX_NAMESPACE::TensorProto::FLOAT);
|
|
|
|
std::vector<int64_t> shape{1, -1, 1};
|
|
auto unsqueezed_scores = helper_->Reshape({final_scores}, shape);
|
|
|
|
auto unsqueezed_class = helper_->Reshape({float_classes->output(0)}, shape);
|
|
|
|
auto box_result =
|
|
helper_->MakeNode("Concat", {unsqueezed_class, unsqueezed_scores,
|
|
gathered_selected_boxes->output(0)});
|
|
AddAttribute(box_result, "axis", int64_t(2));
|
|
helper_->Squeeze({box_result->output(0)}, {out_info[0].name},
|
|
std::vector<int64_t>(1, 0));
|
|
|
|
// other outputs, we don't use sometimes
|
|
// there's lots of Cast in exporting
|
|
// TODO(jiangjiajun) A pass to eleminate all the useless Cast is needed
|
|
auto reshaped_index_result =
|
|
helper_->Reshape({flatten_boxes_id}, {int64_t(-1), int64_t(1)});
|
|
auto index_result =
|
|
helper_->MakeNode("Cast", {reshaped_index_result}, {index_info[0].name});
|
|
AddAttribute(index_result, "to", GetOnnxDtype(index_info[0].dtype));
|
|
|
|
auto out_box_shape = helper_->MakeNode("Shape", {out_info[0].name});
|
|
auto num_rois_result =
|
|
helper_->Slice({out_box_shape->output(0)}, std::vector<int64_t>(1, 0),
|
|
std::vector<int64_t>(1, 0), std::vector<int64_t>(1, 1));
|
|
auto int32_num_rois_result =
|
|
helper_->AutoCast(num_rois_result, num_rois_info[0].name,
|
|
P2ODataType::INT64, num_rois_info[0].dtype);
|
|
}
|
|
|
|
void NMSMapper::Opset10() {
|
|
if (this->deploy_backend == "tensorrt") {
|
|
return ExportForTensorRT();
|
|
}
|
|
auto boxes_info = GetInput("BBoxes");
|
|
auto score_info = GetInput("Scores");
|
|
if (boxes_info[0].shape[0] != 1) {
|
|
Warn()
|
|
<< "[WARNING] Due to the operator multiclass_nms3, the exported ONNX "
|
|
"model will only supports inference with input batch_size == 1."
|
|
<< std::endl;
|
|
}
|
|
int64_t num_classes = score_info[0].shape[1];
|
|
auto score_threshold = helper_->Constant(
|
|
{1}, ONNX_NAMESPACE::TensorProto::FLOAT, score_threshold_);
|
|
auto nms_threshold = helper_->Constant(
|
|
{1}, ONNX_NAMESPACE::TensorProto::FLOAT, nms_threshold_);
|
|
auto nms_top_k =
|
|
helper_->Constant({1}, ONNX_NAMESPACE::TensorProto::INT64, nms_top_k_);
|
|
|
|
auto selected_box_index = MapperHelper::Get()->GenName("nms.selected_index");
|
|
if (normalized_) {
|
|
helper_->MakeNode("NonMaxSuppression",
|
|
{boxes_info[0].name, score_info[0].name, nms_top_k,
|
|
nms_threshold, score_threshold},
|
|
{selected_box_index});
|
|
} else {
|
|
auto value_1 =
|
|
helper_->Constant({1}, GetOnnxDtype(boxes_info[0].dtype), float(1.0));
|
|
auto split_boxes = helper_->Split(boxes_info[0].name,
|
|
std::vector<int64_t>(4, 1), int64_t(2));
|
|
auto xmax = helper_->MakeNode("Add", {split_boxes[2], value_1});
|
|
auto ymax = helper_->MakeNode("Add", {split_boxes[3], value_1});
|
|
auto new_boxes = helper_->MakeNode(
|
|
"Concat",
|
|
{split_boxes[0], split_boxes[1], xmax->output(0), ymax->output(0)});
|
|
AddAttribute(new_boxes, "axis", int64_t(2));
|
|
helper_->MakeNode("NonMaxSuppression",
|
|
{new_boxes->output(0), score_info[0].name, nms_top_k,
|
|
nms_threshold, score_threshold},
|
|
{selected_box_index});
|
|
}
|
|
KeepTopK(selected_box_index);
|
|
}
|
|
|
|
void NMSMapper::ExportAsCustomOp() {
|
|
auto boxes_info = GetInput("BBoxes");
|
|
auto score_info = GetInput("Scores");
|
|
auto out_info = GetOutput("Out");
|
|
auto index_info = GetOutput("Index");
|
|
auto num_rois_info = GetOutput("NmsRoisNum");
|
|
auto node = helper_->MakeNode(
|
|
custom_op_name, {boxes_info[0].name, score_info[0].name},
|
|
{out_info[0].name, index_info[0].name, num_rois_info[0].name});
|
|
node->set_domain("Paddle");
|
|
int64_t normalized = normalized_ ? 1 : 0;
|
|
AddAttribute(node, "normalized", normalized);
|
|
AddAttribute(node, "nms_threshold", nms_threshold_);
|
|
AddAttribute(node, "score_threshold", score_threshold_);
|
|
AddAttribute(node, "nms_eta", nms_eta_);
|
|
AddAttribute(node, "nms_top_k", nms_top_k_);
|
|
AddAttribute(node, "background_label", background_label_);
|
|
AddAttribute(node, "keep_top_k", keep_top_k_);
|
|
helper_->MakeValueInfo(boxes_info[0].name, boxes_info[0].dtype,
|
|
boxes_info[0].shape);
|
|
helper_->MakeValueInfo(score_info[0].name, score_info[0].dtype,
|
|
score_info[0].shape);
|
|
helper_->MakeValueInfo(out_info[0].name, out_info[0].dtype,
|
|
out_info[0].shape);
|
|
helper_->MakeValueInfo(index_info[0].name, index_info[0].dtype,
|
|
index_info[0].shape);
|
|
helper_->MakeValueInfo(num_rois_info[0].name, num_rois_info[0].dtype,
|
|
num_rois_info[0].shape);
|
|
}
|
|
|
|
void NMSMapper::ExportForTensorRT() {
|
|
auto boxes_info = GetInput("BBoxes");
|
|
auto score_info = GetInput("Scores");
|
|
auto out_info = GetOutput("Out");
|
|
auto index_info = GetOutput("Index");
|
|
auto num_rois_info = GetOutput("NmsRoisNum");
|
|
|
|
auto scores = helper_->Transpose(score_info[0].name, {0, 2, 1});
|
|
auto boxes = helper_->Unsqueeze(boxes_info[0].name, {2});
|
|
int64_t num_classes = score_info[0].shape[1];
|
|
auto repeats =
|
|
helper_->Constant(GetOnnxDtype(P2ODataType::INT64),
|
|
std::vector<int64_t>({1, 1, num_classes, 1}));
|
|
boxes = helper_->MakeNode("Tile", {boxes, repeats})->output(0);
|
|
|
|
auto nms_node =
|
|
helper_->MakeNode("BatchedNMSDynamic_TRT", {boxes, scores}, 4);
|
|
AddAttribute(nms_node, "shareLocation", int64_t(0));
|
|
AddAttribute(nms_node, "backgroundLabelId", background_label_);
|
|
AddAttribute(nms_node, "numClasses", num_classes);
|
|
int64_t nms_top_k = nms_top_k_;
|
|
int64_t keep_top_k = keep_top_k_;
|
|
if (nms_top_k > 4096) {
|
|
Warn()
|
|
<< "Paramter nms_top_k:" << nms_top_k
|
|
<< " is exceed limit in TensorRT BatchedNMS plugin, will force to 4096."
|
|
<< std::endl;
|
|
nms_top_k = 4096;
|
|
}
|
|
if (keep_top_k > 4096) {
|
|
Warn()
|
|
<< "Parameter keep_top_k:" << keep_top_k
|
|
<< " is exceed limit in TensorRT BatchedNMS plugin, will force to 4096."
|
|
<< std::endl;
|
|
keep_top_k = 4096;
|
|
}
|
|
AddAttribute(nms_node, "topK", nms_top_k);
|
|
AddAttribute(nms_node, "keepTopK", keep_top_k);
|
|
AddAttribute(nms_node, "scoreThreshold", score_threshold_);
|
|
AddAttribute(nms_node, "iouThreshold", nms_threshold_);
|
|
if (normalized_) {
|
|
AddAttribute(nms_node, "isNormalized", int64_t(1));
|
|
} else {
|
|
AddAttribute(nms_node, "isNormalized", int64_t(0));
|
|
}
|
|
AddAttribute(nms_node, "clipBoxes", int64_t(0));
|
|
nms_node->set_domain("Paddle");
|
|
|
|
auto num_rois = helper_->Reshape(nms_node->output(0), {-1});
|
|
helper_->AutoCast(num_rois, num_rois_info[0].name, P2ODataType::INT32,
|
|
num_rois_info[0].dtype);
|
|
|
|
auto out_classes = helper_->Reshape(nms_node->output(3), {-1, 1});
|
|
auto out_scores = helper_->Reshape(nms_node->output(2), {-1, 1});
|
|
auto out_boxes = helper_->Reshape(nms_node->output(1), {-1, 4});
|
|
out_classes =
|
|
helper_->AutoCast(out_classes, P2ODataType::INT32, P2ODataType::FP32);
|
|
helper_->Concat({out_classes, out_scores, out_boxes}, {out_info[0].name}, 1);
|
|
|
|
// EfficientNMS_TRT cannot get the same result, so disable now
|
|
// auto nms_node = helper_->MakeNode("EfficientNMS_TRT", {boxes_info[0].name,
|
|
// score}, 4);
|
|
// AddAttribute(nms_node, "plugin_version", "1");
|
|
// AddAttribute(nms_node, "background_class", background_label_);
|
|
// AddAttribute(nms_node, "max_output_boxes", nms_top_k_);
|
|
// AddAttribute(nms_node, "score_threshold", score_threshold_);
|
|
// AddAttribute(nms_node, "iou_threshold", nms_threshold_);
|
|
// AddAttribute(nms_node, "score_activation", int64_t(0));
|
|
// AddAttribute(nms_node, "box_coding", int64_t(0));
|
|
// nms_node->set_domain("Paddle");
|
|
//
|
|
// auto num_rois = helper_->Reshape(nms_node->output(0), {-1});
|
|
// helper_->AutoCast(num_rois, num_rois_info[0].name, P2ODataType::INT32,
|
|
// num_rois_info[0].dtype);
|
|
//
|
|
// auto out_classes = helper_->Reshape(nms_node->output(3), {-1, 1});
|
|
// auto out_scores = helper_->Reshape(nms_node->output(2), {-1, 1});
|
|
// auto out_boxes = helper_->Reshape(nms_node->output(1), {-1, 4});
|
|
// out_classes = helper_->AutoCast(out_classes, P2ODataType::INT32,
|
|
// P2ODataType::FP32);
|
|
// helper_->Concat({out_classes, out_scores, out_boxes}, {out_info[0].name},
|
|
// 1);
|
|
}
|
|
|
|
} // namespace paddle2onnx
|