FastDeploy/paddle2onnx/mapper/detection/yolo_box.cc

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle2onnx/mapper/detection/yolo_box.h"
namespace paddle2onnx {
REGISTER_MAPPER(yolo_box, YoloBoxMapper)
int32_t YoloBoxMapper::GetMinOpset(bool verbose) {
Logger(verbose, 11) << RequireOpset(11) << std::endl;
return 11;
}
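// Opset11() lowers Paddle's yolo_box op into an ONNX subgraph that decodes
// the raw YOLO head output into normalized box coordinates and per-class
// scores, following the reference NumPy decoding quoted in the comments
// below.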
void YoloBoxMapper::Opset11() {
auto x_info_ori = GetInput("X");
  // If the input is not float32 (e.g. float64), cast it to float32 first
auto x_info = x_info_ori;
if (x_info_ori[0].dtype != P2ODataType::FP32) {
x_info[0].name = helper_->AutoCast(x_info_ori[0].name, x_info_ori[0].dtype,
P2ODataType::FP32);
x_info[0].dtype = P2ODataType::FP32;
}
auto im_size_info = GetInput("ImgSize");
auto boxes_info = GetOutput("Boxes");
auto scores_info = GetOutput("Scores");
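  // 999999 serves as an "end of this axis" sentinel for the Slice ends below
  // (assumed larger than any real feature-map extent). anchors_ stores
  // flattened (width, height) pairs, so the anchor count is half its length.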
int64_t max_int = 999999;
int64_t anchor_num = anchors_.size() / 2;
auto x_shape = helper_->MakeNode("Shape", {x_info[0].name});
std::vector<std::string> nchw = helper_->Split(
x_shape->output(0), std::vector<int64_t>(4, 1), int64_t(0));
std::string float_h =
helper_->AutoCast(nchw[2], P2ODataType::INT64, x_info[0].dtype);
std::string float_w =
helper_->AutoCast(nchw[3], P2ODataType::INT64, x_info[0].dtype);
auto anchor_num_tensor =
helper_->Constant({1}, ONNX_NAMESPACE::TensorProto::INT64, anchor_num);
auto x_name = x_info[0].name;
if (iou_aware_) {
    // Strip off the IoU-aware channels, keeping x[:, anchor_num:, :, :].
    // Here we rely on the ONNX rule that a Slice end far past the axis size
    // is clamped to the end of the axis. This is standard ONNX `Slice`
    // behavior, but it is not certain that every inference engine implements
    // it this way; revisit this if one does not.
    x_name = helper_->Slice(x_name, {0, 1, 2, 3}, {0, anchor_num, 0, 0},
                            {max_int, max_int, max_int, max_int});
}
auto unknown_dim =
helper_->Constant({1}, ONNX_NAMESPACE::TensorProto::INT64, int64_t(-1));
auto shape_0 = helper_->MakeNode(
"Concat", {nchw[0], anchor_num_tensor, unknown_dim, nchw[2], nchw[3]});
AddAttribute(shape_0, "axis", int64_t(0));
auto reshaped_x = helper_->MakeNode("Reshape", {x_name, shape_0->output(0)});
auto transposed_x = helper_->MakeNode("Transpose", {reshaped_x->output(0)});
{
std::vector<int64_t> perm({0, 1, 3, 4, 2});
AddAttribute(transposed_x, "perm", perm);
}
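  // transposed_x now has layout [N, anchor_num, H, W, 5 + class_num]; the
  // last axis packs (x, y, w, h, objectness, class scores...).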
// grid_x = np.tile(np.arange(w).reshape((1, w)), (h, 1))
// grid_y = np.tile(np.arange(h).reshape((h, 1)), (1, w))
auto float_value_0 =
helper_->Constant({}, GetOnnxDtype(x_info[0].dtype), float(0.0));
auto float_value_1 =
helper_->Constant({}, GetOnnxDtype(x_info[0].dtype), float(1.0));
auto scalar_float_w = helper_->Squeeze(float_w, {});
auto scalar_float_h = helper_->Squeeze(float_h, {});
auto grid_x_0 = helper_->MakeNode(
"Range", {float_value_0, scalar_float_w, float_value_1}); // shape is [w]
auto grid_y_0 = helper_->MakeNode(
"Range", {float_value_0, scalar_float_h, float_value_1}); // shape is [h]
auto grid_x_1 = helper_->MakeNode(
"Tile", {grid_x_0->output(0), nchw[2]}); // shape is [w*h]
auto grid_y_1 = helper_->MakeNode(
"Tile", {grid_y_0->output(0), nchw[3]}); // shape is [h*w]
  auto int_value_1 =
      helper_->Constant({1}, ONNX_NAMESPACE::TensorProto::INT64, int64_t(1));
auto grid_shape_x =
helper_->MakeNode("Concat", {nchw[2], nchw[3], int_value_1});
auto grid_shape_y =
helper_->MakeNode("Concat", {nchw[3], nchw[2], int_value_1});
AddAttribute(grid_shape_x, "axis", int64_t(0));
AddAttribute(grid_shape_y, "axis", int64_t(0));
auto grid_x = helper_->MakeNode(
"Reshape", {grid_x_1->output(0), grid_shape_x->output(0)});
auto grid_y_2 = helper_->MakeNode(
"Reshape", {grid_y_1->output(0), grid_shape_y->output(0)});
auto grid_y = helper_->MakeNode("Transpose", {grid_y_2->output(0)});
{
std::vector<int64_t> perm({1, 0, 2});
AddAttribute(grid_y, "perm", perm);
}
auto grid =
helper_->MakeNode("Concat", {grid_x->output(0), grid_y->output(0)});
AddAttribute(grid, "axis", int64_t(2));
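  // grid now has shape [H, W, 2], holding the (x, y) offset of every cell;
  // it broadcasts against the sliced xy predictions below.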
  // pred_box[:, :, :, :, 0] =
  //     (grid_x + sigmoid(pred_box[:, :, :, :, 0]) * scale_x_y + bias_x_y) / w
  // pred_box[:, :, :, :, 1] =
  //     (grid_y + sigmoid(pred_box[:, :, :, :, 1]) * scale_x_y + bias_x_y) / h
auto pred_box_xy =
helper_->Slice(transposed_x->output(0), {0, 1, 2, 3, 4}, {0, 0, 0, 0, 0},
{max_int, max_int, max_int, max_int, 2});
auto scale_x_y =
helper_->Constant({1}, GetOnnxDtype(x_info[0].dtype), scale_x_y_);
float bias_x_y_value = (1.0 - scale_x_y_) / 2.0;
auto bias_x_y =
helper_->Constant({1}, GetOnnxDtype(x_info[0].dtype), bias_x_y_value);
auto wh = helper_->MakeNode("Concat", {float_w, float_h});
AddAttribute(wh, "axis", int64_t(0));
pred_box_xy = helper_->MakeNode("Sigmoid", {pred_box_xy})->output(0);
pred_box_xy = helper_->MakeNode("Mul", {pred_box_xy, scale_x_y})->output(0);
pred_box_xy = helper_->MakeNode("Add", {pred_box_xy, bias_x_y})->output(0);
pred_box_xy =
helper_->MakeNode("Add", {pred_box_xy, grid->output(0)})->output(0);
pred_box_xy =
helper_->MakeNode("Div", {pred_box_xy, wh->output(0)})->output(0);
// anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
// anchors_s = np.array(
// [(an_w / input_w, an_h / input_h) for an_w, an_h in anchors])
// anchor_w = anchors_s[:, 0:1].reshape((1, an_num, 1, 1))
// anchor_h = anchors_s[:, 1:2].reshape((1, an_num, 1, 1))
  std::vector<int64_t> valid_anchors(anchors_.begin(),
                                     anchors_.begin() + anchor_num * 2);
auto anchors =
helper_->Constant(GetOnnxDtype(x_info[0].dtype), valid_anchors);
anchors = helper_->Reshape(anchors, {anchor_num, 2});
auto downsample =
helper_->Constant({1}, GetOnnxDtype(x_info[0].dtype), downsample_ratio_);
auto ori_wh =
helper_->MakeNode("Mul", {wh->output(0), downsample})->output(0);
anchors = helper_->MakeNode("Div", {anchors, ori_wh})->output(0);
  // The following Div requires unidirectional broadcasting. This satisfies
  // the ONNX definition, but it is not certain that all inference engines
  // (e.g. TensorRT, OpenVINO) support this rule.
  // anchor_w = anchors_s[:, 0:1].reshape((1, an_num, 1, 1))
  // anchor_h = anchors_s[:, 1:2].reshape((1, an_num, 1, 1))
  // pred_box[:, :, :, :, 2] = np.exp(pred_box[:, :, :, :, 2]) * anchor_w
  // pred_box[:, :, :, :, 3] = np.exp(pred_box[:, :, :, :, 3]) * anchor_h
anchors = helper_->Reshape(anchors, {1, anchor_num, 1, 1, 2});
auto pred_box_wh =
helper_->Slice(transposed_x->output(0), {0, 1, 2, 3, 4}, {0, 0, 0, 0, 2},
{max_int, max_int, max_int, max_int, 4});
pred_box_wh = helper_->MakeNode("Exp", {pred_box_wh})->output(0);
pred_box_wh = helper_->MakeNode("Mul", {pred_box_wh, anchors})->output(0);
// if iou_aware:
// pred_conf = sigmoid(x[:, :, :, :, 4:5])**(
// 1 - iou_aware_factor) * sigmoid(ioup)**iou_aware_factor
// else:
// pred_conf = sigmoid(x[:, :, :, :, 4:5])
auto confidence =
helper_->Slice(transposed_x->output(0), {0, 1, 2, 3, 4}, {0, 0, 0, 0, 4},
{max_int, max_int, max_int, max_int, 5});
std::string pred_conf = helper_->MakeNode("Sigmoid", {confidence})->output(0);
if (iou_aware_) {
auto ioup = helper_->Slice(x_info[0].name, {0, 1, 2, 3}, {0, 0, 0, 0},
{max_int, anchor_num, max_int, max_int});
ioup = helper_->Unsqueeze(ioup, {4});
ioup = helper_->MakeNode("Sigmoid", {ioup})->output(0);
float power_value_0 = 1 - iou_aware_factor_;
auto power_0 =
helper_->Constant({1}, GetOnnxDtype(x_info[0].dtype), power_value_0);
auto power_1 = helper_->Constant({1}, GetOnnxDtype(x_info[0].dtype),
iou_aware_factor_);
ioup = helper_->MakeNode("Pow", {ioup, power_1})->output(0);
pred_conf = helper_->MakeNode("Pow", {pred_conf, power_0})->output(0);
pred_conf = helper_->MakeNode("Mul", {pred_conf, ioup})->output(0);
}
// pred_conf[pred_conf < conf_thresh] = 0.
// pred_score = sigmoid(x[:, :, :, :, 5:]) * pred_conf
// pred_box = pred_box * (pred_conf > 0.).astype('float32')
auto value_2 =
helper_->Constant({1}, GetOnnxDtype(x_info[0].dtype), float(2.0));
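  // Convert boxes from (center, size) to corner form:
  // min_xy = center - wh / 2, max_xy = center + wh / 2.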
auto center = helper_->MakeNode("Div", {pred_box_wh, value_2})->output(0);
auto min_xy = helper_->MakeNode("Sub", {pred_box_xy, center})->output(0);
auto max_xy = helper_->MakeNode("Add", {pred_box_xy, center})->output(0);
auto conf_thresh =
helper_->Constant({1}, GetOnnxDtype(x_info[0].dtype), conf_thresh_);
auto filter =
helper_->MakeNode("Greater", {pred_conf, conf_thresh})->output(0);
filter = helper_->AutoCast(filter, P2ODataType::BOOL, x_info[0].dtype);
pred_conf = helper_->MakeNode("Mul", {pred_conf, filter})->output(0);
auto pred_score =
helper_->Slice(transposed_x->output(0), {0, 1, 2, 3, 4}, {0, 0, 0, 0, 5},
{max_int, max_int, max_int, max_int, max_int});
pred_score = helper_->MakeNode("Sigmoid", {pred_score})->output(0);
pred_score = helper_->MakeNode("Mul", {pred_score, pred_conf})->output(0);
auto pred_box = helper_->Concat({min_xy, max_xy}, 4);
pred_box = helper_->MakeNode("Mul", {pred_box, filter})->output(0);
auto value_neg_1 =
helper_->Constant({1}, ONNX_NAMESPACE::TensorProto::INT64, int64_t(-1));
auto value_4 =
helper_->Constant({1}, ONNX_NAMESPACE::TensorProto::INT64, int64_t(4));
auto new_shape = helper_->Concat({nchw[0], value_neg_1, value_4}, 0);
pred_box = helper_->MakeNode("Reshape", {pred_box, new_shape})->output(0);
auto float_img_size = helper_->AutoCast(
im_size_info[0].name, im_size_info[0].dtype, x_info[0].dtype);
float_img_size = helper_->Unsqueeze(float_img_size, {1});
auto split_im_hw = helper_->Split(float_img_size, {1, 1}, 2);
auto im_whwh = helper_->Concat(
{split_im_hw[1], split_im_hw[0], split_im_hw[1], split_im_hw[0]}, 2);
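  // im_whwh has shape [N, 1, 4] holding (w, h, w, h) per image; multiplying
  // by it rescales the normalized boxes back to pixel coordinates.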
if (!clip_bbox_) {
auto out = helper_->MakeNode("Mul", {pred_box, im_whwh})->output(0);
helper_->AutoCast(out, boxes_info[0].name, x_info[0].dtype,
boxes_info[0].dtype);
} else {
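    // Clip boxes to the image: Relu clamps xmin/ymin at 0, and Min caps
    // xmax/ymax at (image size - 1).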
pred_box = helper_->MakeNode("Mul", {pred_box, im_whwh})->output(0);
auto im_wh = helper_->Concat({split_im_hw[1], split_im_hw[0]}, 2);
im_wh = helper_->MakeNode("Sub", {im_wh, float_value_1})->output(0);
auto pred_box_xymin_xymax = helper_->Split(pred_box, {2, 2}, 2);
pred_box_xymin_xymax[0] =
helper_->MakeNode("Relu", {pred_box_xymin_xymax[0]})->output(0);
pred_box_xymin_xymax[1] =
helper_->MakeNode("Min", {pred_box_xymin_xymax[1], im_wh})->output(0);
auto out = helper_->Concat(pred_box_xymin_xymax, 2);
helper_->AutoCast(out, boxes_info[0].name, x_info[0].dtype,
boxes_info[0].dtype);
}
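  // Finally reshape the class scores to [N, num_boxes, class_num] and cast
  // them to the expected output dtype.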
auto class_num =
helper_->Constant({1}, ONNX_NAMESPACE::TensorProto::INT64, class_num_);
auto score_out_shape =
helper_->Concat({nchw[0], value_neg_1, class_num}, int64_t(0));
auto score_out =
helper_->MakeNode("Reshape", {pred_score, score_out_shape})->output(0);
helper_->AutoCast(score_out, scores_info[0].name, x_info[0].dtype,
scores_info[0].dtype);
}
} // namespace paddle2onnx