Files
FastDeploy/fastdeploy/vision/visualize/detection.cc
WJJ1995 aa6931bee9 [Model] Add YOLOv5-seg (#988)
* add onnx_ort_runtime demo

* rm in requirements

* support batch eval

* fixed MattingResults bug

* move assignment for DetectionResult

* integrated x2paddle

* add model convert readme

* update readme

* re-lint

* add processor api

* Add MattingResult Free

* change valid_cpu_backends order

* add ppocr benchmark

* mv bs from 64 to 32

* fixed quantize.md

* fixed quantize bugs

* Add Monitor for benchmark

* update mem monitor

* Set trt_max_batch_size default 1

* fixed ocr benchmark bug

* support yolov5 in serving

* Fixed yolov5 serving

* Fixed postprocess

* update yolov5 to 7.0

* add poros runtime demos

* update readme

* Support poros abi=1

* rm useless note

* deal with comments

* support pp_trt for ppseg

* fixed symlink problem

* Add is_mini_pad and stride for yolov5

* Add yolo series for paddle format

* fixed bugs

* fixed bug

* support yolov5seg

* fixed bug

* refactor yolov5seg

* fixed bug

* mv Mask int32 to uint8

* add yolov5seg example

* rm log info

* fixed code style

* add yolov5seg example in python

* fixed dtype bug

* update note

* deal with comments

* get sorted index

* add yolov5seg test case

* Add GPL-3.0 License

* add round func

* deal with comments

* deal with commens

Co-authored-by: Jason <jiangjiajun@baidu.com>
2023-01-11 15:36:32 +08:00

287 lines
12 KiB
C++

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include "fastdeploy/vision/visualize/visualize.h"
#include "opencv2/imgproc/imgproc.hpp"
namespace fastdeploy {
namespace vision {
cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result,
float score_threshold, int line_size, float font_size) {
if (result.contain_masks) {
FDASSERT(result.boxes.size() == result.masks.size(),
"The size of masks must be equal to the size of boxes, but now "
"%zu != %zu.",
result.boxes.size(), result.masks.size());
}
int max_label_id =
*std::max_element(result.label_ids.begin(), result.label_ids.end());
std::vector<int> color_map = GenerateColorMap(max_label_id);
int h = im.rows;
int w = im.cols;
auto vis_im = im.clone();
for (size_t i = 0; i < result.boxes.size(); ++i) {
if (result.scores[i] < score_threshold) {
continue;
}
int x1 = static_cast<int>(round(result.boxes[i][0]));
int y1 = static_cast<int>(round(result.boxes[i][1]));
int x2 = static_cast<int>(round(result.boxes[i][2]));
int y2 = static_cast<int>(round(result.boxes[i][3]));
int box_h = y2 - y1;
int box_w = x2 - x1;
int c0 = color_map[3 * result.label_ids[i] + 0];
int c1 = color_map[3 * result.label_ids[i] + 1];
int c2 = color_map[3 * result.label_ids[i] + 2];
cv::Scalar rect_color = cv::Scalar(c0, c1, c2);
std::string id = std::to_string(result.label_ids[i]);
std::string score = std::to_string(result.scores[i]);
if (score.size() > 4) {
score = score.substr(0, 4);
}
std::string text = id + ", " + score;
int font = cv::FONT_HERSHEY_SIMPLEX;
cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr);
cv::Point origin;
origin.x = x1;
origin.y = y1;
cv::Rect rect(x1, y1, box_w, box_h);
cv::rectangle(vis_im, rect, rect_color, line_size);
cv::putText(vis_im, text, origin, font, font_size,
cv::Scalar(255, 255, 255), 1);
if (result.contain_masks) {
int mask_h = static_cast<int>(result.masks[i].shape[0]);
int mask_w = static_cast<int>(result.masks[i].shape[1]);
// non-const pointer for cv:Mat constructor
uint8_t* mask_raw_data = const_cast<uint8_t*>(
static_cast<const uint8_t*>(result.masks[i].Data()));
// only reference to mask data (zero copy)
cv::Mat mask(mask_h, mask_w, CV_8UC1, mask_raw_data);
if ((mask_h != box_h) || (mask_w != box_w)) {
cv::resize(mask, mask, cv::Size(box_w, box_h));
}
// use a bright color for instance mask
int mc0 = 255 - c0 >= 127 ? 255 - c0 : 127;
int mc1 = 255 - c1 >= 127 ? 255 - c1 : 127;
int mc2 = 255 - c2 >= 127 ? 255 - c2 : 127;
uint8_t* mask_data = reinterpret_cast<uint8_t*>(mask.data);
// inplace blending (zero copy)
uchar* vis_im_data = static_cast<uchar*>(vis_im.data);
for (size_t i = y1; i < y2; ++i) {
for (size_t j = x1; j < x2; ++j) {
if (mask_data[(i - y1) * mask_w + (j - x1)] != 0) {
vis_im_data[i * w * 3 + j * 3 + 0] = cv::saturate_cast<uchar>(
static_cast<float>(mc0) * 0.5f +
static_cast<float>(vis_im_data[i * w * 3 + j * 3 + 0]) * 0.5f);
vis_im_data[i * w * 3 + j * 3 + 1] = cv::saturate_cast<uchar>(
static_cast<float>(mc1) * 0.5f +
static_cast<float>(vis_im_data[i * w * 3 + j * 3 + 1]) * 0.5f);
vis_im_data[i * w * 3 + j * 3 + 2] = cv::saturate_cast<uchar>(
static_cast<float>(mc2) * 0.5f +
static_cast<float>(vis_im_data[i * w * 3 + j * 3 + 2]) * 0.5f);
}
}
}
}
}
return vis_im;
}
// Visualize DetectionResult with custom labels.
cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result,
const std::vector<std::string>& labels,
float score_threshold, int line_size, float font_size) {
if (result.contain_masks) {
FDASSERT(result.boxes.size() == result.masks.size(),
"The size of masks must be equal to the size of boxes, but now "
"%zu != %zu.",
result.boxes.size(), result.masks.size());
}
int max_label_id =
*std::max_element(result.label_ids.begin(), result.label_ids.end());
std::vector<int> color_map = GenerateColorMap(max_label_id);
int h = im.rows;
int w = im.cols;
auto vis_im = im.clone();
for (size_t i = 0; i < result.boxes.size(); ++i) {
if (result.scores[i] < score_threshold) {
continue;
}
int x1 = static_cast<int>(result.boxes[i][0]);
int y1 = static_cast<int>(result.boxes[i][1]);
int x2 = static_cast<int>(result.boxes[i][2]);
int y2 = static_cast<int>(result.boxes[i][3]);
int box_h = y2 - y1;
int box_w = x2 - x1;
int c0 = color_map[3 * result.label_ids[i] + 0];
int c1 = color_map[3 * result.label_ids[i] + 1];
int c2 = color_map[3 * result.label_ids[i] + 2];
cv::Scalar rect_color = cv::Scalar(c0, c1, c2);
std::string id = std::to_string(result.label_ids[i]);
std::string score = std::to_string(result.scores[i]);
if (score.size() > 4) {
score = score.substr(0, 4);
}
std::string text = id + "," + score;
if (labels.size() > result.label_ids[i]) {
text = labels[result.label_ids[i]] + "," + text;
} else {
FDWARNING << "The label_id: " << result.label_ids[i]
<< " in DetectionResult should be less than length of labels:"
<< labels.size() << "." << std::endl;
}
if (text.size() > 16) {
text = text.substr(0, 16);
}
int font = cv::FONT_HERSHEY_SIMPLEX;
cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr);
cv::Point origin;
origin.x = x1;
origin.y = y1;
cv::Rect rect(x1, y1, box_w, box_h);
cv::rectangle(vis_im, rect, rect_color, line_size);
cv::putText(vis_im, text, origin, font, font_size,
cv::Scalar(255, 255, 255), 1);
if (result.contain_masks) {
int mask_h = static_cast<int>(result.masks[i].shape[0]);
int mask_w = static_cast<int>(result.masks[i].shape[1]);
// non-const pointer for cv:Mat constructor
int32_t* mask_raw_data = const_cast<int32_t*>(
static_cast<const int32_t*>(result.masks[i].Data()));
// only reference to mask data (zero copy)
cv::Mat mask(mask_h, mask_w, CV_32SC1, mask_raw_data);
if ((mask_h != box_h) || (mask_w != box_w)) {
cv::resize(mask, mask, cv::Size(box_w, box_h));
}
// use a bright color for instance mask
int mc0 = 255 - c0 >= 127 ? 255 - c0 : 127;
int mc1 = 255 - c1 >= 127 ? 255 - c1 : 127;
int mc2 = 255 - c2 >= 127 ? 255 - c2 : 127;
int32_t* mask_data = reinterpret_cast<int32_t*>(mask.data);
// inplace blending (zero copy)
uchar* vis_im_data = static_cast<uchar*>(vis_im.data);
for (size_t i = y1; i < y2; ++i) {
for (size_t j = x1; j < x2; ++j) {
if (mask_data[(i - y1) * mask_w + (j - x1)] != 0) {
vis_im_data[i * w * 3 + j * 3 + 0] = cv::saturate_cast<uchar>(
static_cast<float>(mc0) * 0.5f +
static_cast<float>(vis_im_data[i * w * 3 + j * 3 + 0]) * 0.5f);
vis_im_data[i * w * 3 + j * 3 + 1] = cv::saturate_cast<uchar>(
static_cast<float>(mc1) * 0.5f +
static_cast<float>(vis_im_data[i * w * 3 + j * 3 + 1]) * 0.5f);
vis_im_data[i * w * 3 + j * 3 + 2] = cv::saturate_cast<uchar>(
static_cast<float>(mc2) * 0.5f +
static_cast<float>(vis_im_data[i * w * 3 + j * 3 + 2]) * 0.5f);
}
}
}
}
}
return vis_im;
}
// Default only support visualize num_classes <= 1000
// If need to visualize num_classes > 1000
// Please call Visualize::GetColorMap(num_classes) first
cv::Mat Visualize::VisDetection(const cv::Mat& im,
const DetectionResult& result,
float score_threshold, int line_size,
float font_size) {
FDWARNING << "DEPRECATED: fastdeploy::vision::Visualize::VisDetection is "
"deprecated, please use fastdeploy::vision:VisDetection "
"function instead."
<< std::endl;
if (result.contain_masks) {
FDASSERT(result.boxes.size() == result.masks.size(),
"The size of masks must be equal the size of boxes!");
}
auto color_map = GetColorMap();
int h = im.rows;
int w = im.cols;
auto vis_im = im.clone();
for (size_t i = 0; i < result.boxes.size(); ++i) {
if (result.scores[i] < score_threshold) {
continue;
}
int x1 = static_cast<int>(result.boxes[i][0]);
int y1 = static_cast<int>(result.boxes[i][1]);
int x2 = static_cast<int>(result.boxes[i][2]);
int y2 = static_cast<int>(result.boxes[i][3]);
int box_h = y2 - y1;
int box_w = x2 - x1;
int c0 = color_map[3 * result.label_ids[i] + 0];
int c1 = color_map[3 * result.label_ids[i] + 1];
int c2 = color_map[3 * result.label_ids[i] + 2];
cv::Scalar rect_color = cv::Scalar(c0, c1, c2);
std::string id = std::to_string(result.label_ids[i]);
std::string score = std::to_string(result.scores[i]);
if (score.size() > 4) {
score = score.substr(0, 4);
}
std::string text = id + "," + score;
int font = cv::FONT_HERSHEY_SIMPLEX;
cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr);
cv::Point origin;
origin.x = x1;
origin.y = y1;
cv::Rect rect(x1, y1, box_w, box_h);
cv::rectangle(vis_im, rect, rect_color, line_size);
cv::putText(vis_im, text, origin, font, font_size,
cv::Scalar(255, 255, 255), 1);
if (result.contain_masks) {
int mask_h = static_cast<int>(result.masks[i].shape[0]);
int mask_w = static_cast<int>(result.masks[i].shape[1]);
// non-const pointer for cv:Mat constructor
int32_t* mask_raw_data = const_cast<int32_t*>(
static_cast<const int32_t*>(result.masks[i].Data()));
// only reference to mask data (zero copy)
cv::Mat mask(mask_h, mask_w, CV_32SC1, mask_raw_data);
if ((mask_h != box_h) || (mask_w != box_w)) {
cv::resize(mask, mask, cv::Size(box_w, box_h));
}
// use a bright color for instance mask
int mc0 = 255 - c0 >= 127 ? 255 - c0 : 127;
int mc1 = 255 - c1 >= 127 ? 255 - c1 : 127;
int mc2 = 255 - c2 >= 127 ? 255 - c2 : 127;
int32_t* mask_data = reinterpret_cast<int32_t*>(mask.data);
// inplace blending (zero copy)
uchar* vis_im_data = static_cast<uchar*>(vis_im.data);
for (size_t i = y1; i < y2; ++i) {
for (size_t j = x1; j < x2; ++j) {
if (mask_data[(i - y1) * mask_w + (j - x1)] != 0) {
vis_im_data[i * w * 3 + j * 3 + 0] = cv::saturate_cast<uchar>(
static_cast<float>(mc0) * 0.5f +
static_cast<float>(vis_im_data[i * w * 3 + j * 3 + 0]) * 0.5f);
vis_im_data[i * w * 3 + j * 3 + 1] = cv::saturate_cast<uchar>(
static_cast<float>(mc1) * 0.5f +
static_cast<float>(vis_im_data[i * w * 3 + j * 3 + 1]) * 0.5f);
vis_im_data[i * w * 3 + j * 3 + 2] = cv::saturate_cast<uchar>(
static_cast<float>(mc2) * 0.5f +
static_cast<float>(vis_im_data[i * w * 3 + j * 3 + 2]) * 0.5f);
}
}
}
}
}
return vis_im;
}
} // namespace vision
} // namespace fastdeploy