mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-16 21:51:31 +08:00
[Visualize] Support custom labels for visualization (#397)
* [lite] enable lite arm64-v8a fp16 option. * Update VERSION_NUMBER * [Vision] support custom labels for visualization * [Visualize] add custom labels warning * [Visualize] fix VisClassification bug
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
CMAKE_MINIMUM_REQUIRED(VERSION 3.10)
|
||||
CMAKE_MINIMUM_REQUIRED(VERSION 3.8)
|
||||
|
||||
set(WITH_GPU @WITH_GPU@)
|
||||
set(ENABLE_ORT_BACKEND @ENABLE_ORT_BACKEND@)
|
||||
|
100
fastdeploy/vision/visualize/classification.cc
Normal file
100
fastdeploy/vision/visualize/classification.cc
Normal file
@@ -0,0 +1,100 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifdef ENABLE_VISION_VISUALIZE
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "fastdeploy/vision/visualize/visualize.h"
|
||||
#include "opencv2/imgproc/imgproc.hpp"
|
||||
|
||||
namespace fastdeploy {
|
||||
namespace vision {
|
||||
|
||||
// Draw classification results as text onto a copy of the input image.
//
// Each kept entry is rendered as "<label_id>,<score>", with the score string
// cut to at most 4 characters, in white using FONT_HERSHEY_SIMPLEX.
//
// im:              image to annotate; it is cloned, never modified.
// result:          classification result providing label_ids and scores.
// top_k:           number of leading entries to consider. A negative value, or
//                  a value larger than the number of entries, selects all of
//                  them.
// score_threshold: entries whose score is below this value are not drawn.
// font_size:       font scale forwarded to cv::putText.
//
// Returns the annotated copy of im.
cv::Mat VisClassification(const cv::Mat& im, const ClassifyResult& result,
                          int top_k, float score_threshold, float font_size) {
  cv::Mat vis_im = im.clone();

  // Row spacing and left margin are derived from the image dimensions.
  const int h_sep = im.rows / 30;
  const int w_sep = im.cols / 10;

  // scores and label_ids are read in lockstep, so never index past the
  // shorter of the two.
  const size_t available =
      std::min(result.scores.size(), result.label_ids.size());
  // Compare in the unsigned domain only after ruling out negative values;
  // a negative top_k keeps its historical meaning of "all entries".
  const size_t limit =
      (top_k < 0 || static_cast<size_t>(top_k) > available)
          ? available
          : static_cast<size_t>(top_k);

  const int font = cv::FONT_HERSHEY_SIMPLEX;
  for (size_t i = 0; i < limit; ++i) {
    if (result.scores[i] < score_threshold) {
      continue;
    }
    std::string score = std::to_string(result.scores[i]);
    if (score.size() > 4) {
      score = score.substr(0, 4);
    }
    const std::string text = std::to_string(result.label_ids[i]) + "," + score;

    // The row is tied to the rank i, so entries skipped by the threshold
    // leave an empty row rather than shifting later entries up.
    const cv::Point origin(w_sep, h_sep * static_cast<int>(i + 1));
    cv::putText(vis_im, text, origin, font, font_size,
                cv::Scalar(255, 255, 255), 1);
  }
  return vis_im;
}
|
||||
|
||||
// Visualize ClassifyResult with custom labels.
|
||||
cv::Mat VisClassification(const cv::Mat& im, const ClassifyResult& result,
|
||||
const std::vector<std::string>& labels,
|
||||
int top_k, float score_threshold,
|
||||
float font_size) {
|
||||
int h = im.rows;
|
||||
int w = im.cols;
|
||||
auto vis_im = im.clone();
|
||||
int h_sep = h / 30;
|
||||
int w_sep = w / 10;
|
||||
if (top_k > result.scores.size()) {
|
||||
top_k = result.scores.size();
|
||||
}
|
||||
for (int i = 0; i < top_k; ++i) {
|
||||
if (result.scores[i] < score_threshold) {
|
||||
continue;
|
||||
}
|
||||
std::string id = std::to_string(result.label_ids[i]);
|
||||
std::string score = std::to_string(result.scores[i]);
|
||||
if (score.size() > 4) {
|
||||
score = score.substr(0, 4);
|
||||
}
|
||||
std::string text = id + "," + score;
|
||||
if (labels.size() > result.label_ids[i]) {
|
||||
text = labels[result.label_ids[i]] + "," + text;
|
||||
} else {
|
||||
FDWARNING << "The label_id: " << result.label_ids[i]
|
||||
<< " in DetectionResult should be less than length of labels:"
|
||||
<< labels.size() << "." << std::endl;
|
||||
}
|
||||
if (text.size() > 16) {
|
||||
text = text.substr(0, 16);
|
||||
}
|
||||
int font = cv::FONT_HERSHEY_SIMPLEX;
|
||||
cv::Point origin;
|
||||
origin.x = w_sep;
|
||||
origin.y = h_sep * (i + 1);
|
||||
cv::putText(vis_im, text, origin, font, font_size,
|
||||
cv::Scalar(255, 255, 255), 1);
|
||||
}
|
||||
return vis_im;
|
||||
}
|
||||
|
||||
} // namespace vision
|
||||
} // namespace fastdeploy
|
||||
#endif
|
@@ -104,6 +104,101 @@ cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result,
|
||||
return vis_im;
|
||||
}
|
||||
|
||||
// Visualize DetectionResult with custom labels.
|
||||
cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result,
|
||||
const std::vector<std::string>& labels,
|
||||
float score_threshold, int line_size,
|
||||
float font_size) {
|
||||
if (result.contain_masks) {
|
||||
FDASSERT(result.boxes.size() == result.masks.size(),
|
||||
"The size of masks must be equal to the size of boxes, but now "
|
||||
"%zu != %zu.",
|
||||
result.boxes.size(), result.masks.size());
|
||||
}
|
||||
int max_label_id =
|
||||
*std::max_element(result.label_ids.begin(), result.label_ids.end());
|
||||
std::vector<int> color_map = GenerateColorMap(max_label_id);
|
||||
|
||||
int h = im.rows;
|
||||
int w = im.cols;
|
||||
auto vis_im = im.clone();
|
||||
for (size_t i = 0; i < result.boxes.size(); ++i) {
|
||||
if (result.scores[i] < score_threshold) {
|
||||
continue;
|
||||
}
|
||||
int x1 = static_cast<int>(result.boxes[i][0]);
|
||||
int y1 = static_cast<int>(result.boxes[i][1]);
|
||||
int x2 = static_cast<int>(result.boxes[i][2]);
|
||||
int y2 = static_cast<int>(result.boxes[i][3]);
|
||||
int box_h = y2 - y1;
|
||||
int box_w = x2 - x1;
|
||||
int c0 = color_map[3 * result.label_ids[i] + 0];
|
||||
int c1 = color_map[3 * result.label_ids[i] + 1];
|
||||
int c2 = color_map[3 * result.label_ids[i] + 2];
|
||||
cv::Scalar rect_color = cv::Scalar(c0, c1, c2);
|
||||
std::string id = std::to_string(result.label_ids[i]);
|
||||
std::string score = std::to_string(result.scores[i]);
|
||||
if (score.size() > 4) {
|
||||
score = score.substr(0, 4);
|
||||
}
|
||||
std::string text = id + "," + score;
|
||||
if (labels.size() > result.label_ids[i]) {
|
||||
text = labels[result.label_ids[i]] + "," + text;
|
||||
} else {
|
||||
FDWARNING << "The label_id: " << result.label_ids[i]
|
||||
<< " in DetectionResult should be less than length of labels:"
|
||||
<< labels.size() << "." << std::endl;
|
||||
}
|
||||
if (text.size() > 16) {
|
||||
text = text.substr(0, 16);
|
||||
}
|
||||
int font = cv::FONT_HERSHEY_SIMPLEX;
|
||||
cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr);
|
||||
cv::Point origin;
|
||||
origin.x = x1;
|
||||
origin.y = y1;
|
||||
cv::Rect rect(x1, y1, box_w, box_h);
|
||||
cv::rectangle(vis_im, rect, rect_color, line_size);
|
||||
cv::putText(vis_im, text, origin, font, font_size,
|
||||
cv::Scalar(255, 255, 255), 1);
|
||||
if (result.contain_masks) {
|
||||
int mask_h = static_cast<int>(result.masks[i].shape[0]);
|
||||
int mask_w = static_cast<int>(result.masks[i].shape[1]);
|
||||
// non-const pointer for cv:Mat constructor
|
||||
int32_t* mask_raw_data = const_cast<int32_t*>(
|
||||
static_cast<const int32_t*>(result.masks[i].Data()));
|
||||
// only reference to mask data (zero copy)
|
||||
cv::Mat mask(mask_h, mask_w, CV_32SC1, mask_raw_data);
|
||||
if ((mask_h != box_h) || (mask_w != box_w)) {
|
||||
cv::resize(mask, mask, cv::Size(box_w, box_h));
|
||||
}
|
||||
// use a bright color for instance mask
|
||||
int mc0 = 255 - c0 >= 127 ? 255 - c0 : 127;
|
||||
int mc1 = 255 - c1 >= 127 ? 255 - c1 : 127;
|
||||
int mc2 = 255 - c2 >= 127 ? 255 - c2 : 127;
|
||||
int32_t* mask_data = reinterpret_cast<int32_t*>(mask.data);
|
||||
// inplace blending (zero copy)
|
||||
uchar* vis_im_data = static_cast<uchar*>(vis_im.data);
|
||||
for (size_t i = y1; i < y2; ++i) {
|
||||
for (size_t j = x1; j < x2; ++j) {
|
||||
if (mask_data[(i - y1) * mask_w + (j - x1)] != 0) {
|
||||
vis_im_data[i * w * 3 + j * 3 + 0] = cv::saturate_cast<uchar>(
|
||||
static_cast<float>(mc0) * 0.5f +
|
||||
static_cast<float>(vis_im_data[i * w * 3 + j * 3 + 0]) * 0.5f);
|
||||
vis_im_data[i * w * 3 + j * 3 + 1] = cv::saturate_cast<uchar>(
|
||||
static_cast<float>(mc1) * 0.5f +
|
||||
static_cast<float>(vis_im_data[i * w * 3 + j * 3 + 1]) * 0.5f);
|
||||
vis_im_data[i * w * 3 + j * 3 + 2] = cv::saturate_cast<uchar>(
|
||||
static_cast<float>(mc2) * 0.5f +
|
||||
static_cast<float>(vis_im_data[i * w * 3 + j * 3 + 2]) * 0.5f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return vis_im;
|
||||
}
|
||||
|
||||
// By default, only num_classes <= 1000 can be visualized.
|
||||
// To visualize more than 1000 classes,
|
||||
// call Visualize::GetColorMap(num_classes) first.
|
||||
|
@@ -54,6 +54,18 @@ FASTDEPLOY_DECL cv::Mat VisDetection(const cv::Mat& im,
|
||||
const DetectionResult& result,
|
||||
float score_threshold = 0.0,
|
||||
int line_size = 1, float font_size = 0.5f);
|
||||
// Draws the detections in `result` onto a copy of `im` and returns the copy.
// Overload taking custom label names: when a detection's label id is a valid
// index into `labels`, that name is prepended to the "<label_id>,<score>"
// caption; otherwise a warning is logged and the caption has no name.
// Detections scoring below `score_threshold` are skipped. `line_size` is the
// box line thickness and `font_size` the caption font scale.
FASTDEPLOY_DECL cv::Mat VisDetection(const cv::Mat& im,
|
||||
const DetectionResult& result,
|
||||
const std::vector<std::string>& labels,
|
||||
float score_threshold = 0.0,
|
||||
int line_size = 1, float font_size = 0.5f);
|
||||
// Draws classification results onto a copy of `im` and returns the copy.
// At most the first `top_k` entries of `result` are considered (all of them
// if `top_k` exceeds the number of entries); entries scoring below
// `score_threshold` are not drawn. Each drawn entry is a white
// "<label_id>,<score>" text line; `font_size` is the font scale.
FASTDEPLOY_DECL cv::Mat VisClassification(
|
||||
const cv::Mat& im, const ClassifyResult& result, int top_k = 5,
|
||||
float score_threshold = 0.0f, float font_size = 0.5f);
|
||||
// Draws classification results onto a copy of `im` and returns the copy.
// Overload taking custom label names: when an entry's label id is a valid
// index into `labels`, that name is prepended to the "<label_id>,<score>"
// text; otherwise a warning is logged and the text has no name. At most the
// first `top_k` entries of `result` are considered (all of them if `top_k`
// exceeds the number of entries); entries scoring below `score_threshold`
// are not drawn. `font_size` is the font scale.
FASTDEPLOY_DECL cv::Mat VisClassification(
|
||||
const cv::Mat& im, const ClassifyResult& result,
|
||||
const std::vector<std::string>& labels, int top_k = 5,
|
||||
float score_threshold = 0.0f, float font_size = 0.5f);
|
||||
FASTDEPLOY_DECL cv::Mat VisFaceDetection(const cv::Mat& im,
|
||||
const FaceDetectionResult& result,
|
||||
int line_size = 1,
|
||||
|
Reference in New Issue
Block a user