Refine code structure (#89)

* refine code structure

* refine code structure
Jason
2022-08-10 10:50:22 +08:00
committed by GitHub
parent c7d37b6732
commit 22ca63982b
333 changed files with 1 addition and 37,500 deletions

View File

@@ -19,7 +19,7 @@ option(CSRCS_DIR_NAME "Name of source code directory")
option(LIBRARY_NAME "Name of build library name")
option(PY_LIBRARY_NAME "Name of build python library name")
if(NOT CSRCS_DIR_NAME)
set(CSRCS_DIR_NAME "csrcs")
set(CSRCS_DIR_NAME "csrc")
endif()
if(NOT LIBRARY_NAME)
set(LIBRARY_NAME "fastdeploy")
@@ -55,10 +55,6 @@ option(ENABLE_FDTENSOR_FUNC "Whether to compile with function of FDTensor." OFF)
option(ENABLE_OPENCV_CUDA "Whether to enable opencv with cuda, this will allow process image with GPU." OFF)
option(ENABLE_DEBUG "Whether to enable print debug information, this may reduce performance." OFF)
# Whether to build fastdeploy with vision/text/... examples, only for testing.
option(WITH_VISION_EXAMPLES "Whether to build fastdeploy with vision examples" OFF)
option(WITH_TEXT_EXAMPLES "Whether to build fastdeploy with text examples" OFF)
# config GIT_URL with github mirrors to speed up dependent repos clone
option(GIT_URL "Git URL to clone dependent repos" ${GIT_URL})
if(NOT GIT_URL)
@@ -102,19 +98,6 @@ set(HEAD_DIR "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}")
include_directories(${HEAD_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
if (WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples)
# ENABLE_VISION and ENABLE_VISION_VISUALIZE must be ON if vision examples are enabled.
message(STATUS "Found WITH_VISION_EXAMPLES ON, so force ENABLE_VISION and ENABLE_VISION_VISUALIZE ON")
set(ENABLE_VISION ON CACHE BOOL "force to enable vision models usage" FORCE)
set(ENABLE_VISION_VISUALIZE ON CACHE BOOL "force to enable visualize vision model result toolbox" FORCE)
endif()
if (WITH_TEXT_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples)
# ENABLE_TEXT must be ON if text examples are enabled.
message(STATUS "Found WITH_TEXT_EXAMPLES ON, so force ENABLE_TEXT ON")
set(ENABLE_TEXT ON CACHE BOOL "force to enable text models usage" FORCE)
endif()
add_definitions(-DFASTDEPLOY_LIB)
file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*.cc)
file(GLOB_RECURSE FDTENSOR_FUNC_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cc)
@@ -277,22 +260,6 @@ if(MSVC)
endif()
target_link_libraries(${LIBRARY_NAME} ${DEPEND_LIBS})
# add examples after preparing include paths for third-party dependencies
if (WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples)
add_definitions(-DWITH_VISION_EXAMPLES)
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/examples/bin)
add_subdirectory(examples)
endif()
if (WITH_TEXT_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples)
add_definitions(-DWITH_TEXT_EXAMPLES)
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/examples/bin)
# Avoid calling add_subdirectory repeatedly
if (NOT WITH_VISION_EXAMPLES)
add_subdirectory(examples)
endif()
endif()
if (WITH_TESTING AND EXISTS ${PROJECT_SOURCE_DIR}/tests)
add_definitions(-DWITH_TESTING)
include(external/gtest.cmake)

View File

@@ -1,49 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/backends/common/multiclass_nms.h"
#include "fastdeploy/core/fd_tensor.h"
namespace fastdeploy {
struct TensorInfo {
std::string name;
std::vector<int> shape;
FDDataType dtype;
};
class BaseBackend {
public:
bool initialized_ = false;
BaseBackend() {}
virtual ~BaseBackend() = default;
virtual bool Initialized() const { return initialized_; }
virtual int NumInputs() const = 0;
virtual int NumOutputs() const = 0;
virtual TensorInfo GetInputInfo(int index) = 0;
virtual TensorInfo GetOutputInfo(int index) = 0;
virtual bool Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) = 0;
};
} // namespace fastdeploy
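
A minimal sketch of driving a model through the BaseBackend interface declared above. Only the methods shown in this header are assumed; the backend pointer and the prepared inputs are placeholders for a concrete implementation such as the ORT or Paddle backends later in this commit.

// Sketch: run inference through any BaseBackend implementation.
#include <iostream>
#include <vector>
#include "fastdeploy/backends/backend.h"

bool RunOnce(fastdeploy::BaseBackend* backend,
             std::vector<fastdeploy::FDTensor>& inputs) {
  if (!backend->Initialized()) {
    std::cerr << "Backend is not initialized." << std::endl;
    return false;
  }
  // Inspect the model signature before running.
  for (int i = 0; i < backend->NumInputs(); ++i) {
    fastdeploy::TensorInfo info = backend->GetInputInfo(i);
    std::cout << "input[" << i << "]: " << info.name << std::endl;
  }
  std::vector<fastdeploy::FDTensor> outputs;
  return backend->Infer(inputs, &outputs);
}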

View File

@@ -1,224 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/common/multiclass_nms.h"
#include <algorithm>
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
namespace backend {
template <class T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
const std::pair<float, T>& pair2) {
return pair1.first > pair2.first;
}
void GetMaxScoreIndex(const float* scores, const int& score_size,
const float& threshold, const int& top_k,
std::vector<std::pair<float, int>>* sorted_indices) {
for (size_t i = 0; i < score_size; ++i) {
if (scores[i] > threshold) {
sorted_indices->push_back(std::make_pair(scores[i], i));
}
}
// Sort the score pair according to the scores in descending order
std::stable_sort(sorted_indices->begin(), sorted_indices->end(),
SortScorePairDescend<int>);
// Keep top_k scores if needed.
if (top_k > -1 && top_k < static_cast<int>(sorted_indices->size())) {
sorted_indices->resize(top_k);
}
}
float BBoxArea(const float* box, const bool& normalized) {
if (box[2] < box[0] || box[3] < box[1]) {
// If coordinate values are invalid
// (e.g. xmax < xmin or ymax < ymin), return 0.
return 0.f;
} else {
const float w = box[2] - box[0];
const float h = box[3] - box[1];
if (normalized) {
return w * h;
} else {
// If coordinate values are not within range [0, 1].
return (w + 1) * (h + 1);
}
}
}
float JaccardOverlap(const float* box1, const float* box2,
const bool& normalized) {
if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
box2[3] < box1[1]) {
return 0.f;
} else {
const float inter_xmin = std::max(box1[0], box2[0]);
const float inter_ymin = std::max(box1[1], box2[1]);
const float inter_xmax = std::min(box1[2], box2[2]);
const float inter_ymax = std::min(box1[3], box2[3]);
float norm = normalized ? 0.0f : 1.0f;
float inter_w = inter_xmax - inter_xmin + norm;
float inter_h = inter_ymax - inter_ymin + norm;
const float inter_area = inter_w * inter_h;
const float bbox1_area = BBoxArea(box1, normalized);
const float bbox2_area = BBoxArea(box2, normalized);
return inter_area / (bbox1_area + bbox2_area - inter_area);
}
}
void MultiClassNMS::FastNMS(const float* boxes, const float* scores,
const int& num_boxes,
std::vector<int>* keep_indices) {
std::vector<std::pair<float, int>> sorted_indices;
GetMaxScoreIndex(scores, num_boxes, score_threshold, nms_top_k,
&sorted_indices);
float adaptive_threshold = nms_threshold;
while (sorted_indices.size() != 0) {
const int idx = sorted_indices.front().second;
bool keep = true;
for (size_t k = 0; k < keep_indices->size(); ++k) {
if (!keep) {
break;
}
const int kept_idx = (*keep_indices)[k];
float overlap =
JaccardOverlap(boxes + idx * 4, boxes + kept_idx * 4, normalized);
keep = overlap <= adaptive_threshold;
}
if (keep) {
keep_indices->push_back(idx);
}
sorted_indices.erase(sorted_indices.begin());
if (keep && nms_eta < 1.0 && adaptive_threshold > 0.5) {
adaptive_threshold *= nms_eta;
}
}
}
int MultiClassNMS::NMSForEachSample(
const float* boxes, const float* scores, int num_boxes, int num_classes,
std::map<int, std::vector<int>>* keep_indices) {
for (int i = 0; i < num_classes; ++i) {
if (i == background_label) {
continue;
}
const float* score_for_class_i = scores + i * num_boxes;
FastNMS(boxes, score_for_class_i, num_boxes, &((*keep_indices)[i]));
}
int num_det = 0;
for (auto iter = keep_indices->begin(); iter != keep_indices->end(); ++iter) {
num_det += iter->second.size();
}
if (keep_top_k > -1 && num_det > keep_top_k) {
std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
for (const auto& it : *keep_indices) {
int label = it.first;
const float* current_score = scores + label * num_boxes;
auto& label_indices = it.second;
for (size_t j = 0; j < label_indices.size(); ++j) {
int idx = label_indices[j];
score_index_pairs.push_back(
std::make_pair(current_score[idx], std::make_pair(label, idx)));
}
}
std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(),
SortScorePairDescend<std::pair<int, int>>);
score_index_pairs.resize(keep_top_k);
std::map<int, std::vector<int>> new_indices;
for (size_t j = 0; j < score_index_pairs.size(); ++j) {
int label = score_index_pairs[j].second.first;
int idx = score_index_pairs[j].second.second;
new_indices[label].push_back(idx);
}
new_indices.swap(*keep_indices);
num_det = keep_top_k;
}
return num_det;
}
void MultiClassNMS::Compute(const float* boxes_data, const float* scores_data,
const std::vector<int64_t>& boxes_dim,
const std::vector<int64_t>& scores_dim) {
int score_size = scores_dim.size();
int64_t batch_size = scores_dim[0];
int64_t box_dim = boxes_dim[2];
int64_t out_dim = box_dim + 2;
int num_nmsed_out = 0;
FDASSERT(score_size == 3, "Require rank of input scores be 3, but now it's " +
std::to_string(score_size) + ".");
FDASSERT(boxes_dim[2] == 4,
"Require the 3-dimension of input boxes be 4, but now it's " +
std::to_string(boxes_dim[2]) + ".");
out_num_rois_data.resize(batch_size);
std::vector<std::map<int, std::vector<int>>> all_indices;
for (size_t i = 0; i < batch_size; ++i) {
std::map<int, std::vector<int>> indices; // indices kept for each class
const float* current_boxes_ptr =
boxes_data + i * boxes_dim[1] * boxes_dim[2];
const float* current_scores_ptr =
scores_data + i * scores_dim[1] * scores_dim[2];
int num = NMSForEachSample(current_boxes_ptr, current_scores_ptr,
boxes_dim[1], scores_dim[1], &indices);
num_nmsed_out += num;
out_num_rois_data[i] = num;
all_indices.emplace_back(indices);
}
std::vector<int64_t> out_box_dims = {num_nmsed_out, 6};
std::vector<int64_t> out_index_dims = {num_nmsed_out, 1};
if (num_nmsed_out == 0) {
for (size_t i = 0; i < batch_size; ++i) {
out_num_rois_data[i] = 0;
}
return;
}
out_box_data.resize(num_nmsed_out * 6);
out_index_data.resize(num_nmsed_out);
int count = 0;
for (size_t i = 0; i < batch_size; ++i) {
const float* current_boxes_ptr =
boxes_data + i * boxes_dim[1] * boxes_dim[2];
const float* current_scores_ptr =
scores_data + i * scores_dim[1] * scores_dim[2];
for (const auto& it : all_indices[i]) {
int label = it.first;
const auto& indices = it.second;
const float* current_scores_class_ptr =
current_scores_ptr + label * scores_dim[2];
for (size_t j = 0; j < indices.size(); ++j) {
int start = count * 6;
out_box_data[start] = label;
out_box_data[start + 1] = current_scores_class_ptr[indices[j]];
out_box_data[start + 2] = current_boxes_ptr[indices[j] * 4];
out_box_data[start + 3] = current_boxes_ptr[indices[j] * 4 + 1];
out_box_data[start + 4] = current_boxes_ptr[indices[j] * 4 + 2];
out_box_data[start + 5] = current_boxes_ptr[indices[j] * 4 + 3];
out_index_data[count] = i * boxes_dim[1] + indices[j];
count += 1;
}
}
}
}
} // namespace backend
} // namespace fastdeploy

View File

@@ -1,45 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include <vector>
namespace fastdeploy {
namespace backend {
struct MultiClassNMS {
int64_t background_label = -1;
int64_t keep_top_k = -1;
float nms_eta;
float nms_threshold = 0.7;
int64_t nms_top_k;
bool normalized;
float score_threshold;
std::vector<int32_t> out_num_rois_data;
std::vector<int32_t> out_index_data;
std::vector<float> out_box_data;
void FastNMS(const float* boxes, const float* scores, const int& num_boxes,
std::vector<int>* keep_indices);
int NMSForEachSample(const float* boxes, const float* scores, int num_boxes,
int num_classes,
std::map<int, std::vector<int>>* keep_indices);
void Compute(const float* boxes, const float* scores,
const std::vector<int64_t>& boxes_dim,
const std::vector<int64_t>& scores_dim);
};
} // namespace backend
} // namespace fastdeploy
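
A hedged usage sketch for the MultiClassNMS struct above. The parameter values and the tiny boxes/scores buffers are invented for illustration; only the fields and the Compute() signature declared in this header are assumed. Boxes are laid out as [batch, num_boxes, 4] and scores as [batch, num_classes, num_boxes], matching the checks in the implementation.

#include <iostream>
#include <vector>
#include "fastdeploy/backends/common/multiclass_nms.h"

int main() {
  fastdeploy::backend::MultiClassNMS nms;
  nms.background_label = -1;
  nms.keep_top_k = 100;
  nms.nms_eta = 1.0f;
  nms.nms_threshold = 0.5f;
  nms.nms_top_k = 1000;
  nms.normalized = true;
  nms.score_threshold = 0.25f;

  // One sample, two boxes, one class (illustrative values).
  std::vector<float> boxes = {0, 0, 10, 10, 1, 1, 11, 11};
  std::vector<float> scores = {0.9f, 0.8f};
  nms.Compute(boxes.data(), scores.data(), {1, 2, 4}, {1, 1, 2});

  // Kept detections land in out_box_data as [label, score, x1, y1, x2, y2].
  std::cout << "kept " << nms.out_num_rois_data[0] << " boxes" << std::endl;
  return 0;
}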

View File

@@ -1,261 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef NON_64_PLATFORM
#include "fastdeploy/backends/ort/ops/multiclass_nms.h"
#include <algorithm>
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
struct OrtTensorDimensions : std::vector<int64_t> {
OrtTensorDimensions(Ort::CustomOpApi ort, const OrtValue* value) {
OrtTensorTypeAndShapeInfo* info = ort.GetTensorTypeAndShape(value);
std::vector<int64_t>::operator=(ort.GetTensorShape(info));
ort.ReleaseTensorTypeAndShapeInfo(info);
}
};
template <class T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
const std::pair<float, T>& pair2) {
return pair1.first > pair2.first;
}
void GetMaxScoreIndex(const float* scores, const int& score_size,
const float& threshold, const int& top_k,
std::vector<std::pair<float, int>>* sorted_indices) {
for (size_t i = 0; i < score_size; ++i) {
if (scores[i] > threshold) {
sorted_indices->push_back(std::make_pair(scores[i], i));
}
}
// Sort the score pair according to the scores in descending order
std::stable_sort(sorted_indices->begin(), sorted_indices->end(),
SortScorePairDescend<int>);
// Keep top_k scores if needed.
if (top_k > -1 && top_k < static_cast<int>(sorted_indices->size())) {
sorted_indices->resize(top_k);
}
}
float BBoxArea(const float* box, const bool& normalized) {
if (box[2] < box[0] || box[3] < box[1]) {
// If coordinate values are invalid
// (e.g. xmax < xmin or ymax < ymin), return 0.
return 0.f;
} else {
const float w = box[2] - box[0];
const float h = box[3] - box[1];
if (normalized) {
return w * h;
} else {
// If coordinate values are not within range [0, 1].
return (w + 1) * (h + 1);
}
}
}
float JaccardOverlap(const float* box1, const float* box2,
const bool& normalized) {
if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
box2[3] < box1[1]) {
return 0.f;
} else {
const float inter_xmin = std::max(box1[0], box2[0]);
const float inter_ymin = std::max(box1[1], box2[1]);
const float inter_xmax = std::min(box1[2], box2[2]);
const float inter_ymax = std::min(box1[3], box2[3]);
float norm = normalized ? 0.0f : 1.0f;
float inter_w = inter_xmax - inter_xmin + norm;
float inter_h = inter_ymax - inter_ymin + norm;
const float inter_area = inter_w * inter_h;
const float bbox1_area = BBoxArea(box1, normalized);
const float bbox2_area = BBoxArea(box2, normalized);
return inter_area / (bbox1_area + bbox2_area - inter_area);
}
}
void MultiClassNmsKernel::FastNMS(const float* boxes, const float* scores,
const int& num_boxes,
std::vector<int>* keep_indices) {
std::vector<std::pair<float, int>> sorted_indices;
GetMaxScoreIndex(scores, num_boxes, score_threshold, nms_top_k,
&sorted_indices);
float adaptive_threshold = nms_threshold;
while (sorted_indices.size() != 0) {
const int idx = sorted_indices.front().second;
bool keep = true;
for (size_t k = 0; k < keep_indices->size(); ++k) {
if (!keep) {
break;
}
const int kept_idx = (*keep_indices)[k];
float overlap =
JaccardOverlap(boxes + idx * 4, boxes + kept_idx * 4, normalized);
keep = overlap <= adaptive_threshold;
}
if (keep) {
keep_indices->push_back(idx);
}
sorted_indices.erase(sorted_indices.begin());
if (keep && nms_eta < 1.0 && adaptive_threshold > 0.5) {
adaptive_threshold *= nms_eta;
}
}
}
int MultiClassNmsKernel::NMSForEachSample(
const float* boxes, const float* scores, int num_boxes, int num_classes,
std::map<int, std::vector<int>>* keep_indices) {
for (int i = 0; i < num_classes; ++i) {
if (i == background_label) {
continue;
}
const float* score_for_class_i = scores + i * num_boxes;
FastNMS(boxes, score_for_class_i, num_boxes, &((*keep_indices)[i]));
}
int num_det = 0;
for (auto iter = keep_indices->begin(); iter != keep_indices->end(); ++iter) {
num_det += iter->second.size();
}
if (keep_top_k > -1 && num_det > keep_top_k) {
std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
for (const auto& it : *keep_indices) {
int label = it.first;
const float* current_score = scores + label * num_boxes;
auto& label_indices = it.second;
for (size_t j = 0; j < label_indices.size(); ++j) {
int idx = label_indices[j];
score_index_pairs.push_back(
std::make_pair(current_score[idx], std::make_pair(label, idx)));
}
}
std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(),
SortScorePairDescend<std::pair<int, int>>);
score_index_pairs.resize(keep_top_k);
std::map<int, std::vector<int>> new_indices;
for (size_t j = 0; j < score_index_pairs.size(); ++j) {
int label = score_index_pairs[j].second.first;
int idx = score_index_pairs[j].second.second;
new_indices[label].push_back(idx);
}
new_indices.swap(*keep_indices);
num_det = keep_top_k;
}
return num_det;
}
void MultiClassNmsKernel::Compute(OrtKernelContext* context) {
const OrtValue* boxes = ort_.KernelContext_GetInput(context, 0);
const OrtValue* scores = ort_.KernelContext_GetInput(context, 1);
const float* boxes_data =
reinterpret_cast<const float*>(ort_.GetTensorData<float>(boxes));
const float* scores_data =
reinterpret_cast<const float*>(ort_.GetTensorData<float>(scores));
OrtTensorDimensions boxes_dim(ort_, boxes);
OrtTensorDimensions scores_dim(ort_, scores);
int score_size = scores_dim.size();
int64_t batch_size = scores_dim[0];
int64_t box_dim = boxes_dim[2];
int64_t out_dim = box_dim + 2;
int num_nmsed_out = 0;
FDASSERT(score_size == 3, "Require rank of input scores be 3, but now it's " +
std::to_string(score_size) + ".");
FDASSERT(boxes_dim[2] == 4,
"Require the 3-dimension of input boxes be 4, but now it's " +
std::to_string(boxes_dim[2]) + ".");
std::vector<int64_t> out_num_rois_dims = {batch_size};
OrtValue* out_num_rois = ort_.KernelContext_GetOutput(
context, 2, out_num_rois_dims.data(), out_num_rois_dims.size());
int32_t* out_num_rois_data = ort_.GetTensorMutableData<int32_t>(out_num_rois);
std::vector<std::map<int, std::vector<int>>> all_indices;
for (size_t i = 0; i < batch_size; ++i) {
std::map<int, std::vector<int>> indices; // indices kept for each class
const float* current_boxes_ptr =
boxes_data + i * boxes_dim[1] * boxes_dim[2];
const float* current_scores_ptr =
scores_data + i * scores_dim[1] * scores_dim[2];
int num = NMSForEachSample(current_boxes_ptr, current_scores_ptr,
boxes_dim[1], scores_dim[1], &indices);
num_nmsed_out += num;
out_num_rois_data[i] = num;
all_indices.emplace_back(indices);
}
std::vector<int64_t> out_box_dims = {num_nmsed_out, 6};
std::vector<int64_t> out_index_dims = {num_nmsed_out, 1};
OrtValue* out_box = ort_.KernelContext_GetOutput(
context, 0, out_box_dims.data(), out_box_dims.size());
OrtValue* out_index = ort_.KernelContext_GetOutput(
context, 1, out_index_dims.data(), out_index_dims.size());
if (num_nmsed_out == 0) {
int32_t* out_num_rois_data =
ort_.GetTensorMutableData<int32_t>(out_num_rois);
for (size_t i = 0; i < batch_size; ++i) {
out_num_rois_data[i] = 0;
}
return;
}
float* out_box_data = ort_.GetTensorMutableData<float>(out_box);
int32_t* out_index_data = ort_.GetTensorMutableData<int32_t>(out_index);
int count = 0;
for (size_t i = 0; i < batch_size; ++i) {
const float* current_boxes_ptr =
boxes_data + i * boxes_dim[1] * boxes_dim[2];
const float* current_scores_ptr =
scores_data + i * scores_dim[1] * scores_dim[2];
for (const auto& it : all_indices[i]) {
int label = it.first;
const auto& indices = it.second;
const float* current_scores_class_ptr =
current_scores_ptr + label * scores_dim[2];
for (size_t j = 0; j < indices.size(); ++j) {
int start = count * 6;
out_box_data[start] = label;
out_box_data[start + 1] = current_scores_class_ptr[indices[j]];
out_box_data[start + 2] = current_boxes_ptr[indices[j] * 4];
out_box_data[start + 3] = current_boxes_ptr[indices[j] * 4 + 1];
out_box_data[start + 4] = current_boxes_ptr[indices[j] * 4 + 2];
out_box_data[start + 5] = current_boxes_ptr[indices[j] * 4 + 3];
out_index_data[count] = i * boxes_dim[1] + indices[j];
count += 1;
}
}
}
}
void MultiClassNmsKernel::GetAttribute(const OrtKernelInfo* info) {
background_label =
ort_.KernelInfoGetAttribute<int64_t>(info, "background_label");
keep_top_k = ort_.KernelInfoGetAttribute<int64_t>(info, "keep_top_k");
nms_eta = ort_.KernelInfoGetAttribute<float>(info, "nms_eta");
nms_threshold = ort_.KernelInfoGetAttribute<float>(info, "nms_threshold");
nms_top_k = ort_.KernelInfoGetAttribute<int64_t>(info, "nms_top_k");
normalized = ort_.KernelInfoGetAttribute<int64_t>(info, "normalized");
score_threshold = ort_.KernelInfoGetAttribute<float>(info, "score_threshold");
}
} // namespace fastdeploy
#endif

View File

@@ -1,81 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#ifndef NON_64_PLATFORM
#include "onnxruntime_cxx_api.h" // NOLINT
namespace fastdeploy {
struct MultiClassNmsKernel {
protected:
int64_t background_label = -1;
int64_t keep_top_k = -1;
float nms_eta;
float nms_threshold = 0.7;
int64_t nms_top_k;
bool normalized;
float score_threshold;
Ort::CustomOpApi ort_;
public:
MultiClassNmsKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info)
: ort_(ort) {
GetAttribute(info);
}
void GetAttribute(const OrtKernelInfo* info);
void Compute(OrtKernelContext* context);
void FastNMS(const float* boxes, const float* scores, const int& num_boxes,
std::vector<int>* keep_indices);
int NMSForEachSample(const float* boxes, const float* scores, int num_boxes,
int num_classes,
std::map<int, std::vector<int>>* keep_indices);
};
struct MultiClassNmsOp
: Ort::CustomOpBase<MultiClassNmsOp, MultiClassNmsKernel> {
void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const {
return new MultiClassNmsKernel(api, info);
}
const char* GetName() const { return "MultiClassNMS"; }
size_t GetInputTypeCount() const { return 2; }
ONNXTensorElementDataType GetInputType(size_t index) const {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
}
size_t GetOutputTypeCount() const { return 3; }
ONNXTensorElementDataType GetOutputType(size_t index) const {
if (index == 0) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
}
return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32;
}
const char* GetExecutionProviderType() const {
return "CPUExecutionProvider";
}
};
} // namespace fastdeploy
#endif
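
For reference, a sketch of registering this custom op with an ONNX Runtime session, mirroring what OrtBackend::InitCustomOperators() does later in this commit. The function name is hypothetical; the CustomOpDomain::Add and SessionOptions::Add calls are the same ones the backend uses, and the "Paddle" domain matches the custom MultiClassNMS node exported by paddle2onnx.

#ifndef NON_64_PLATFORM
#include "fastdeploy/backends/ort/ops/multiclass_nms.h"
#include "onnxruntime_cxx_api.h"  // NOLINT

// Intended as a one-time registration before the session is created.
void RegisterPaddleCustomOps(Ort::SessionOptions* session_options) {
  // Keep the op and domain alive for the lifetime of the session options.
  static fastdeploy::MultiClassNmsOp multiclass_nms_op;
  static Ort::CustomOpDomain domain("Paddle");
  domain.Add(&multiclass_nms_op);
  session_options->Add(domain);
}
#endif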

View File

@@ -1,279 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/ort/ort_backend.h"
#include <memory>
#include "fastdeploy/backends/ort/ops/multiclass_nms.h"
#include "fastdeploy/backends/ort/utils.h"
#include "fastdeploy/utils/utils.h"
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"
#endif
namespace fastdeploy {
std::vector<OrtCustomOp*> OrtBackend::custom_operators_ =
std::vector<OrtCustomOp*>();
void OrtBackend::BuildOption(const OrtBackendOption& option) {
option_ = option;
if (option.graph_optimization_level >= 0) {
session_options_.SetGraphOptimizationLevel(
GraphOptimizationLevel(option.graph_optimization_level));
}
if (option.intra_op_num_threads >= 0) {
session_options_.SetIntraOpNumThreads(option.intra_op_num_threads);
}
if (option.inter_op_num_threads >= 0) {
session_options_.SetInterOpNumThreads(option.inter_op_num_threads);
}
if (option.execution_mode >= 0) {
session_options_.SetExecutionMode(ExecutionMode(option.execution_mode));
}
if (option.use_gpu) {
auto all_providers = Ort::GetAvailableProviders();
bool support_cuda = false;
std::string providers_msg = "";
for (size_t i = 0; i < all_providers.size(); ++i) {
providers_msg = providers_msg + all_providers[i] + ", ";
if (all_providers[i] == "CUDAExecutionProvider") {
support_cuda = true;
}
}
if (!support_cuda) {
FDWARNING << "Compiled fastdeploy with onnxruntime doesn't "
"support GPU, the available providers are "
<< providers_msg << "will fallback to CPUExecutionProvider."
<< std::endl;
option_.use_gpu = false;
} else {
FDASSERT(option.gpu_id == 0, "Requires gpu_id == 0, but now gpu_id = " +
std::to_string(option.gpu_id) + ".");
OrtCUDAProviderOptions cuda_options;
cuda_options.device_id = option.gpu_id;
session_options_.AppendExecutionProvider_CUDA(cuda_options);
}
}
}
bool OrtBackend::InitFromPaddle(const std::string& model_file,
const std::string& params_file,
const OrtBackendOption& option, bool verbose) {
if (initialized_) {
FDERROR << "OrtBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
#ifdef ENABLE_PADDLE_FRONTEND
char* model_content_ptr;
int model_content_size = 0;
std::vector<paddle2onnx::CustomOp> custom_ops;
for (auto& item : option.custom_op_info_) {
paddle2onnx::CustomOp op;
strcpy(op.op_name, item.first.c_str());
strcpy(op.export_op_name, item.second.c_str());
custom_ops.emplace_back(op);
}
if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
&model_content_ptr, &model_content_size, 11, true,
verbose, true, true, true, custom_ops.data(),
custom_ops.size())) {
FDERROR << "Error occured while export PaddlePaddle to ONNX format."
<< std::endl;
return false;
}
std::string onnx_model_proto(model_content_ptr,
model_content_ptr + model_content_size);
delete[] model_content_ptr;
model_content_ptr = nullptr;
return InitFromOnnx(onnx_model_proto, option, true);
#else
FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
"call `InitFromOnnx` instead."
<< std::endl;
#endif
return false;
}
bool OrtBackend::InitFromOnnx(const std::string& model_file,
const OrtBackendOption& option,
bool from_memory_buffer) {
if (initialized_) {
FDERROR << "OrtBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
BuildOption(option);
InitCustomOperators();
if (from_memory_buffer) {
session_ = {env_, model_file.data(), model_file.size(), session_options_};
} else {
#ifdef _WIN32
session_ = {env_,
std::wstring(model_file.begin(), model_file.end()).c_str(),
session_options_};
#else
session_ = {env_, model_file.c_str(), session_options_};
#endif
}
binding_ = std::make_shared<Ort::IoBinding>(session_);
Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
Ort::Allocator allocator(session_, memory_info);
size_t n_inputs = session_.GetInputCount();
for (size_t i = 0; i < n_inputs; ++i) {
auto input_name = session_.GetInputName(i, allocator);
auto type_info = session_.GetInputTypeInfo(i);
std::vector<int64_t> shape =
type_info.GetTensorTypeAndShapeInfo().GetShape();
ONNXTensorElementDataType data_type =
type_info.GetTensorTypeAndShapeInfo().GetElementType();
inputs_desc_.emplace_back(OrtValueInfo{input_name, shape, data_type});
allocator.Free(input_name);
}
size_t n_outputs = session_.GetOutputCount();
for (size_t i = 0; i < n_outputs; ++i) {
auto output_name = session_.GetOutputName(i, allocator);
auto type_info = session_.GetOutputTypeInfo(i);
std::vector<int64_t> shape =
type_info.GetTensorTypeAndShapeInfo().GetShape();
ONNXTensorElementDataType data_type =
type_info.GetTensorTypeAndShapeInfo().GetElementType();
outputs_desc_.emplace_back(OrtValueInfo{output_name, shape, data_type});
Ort::MemoryInfo out_memory_info("Cpu", OrtDeviceAllocator, 0,
OrtMemTypeDefault);
binding_->BindOutput(output_name, out_memory_info);
allocator.Free(output_name);
}
initialized_ = true;
return true;
}
void OrtBackend::CopyToCpu(const Ort::Value& value, FDTensor* tensor) {
const auto info = value.GetTensorTypeAndShapeInfo();
const auto data_type = info.GetElementType();
size_t numel = info.GetElementCount();
tensor->shape = info.GetShape();
if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
tensor->data.resize(numel * sizeof(float));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
numel * sizeof(float));
tensor->dtype = FDDataType::FP32;
} else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
tensor->data.resize(numel * sizeof(int32_t));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
numel * sizeof(int32_t));
tensor->dtype = FDDataType::INT32;
} else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
tensor->data.resize(numel * sizeof(int64_t));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
numel * sizeof(int64_t));
tensor->dtype = FDDataType::INT64;
} else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
tensor->data.resize(numel * sizeof(double));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
numel * sizeof(double));
tensor->dtype = FDDataType::FP64;
} else {
FDASSERT(false, "Unrecognized data type of " + std::to_string(data_type) +
" while calling OrtBackend::CopyToCpu().");
}
}
bool OrtBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) {
if (inputs.size() != inputs_desc_.size()) {
FDERROR << "[OrtBackend] Size of the inputs(" << inputs.size()
<< ") should keep same with the inputs of this model("
<< inputs_desc_.size() << ")." << std::endl;
return false;
}
// from FDTensor to Ort Inputs
for (size_t i = 0; i < inputs.size(); ++i) {
auto ort_value = CreateOrtValue(inputs[i], option_.use_gpu);
binding_->BindInput(inputs[i].name.c_str(), ort_value);
}
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0,
OrtMemTypeDefault);
binding_->BindOutput(outputs_desc_[i].name.c_str(), memory_info);
}
// Inference with inputs
try {
session_.Run({}, *(binding_.get()));
} catch (const std::exception& e) {
FDERROR << "Failed to Infer: " << e.what() << std::endl;
return false;
}
// Copy result after inference
std::vector<Ort::Value> ort_outputs = binding_->GetOutputValues();
outputs->resize(ort_outputs.size());
for (size_t i = 0; i < ort_outputs.size(); ++i) {
(*outputs)[i].name = outputs_desc_[i].name;
CopyToCpu(ort_outputs[i], &((*outputs)[i]));
}
return true;
}
TensorInfo OrtBackend::GetInputInfo(int index) {
FDASSERT(index < NumInputs(), "The index:" + std::to_string(index) +
" should less than the number of inputs:" +
std::to_string(NumInputs()) + ".");
TensorInfo info;
info.name = inputs_desc_[index].name;
info.shape.assign(inputs_desc_[index].shape.begin(),
inputs_desc_[index].shape.end());
info.dtype = GetFdDtype(inputs_desc_[index].dtype);
return info;
}
TensorInfo OrtBackend::GetOutputInfo(int index) {
FDASSERT(index < NumOutputs(),
"The index:" + std::to_string(index) +
" should less than the number of outputs:" +
std::to_string(NumOutputs()) + ".");
TensorInfo info;
info.name = outputs_desc_[index].name;
info.shape.assign(outputs_desc_[index].shape.begin(),
outputs_desc_[index].shape.end());
info.dtype = GetFdDtype(outputs_desc_[index].dtype);
return info;
}
void OrtBackend::InitCustomOperators() {
#ifndef NON_64_PLATFORM
if (custom_operators_.size() == 0) {
MultiClassNmsOp* custom_op = new MultiClassNmsOp{};
custom_operators_.push_back(custom_op);
}
for (size_t i = 0; i < custom_operators_.size(); ++i) {
custom_op_domain_.Add(custom_operators_[i]);
}
session_options_.Add(custom_op_domain_);
#endif
}
} // namespace fastdeploy

View File

@@ -1,93 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "onnxruntime_cxx_api.h" // NOLINT
namespace fastdeploy {
struct OrtValueInfo {
std::string name;
std::vector<int64_t> shape;
ONNXTensorElementDataType dtype;
};
struct OrtBackendOption {
// -1 means default
// 0: ORT_DISABLE_ALL
// 1: ORT_ENABLE_BASIC
// 2: ORT_ENABLE_EXTENDED
// 99: ORT_ENABLE_ALL (enable some custom optimizations e.g bert)
int graph_optimization_level = -1;
int intra_op_num_threads = -1;
int inter_op_num_threads = -1;
// 0: ORT_SEQUENTIAL
// 1: ORT_PARALLEL
int execution_mode = -1;
bool use_gpu = false;
int gpu_id = 0;
// internal parameter, may be removed in the next version
bool remove_multiclass_nms_ = false;
std::map<std::string, std::string> custom_op_info_;
};
class OrtBackend : public BaseBackend {
public:
OrtBackend() {}
virtual ~OrtBackend() = default;
void BuildOption(const OrtBackendOption& option);
bool InitFromPaddle(const std::string& model_file,
const std::string& params_file,
const OrtBackendOption& option = OrtBackendOption(),
bool verbose = false);
bool InitFromOnnx(const std::string& model_file,
const OrtBackendOption& option = OrtBackendOption(),
bool from_memory_buffer = false);
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
int NumInputs() const { return inputs_desc_.size(); }
int NumOutputs() const { return outputs_desc_.size(); }
TensorInfo GetInputInfo(int index);
TensorInfo GetOutputInfo(int index);
static std::vector<OrtCustomOp*> custom_operators_;
void InitCustomOperators();
private:
Ort::Env env_;
Ort::Session session_{nullptr};
Ort::SessionOptions session_options_;
std::shared_ptr<Ort::IoBinding> binding_;
std::vector<OrtValueInfo> inputs_desc_;
std::vector<OrtValueInfo> outputs_desc_;
#ifndef NON_64_PLATFORM
Ort::CustomOpDomain custom_op_domain_ = Ort::CustomOpDomain("Paddle");
#endif
OrtBackendOption option_;
void CopyToCpu(const Ort::Value& value, FDTensor* tensor);
};
} // namespace fastdeploy
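
A hedged end-to-end sketch of using OrtBackend as declared above. The model path is a placeholder and the inputs are left empty; only the option fields and methods declared in this header are assumed.

#include <iostream>
#include <vector>
#include "fastdeploy/backends/ort/ort_backend.h"

int main() {
  fastdeploy::OrtBackendOption option;
  option.graph_optimization_level = 99;  // ORT_ENABLE_ALL
  option.intra_op_num_threads = 4;
  option.use_gpu = false;

  fastdeploy::OrtBackend backend;
  // "model.onnx" is a placeholder path to an ONNX model on disk.
  if (!backend.InitFromOnnx("model.onnx", option, false)) {
    std::cerr << "Failed to initialize OrtBackend." << std::endl;
    return -1;
  }
  std::cout << "inputs: " << backend.NumInputs()
            << ", outputs: " << backend.NumOutputs() << std::endl;

  // Inputs would be filled with real data whose names match
  // GetInputInfo(i).name; with an empty vector Infer() reports a mismatch.
  std::vector<fastdeploy::FDTensor> inputs, outputs;
  bool ok = backend.Infer(inputs, &outputs);
  return ok ? 0 : -1;
}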

View File

@@ -1,67 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/ort/utils.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
ONNXTensorElementDataType GetOrtDtype(const FDDataType& fd_dtype) {
if (fd_dtype == FDDataType::FP32) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
} else if (fd_dtype == FDDataType::FP64) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE;
} else if (fd_dtype == FDDataType::INT32) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32;
} else if (fd_dtype == FDDataType::INT64) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
}
FDERROR << "Unrecognized fastdeply data type:" << Str(fd_dtype) << "."
<< std::endl;
return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
}
FDDataType GetFdDtype(const ONNXTensorElementDataType& ort_dtype) {
if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
return FDDataType::FP32;
} else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
return FDDataType::FP64;
} else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
return FDDataType::INT32;
} else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
return FDDataType::INT64;
}
FDERROR << "Unrecognized ort data type:" << ort_dtype << "." << std::endl;
return FDDataType::FP32;
}
Ort::Value CreateOrtValue(FDTensor& tensor, bool is_backend_cuda) {
FDASSERT(tensor.device == Device::GPU || tensor.device == Device::CPU,
"Only support tensor which device is CPU or GPU for OrtBackend.");
if (tensor.device == Device::GPU && is_backend_cuda) {
Ort::MemoryInfo memory_info("Cuda", OrtDeviceAllocator, 0,
OrtMemTypeDefault);
auto ort_value = Ort::Value::CreateTensor(
memory_info, tensor.MutableData(), tensor.Nbytes(), tensor.shape.data(),
tensor.shape.size(), GetOrtDtype(tensor.dtype));
return ort_value;
}
Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
auto ort_value = Ort::Value::CreateTensor(
memory_info, tensor.Data(), tensor.Nbytes(), tensor.shape.data(),
tensor.shape.size(), GetOrtDtype(tensor.dtype));
return ort_value;
}
} // namespace fastdeploy

View File

@@ -1,39 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "onnxruntime_cxx_api.h" // NOLINT
namespace fastdeploy {
// Convert FDDataType to OrtDataType
ONNXTensorElementDataType GetOrtDtype(const FDDataType& fd_dtype);
// Convert OrtDataType to FDDataType
FDDataType GetFdDtype(const ONNXTensorElementDataType& ort_dtype);
// Create Ort::Value
// is_backend_cuda specifies whether onnxruntime uses CUDAExecutionProvider
// When is_backend_cuda = true and tensor.device = Device::GPU,
// the CUDA data in the tensor is shared directly with the OrtValue
Ort::Value CreateOrtValue(FDTensor& tensor, bool is_backend_cuda = false);
} // namespace fastdeploy
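
A minimal sketch exercising the dtype converters declared above: FP32 should survive a round trip between FDDataType and ONNXTensorElementDataType. CreateOrtValue() is the zero-copy path described in the comment and is not exercised here.

#include <cassert>
#include "fastdeploy/backends/ort/utils.h"

int main() {
  // FDDataType::FP32 maps to the ONNX float element type and back.
  ONNXTensorElementDataType ort_dtype =
      fastdeploy::GetOrtDtype(fastdeploy::FDDataType::FP32);
  assert(ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT);
  assert(fastdeploy::GetFdDtype(ort_dtype) == fastdeploy::FDDataType::FP32);
  return 0;
}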

View File

@@ -1,105 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/paddle/paddle_backend.h"
namespace fastdeploy {
void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
if (option.use_gpu) {
config_.EnableUseGpu(option.gpu_mem_init_size, option.gpu_id);
} else {
config_.DisableGpu();
if (option.enable_mkldnn) {
config_.EnableMKLDNN();
config_.SetMkldnnCacheCapacity(option.mkldnn_cache_size);
}
}
config_.SetCpuMathLibraryNumThreads(option.cpu_thread_num);
}
bool PaddleBackend::InitFromPaddle(const std::string& model_file,
const std::string& params_file,
const PaddleBackendOption& option) {
if (initialized_) {
FDERROR << "PaddleBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
config_.SetModel(model_file, params_file);
BuildOption(option);
predictor_ = paddle_infer::CreatePredictor(config_);
std::vector<std::string> input_names = predictor_->GetInputNames();
std::vector<std::string> output_names = predictor_->GetOutputNames();
for (size_t i = 0; i < input_names.size(); ++i) {
auto handle = predictor_->GetInputHandle(input_names[i]);
TensorInfo info;
auto shape = handle->shape();
info.shape.assign(shape.begin(), shape.end());
info.dtype = PaddleDataTypeToFD(handle->type());
info.name = input_names[i];
inputs_desc_.emplace_back(info);
}
for (size_t i = 0; i < output_names.size(); ++i) {
auto handle = predictor_->GetOutputHandle(output_names[i]);
TensorInfo info;
auto shape = handle->shape();
info.shape.assign(shape.begin(), shape.end());
info.dtype = PaddleDataTypeToFD(handle->type());
info.name = output_names[i];
outputs_desc_.emplace_back(info);
}
initialized_ = true;
return true;
}
TensorInfo PaddleBackend::GetInputInfo(int index) {
FDASSERT(index < NumInputs(), "The index:" + std::to_string(index) +
" should less than the number of inputs:" +
std::to_string(NumInputs()) + ".");
return inputs_desc_[index];
}
TensorInfo PaddleBackend::GetOutputInfo(int index) {
FDASSERT(index < NumOutputs(),
"The index:" + std::to_string(index) +
" should less than the number of outputs:" +
std::to_string(NumOutputs()) + ".");
return outputs_desc_[index];
}
bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) {
if (inputs.size() != inputs_desc_.size()) {
FDERROR << "[PaddleBackend] Size of inputs(" << inputs.size()
<< ") should keep same with the inputs of this model("
<< inputs_desc_.size() << ")." << std::endl;
return false;
}
for (size_t i = 0; i < inputs.size(); ++i) {
auto handle = predictor_->GetInputHandle(inputs[i].name);
ShareTensorFromCpu(handle.get(), inputs[i]);
}
predictor_->Run();
outputs->resize(outputs_desc_.size());
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name);
CopyTensorToCpu(handle, &((*outputs)[i]));
}
return true;
}
} // namespace fastdeploy

View File

@@ -1,78 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "paddle_inference_api.h" // NOLINT
namespace fastdeploy {
struct PaddleBackendOption {
#ifdef WITH_GPU
bool use_gpu = true;
#else
bool use_gpu = false;
#endif
bool enable_mkldnn = true;
int mkldnn_cache_size = 1;
int cpu_thread_num = 8;
// initial memory size (MB) for GPU
int gpu_mem_init_size = 100;
// gpu device id
int gpu_id = 0;
};
// Share memory buffer with paddle_infer::Tensor from fastdeploy::FDTensor
void ShareTensorFromCpu(paddle_infer::Tensor* tensor, FDTensor& fd_tensor);
// Copy memory data from paddle_infer::Tensor to fastdeploy::FDTensor
void CopyTensorToCpu(std::unique_ptr<paddle_infer::Tensor>& tensor,
FDTensor* fd_tensor);
// Convert data type from paddle inference to fastdeploy
FDDataType PaddleDataTypeToFD(const paddle_infer::DataType& dtype);
class PaddleBackend : public BaseBackend {
public:
PaddleBackend() {}
virtual ~PaddleBackend() = default;
void BuildOption(const PaddleBackendOption& option);
bool InitFromPaddle(
const std::string& model_file, const std::string& params_file,
const PaddleBackendOption& option = PaddleBackendOption());
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
int NumInputs() const { return inputs_desc_.size(); }
int NumOutputs() const { return outputs_desc_.size(); }
TensorInfo GetInputInfo(int index);
TensorInfo GetOutputInfo(int index);
private:
paddle_infer::Config config_;
std::shared_ptr<paddle_infer::Predictor> predictor_;
std::vector<TensorInfo> inputs_desc_;
std::vector<TensorInfo> outputs_desc_;
};
} // namespace fastdeploy
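
A hedged usage sketch for PaddleBackend as declared above. The model and params paths are placeholders for an exported Paddle inference model; only the declared option fields and methods are assumed.

#include <iostream>
#include <vector>
#include "fastdeploy/backends/paddle/paddle_backend.h"

int main() {
  fastdeploy::PaddleBackendOption option;
  option.enable_mkldnn = true;
  option.cpu_thread_num = 8;

  fastdeploy::PaddleBackend backend;
  // Placeholder paths to an exported Paddle inference model.
  if (!backend.InitFromPaddle("model.pdmodel", "model.pdiparams", option)) {
    std::cerr << "Failed to initialize PaddleBackend." << std::endl;
    return -1;
  }
  for (int i = 0; i < backend.NumInputs(); ++i) {
    std::cout << "input " << i << ": " << backend.GetInputInfo(i).name
              << std::endl;
  }
  return 0;
}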

View File

@@ -1,76 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/paddle/paddle_backend.h"
namespace fastdeploy {
void ShareTensorFromCpu(paddle_infer::Tensor* tensor, FDTensor& fd_tensor) {
std::vector<int> shape(fd_tensor.shape.begin(), fd_tensor.shape.end());
tensor->Reshape(shape);
if (fd_tensor.dtype == FDDataType::FP32) {
tensor->ShareExternalData(static_cast<const float*>(fd_tensor.Data()),
shape, paddle_infer::PlaceType::kCPU);
return;
} else if (fd_tensor.dtype == FDDataType::INT32) {
tensor->ShareExternalData(static_cast<const int32_t*>(fd_tensor.Data()),
shape, paddle_infer::PlaceType::kCPU);
return;
} else if (fd_tensor.dtype == FDDataType::INT64) {
tensor->ShareExternalData(static_cast<const int64_t*>(fd_tensor.Data()),
shape, paddle_infer::PlaceType::kCPU);
return;
}
FDASSERT(false, "Unexpected data type(" + Str(fd_tensor.dtype) +
") while infer with PaddleBackend.");
}
void CopyTensorToCpu(std::unique_ptr<paddle_infer::Tensor>& tensor,
FDTensor* fd_tensor) {
auto fd_dtype = PaddleDataTypeToFD(tensor->type());
std::vector<int64_t> shape;
auto tmp_shape = tensor->shape();
shape.assign(tmp_shape.begin(), tmp_shape.end());
fd_tensor->Allocate(shape, fd_dtype, tensor->name());
if (fd_tensor->dtype == FDDataType::FP32) {
tensor->CopyToCpu(static_cast<float*>(fd_tensor->MutableData()));
return;
} else if (fd_tensor->dtype == FDDataType::INT32) {
tensor->CopyToCpu(static_cast<int32_t*>(fd_tensor->MutableData()));
return;
} else if (fd_tensor->dtype == FDDataType::INT64) {
tensor->CopyToCpu(static_cast<int64_t*>(fd_tensor->MutableData()));
return;
}
FDASSERT(false, "Unexpected data type(" + Str(fd_tensor->dtype) +
") while infer with PaddleBackend.");
}
FDDataType PaddleDataTypeToFD(const paddle_infer::DataType& dtype) {
auto fd_dtype = FDDataType::FP32;
if (dtype == paddle_infer::FLOAT32) {
fd_dtype = FDDataType::FP32;
} else if (dtype == paddle_infer::INT64) {
fd_dtype = FDDataType::INT64;
} else if (dtype == paddle_infer::INT32) {
fd_dtype = FDDataType::INT32;
} else if (dtype == paddle_infer::UINT8) {
fd_dtype = FDDataType::UINT8;
} else {
FDASSERT(false, "Unexpected data type:" + std::to_string(int(dtype)) +
" while call CopyTensorToCpu in PaddleBackend.");
}
return fd_dtype;
}
} // namespace fastdeploy

View File

@@ -1,342 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef BATCH_STREAM_H
#define BATCH_STREAM_H
#include "NvInfer.h"
#include "common.h"
#include <algorithm>
#include <stdio.h>
#include <vector>
class IBatchStream {
public:
virtual void reset(int firstBatch) = 0;
virtual bool next() = 0;
virtual void skip(int skipCount) = 0;
virtual float* getBatch() = 0;
virtual float* getLabels() = 0;
virtual int getBatchesRead() const = 0;
virtual int getBatchSize() const = 0;
virtual nvinfer1::Dims getDims() const = 0;
};
class MNISTBatchStream : public IBatchStream {
public:
MNISTBatchStream(int batchSize, int maxBatches, const std::string& dataFile,
const std::string& labelsFile,
const std::vector<std::string>& directories)
: mBatchSize{batchSize}, mMaxBatches{maxBatches}, mDims{3, {1, 28, 28}}
//!< We already know the dimensions of MNIST images.
{
readDataFile(locateFile(dataFile, directories));
readLabelsFile(locateFile(labelsFile, directories));
}
void reset(int firstBatch) override { mBatchCount = firstBatch; }
bool next() override {
if (mBatchCount >= mMaxBatches) {
return false;
}
++mBatchCount;
return true;
}
void skip(int skipCount) override { mBatchCount += skipCount; }
float* getBatch() override {
return mData.data() +
(mBatchCount * mBatchSize * samplesCommon::volume(mDims));
}
float* getLabels() override {
return mLabels.data() + (mBatchCount * mBatchSize);
}
int getBatchesRead() const override { return mBatchCount; }
int getBatchSize() const override { return mBatchSize; }
nvinfer1::Dims getDims() const override {
return Dims{4, {mBatchSize, mDims.d[0], mDims.d[1], mDims.d[2]}};
}
private:
void readDataFile(const std::string& dataFilePath) {
std::ifstream file{dataFilePath.c_str(), std::ios::binary};
int magicNumber, numImages, imageH, imageW;
file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
// All values in the MNIST files are big endian.
magicNumber = samplesCommon::swapEndianness(magicNumber);
ASSERT(magicNumber == 2051 &&
"Magic Number does not match the expected value for an MNIST image "
"set");
// Read number of images and dimensions
file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
file.read(reinterpret_cast<char*>(&imageH), sizeof(imageH));
file.read(reinterpret_cast<char*>(&imageW), sizeof(imageW));
numImages = samplesCommon::swapEndianness(numImages);
imageH = samplesCommon::swapEndianness(imageH);
imageW = samplesCommon::swapEndianness(imageW);
// The MNIST data is made up of unsigned bytes, so we need to cast to float
// and normalize.
int numElements = numImages * imageH * imageW;
std::vector<uint8_t> rawData(numElements);
file.read(reinterpret_cast<char*>(rawData.data()),
numElements * sizeof(uint8_t));
mData.resize(numElements);
std::transform(rawData.begin(), rawData.end(), mData.begin(),
[](uint8_t val) { return static_cast<float>(val) / 255.f; });
}
void readLabelsFile(const std::string& labelsFilePath) {
std::ifstream file{labelsFilePath.c_str(), std::ios::binary};
int magicNumber, numImages;
file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
// All values in the MNIST files are big endian.
magicNumber = samplesCommon::swapEndianness(magicNumber);
ASSERT(magicNumber == 2049 &&
"Magic Number does not match the expected value for an MNIST labels "
"file");
file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
numImages = samplesCommon::swapEndianness(numImages);
std::vector<uint8_t> rawLabels(numImages);
file.read(reinterpret_cast<char*>(rawLabels.data()),
numImages * sizeof(uint8_t));
mLabels.resize(numImages);
std::transform(rawLabels.begin(), rawLabels.end(), mLabels.begin(),
[](uint8_t val) { return static_cast<float>(val); });
}
int mBatchSize{0};
int mBatchCount{
0}; //!< The batch that will be read on the next invocation of next()
int mMaxBatches{0};
Dims mDims{};
std::vector<float> mData{};
std::vector<float> mLabels{};
};
class BatchStream : public IBatchStream {
public:
BatchStream(int batchSize, int maxBatches, std::string prefix,
std::string suffix, std::vector<std::string> directories)
: mBatchSize(batchSize), mMaxBatches(maxBatches), mPrefix(prefix),
mSuffix(suffix), mDataDir(directories) {
FILE* file = fopen(
locateFile(mPrefix + std::string("0") + mSuffix, mDataDir).c_str(),
"rb");
ASSERT(file != nullptr);
int d[4];
size_t readSize = fread(d, sizeof(int), 4, file);
ASSERT(readSize == 4);
mDims.nbDims = 4; // The number of dimensions.
mDims.d[0] = d[0]; // Batch Size
mDims.d[1] = d[1]; // Channels
mDims.d[2] = d[2]; // Height
mDims.d[3] = d[3]; // Width
ASSERT(mDims.d[0] > 0 && mDims.d[1] > 0 && mDims.d[2] > 0 &&
mDims.d[3] > 0);
fclose(file);
mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
mBatch.resize(mBatchSize * mImageSize, 0);
mLabels.resize(mBatchSize, 0);
mFileBatch.resize(mDims.d[0] * mImageSize, 0);
mFileLabels.resize(mDims.d[0], 0);
reset(0);
}
BatchStream(int batchSize, int maxBatches, std::string prefix,
std::vector<std::string> directories)
: BatchStream(batchSize, maxBatches, prefix, ".batch", directories) {}
BatchStream(int batchSize, int maxBatches, nvinfer1::Dims dims,
std::string listFile, std::vector<std::string> directories)
: mBatchSize(batchSize), mMaxBatches(maxBatches), mDims(dims),
mListFile(listFile), mDataDir(directories) {
mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
mBatch.resize(mBatchSize * mImageSize, 0);
mLabels.resize(mBatchSize, 0);
mFileBatch.resize(mDims.d[0] * mImageSize, 0);
mFileLabels.resize(mDims.d[0], 0);
reset(0);
}
// Resets data members
void reset(int firstBatch) override {
mBatchCount = 0;
mFileCount = 0;
mFileBatchPos = mDims.d[0];
skip(firstBatch);
}
// Advance to next batch and return true, or return false if there is no batch
// left.
bool next() override {
if (mBatchCount == mMaxBatches) {
return false;
}
for (int csize = 1, batchPos = 0; batchPos < mBatchSize;
batchPos += csize, mFileBatchPos += csize) {
ASSERT(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]);
if (mFileBatchPos == mDims.d[0] && !update()) {
return false;
}
// copy the smaller of: elements left to fulfill the request, or elements
// left in the file buffer.
csize = std::min(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos);
std::copy_n(getFileBatch() + mFileBatchPos * mImageSize,
csize * mImageSize, getBatch() + batchPos * mImageSize);
std::copy_n(getFileLabels() + mFileBatchPos, csize,
getLabels() + batchPos);
}
mBatchCount++;
return true;
}
// Skips the batches
void skip(int skipCount) override {
if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 &&
mFileBatchPos == mDims.d[0]) {
mFileCount += skipCount * mBatchSize / mDims.d[0];
return;
}
int x = mBatchCount;
for (int i = 0; i < skipCount; i++) {
next();
}
mBatchCount = x;
}
float* getBatch() override { return mBatch.data(); }
float* getLabels() override { return mLabels.data(); }
int getBatchesRead() const override { return mBatchCount; }
int getBatchSize() const override { return mBatchSize; }
nvinfer1::Dims getDims() const override { return mDims; }
private:
float* getFileBatch() { return mFileBatch.data(); }
float* getFileLabels() { return mFileLabels.data(); }
bool update() {
if (mListFile.empty()) {
std::string inputFileName = locateFile(
mPrefix + std::to_string(mFileCount++) + mSuffix, mDataDir);
FILE* file = fopen(inputFileName.c_str(), "rb");
if (!file) {
return false;
}
int d[4];
size_t readSize = fread(d, sizeof(int), 4, file);
ASSERT(readSize == 4);
ASSERT(mDims.d[0] == d[0] && mDims.d[1] == d[1] && mDims.d[2] == d[2] &&
mDims.d[3] == d[3]);
size_t readInputCount =
fread(getFileBatch(), sizeof(float), mDims.d[0] * mImageSize, file);
ASSERT(readInputCount == size_t(mDims.d[0] * mImageSize));
size_t readLabelCount =
fread(getFileLabels(), sizeof(float), mDims.d[0], file);
ASSERT(readLabelCount == 0 || readLabelCount == size_t(mDims.d[0]));
fclose(file);
} else {
std::vector<std::string> fNames;
std::ifstream file(locateFile(mListFile, mDataDir), std::ios::binary);
if (!file) {
return false;
}
sample::gLogInfo << "Batch #" << mFileCount << std::endl;
file.seekg(((mBatchCount * mBatchSize)) * 7);
for (int i = 1; i <= mBatchSize; i++) {
std::string sName;
std::getline(file, sName);
sName = sName + ".ppm";
sample::gLogInfo << "Calibrating with file " << sName << std::endl;
fNames.emplace_back(sName);
}
mFileCount++;
const int imageC = 3;
const int imageH = 300;
const int imageW = 300;
std::vector<samplesCommon::PPM<imageC, imageH, imageW>> ppms(
fNames.size());
for (uint32_t i = 0; i < fNames.size(); ++i) {
readPPMFile(locateFile(fNames[i], mDataDir), ppms[i]);
}
std::vector<float> data(samplesCommon::volume(mDims));
const float scale = 2.0 / 255.0;
const float bias = 1.0;
long int volChl = mDims.d[2] * mDims.d[3];
// Normalize input data
for (int i = 0, volImg = mDims.d[1] * mDims.d[2] * mDims.d[3];
i < mBatchSize; ++i) {
for (int c = 0; c < mDims.d[1]; ++c) {
for (int j = 0; j < volChl; ++j) {
data[i * volImg + c * volChl + j] =
scale * float(ppms[i].buffer[j * mDims.d[1] + c]) - bias;
}
}
}
std::copy_n(data.data(), mDims.d[0] * mImageSize, getFileBatch());
}
mFileBatchPos = 0;
return true;
}
int mBatchSize{0};
int mMaxBatches{0};
int mBatchCount{0};
int mFileCount{0};
int mFileBatchPos{0};
int mImageSize{0};
std::vector<float> mBatch; //!< Data for the batch
std::vector<float> mLabels; //!< Labels for the batch
std::vector<float> mFileBatch; //!< List of image files
std::vector<float> mFileLabels; //!< List of label files
std::string mPrefix; //!< Batch file name prefix
std::string mSuffix; //!< Batch file name suffix
nvinfer1::Dims mDims; //!< Input dimensions
std::string mListFile; //!< File name of the list of image names
std::vector<std::string>
mDataDir; //!< Directories where the files can be found
};
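// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample). It assumes calibration batches named "batch0.batch",
// "batch1.batch", ... live under the given data directory, which is a
// placeholder path.
inline void exampleIterateBatchStream() {
  BatchStream stream(/*batchSize=*/8, /*maxBatches=*/10, /*prefix=*/"batch",
                     /*directories=*/{"data/int8/"});
  while (stream.next()) {
    const float* batch = stream.getBatch();    // batchSize * C * H * W values
    const float* labels = stream.getLabels();  // batchSize label values
    // Feed `batch`/`labels` to a calibrator or an accuracy check here.
    static_cast<void>(batch);
    static_cast<void>(labels);
  }
}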
#endif

View File

@@ -1 +0,0 @@
exclude_files=.*

View File

@@ -1,118 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ENTROPY_CALIBRATOR_H
#define ENTROPY_CALIBRATOR_H
#include "BatchStream.h"
#include "NvInfer.h"
//! \class EntropyCalibratorImpl
//!
//! \brief Implements common functionality for Entropy calibrators.
//!
template <typename TBatchStream> class EntropyCalibratorImpl {
public:
EntropyCalibratorImpl(TBatchStream stream, int firstBatch,
std::string networkName, const char* inputBlobName,
bool readCache = true)
: mStream{stream},
mCalibrationTableName("CalibrationTable" + networkName),
mInputBlobName(inputBlobName), mReadCache(readCache) {
nvinfer1::Dims dims = mStream.getDims();
mInputCount = samplesCommon::volume(dims);
CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float)));
mStream.reset(firstBatch);
}
virtual ~EntropyCalibratorImpl() { CHECK(cudaFree(mDeviceInput)); }
int getBatchSize() const noexcept { return mStream.getBatchSize(); }
bool getBatch(void* bindings[], const char* names[],
int nbBindings) noexcept {
if (!mStream.next()) {
return false;
}
CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(),
mInputCount * sizeof(float), cudaMemcpyHostToDevice));
ASSERT(!strcmp(names[0], mInputBlobName));
bindings[0] = mDeviceInput;
return true;
}
const void* readCalibrationCache(size_t& length) noexcept {
mCalibrationCache.clear();
std::ifstream input(mCalibrationTableName, std::ios::binary);
input >> std::noskipws;
if (mReadCache && input.good()) {
std::copy(std::istream_iterator<char>(input),
std::istream_iterator<char>(),
std::back_inserter(mCalibrationCache));
}
length = mCalibrationCache.size();
return length ? mCalibrationCache.data() : nullptr;
}
void writeCalibrationCache(const void* cache, size_t length) noexcept {
std::ofstream output(mCalibrationTableName, std::ios::binary);
output.write(reinterpret_cast<const char*>(cache), length);
}
private:
TBatchStream mStream;
size_t mInputCount;
std::string mCalibrationTableName;
const char* mInputBlobName;
bool mReadCache{true};
void* mDeviceInput{nullptr};
std::vector<char> mCalibrationCache;
};
//! \class Int8EntropyCalibrator2
//!
//! \brief Implements Entropy calibrator 2.
//! CalibrationAlgoType is kENTROPY_CALIBRATION_2.
//!
template <typename TBatchStream>
class Int8EntropyCalibrator2 : public IInt8EntropyCalibrator2 {
public:
Int8EntropyCalibrator2(TBatchStream stream, int firstBatch,
const char* networkName, const char* inputBlobName,
bool readCache = true)
: mImpl(stream, firstBatch, networkName, inputBlobName, readCache) {}
int getBatchSize() const noexcept override { return mImpl.getBatchSize(); }
bool getBatch(void* bindings[], const char* names[],
int nbBindings) noexcept override {
return mImpl.getBatch(bindings, names, nbBindings);
}
const void* readCalibrationCache(size_t& length) noexcept override {
return mImpl.readCalibrationCache(length);
}
void writeCalibrationCache(const void* cache,
size_t length) noexcept override {
mImpl.writeCalibrationCache(cache, length);
}
private:
EntropyCalibratorImpl<TBatchStream> mImpl;
};
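// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample). The network name "ExampleNet" and the input blob name "data" below
// are placeholders; the calibrator must stay alive until the engine has been
// built.
inline void exampleEnableInt8Calibration(
    nvinfer1::IBuilderConfig& config,
    Int8EntropyCalibrator2<BatchStream>& calibrator) {
  config.setFlag(nvinfer1::BuilderFlag::kINT8);
  config.setInt8Calibrator(&calibrator);
}
// A caller would typically do:
//   BatchStream stream(8, 10, "batch", {"data/int8/"});
//   Int8EntropyCalibrator2<BatchStream> calibrator(stream, 0, "ExampleNet",
//                                                  "data");
//   exampleEnableInt8Calibration(*config, calibrator);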
#endif // ENTROPY_CALIBRATOR_H

View File

@@ -1,115 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ERROR_RECORDER_H
#define ERROR_RECORDER_H
#include "NvInferRuntimeCommon.h"
#include "logger.h"
#include <atomic>
#include <cstdint>
#include <exception>
#include <mutex>
#include <vector>
using nvinfer1::ErrorCode;
using nvinfer1::IErrorRecorder;
//!
//! A simple implementation of the IErrorRecorder interface for
//! use by samples. This interface also can be used as a reference
//! implementation.
//! The sample Error recorder is based on a vector that pairs the error
//! code and the error string into a single element. It also uses
//! standard mutexes and atomics in order to make sure that the code
//! works in a multi-threaded environment.
//!
class SampleErrorRecorder : public IErrorRecorder {
using errorPair = std::pair<ErrorCode, std::string>;
using errorStack = std::vector<errorPair>;
public:
SampleErrorRecorder() = default;
virtual ~SampleErrorRecorder() noexcept {}
int32_t getNbErrors() const noexcept final { return mErrorStack.size(); }
ErrorCode getErrorCode(int32_t errorIdx) const noexcept final {
return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT
: (*this)[errorIdx].first;
};
IErrorRecorder::ErrorDesc
getErrorDesc(int32_t errorIdx) const noexcept final {
return invalidIndexCheck(errorIdx) ? "errorIdx out of range."
: (*this)[errorIdx].second.c_str();
}
// This class can never overflow since we have dynamic resize via std::vector
// usage.
bool hasOverflowed() const noexcept final { return false; }
// Empty the errorStack.
void clear() noexcept final {
try {
// grab a lock so that there is no addition while clearing.
std::lock_guard<std::mutex> guard(mStackLock);
mErrorStack.clear();
} catch (const std::exception& e) {
sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
}
};
  //! Simple helper function that returns true if the error stack is empty.
bool empty() const noexcept { return mErrorStack.empty(); }
bool reportError(ErrorCode val,
IErrorRecorder::ErrorDesc desc) noexcept final {
try {
std::lock_guard<std::mutex> guard(mStackLock);
sample::gLogError << "Error[" << static_cast<int32_t>(val)
<< "]: " << desc << std::endl;
mErrorStack.push_back(errorPair(val, desc));
} catch (const std::exception& e) {
sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
}
// All errors are considered fatal.
return true;
}
// Atomically increment or decrement the ref counter.
IErrorRecorder::RefCount incRefCount() noexcept final { return ++mRefCount; }
IErrorRecorder::RefCount decRefCount() noexcept final { return --mRefCount; }
private:
// Simple helper functions.
const errorPair& operator[](size_t index) const noexcept {
return mErrorStack[index];
}
bool invalidIndexCheck(int32_t index) const noexcept {
// By converting signed to unsigned, we only need a single check since
// negative numbers turn into large positive greater than the size.
size_t sIndex = index;
return sIndex >= mErrorStack.size();
}
// Mutex to hold when locking mErrorStack.
std::mutex mStackLock;
// Reference count of the class. Destruction of the class when mRefCount
// is not zero causes undefined behavior.
std::atomic<int32_t> mRefCount{0};
// The error stack that holds the errors recorded by TensorRT.
errorStack mErrorStack;
}; // class SampleErrorRecorder
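// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample). The recorder would be attached beforehand with
// builder/runtime/engine->setErrorRecorder(&recorder); afterwards the stack
// can be drained like this.
inline void exampleDumpRecordedErrors(SampleErrorRecorder& recorder) {
  for (int32_t i = 0; i < recorder.getNbErrors(); ++i) {
    sample::gLogError << "Recorded error " << i << ": "
                      << recorder.getErrorDesc(i) << std::endl;
  }
  recorder.clear();
}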
#endif // ERROR_RECORDER_H

View File

@@ -1 +0,0 @@
The code in this directory is sourced from https://github.com/NVIDIA/TensorRT

View File

@@ -1,169 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_ARGS_PARSER_H
#define TENSORRT_ARGS_PARSER_H
#include <string>
#include <vector>
#ifdef _MSC_VER
#include ".\windows\getopt.h"
#else
#include <getopt.h>
#endif
#include <iostream>
namespace samplesCommon {
//!
//! \brief The SampleParams structure groups the basic parameters required by
//! all sample networks.
//!
struct SampleParams {
int32_t batchSize{1}; //!< Number of inputs in a batch
int32_t dlaCore{-1}; //!< Specify the DLA core to run network on.
  bool int8{false}; //!< Allow running the network in Int8 mode.
bool fp16{false}; //!< Allow running the network in FP16 mode.
std::vector<std::string>
dataDirs; //!< Directory paths where sample data files are stored
std::vector<std::string> inputTensorNames;
std::vector<std::string> outputTensorNames;
};
//!
//! \brief The CaffeSampleParams structure groups the additional parameters
//! required by
//! networks that use caffe
//!
struct CaffeSampleParams : public SampleParams {
std::string
prototxtFileName; //!< Filename of prototxt design file of a network
std::string
weightsFileName; //!< Filename of trained weights file of a network
std::string meanFileName; //!< Filename of mean file of a network
};
//!
//! \brief The OnnxSampleParams structure groups the additional parameters
//! required by
//! networks that use ONNX
//!
struct OnnxSampleParams : public SampleParams {
std::string onnxFileName; //!< Filename of ONNX file of a network
};
//!
//! \brief The UffSampleParams structure groups the additional parameters
//! required by
//! networks that use Uff
//!
struct UffSampleParams : public SampleParams {
std::string uffFileName; //!< Filename of uff file of a network
};
//!
//! \brief Struct to maintain command-line arguments.
//!
struct Args {
bool runInInt8{false};
bool runInFp16{false};
bool help{false};
int32_t useDLACore{-1};
int32_t batch{1};
std::vector<std::string> dataDirs;
std::string saveEngine;
std::string loadEngine;
bool useILoop{false};
};
//!
//! \brief Populates the Args struct with the provided command-line parameters.
//!
//! \throw invalid_argument if any of the arguments are not valid
//!
//! \return boolean If return value is true, execution can continue, otherwise
//! program should exit
//!
inline bool parseArgs(Args& args, int32_t argc, char* argv[]) {
while (1) {
int32_t arg;
static struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"datadir", required_argument, 0, 'd'},
{"int8", no_argument, 0, 'i'},
{"fp16", no_argument, 0, 'f'},
{"useILoop", no_argument, 0, 'l'},
{"saveEngine", required_argument, 0, 's'},
{"loadEngine", no_argument, 0, 'o'},
{"useDLACore", required_argument, 0, 'u'},
{"batch", required_argument, 0, 'b'},
{nullptr, 0, nullptr, 0}};
int32_t option_index = 0;
arg = getopt_long(argc, argv, "hd:iu", long_options, &option_index);
if (arg == -1) {
break;
}
switch (arg) {
case 'h':
args.help = true;
return true;
case 'd':
if (optarg) {
args.dataDirs.push_back(optarg);
} else {
std::cerr << "ERROR: --datadir requires option argument" << std::endl;
return false;
}
break;
case 's':
if (optarg) {
args.saveEngine = optarg;
}
break;
case 'o':
if (optarg) {
args.loadEngine = optarg;
}
break;
case 'i':
args.runInInt8 = true;
break;
case 'f':
args.runInFp16 = true;
break;
case 'l':
args.useILoop = true;
break;
case 'u':
if (optarg) {
args.useDLACore = std::stoi(optarg);
}
break;
case 'b':
if (optarg) {
args.batch = std::stoi(optarg);
}
break;
default:
return false;
}
}
return true;
}
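// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample). Typical handling at the top of a sample's main(); the default data
// directory below is a placeholder.
inline bool exampleHandleArgs(int32_t argc, char* argv[], Args& args) {
  if (!parseArgs(args, argc, argv)) {
    std::cerr << "Invalid arguments" << std::endl;
    return false;
  }
  if (args.help) {
    std::cout << "Usage: sample [-h] [-d DATADIR] [--int8] [--fp16] "
                 "[--useDLACore=N] [--batch=N]"
              << std::endl;
    return false;
  }
  if (args.dataDirs.empty()) {
    args.dataDirs.push_back("data/samples/");  // placeholder default
  }
  return true;
}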
} // namespace samplesCommon
#endif // TENSORRT_ARGS_PARSER_H

View File

@@ -1,426 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_BUFFERS_H
#define TENSORRT_BUFFERS_H
#include "NvInfer.h"
#include "common.h"
#include "half.h"
#include <cassert>
#include <cuda_runtime_api.h>
#include <iostream>
#include <iterator>
#include <memory>
#include <new>
#include <numeric>
#include <string>
#include <vector>
namespace samplesCommon {
//!
//! \brief The GenericBuffer class is a templated class for buffers.
//!
//! \details This templated RAII (Resource Acquisition Is Initialization) class
//! handles the allocation,
//! deallocation, querying of buffers on both the device and the host.
//! It can handle data of arbitrary types because it stores byte
//! buffers.
//! The template parameters AllocFunc and FreeFunc are used for the
//! allocation and deallocation of the buffer.
//! AllocFunc must be a functor that takes in (void** ptr, size_t size)
//! and returns bool. ptr is a pointer to where the allocated buffer
//! address should be stored.
//! size is the amount of memory in bytes to allocate.
//! The boolean indicates whether or not the memory allocation was
//! successful.
//! FreeFunc must be a functor that takes in (void* ptr) and returns
//! void.
//! ptr is the allocated buffer address. It must work with nullptr
//! input.
//!
template <typename AllocFunc, typename FreeFunc> class GenericBuffer {
public:
//!
//! \brief Construct an empty buffer.
//!
GenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT)
: mSize(0), mCapacity(0), mType(type), mBuffer(nullptr) {}
//!
//! \brief Construct a buffer with the specified allocation size in bytes.
//!
GenericBuffer(size_t size, nvinfer1::DataType type)
: mSize(size), mCapacity(size), mType(type) {
if (!allocFn(&mBuffer, this->nbBytes())) {
throw std::bad_alloc();
}
}
GenericBuffer(GenericBuffer&& buf)
: mSize(buf.mSize), mCapacity(buf.mCapacity), mType(buf.mType),
mBuffer(buf.mBuffer) {
buf.mSize = 0;
buf.mCapacity = 0;
buf.mType = nvinfer1::DataType::kFLOAT;
buf.mBuffer = nullptr;
}
GenericBuffer& operator=(GenericBuffer&& buf) {
if (this != &buf) {
freeFn(mBuffer);
mSize = buf.mSize;
mCapacity = buf.mCapacity;
mType = buf.mType;
mBuffer = buf.mBuffer;
// Reset buf.
buf.mSize = 0;
buf.mCapacity = 0;
buf.mBuffer = nullptr;
}
return *this;
}
//!
//! \brief Returns pointer to underlying array.
//!
void* data() { return mBuffer; }
//!
//! \brief Returns pointer to underlying array.
//!
const void* data() const { return mBuffer; }
//!
//! \brief Returns the size (in number of elements) of the buffer.
//!
size_t size() const { return mSize; }
//!
//! \brief Returns the size (in bytes) of the buffer.
//!
size_t nbBytes() const {
return this->size() * samplesCommon::getElementSize(mType);
}
//!
  //! \brief Resizes the buffer. Reallocation only happens when the new size
  //! exceeds the current capacity; otherwise only the element count changes.
//!
void resize(size_t newSize) {
mSize = newSize;
if (mCapacity < newSize) {
freeFn(mBuffer);
if (!allocFn(&mBuffer, this->nbBytes())) {
throw std::bad_alloc{};
}
mCapacity = newSize;
}
}
//!
//! \brief Overload of resize that accepts Dims
//!
void resize(const nvinfer1::Dims& dims) {
return this->resize(samplesCommon::volume(dims));
}
~GenericBuffer() { freeFn(mBuffer); }
private:
size_t mSize{0}, mCapacity{0};
nvinfer1::DataType mType;
void* mBuffer;
AllocFunc allocFn;
FreeFunc freeFn;
};
class DeviceAllocator {
public:
bool operator()(void** ptr, size_t size) const {
return cudaMalloc(ptr, size) == cudaSuccess;
}
};
class DeviceFree {
public:
void operator()(void* ptr) const { cudaFree(ptr); }
};
class HostAllocator {
public:
bool operator()(void** ptr, size_t size) const {
*ptr = malloc(size);
return *ptr != nullptr;
}
};
class HostFree {
public:
void operator()(void* ptr) const { free(ptr); }
};
using DeviceBuffer = GenericBuffer<DeviceAllocator, DeviceFree>;
using HostBuffer = GenericBuffer<HostAllocator, HostFree>;
//!
//! \brief The ManagedBuffer class groups together a pair of corresponding
//! device and host buffers.
//!
class ManagedBuffer {
public:
DeviceBuffer deviceBuffer;
HostBuffer hostBuffer;
};
//!
//! \brief The BufferManager class handles host and device buffer allocation
//! and deallocation.
//!
//! \details This RAII class handles host and device buffer allocation and
//! deallocation,
//! memcpy between host and device buffers to aid with inference,
//! and debugging dumps to validate inference. The BufferManager class
//! is meant to be
//! used to simplify buffer management and any interactions between
//! buffers and the engine.
//!
class BufferManager {
public:
static const size_t kINVALID_SIZE_VALUE = ~size_t(0);
//!
//! \brief Create a BufferManager for handling buffer interactions with
//! engine.
//!
BufferManager(std::shared_ptr<nvinfer1::ICudaEngine> engine,
const int batchSize = 0,
const nvinfer1::IExecutionContext* context = nullptr)
: mEngine(engine), mBatchSize(batchSize) {
// Full Dims implies no batch size.
assert(engine->hasImplicitBatchDimension() || mBatchSize == 0);
// Create host and device buffers
for (int i = 0; i < mEngine->getNbBindings(); i++) {
auto dims = context ? context->getBindingDimensions(i)
: mEngine->getBindingDimensions(i);
size_t vol = context || !mBatchSize ? 1 : static_cast<size_t>(mBatchSize);
nvinfer1::DataType type = mEngine->getBindingDataType(i);
int vecDim = mEngine->getBindingVectorizedDim(i);
if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector
{
int scalarsPerVec = mEngine->getBindingComponentsPerElement(i);
dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec);
vol *= scalarsPerVec;
}
vol *= samplesCommon::volume(dims);
std::unique_ptr<ManagedBuffer> manBuf{new ManagedBuffer()};
manBuf->deviceBuffer = DeviceBuffer(vol, type);
manBuf->hostBuffer = HostBuffer(vol, type);
mDeviceBindings.emplace_back(manBuf->deviceBuffer.data());
mManagedBuffers.emplace_back(std::move(manBuf));
}
}
//!
//! \brief Returns a vector of device buffers that you can use directly as
//! bindings for the execute and enqueue methods of IExecutionContext.
//!
std::vector<void*>& getDeviceBindings() { return mDeviceBindings; }
//!
//! \brief Returns a vector of device buffers.
//!
const std::vector<void*>& getDeviceBindings() const {
return mDeviceBindings;
}
//!
//! \brief Returns the device buffer corresponding to tensorName.
//! Returns nullptr if no such tensor can be found.
//!
void* getDeviceBuffer(const std::string& tensorName) const {
return getBuffer(false, tensorName);
}
//!
//! \brief Returns the host buffer corresponding to tensorName.
//! Returns nullptr if no such tensor can be found.
//!
void* getHostBuffer(const std::string& tensorName) const {
return getBuffer(true, tensorName);
}
//!
//! \brief Returns the size of the host and device buffers that correspond to
//! tensorName.
//! Returns kINVALID_SIZE_VALUE if no such tensor can be found.
//!
size_t size(const std::string& tensorName) const {
int index = mEngine->getBindingIndex(tensorName.c_str());
if (index == -1)
return kINVALID_SIZE_VALUE;
return mManagedBuffers[index]->hostBuffer.nbBytes();
}
//!
//! \brief Dump host buffer with specified tensorName to ostream.
//! Prints error message to std::ostream if no such tensor can be
//! found.
//!
void dumpBuffer(std::ostream& os, const std::string& tensorName) {
int index = mEngine->getBindingIndex(tensorName.c_str());
if (index == -1) {
os << "Invalid tensor name" << std::endl;
return;
}
void* buf = mManagedBuffers[index]->hostBuffer.data();
size_t bufSize = mManagedBuffers[index]->hostBuffer.nbBytes();
nvinfer1::Dims bufDims = mEngine->getBindingDimensions(index);
size_t rowCount = static_cast<size_t>(
bufDims.nbDims > 0 ? bufDims.d[bufDims.nbDims - 1] : mBatchSize);
int leadDim = mBatchSize;
int* trailDims = bufDims.d;
int nbDims = bufDims.nbDims;
// Fix explicit Dimension networks
if (!leadDim && nbDims > 0) {
leadDim = bufDims.d[0];
++trailDims;
--nbDims;
}
os << "[" << leadDim;
for (int i = 0; i < nbDims; i++)
os << ", " << trailDims[i];
os << "]" << std::endl;
switch (mEngine->getBindingDataType(index)) {
case nvinfer1::DataType::kINT32:
print<int32_t>(os, buf, bufSize, rowCount);
break;
case nvinfer1::DataType::kFLOAT:
print<float>(os, buf, bufSize, rowCount);
break;
case nvinfer1::DataType::kHALF:
print<half_float::half>(os, buf, bufSize, rowCount);
break;
case nvinfer1::DataType::kINT8:
assert(0 && "Int8 network-level input and output is not supported");
break;
case nvinfer1::DataType::kBOOL:
assert(0 && "Bool network-level input and output are not supported");
break;
}
}
//!
//! \brief Templated print function that dumps buffers of arbitrary type to
//! std::ostream.
//! rowCount parameter controls how many elements are on each line.
//! A rowCount of 1 means that there is only 1 element on each line.
//!
template <typename T>
void print(std::ostream& os, void* buf, size_t bufSize, size_t rowCount) {
assert(rowCount != 0);
assert(bufSize % sizeof(T) == 0);
T* typedBuf = static_cast<T*>(buf);
size_t numItems = bufSize / sizeof(T);
for (int i = 0; i < static_cast<int>(numItems); i++) {
// Handle rowCount == 1 case
if (rowCount == 1 && i != static_cast<int>(numItems) - 1)
os << typedBuf[i] << std::endl;
else if (rowCount == 1)
os << typedBuf[i];
// Handle rowCount > 1 case
else if (i % rowCount == 0)
os << typedBuf[i];
else if (i % rowCount == rowCount - 1)
os << " " << typedBuf[i] << std::endl;
else
os << " " << typedBuf[i];
}
}
//!
//! \brief Copy the contents of input host buffers to input device buffers
//! synchronously.
//!
void copyInputToDevice() { memcpyBuffers(true, false, false); }
//!
//! \brief Copy the contents of output device buffers to output host buffers
//! synchronously.
//!
void copyOutputToHost() { memcpyBuffers(false, true, false); }
//!
//! \brief Copy the contents of input host buffers to input device buffers
//! asynchronously.
//!
void copyInputToDeviceAsync(const cudaStream_t& stream = 0) {
memcpyBuffers(true, false, true, stream);
}
//!
//! \brief Copy the contents of output device buffers to output host buffers
//! asynchronously.
//!
void copyOutputToHostAsync(const cudaStream_t& stream = 0) {
memcpyBuffers(false, true, true, stream);
}
~BufferManager() = default;
private:
void* getBuffer(const bool isHost, const std::string& tensorName) const {
int index = mEngine->getBindingIndex(tensorName.c_str());
if (index == -1)
return nullptr;
return (isHost ? mManagedBuffers[index]->hostBuffer.data()
: mManagedBuffers[index]->deviceBuffer.data());
}
void memcpyBuffers(const bool copyInput, const bool deviceToHost,
const bool async, const cudaStream_t& stream = 0) {
for (int i = 0; i < mEngine->getNbBindings(); i++) {
void* dstPtr = deviceToHost ? mManagedBuffers[i]->hostBuffer.data()
: mManagedBuffers[i]->deviceBuffer.data();
const void* srcPtr = deviceToHost
? mManagedBuffers[i]->deviceBuffer.data()
: mManagedBuffers[i]->hostBuffer.data();
const size_t byteSize = mManagedBuffers[i]->hostBuffer.nbBytes();
const cudaMemcpyKind memcpyType =
deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice;
if ((copyInput && mEngine->bindingIsInput(i)) ||
(!copyInput && !mEngine->bindingIsInput(i))) {
if (async)
CHECK(cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream));
else
CHECK(cudaMemcpy(dstPtr, srcPtr, byteSize, memcpyType));
}
}
}
std::shared_ptr<nvinfer1::ICudaEngine> mEngine; //!< The pointer to the engine
int mBatchSize; //!< The batch size for legacy networks, 0 otherwise.
std::vector<std::unique_ptr<ManagedBuffer>>
mManagedBuffers; //!< The vector of pointers to managed buffers
std::vector<void*> mDeviceBindings; //!< The vector of device buffers needed
//! for engine execution
};
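// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample). Runs one synchronous inference; the tensor names "input" and
// "output" are placeholders.
inline bool exampleInferOnce(std::shared_ptr<nvinfer1::ICudaEngine> engine,
                             nvinfer1::IExecutionContext& context) {
  BufferManager buffers(engine, /*batchSize=*/0, &context);
  float* hostInput = static_cast<float*>(buffers.getHostBuffer("input"));
  if (hostInput == nullptr) {
    return false;
  }
  // ... fill hostInput with preprocessed data here ...
  buffers.copyInputToDevice();
  if (!context.executeV2(buffers.getDeviceBindings().data())) {
    return false;
  }
  buffers.copyOutputToHost();
  return buffers.getHostBuffer("output") != nullptr;
}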
} // namespace samplesCommon
#endif // TENSORRT_BUFFERS_H

View File

@@ -1,844 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_COMMON_H
#define TENSORRT_COMMON_H
// For loadLibrary
#ifdef _MSC_VER
// Needed so that the max/min definitions in windows.h do not conflict with
// std::max/min.
#define NOMINMAX
#include <windows.h>
#undef NOMINMAX
#else
#include <dlfcn.h>
#endif
#include "NvInfer.h"
#include "NvInferPlugin.h"
#include "logger.h"
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstring>
#include <cuda_runtime_api.h>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <new>
#include <numeric>
#include <ratio>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "safeCommon.h"
using namespace nvinfer1;
using namespace plugin;
#ifdef _MSC_VER
#define FN_NAME __FUNCTION__
#else
#define FN_NAME __func__
#endif
#if defined(__aarch64__) || defined(__QNX__)
#define ENABLE_DLA_API 1
#endif
#define CHECK_RETURN_W_MSG(status, val, errMsg) \
do { \
if (!(status)) { \
sample::gLogError << errMsg << " Error in " << __FILE__ << ", function " \
<< FN_NAME << "(), line " << __LINE__ << std::endl; \
return val; \
} \
} while (0)
#undef ASSERT
#define ASSERT(condition) \
do { \
if (!(condition)) { \
sample::gLogError << "Assertion failure: " << #condition << std::endl; \
abort(); \
} \
} while (0)
#define CHECK_RETURN(status, val) CHECK_RETURN_W_MSG(status, val, "")
#define OBJ_GUARD(A) std::unique_ptr<A, void (*)(A * t)>
template <typename T, typename T_> OBJ_GUARD(T) makeObjGuard(T_* t) {
CHECK(!(std::is_base_of<T, T_>::value || std::is_same<T, T_>::value));
auto deleter = [](T* t) { t->destroy(); };
return std::unique_ptr<T, decltype(deleter)>{static_cast<T*>(t), deleter};
}
constexpr long double operator"" _GiB(long double val) {
return val * (1 << 30);
}
constexpr long double operator"" _MiB(long double val) {
return val * (1 << 20);
}
constexpr long double operator"" _KiB(long double val) {
return val * (1 << 10);
}
// These are necessary if we want to be able to write 1_GiB instead of 1.0_GiB.
// Since the return type is signed, -1_GiB will work as expected.
constexpr long long int operator"" _GiB(unsigned long long val) {
return val * (1 << 30);
}
constexpr long long int operator"" _MiB(unsigned long long val) {
return val * (1 << 20);
}
constexpr long long int operator"" _KiB(unsigned long long val) {
return val * (1 << 10);
}
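// Usage sketch (added for illustration; not part of the original NVIDIA
// sample): the literals keep byte sizes readable.
inline void exampleSetWorkspaceSize(nvinfer1::IBuilderConfig& config) {
  config.setMaxWorkspaceSize(256_MiB);  // instead of 268435456
}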
struct SimpleProfiler : public nvinfer1::IProfiler {
struct Record {
float time{0};
int count{0};
};
virtual void reportLayerTime(const char* layerName, float ms) noexcept {
mProfile[layerName].count++;
mProfile[layerName].time += ms;
if (std::find(mLayerNames.begin(), mLayerNames.end(), layerName) ==
mLayerNames.end()) {
mLayerNames.push_back(layerName);
}
}
SimpleProfiler(const char* name,
const std::vector<SimpleProfiler>& srcProfilers =
std::vector<SimpleProfiler>())
: mName(name) {
for (const auto& srcProfiler : srcProfilers) {
for (const auto& rec : srcProfiler.mProfile) {
auto it = mProfile.find(rec.first);
if (it == mProfile.end()) {
mProfile.insert(rec);
} else {
it->second.time += rec.second.time;
it->second.count += rec.second.count;
}
}
}
}
friend std::ostream& operator<<(std::ostream& out,
const SimpleProfiler& value) {
out << "========== " << value.mName << " profile ==========" << std::endl;
float totalTime = 0;
std::string layerNameStr = "TensorRT layer name";
int maxLayerNameLength =
std::max(static_cast<int>(layerNameStr.size()), 70);
for (const auto& elem : value.mProfile) {
totalTime += elem.second.time;
maxLayerNameLength =
std::max(maxLayerNameLength, static_cast<int>(elem.first.size()));
}
auto old_settings = out.flags();
auto old_precision = out.precision();
// Output header
{
out << std::setw(maxLayerNameLength) << layerNameStr << " ";
out << std::setw(12) << "Runtime, "
<< "%"
<< " ";
out << std::setw(12) << "Invocations"
<< " ";
out << std::setw(12) << "Runtime, ms" << std::endl;
}
for (size_t i = 0; i < value.mLayerNames.size(); i++) {
const std::string layerName = value.mLayerNames[i];
auto elem = value.mProfile.at(layerName);
out << std::setw(maxLayerNameLength) << layerName << " ";
out << std::setw(12) << std::fixed << std::setprecision(1)
<< (elem.time * 100.0F / totalTime) << "%"
<< " ";
out << std::setw(12) << elem.count << " ";
out << std::setw(12) << std::fixed << std::setprecision(2) << elem.time
<< std::endl;
}
out.flags(old_settings);
out.precision(old_precision);
out << "========== " << value.mName << " total runtime = " << totalTime
<< " ms ==========" << std::endl;
return out;
}
private:
std::string mName;
std::vector<std::string> mLayerNames;
std::map<std::string, Record> mProfile;
};
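// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample). Attach the profiler, run a few synchronous inferences, then stream
// the per-layer report. The profiler must stay alive while the context uses it.
inline void exampleProfileContext(nvinfer1::IExecutionContext& context,
                                  void* const* bindings,
                                  SimpleProfiler& profiler,
                                  int32_t runs = 10) {
  context.setProfiler(&profiler);
  for (int32_t i = 0; i < runs; ++i) {
    context.executeV2(bindings);  // reportLayerTime() fires after each run
  }
  sample::gLogInfo << profiler << std::endl;
}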
//! Locate path to file, given its filename or filepath suffix and possible dirs
//! it might lie in.
//! Function will also walk back MAX_DEPTH dirs from CWD to check for such a
//! file path.
inline std::string locateFile(const std::string& filepathSuffix,
const std::vector<std::string>& directories,
bool reportError = true) {
const int MAX_DEPTH{10};
bool found{false};
std::string filepath;
for (auto& dir : directories) {
if (!dir.empty() && dir.back() != '/') {
#ifdef _MSC_VER
filepath = dir + "\\" + filepathSuffix;
#else
filepath = dir + "/" + filepathSuffix;
#endif
} else {
filepath = dir + filepathSuffix;
}
for (int i = 0; i < MAX_DEPTH && !found; i++) {
const std::ifstream checkFile(filepath);
found = checkFile.is_open();
if (found) {
break;
}
filepath = "../" + filepath; // Try again in parent dir
}
if (found) {
break;
}
filepath.clear();
}
// Could not find the file
if (filepath.empty()) {
const std::string dirList = std::accumulate(
directories.begin() + 1, directories.end(), directories.front(),
[](const std::string& a, const std::string& b) {
return a + "\n\t" + b;
});
std::cout << "Could not find " << filepathSuffix
<< " in data directories:\n\t" << dirList << std::endl;
if (reportError) {
std::cout << "&&&& FAILED" << std::endl;
exit(EXIT_FAILURE);
}
}
return filepath;
}
inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int inH,
int inW) {
std::ifstream infile(fileName, std::ifstream::binary);
assert(infile.is_open() &&
"Attempting to read from a file that is not open.");
std::string magic, h, w, max;
infile >> magic >> h >> w >> max;
infile.seekg(1, infile.cur);
infile.read(reinterpret_cast<char*>(buffer), inH * inW);
}
namespace samplesCommon {
// Swaps endianness of an integral type.
template <typename T,
typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
inline T swapEndianness(const T& value) {
uint8_t bytes[sizeof(T)];
for (int i = 0; i < static_cast<int>(sizeof(T)); ++i) {
bytes[sizeof(T) - 1 - i] = *(reinterpret_cast<const uint8_t*>(&value) + i);
}
return *reinterpret_cast<T*>(bytes);
}
class HostMemory {
public:
HostMemory() = delete;
virtual void* data() const noexcept { return mData; }
virtual std::size_t size() const noexcept { return mSize; }
virtual DataType type() const noexcept { return mType; }
virtual ~HostMemory() {}
protected:
HostMemory(std::size_t size, DataType type)
: mData{nullptr}, mSize(size), mType(type) {}
void* mData;
std::size_t mSize;
DataType mType;
};
template <typename ElemType, DataType dataType>
class TypedHostMemory : public HostMemory {
public:
explicit TypedHostMemory(std::size_t size) : HostMemory(size, dataType) {
mData = new ElemType[size];
};
~TypedHostMemory() noexcept { delete[](ElemType*) mData; }
ElemType* raw() noexcept { return static_cast<ElemType*>(data()); }
};
using FloatMemory = TypedHostMemory<float, DataType::kFLOAT>;
using HalfMemory = TypedHostMemory<uint16_t, DataType::kHALF>;
using ByteMemory = TypedHostMemory<uint8_t, DataType::kINT8>;
inline void* safeCudaMalloc(size_t memSize) {
void* deviceMem;
CHECK(cudaMalloc(&deviceMem, memSize));
if (deviceMem == nullptr) {
std::cerr << "Out of memory" << std::endl;
exit(1);
}
return deviceMem;
}
inline bool isDebug() { return (std::getenv("TENSORRT_DEBUG") ? true : false); }
struct InferDeleter {
template <typename T> void operator()(T* obj) const { delete obj; }
};
template <typename T> using SampleUniquePtr = std::unique_ptr<T, InferDeleter>;
static auto StreamDeleter = [](cudaStream_t* pStream) {
if (pStream) {
cudaStreamDestroy(*pStream);
delete pStream;
}
};
inline std::unique_ptr<cudaStream_t, decltype(StreamDeleter)> makeCudaStream() {
std::unique_ptr<cudaStream_t, decltype(StreamDeleter)> pStream(
new cudaStream_t, StreamDeleter);
if (cudaStreamCreateWithFlags(pStream.get(), cudaStreamNonBlocking) !=
cudaSuccess) {
pStream.reset(nullptr);
}
return pStream;
}
//! Return vector of indices that puts magnitudes of sequence in descending
//! order.
template <class Iter>
std::vector<size_t> argMagnitudeSort(Iter begin, Iter end) {
std::vector<size_t> indices(end - begin);
std::iota(indices.begin(), indices.end(), 0);
std::sort(indices.begin(), indices.end(), [&begin](size_t i, size_t j) {
return std::abs(begin[j]) < std::abs(begin[i]);
});
return indices;
}
inline bool readReferenceFile(const std::string& fileName,
std::vector<std::string>& refVector) {
std::ifstream infile(fileName);
if (!infile.is_open()) {
std::cout << "ERROR: readReferenceFile: Attempting to read from a file "
"that is not open."
<< std::endl;
return false;
}
std::string line;
while (std::getline(infile, line)) {
if (line.empty())
continue;
refVector.push_back(line);
}
infile.close();
return true;
}
template <typename T>
std::vector<std::string> classify(const std::vector<std::string>& refVector,
const std::vector<T>& output,
const size_t topK) {
const auto inds =
samplesCommon::argMagnitudeSort(output.cbegin(), output.cend());
std::vector<std::string> result;
result.reserve(topK);
for (size_t k = 0; k < topK; ++k) {
result.push_back(refVector[inds[k]]);
}
return result;
}
// Returns indices of highest K magnitudes in v.
template <typename T>
std::vector<size_t> topKMagnitudes(const std::vector<T>& v, const size_t k) {
std::vector<size_t> indices =
samplesCommon::argMagnitudeSort(v.cbegin(), v.cend());
indices.resize(k);
return indices;
}
template <typename T>
bool readASCIIFile(const std::string& fileName, const size_t size,
std::vector<T>& out) {
std::ifstream infile(fileName);
if (!infile.is_open()) {
std::cout << "ERROR readASCIIFile: Attempting to read from a file that is "
"not open."
<< std::endl;
return false;
}
out.clear();
out.reserve(size);
out.assign(std::istream_iterator<T>(infile), std::istream_iterator<T>());
infile.close();
return true;
}
template <typename T>
bool writeASCIIFile(const std::string& fileName, const std::vector<T>& in) {
std::ofstream outfile(fileName);
if (!outfile.is_open()) {
std::cout << "ERROR: writeASCIIFile: Attempting to write to a file that is "
"not open."
<< std::endl;
return false;
}
for (auto fn : in) {
outfile << fn << "\n";
}
outfile.close();
return true;
}
inline void print_version() {
std::cout << " TensorRT version: " << NV_TENSORRT_MAJOR << "."
<< NV_TENSORRT_MINOR << "." << NV_TENSORRT_PATCH << "."
<< NV_TENSORRT_BUILD << std::endl;
}
inline std::string getFileType(const std::string& filepath) {
return filepath.substr(filepath.find_last_of(".") + 1);
}
inline std::string toLower(const std::string& inp) {
std::string out = inp;
std::transform(out.begin(), out.end(), out.begin(), ::tolower);
return out;
}
inline float getMaxValue(const float* buffer, int64_t size) {
assert(buffer != nullptr);
assert(size > 0);
return *std::max_element(buffer, buffer + size);
}
// Ensures that every tensor used by a network has a dynamic range set.
//
// All tensors in a network must have a dynamic range specified if a calibrator
// is not used.
// This function is just a utility to globally fill in missing scales and
// zero-points for the entire network.
//
// If a tensor does not have a dynamic range set, it is assigned inRange or
// outRange as follows:
//
// * If the tensor is the input to a layer or output of a pooling node, its
// dynamic range is derived from inRange.
// * Otherwise its dynamic range is derived from outRange.
//
// The default parameter values are intended to demonstrate, for final layers in
// the network,
// cases where dynamic ranges are asymmetric.
//
// The default parameter values were chosen arbitrarily. Range values should be
// chosen such that we avoid underflow or overflow, and should be non-zero to
// avoid a uniform zero-scale tensor.
inline void setAllDynamicRanges(INetworkDefinition* network,
float inRange = 2.0f, float outRange = 4.0f) {
// Ensure that all layer inputs have a scale.
for (int i = 0; i < network->getNbLayers(); i++) {
auto layer = network->getLayer(i);
for (int j = 0; j < layer->getNbInputs(); j++) {
ITensor* input{layer->getInput(j)};
// Optional inputs are nullptr here and are from RNN layers.
if (input != nullptr && !input->dynamicRangeIsSet()) {
ASSERT(input->setDynamicRange(-inRange, inRange));
}
}
}
// Ensure that all layer outputs have a scale.
// Tensors that are also inputs to layers are ignored here
// since the previous loop nest assigned scales to them.
for (int i = 0; i < network->getNbLayers(); i++) {
auto layer = network->getLayer(i);
for (int j = 0; j < layer->getNbOutputs(); j++) {
ITensor* output{layer->getOutput(j)};
// Optional outputs are nullptr here and are from RNN layers.
if (output != nullptr && !output->dynamicRangeIsSet()) {
// Pooling must have the same input and output scales.
if (layer->getType() == LayerType::kPOOLING) {
ASSERT(output->setDynamicRange(-inRange, inRange));
} else {
ASSERT(output->setDynamicRange(-outRange, outRange));
}
}
}
}
}
inline void setDummyInt8DynamicRanges(const IBuilderConfig* c,
INetworkDefinition* n) {
// Set dummy per-tensor dynamic range if Int8 mode is requested.
if (c->getFlag(BuilderFlag::kINT8)) {
sample::gLogWarning << "Int8 calibrator not provided. Generating dummy "
"per-tensor dynamic range. Int8 accuracy is not "
"guaranteed."
<< std::endl;
setAllDynamicRanges(n);
}
}
inline void enableDLA(IBuilder* builder, IBuilderConfig* config, int useDLACore,
bool allowGPUFallback = true) {
if (useDLACore >= 0) {
if (builder->getNbDLACores() == 0) {
std::cerr << "Trying to use DLA core " << useDLACore
<< " on a platform that doesn't have any DLA cores"
<< std::endl;
      assert(
          "Error: use DLA core on a platform that doesn't have any DLA cores" &&
          false);
}
if (allowGPUFallback) {
config->setFlag(BuilderFlag::kGPU_FALLBACK);
}
if (!config->getFlag(BuilderFlag::kINT8)) {
// User has not requested INT8 Mode.
// By default run in FP16 mode. FP32 mode is not permitted.
config->setFlag(BuilderFlag::kFP16);
}
config->setDefaultDeviceType(DeviceType::kDLA);
config->setDLACore(useDLACore);
}
}
inline int32_t parseDLA(int32_t argc, char** argv) {
for (int32_t i = 1; i < argc; i++) {
if (strncmp(argv[i], "--useDLACore=", 13) == 0) {
return std::stoi(argv[i] + 13);
}
}
return -1;
}
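// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample): forward a --useDLACore=N request from the command line into the
// builder config.
inline void exampleConfigureDLA(nvinfer1::IBuilder* builder,
                                nvinfer1::IBuilderConfig* config,
                                int32_t argc, char** argv) {
  const int32_t dlaCore = parseDLA(argc, argv);  // -1 when DLA not requested
  enableDLA(builder, config, dlaCore);           // no-op for dlaCore < 0
}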
inline uint32_t getElementSize(nvinfer1::DataType t) noexcept {
switch (t) {
case nvinfer1::DataType::kINT32:
return 4;
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kBOOL:
case nvinfer1::DataType::kINT8:
return 1;
}
return 0;
}
inline int64_t volume(const nvinfer1::Dims& d) {
return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int64_t>());
}
template <int C, int H, int W> struct PPM {
std::string magic, fileName;
int h, w, max;
uint8_t buffer[C * H * W];
};
// vPPM (variable-sized PPM) struct whose dimensions are determined at runtime.
struct vPPM {
std::string magic, fileName;
int h, w, max;
std::vector<uint8_t> buffer;
};
struct BBox {
float x1, y1, x2, y2;
};
template <int C, int H, int W>
void readPPMFile(const std::string& filename,
samplesCommon::PPM<C, H, W>& ppm) {
ppm.fileName = filename;
std::ifstream infile(filename, std::ifstream::binary);
assert(infile.is_open() &&
"Attempting to read from a file that is not open.");
infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
infile.seekg(1, infile.cur);
infile.read(reinterpret_cast<char*>(ppm.buffer), ppm.w * ppm.h * 3);
}
inline void readPPMFile(const std::string& filename, vPPM& ppm,
std::vector<std::string>& input_dir) {
ppm.fileName = filename;
std::ifstream infile(locateFile(filename, input_dir), std::ifstream::binary);
infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
infile.seekg(1, infile.cur);
for (int i = 0; i < ppm.w * ppm.h * 3; ++i) {
ppm.buffer.push_back(0);
}
infile.read(reinterpret_cast<char*>(&ppm.buffer[0]), ppm.w * ppm.h * 3);
}
template <int C, int H, int W>
void writePPMFileWithBBox(const std::string& filename, PPM<C, H, W>& ppm,
const BBox& bbox) {
std::ofstream outfile("./" + filename, std::ofstream::binary);
assert(!outfile.fail());
outfile << "P6"
<< "\n"
<< ppm.w << " " << ppm.h << "\n"
<< ppm.max << "\n";
auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); };
const int x1 = std::min(std::max(0, round(int(bbox.x1))), W - 1);
const int x2 = std::min(std::max(0, round(int(bbox.x2))), W - 1);
const int y1 = std::min(std::max(0, round(int(bbox.y1))), H - 1);
const int y2 = std::min(std::max(0, round(int(bbox.y2))), H - 1);
for (int x = x1; x <= x2; ++x) {
// bbox top border
ppm.buffer[(y1 * ppm.w + x) * 3] = 255;
ppm.buffer[(y1 * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(y1 * ppm.w + x) * 3 + 2] = 0;
// bbox bottom border
ppm.buffer[(y2 * ppm.w + x) * 3] = 255;
ppm.buffer[(y2 * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(y2 * ppm.w + x) * 3 + 2] = 0;
}
for (int y = y1; y <= y2; ++y) {
// bbox left border
ppm.buffer[(y * ppm.w + x1) * 3] = 255;
ppm.buffer[(y * ppm.w + x1) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + x1) * 3 + 2] = 0;
// bbox right border
ppm.buffer[(y * ppm.w + x2) * 3] = 255;
ppm.buffer[(y * ppm.w + x2) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + x2) * 3 + 2] = 0;
}
outfile.write(reinterpret_cast<char*>(ppm.buffer), ppm.w * ppm.h * 3);
}
inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm,
std::vector<BBox>& dets) {
std::ofstream outfile("./" + filename, std::ofstream::binary);
assert(!outfile.fail());
outfile << "P6"
<< "\n"
<< ppm.w << " " << ppm.h << "\n"
<< ppm.max << "\n";
auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); };
for (auto bbox : dets) {
for (int x = int(bbox.x1); x < int(bbox.x2); ++x) {
// bbox top border
ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3] = 255;
ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 2] = 0;
// bbox bottom border
ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3] = 255;
ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 2] = 0;
}
for (int y = int(bbox.y1); y < int(bbox.y2); ++y) {
// bbox left border
ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3] = 255;
ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 2] = 0;
// bbox right border
ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3] = 255;
ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 2] = 0;
}
}
outfile.write(reinterpret_cast<char*>(&ppm.buffer[0]), ppm.w * ppm.h * 3);
}
class TimerBase {
public:
virtual void start() {}
virtual void stop() {}
float microseconds() const noexcept { return mMs * 1000.f; }
float milliseconds() const noexcept { return mMs; }
float seconds() const noexcept { return mMs / 1000.f; }
void reset() noexcept { mMs = 0.f; }
protected:
float mMs{0.0f};
};
class GpuTimer : public TimerBase {
public:
explicit GpuTimer(cudaStream_t stream) : mStream(stream) {
CHECK(cudaEventCreate(&mStart));
CHECK(cudaEventCreate(&mStop));
}
~GpuTimer() {
CHECK(cudaEventDestroy(mStart));
CHECK(cudaEventDestroy(mStop));
}
void start() { CHECK(cudaEventRecord(mStart, mStream)); }
void stop() {
CHECK(cudaEventRecord(mStop, mStream));
float ms{0.0f};
CHECK(cudaEventSynchronize(mStop));
CHECK(cudaEventElapsedTime(&ms, mStart, mStop));
mMs += ms;
}
private:
cudaEvent_t mStart, mStop;
cudaStream_t mStream;
}; // class GpuTimer
template <typename Clock> class CpuTimer : public TimerBase {
public:
using clock_type = Clock;
void start() { mStart = Clock::now(); }
void stop() {
mStop = Clock::now();
mMs += std::chrono::duration<float, std::milli>{mStop - mStart}.count();
}
private:
std::chrono::time_point<Clock> mStart, mStop;
}; // class CpuTimer
using PreciseCpuTimer = CpuTimer<std::chrono::high_resolution_clock>;
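// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample). Times one enqueued inference with CUDA events (GpuTimer) and wall
// clock (PreciseCpuTimer); GpuTimer::stop() synchronizes on the stop event.
inline void exampleTimeInference(nvinfer1::IExecutionContext& context,
                                 void* const* bindings, cudaStream_t stream) {
  GpuTimer gpuTimer(stream);
  PreciseCpuTimer cpuTimer;
  cpuTimer.start();
  gpuTimer.start();
  context.enqueueV2(bindings, stream, nullptr);
  gpuTimer.stop();
  cpuTimer.stop();
  sample::gLogInfo << "GPU: " << gpuTimer.milliseconds() << " ms, CPU: "
                   << cpuTimer.milliseconds() << " ms" << std::endl;
}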
inline std::vector<std::string> splitString(std::string str,
char delimiter = ',') {
std::vector<std::string> splitVect;
std::stringstream ss(str);
std::string substr;
while (ss.good()) {
getline(ss, substr, delimiter);
splitVect.emplace_back(std::move(substr));
}
return splitVect;
}
// Return m rounded up to nearest multiple of n
inline int roundUp(int m, int n) { return ((m + n - 1) / n) * n; }
inline int getC(const Dims& d) { return d.nbDims >= 3 ? d.d[d.nbDims - 3] : 1; }
inline int getH(const Dims& d) { return d.nbDims >= 2 ? d.d[d.nbDims - 2] : 1; }
inline int getW(const Dims& d) { return d.nbDims >= 1 ? d.d[d.nbDims - 1] : 1; }
inline void loadLibrary(const std::string& path) {
#ifdef _MSC_VER
void* handle = LoadLibrary(path.c_str());
#else
int32_t flags{RTLD_LAZY};
#if ENABLE_ASAN
// https://github.com/google/sanitizers/issues/89
// asan doesn't handle module unloading correctly and there are no plans on
// doing
// so. In order to get proper stack traces, don't delete the shared library on
// close so that asan can resolve the symbols correctly.
flags |= RTLD_NODELETE;
#endif // ENABLE_ASAN
void* handle = dlopen(path.c_str(), flags);
#endif
if (handle == nullptr) {
#ifdef _MSC_VER
sample::gLogError << "Could not load plugin library: " << path << std::endl;
#else
sample::gLogError << "Could not load plugin library: " << path
<< ", due to: " << dlerror() << std::endl;
#endif
}
}
inline int32_t getSMVersion() {
int32_t deviceIndex = 0;
CHECK(cudaGetDevice(&deviceIndex));
int32_t major, minor;
CHECK(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor,
deviceIndex));
CHECK(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor,
deviceIndex));
return ((major << 8) | minor);
}
inline bool isSMSafe() {
const int32_t smVersion = getSMVersion();
return smVersion == 0x0700 || smVersion == 0x0702 || smVersion == 0x0705 ||
smVersion == 0x0800 || smVersion == 0x0806 || smVersion == 0x0807;
}
inline bool isDataTypeSupported(DataType dataType) {
auto builder = SampleUniquePtr<nvinfer1::IBuilder>(
nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
if (!builder) {
return false;
}
if ((dataType == DataType::kINT8 && !builder->platformHasFastInt8()) ||
(dataType == DataType::kHALF && !builder->platformHasFastFp16())) {
return false;
}
return true;
}
} // namespace samplesCommon
inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) {
os << "(";
for (int i = 0; i < dims.nbDims; ++i) {
os << (i ? ", " : "") << dims.d[i];
}
return os << ")";
}
#endif // TENSORRT_COMMON_H

View File

@@ -1,223 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "getOptions.h"
#include "logger.h"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstring>
#include <set>
namespace nvinfer1 {
namespace utility {
//! Matching for TRTOptions is defined as follows:
//!
//! If A and B both have longName set, A matches B if and only if A.longName ==
//! B.longName and (A.shortName == B.shortName if both have short name set).
//!
//! If A only has shortName set and B only has longName set, then A does not
//! match B. It is assumed that when 2 TRTOptions are compared, one of them is
//! the definition of a TRTOption in the input to getOptions. As such, if the
//! definition only has shortName set, it will never be equal to a TRTOption
//! that does not have shortName set (and same for longName).
//!
//! If A and B both have shortName set but B does not have longName set, A
//! matches B if and only if A.shortName == B.shortName.
//!
//! If A has neither long or short name set, A matches B if and only if B has
//! neither long or short name set.
bool matches(const TRTOption& a, const TRTOption& b) {
if (!a.longName.empty() && !b.longName.empty()) {
if (a.shortName && b.shortName) {
return (a.longName == b.longName) && (a.shortName == b.shortName);
}
return a.longName == b.longName;
}
// If only one of them is not set, this will return false anyway.
return a.shortName == b.shortName;
}
//! getTRTOptionIndex returns the index of a TRTOption in a vector of
//! TRTOptions, -1 if not found.
int getTRTOptionIndex(const std::vector<TRTOption>& options,
const TRTOption& opt) {
for (size_t i = 0; i < options.size(); ++i) {
if (matches(opt, options[i])) {
return i;
}
}
return -1;
}
//! validateTRTOption returns a string containing an error message if the
//! option's names contain invalid characters (anything other than
//! alphanumerics, '-', or '_'), or if the names duplicate ones already seen.
//! Otherwise, it returns the empty string.
std::string validateTRTOption(const std::set<char>& seenShortNames,
const std::set<std::string>& seenLongNames,
const TRTOption& opt) {
if (opt.shortName != 0) {
if (!std::isalnum(opt.shortName)) {
      return "Short name '" + std::string(1, opt.shortName) +
             "' is non-alphanumeric";
}
if (seenShortNames.find(opt.shortName) != seenShortNames.end()) {
      return "Short name '" + std::string(1, opt.shortName) +
             "' is a duplicate";
}
}
if (!opt.longName.empty()) {
for (const char& c : opt.longName) {
if (!std::isalnum(c) && c != '-' && c != '_') {
return "Long name '" + opt.longName +
"' contains characters that are not '-', '_', or alphanumeric";
}
}
if (seenLongNames.find(opt.longName) != seenLongNames.end()) {
return "Long name '" + opt.longName + "' is a duplicate";
}
}
return "";
}
//! validateTRTOptions returns a string containing an error message if any
//! option contains invalid characters or duplicates another option's name.
//! Otherwise, it returns the empty string.
std::string validateTRTOptions(const std::vector<TRTOption>& options) {
std::set<char> seenShortNames;
std::set<std::string> seenLongNames;
for (size_t i = 0; i < options.size(); ++i) {
const std::string errMsg =
validateTRTOption(seenShortNames, seenLongNames, options[i]);
if (!errMsg.empty()) {
return "Error '" + errMsg + "' at TRTOption " + std::to_string(i);
}
seenShortNames.insert(options[i].shortName);
seenLongNames.insert(options[i].longName);
}
return "";
}
//! parseArgs parses an argument list and returns a TRTParsedArgs with the
//! fields set accordingly. Assumes that options is validated.
//! ErrMsg will be set if:
//! - an argument is null
//! - an argument is empty
//! - an argument does not have option (i.e. "-" and "--")
//! - a short argument has more than 1 character
//! - the last argument in the list requires a value
TRTParsedArgs parseArgs(int argc, const char* const* argv,
const std::vector<TRTOption>& options) {
TRTParsedArgs parsedArgs;
parsedArgs.values.resize(options.size());
for (int i = 1; i < argc; ++i) // index of current command-line argument
{
if (argv[i] == nullptr) {
return TRTParsedArgs{"Null argument at index " + std::to_string(i)};
}
const std::string argStr(argv[i]);
if (argStr.empty()) {
return TRTParsedArgs{"Empty argument at index " + std::to_string(i)};
}
// No starting hyphen means it is a positional argument
if (argStr[0] != '-') {
parsedArgs.positionalArgs.push_back(argStr);
continue;
}
if (argStr == "-" || argStr == "--") {
return TRTParsedArgs{"Argument does not specify an option at index " +
std::to_string(i)};
}
// If only 1 hyphen, char after is the flag.
TRTOption opt{' ', "", false, ""};
std::string value;
if (argStr[1] != '-') {
// Must only have 1 char after the hyphen
if (argStr.size() > 2) {
return TRTParsedArgs{
"Short arg contains more than 1 character at index " +
std::to_string(i)};
}
opt.shortName = argStr[1];
} else {
opt.longName = argStr.substr(2);
// We need to support --foo=bar syntax, so look for '='
const size_t eqIndex = opt.longName.find('=');
if (eqIndex < opt.longName.size()) {
value = opt.longName.substr(eqIndex + 1);
opt.longName = opt.longName.substr(0, eqIndex);
}
}
const int idx = getTRTOptionIndex(options, opt);
if (idx < 0) {
continue;
}
if (options[idx].valueRequired) {
if (!value.empty()) {
parsedArgs.values[idx].second.push_back(value);
parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
continue;
}
if (i + 1 >= argc) {
return TRTParsedArgs{"Last argument requires value, but none given"};
}
const std::string nextArg(argv[i + 1]);
if (nextArg.size() >= 1 && nextArg[0] == '-') {
sample::gLogWarning << "Warning: Using '" << nextArg
<< "' as a value for '" << argStr
                            << "'. Should this be its own flag?" << std::endl;
}
parsedArgs.values[idx].second.push_back(nextArg);
i += 1; // Next argument already consumed
parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
} else {
parsedArgs.values[idx].first += 1;
}
}
return parsedArgs;
}
TRTParsedArgs getOptions(int argc, const char* const* argv,
const std::vector<TRTOption>& options) {
const std::string errMsg = validateTRTOptions(options);
if (!errMsg.empty()) {
return TRTParsedArgs{errMsg};
}
return parseArgs(argc, argv, options);
}
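// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original NVIDIA
// sample): parse a "-v/--verbose" flag and a "--model <path>" value option.
inline TRTParsedArgs exampleParse(int argc, const char* const* argv) {
  const std::vector<TRTOption> options{
      {'v', "verbose", false, "enable verbose output"},
      {'m', "model", true, "path to the model file"}};
  TRTParsedArgs parsed = getOptions(argc, argv, options);
  if (!parsed.errMsg.empty()) {
    sample::gLogError << parsed.errMsg << std::endl;
  }
  return parsed;
}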
} // namespace utility
} // namespace nvinfer1

View File

@@ -1,128 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_GET_OPTIONS_H
#define TRT_GET_OPTIONS_H
#include <string>
#include <utility>
#include <vector>
namespace nvinfer1 {
namespace utility {
//! TRTOption defines a command line option. At least 1 of shortName and
//! longName must be defined.
//! If bool initialization is undefined behavior on your system, valueRequired
//! must also be explicitly defined.
//! helpText is optional.
struct TRTOption {
char shortName; //!< Option name in short (single hyphen) form (i.e. -a, -b)
std::string longName; //!< Option name in long (double hyphen) form (i.e.
//!--foo, --bar)
bool valueRequired; //!< True if a value is needed for an option (i.e. -N 4,
//!--foo bar)
std::string helpText; //!< Text to show when printing out the command usage
};
//! TRTParsedArgs is returned by getOptions after it has parsed a command line
//! argument list (argv).
//!
//! errMsg is a string containing an error message if any errors occurred. If it
//! is empty, no errors occurred.
//!
//! values stores a vector of pairs for each option (ordered by order in the
//! input). Each pair contains an int (the number of occurrences) and a vector
//! of strings (a list of values). The user should know which of these to use,
//! and which options required values. For non-value options, only occurrences
//! is populated. For value-required options, occurrences == # of values. Values
//! do not need to be unique.
//!
//! positionalArgs stores additional arguments that are passed in without an
//! option (these must not start with a hyphen).
struct TRTParsedArgs {
std::string errMsg;
std::vector<std::pair<int, std::vector<std::string>>> values;
std::vector<std::string> positionalArgs;
};
//! Parse the input arguments passed to main() and extract options as well as
//! positional arguments.
//!
//! Options are supposed to be passed to main() with a preceding hyphen '-'.
//!
//! If there is a single preceding hyphen, there should be exactly 1 character
//! after the hyphen, which is interpreted as the option.
//!
//! If there are 2 preceding hyphens, the entire argument (without the hyphens)
//! is interpreted as the option.
//!
//! If the option requires a value, the next argument is used as the value.
//!
//! Positional arguments must not start with a hyphen.
//!
//! If an argument requires a value, the next argument is interpreted as the
//! value, even if it is the form of a valid option (i.e. --foo --bar will store
//! "--bar" as a value for option "foo" if "foo" requires a value).
//! We also support --name=value syntax. In this case, 'value' would be used as
//! the value, NOT the next argument.
//!
//! For options:
//! { { 'a', "", false },
//! { 'b', "", false },
//! { 0, "cee", false },
//! { 'd', "", true },
//! { 'e', "", true },
//! { 'f', "foo", true } }
//!
//! ./main hello world -a -a --cee -d 12 -f 34
//! and
//! ./main hello world -a -a --cee -d 12 --foo 34
//!
//! will result in:
//!
//! TRTParsedArgs {
//! errMsg: "",
//! values: { { 2, {} },
//! { 0, {} },
//! { 1, {} },
//! { 1, {"12"} },
//! { 0, {} },
//! { 1, {"34"} } }
//! positionalArgs: {"hello", "world"},
//! }
//!
//! Non-POSIX behavior:
//! - Does not support "-abcde" as a shorthand for "-a -b -c -d -e". Each
//! option must have its own hyphen prefix.
//! - Does not support -e12 as a shorthand for "-e 12". Values MUST be
//! whitespace-separated from the option they are for.
//!
//! @param[in] argc The number of arguments passed to main (including the
//! file name, which is disregarded)
//! @param[in] argv The arguments passed to main (including the file name,
//! which is disregarded)
//! @param[in] options List of TRTOptions to parse
//! @return TRTParsedArgs. See TRTParsedArgs documentation for descriptions of
//! the fields.
TRTParsedArgs getOptions(int argc, const char* const* argv,
const std::vector<TRTOption>& options);
} // namespace utility
} // namespace nvinfer1
#endif // TRT_GET_OPTIONS_H

File diff suppressed because it is too large

View File

@@ -1,38 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "logger.h"
#include "ErrorRecorder.h"
#include "logging.h"
SampleErrorRecorder gRecorder;
namespace sample {
Logger gLogger{Logger::Severity::kINFO};
LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)};
LogStreamConsumer gLogInfo{LOG_INFO(gLogger)};
LogStreamConsumer gLogWarning{LOG_WARN(gLogger)};
LogStreamConsumer gLogError{LOG_ERROR(gLogger)};
LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)};
void setReportableSeverity(Logger::Severity severity) {
gLogger.setReportableSeverity(severity);
gLogVerbose.setReportableSeverity(severity);
gLogInfo.setReportableSeverity(severity);
gLogWarning.setReportableSeverity(severity);
gLogError.setReportableSeverity(severity);
gLogFatal.setReportableSeverity(severity);
}
} // namespace sample

View File

@@ -1,35 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LOGGER_H
#define LOGGER_H
#include "logging.h"
class SampleErrorRecorder;
extern SampleErrorRecorder gRecorder;
namespace sample {
extern Logger gLogger;
extern LogStreamConsumer gLogVerbose;
extern LogStreamConsumer gLogInfo;
extern LogStreamConsumer gLogWarning;
extern LogStreamConsumer gLogError;
extern LogStreamConsumer gLogFatal;
void setReportableSeverity(Logger::Severity severity);
} // namespace sample
#endif // LOGGER_H

View File

@@ -1,573 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_LOGGING_H
#define TENSORRT_LOGGING_H
#include "NvInferRuntimeCommon.h"
#include "sampleOptions.h"
#include <cassert>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <ostream>
#include <sstream>
#include <string>
namespace sample {
using Severity = nvinfer1::ILogger::Severity;
class LogStreamConsumerBuffer : public std::stringbuf {
public:
LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix,
bool shouldLog)
: mOutput(stream), mPrefix(prefix), mShouldLog(shouldLog) {}
LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) noexcept
: mOutput(other.mOutput), mPrefix(other.mPrefix),
mShouldLog(other.mShouldLog) {}
LogStreamConsumerBuffer(const LogStreamConsumerBuffer& other) = delete;
LogStreamConsumerBuffer() = delete;
LogStreamConsumerBuffer& operator=(const LogStreamConsumerBuffer&) = delete;
LogStreamConsumerBuffer& operator=(LogStreamConsumerBuffer&&) = delete;
~LogStreamConsumerBuffer() override {
// std::streambuf::pbase() gives a pointer to the beginning of the buffered
// part of the output sequence
// std::streambuf::pptr() gives a pointer to the current position of the
// output sequence
// if the pointer to the beginning is not equal to the pointer to the
// current position,
// call putOutput() to log the output to the stream
if (pbase() != pptr()) {
putOutput();
}
}
//!
//! synchronizes the stream buffer and returns 0 on success
//! synchronizing the stream buffer consists of inserting the buffer contents
//! into the stream,
//! resetting the buffer and flushing the stream
//!
int32_t sync() override {
putOutput();
return 0;
}
void putOutput() {
if (mShouldLog) {
// prepend timestamp
std::time_t timestamp = std::time(nullptr);
tm* tm_local = std::localtime(&timestamp);
mOutput << "[";
mOutput << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon
<< "/";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
mOutput << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year
<< "-";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
// std::stringbuf::str() gets the string contents of the buffer
// insert the buffer contents pre-appended by the appropriate prefix into
// the stream
mOutput << mPrefix << str();
}
// set the buffer to empty
str("");
// flush the stream
mOutput.flush();
}
void setShouldLog(bool shouldLog) { mShouldLog = shouldLog; }
private:
std::ostream& mOutput;
std::string mPrefix;
bool mShouldLog{};
}; // class LogStreamConsumerBuffer
//!
//! \class LogStreamConsumerBase
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before
//! std::ostream in LogStreamConsumer
//!
class LogStreamConsumerBase {
public:
LogStreamConsumerBase(std::ostream& stream, const std::string& prefix,
bool shouldLog)
: mBuffer(stream, prefix, shouldLog) {}
protected:
std::mutex mLogMutex;
LogStreamConsumerBuffer mBuffer;
}; // class LogStreamConsumerBase
//!
//! \class LogStreamConsumer
//! \brief Convenience object used to facilitate use of C++ stream syntax when
//! logging messages.
//! Order of base classes is LogStreamConsumerBase and then std::ostream.
//! This is because the LogStreamConsumerBase class is used to initialize the
//! LogStreamConsumerBuffer member field
//! in LogStreamConsumer and then the address of the buffer is passed to
//! std::ostream.
//! This is necessary to prevent the address of an uninitialized buffer from
//! being passed to std::ostream.
//! Please do not change the order of the parent classes.
//!
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream {
public:
//!
//! \brief Creates a LogStreamConsumer which logs messages with level
//! severity.
//! Reportable severity determines if the messages are severe enough to be
//! logged.
//!
LogStreamConsumer(nvinfer1::ILogger::Severity reportableSeverity,
nvinfer1::ILogger::Severity severity)
: LogStreamConsumerBase(severityOstream(severity),
severityPrefix(severity),
severity <= reportableSeverity),
std::ostream(&mBuffer) // links the stream buffer with the stream
,
mShouldLog(severity <= reportableSeverity), mSeverity(severity) {}
LogStreamConsumer(LogStreamConsumer&& other) noexcept
: LogStreamConsumerBase(severityOstream(other.mSeverity),
severityPrefix(other.mSeverity),
other.mShouldLog),
std::ostream(&mBuffer) // links the stream buffer with the stream
,
mShouldLog(other.mShouldLog), mSeverity(other.mSeverity) {}
LogStreamConsumer(const LogStreamConsumer& other) = delete;
LogStreamConsumer() = delete;
~LogStreamConsumer() = default;
LogStreamConsumer& operator=(const LogStreamConsumer&) = delete;
LogStreamConsumer& operator=(LogStreamConsumer&&) = delete;
void setReportableSeverity(Severity reportableSeverity) {
mShouldLog = mSeverity <= reportableSeverity;
mBuffer.setShouldLog(mShouldLog);
}
std::mutex& getMutex() { return mLogMutex; }
bool getShouldLog() const { return mShouldLog; }
private:
static std::ostream& severityOstream(Severity severity) {
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
static std::string severityPrefix(Severity severity) {
switch (severity) {
case Severity::kINTERNAL_ERROR:
return "[F] ";
case Severity::kERROR:
return "[E] ";
case Severity::kWARNING:
return "[W] ";
case Severity::kINFO:
return "[I] ";
case Severity::kVERBOSE:
return "[V] ";
default:
assert(0);
return "";
}
}
bool mShouldLog;
Severity mSeverity;
}; // class LogStreamConsumer
template <typename T>
LogStreamConsumer& operator<<(LogStreamConsumer& logger, const T& obj) {
if (logger.getShouldLog()) {
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
os << obj;
}
return logger;
}
//!
//! Special handling std::endl
//!
inline LogStreamConsumer& operator<<(LogStreamConsumer& logger,
std::ostream& (*f)(std::ostream&)) {
if (logger.getShouldLog()) {
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
os << f;
}
return logger;
}
inline LogStreamConsumer& operator<<(LogStreamConsumer& logger,
const nvinfer1::Dims& dims) {
if (logger.getShouldLog()) {
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
for (int32_t i = 0; i < dims.nbDims; ++i) {
os << (i ? "x" : "") << dims.d[i];
}
}
return logger;
}
//!
//! \class Logger
//!
//! \brief Class which manages logging of TensorRT tools and samples
//!
//! \details This class provides a common interface for TensorRT tools and
//! samples to log information to the console,
//! and supports logging two types of messages:
//!
//! - Debugging messages with an associated severity (info, warning, error, or
//! internal error/fatal)
//! - Test pass/fail messages
//!
//! The advantage of having all samples use this class for logging as opposed to
//! emitting directly to stdout/stderr is
//! that the logic for controlling the verbosity and formatting of sample output
//! is centralized in one location.
//!
//! In the future, this class could be extended to support dumping test results
//! to a file in some standard format
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the
//! duration of a test run).
//!
//! TODO: For backwards compatibility with existing samples, this class inherits
//! directly from the nvinfer1::ILogger
//! interface, which is problematic since there isn't a clean separation between
//! messages coming from the TensorRT
//! library and messages coming from the sample.
//!
//! In the future (once all samples are updated to use Logger::getTRTLogger() to
//! access the ILogger) we can refactor the
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger
//! implementation a member of the Logger
//! object.
//!
class Logger : public nvinfer1::ILogger {
public:
explicit Logger(Severity severity = Severity::kWARNING)
: mReportableSeverity(severity) {}
//!
//! \enum TestResult
//! \brief Represents the state of a given test
//!
enum class TestResult {
kRUNNING, //!< The test is running
kPASSED, //!< The test passed
kFAILED, //!< The test failed
kWAIVED //!< The test was waived
};
//!
//! \brief Forward-compatible method for retrieving the nvinfer::ILogger
//! associated with this Logger
//! \return The nvinfer1::ILogger associated with this Logger
//!
//! TODO Once all samples are updated to use this method to register the
//! logger with TensorRT,
//! we can eliminate the inheritance of Logger from ILogger
//!
nvinfer1::ILogger& getTRTLogger() noexcept { return *this; }
//!
//! \brief Implementation of the nvinfer1::ILogger::log() virtual method
//!
//! Note samples should not be calling this function directly; it will
//! eventually go away once we eliminate the
//! inheritance from nvinfer1::ILogger
//!
void log(Severity severity, const char* msg) noexcept override {
LogStreamConsumer(mReportableSeverity, severity)
<< "[TRT] " << std::string(msg) << std::endl;
}
//!
//! \brief Method for controlling the verbosity of logging output
//!
//! \param severity The logger will only emit messages that have severity of
//! this level or higher.
//!
void setReportableSeverity(Severity severity) noexcept {
mReportableSeverity = severity;
}
//!
//! \brief Opaque handle that holds logging information for a particular test
//!
//! This object is an opaque handle to information used by the Logger to print
//! test results.
//! The sample must call Logger::defineTest() in order to obtain a TestAtom
//! that can be used
//! with Logger::reportTest{Start,End}().
//!
class TestAtom {
public:
TestAtom(TestAtom&&) = default;
private:
friend class Logger;
TestAtom(bool started, const std::string& name, const std::string& cmdline)
: mStarted(started), mName(name), mCmdline(cmdline) {}
bool mStarted;
std::string mName;
std::string mCmdline;
};
//!
//! \brief Define a test for logging
//!
//! \param[in] name The name of the test. This should be a string starting
//! with
//! "TensorRT" and containing dot-separated strings
//! containing
//! the characters [A-Za-z0-9_].
//! For example, "TensorRT.sample_googlenet"
//! \param[in] cmdline The command line used to reproduce the test
//
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
//!
static TestAtom defineTest(const std::string& name,
const std::string& cmdline) {
return TestAtom(false, name, cmdline);
}
//!
//! \brief A convenience overloaded version of defineTest() that accepts an
//! array of command-line arguments
//! as input
//!
//! \param[in] name The name of the test
//! \param[in] argc The number of command-line arguments
//! \param[in] argv The array of command-line arguments (given as C strings)
//!
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
//!
static TestAtom defineTest(const std::string& name, int32_t argc,
char const* const* argv) {
// Append TensorRT version as info
const std::string vname =
name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "]";
auto cmdline = genCmdlineString(argc, argv);
return defineTest(vname, cmdline);
}
//!
//! \brief Report that a test has started.
//!
//! \pre reportTestStart() has not been called yet for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has started
//!
static void reportTestStart(TestAtom& testAtom) {
reportTestResult(testAtom, TestResult::kRUNNING);
assert(!testAtom.mStarted);
testAtom.mStarted = true;
}
//!
//! \brief Report that a test has ended.
//!
//! \pre reportTestStart() has been called for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has ended
//! \param[in] result The result of the test. Should be one of
//! TestResult::kPASSED,
//! TestResult::kFAILED, TestResult::kWAIVED
//!
static void reportTestEnd(TestAtom const& testAtom, TestResult result) {
assert(result != TestResult::kRUNNING);
assert(testAtom.mStarted);
reportTestResult(testAtom, result);
}
static int32_t reportPass(TestAtom const& testAtom) {
reportTestEnd(testAtom, TestResult::kPASSED);
return EXIT_SUCCESS;
}
static int32_t reportFail(TestAtom const& testAtom) {
reportTestEnd(testAtom, TestResult::kFAILED);
return EXIT_FAILURE;
}
static int32_t reportWaive(TestAtom const& testAtom) {
reportTestEnd(testAtom, TestResult::kWAIVED);
return EXIT_SUCCESS;
}
static int32_t reportTest(TestAtom const& testAtom, bool pass) {
return pass ? reportPass(testAtom) : reportFail(testAtom);
}
Severity getReportableSeverity() const { return mReportableSeverity; }
private:
//!
//! \brief returns an appropriate string for prefixing a log message with the
//! given severity
//!
static const char* severityPrefix(Severity severity) {
switch (severity) {
case Severity::kINTERNAL_ERROR:
return "[F] ";
case Severity::kERROR:
return "[E] ";
case Severity::kWARNING:
return "[W] ";
case Severity::kINFO:
return "[I] ";
case Severity::kVERBOSE:
return "[V] ";
default:
assert(0);
return "";
}
}
//!
//! \brief returns an appropriate string for prefixing a test result message
//! with the given result
//!
static const char* testResultString(TestResult result) {
switch (result) {
case TestResult::kRUNNING:
return "RUNNING";
case TestResult::kPASSED:
return "PASSED";
case TestResult::kFAILED:
return "FAILED";
case TestResult::kWAIVED:
return "WAIVED";
default:
assert(0);
return "";
}
}
//!
//! \brief returns an appropriate output stream (cout or cerr) to use with the
//! given severity
//!
static std::ostream& severityOstream(Severity severity) {
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
//!
//! \brief method that implements logging test results
//!
static void reportTestResult(TestAtom const& testAtom, TestResult result) {
severityOstream(Severity::kINFO)
<< "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
<< testAtom.mCmdline << std::endl;
}
//!
//! \brief generate a command line string from the given (argc, argv) values
//!
static std::string genCmdlineString(int32_t argc, char const* const* argv) {
std::stringstream ss;
for (int32_t i = 0; i < argc; i++) {
if (i > 0) {
ss << " ";
}
ss << argv[i];
}
return ss.str();
}
Severity mReportableSeverity;
}; // class Logger
namespace {
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kVERBOSE
//!
//! Example usage:
//!
//! LOG_VERBOSE(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kINFO
//!
//! Example usage:
//!
//! LOG_INFO(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_INFO(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kWARNING
//!
//! Example usage:
//!
//! LOG_WARN(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_WARN(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kERROR
//!
//! Example usage:
//!
//! LOG_ERROR(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kINTERNAL_ERROR
//! ("fatal" severity)
//!
//! Example usage:
//!
//! LOG_FATAL(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_FATAL(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(),
Severity::kINTERNAL_ERROR);
}
} // anonymous namespace
} // namespace sample
#endif // TENSORRT_LOGGING_H
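
A minimal usage sketch for the logging utilities above, assuming the program links against the logger globals declared in logger.h (earlier in this diff); the message strings are illustrative.

#include "logger.h" // declares sample::gLogger and the gLog* stream consumers

int main() {
  // Only messages at or above kINFO are emitted after this call.
  sample::setReportableSeverity(sample::Severity::kINFO);

  sample::gLogInfo << "starting up" << std::endl;
  sample::gLogVerbose << "filtered out at kINFO" << std::endl;
  sample::gLogWarning << "shown on stderr with a [W] prefix" << std::endl;

  // The same logger can be handed to TensorRT when creating a builder/runtime:
  //   nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger());
  return 0;
}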

View File

@@ -1,126 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef PARSER_ONNX_CONFIG_H
#define PARSER_ONNX_CONFIG_H
#include <cstring>
#include <iostream>
#include <string>
#include "NvInfer.h"
#include "NvOnnxConfig.h"
#include "NvOnnxParser.h"
#define ONNX_DEBUG 1
/**
* \class ParserOnnxConfig
* \brief Configuration Manager Class Concrete Implementation
*
* \note:
*
*/
using namespace std;
class ParserOnnxConfig : public nvonnxparser::IOnnxConfig {
protected:
string mModelFilename{};
string mTextFilename{};
string mFullTextFilename{};
nvinfer1::DataType mModelDtype;
nvonnxparser::IOnnxConfig::Verbosity mVerbosity;
bool mPrintLayercInfo;
public:
ParserOnnxConfig()
: mModelDtype(nvinfer1::DataType::kFLOAT),
mVerbosity(static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)),
mPrintLayercInfo(false) {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl;
}
#endif
}
protected:
~ParserOnnxConfig() {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl;
}
#endif
}
public:
virtual void setModelDtype(const nvinfer1::DataType modelDtype) noexcept {
mModelDtype = modelDtype;
}
virtual nvinfer1::DataType getModelDtype() const noexcept {
return mModelDtype;
}
virtual const char* getModelFileName() const noexcept {
return mModelFilename.c_str();
}
virtual void setModelFileName(const char* onnxFilename) noexcept {
mModelFilename = string(onnxFilename);
}
virtual nvonnxparser::IOnnxConfig::Verbosity
getVerbosityLevel() const noexcept {
return mVerbosity;
}
virtual void addVerbosity() noexcept { ++mVerbosity; }
virtual void reduceVerbosity() noexcept { --mVerbosity; }
virtual void
setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept {
mVerbosity = verbosity;
}
virtual const char* getTextFileName() const noexcept {
return mTextFilename.c_str();
}
virtual void setTextFileName(const char* textFilename) noexcept {
mTextFilename = string(textFilename);
}
virtual const char* getFullTextFileName() const noexcept {
return mFullTextFilename.c_str();
}
virtual void setFullTextFileName(const char* fullTextFilename) noexcept {
mFullTextFilename = string(fullTextFilename);
}
virtual bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; }
virtual void setPrintLayerInfo(bool src) noexcept {
mPrintLayercInfo = src;
  } //!< set the boolean variable corresponding to the Layer Info, see
//! getPrintLayerInfo()
virtual bool isDebug() const noexcept {
#if ONNX_DEBUG
return (std::getenv("ONNX_DEBUG") ? true : false);
#else
return false;
#endif
}
virtual void destroy() noexcept { delete this; }
}; // class ParserOnnxConfig
#endif
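
A short, hypothetical sketch of how the ParserOnnxConfig class above is meant to be driven; the model path is a placeholder and the header name is assumed from the include guard.

#include <iostream>
#include "parserOnnxConfig.h" // assumed header name for the class above

int main() {
  // The destructor is protected, so the object is heap-allocated and
  // released through destroy().
  ParserOnnxConfig* config = new ParserOnnxConfig();
  config->setModelFileName("model.onnx"); // placeholder path
  config->setModelDtype(nvinfer1::DataType::kHALF);
  config->setPrintLayerInfo(true);
  config->addVerbosity(); // bump verbosity one level above the default

  std::cout << "parsing " << config->getModelFileName()
            << (config->getPrintLayerInfo() ? " with" : " without")
            << " layer info" << std::endl;
  config->destroy();
  return 0;
}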

View File

@@ -1,65 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_SAFE_COMMON_H
#define TENSORRT_SAFE_COMMON_H
#include "NvInferRuntimeCommon.h"
#include <cstdlib>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#define CHECK(status) \
do { \
auto ret = (status); \
if (ret != 0) { \
std::cerr << "Cuda failure: " << ret << std::endl; \
abort(); \
} \
} while (0)
namespace samplesCommon {
template <typename T> inline std::shared_ptr<T> infer_object(T* obj) {
if (!obj) {
throw std::runtime_error("Failed to create object");
}
return std::shared_ptr<T>(obj);
}
inline uint32_t elementSize(nvinfer1::DataType t) {
switch (t) {
case nvinfer1::DataType::kINT32:
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kINT8:
return 1;
case nvinfer1::DataType::kBOOL:
return 1;
}
return 0;
}
template <typename A, typename B> inline A divUp(A x, B n) {
return (x + n - 1) / n;
}
} // namespace samplesCommon
#endif // TENSORRT_SAFE_COMMON_H
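
The helpers above are small enough to demonstrate in a few lines. This sketch (header name assumed, buffer size illustrative) shows CHECK guarding CUDA calls, plus elementSize and divUp for a launch-size computation.

#include <cuda_runtime_api.h>
#include <iostream>
#include "safeCommon.h" // assumed header name for the helpers above

int main() {
  const int64_t elemCount = 1000;
  // Bytes per element for a TensorRT data type (kHALF -> 2).
  const uint32_t elemBytes = samplesCommon::elementSize(nvinfer1::DataType::kHALF);
  // Number of 256-thread blocks needed to cover elemCount elements.
  const int64_t blocks = samplesCommon::divUp(elemCount, 256);
  std::cout << blocks << " blocks, " << elemBytes << " bytes/element" << std::endl;

  // CHECK aborts with a message on any non-zero CUDA status.
  void* devPtr{nullptr};
  CHECK(cudaMalloc(&devPtr, elemCount * elemBytes));
  CHECK(cudaFree(devPtr));
  return 0;
}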

View File

@@ -1,251 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SampleConfig_H
#define SampleConfig_H
#include <cstring>
#include <iostream>
#include <string>
#include "NvInfer.h"
#include "NvOnnxConfig.h"
class SampleConfig : public nvonnxparser::IOnnxConfig {
public:
enum class InputDataFormat : int { kASCII = 0, kPPM = 1 };
private:
std::string mModelFilename;
std::string mEngineFilename;
std::string mTextFilename;
std::string mFullTextFilename;
std::string mImageFilename;
std::string mReferenceFilename;
std::string mOutputFilename;
std::string mCalibrationFilename;
std::string mTimingCacheFilename;
int64_t mLabel{-1};
int64_t mMaxBatchSize{32};
int64_t mCalibBatchSize{0};
int64_t mMaxNCalibBatch{0};
int64_t mFirstCalibBatch{0};
int64_t mUseDLACore{-1};
nvinfer1::DataType mModelDtype{nvinfer1::DataType::kFLOAT};
bool mTF32{true};
Verbosity mVerbosity{static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)};
bool mPrintLayercInfo{false};
bool mDebugBuilder{false};
InputDataFormat mInputDataFormat{InputDataFormat::kASCII};
uint64_t mTopK{0};
float mFailurePercentage{-1.0f};
float mTolerance{0.0f};
float mAbsTolerance{1e-5f};
public:
SampleConfig() {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << " SampleConfig::ctor(): " << this << "\t" << std::endl;
}
#endif
}
protected:
~SampleConfig() {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << "SampleConfig::dtor(): " << this << std::endl;
}
#endif
}
public:
void setModelDtype(const nvinfer1::DataType mdt) noexcept {
mModelDtype = mdt;
}
nvinfer1::DataType getModelDtype() const noexcept { return mModelDtype; }
bool getTF32() const noexcept { return mTF32; }
void setTF32(bool enabled) noexcept { mTF32 = enabled; }
const char* getModelFileName() const noexcept {
return mModelFilename.c_str();
}
void setModelFileName(const char* onnxFilename) noexcept {
mModelFilename = std::string(onnxFilename);
}
Verbosity getVerbosityLevel() const noexcept { return mVerbosity; }
void addVerbosity() noexcept { ++mVerbosity; }
void reduceVerbosity() noexcept { --mVerbosity; }
virtual void setVerbosityLevel(Verbosity v) noexcept { mVerbosity = v; }
const char* getEngineFileName() const noexcept {
return mEngineFilename.c_str();
}
void setEngineFileName(const char* engineFilename) noexcept {
mEngineFilename = std::string(engineFilename);
}
const char* getTextFileName() const noexcept { return mTextFilename.c_str(); }
void setTextFileName(const char* textFilename) noexcept {
mTextFilename = std::string(textFilename);
}
const char* getFullTextFileName() const noexcept {
return mFullTextFilename.c_str();
}
void setFullTextFileName(const char* fullTextFilename) noexcept {
mFullTextFilename = std::string(fullTextFilename);
}
void setLabel(int64_t label) noexcept { mLabel = label; } //!< set the Label
int64_t getLabel() const noexcept { return mLabel; } //!< get the Label
bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; }
void setPrintLayerInfo(bool b) noexcept {
mPrintLayercInfo = b;
  } //!< set the boolean variable corresponding to the Layer Info, see
//! getPrintLayerInfo()
void setMaxBatchSize(int64_t maxBatchSize) noexcept {
mMaxBatchSize = maxBatchSize;
} //!< set the Max Batch Size
int64_t getMaxBatchSize() const noexcept {
return mMaxBatchSize;
} //!< get the Max Batch Size
void setCalibBatchSize(int64_t CalibBatchSize) noexcept {
mCalibBatchSize = CalibBatchSize;
} //!< set the calibration batch size
int64_t getCalibBatchSize() const noexcept {
return mCalibBatchSize;
} //!< get calibration batch size
void setMaxNCalibBatch(int64_t MaxNCalibBatch) noexcept {
mMaxNCalibBatch = MaxNCalibBatch;
} //!< set Max Number of Calibration Batches
int64_t getMaxNCalibBatch() const noexcept {
return mMaxNCalibBatch;
} //!< get the Max Number of Calibration Batches
void setFirstCalibBatch(int64_t FirstCalibBatch) noexcept {
mFirstCalibBatch = FirstCalibBatch;
} //!< set the first calibration batch
int64_t getFirstCalibBatch() const noexcept {
return mFirstCalibBatch;
} //!< get the first calibration batch
void setUseDLACore(int64_t UseDLACore) noexcept {
mUseDLACore = UseDLACore;
} //!< set the DLA core to use
int64_t getUseDLACore() const noexcept {
return mUseDLACore;
} //!< get the DLA core to use
void setDebugBuilder() noexcept {
mDebugBuilder = true;
  } //!< enable debug info while building the engine.
bool getDebugBuilder() const noexcept {
return mDebugBuilder;
} //!< get the boolean variable, corresponding to the debug builder
const char*
  getImageFileName() const noexcept //!< get the Image file name (PPM or ASCII)
{
return mImageFilename.c_str();
}
void setImageFileName(
      const char* imageFilename) noexcept //!< set the Image file name
{
mImageFilename = std::string(imageFilename);
}
const char* getReferenceFileName() const noexcept {
return mReferenceFilename.c_str();
}
void setReferenceFileName(
const char* referenceFilename) noexcept //!< set reference file name
{
mReferenceFilename = std::string(referenceFilename);
}
void setInputDataFormat(InputDataFormat idt) noexcept {
mInputDataFormat = idt;
} //!< specifies expected data format of the image file (PPM or ASCII)
InputDataFormat getInputDataFormat() const noexcept {
return mInputDataFormat;
} //!< returns the expected data format of the image file.
const char* getOutputFileName()
const noexcept //!< specifies the file to save the results
{
return mOutputFilename.c_str();
}
void setOutputFileName(
      const char* outputFilename) noexcept //!< set the output file name
{
mOutputFilename = std::string(outputFilename);
}
const char* getCalibrationFileName() const noexcept {
return mCalibrationFilename.c_str();
} //!< specifies the file containing the list of image files for int8
//! calibration
void setCalibrationFileName(
      const char* calibrationFilename) noexcept //!< set the int 8 calibration
//! list file name
{
mCalibrationFilename = std::string(calibrationFilename);
}
uint64_t getTopK() const noexcept { return mTopK; }
void setTopK(uint64_t topK) noexcept {
mTopK = topK;
  } //!< If this option is specified, return the K top probabilities.
float getFailurePercentage() const noexcept { return mFailurePercentage; }
void setFailurePercentage(float f) noexcept { mFailurePercentage = f; }
float getAbsoluteTolerance() const noexcept { return mAbsTolerance; }
void setAbsoluteTolerance(float a) noexcept { mAbsTolerance = a; }
float getTolerance() const noexcept { return mTolerance; }
void setTolerance(float t) noexcept { mTolerance = t; }
const char* getTimingCacheFilename() const noexcept {
return mTimingCacheFilename.c_str();
}
void setTimingCacheFileName(const char* timingCacheFilename) noexcept {
mTimingCacheFilename = std::string(timingCacheFilename);
}
bool isDebug() const noexcept {
#if ONNX_DEBUG
return (std::getenv("ONNX_DEBUG") ? true : false);
#else
return false;
#endif
}
void destroy() noexcept { delete this; }
}; // class SampleConfig
#endif
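
As with the ONNX parser config earlier in this diff, SampleConfig deletes itself through destroy(). The sketch below (assumed header name, placeholder file names) shows the typical setter/getter round trip.

#include <iostream>
#include "sampleConfig.h" // assumed header name for the class above

int main() {
  SampleConfig* cfg = new SampleConfig(); // protected dtor, freed via destroy()
  cfg->setModelFileName("model.onnx");    // placeholder paths
  cfg->setEngineFileName("model.engine");
  cfg->setMaxBatchSize(8);
  cfg->setTopK(5);
  cfg->setInputDataFormat(SampleConfig::InputDataFormat::kPPM);

  std::cout << "build " << cfg->getEngineFileName() << " from "
            << cfg->getModelFileName()
            << " (maxBatch=" << cfg->getMaxBatchSize()
            << ", topK=" << cfg->getTopK() << ")" << std::endl;
  cfg->destroy();
  return 0;
}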

View File

@@ -1,397 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_DEVICE_H
#define TRT_SAMPLE_DEVICE_H
#include <cassert>
#include <cuda.h>
#include <cuda_runtime.h>
#include <iostream>
#include <thread>
namespace sample {
inline void cudaCheck(cudaError_t ret, std::ostream& err = std::cerr) {
if (ret != cudaSuccess) {
err << "Cuda failure: " << cudaGetErrorString(ret) << std::endl;
abort();
}
}
class TrtCudaEvent;
namespace {
void cudaSleep(void* sleep) {
std::this_thread::sleep_for(
std::chrono::duration<float, std::milli>(*static_cast<float*>(sleep)));
}
} // namespace
//!
//! \class TrtCudaStream
//! \brief Managed CUDA stream
//!
class TrtCudaStream {
public:
TrtCudaStream() { cudaCheck(cudaStreamCreate(&mStream)); }
TrtCudaStream(const TrtCudaStream&) = delete;
TrtCudaStream& operator=(const TrtCudaStream&) = delete;
TrtCudaStream(TrtCudaStream&&) = delete;
TrtCudaStream& operator=(TrtCudaStream&&) = delete;
~TrtCudaStream() { cudaCheck(cudaStreamDestroy(mStream)); }
cudaStream_t get() const { return mStream; }
void synchronize() { cudaCheck(cudaStreamSynchronize(mStream)); }
void wait(TrtCudaEvent& event);
void sleep(float* ms) {
cudaCheck(cudaLaunchHostFunc(mStream, cudaSleep, ms));
}
private:
cudaStream_t mStream{};
};
//!
//! \class TrtCudaEvent
//! \brief Managed CUDA event
//!
class TrtCudaEvent {
public:
explicit TrtCudaEvent(bool blocking = true) {
const uint32_t flags = blocking ? cudaEventBlockingSync : cudaEventDefault;
cudaCheck(cudaEventCreateWithFlags(&mEvent, flags));
}
TrtCudaEvent(const TrtCudaEvent&) = delete;
TrtCudaEvent& operator=(const TrtCudaEvent&) = delete;
TrtCudaEvent(TrtCudaEvent&&) = delete;
TrtCudaEvent& operator=(TrtCudaEvent&&) = delete;
~TrtCudaEvent() { cudaCheck(cudaEventDestroy(mEvent)); }
cudaEvent_t get() const { return mEvent; }
void record(const TrtCudaStream& stream) {
cudaCheck(cudaEventRecord(mEvent, stream.get()));
}
void synchronize() { cudaCheck(cudaEventSynchronize(mEvent)); }
  // Returns the elapsed time in milliseconds
float operator-(const TrtCudaEvent& e) const {
float time{0};
cudaCheck(cudaEventElapsedTime(&time, e.get(), get()));
return time;
}
private:
cudaEvent_t mEvent{};
};
inline void TrtCudaStream::wait(TrtCudaEvent& event) {
cudaCheck(cudaStreamWaitEvent(mStream, event.get(), 0));
}
//!
//! \class TrtCudaGraph
//! \brief Managed CUDA graph
//!
class TrtCudaGraph {
public:
explicit TrtCudaGraph() = default;
TrtCudaGraph(const TrtCudaGraph&) = delete;
TrtCudaGraph& operator=(const TrtCudaGraph&) = delete;
TrtCudaGraph(TrtCudaGraph&&) = delete;
TrtCudaGraph& operator=(TrtCudaGraph&&) = delete;
~TrtCudaGraph() {
if (mGraphExec) {
cudaGraphExecDestroy(mGraphExec);
}
}
void beginCapture(TrtCudaStream& stream) {
cudaCheck(
cudaStreamBeginCapture(stream.get(), cudaStreamCaptureModeThreadLocal));
}
bool launch(TrtCudaStream& stream) {
return cudaGraphLaunch(mGraphExec, stream.get()) == cudaSuccess;
}
void endCapture(TrtCudaStream& stream) {
cudaCheck(cudaStreamEndCapture(stream.get(), &mGraph));
cudaCheck(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0));
cudaCheck(cudaGraphDestroy(mGraph));
}
void endCaptureOnError(TrtCudaStream& stream) {
// There are two possibilities why stream capture would fail:
// (1) stream is in cudaErrorStreamCaptureInvalidated state.
// (2) TRT reports a failure.
// In case (1), the returning mGraph should be nullptr.
// In case (2), the returning mGraph is not nullptr, but it should not be
// used.
const auto ret = cudaStreamEndCapture(stream.get(), &mGraph);
if (ret == cudaErrorStreamCaptureInvalidated) {
assert(mGraph == nullptr);
} else {
assert(ret == cudaSuccess);
assert(mGraph != nullptr);
cudaCheck(cudaGraphDestroy(mGraph));
mGraph = nullptr;
}
// Clean up any CUDA error.
cudaGetLastError();
sample::gLogWarning << "The CUDA graph capture on the stream has failed."
<< std::endl;
}
private:
cudaGraph_t mGraph{};
cudaGraphExec_t mGraphExec{};
};
//!
//! \class TrtCudaBuffer
//! \brief Managed buffer for host and device
//!
template <typename A, typename D> class TrtCudaBuffer {
public:
TrtCudaBuffer() = default;
TrtCudaBuffer(const TrtCudaBuffer&) = delete;
TrtCudaBuffer& operator=(const TrtCudaBuffer&) = delete;
TrtCudaBuffer(TrtCudaBuffer&& rhs) {
reset(rhs.mPtr);
rhs.mPtr = nullptr;
}
TrtCudaBuffer& operator=(TrtCudaBuffer&& rhs) {
if (this != &rhs) {
reset(rhs.mPtr);
rhs.mPtr = nullptr;
}
return *this;
}
~TrtCudaBuffer() { reset(); }
TrtCudaBuffer(size_t size) { A()(&mPtr, size); }
void allocate(size_t size) {
reset();
A()(&mPtr, size);
}
void reset(void* ptr = nullptr) {
if (mPtr) {
D()(mPtr);
}
mPtr = ptr;
}
void* get() const { return mPtr; }
private:
void* mPtr{nullptr};
};
struct DeviceAllocator {
void operator()(void** ptr, size_t size) { cudaCheck(cudaMalloc(ptr, size)); }
};
struct DeviceDeallocator {
void operator()(void* ptr) { cudaCheck(cudaFree(ptr)); }
};
struct ManagedAllocator {
void operator()(void** ptr, size_t size) {
cudaCheck(cudaMallocManaged(ptr, size));
}
};
struct HostAllocator {
void operator()(void** ptr, size_t size) {
cudaCheck(cudaMallocHost(ptr, size));
}
};
struct HostDeallocator {
void operator()(void* ptr) { cudaCheck(cudaFreeHost(ptr)); }
};
using TrtDeviceBuffer = TrtCudaBuffer<DeviceAllocator, DeviceDeallocator>;
using TrtManagedBuffer = TrtCudaBuffer<ManagedAllocator, DeviceDeallocator>;
using TrtHostBuffer = TrtCudaBuffer<HostAllocator, HostDeallocator>;
//!
//! \class MirroredBuffer
//! \brief Coupled host and device buffers
//!
class IMirroredBuffer {
public:
//!
  //! Allocate memory for the mirrored buffer given the size
//! of the allocation.
//!
virtual void allocate(size_t size) = 0;
//!
//! Get the pointer to the device side buffer.
//!
//! \return pointer to device memory or nullptr if uninitialized.
//!
virtual void* getDeviceBuffer() const = 0;
//!
//! Get the pointer to the host side buffer.
//!
//! \return pointer to host memory or nullptr if uninitialized.
//!
virtual void* getHostBuffer() const = 0;
//!
//! Copy the memory from host to device.
//!
virtual void hostToDevice(TrtCudaStream& stream) = 0;
//!
//! Copy the memory from device to host.
//!
virtual void deviceToHost(TrtCudaStream& stream) = 0;
//!
//! Interface to get the size of the memory
//!
//! \return the size of memory allocated.
//!
virtual size_t getSize() const = 0;
//!
  //! Virtual destructor declaration
//!
virtual ~IMirroredBuffer() = default;
}; // class IMirroredBuffer
//!
//! Class to have a separate memory buffer for discrete device and host
//! allocations.
//!
class DiscreteMirroredBuffer : public IMirroredBuffer {
public:
void allocate(size_t size) {
mSize = size;
mHostBuffer.allocate(size);
mDeviceBuffer.allocate(size);
}
void* getDeviceBuffer() const { return mDeviceBuffer.get(); }
void* getHostBuffer() const { return mHostBuffer.get(); }
void hostToDevice(TrtCudaStream& stream) {
cudaCheck(cudaMemcpyAsync(mDeviceBuffer.get(), mHostBuffer.get(), mSize,
cudaMemcpyHostToDevice, stream.get()));
}
void deviceToHost(TrtCudaStream& stream) {
cudaCheck(cudaMemcpyAsync(mHostBuffer.get(), mDeviceBuffer.get(), mSize,
cudaMemcpyDeviceToHost, stream.get()));
}
size_t getSize() const { return mSize; }
private:
size_t mSize{0};
TrtHostBuffer mHostBuffer;
TrtDeviceBuffer mDeviceBuffer;
}; // class DiscreteMirroredBuffer
//!
//! Class to have a unified memory buffer for embedded devices.
//!
class UnifiedMirroredBuffer : public IMirroredBuffer {
public:
void allocate(size_t size) {
mSize = size;
mBuffer.allocate(size);
}
void* getDeviceBuffer() const { return mBuffer.get(); }
void* getHostBuffer() const { return mBuffer.get(); }
void hostToDevice(TrtCudaStream& stream) {
// Does nothing since we are using unified memory.
}
void deviceToHost(TrtCudaStream& stream) {
// Does nothing since we are using unified memory.
}
size_t getSize() const { return mSize; }
private:
size_t mSize{0};
TrtManagedBuffer mBuffer;
}; // class UnifiedMirroredBuffer
inline void setCudaDevice(int device, std::ostream& os) {
cudaCheck(cudaSetDevice(device));
cudaDeviceProp properties;
cudaCheck(cudaGetDeviceProperties(&properties, device));
// clang-format off
os << "=== Device Information ===" << std::endl;
os << "Selected Device: " << properties.name << std::endl;
os << "Compute Capability: " << properties.major << "." << properties.minor << std::endl;
os << "SMs: " << properties.multiProcessorCount << std::endl;
os << "Compute Clock Rate: " << properties.clockRate / 1000000.0F << " GHz" << std::endl;
os << "Device Global Memory: " << (properties.totalGlobalMem >> 20) << " MiB" << std::endl;
os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB" << std::endl;
os << "Memory Bus Width: " << properties.memoryBusWidth << " bits"
<< " (ECC " << (properties.ECCEnabled != 0 ? "enabled" : "disabled") << ")" << std::endl;
os << "Memory Clock Rate: " << properties.memoryClockRate / 1000000.0F << " GHz" << std::endl;
// clang-format on
}
} // namespace sample
#endif // TRT_SAMPLE_DEVICE_H
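
A small sketch of the RAII wrappers above, assuming the header compiles in its original sample context (header name assumed, sizes illustrative): it allocates a mirrored buffer, times an asynchronous host-to-device copy with two events, and prints the device summary.

#include <cstring>
#include <iostream>
#include "sampleDevice.h" // assumed header name for the wrappers above

int main() {
  sample::setCudaDevice(0, std::cout); // prints the "Device Information" block

  sample::TrtCudaStream stream;
  sample::DiscreteMirroredBuffer buffer;
  buffer.allocate(1 << 20); // 1 MiB mirrored on host and device

  std::memset(buffer.getHostBuffer(), 0, buffer.getSize());

  sample::TrtCudaEvent start;
  sample::TrtCudaEvent stop;
  start.record(stream);
  buffer.hostToDevice(stream); // async copy on the managed stream
  stop.record(stream);
  stream.synchronize();

  // operator- returns the elapsed time between the two events in milliseconds.
  std::cout << "H2D copy took " << (stop - start) << " ms" << std::endl;
  return 0;
}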

File diff suppressed because it is too large

View File

@@ -1,195 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_ENGINES_H
#define TRT_SAMPLE_ENGINES_H
#include <iostream>
#include <vector>
//#include "NvCaffeParser.h"
#include "NvInfer.h"
#include "NvInferConsistency.h"
#include "NvInferSafeRuntime.h"
#include "NvOnnxParser.h"
#include "sampleOptions.h"
#include "sampleUtils.h"
namespace sample {
struct Parser {
// TrtUniquePtr<nvcaffeparser1::ICaffeParser> caffeParser;
TrtUniquePtr<nvonnxparser::IParser> onnxParser;
operator bool() const { return false || onnxParser; }
};
struct BuildEnvironment {
TrtUniquePtr<INetworkDefinition> network;
//! Parser that creates the network. Must be declared *after* network, so that
//! when
//! ~BuildEnvironment() executes, the parser is destroyed before the network
//! is destroyed.
Parser parser;
TrtUniquePtr<nvinfer1::ICudaEngine> engine;
std::unique_ptr<nvinfer1::safe::ICudaEngine> safeEngine;
std::vector<uint8_t> engineBlob;
};
//!
//! \brief Generate a network definition for a given model
//!
//! \return Parser The parser used to initialize the network and that holds the
//! weights for the network, or an invalid
//! parser (the returned parser converts to false if tested)
//!
//! Constant input dimensions in the model must not be changed in the
//! corresponding
//! network definition, because its correctness may rely on the constants.
//!
//! \see Parser::operator bool()
//!
Parser modelToNetwork(const ModelOptions& model,
nvinfer1::INetworkDefinition& network, std::ostream& err);
//!
//! \brief Set up network and config
//!
//! \return boolean Return true if network and config were successfully set
//!
bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys,
IBuilder& builder, INetworkDefinition& network,
IBuilderConfig& config, std::ostream& err,
std::vector<std::vector<char>>& sparseWeights);
//!
//! \brief Log refittable layers and weights of a refittable engine
//!
void dumpRefittable(nvinfer1::ICudaEngine& engine);
//!
//! \brief Load a serialized engine
//!
//! \return Pointer to the engine loaded or nullptr if the operation failed
//!
nvinfer1::ICudaEngine* loadEngine(const std::string& engine, int DLACore,
std::ostream& err);
//!
//! \brief Save an engine into a file
//!
//! \return boolean Return true if the engine was successfully saved
//!
bool saveEngine(const nvinfer1::ICudaEngine& engine,
const std::string& fileName, std::ostream& err);
//!
//! \brief Create an engine from model or serialized file, and optionally save
//! engine
//!
//! \return boolean Return true if the engine build environment was successfully
//! created
//!
bool getEngineBuildEnv(const ModelOptions& model, const BuildOptions& build,
const SystemOptions& sys, BuildEnvironment& env,
std::ostream& err);
//!
//! \brief Create an engine from model or serialized file, and optionally save
//! engine
//!
//! \return Pointer to the engine created or nullptr if the creation failed
//!
inline TrtUniquePtr<nvinfer1::ICudaEngine> getEngine(const ModelOptions& model,
const BuildOptions& build,
const SystemOptions& sys,
std::ostream& err) {
BuildEnvironment env;
TrtUniquePtr<nvinfer1::ICudaEngine> engine;
if (getEngineBuildEnv(model, build, sys, env, err)) {
engine.swap(env.engine);
}
return engine;
}
//!
//! \brief Create a serialized network
//!
//! \return Pointer to a host memory for a serialized network
//!
IHostMemory* networkToSerialized(const BuildOptions& build,
const SystemOptions& sys, IBuilder& builder,
INetworkDefinition& network,
std::ostream& err);
//!
//! \brief Transfer a model to a serialized network
//!
//! \return Pointer to a host memory for a serialized network
//!
IHostMemory* modelToSerialized(const ModelOptions& model,
const BuildOptions& build,
const SystemOptions& sys, std::ostream& err);
//!
//! \brief Serialize network and save it into a file
//!
//! \return boolean Return true if the network was successfully serialized and
//! saved
//!
bool serializeAndSave(const ModelOptions& model, const BuildOptions& build,
const SystemOptions& sys, std::ostream& err);
bool timeRefit(const INetworkDefinition& network, nvinfer1::ICudaEngine& engine,
bool multiThreading);
//!
//! \brief Set tensor scales from a calibration table
//!
void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network,
const std::vector<IOFormat>& inputFormats,
const std::vector<IOFormat>& outputFormats,
const std::string& calibrationFile);
//!
//! \brief Check if safe runtime is loaded.
//!
bool hasSafeRuntime();
//!
//! \brief Create a safe runtime object if the dynamic library is loaded.
//!
nvinfer1::safe::IRuntime*
createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept;
//!
//! \brief Check if consistency checker is loaded.
//!
bool hasConsistencyChecker();
//!
//! \brief Create a consistency checker object if the dynamic library is loaded.
//!
nvinfer1::consistency::IConsistencyChecker*
createConsistencyChecker(nvinfer1::ILogger& logger,
IHostMemory const* engine) noexcept;
//!
//! \brief Run consistency check on serialized engine.
//!
bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize);
} // namespace sample
#endif // TRT_SAMPLE_ENGINES_H
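
The call pattern for the engine helpers above, as a rough sketch only: the option structs come from sampleOptions.h, whose fields are not shown in this diff, so a real run would first populate them (for example with the ONNX model path). The engine file name is a placeholder.

#include <iostream>
#include "sampleEngines.h" // assumed to pull in sampleOptions.h and sampleUtils.h

int main() {
  sample::ModelOptions model{}; // would normally carry the model path/format
  sample::BuildOptions build{}; // precision, workspace, profiles, ...
  sample::SystemOptions sys{};  // device, DLA core, plugins, ...

  auto engine = sample::getEngine(model, build, sys, std::cerr);
  if (!engine) {
    std::cerr << "engine creation failed" << std::endl;
    return 1;
  }
  if (!sample::saveEngine(*engine, "model.engine", std::cerr)) { // placeholder path
    return 1;
  }
  std::cout << "engine serialized to model.engine" << std::endl;
  return 0;
}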

View File

@@ -1,943 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <array>
#include <chrono>
#include <cuda_profiler_api.h>
#include <functional>
#include <limits>
#include <memory>
#include <mutex>
#include <numeric>
#include <thread>
#include <utility>
#include <vector>
#if defined(__QNX__)
#include <sys/neutrino.h>
#include <sys/syspage.h>
#endif
#include "NvInfer.h"
#include "ErrorRecorder.h"
#include "logger.h"
#include "sampleDevice.h"
#include "sampleEngines.h"
#include "sampleInference.h"
#include "sampleOptions.h"
#include "sampleReporting.h"
#include "sampleUtils.h"
namespace sample {
template <class MapType, class EngineType>
bool validateTensorNames(const MapType& map, const EngineType* engine,
const int32_t endBindingIndex) {
// Check if the provided input tensor names match the input tensors of the
// engine.
// Throw an error if the provided input tensor names cannot be found because
// it implies a potential typo.
for (const auto& item : map) {
bool tensorNameFound{false};
for (int32_t b = 0; b < endBindingIndex; ++b) {
if (engine->bindingIsInput(b) &&
engine->getBindingName(b) == item.first) {
tensorNameFound = true;
break;
}
}
if (!tensorNameFound) {
sample::gLogError
<< "Cannot find input tensor with name \"" << item.first
<< "\" in the engine bindings! "
<< "Please make sure the input tensor names are correct."
<< std::endl;
return false;
}
}
return true;
}
template <class EngineType, class ContextType> class FillBindingClosure {
private:
using InputsMap = std::unordered_map<std::string, std::string>;
using BindingsVector = std::vector<std::unique_ptr<Bindings>>;
EngineType const* engine;
ContextType const* context;
InputsMap const& inputs;
BindingsVector& bindings;
int32_t batch;
int32_t endBindingIndex;
void fillOneBinding(int32_t bindingIndex, int64_t vol) {
auto const dims = getDims(bindingIndex);
auto const name = engine->getBindingName(bindingIndex);
auto const isInput = engine->bindingIsInput(bindingIndex);
auto const dataType = engine->getBindingDataType(bindingIndex);
auto const* bindingInOutStr = isInput ? "input" : "output";
for (auto& binding : bindings) {
const auto input = inputs.find(name);
if (isInput && input != inputs.end()) {
sample::gLogInfo << "Using values loaded from " << input->second
<< " for input " << name << std::endl;
binding->addBinding(bindingIndex, name, isInput, vol, dataType,
input->second);
} else {
sample::gLogInfo << "Using random values for " << bindingInOutStr << " "
<< name << std::endl;
binding->addBinding(bindingIndex, name, isInput, vol, dataType);
}
sample::gLogInfo << "Created " << bindingInOutStr << " binding for "
<< name << " with dimensions " << dims << std::endl;
}
}
bool fillAllBindings(int32_t batch, int32_t endBindingIndex) {
if (!validateTensorNames(inputs, engine, endBindingIndex)) {
sample::gLogError << "Invalid tensor names found in --loadInputs flag."
<< std::endl;
return false;
}
for (int32_t b = 0; b < endBindingIndex; b++) {
auto const dims = getDims(b);
auto const comps = engine->getBindingComponentsPerElement(b);
auto const strides = context->getStrides(b);
int32_t const vectorDimIndex = engine->getBindingVectorizedDim(b);
auto const vol = volume(dims, strides, vectorDimIndex, comps, batch);
fillOneBinding(b, vol);
}
return true;
}
Dims getDims(int32_t bindingIndex);
public:
FillBindingClosure(EngineType const* _engine, ContextType const* _context,
InputsMap const& _inputs, BindingsVector& _bindings,
int32_t _batch, int32_t _endBindingIndex)
: engine(_engine), context(_context), inputs(_inputs),
bindings(_bindings), batch(_batch), endBindingIndex(_endBindingIndex) {}
bool operator()() { return fillAllBindings(batch, endBindingIndex); }
};
template <>
Dims FillBindingClosure<nvinfer1::ICudaEngine, nvinfer1::IExecutionContext>::
getDims(int32_t bindingIndex) {
return context->getBindingDimensions(bindingIndex);
}
template <>
Dims FillBindingClosure<
nvinfer1::safe::ICudaEngine,
nvinfer1::safe::IExecutionContext>::getDims(int32_t bindingIndex) {
return engine->getBindingDimensions(bindingIndex);
}
bool setUpInference(InferenceEnvironment& iEnv,
const InferenceOptions& inference) {
int32_t device{};
cudaCheck(cudaGetDevice(&device));
cudaDeviceProp properties;
cudaCheck(cudaGetDeviceProperties(&properties, device));
// Use managed memory on integrated devices when transfers are skipped
// and when it is explicitly requested on the commandline.
bool useManagedMemory{(inference.skipTransfers && properties.integrated) ||
inference.useManaged};
using FillSafeBindings =
FillBindingClosure<nvinfer1::safe::ICudaEngine,
nvinfer1::safe::IExecutionContext>;
if (iEnv.safe) {
ASSERT(sample::hasSafeRuntime());
auto* safeEngine = iEnv.safeEngine.get();
for (int32_t s = 0; s < inference.streams; ++s) {
iEnv.safeContext.emplace_back(safeEngine->createExecutionContext());
iEnv.bindings.emplace_back(new Bindings(useManagedMemory));
}
const int32_t nBindings = safeEngine->getNbBindings();
auto const* safeContext = iEnv.safeContext.front().get();
    // batch is set to 1 because the safe runtime only supports explicit batch.
return FillSafeBindings(iEnv.safeEngine.get(), safeContext,
inference.inputs, iEnv.bindings, 1, nBindings)();
}
using FillStdBindings =
FillBindingClosure<nvinfer1::ICudaEngine, nvinfer1::IExecutionContext>;
for (int32_t s = 0; s < inference.streams; ++s) {
auto ec = iEnv.engine->createExecutionContext();
if (ec == nullptr) {
sample::gLogError << "Unable to create execution context for stream " << s
<< "." << std::endl;
return false;
}
iEnv.context.emplace_back(ec);
iEnv.bindings.emplace_back(new Bindings(useManagedMemory));
}
if (iEnv.profiler) {
iEnv.context.front()->setProfiler(iEnv.profiler.get());
// Always run reportToProfiler() after enqueue launch
iEnv.context.front()->setEnqueueEmitsProfile(false);
}
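  // An engine with K optimization profiles exposes K * bindingsPerProfile
  // bindings in total; only the bindings of the first profile are used here.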
const int32_t nOptProfiles = iEnv.engine->getNbOptimizationProfiles();
const int32_t nBindings = iEnv.engine->getNbBindings();
const int32_t bindingsInProfile =
nOptProfiles > 0 ? nBindings / nOptProfiles : 0;
const int32_t endBindingIndex =
bindingsInProfile ? bindingsInProfile : iEnv.engine->getNbBindings();
if (nOptProfiles > 1) {
sample::gLogWarning << "Multiple profiles are currently not supported. "
"Running with one profile."
<< std::endl;
}
  // Make sure that the tensor names provided in command-line args actually
  // exist in the engine bindings, to avoid silent typos.
if (!validateTensorNames(inference.shapes, iEnv.engine.get(),
endBindingIndex)) {
sample::gLogError << "Invalid tensor names found in --shapes flag."
<< std::endl;
return false;
}
// Set all input dimensions before all bindings can be allocated
for (int32_t b = 0; b < endBindingIndex; ++b) {
if (iEnv.engine->bindingIsInput(b)) {
auto dims = iEnv.context.front()->getBindingDimensions(b);
const bool isScalar = dims.nbDims == 0;
const bool isDynamicInput =
std::any_of(dims.d, dims.d + dims.nbDims,
[](int32_t dim) { return dim == -1; }) ||
iEnv.engine->isShapeBinding(b);
if (isDynamicInput) {
auto shape = inference.shapes.find(iEnv.engine->getBindingName(b));
std::vector<int32_t> staticDims;
if (shape == inference.shapes.end()) {
// If no shape is provided, set dynamic dimensions to 1.
constexpr int32_t DEFAULT_DIMENSION = 1;
if (iEnv.engine->isShapeBinding(b)) {
if (isScalar) {
staticDims.push_back(1);
} else {
staticDims.resize(dims.d[0]);
std::fill(staticDims.begin(), staticDims.end(),
DEFAULT_DIMENSION);
}
} else {
staticDims.resize(dims.nbDims);
std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(),
[&](int32_t dimension) {
return dimension >= 0 ? dimension
: DEFAULT_DIMENSION;
});
}
sample::gLogWarning << "Dynamic dimensions required for input: "
<< iEnv.engine->getBindingName(b)
<< ", but no shapes were provided. Automatically "
"overriding shape to: "
<< staticDims << std::endl;
} else if (inference.inputs.count(shape->first) &&
iEnv.engine->isShapeBinding(b)) {
if (isScalar || dims.nbDims == 1) {
// Load shape tensor from file.
size_t const size = isScalar ? 1 : dims.d[0];
staticDims.resize(size);
auto const& filename = inference.inputs.at(shape->first);
auto dst = reinterpret_cast<char*>(staticDims.data());
loadFromFile(filename, dst,
size * sizeof(decltype(staticDims)::value_type));
} else {
sample::gLogWarning << "Cannot load shape tensor " << shape->first
<< " from file, "
<< "ND-Shape isn't supported yet" << std::endl;
// Fallback
staticDims = shape->second;
}
} else {
staticDims = shape->second;
}
for (auto& c : iEnv.context) {
if (iEnv.engine->isShapeBinding(b)) {
if (!c->setInputShapeBinding(b, staticDims.data())) {
return false;
}
} else {
if (!c->setBindingDimensions(b, toDims(staticDims))) {
return false;
}
}
}
}
}
}
auto* engine = iEnv.engine.get();
auto const* context = iEnv.context.front().get();
int32_t const batch =
engine->hasImplicitBatchDimension() ? inference.batch : 1;
return FillStdBindings(engine, context, inference.inputs, iEnv.bindings,
batch, endBindingIndex)();
}
namespace {
#if defined(__QNX__)
using TimePoint = double;
#else
using TimePoint = std::chrono::time_point<std::chrono::high_resolution_clock>;
#endif
TimePoint getCurrentTime() {
#if defined(__QNX__)
uint64_t const currentCycles = ClockCycles();
uint64_t const cyclesPerSecond = SYSPAGE_ENTRY(qtime)->cycles_per_sec;
// Return current timestamp in ms.
return static_cast<TimePoint>(currentCycles) * 1000. / cyclesPerSecond;
#else
return std::chrono::high_resolution_clock::now();
#endif
}
//!
//! \struct SyncStruct
//! \brief Threads synchronization structure
//!
struct SyncStruct {
std::mutex mutex;
TrtCudaStream mainStream;
TrtCudaEvent gpuStart{cudaEventBlockingSync};
TimePoint cpuStart{};
float sleep{};
};
struct Enqueue {
explicit Enqueue(nvinfer1::IExecutionContext& context, void** buffers)
: mContext(context), mBuffers(buffers) {}
nvinfer1::IExecutionContext& mContext;
void** mBuffers{};
};
//!
//! \class EnqueueImplicit
//! \brief Functor to enqueue inference with implicit batch
//!
class EnqueueImplicit : private Enqueue {
public:
explicit EnqueueImplicit(nvinfer1::IExecutionContext& context, void** buffers,
int32_t batch)
: Enqueue(context, buffers), mBatch(batch) {}
bool operator()(TrtCudaStream& stream) const {
if (mContext.enqueue(mBatch, mBuffers, stream.get(), nullptr)) {
// Collecting layer timing info from current profile index of execution
// context
if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() &&
!mContext.reportToProfiler()) {
gLogWarning
<< "Failed to collect layer timing info from previous enqueue()"
<< std::endl;
}
return true;
}
return false;
}
private:
int32_t mBatch;
};
//!
//! \class EnqueueExplicit
//! \brief Functor to enqueue inference with explicit batch
//!
class EnqueueExplicit : private Enqueue {
public:
explicit EnqueueExplicit(nvinfer1::IExecutionContext& context, void** buffers)
: Enqueue(context, buffers) {}
bool operator()(TrtCudaStream& stream) const {
if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) {
// Collecting layer timing info from current profile index of execution
// context
if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() &&
!mContext.reportToProfiler()) {
gLogWarning
<< "Failed to collect layer timing info from previous enqueueV2()"
<< std::endl;
}
return true;
}
return false;
}
};
//!
//! \class EnqueueGraph
//! \brief Functor to enqueue inference from CUDA Graph
//!
class EnqueueGraph {
public:
explicit EnqueueGraph(nvinfer1::IExecutionContext& context,
TrtCudaGraph& graph)
: mGraph(graph), mContext(context) {}
bool operator()(TrtCudaStream& stream) const {
if (mGraph.launch(stream)) {
// Collecting layer timing info from current profile index of execution
// context
if (mContext.getProfiler() && !mContext.reportToProfiler()) {
gLogWarning << "Failed to collect layer timing info from previous CUDA "
"graph launch"
<< std::endl;
}
return true;
}
return false;
}
TrtCudaGraph& mGraph;
nvinfer1::IExecutionContext& mContext;
};
//!
//! \class EnqueueSafe
//! \brief Functor to enqueue safe execution context
//!
class EnqueueSafe {
public:
explicit EnqueueSafe(nvinfer1::safe::IExecutionContext& context,
void** buffers)
: mContext(context), mBuffers(buffers) {}
bool operator()(TrtCudaStream& stream) const {
if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) {
return true;
}
return false;
}
nvinfer1::safe::IExecutionContext& mContext;
void** mBuffers{};
};
using EnqueueFunction = std::function<bool(TrtCudaStream&)>;
enum class StreamType : int32_t {
kINPUT = 0,
kCOMPUTE = 1,
kOUTPUT = 2,
kNUM = 3
};
enum class EventType : int32_t {
kINPUT_S = 0,
kINPUT_E = 1,
kCOMPUTE_S = 2,
kCOMPUTE_E = 3,
kOUTPUT_S = 4,
kOUTPUT_E = 5,
kNUM = 6
};
using MultiStream =
std::array<TrtCudaStream, static_cast<int32_t>(StreamType::kNUM)>;
using MultiEvent = std::array<std::unique_ptr<TrtCudaEvent>,
static_cast<int32_t>(EventType::kNUM)>;
using EnqueueTimes = std::array<TimePoint, 2>;
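// Each Iteration owns one stream per phase (input DMA, compute, output DMA)
// and, for every pipeline depth level, one start/end event per phase, so the
// H2D, compute, and D2H stages can be overlapped and timed independently.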
//!
//! \class Iteration
//! \brief Inference iteration and streams management
//!
template <class ContextType> class Iteration {
public:
Iteration(int32_t id, const InferenceOptions& inference, ContextType& context,
Bindings& bindings)
: mBindings(bindings), mStreamId(id), mDepth(1 + inference.overlap),
mActive(mDepth), mEvents(mDepth), mEnqueueTimes(mDepth),
mContext(&context) {
for (int32_t d = 0; d < mDepth; ++d) {
for (int32_t e = 0; e < static_cast<int32_t>(EventType::kNUM); ++e) {
mEvents[d][e].reset(new TrtCudaEvent(!inference.spin));
}
}
createEnqueueFunction(inference, context, bindings);
}
bool query(bool skipTransfers) {
if (mActive[mNext]) {
return true;
}
if (!skipTransfers) {
record(EventType::kINPUT_S, StreamType::kINPUT);
mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
record(EventType::kINPUT_E, StreamType::kINPUT);
wait(EventType::kINPUT_E,
StreamType::kCOMPUTE); // Wait for input DMA before compute
}
record(EventType::kCOMPUTE_S, StreamType::kCOMPUTE);
recordEnqueueTime();
if (!mEnqueue(getStream(StreamType::kCOMPUTE))) {
return false;
}
recordEnqueueTime();
record(EventType::kCOMPUTE_E, StreamType::kCOMPUTE);
if (!skipTransfers) {
wait(EventType::kCOMPUTE_E,
StreamType::kOUTPUT); // Wait for compute before output DMA
record(EventType::kOUTPUT_S, StreamType::kOUTPUT);
mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
record(EventType::kOUTPUT_E, StreamType::kOUTPUT);
}
mActive[mNext] = true;
moveNext();
return true;
}
float sync(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart,
std::vector<InferenceTrace>& trace, bool skipTransfers) {
if (mActive[mNext]) {
if (skipTransfers) {
getEvent(EventType::kCOMPUTE_E).synchronize();
} else {
getEvent(EventType::kOUTPUT_E).synchronize();
}
trace.emplace_back(getTrace(cpuStart, gpuStart, skipTransfers));
mActive[mNext] = false;
return getEvent(EventType::kCOMPUTE_S) - gpuStart;
}
return 0;
}
void syncAll(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart,
std::vector<InferenceTrace>& trace, bool skipTransfers) {
for (int32_t d = 0; d < mDepth; ++d) {
sync(cpuStart, gpuStart, trace, skipTransfers);
moveNext();
}
}
void wait(TrtCudaEvent& gpuStart) {
getStream(StreamType::kINPUT).wait(gpuStart);
}
void setInputData() {
mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
}
void fetchOutputData() {
mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
}
private:
void moveNext() { mNext = mDepth - 1 - mNext; }
TrtCudaStream& getStream(StreamType t) {
return mStream[static_cast<int32_t>(t)];
}
TrtCudaEvent& getEvent(EventType t) {
return *mEvents[mNext][static_cast<int32_t>(t)];
}
void record(EventType e, StreamType s) { getEvent(e).record(getStream(s)); }
void recordEnqueueTime() {
mEnqueueTimes[mNext][enqueueStart] = getCurrentTime();
enqueueStart = 1 - enqueueStart;
}
TimePoint getEnqueueTime(bool start) {
return mEnqueueTimes[mNext][start ? 0 : 1];
}
void wait(EventType e, StreamType s) { getStream(s).wait(getEvent(e)); }
InferenceTrace getTrace(const TimePoint& cpuStart,
const TrtCudaEvent& gpuStart, bool skipTransfers) {
float is = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart
: getEvent(EventType::kINPUT_S) - gpuStart;
float ie = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart
: getEvent(EventType::kINPUT_E) - gpuStart;
float os = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart
: getEvent(EventType::kOUTPUT_S) - gpuStart;
float oe = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart
: getEvent(EventType::kOUTPUT_E) - gpuStart;
return InferenceTrace(mStreamId,
std::chrono::duration<float, std::milli>(
getEnqueueTime(true) - cpuStart)
.count(),
std::chrono::duration<float, std::milli>(
getEnqueueTime(false) - cpuStart)
.count(),
is, ie, getEvent(EventType::kCOMPUTE_S) - gpuStart,
getEvent(EventType::kCOMPUTE_E) - gpuStart, os, oe);
}
void createEnqueueFunction(const InferenceOptions& inference,
nvinfer1::IExecutionContext& context,
Bindings& bindings) {
if (inference.batch) {
mEnqueue = EnqueueFunction(EnqueueImplicit(
context, mBindings.getDeviceBuffers(), inference.batch));
} else {
mEnqueue = EnqueueFunction(
EnqueueExplicit(context, mBindings.getDeviceBuffers()));
}
if (inference.graph) {
TrtCudaStream& stream = getStream(StreamType::kCOMPUTE);
      // Avoid capturing initialization calls by executing the enqueue function
      // at least once before starting CUDA graph capture.
const auto ret = mEnqueue(stream);
assert(ret);
stream.synchronize();
mGraph.beginCapture(stream);
// The built TRT engine may contain operations that are not permitted
// under CUDA graph capture mode.
// When the stream is capturing, the enqueue call may return false if the
// current CUDA graph capture fails.
if (mEnqueue(stream)) {
mGraph.endCapture(stream);
mEnqueue = EnqueueFunction(EnqueueGraph(context, mGraph));
} else {
mGraph.endCaptureOnError(stream);
// Ensure any CUDA error has been cleaned up.
cudaCheck(cudaGetLastError());
sample::gLogWarning << "The built TensorRT engine contains operations "
"that are not permitted under "
"CUDA graph capture mode."
<< std::endl;
sample::gLogWarning << "The specified --useCudaGraph flag has been "
"ignored. The inference will be "
"launched without using CUDA graph launch."
<< std::endl;
}
}
}
void createEnqueueFunction(const InferenceOptions&,
nvinfer1::safe::IExecutionContext& context,
Bindings&) {
mEnqueue =
EnqueueFunction(EnqueueSafe(context, mBindings.getDeviceBuffers()));
}
Bindings& mBindings;
TrtCudaGraph mGraph;
EnqueueFunction mEnqueue;
int32_t mStreamId{0};
int32_t mNext{0};
int32_t mDepth{2}; // default to double buffer to hide DMA transfers
std::vector<bool> mActive;
MultiStream mStream;
std::vector<MultiEvent> mEvents;
int32_t enqueueStart{0};
std::vector<EnqueueTimes> mEnqueueTimes;
ContextType* mContext{nullptr};
};
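//!
//! \brief Run the query/sync loop over all inference streams. The loop keeps
//! running until both the requested iteration count and maxDurationMs have
//! been reached; iterations completed during the warmup period are counted in
//! skip and re-run.
//!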
template <class ContextType>
bool inferenceLoop(
std::vector<std::unique_ptr<Iteration<ContextType>>>& iStreams,
const TimePoint& cpuStart, const TrtCudaEvent& gpuStart, int iterations,
float maxDurationMs, float warmupMs, std::vector<InferenceTrace>& trace,
bool skipTransfers, float idleMs) {
float durationMs = 0;
int32_t skip = 0;
for (int32_t i = 0; i < iterations + skip || durationMs < maxDurationMs;
++i) {
for (auto& s : iStreams) {
if (!s->query(skipTransfers)) {
return false;
}
}
for (auto& s : iStreams) {
durationMs = std::max(durationMs,
s->sync(cpuStart, gpuStart, trace, skipTransfers));
}
if (durationMs < warmupMs) // Warming up
{
if (durationMs) // Skip complete iterations
{
++skip;
}
continue;
}
if (idleMs != 0.F) {
std::this_thread::sleep_for(
std::chrono::duration<float, std::milli>(idleMs));
}
}
for (auto& s : iStreams) {
s->syncAll(cpuStart, gpuStart, trace, skipTransfers);
}
return true;
}
template <class ContextType>
void inferenceExecution(const InferenceOptions& inference,
InferenceEnvironment& iEnv, SyncStruct& sync,
const int32_t threadIdx, const int32_t streamsPerThread,
int32_t device, std::vector<InferenceTrace>& trace) {
float warmupMs = inference.warmup;
float durationMs = inference.duration * 1000.F + warmupMs;
cudaCheck(cudaSetDevice(device));
std::vector<std::unique_ptr<Iteration<ContextType>>> iStreams;
for (int32_t s = 0; s < streamsPerThread; ++s) {
const int32_t streamId{threadIdx * streamsPerThread + s};
auto* iteration = new Iteration<ContextType>(
streamId, inference, *iEnv.template getContext<ContextType>(streamId),
*iEnv.bindings[streamId]);
if (inference.skipTransfers) {
iteration->setInputData();
}
iStreams.emplace_back(iteration);
}
for (auto& s : iStreams) {
s->wait(sync.gpuStart);
}
std::vector<InferenceTrace> localTrace;
if (!inferenceLoop(iStreams, sync.cpuStart, sync.gpuStart,
inference.iterations, durationMs, warmupMs, localTrace,
inference.skipTransfers, inference.idle)) {
iEnv.error = true;
}
if (inference.skipTransfers) {
for (auto& s : iStreams) {
s->fetchOutputData();
}
}
sync.mutex.lock();
trace.insert(trace.end(), localTrace.begin(), localTrace.end());
sync.mutex.unlock();
}
inline std::thread makeThread(const InferenceOptions& inference,
InferenceEnvironment& iEnv, SyncStruct& sync,
int32_t threadIdx, int32_t streamsPerThread,
int32_t device,
std::vector<InferenceTrace>& trace) {
if (iEnv.safe) {
ASSERT(sample::hasSafeRuntime());
return std::thread(inferenceExecution<nvinfer1::safe::IExecutionContext>,
std::cref(inference), std::ref(iEnv), std::ref(sync),
threadIdx, streamsPerThread, device, std::ref(trace));
}
return std::thread(inferenceExecution<nvinfer1::IExecutionContext>,
std::cref(inference), std::ref(iEnv), std::ref(sync),
threadIdx, streamsPerThread, device, std::ref(trace));
}
} // namespace
bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv,
int32_t device, std::vector<InferenceTrace>& trace) {
cudaCheck(cudaProfilerStart());
trace.resize(0);
SyncStruct sync;
sync.sleep = inference.sleep;
sync.mainStream.sleep(&sync.sleep);
sync.cpuStart = getCurrentTime();
sync.gpuStart.record(sync.mainStream);
// When multiple streams are used, trtexec can run inference in two modes:
  // (1) if inference.threads is true, then run each stream on its own thread.
// (2) if inference.threads is false, then run all streams on the same thread.
const int32_t numThreads = inference.threads ? inference.streams : 1;
const int32_t streamsPerThread = inference.threads ? 1 : inference.streams;
std::vector<std::thread> threads;
for (int32_t threadIdx = 0; threadIdx < numThreads; ++threadIdx) {
threads.emplace_back(makeThread(inference, iEnv, sync, threadIdx,
streamsPerThread, device, trace));
}
for (auto& th : threads) {
th.join();
}
cudaCheck(cudaProfilerStop());
auto cmpTrace = [](const InferenceTrace& a, const InferenceTrace& b) {
return a.h2dStart < b.h2dStart;
};
std::sort(trace.begin(), trace.end(), cmpTrace);
return !iEnv.error;
}
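// Usage sketch (illustration only, assuming 'device' comes from SystemOptions
// and 'reporting' from ReportingOptions): a typical call order once the engine
// has been deserialized into iEnv.
//
//   std::vector<InferenceTrace> trace;
//   if (setUpInference(iEnv, inference) &&
//       runInference(inference, iEnv, device, trace)) {
//     printPerformanceReport(trace, reporting, inference.warmup,
//                            inference.batch, sample::gLogInfo,
//                            sample::gLogWarning, sample::gLogVerbose);
//   }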
namespace {
size_t reportGpuMemory() {
static size_t prevFree{0};
size_t free{0};
size_t total{0};
size_t newlyAllocated{0};
cudaCheck(cudaMemGetInfo(&free, &total));
sample::gLogInfo << "Free GPU memory = " << free / 1024.0_MiB << " GiB";
if (prevFree != 0) {
newlyAllocated = (prevFree - free);
sample::gLogInfo << ", newly allocated GPU memory = "
<< newlyAllocated / 1024.0_MiB << " GiB";
}
sample::gLogInfo << ", total GPU memory = " << total / 1024.0_MiB << " GiB"
<< std::endl;
prevFree = free;
return newlyAllocated;
}
} // namespace
//! Returns true if deserialization is slower than expected or fails.
bool timeDeserialize(InferenceEnvironment& iEnv) {
constexpr int32_t kNB_ITERS{20};
std::unique_ptr<IRuntime> rt{
createInferRuntime(sample::gLogger.getTRTLogger())};
std::unique_ptr<ICudaEngine> engine;
std::unique_ptr<safe::IRuntime> safeRT{
sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())};
std::unique_ptr<safe::ICudaEngine> safeEngine;
if (iEnv.safe) {
ASSERT(sample::hasSafeRuntime() && safeRT != nullptr);
safeRT->setErrorRecorder(&gRecorder);
}
auto timeDeserializeFn = [&]() -> float {
bool deserializeOK{false};
engine.reset(nullptr);
safeEngine.reset(nullptr);
auto startClock = std::chrono::high_resolution_clock::now();
if (iEnv.safe) {
safeEngine.reset(safeRT->deserializeCudaEngine(iEnv.engineBlob.data(),
iEnv.engineBlob.size()));
deserializeOK = (safeEngine != nullptr);
} else {
engine.reset(rt->deserializeCudaEngine(iEnv.engineBlob.data(),
iEnv.engineBlob.size(), nullptr));
deserializeOK = (engine != nullptr);
}
auto endClock = std::chrono::high_resolution_clock::now();
// return NAN if deserialization failed.
return deserializeOK
? std::chrono::duration<float, std::milli>(endClock - startClock)
.count()
: NAN;
};
  // Warm up the caches to make sure that cache thrashing isn't throwing off
  // the results.
{
sample::gLogInfo << "Begin deserialization warmup..." << std::endl;
for (int32_t i = 0, e = 2; i < e; ++i) {
timeDeserializeFn();
}
}
sample::gLogInfo << "Begin deserialization engine timing..." << std::endl;
float const first = timeDeserializeFn();
  // Check if the first deserialization succeeded.
if (std::isnan(first)) {
sample::gLogError << "Engine deserialization failed." << std::endl;
return true;
}
sample::gLogInfo << "First deserialization time = " << first
<< " milliseconds" << std::endl;
// Record initial gpu memory state.
reportGpuMemory();
float totalTime{0.F};
for (int32_t i = 0; i < kNB_ITERS; ++i) {
totalTime += timeDeserializeFn();
}
const auto averageTime = totalTime / kNB_ITERS;
  // reportGpuMemory sometimes reports zero after a single deserialization of
  // a small engine, so use the size of memory for all the iterations.
const auto totalEngineSizeGpu = reportGpuMemory();
sample::gLogInfo << "Total deserialization time = " << totalTime
<< " milliseconds in " << kNB_ITERS
<< " iterations, average time = " << averageTime
<< " milliseconds, first time = " << first
<< " milliseconds." << std::endl;
sample::gLogInfo << "Deserialization Bandwidth = "
<< 1E-6 * totalEngineSizeGpu / totalTime << " GB/s"
<< std::endl;
  // If the first deserialization is more than tolerance slower than
  // the average deserialization, return true, which means an error occurred.
  // The tolerance is set to 2x since the deserialization time is quick and
  // susceptible to caching issues causing problems in the first timing.
const auto tolerance = 2.0F;
const bool isSlowerThanExpected = first > averageTime * tolerance;
if (isSlowerThanExpected) {
sample::gLogInfo << "First deserialization time divided by average time is "
<< (first / averageTime) << ". Exceeds tolerance of "
<< tolerance << "x." << std::endl;
}
return isSlowerThanExpected;
}
std::string getLayerInformation(const InferenceEnvironment& iEnv,
nvinfer1::LayerInformationFormat format) {
auto runtime = std::unique_ptr<IRuntime>(
createInferRuntime(sample::gLogger.getTRTLogger()));
auto inspector =
std::unique_ptr<IEngineInspector>(iEnv.engine->createEngineInspector());
if (!iEnv.context.empty()) {
inspector->setExecutionContext(iEnv.context.front().get());
}
std::string result = inspector->getEngineInformation(format);
return result;
}
} // namespace sample

View File

@@ -1,88 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_INFERENCE_H
#define TRT_SAMPLE_INFERENCE_H
#include "sampleReporting.h"
#include "sampleUtils.h"
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "NvInfer.h"
#include "NvInferSafeRuntime.h"
namespace sample {
struct InferenceEnvironment {
TrtUniquePtr<nvinfer1::ICudaEngine> engine;
std::unique_ptr<Profiler> profiler;
std::vector<TrtUniquePtr<nvinfer1::IExecutionContext>> context;
std::vector<std::unique_ptr<Bindings>> bindings;
bool error{false};
std::vector<uint8_t> engineBlob;
bool safe{false};
std::unique_ptr<nvinfer1::safe::ICudaEngine> safeEngine;
std::vector<std::unique_ptr<nvinfer1::safe::IExecutionContext>> safeContext;
template <class ContextType>
inline ContextType* getContext(int32_t streamIdx);
};
template <>
inline nvinfer1::IExecutionContext*
InferenceEnvironment::getContext(int32_t streamIdx) {
return context[streamIdx].get();
}
template <>
inline nvinfer1::safe::IExecutionContext*
InferenceEnvironment::getContext(int32_t streamIdx) {
return safeContext[streamIdx].get();
}
//!
//! \brief Set up contexts and bindings for inference
//!
bool setUpInference(InferenceEnvironment& iEnv,
const InferenceOptions& inference);
//!
//! \brief Deserialize the engine and time how long it takes.
//!
bool timeDeserialize(InferenceEnvironment& iEnv);
//!
//! \brief Run inference and collect timing, return false if any error hit
//! during inference
//!
bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv,
int32_t device, std::vector<InferenceTrace>& trace);
//!
//! \brief Get layer information of the engine.
//!
std::string getLayerInformation(const InferenceEnvironment& iEnv,
nvinfer1::LayerInformationFormat format);
} // namespace sample
#endif // TRT_SAMPLE_INFERENCE_H

File diff suppressed because it is too large

View File

@@ -1,311 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_OPTIONS_H
#define TRT_SAMPLE_OPTIONS_H
#include <algorithm>
#include <array>
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "NvInfer.h"
namespace sample {
// Build default params
constexpr int32_t maxBatchNotProvided{0};
constexpr int32_t defaultMinTiming{1};
constexpr int32_t defaultAvgTiming{8};
// System default params
constexpr int32_t defaultDevice{0};
// Inference default params
constexpr int32_t defaultBatch{1};
constexpr int32_t batchNotProvided{0};
constexpr int32_t defaultStreams{1};
constexpr int32_t defaultIterations{10};
constexpr float defaultWarmUp{200.F};
constexpr float defaultDuration{3.F};
constexpr float defaultSleep{};
constexpr float defaultIdle{};
// Reporting default params
constexpr int32_t defaultAvgRuns{10};
constexpr float defaultPercentile{99};
enum class PrecisionConstraints { kNONE, kOBEY, kPREFER };
enum class ModelFormat { kANY, kCAFFE, kONNX, kUFF };
enum class SparsityFlag { kDISABLE, kENABLE, kFORCE };
enum class TimingCacheMode { kDISABLE, kLOCAL, kGLOBAL };
using Arguments = std::unordered_multimap<std::string, std::string>;
using IOFormat = std::pair<nvinfer1::DataType, nvinfer1::TensorFormats>;
using ShapeRange =
std::array<std::vector<int32_t>,
nvinfer1::EnumMax<nvinfer1::OptProfileSelector>()>;
using LayerPrecisions = std::unordered_map<std::string, nvinfer1::DataType>;
using LayerOutputTypes =
std::unordered_map<std::string, std::vector<nvinfer1::DataType>>;
struct Options {
virtual void parse(Arguments& arguments) = 0;
};
struct BaseModelOptions : public Options {
ModelFormat format{ModelFormat::kANY};
std::string model;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct UffInput : public Options {
std::vector<std::pair<std::string, nvinfer1::Dims>> inputs;
bool NHWC{false};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct ModelOptions : public Options {
BaseModelOptions baseModel;
std::string prototxt;
std::vector<std::string> outputs;
UffInput uffInputs;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct BuildOptions : public Options {
int32_t maxBatch{maxBatchNotProvided};
double workspace{-1.0};
double dlaSRAM{-1.0};
double dlaLocalDRAM{-1.0};
double dlaGlobalDRAM{-1.0};
int32_t minTiming{defaultMinTiming};
int32_t avgTiming{defaultAvgTiming};
bool tf32{true};
bool fp16{false};
bool int8{false};
bool directIO{false};
PrecisionConstraints precisionConstraints{PrecisionConstraints::kNONE};
LayerPrecisions layerPrecisions;
LayerOutputTypes layerOutputTypes;
bool safe{false};
bool consistency{false};
bool restricted{false};
bool save{false};
bool load{false};
bool refittable{false};
SparsityFlag sparsity{SparsityFlag::kDISABLE};
nvinfer1::ProfilingVerbosity profilingVerbosity{
nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY};
std::string engine;
std::string calibration;
std::unordered_map<std::string, ShapeRange> shapes;
std::unordered_map<std::string, ShapeRange> shapesCalib;
std::vector<IOFormat> inputFormats;
std::vector<IOFormat> outputFormats;
nvinfer1::TacticSources enabledTactics{0};
nvinfer1::TacticSources disabledTactics{0};
TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL};
std::string timingCacheFile{};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct SystemOptions : public Options {
int32_t device{defaultDevice};
int32_t DLACore{-1};
bool fallback{false};
std::vector<std::string> plugins;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct InferenceOptions : public Options {
int32_t batch{batchNotProvided};
int32_t iterations{defaultIterations};
int32_t streams{defaultStreams};
float warmup{defaultWarmUp};
float duration{defaultDuration};
float sleep{defaultSleep};
float idle{defaultIdle};
bool overlap{true};
bool skipTransfers{false};
bool useManaged{false};
bool spin{false};
bool threads{false};
bool graph{false};
bool skip{false};
bool rerun{false};
bool timeDeserialize{false};
bool timeRefit{false};
std::unordered_map<std::string, std::string> inputs;
std::unordered_map<std::string, std::vector<int32_t>> shapes;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct ReportingOptions : public Options {
bool verbose{false};
int32_t avgs{defaultAvgRuns};
float percentile{defaultPercentile};
bool refit{false};
bool output{false};
bool profile{false};
bool layerInfo{false};
std::string exportTimes;
std::string exportOutput;
std::string exportProfile;
std::string exportLayerInfo;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct SafeBuilderOptions : public Options {
std::string serialized{};
std::string onnxModelFile{};
bool help{false};
bool verbose{false};
std::vector<IOFormat> inputFormats;
std::vector<IOFormat> outputFormats;
bool int8{false};
std::string calibFile{};
std::vector<std::string> plugins;
bool consistency{false};
bool standard{false};
void parse(Arguments& arguments) override;
static void printHelp(std::ostream& out);
};
struct AllOptions : public Options {
ModelOptions model;
BuildOptions build;
SystemOptions system;
InferenceOptions inference;
ReportingOptions reporting;
bool helps{false};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
Arguments argsToArgumentsMap(int32_t argc, char* argv[]);
bool parseHelp(Arguments& arguments);
void helpHelp(std::ostream& out);
// Functions to print options
std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options);
std::ostream& operator<<(std::ostream& os, const UffInput& input);
std::ostream& operator<<(std::ostream& os, const IOFormat& format);
std::ostream& operator<<(std::ostream& os, const ShapeRange& dims);
std::ostream& operator<<(std::ostream& os, const ModelOptions& options);
std::ostream& operator<<(std::ostream& os, const BuildOptions& options);
std::ostream& operator<<(std::ostream& os, const SystemOptions& options);
std::ostream& operator<<(std::ostream& os, const InferenceOptions& options);
std::ostream& operator<<(std::ostream& os, const ReportingOptions& options);
std::ostream& operator<<(std::ostream& os, const AllOptions& options);
std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options);
inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) {
for (int32_t i = 0; i < dims.nbDims; ++i) {
os << (i ? "x" : "") << dims.d[i];
}
return os;
}
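// Example: a Dims with nbDims = 4 and d = {1, 3, 224, 224} prints as
// "1x3x224x224".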
inline std::ostream& operator<<(std::ostream& os,
const nvinfer1::WeightsRole role) {
switch (role) {
case nvinfer1::WeightsRole::kKERNEL: {
os << "Kernel";
break;
}
case nvinfer1::WeightsRole::kBIAS: {
os << "Bias";
break;
}
case nvinfer1::WeightsRole::kSHIFT: {
os << "Shift";
break;
}
case nvinfer1::WeightsRole::kSCALE: {
os << "Scale";
break;
}
case nvinfer1::WeightsRole::kCONSTANT: {
os << "Constant";
break;
}
case nvinfer1::WeightsRole::kANY: {
os << "Any";
break;
}
}
return os;
}
inline std::ostream& operator<<(std::ostream& os,
const std::vector<int32_t>& vec) {
for (int32_t i = 0, e = static_cast<int32_t>(vec.size()); i < e; ++i) {
os << (i ? "x" : "") << vec[i];
}
return os;
}
} // namespace sample
#endif // TRT_SAMPLE_OPTIONS_H

View File

@@ -1,480 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <utility>
#include "sampleInference.h"
#include "sampleOptions.h"
#include "sampleReporting.h"
using namespace nvinfer1;
namespace sample {
namespace {
//!
//! \brief Find percentile in an ascending sequence of timings
//! \note percentile must be in [0, 100]. Otherwise, an exception is thrown.
//!
template <typename T>
float findPercentile(float percentile,
std::vector<InferenceTime> const& timings,
T const& toFloat) {
int32_t const all = static_cast<int32_t>(timings.size());
int32_t const exclude = static_cast<int32_t>((1 - percentile / 100) * all);
if (timings.empty()) {
return std::numeric_limits<float>::infinity();
}
if (percentile < 0.0f || percentile > 100.0f) {
throw std::runtime_error("percentile is not in [0, 100]!");
}
return toFloat(timings[std::max(all - 1 - exclude, 0)]);
}
//!
//! \brief Find median in a sorted sequence of timings
//!
template <typename T>
float findMedian(std::vector<InferenceTime> const& timings, T const& toFloat) {
if (timings.empty()) {
return std::numeric_limits<float>::infinity();
}
int32_t const m = timings.size() / 2;
if (timings.size() % 2) {
return toFloat(timings[m]);
}
return (toFloat(timings[m - 1]) + toFloat(timings[m])) / 2;
}
//!
//! \brief Find coefficient of variance (which is std / mean) in a sorted
//! sequence of timings given the mean
//!
template <typename T>
float findCoeffOfVariance(std::vector<InferenceTime> const& timings,
T const& toFloat, float mean) {
if (timings.empty()) {
return 0;
}
if (mean == 0.F) {
return std::numeric_limits<float>::infinity();
}
auto const metricAccumulator = [toFloat, mean](float acc,
InferenceTime const& a) {
float const diff = toFloat(a) - mean;
return acc + diff * diff;
};
float const variance =
std::accumulate(timings.begin(), timings.end(), 0.F, metricAccumulator) /
timings.size();
return std::sqrt(variance) / mean * 100.F;
}
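// Worked example for findCoeffOfVariance: timings of 2 ms and 4 ms give
// mean = 3 ms and variance = ((2-3)^2 + (4-3)^2) / 2 = 1, so the reported
// coefficient of variance is sqrt(1) / 3 * 100 ~= 33.3%.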
inline InferenceTime traceToTiming(const InferenceTrace& a) {
return InferenceTime((a.enqEnd - a.enqStart), (a.h2dEnd - a.h2dStart),
(a.computeEnd - a.computeStart), (a.d2hEnd - a.d2hStart),
(a.d2hEnd - a.h2dStart));
}
} // namespace
void printProlog(int32_t warmups, int32_t timings, float warmupMs,
float benchTimeMs, std::ostream& os) {
os << "Warmup completed " << warmups << " queries over " << warmupMs << " ms"
<< std::endl;
os << "Timing trace has " << timings << " queries over " << benchTimeMs / 1000
<< " s" << std::endl;
}
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg,
std::ostream& os) {
int32_t count = 0;
InferenceTime sum;
os << std::endl;
os << "=== Trace details ===" << std::endl;
os << "Trace averages of " << runsPerAvg << " runs:" << std::endl;
for (auto const& t : timings) {
sum += t;
if (++count == runsPerAvg) {
// clang-format off
os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / runsPerAvg
<< " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (end to end " << sum.e2e / runsPerAvg
<< " ms, enqueue " << sum.enq / runsPerAvg << " ms)" << std::endl;
// clang-format on
count = 0;
sum.enq = 0;
sum.h2d = 0;
sum.compute = 0;
sum.d2h = 0;
sum.e2e = 0;
}
}
}
void printMetricExplanations(std::ostream& os) {
os << std::endl;
os << "=== Explanations of the performance metrics ===" << std::endl;
os << "Total Host Walltime: the host walltime from when the first query "
"(after warmups) is enqueued to when the "
"last query is completed."
<< std::endl;
os << "GPU Compute Time: the GPU latency to execute the kernels for a query."
<< std::endl;
os << "Total GPU Compute Time: the summation of the GPU Compute Time of all "
"the queries. If this is significantly "
"shorter than Total Host Walltime, the GPU may be under-utilized "
"because of host-side overheads or data "
"transfers."
<< std::endl;
os << "Throughput: the observed throughput computed by dividing the number "
"of queries by the Total Host Walltime. "
"If this is significantly lower than the reciprocal of GPU Compute "
"Time, the GPU may be under-utilized "
"because of host-side overheads or data transfers."
<< std::endl;
os << "Enqueue Time: the host latency to enqueue a query. If this is longer "
"than GPU Compute Time, the GPU may be "
"under-utilized."
<< std::endl;
os << "H2D Latency: the latency for host-to-device data transfers for input "
"tensors of a single query."
<< std::endl;
os << "D2H Latency: the latency for device-to-host data transfers for output "
"tensors of a single query."
<< std::endl;
os << "Latency: the summation of H2D Latency, GPU Compute Time, and D2H "
"Latency. This is the latency to infer a "
"single query."
<< std::endl;
os << "End-to-End Host Latency: the duration from when the H2D of a query is "
"called to when the D2H of the same "
"query is completed, which includes the latency to wait for the "
"completion of the previous query. This is "
"the latency of a query if multiple queries are enqueued consecutively."
<< std::endl;
}
PerformanceResult
getPerformanceResult(std::vector<InferenceTime> const& timings,
std::function<float(InferenceTime const&)> metricGetter,
float percentile) {
auto const metricComparator = [metricGetter](InferenceTime const& a,
InferenceTime const& b) {
return metricGetter(a) < metricGetter(b);
};
auto const metricAccumulator = [metricGetter](float acc,
InferenceTime const& a) {
return acc + metricGetter(a);
};
std::vector<InferenceTime> newTimings = timings;
std::sort(newTimings.begin(), newTimings.end(), metricComparator);
PerformanceResult result;
result.min = metricGetter(newTimings.front());
result.max = metricGetter(newTimings.back());
result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0f,
metricAccumulator) /
newTimings.size();
result.median = findMedian(newTimings, metricGetter);
result.percentile = findPercentile(percentile, newTimings, metricGetter);
result.coeffVar = findCoeffOfVariance(newTimings, metricGetter, result.mean);
return result;
}
void printEpilog(std::vector<InferenceTime> const& timings, float walltimeMs,
float percentile, int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose) {
float const throughput = batchSize * timings.size() / walltimeMs * 1000;
auto const getLatency = [](InferenceTime const& t) { return t.latency(); };
auto const latencyResult =
getPerformanceResult(timings, getLatency, percentile);
auto const getEndToEnd = [](InferenceTime const& t) { return t.e2e; };
auto const e2eLatencyResult =
getPerformanceResult(timings, getEndToEnd, percentile);
auto const getEnqueue = [](InferenceTime const& t) { return t.enq; };
auto const enqueueResult =
getPerformanceResult(timings, getEnqueue, percentile);
auto const getH2d = [](InferenceTime const& t) { return t.h2d; };
auto const h2dResult = getPerformanceResult(timings, getH2d, percentile);
auto const getCompute = [](InferenceTime const& t) { return t.compute; };
auto const gpuComputeResult =
getPerformanceResult(timings, getCompute, percentile);
auto const getD2h = [](InferenceTime const& t) { return t.d2h; };
auto const d2hResult = getPerformanceResult(timings, getD2h, percentile);
auto const toPerfString = [percentile](const PerformanceResult& r) {
std::stringstream s;
s << "min = " << r.min << " ms, max = " << r.max << " ms, mean = " << r.mean
<< " ms, "
<< "median = " << r.median << " ms, percentile(" << percentile
<< "%) = " << r.percentile << " ms";
return s.str();
};
osInfo << std::endl;
osInfo << "=== Performance summary ===" << std::endl;
osInfo << "Throughput: " << throughput << " qps" << std::endl;
osInfo << "Latency: " << toPerfString(latencyResult) << std::endl;
osInfo << "End-to-End Host Latency: " << toPerfString(e2eLatencyResult)
<< std::endl;
osInfo << "Enqueue Time: " << toPerfString(enqueueResult) << std::endl;
osInfo << "H2D Latency: " << toPerfString(h2dResult) << std::endl;
osInfo << "GPU Compute Time: " << toPerfString(gpuComputeResult) << std::endl;
osInfo << "D2H Latency: " << toPerfString(d2hResult) << std::endl;
osInfo << "Total Host Walltime: " << walltimeMs / 1000 << " s" << std::endl;
osInfo << "Total GPU Compute Time: "
<< gpuComputeResult.mean * timings.size() / 1000 << " s" << std::endl;
// Report warnings if the throughput is bound by other factors than GPU
// Compute Time.
constexpr float kENQUEUE_BOUND_REPORTING_THRESHOLD{0.8F};
if (enqueueResult.median >
kENQUEUE_BOUND_REPORTING_THRESHOLD * gpuComputeResult.median) {
osWarning << "* Throughput may be bound by Enqueue Time rather than GPU "
"Compute and the GPU may be under-utilized."
<< std::endl;
osWarning << " If not already in use, --useCudaGraph (utilize CUDA graphs "
"where possible) may increase the "
"throughput."
<< std::endl;
}
if (h2dResult.median >= gpuComputeResult.median) {
osWarning << "* Throughput may be bound by host-to-device transfers for "
"the inputs rather than GPU Compute and "
"the GPU may be under-utilized."
<< std::endl;
osWarning << " Add --noDataTransfers flag to disable data transfers."
<< std::endl;
}
if (d2hResult.median >= gpuComputeResult.median) {
osWarning << "* Throughput may be bound by device-to-host transfers for "
"the outputs rather than GPU Compute "
"and the GPU may be under-utilized."
<< std::endl;
osWarning << " Add --noDataTransfers flag to disable data transfers."
<< std::endl;
}
// Report warnings if the GPU Compute Time is unstable.
constexpr float kUNSTABLE_PERF_REPORTING_THRESHOLD{1.0F};
if (gpuComputeResult.coeffVar > kUNSTABLE_PERF_REPORTING_THRESHOLD) {
osWarning
<< "* GPU compute time is unstable, with coefficient of variance = "
<< gpuComputeResult.coeffVar << "%." << std::endl;
osWarning << " If not already in use, locking GPU clock frequency or "
"adding --useSpinWait may improve the "
<< "stability." << std::endl;
}
// Explain what the metrics mean.
osInfo << "Explanations of the performance metrics are printed in the "
"verbose logs."
<< std::endl;
printMetricExplanations(osVerbose);
osInfo << std::endl;
}
void printPerformanceReport(std::vector<InferenceTrace> const& trace,
const ReportingOptions& reporting, float warmupMs,
int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose) {
auto const isNotWarmup = [&warmupMs](const InferenceTrace& a) {
return a.computeStart >= warmupMs;
};
auto const noWarmup = std::find_if(trace.begin(), trace.end(), isNotWarmup);
int32_t const warmups = noWarmup - trace.begin();
float const benchTime = trace.back().d2hEnd - noWarmup->h2dStart;
  // When implicit batch is used, batchSize = options.inference.batch, which is
  // parsed from --batch.
  // When explicit batch is used, batchSize = options.inference.batch = 0;
  // treat inference with explicit batch as a single query and report the
  // throughput accordingly.
batchSize = batchSize ? batchSize : 1;
printProlog(warmups * batchSize, (trace.size() - warmups) * batchSize,
warmupMs, benchTime, osInfo);
std::vector<InferenceTime> timings(trace.size() - warmups);
std::transform(noWarmup, trace.end(), timings.begin(), traceToTiming);
printTiming(timings, reporting.avgs, osInfo);
printEpilog(timings, benchTime, reporting.percentile, batchSize, osInfo,
osWarning, osVerbose);
if (!reporting.exportTimes.empty()) {
exportJSONTrace(trace, reporting.exportTimes);
}
}
//! Printed format:
//! [ value, ...]
//! value ::= { "start enq : time, "end enq" : time, "start h2d" : time, "end
//! h2d" : time, "start compute" : time,
//! "end compute" : time, "start d2h" : time, "end d2h" : time,
//! "h2d" : time, "compute" : time,
//! "d2h" : time, "latency" : time, "end to end" : time }
//!
void exportJSONTrace(std::vector<InferenceTrace> const& trace,
std::string const& fileName) {
std::ofstream os(fileName, std::ofstream::trunc);
os << "[" << std::endl;
char const* sep = " ";
for (auto const& t : trace) {
InferenceTime const it(traceToTiming(t));
os << sep << "{ ";
sep = ", ";
// clang-format off
os << "\"startEnqMs\" : " << t.enqStart << sep << "\"endEnqMs\" : " << t.enqEnd << sep
<< "\"startH2dMs\" : " << t.h2dStart << sep << "\"endH2dMs\" : " << t.h2dEnd << sep
<< "\"startComputeMs\" : " << t.computeStart << sep << "\"endComputeMs\" : " << t.computeEnd << sep
<< "\"startD2hMs\" : " << t.d2hStart << sep << "\"endD2hMs\" : " << t.d2hEnd << sep
<< "\"h2dMs\" : " << it.h2d << sep << "\"computeMs\" : " << it.compute << sep
<< "\"d2hMs\" : " << it.d2h << sep << "\"latencyMs\" : " << it.latency() << sep
<< "\"endToEndMs\" : " << it.e2e << " }" << std::endl;
// clang-format on
}
os << "]" << std::endl;
}
void Profiler::reportLayerTime(char const* layerName, float timeMs) noexcept {
if (mIterator == mLayers.end()) {
bool const first = !mLayers.empty() && mLayers.begin()->name == layerName;
mUpdatesCount += mLayers.empty() || first;
if (first) {
mIterator = mLayers.begin();
} else {
mLayers.emplace_back();
mLayers.back().name = layerName;
mIterator = mLayers.end() - 1;
}
}
mIterator->timeMs += timeMs;
++mIterator;
}
void Profiler::print(std::ostream& os) const noexcept {
std::string const nameHdr("Layer");
std::string const timeHdr(" Time (ms)");
std::string const avgHdr(" Avg. Time (ms)");
std::string const percentageHdr(" Time %");
float const totalTimeMs = getTotalTime();
auto const cmpLayer = [](LayerProfile const& a, LayerProfile const& b) {
return a.name.size() < b.name.size();
};
auto const longestName =
std::max_element(mLayers.begin(), mLayers.end(), cmpLayer);
auto const nameLength =
std::max(longestName->name.size() + 1, nameHdr.size());
auto const timeLength = timeHdr.size();
auto const avgLength = avgHdr.size();
auto const percentageLength = percentageHdr.size();
os << std::endl
<< "=== Profile (" << mUpdatesCount << " iterations ) ===" << std::endl
<< std::setw(nameLength) << nameHdr << timeHdr << avgHdr << percentageHdr
<< std::endl;
for (auto const& p : mLayers) {
// clang-format off
os << std::setw(nameLength) << p.name << std::setw(timeLength) << std::fixed << std::setprecision(2) << p.timeMs
<< std::setw(avgLength) << std::fixed << std::setprecision(4) << p.timeMs / mUpdatesCount
<< std::setw(percentageLength) << std::fixed << std::setprecision(1) << p.timeMs / totalTimeMs * 100
<< std::endl;
}
{
os << std::setw(nameLength) << "Total" << std::setw(timeLength) << std::fixed << std::setprecision(2)
<< totalTimeMs << std::setw(avgLength) << std::fixed << std::setprecision(4) << totalTimeMs / mUpdatesCount
<< std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0 << std::endl;
// clang-format on
}
os << std::endl;
}
void Profiler::exportJSONProfile(std::string const& fileName) const noexcept {
std::ofstream os(fileName, std::ofstream::trunc);
os << "[" << std::endl
<< " { \"count\" : " << mUpdatesCount << " }" << std::endl;
auto const totalTimeMs = getTotalTime();
for (auto const& l : mLayers) {
// clang-format off
os << ", {" << " \"name\" : \"" << l.name << "\""
", \"timeMs\" : " << l.timeMs
<< ", \"averageMs\" : " << l.timeMs / mUpdatesCount
<< ", \"percentage\" : " << l.timeMs / totalTimeMs * 100
<< " }" << std::endl;
// clang-format on
}
os << "]" << std::endl;
}
void dumpInputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os) {
os << "Input Tensors:" << std::endl;
bindings.dumpInputs(context, os);
}
void dumpOutputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os) {
os << "Output Tensors:" << std::endl;
bindings.dumpOutputs(context, os);
}
void exportJSONOutput(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::string const& fileName,
int32_t batch) {
std::ofstream os(fileName, std::ofstream::trunc);
std::string sep = " ";
auto const output = bindings.getOutputBindings();
os << "[" << std::endl;
for (auto const& binding : output) {
// clang-format off
os << sep << "{ \"name\" : \"" << binding.first << "\"" << std::endl;
sep = ", ";
os << " " << sep << "\"dimensions\" : \"";
bindings.dumpBindingDimensions(binding.second, context, os);
os << "\"" << std::endl;
os << " " << sep << "\"values\" : [ ";
bindings.dumpBindingValues(context, binding.second, os, sep, batch);
os << " ]" << std::endl << " }" << std::endl;
// clang-format on
}
os << "]" << std::endl;
}
} // namespace sample

View File

@@ -1,211 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_REPORTING_H
#define TRT_SAMPLE_REPORTING_H
#include <functional>
#include <iostream>
#include "NvInfer.h"
#include "sampleOptions.h"
#include "sampleUtils.h"
namespace sample {
//!
//! \struct InferenceTime
//! \brief Measurement times in milliseconds
//!
struct InferenceTime {
InferenceTime(float q, float i, float c, float o, float e)
: enq(q), h2d(i), compute(c), d2h(o), e2e(e) {}
InferenceTime() = default;
InferenceTime(InferenceTime const&) = default;
InferenceTime(InferenceTime&&) = default;
InferenceTime& operator=(InferenceTime const&) = default;
InferenceTime& operator=(InferenceTime&&) = default;
~InferenceTime() = default;
float enq{0}; // Enqueue
float h2d{0}; // Host to Device
float compute{0}; // Compute
float d2h{0}; // Device to Host
float e2e{0}; // end to end
// ideal latency
float latency() const { return h2d + compute + d2h; }
};
//!
//! \struct InferenceTrace
//! \brief Measurement points in milliseconds
//!
struct InferenceTrace {
InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs,
float ce, float os, float oe)
: stream(s), enqStart(es), enqEnd(ee), h2dStart(is), h2dEnd(ie),
computeStart(cs), computeEnd(ce), d2hStart(os), d2hEnd(oe) {}
InferenceTrace() = default;
InferenceTrace(InferenceTrace const&) = default;
InferenceTrace(InferenceTrace&&) = default;
InferenceTrace& operator=(InferenceTrace const&) = default;
InferenceTrace& operator=(InferenceTrace&&) = default;
~InferenceTrace() = default;
int32_t stream{0};
float enqStart{0};
float enqEnd{0};
float h2dStart{0};
float h2dEnd{0};
float computeStart{0};
float computeEnd{0};
float d2hStart{0};
float d2hEnd{0};
};
inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b) {
return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute,
a.d2h + b.d2h, a.e2e + b.e2e);
}
inline InferenceTime operator+=(InferenceTime& a, InferenceTime const& b) {
return a = a + b;
}
//!
//! \struct PerformanceResult
//! \brief Performance result of a performance metric
//!
struct PerformanceResult {
float min{0};
float max{0};
float mean{0};
float median{0};
float percentile{0};
float coeffVar{0}; // coefficient of variation
};
//!
//! \brief Print benchmarking time and number of traces collected
//!
void printProlog(int32_t warmups, int32_t timings, float warmupMs,
float walltime, std::ostream& os);
//!
//! \brief Print a timing trace
//!
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg,
std::ostream& os);
//!
//! \brief Print the performance summary of a trace
//!
void printEpilog(std::vector<InferenceTime> const& timings, float percentile,
int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose);
//!
//! \brief Get the result of a specific performance metric from a trace
//!
PerformanceResult
getPerformanceResult(std::vector<InferenceTime> const& timings,
std::function<float(InferenceTime const&)> metricGetter,
float percentile);
//!
//! \brief Print the explanations of the performance metrics printed in
//! printEpilog() function.
//!
void printMetricExplanations(std::ostream& os);
//!
//! \brief Print and summarize a timing trace
//!
void printPerformanceReport(std::vector<InferenceTrace> const& trace,
ReportingOptions const& reporting, float warmupMs,
int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose);
//!
//! \brief Export a timing trace to JSON file
//!
void exportJSONTrace(std::vector<InferenceTrace> const& trace,
std::string const& fileName);
//!
//! \brief Print input tensors to stream
//!
void dumpInputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os);
//!
//! \brief Print output tensors to stream
//!
void dumpOutputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os);
//!
//! \brief Export output tensors to JSON file
//!
void exportJSONOutput(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::string const& fileName,
int32_t batch);
//!
//! \struct LayerProfile
//! \brief Layer profile information
//!
struct LayerProfile {
std::string name;
float timeMs{0};
};
//!
//! \class Profiler
//! \brief Collect per-layer profile information, assuming times are reported in
//! the same order
//!
class Profiler : public nvinfer1::IProfiler {
public:
void reportLayerTime(char const* layerName, float timeMs) noexcept override;
void print(std::ostream& os) const noexcept;
//!
//! \brief Export a profile to JSON file
//!
void exportJSONProfile(std::string const& fileName) const noexcept;
private:
float getTotalTime() const noexcept {
auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) {
return accumulator + lp.timeMs;
};
return std::accumulate(mLayers.begin(), mLayers.end(), 0.0, plusLayerTime);
}
std::vector<LayerProfile> mLayers;
std::vector<LayerProfile>::iterator mIterator{mLayers.begin()};
int32_t mUpdatesCount{0};
};
} // namespace sample
#endif // TRT_SAMPLE_REPORTING_H

View File

@@ -1,494 +0,0 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_UTILS_H
#define TRT_SAMPLE_UTILS_H
#include <fstream>
#include <iostream>
#include <memory>
#include <numeric>
#include <random>
#include <unordered_map>
#include <vector>
#include <cuda.h>
#include <cuda_fp16.h>
#include "NvInfer.h"
#include "common.h"
#include "logger.h"
#include "sampleDevice.h"
#include "sampleOptions.h"
namespace sample {
inline int dataTypeSize(nvinfer1::DataType dataType) {
switch (dataType) {
case nvinfer1::DataType::kINT32:
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kBOOL:
case nvinfer1::DataType::kINT8:
return 1;
}
return 0;
}
template <typename T> inline T roundUp(T m, T n) {
return ((m + n - 1) / n) * n;
}
inline int volume(const nvinfer1::Dims& d) {
return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int>());
}
//! comps is the number of components in a vector. Ignored if vecDim < 0.
inline int64_t volume(const nvinfer1::Dims& dims, const nvinfer1::Dims& strides,
int vecDim, int comps, int batch) {
int maxNbElems = 1;
for (int i = 0; i < dims.nbDims; ++i) {
// Get effective length of axis.
int d = dims.d[i];
    // If any dimension is 0, it is an empty tensor.
if (d == 0) {
return 0;
}
if (i == vecDim) {
d = samplesCommon::divUp(d, comps);
}
maxNbElems = std::max(maxNbElems, d * strides.d[i]);
}
return static_cast<int64_t>(maxNbElems) * batch * (vecDim < 0 ? 1 : comps);
}
inline int64_t volume(nvinfer1::Dims dims, int vecDim, int comps, int batch) {
if (vecDim != -1) {
dims.d[vecDim] = roundUp(dims.d[vecDim], comps);
}
return volume(dims) * std::max(batch, 1);
}
inline nvinfer1::Dims toDims(const std::vector<int>& vec) {
int limit = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
if (static_cast<int>(vec.size()) > limit) {
    sample::gLogWarning
        << "Vector too long, only the first 8 elements are used as dimensions."
        << std::endl;
}
// Pick first nvinfer1::Dims::MAX_DIMS elements
nvinfer1::Dims dims{std::min(static_cast<int>(vec.size()), limit), {}};
std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d));
return dims;
}
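// Illustrative sketch (not part of the original utilities): toDims() converts
// a shape vector to nvinfer1::Dims, truncating anything beyond MAX_DIMS; the
// helper name and the NCHW shape are assumptions.
inline nvinfer1::Dims exampleNCHWDims() {
  // Produces Dims with nbDims == 4 and d == {1, 3, 224, 224}.
  return toDims({1, 3, 224, 224});
}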
template <typename T>
inline void fillBuffer(void* buffer, int64_t volume, T min, T max) {
T* typedBuffer = static_cast<T*>(buffer);
std::default_random_engine engine;
if (std::is_integral<T>::value) {
std::uniform_int_distribution<int> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<T>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
} else {
std::uniform_real_distribution<float> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<T>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
}
}
// Specialization needed for custom type __half
template <typename H>
inline void fillBufferHalf(void* buffer, int64_t volume, H min, H max) {
H* typedBuffer = static_cast<H*>(buffer);
std::default_random_engine engine;
std::uniform_real_distribution<float> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<H>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
}
template <>
inline void fillBuffer<__half>(void* buffer, int64_t volume, __half min,
__half max) {
fillBufferHalf(buffer, volume, min, max);
}
template <typename T>
inline void dumpBuffer(const void* buffer, const std::string& separator,
std::ostream& os, const Dims& dims, const Dims& strides,
int32_t vectorDim, int32_t spv) {
const int64_t volume = std::accumulate(dims.d, dims.d + dims.nbDims, 1,
std::multiplies<int64_t>());
const T* typedBuffer = static_cast<const T*>(buffer);
std::string sep;
for (int64_t v = 0; v < volume; ++v) {
int64_t curV = v;
int32_t dataOffset = 0;
for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex) {
int32_t dimVal = curV % dims.d[dimIndex];
if (dimIndex == vectorDim) {
dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv;
} else {
dataOffset +=
dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 1 : spv);
}
curV /= dims.d[dimIndex];
ASSERT(curV >= 0);
}
os << sep << typedBuffer[dataOffset];
sep = separator;
}
}
inline void loadFromFile(std::string const& fileName, char* dst, size_t size) {
ASSERT(dst);
std::ifstream file(fileName, std::ios::in | std::ios::binary);
if (file.is_open()) {
file.read(dst, size);
file.close();
} else {
std::stringstream msg;
msg << "Cannot open file " << fileName << "!";
throw std::invalid_argument(msg.str());
}
}
struct Binding {
bool isInput{false};
std::unique_ptr<IMirroredBuffer> buffer;
int64_t volume{0};
nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT};
void fill(const std::string& fileName) {
loadFromFile(fileName, static_cast<char*>(buffer->getHostBuffer()),
buffer->getSize());
}
void fill() {
switch (dataType) {
case nvinfer1::DataType::kBOOL: {
fillBuffer<bool>(buffer->getHostBuffer(), volume, 0, 1);
break;
}
case nvinfer1::DataType::kINT32: {
fillBuffer<int32_t>(buffer->getHostBuffer(), volume, -128, 127);
break;
}
case nvinfer1::DataType::kINT8: {
fillBuffer<int8_t>(buffer->getHostBuffer(), volume, -128, 127);
break;
}
case nvinfer1::DataType::kFLOAT: {
fillBuffer<float>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
break;
}
case nvinfer1::DataType::kHALF: {
fillBuffer<__half>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
break;
}
}
}
void dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim,
int32_t spv, const std::string separator = " ") const {
switch (dataType) {
case nvinfer1::DataType::kBOOL: {
dumpBuffer<bool>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kINT32: {
dumpBuffer<int32_t>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kINT8: {
dumpBuffer<int8_t>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kFLOAT: {
dumpBuffer<float>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kHALF: {
dumpBuffer<__half>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
}
}
};
class Bindings {
public:
Bindings() = delete;
explicit Bindings(bool useManaged) : mUseManaged(useManaged) {}
void addBinding(int b, const std::string& name, bool isInput, int64_t volume,
nvinfer1::DataType dataType,
const std::string& fileName = "") {
while (mBindings.size() <= static_cast<size_t>(b)) {
mBindings.emplace_back();
mDevicePointers.emplace_back();
}
mNames[name] = b;
if (mBindings[b].buffer == nullptr) {
if (mUseManaged) {
mBindings[b].buffer.reset(new UnifiedMirroredBuffer);
} else {
mBindings[b].buffer.reset(new DiscreteMirroredBuffer);
}
}
mBindings[b].isInput = isInput;
// Some memory allocators return nullptr when allocating zero bytes, but
// TensorRT requires a non-null ptr
// even for empty tensors, so allocate a dummy byte.
if (volume == 0) {
mBindings[b].buffer->allocate(1);
} else {
mBindings[b].buffer->allocate(
static_cast<size_t>(volume) *
static_cast<size_t>(dataTypeSize(dataType)));
}
mBindings[b].volume = volume;
mBindings[b].dataType = dataType;
mDevicePointers[b] = mBindings[b].buffer->getDeviceBuffer();
if (isInput) {
if (fileName.empty()) {
fill(b);
} else {
fill(b, fileName);
}
}
}
void** getDeviceBuffers() { return mDevicePointers.data(); }
void transferInputToDevice(TrtCudaStream& stream) {
for (auto& b : mNames) {
if (mBindings[b.second].isInput) {
mBindings[b.second].buffer->hostToDevice(stream);
}
}
}
void transferOutputToHost(TrtCudaStream& stream) {
for (auto& b : mNames) {
if (!mBindings[b.second].isInput) {
mBindings[b.second].buffer->deviceToHost(stream);
}
}
}
void fill(int binding, const std::string& fileName) {
mBindings[binding].fill(fileName);
}
void fill(int binding) { mBindings[binding].fill(); }
void dumpBindingDimensions(int binding,
const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
const auto dims = context.getBindingDimensions(binding);
// Do not add a newline terminator, because the caller may be outputting a
// JSON string.
os << dims;
}
void dumpBindingValues(const nvinfer1::IExecutionContext& context,
int binding, std::ostream& os,
const std::string& separator = " ",
int32_t batch = 1) const {
Dims dims = context.getBindingDimensions(binding);
Dims strides = context.getStrides(binding);
int32_t vectorDim = context.getEngine().getBindingVectorizedDim(binding);
const int32_t spv =
context.getEngine().getBindingComponentsPerElement(binding);
if (context.getEngine().hasImplicitBatchDimension()) {
auto insertN = [](Dims& d, int32_t bs) {
const int32_t nbDims = d.nbDims;
ASSERT(nbDims < Dims::MAX_DIMS);
std::copy_backward(&d.d[0], &d.d[nbDims], &d.d[nbDims + 1]);
d.d[0] = bs;
d.nbDims = nbDims + 1;
};
int32_t batchStride = 0;
for (int32_t i = 0; i < strides.nbDims; ++i) {
if (strides.d[i] * dims.d[i] > batchStride) {
batchStride = strides.d[i] * dims.d[i];
}
}
insertN(dims, batch);
insertN(strides, batchStride);
vectorDim = (vectorDim == -1) ? -1 : vectorDim + 1;
}
mBindings[binding].dump(os, dims, strides, vectorDim, spv, separator);
}
void dumpInputs(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto isInput = [](const Binding& b) { return b.isInput; };
dumpBindings(context, isInput, os);
}
void dumpOutputs(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto isOutput = [](const Binding& b) { return !b.isInput; };
dumpBindings(context, isOutput, os);
}
void dumpBindings(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto all = [](const Binding& b) { return true; };
dumpBindings(context, all, os);
}
void dumpBindings(const nvinfer1::IExecutionContext& context,
bool (*predicate)(const Binding& b),
std::ostream& os) const {
for (const auto& n : mNames) {
const auto binding = n.second;
if (predicate(mBindings[binding])) {
os << n.first << ": (";
dumpBindingDimensions(binding, context, os);
os << ")" << std::endl;
dumpBindingValues(context, binding, os);
os << std::endl;
}
}
}
std::unordered_map<std::string, int> getInputBindings() const {
auto isInput = [](const Binding& b) { return b.isInput; };
return getBindings(isInput);
}
std::unordered_map<std::string, int> getOutputBindings() const {
auto isOutput = [](const Binding& b) { return !b.isInput; };
return getBindings(isOutput);
}
std::unordered_map<std::string, int> getBindings() const {
auto all = [](const Binding& b) { return true; };
return getBindings(all);
}
std::unordered_map<std::string, int>
getBindings(bool (*predicate)(const Binding& b)) const {
std::unordered_map<std::string, int> bindings;
for (const auto& n : mNames) {
const auto binding = n.second;
if (predicate(mBindings[binding])) {
bindings.insert(n);
}
}
return bindings;
}
private:
std::unordered_map<std::string, int32_t> mNames;
std::vector<Binding> mBindings;
std::vector<void*> mDevicePointers;
bool mUseManaged{false};
};
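// Illustrative sketch (not part of the original utilities): a minimal flow for
// one float input binding using the Bindings helper above; the function and
// parameter names are assumptions.
inline void fillOneInputBinding(Bindings& bindings, TrtCudaStream& stream,
                                int bindingIndex, const std::string& name,
                                int64_t elementCount) {
  // With no file name supplied, the host buffer is filled with random values.
  bindings.addBinding(bindingIndex, name, /*isInput=*/true, elementCount,
                      nvinfer1::DataType::kFLOAT);
  // Asynchronously copy the host side of every input binding to the device.
  bindings.transferInputToDevice(stream);
}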
template <typename T> struct TrtDestroyer {
void operator()(T* t) { t->destroy(); }
};
template <typename T> using TrtUniquePtr = std::unique_ptr<T, TrtDestroyer<T>>;
inline bool broadcastIOFormats(const std::vector<IOFormat>& formats,
size_t nbBindings, bool isInput = true) {
bool broadcast = formats.size() == 1;
bool validFormatsCount = broadcast || (formats.size() == nbBindings);
if (!formats.empty() && !validFormatsCount) {
if (isInput) {
throw std::invalid_argument(
"The number of inputIOFormats must match network's inputs or be one "
"for broadcasting.");
} else {
throw std::invalid_argument(
"The number of outputIOFormats must match network's outputs or be "
"one for broadcasting.");
}
}
return broadcast;
}
inline std::vector<char> loadTimingCacheFile(const std::string inFileName) {
std::ifstream iFile(inFileName, std::ios::in | std::ios::binary);
if (!iFile) {
sample::gLogWarning << "Could not read timing cache from: " << inFileName
<< ". A new timing cache will be generated and written."
<< std::endl;
return std::vector<char>();
}
iFile.seekg(0, std::ifstream::end);
size_t fsize = iFile.tellg();
iFile.seekg(0, std::ifstream::beg);
std::vector<char> content(fsize);
iFile.read(content.data(), fsize);
iFile.close();
sample::gLogInfo << "Loaded " << fsize << " bytes of timing cache from "
<< inFileName << std::endl;
return content;
}
inline void saveTimingCacheFile(const std::string outFileName,
const IHostMemory* blob) {
std::ofstream oFile(outFileName, std::ios::out | std::ios::binary);
if (!oFile) {
sample::gLogWarning << "Could not write timing cache to: " << outFileName
<< std::endl;
return;
}
oFile.write((char*)blob->data(), blob->size());
oFile.close();
sample::gLogInfo << "Saved " << blob->size() << " bytes of timing cache to "
<< outFileName << std::endl;
}
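// Illustrative sketch (not part of the original utilities): a typical pairing
// of the two helpers above with TensorRT's builder-config timing-cache API;
// object lifetime management and error handling are elided, and the helper
// name is an assumption.
inline void applyTimingCache(nvinfer1::IBuilderConfig& config,
                             const std::string& cacheFile) {
  std::vector<char> blob = loadTimingCacheFile(cacheFile);
  // An empty blob simply starts a fresh timing cache.
  nvinfer1::ITimingCache* cache =
      config.createTimingCache(blob.data(), blob.size());
  config.setTimingCache(*cache, /*ignoreMismatch=*/false);
  // ... build the engine here, then serialize config.getTimingCache() and pass
  //     the resulting IHostMemory blob to saveTimingCacheFile(cacheFile, ...).
}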
inline int32_t getCudaDriverVersion() {
int32_t version{-1};
cudaCheck(cudaDriverGetVersion(&version));
return version;
}
inline int32_t getCudaRuntimeVersion() {
int32_t version{-1};
cudaCheck(cudaRuntimeGetVersion(&version));
return version;
}
} // namespace sample
#endif // TRT_SAMPLE_UTILS_H

View File

@@ -1,568 +0,0 @@
/* $OpenBSD: getopt_long.c,v 1.23 2007/10/31 12:34:57 chl Exp $ */
/* $NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $ */
/*
* Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Sponsored in part by the Defense Advanced Research Projects
* Agency (DARPA) and Air Force Research Laboratory, Air Force
* Materiel Command, USAF, under agreement number F39502-99-1-0512.
*/
/*-
* Copyright (c) 2000 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Dieter Baron and Thomas Klausner.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <errno.h>
#include <getopt.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */
#ifdef REPLACE_GETOPT
int opterr = 1; /* if error message should be printed */
int optind = 1; /* index into parent argv vector */
int optopt = '?'; /* character checked for validity */
#undef optreset /* see getopt.h */
#define optreset __mingw_optreset
int optreset; /* reset getopt */
char* optarg; /* argument associated with option */
#endif
#define PRINT_ERROR ((opterr) && (*options != ':'))
#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */
#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */
#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */
/* return values */
#define BADCH (int) '?'
#define BADARG ((*options == ':') ? (int) ':' : (int) '?')
#define INORDER (int) 1
#ifndef __CYGWIN__
#define __progname __argv[0]
#else
extern char __declspec(dllimport) * __progname;
#endif
#ifdef __CYGWIN__
static char EMSG[] = "";
#else
#define EMSG ""
#endif
static int getopt_internal(int, char* const*, const char*, const struct option*, int*, int);
static int parse_long_options(char* const*, const char*, const struct option*, int*, int);
static int gcd(int, int);
static void permute_args(int, int, int, char* const*);
static char* place = EMSG; /* option letter processing */
/* XXX: set optreset to 1 rather than these two */
static int nonopt_start = -1; /* first non option argument (for permute) */
static int nonopt_end = -1; /* first option after non options (for permute) */
/* Error messages */
static const char recargchar[] = "option requires an argument -- %c";
static const char recargstring[] = "option requires an argument -- %s";
static const char ambig[] = "ambiguous option -- %.*s";
static const char noarg[] = "option doesn't take an argument -- %.*s";
static const char illoptchar[] = "unknown option -- %c";
static const char illoptstring[] = "unknown option -- %s";
static void _vwarnx(const char* fmt, va_list ap)
{
(void) fprintf(stderr, "%s: ", __progname);
if (fmt != NULL)
(void) vfprintf(stderr, fmt, ap);
(void) fprintf(stderr, "\n");
}
static void warnx(const char* fmt, ...)
{
va_list ap;
va_start(ap, fmt);
_vwarnx(fmt, ap);
va_end(ap);
}
/*
* Compute the greatest common divisor of a and b.
*/
static int gcd(int a, int b)
{
int c;
c = a % b;
while (c != 0)
{
a = b;
b = c;
c = a % b;
}
return (b);
}
/*
* Exchange the block from nonopt_start to nonopt_end with the block
* from nonopt_end to opt_end (keeping the same order of arguments
* in each block).
*/
static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char* const* nargv)
{
int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos;
char* swap;
/*
* compute lengths of blocks and number and size of cycles
*/
nnonopts = panonopt_end - panonopt_start;
nopts = opt_end - panonopt_end;
ncycle = gcd(nnonopts, nopts);
cyclelen = (opt_end - panonopt_start) / ncycle;
for (i = 0; i < ncycle; i++)
{
cstart = panonopt_end + i;
pos = cstart;
for (j = 0; j < cyclelen; j++)
{
if (pos >= panonopt_end)
pos -= nnonopts;
else
pos += nopts;
swap = nargv[pos];
/* LINTED const cast */
((char**) nargv)[pos] = nargv[cstart];
/* LINTED const cast */
((char**) nargv)[cstart] = swap;
}
}
}
/*
* parse_long_options --
* Parse long options in argc/argv argument vector.
* Returns -1 if short_too is set and the option does not match long_options.
*/
static int parse_long_options(
char* const* nargv, const char* options, const struct option* long_options, int* idx, int short_too)
{
char *current_argv, *has_equal;
size_t current_argv_len;
int i, ambiguous, match;
#define IDENTICAL_INTERPRETATION(_x, _y) \
(long_options[(_x)].has_arg == long_options[(_y)].has_arg && long_options[(_x)].flag == long_options[(_y)].flag \
&& long_options[(_x)].val == long_options[(_y)].val)
current_argv = place;
match = -1;
ambiguous = 0;
optind++;
if ((has_equal = strchr(current_argv, '=')) != NULL)
{
/* argument found (--option=arg) */
current_argv_len = has_equal - current_argv;
has_equal++;
}
else
current_argv_len = strlen(current_argv);
for (i = 0; long_options[i].name; i++)
{
/* find matching long option */
if (strncmp(current_argv, long_options[i].name, current_argv_len))
continue;
if (strlen(long_options[i].name) == current_argv_len)
{
/* exact match */
match = i;
ambiguous = 0;
break;
}
/*
* If this is a known short option, don't allow
* a partial match of a single character.
*/
if (short_too && current_argv_len == 1)
continue;
if (match == -1) /* partial match */
match = i;
else if (!IDENTICAL_INTERPRETATION(i, match))
ambiguous = 1;
}
if (ambiguous)
{
/* ambiguous abbreviation */
if (PRINT_ERROR)
warnx(ambig, (int) current_argv_len, current_argv);
optopt = 0;
return (BADCH);
}
if (match != -1)
{ /* option found */
if (long_options[match].has_arg == no_argument && has_equal)
{
if (PRINT_ERROR)
warnx(noarg, (int) current_argv_len, current_argv);
/*
* XXX: GNU sets optopt to val regardless of flag
*/
if (long_options[match].flag == NULL)
optopt = long_options[match].val;
else
optopt = 0;
return (BADARG);
}
if (long_options[match].has_arg == required_argument || long_options[match].has_arg == optional_argument)
{
if (has_equal)
optarg = has_equal;
else if (long_options[match].has_arg == required_argument)
{
/*
* optional argument doesn't use next nargv
*/
optarg = nargv[optind++];
}
}
if ((long_options[match].has_arg == required_argument) && (optarg == NULL))
{
/*
* Missing argument; leading ':' indicates no error
* should be generated.
*/
if (PRINT_ERROR)
warnx(recargstring, current_argv);
/*
* XXX: GNU sets optopt to val regardless of flag
*/
if (long_options[match].flag == NULL)
optopt = long_options[match].val;
else
optopt = 0;
--optind;
return (BADARG);
}
}
else
{ /* unknown option */
if (short_too)
{
--optind;
return (-1);
}
if (PRINT_ERROR)
warnx(illoptstring, current_argv);
optopt = 0;
return (BADCH);
}
if (idx)
*idx = match;
if (long_options[match].flag)
{
*long_options[match].flag = long_options[match].val;
return (0);
}
else
return (long_options[match].val);
#undef IDENTICAL_INTERPRETATION
}
/*
* getopt_internal --
* Parse argc/argv argument vector. Called by user level routines.
*/
static int getopt_internal(
int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx, int flags)
{
const char* oli; /* option letter list index */
int optchar, short_too;
static int posixly_correct = -1;
if (options == NULL)
return (-1);
/*
* XXX Some GNU programs (like cvs) set optind to 0 instead of
* XXX using optreset. Work around this braindamage.
*/
if (optind == 0)
optind = optreset = 1;
/*
* Disable GNU extensions if POSIXLY_CORRECT is set or options
* string begins with a '+'.
*
* CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or
* optreset != 0 for GNU compatibility.
*/
if (posixly_correct == -1 || optreset != 0)
posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
if (*options == '-')
flags |= FLAG_ALLARGS;
else if (posixly_correct || *options == '+')
flags &= ~FLAG_PERMUTE;
if (*options == '+' || *options == '-')
options++;
optarg = NULL;
if (optreset)
nonopt_start = nonopt_end = -1;
start:
if (optreset || !*place)
{ /* update scanning pointer */
optreset = 0;
if (optind >= nargc)
{ /* end of argument vector */
place = EMSG;
if (nonopt_end != -1)
{
/* do permutation, if we have to */
permute_args(nonopt_start, nonopt_end, optind, nargv);
optind -= nonopt_end - nonopt_start;
}
else if (nonopt_start != -1)
{
/*
* If we skipped non-options, set optind
* to the first of them.
*/
optind = nonopt_start;
}
nonopt_start = nonopt_end = -1;
return (-1);
}
if (*(place = nargv[optind]) != '-' || (place[1] == '\0' && strchr(options, '-') == NULL))
{
place = EMSG; /* found non-option */
if (flags & FLAG_ALLARGS)
{
/*
* GNU extension:
* return non-option as argument to option 1
*/
optarg = nargv[optind++];
return (INORDER);
}
if (!(flags & FLAG_PERMUTE))
{
/*
* If no permutation wanted, stop parsing
* at first non-option.
*/
return (-1);
}
/* do permutation */
if (nonopt_start == -1)
nonopt_start = optind;
else if (nonopt_end != -1)
{
permute_args(nonopt_start, nonopt_end, optind, nargv);
nonopt_start = optind - (nonopt_end - nonopt_start);
nonopt_end = -1;
}
optind++;
/* process next argument */
goto start;
}
if (nonopt_start != -1 && nonopt_end == -1)
nonopt_end = optind;
/*
* If we have "-" do nothing, if "--" we are done.
*/
if (place[1] != '\0' && *++place == '-' && place[1] == '\0')
{
optind++;
place = EMSG;
/*
* We found an option (--), so if we skipped
* non-options, we have to permute.
*/
if (nonopt_end != -1)
{
permute_args(nonopt_start, nonopt_end, optind, nargv);
optind -= nonopt_end - nonopt_start;
}
nonopt_start = nonopt_end = -1;
return (-1);
}
}
/*
* Check long options if:
* 1) we were passed some
* 2) the arg is not just "-"
 *  3) either the arg starts with -- or we are getopt_long_only()
*/
if (long_options != NULL && place != nargv[optind] && (*place == '-' || (flags & FLAG_LONGONLY)))
{
short_too = 0;
if (*place == '-')
place++; /* --foo long option */
else if (*place != ':' && strchr(options, *place) != NULL)
short_too = 1; /* could be short option too */
optchar = parse_long_options(nargv, options, long_options, idx, short_too);
if (optchar != -1)
{
place = EMSG;
return (optchar);
}
}
if ((optchar = (int) *place++) == (int) ':' || (optchar == (int) '-' && *place != '\0')
|| (oli = strchr(options, optchar)) == NULL)
{
/*
* If the user specified "-" and '-' isn't listed in
* options, return -1 (non-option) as per POSIX.
* Otherwise, it is an unknown option character (or ':').
*/
if (optchar == (int) '-' && *place == '\0')
return (-1);
if (!*place)
++optind;
if (PRINT_ERROR)
warnx(illoptchar, optchar);
optopt = optchar;
return (BADCH);
}
if (long_options != NULL && optchar == 'W' && oli[1] == ';')
{
/* -W long-option */
if (*place) /* no space */
/* NOTHING */;
else if (++optind >= nargc)
{ /* no arg */
place = EMSG;
if (PRINT_ERROR)
warnx(recargchar, optchar);
optopt = optchar;
return (BADARG);
}
else /* white space */
place = nargv[optind];
optchar = parse_long_options(nargv, options, long_options, idx, 0);
place = EMSG;
return (optchar);
}
if (*++oli != ':')
{ /* doesn't take argument */
if (!*place)
++optind;
}
else
{ /* takes (optional) argument */
optarg = NULL;
if (*place) /* no white space */
optarg = place;
else if (oli[1] != ':')
{ /* arg not optional */
if (++optind >= nargc)
{ /* no arg */
place = EMSG;
if (PRINT_ERROR)
warnx(recargchar, optchar);
optopt = optchar;
return (BADARG);
}
else
optarg = nargv[optind];
}
place = EMSG;
++optind;
}
/* dump back option letter */
return (optchar);
}
#ifdef REPLACE_GETOPT
/*
* getopt --
* Parse argc/argv argument vector.
*
* [eventually this will replace the BSD getopt]
*/
int getopt(int nargc, char* const* nargv, const char* options)
{
/*
* We don't pass FLAG_PERMUTE to getopt_internal() since
* the BSD getopt(3) (unlike GNU) has never done this.
*
* Furthermore, since many privileged programs call getopt()
* before dropping privileges it makes sense to keep things
* as simple (and bug-free) as possible.
*/
return (getopt_internal(nargc, nargv, options, NULL, NULL, 0));
}
#endif /* REPLACE_GETOPT */
/*
* getopt_long --
* Parse argc/argv argument vector.
*/
int getopt_long(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx)
{
return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE));
}
/*
* getopt_long_only --
* Parse argc/argv argument vector.
*/
int getopt_long_only(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx)
{
return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE | FLAG_LONGONLY));
}
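/*
 * Illustrative usage sketch (not part of the original port): a minimal option
 * table consumed by getopt_long(); the function, option names and letters are
 * assumptions. Guarded out so it is never compiled by default.
 */
#ifdef GETOPT_USAGE_EXAMPLE
static int example_parse(int nargc, char* const* nargv)
{
    static const struct option longopts[] = {{"model", required_argument, NULL, 'm'},
                                             {"verbose", no_argument, NULL, 'v'},
                                             {NULL, 0, NULL, 0}};
    int ch, idx = 0;
    while ((ch = getopt_long(nargc, nargv, "m:v", longopts, &idx)) != -1)
    {
        switch (ch)
        {
        case 'm': /* optarg points at the model path */
            break;
        case 'v': /* enable verbose output */
            break;
        default: /* getopt_long already printed a diagnostic */
            return -1;
        }
    }
    return optind; /* index of the first non-option argument */
}
#endif /* GETOPT_USAGE_EXAMPLE */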

View File

@@ -1,124 +0,0 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __GETOPT_H__
/**
* DISCLAIMER
* This file has no copyright assigned and is placed in the Public Domain.
* This file is a part of the w64 mingw-runtime package.
*
* The w64 mingw-runtime package and its code is distributed in the hope that it
* will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR
* IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to
* warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#define __GETOPT_H__
/* All the headers include this file. */
#include <crtdefs.h>
#if defined(WINGETOPT_SHARED_LIB)
#if defined(BUILDING_WINGETOPT_DLL)
#define WINGETOPT_API __declspec(dllexport)
#else
#define WINGETOPT_API __declspec(dllimport)
#endif
#else
#define WINGETOPT_API
#endif
#ifdef __cplusplus
extern "C"
{
#endif
WINGETOPT_API extern int optind; /* index of first non-option in argv */
WINGETOPT_API extern int optopt; /* single option character, as parsed */
WINGETOPT_API extern int opterr; /* flag to enable built-in diagnostics... */
/* (user may set to zero, to suppress) */
WINGETOPT_API extern char* optarg; /* pointer to argument of current option */
extern int getopt(int nargc, char* const* nargv, const char* options);
#ifdef _BSD_SOURCE
/*
* BSD adds the non-standard `optreset' feature, for reinitialisation
* of `getopt' parsing. We support this feature, for applications which
* proclaim their BSD heritage, before including this header; however,
* to maintain portability, developers are advised to avoid it.
*/
#define optreset __mingw_optreset
extern int optreset;
#endif
#ifdef __cplusplus
}
#endif
/*
* POSIX requires the `getopt' API to be specified in `unistd.h';
* thus, `unistd.h' includes this header. However, we do not want
* to expose the `getopt_long' or `getopt_long_only' APIs, when
* included in this manner. Thus, close the standard __GETOPT_H__
* declarations block, and open an additional __GETOPT_LONG_H__
* specific block, only when *not* __UNISTD_H_SOURCED__, in which
* to declare the extended API.
*/
#endif /* !defined(__GETOPT_H__) */
#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__)
#define __GETOPT_LONG_H__
#ifdef __cplusplus
extern "C"
{
#endif
struct option /* specification for a long form option... */
{
const char* name; /* option name, without leading hyphens */
int has_arg; /* does it take an argument? */
int* flag; /* where to save its status, or NULL */
int val; /* its associated status value */
};
enum /* permitted values for its `has_arg' field... */
{
no_argument = 0, /* option never takes an argument */
required_argument, /* option always requires an argument */
optional_argument /* option may take an argument */
};
extern int getopt_long(
int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
extern int getopt_long_only(
int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
/*
* Previous MinGW implementation had...
*/
#ifndef HAVE_DECL_GETOPT
/*
* ...for the long form API only; keep this for compatibility.
*/
#define HAVE_DECL_GETOPT 1
#endif
#ifdef __cplusplus
}
#endif
#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */

View File

@@ -1,528 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/tensorrt/trt_backend.h"
#include "fastdeploy/utils/utils.h"
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"
#endif
namespace fastdeploy {
size_t TrtDataTypeSize(const nvinfer1::DataType& dtype) {
if (dtype == nvinfer1::DataType::kFLOAT) {
return sizeof(float);
} else if (dtype == nvinfer1::DataType::kHALF) {
return sizeof(float) / 2;
} else if (dtype == nvinfer1::DataType::kINT8) {
return sizeof(int8_t);
} else if (dtype == nvinfer1::DataType::kINT32) {
return sizeof(int32_t);
}
// kBOOL
return sizeof(bool);
}
FDDataType GetFDDataType(const nvinfer1::DataType& dtype) {
if (dtype == nvinfer1::DataType::kFLOAT) {
return FDDataType::FP32;
} else if (dtype == nvinfer1::DataType::kHALF) {
return FDDataType::FP16;
} else if (dtype == nvinfer1::DataType::kINT8) {
return FDDataType::INT8;
} else if (dtype == nvinfer1::DataType::kINT32) {
return FDDataType::INT32;
}
// kBOOL
return FDDataType::BOOL;
}
std::vector<int> toVec(const nvinfer1::Dims& dim) {
std::vector<int> out(dim.d, dim.d + dim.nbDims);
return out;
}
bool CheckDynamicShapeConfig(const paddle2onnx::OnnxReader& reader,
const TrtBackendOption& option) {
// paddle2onnx::ModelTensorInfo inputs[reader.NumInputs()];
// std::string input_shapes[reader.NumInputs()];
std::vector<paddle2onnx::ModelTensorInfo> inputs(reader.NumInputs());
std::vector<std::string> input_shapes(reader.NumInputs());
for (int i = 0; i < reader.NumInputs(); ++i) {
reader.GetInputInfo(i, &inputs[i]);
    // Change 0 to -1: when an input dim is a symbolic string, ONNX reports it as zero.
for (int j = 0; j < inputs[i].rank; ++j) {
if (inputs[i].shape[j] <= 0) {
inputs[i].shape[j] = -1;
}
}
input_shapes[i] = "";
for (int j = 0; j < inputs[i].rank; ++j) {
if (j != inputs[i].rank - 1) {
input_shapes[i] += (std::to_string(inputs[i].shape[j]) + ", ");
} else {
input_shapes[i] += std::to_string(inputs[i].shape[j]);
}
}
}
bool all_check_passed = true;
for (int i = 0; i < reader.NumInputs(); ++i) {
bool contain_unknown_dim = false;
for (int j = 0; j < inputs[i].rank; ++j) {
if (inputs[i].shape[j] < 0) {
contain_unknown_dim = true;
}
}
std::string name(inputs[i].name, strlen(inputs[i].name));
FDINFO << "The loaded model's input tensor:" << name
<< " has shape [" + input_shapes[i] << "]." << std::endl;
if (contain_unknown_dim) {
auto iter1 = option.min_shape.find(name);
auto iter2 = option.max_shape.find(name);
auto iter3 = option.opt_shape.find(name);
if (iter1 == option.min_shape.end() || iter2 == option.max_shape.end() ||
iter3 == option.opt_shape.end()) {
FDERROR << "The loaded model's input tensor:" << name
<< " has dynamic shape [" + input_shapes[i] +
"], but didn't configure it's shape for tensorrt with "
"SetTrtInputShape correctly."
<< std::endl;
all_check_passed = false;
}
}
}
return all_check_passed;
}
bool TrtBackend::InitFromTrt(const std::string& trt_engine_file,
const TrtBackendOption& option) {
if (initialized_) {
FDERROR << "TrtBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
cudaSetDevice(option.gpu_id);
std::ifstream fin(trt_engine_file, std::ios::binary | std::ios::in);
if (!fin) {
FDERROR << "Failed to open TensorRT Engine file " << trt_engine_file
<< std::endl;
return false;
}
fin.seekg(0, std::ios::end);
std::string engine_buffer;
engine_buffer.resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(engine_buffer.at(0)), engine_buffer.size());
fin.close();
SampleUniquePtr<IRuntime> runtime{
createInferRuntime(sample::gLogger.getTRTLogger())};
if (!runtime) {
FDERROR << "Failed to call createInferRuntime()." << std::endl;
return false;
}
engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
runtime->deserializeCudaEngine(engine_buffer.data(),
engine_buffer.size()),
samplesCommon::InferDeleter());
if (!engine_) {
FDERROR << "Failed to call deserializeCudaEngine()." << std::endl;
return false;
}
context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
engine_->createExecutionContext());
FDASSERT(cudaStreamCreate(&stream_) == 0,
"[ERROR] Error occurs while calling cudaStreamCreate().");
GetInputOutputInfo();
initialized_ = true;
return true;
}
bool TrtBackend::InitFromPaddle(const std::string& model_file,
const std::string& params_file,
const TrtBackendOption& option, bool verbose) {
if (initialized_) {
FDERROR << "TrtBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
#ifdef ENABLE_PADDLE_FRONTEND
std::vector<paddle2onnx::CustomOp> custom_ops;
for (auto& item : option.custom_op_info_) {
paddle2onnx::CustomOp op;
std::strcpy(op.op_name, item.first.c_str());
std::strcpy(op.export_op_name, item.second.c_str());
custom_ops.emplace_back(op);
}
char* model_content_ptr;
int model_content_size = 0;
if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
&model_content_ptr, &model_content_size, 11, true,
verbose, true, true, true, custom_ops.data(),
custom_ops.size())) {
FDERROR << "Error occured while export PaddlePaddle to ONNX format."
<< std::endl;
return false;
}
if (option.remove_multiclass_nms_) {
char* new_model = nullptr;
int new_model_size = 0;
if (!paddle2onnx::RemoveMultiClassNMS(model_content_ptr, model_content_size,
&new_model, &new_model_size)) {
FDERROR << "Try to remove MultiClassNMS failed." << std::endl;
return false;
}
delete[] model_content_ptr;
std::string onnx_model_proto(new_model, new_model + new_model_size);
delete[] new_model;
return InitFromOnnx(onnx_model_proto, option, true);
}
std::string onnx_model_proto(model_content_ptr,
model_content_ptr + model_content_size);
delete[] model_content_ptr;
model_content_ptr = nullptr;
return InitFromOnnx(onnx_model_proto, option, true);
#else
FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
"call `InitFromOnnx` instead."
<< std::endl;
return false;
#endif
}
bool TrtBackend::InitFromOnnx(const std::string& model_file,
const TrtBackendOption& option,
bool from_memory_buffer) {
if (initialized_) {
FDERROR << "TrtBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
cudaSetDevice(option.gpu_id);
std::string onnx_content = "";
if (!from_memory_buffer) {
std::ifstream fin(model_file.c_str(), std::ios::binary | std::ios::in);
if (!fin) {
FDERROR << "[ERROR] Failed to open ONNX model file: " << model_file
<< std::endl;
return false;
}
fin.seekg(0, std::ios::end);
onnx_content.resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(onnx_content.at(0)), onnx_content.size());
fin.close();
} else {
onnx_content = model_file;
}
  // Record the original output order, because the converted TensorRT network
  // may produce outputs in a different order.
outputs_order_.clear();
auto onnx_reader =
paddle2onnx::OnnxReader(onnx_content.c_str(), onnx_content.size());
for (int i = 0; i < onnx_reader.NumOutputs(); ++i) {
std::string name(
onnx_reader.output_names[i],
onnx_reader.output_names[i] + strlen(onnx_reader.output_names[i]));
outputs_order_[name] = i;
}
if (!CheckDynamicShapeConfig(onnx_reader, option)) {
FDERROR << "TrtBackend::CheckDynamicShapeConfig failed." << std::endl;
return false;
}
if (option.serialize_file != "") {
std::ifstream fin(option.serialize_file, std::ios::binary | std::ios::in);
if (fin) {
FDINFO << "Detect serialized TensorRT Engine file in "
<< option.serialize_file << ", will load it directly."
<< std::endl;
fin.close();
return InitFromTrt(option.serialize_file);
}
}
if (!CreateTrtEngine(onnx_content, option)) {
return false;
}
context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
engine_->createExecutionContext());
FDASSERT(cudaStreamCreate(&stream_) == 0,
"[ERROR] Error occurs while calling cudaStreamCreate().");
GetInputOutputInfo();
initialized_ = true;
return true;
}
bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) {
AllocateBufferInDynamicShape(inputs, outputs);
std::vector<void*> input_binds(inputs.size());
for (size_t i = 0; i < inputs.size(); ++i) {
if (inputs[i].dtype == FDDataType::INT64) {
int64_t* data = static_cast<int64_t*>(inputs[i].Data());
std::vector<int32_t> casted_data(data, data + inputs[i].Numel());
FDASSERT(cudaMemcpyAsync(inputs_buffer_[inputs[i].name].data(),
static_cast<void*>(casted_data.data()),
inputs[i].Nbytes() / 2, cudaMemcpyHostToDevice,
stream_) == 0,
"[ERROR] Error occurs while copy memory from CPU to GPU.");
} else {
FDASSERT(cudaMemcpyAsync(inputs_buffer_[inputs[i].name].data(),
inputs[i].Data(), inputs[i].Nbytes(),
cudaMemcpyHostToDevice, stream_) == 0,
"[ERROR] Error occurs while copy memory from CPU to GPU.");
}
}
if (!context_->enqueueV2(bindings_.data(), stream_, nullptr)) {
FDERROR << "Failed to Infer with TensorRT." << std::endl;
return false;
}
for (size_t i = 0; i < outputs->size(); ++i) {
FDASSERT(cudaMemcpyAsync((*outputs)[i].Data(),
outputs_buffer_[(*outputs)[i].name].data(),
(*outputs)[i].Nbytes(), cudaMemcpyDeviceToHost,
stream_) == 0,
"[ERROR] Error occurs while copy memory from GPU to CPU.");
}
return true;
}
void TrtBackend::GetInputOutputInfo() {
inputs_desc_.clear();
outputs_desc_.clear();
auto num_binds = engine_->getNbBindings();
for (auto i = 0; i < num_binds; ++i) {
std::string name = std::string(engine_->getBindingName(i));
auto shape = toVec(engine_->getBindingDimensions(i));
auto dtype = engine_->getBindingDataType(i);
if (engine_->bindingIsInput(i)) {
inputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
inputs_buffer_[name] = DeviceBuffer(dtype);
} else {
outputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
outputs_buffer_[name] = DeviceBuffer(dtype);
}
}
bindings_.resize(num_binds);
}
void TrtBackend::AllocateBufferInDynamicShape(
const std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs) {
for (const auto& item : inputs) {
auto idx = engine_->getBindingIndex(item.name.c_str());
std::vector<int> shape(item.shape.begin(), item.shape.end());
auto dims = sample::toDims(shape);
context_->setBindingDimensions(idx, dims);
if (item.Nbytes() > inputs_buffer_[item.name].nbBytes()) {
inputs_buffer_[item.name].resize(dims);
bindings_[idx] = inputs_buffer_[item.name].data();
}
}
if (outputs->size() != outputs_desc_.size()) {
outputs->resize(outputs_desc_.size());
}
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
auto output_dims = context_->getBindingDimensions(idx);
// find the original index of output
auto iter = outputs_order_.find(outputs_desc_[i].name);
    FDASSERT(iter != outputs_order_.end(),
             "Cannot find output: " + outputs_desc_[i].name +
                 " of the TensorRT network in the original model.");
auto ori_idx = iter->second;
(*outputs)[ori_idx].dtype = GetFDDataType(outputs_desc_[i].dtype);
(*outputs)[ori_idx].shape.assign(output_dims.d,
output_dims.d + output_dims.nbDims);
(*outputs)[ori_idx].name = outputs_desc_[i].name;
(*outputs)[ori_idx].data.resize(volume(output_dims) *
TrtDataTypeSize(outputs_desc_[i].dtype));
if ((*outputs)[ori_idx].Nbytes() >
outputs_buffer_[outputs_desc_[i].name].nbBytes()) {
outputs_buffer_[outputs_desc_[i].name].resize(output_dims);
bindings_[idx] = outputs_buffer_[outputs_desc_[i].name].data();
}
}
}
bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
const TrtBackendOption& option) {
const auto explicitBatch =
1U << static_cast<uint32_t>(
nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
builder_ = SampleUniquePtr<nvinfer1::IBuilder>(
nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
if (!builder_) {
FDERROR << "Failed to call createInferBuilder()." << std::endl;
return false;
}
network_ = SampleUniquePtr<nvinfer1::INetworkDefinition>(
builder_->createNetworkV2(explicitBatch));
if (!network_) {
FDERROR << "Failed to call createNetworkV2()." << std::endl;
return false;
}
auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(
builder_->createBuilderConfig());
if (!config) {
FDERROR << "Failed to call createBuilderConfig()." << std::endl;
return false;
}
if (option.enable_fp16) {
if (!builder_->platformHasFastFp16()) {
FDWARNING << "Detected FP16 is not supported in the current GPU, "
"will use FP32 instead."
<< std::endl;
} else {
config->setFlag(nvinfer1::BuilderFlag::kFP16);
}
}
parser_ = SampleUniquePtr<nvonnxparser::IParser>(
nvonnxparser::createParser(*network_, sample::gLogger.getTRTLogger()));
if (!parser_) {
FDERROR << "Failed to call createParser()." << std::endl;
return false;
}
if (!parser_->parse(onnx_model.data(), onnx_model.size())) {
FDERROR << "Failed to parse ONNX model by TensorRT." << std::endl;
return false;
}
FDINFO << "Start to building TensorRT Engine..." << std::endl;
bool fp16 = builder_->platformHasFastFp16();
builder_->setMaxBatchSize(option.max_batch_size);
config->setMaxWorkspaceSize(option.max_workspace_size);
if (option.max_shape.size() > 0) {
auto profile = builder_->createOptimizationProfile();
FDASSERT(option.max_shape.size() == option.min_shape.size() &&
option.min_shape.size() == option.opt_shape.size(),
"[TrtBackend] Size of max_shape/opt_shape/min_shape in "
"TrtBackendOption should keep same.");
for (const auto& item : option.min_shape) {
// set min shape
FDASSERT(profile->setDimensions(item.first.c_str(),
nvinfer1::OptProfileSelector::kMIN,
sample::toDims(item.second)),
"[TrtBackend] Failed to set min_shape for input: " + item.first +
" in TrtBackend.");
// set optimization shape
auto iter = option.opt_shape.find(item.first);
FDASSERT(iter != option.opt_shape.end(),
"[TrtBackend] Cannot find input name: " + item.first +
" in TrtBackendOption::opt_shape.");
FDASSERT(profile->setDimensions(item.first.c_str(),
nvinfer1::OptProfileSelector::kOPT,
sample::toDims(iter->second)),
"[TrtBackend] Failed to set opt_shape for input: " + item.first +
" in TrtBackend.");
// set max shape
iter = option.max_shape.find(item.first);
FDASSERT(iter != option.max_shape.end(),
"[TrtBackend] Cannot find input name: " + item.first +
" in TrtBackendOption::max_shape.");
FDASSERT(profile->setDimensions(item.first.c_str(),
nvinfer1::OptProfileSelector::kMAX,
sample::toDims(iter->second)),
"[TrtBackend] Failed to set max_shape for input: " + item.first +
" in TrtBackend.");
}
config->addOptimizationProfile(profile);
}
SampleUniquePtr<IHostMemory> plan{
builder_->buildSerializedNetwork(*network_, *config)};
if (!plan) {
FDERROR << "Failed to call buildSerializedNetwork()." << std::endl;
return false;
}
SampleUniquePtr<IRuntime> runtime{
createInferRuntime(sample::gLogger.getTRTLogger())};
if (!runtime) {
FDERROR << "Failed to call createInferRuntime()." << std::endl;
return false;
}
engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
runtime->deserializeCudaEngine(plan->data(), plan->size()),
samplesCommon::InferDeleter());
if (!engine_) {
FDERROR << "Failed to call deserializeCudaEngine()." << std::endl;
return false;
}
FDINFO << "TensorRT Engine is built succussfully." << std::endl;
if (option.serialize_file != "") {
FDINFO << "Serialize TensorRTEngine to local file " << option.serialize_file
<< "." << std::endl;
std::ofstream engine_file(option.serialize_file.c_str());
if (!engine_file) {
FDERROR << "Failed to open " << option.serialize_file << " to write."
<< std::endl;
return false;
}
engine_file.write(static_cast<char*>(plan->data()), plan->size());
engine_file.close();
FDINFO << "TensorRTEngine is serialized to local file "
<< option.serialize_file
<< ", we can load this model from the seralized engine "
"directly next time."
<< std::endl;
}
return true;
}
TensorInfo TrtBackend::GetInputInfo(int index) {
  FDASSERT(index < NumInputs(),
           "The index: " + std::to_string(index) +
               " should be less than the number of inputs: " +
               std::to_string(NumInputs()) + ".");
TensorInfo info;
info.name = inputs_desc_[index].name;
info.shape.assign(inputs_desc_[index].shape.begin(),
inputs_desc_[index].shape.end());
info.dtype = GetFDDataType(inputs_desc_[index].dtype);
return info;
}
TensorInfo TrtBackend::GetOutputInfo(int index) {
  FDASSERT(index < NumOutputs(),
           "The index: " + std::to_string(index) +
               " should be less than the number of outputs: " +
               std::to_string(NumOutputs()) + ".");
TensorInfo info;
info.name = outputs_desc_[index].name;
info.shape.assign(outputs_desc_[index].shape.begin(),
outputs_desc_[index].shape.end());
info.dtype = GetFDDataType(outputs_desc_[index].dtype);
return info;
}
} // namespace fastdeploy

View File

@@ -1,113 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/backends/tensorrt/common/argsParser.h"
#include "fastdeploy/backends/tensorrt/common/buffers.h"
#include "fastdeploy/backends/tensorrt/common/common.h"
#include "fastdeploy/backends/tensorrt/common/logger.h"
#include "fastdeploy/backends/tensorrt/common/parserOnnxConfig.h"
#include "fastdeploy/backends/tensorrt/common/sampleUtils.h"
#include <cuda_runtime_api.h>
#include "NvInfer.h"
namespace fastdeploy {
using namespace samplesCommon;
struct TrtValueInfo {
std::string name;
std::vector<int> shape;
nvinfer1::DataType dtype;
};
struct TrtBackendOption {
int gpu_id = 0;
bool enable_fp16 = false;
bool enable_int8 = false;
size_t max_batch_size = 32;
size_t max_workspace_size = 1 << 30;
std::map<std::string, std::vector<int32_t>> max_shape;
std::map<std::string, std::vector<int32_t>> min_shape;
std::map<std::string, std::vector<int32_t>> opt_shape;
std::string serialize_file = "";
// inside parameter, maybe remove next version
bool remove_multiclass_nms_ = false;
std::map<std::string, std::string> custom_op_info_;
};
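// Illustrative sketch (not part of the original header): every dynamic input
// needs matching entries in min_shape/opt_shape/max_shape before the engine is
// built; the helper name, the input name "x" and the shapes are assumptions.
inline TrtBackendOption MakeDynamicShapeOption() {
  TrtBackendOption option;
  option.min_shape["x"] = {1, 3, 224, 224};  // smallest shape to support
  option.opt_shape["x"] = {4, 3, 224, 224};  // shape TensorRT optimizes for
  option.max_shape["x"] = {8, 3, 224, 224};  // largest shape to support
  option.serialize_file = "model.trt";       // cache the built engine on disk
  return option;
}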
std::vector<int> toVec(const nvinfer1::Dims& dim);
size_t TrtDataTypeSize(const nvinfer1::DataType& dtype);
FDDataType GetFDDataType(const nvinfer1::DataType& dtype);
class TrtBackend : public BaseBackend {
public:
TrtBackend() : engine_(nullptr), context_(nullptr) {}
virtual ~TrtBackend() = default;
void BuildOption(const TrtBackendOption& option);
bool InitFromPaddle(const std::string& model_file,
const std::string& params_file,
const TrtBackendOption& option = TrtBackendOption(),
bool verbose = false);
bool InitFromOnnx(const std::string& model_file,
const TrtBackendOption& option = TrtBackendOption(),
bool from_memory_buffer = false);
bool InitFromTrt(const std::string& trt_engine_file,
const TrtBackendOption& option = TrtBackendOption());
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
int NumInputs() const { return inputs_desc_.size(); }
int NumOutputs() const { return outputs_desc_.size(); }
TensorInfo GetInputInfo(int index);
TensorInfo GetOutputInfo(int index);
private:
std::shared_ptr<nvinfer1::ICudaEngine> engine_;
std::shared_ptr<nvinfer1::IExecutionContext> context_;
SampleUniquePtr<nvonnxparser::IParser> parser_;
SampleUniquePtr<nvinfer1::IBuilder> builder_;
SampleUniquePtr<nvinfer1::INetworkDefinition> network_;
cudaStream_t stream_{};
std::vector<void*> bindings_;
std::vector<TrtValueInfo> inputs_desc_;
std::vector<TrtValueInfo> outputs_desc_;
std::map<std::string, DeviceBuffer> inputs_buffer_;
std::map<std::string, DeviceBuffer> outputs_buffer_;
  // When the model has more than one output, the output order of the TensorRT
  // network may differ from that of the original ONNX model, so this map
  // records the original output order to help recover the right order.
std::map<std::string, int> outputs_order_;
void GetInputOutputInfo();
void AllocateBufferInDynamicShape(const std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs);
bool CreateTrtEngine(const std::string& onnx_model,
const TrtBackendOption& option);
};
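// Illustrative sketch (not part of the original header): the typical call
// sequence for this backend with an ONNX model that has static input shapes;
// the helper name is an assumption and real input data is omitted.
inline bool RunTrtBackendOnce(const std::string& onnx_file,
                              const TrtBackendOption& option) {
  TrtBackend backend;
  if (!backend.InitFromOnnx(onnx_file, option)) {
    return false;
  }
  std::vector<FDTensor> inputs(backend.NumInputs());
  for (int i = 0; i < backend.NumInputs(); ++i) {
    TensorInfo info = backend.GetInputInfo(i);
    std::vector<int64_t> shape(info.shape.begin(), info.shape.end());
    // Allocates a zero-filled CPU buffer; real code would fill it with data.
    inputs[i].Allocate(shape, info.dtype, info.name);
  }
  std::vector<FDTensor> outputs;
  return backend.Infer(inputs, &outputs);
}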
} // namespace fastdeploy

View File

@@ -1,54 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#ifndef FASTDEPLOY_DEBUG
#cmakedefine FASTDEPLOY_DEBUG
#endif
#ifndef FASTDEPLOY_LIB
#cmakedefine FASTDEPLOY_LIB
#endif
#ifndef ENABLE_PADDLE_FRONTEND
#cmakedefine ENABLE_PADDLE_FRONTEND
#endif
#ifndef ENABLE_ORT_BACKEND
#cmakedefine ENABLE_ORT_BACKEND
#endif
#ifndef ENABLE_PADDLE_BACKEND
#cmakedefine ENABLE_PADDLE_BACKEND
#endif
#ifndef WITH_GPU
#cmakedefine WITH_GPU
#endif
#ifndef ENABLE_TRT_BACKEND
#cmakedefine ENABLE_TRT_BACKEND
#endif
#ifndef ENABLE_VISION
#cmakedefine ENABLE_VISION
#endif
#ifndef ENABLE_OPENCV_CUDA
#cmakedefine ENABLE_OPENCV_CUDA
#endif
#ifndef ENABLE_VISION_VISUALIZE
#cmakedefine ENABLE_VISION_VISUALIZE
#endif

View File

@@ -1,134 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/utils/utils.h"
#ifdef WITH_GPU
#include <cuda_runtime_api.h>
#endif
namespace fastdeploy {
void* FDTensor::MutableData() {
if (external_data_ptr != nullptr) {
return external_data_ptr;
}
return data.data();
}
void* FDTensor::Data() {
if (external_data_ptr != nullptr) {
if (device == Device::GPU) {
#ifdef WITH_GPU
// need to copy cuda mem to cpu first
temporary_cpu_buffer.resize(Nbytes());
FDASSERT(cudaMemcpy(temporary_cpu_buffer.data(), external_data_ptr,
Nbytes(), cudaMemcpyDeviceToHost) == 0,
"[ERROR] Error occurs while copy memory from GPU to CPU");
return temporary_cpu_buffer.data();
#else
      FDASSERT(false,
               "FastDeploy was not compiled with -DWITH_GPU=ON, so this is an "
               "unexpected problem.");
#endif
} else {
return external_data_ptr;
}
}
return data.data();
}
const void* FDTensor::Data() const {
if (external_data_ptr != nullptr) {
return external_data_ptr;
}
return data.data();
}
void FDTensor::SetExternalData(const std::vector<int64_t>& new_shape,
const FDDataType& data_type, void* data_buffer) {
dtype = data_type;
shape.assign(new_shape.begin(), new_shape.end());
external_data_ptr = data_buffer;
}
void FDTensor::Allocate(const std::vector<int64_t>& new_shape,
const FDDataType& data_type,
const std::string& tensor_name) {
dtype = data_type;
name = tensor_name;
shape.assign(new_shape.begin(), new_shape.end());
int unit = FDDataTypeSize(data_type);
int total_size =
std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
data.resize(total_size * unit);
}
int FDTensor::Nbytes() const { return Numel() * FDDataTypeSize(dtype); }
int FDTensor::Numel() const {
return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
}
template <typename T>
void CalculateStatisInfo(void* src_ptr, int size, double* mean, double* max,
double* min) {
T* ptr = static_cast<T*>(src_ptr);
*mean = 0;
*max = -99999999;
*min = 99999999;
for (int i = 0; i < size; ++i) {
if (*(ptr + i) > *max) {
*max = *(ptr + i);
}
if (*(ptr + i) < *min) {
*min = *(ptr + i);
}
*mean += *(ptr + i);
}
*mean = *mean / size;
}
void FDTensor::PrintInfo(const std::string& prefix) {
double mean = 0;
double max = -99999999;
double min = 99999999;
if (dtype == FDDataType::FP32) {
CalculateStatisInfo<float>(Data(), Numel(), &mean, &max, &min);
} else if (dtype == FDDataType::FP64) {
CalculateStatisInfo<double>(Data(), Numel(), &mean, &max, &min);
} else if (dtype == FDDataType::INT8) {
CalculateStatisInfo<int8_t>(Data(), Numel(), &mean, &max, &min);
} else if (dtype == FDDataType::UINT8) {
CalculateStatisInfo<uint8_t>(Data(), Numel(), &mean, &max, &min);
} else if (dtype == FDDataType::INT32) {
CalculateStatisInfo<int32_t>(Data(), Numel(), &mean, &max, &min);
} else if (dtype == FDDataType::INT64) {
CalculateStatisInfo<int64_t>(Data(), Numel(), &mean, &max, &min);
} else {
    FDASSERT(false,
             "PrintInfo function doesn't support the current data type, this "
             "function may need to be extended.")
}
std::cout << prefix << ": shape=";
for (int i = 0; i < shape.size(); ++i) {
std::cout << shape[i] << " ";
}
std::cout << ", dtype=" << Str(dtype) << ", mean=" << mean << ", max=" << max
<< ", min=" << min << std::endl;
}
FDTensor::FDTensor(const std::string& tensor_name) { name = tensor_name; }
} // namespace fastdeploy

View File

@@ -1,87 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#include "fastdeploy/core/fd_type.h"
namespace fastdeploy {
struct FASTDEPLOY_DECL FDTensor {
std::vector<int8_t> data;
std::vector<int64_t> shape;
std::string name = "";
FDDataType dtype;
// This is used to skip the memory copy step;
// external_data_ptr will point to user-allocated memory, and
// the user has to maintain that memory (allocate and release it)
void* external_data_ptr = nullptr;
// The internal data is always on CPU
// Sometimes the external data is on the GPU, and we also run
// inference on the GPU, so we can skip the data transfer, which
// may improve efficiency
Device device = Device::CPU;
// if the external data is not on CPU, we use this temporary buffer
// to transfer the data to CPU, since in some cases we need to access
// another device's data
std::vector<int8_t> temporary_cpu_buffer;
// Get data buffer pointer
void* MutableData();
// Use this function to get the tensor data for processing.
// Since the most common scenario is processing data on CPU,
// this function will return a pointer to a CPU memory buffer.
// If the original data is on another device, the data will be
// copied to CPU and stored in `temporary_cpu_buffer`
void* Data();
const void* Data() const;
// Set a user memory buffer for the Tensor. The memory is managed by
// the user itself, but the Tensor will share this memory with the user,
// so take care with the lifetime of the user buffer
void SetExternalData(const std::vector<int64_t>& new_shape,
const FDDataType& data_type, void* data_buffer);
// Initialize the Tensor, including setting its attributes
// and allocating a CPU memory buffer
void Allocate(const std::vector<int64_t>& new_shape,
const FDDataType& data_type,
const std::string& tensor_name = "");
// Total size of tensor memory buffer in bytes
int Nbytes() const;
// Total number of elements in this tensor
int Numel() const;
// Debug function
// Use this function to print shape, dtype, mean, max and min;
// the prefix will also be printed as a tag
void PrintInfo(const std::string& prefix = "TensorInfo: ");
FDTensor() {}
explicit FDTensor(const std::string& tensor_name);
};
} // namespace fastdeploy
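A minimal usage sketch of the FDTensor API declared above; the function name, shapes and values are illustrative assumptions, not part of the library.
// Sketch: allocate an owned CPU buffer, fill it, and print statistics;
// then wrap a user-managed buffer without copying via SetExternalData.
#include <vector>
#include "fastdeploy/core/fd_tensor.h"

void FDTensorSketch() {
  using namespace fastdeploy;
  FDTensor owned("owned_tensor");
  owned.Allocate({2, 3}, FDDataType::FP32);
  float* buf = reinterpret_cast<float*>(owned.MutableData());
  for (int i = 0; i < owned.Numel(); ++i) buf[i] = static_cast<float>(i);
  owned.PrintInfo("owned");  // prints shape, dtype, mean, max, min

  // The caller keeps ownership of `external` and must keep it alive
  // while `shared` is in use.
  std::vector<float> external(6, 1.0f);
  FDTensor shared("shared_tensor");
  shared.SetExternalData({2, 3}, FDDataType::FP32, external.data());
}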

View File

@@ -1,123 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/core/fd_type.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
int FDDataTypeSize(const FDDataType& data_type) {
FDASSERT(data_type != FDDataType::FP16, "Float16 is not supported.");
if (data_type == FDDataType::BOOL) {
return sizeof(bool);
} else if (data_type == FDDataType::INT16) {
return sizeof(int16_t);
} else if (data_type == FDDataType::INT32) {
return sizeof(int32_t);
} else if (data_type == FDDataType::INT64) {
return sizeof(int64_t);
} else if (data_type == FDDataType::FP32) {
return sizeof(float);
} else if (data_type == FDDataType::FP64) {
return sizeof(double);
  } else if (data_type == FDDataType::UINT8) {
    return sizeof(uint8_t);
  } else if (data_type == FDDataType::INT8) {
    return sizeof(int8_t);
  } else {
FDASSERT(false, "Unexpected data type: " + Str(data_type));
}
return -1;
}
std::string Str(const Device& d) {
std::string out;
switch (d) {
case Device::DEFAULT:
out = "Device::DEFAULT";
break;
case Device::CPU:
out = "Device::CPU";
break;
case Device::GPU:
out = "Device::GPU";
break;
default:
out = "Device::UNKOWN";
}
return out;
}
std::string Str(const FDDataType& fdt) {
std::string out;
switch (fdt) {
case FDDataType::BOOL:
out = "FDDataType::BOOL";
break;
case FDDataType::INT16:
out = "FDDataType::INT16";
break;
case FDDataType::INT32:
out = "FDDataType::INT32";
break;
case FDDataType::INT64:
out = "FDDataType::INT64";
break;
case FDDataType::FP32:
out = "FDDataType::FP32";
break;
case FDDataType::FP64:
out = "FDDataType::FP64";
break;
case FDDataType::FP16:
out = "FDDataType::FP16";
break;
case FDDataType::UINT8:
out = "FDDataType::UINT8";
break;
case FDDataType::INT8:
out = "FDDataType::INT8";
break;
default:
out = "FDDataType::UNKNOWN";
}
return out;
}
template <typename PlainType>
const FDDataType TypeToDataType<PlainType>::dtype = UNKNOWN1;
template <>
const FDDataType TypeToDataType<bool>::dtype = BOOL;
template <>
const FDDataType TypeToDataType<int16_t>::dtype = INT16;
template <>
const FDDataType TypeToDataType<int32_t>::dtype = INT32;
template <>
const FDDataType TypeToDataType<int64_t>::dtype = INT64;
template <>
const FDDataType TypeToDataType<float>::dtype = FP32;
template <>
const FDDataType TypeToDataType<double>::dtype = FP64;
template <>
const FDDataType TypeToDataType<uint8_t>::dtype = UINT8;
template <>
const FDDataType TypeToDataType<int8_t>::dtype = INT8;
} // namespace fastdeploy

View File

@@ -1,63 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ostream>
#include <sstream>
#include <string>
#include "fastdeploy/core/config.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
enum FASTDEPLOY_DECL Device { DEFAULT, CPU, GPU };
FASTDEPLOY_DECL std::string Str(const Device& d);
enum FASTDEPLOY_DECL FDDataType {
BOOL,
INT16,
INT32,
INT64,
FP16,
FP32,
FP64,
UNKNOWN1,
UNKNOWN2,
UNKNOWN3,
UNKNOWN4,
UNKNOWN5,
UNKNOWN6,
UNKNOWN7,
UNKNOWN8,
UNKNOWN9,
UNKNOWN10,
UNKNOWN11,
UNKNOWN12,
UNKNOWN13,
UINT8,
INT8
};
FASTDEPLOY_DECL std::string Str(const FDDataType& fdt);
FASTDEPLOY_DECL int32_t FDDataTypeSize(const FDDataType& data_dtype);
template <typename PlainType>
struct FASTDEPLOY_DECL TypeToDataType {
static const FDDataType dtype;
};
} // namespace fastdeploy
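A short sketch of how the type helpers above are typically combined when sizing a buffer; the function name and element count are illustrative assumptions.
// Sketch: map a C++ element type to its FDDataType and compute the byte
// size of a buffer holding `num_elements` of that type.
#include <cstddef>
#include "fastdeploy/core/fd_type.h"

size_t BufferBytesSketch(size_t num_elements) {
  using namespace fastdeploy;
  FDDataType dt = TypeToDataType<float>::dtype;  // FDDataType::FP32
  int element_size = FDDataTypeSize(dt);         // sizeof(float)
  return num_elements * static_cast<size_t>(element_size);
}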

View File

@@ -1,145 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/utils/unique_ptr.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
bool FastDeployModel::InitRuntime() {
FDASSERT(
CheckModelFormat(runtime_option.model_file, runtime_option.model_format),
"ModelFormatCheck Failed.");
if (runtime_initialized_) {
FDERROR << "The model is already initialized, cannot be initliazed again."
<< std::endl;
return false;
}
if (runtime_option.backend != Backend::UNKNOWN) {
if (runtime_option.backend == Backend::ORT) {
if (!IsBackendAvailable(Backend::ORT)) {
FDERROR
<< "Backend::ORT is not complied with current FastDeploy library."
<< std::endl;
return false;
}
} else if (runtime_option.backend == Backend::TRT) {
if (!IsBackendAvailable(Backend::TRT)) {
FDERROR
<< "Backend::TRT is not complied with current FastDeploy library."
<< std::endl;
return false;
}
} else if (runtime_option.backend == Backend::PDINFER) {
if (!IsBackendAvailable(Backend::PDINFER)) {
FDERROR << "Backend::PDINFER is not compiled with current FastDeploy "
"library."
<< std::endl;
return false;
}
} else {
FDERROR
<< "Only support Backend::ORT / Backend::TRT / Backend::PDINFER now."
<< std::endl;
return false;
}
runtime_ = utils::make_unique<Runtime>();
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
}
if (runtime_option.device == Device::CPU) {
return CreateCpuBackend();
} else if (runtime_option.device == Device::GPU) {
#ifdef WITH_GPU
return CreateGpuBackend();
#else
FDERROR << "The compiled FastDeploy library doesn't support GPU now."
<< std::endl;
return false;
#endif
}
FDERROR << "Only support CPU/GPU now." << std::endl;
return false;
}
bool FastDeployModel::CreateCpuBackend() {
if (valid_cpu_backends.size() == 0) {
FDERROR << "There's no valid cpu backends for model: " << ModelName()
<< std::endl;
return false;
}
for (size_t i = 0; i < valid_cpu_backends.size(); ++i) {
if (!IsBackendAvailable(valid_cpu_backends[i])) {
continue;
}
runtime_option.backend = valid_cpu_backends[i];
runtime_ = std::unique_ptr<Runtime>(new Runtime());
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
}
FDERROR << "Found no valid backend for model: " << ModelName() << std::endl;
return false;
}
bool FastDeployModel::CreateGpuBackend() {
if (valid_gpu_backends.size() == 0) {
FDERROR << "There's no valid gpu backends for model: " << ModelName()
<< std::endl;
return false;
}
for (size_t i = 0; i < valid_gpu_backends.size(); ++i) {
if (!IsBackendAvailable(valid_gpu_backends[i])) {
continue;
}
runtime_option.backend = valid_gpu_backends[i];
runtime_ = std::unique_ptr<Runtime>(new Runtime());
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
}
FDERROR << "Cannot find an available gpu backend to load this model."
<< std::endl;
return false;
}
bool FastDeployModel::Infer(std::vector<FDTensor>& input_tensors,
std::vector<FDTensor>* output_tensors) {
return runtime_->Infer(input_tensors, output_tensors);
}
void FastDeployModel::EnableDebug() {
#ifdef FASTDEPLOY_DEBUG
debug_ = true;
#else
FDWARNING << "The compile FastDeploy is not with -DENABLE_DEBUG=ON, so "
"cannot enable debug mode."
<< std::endl;
debug_ = false;
#endif
}
bool FastDeployModel::DebugEnabled() { return debug_; }
} // namespace fastdeploy

View File

@@ -1,67 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/fastdeploy_runtime.h"
namespace fastdeploy {
class FASTDEPLOY_DECL FastDeployModel {
public:
virtual std::string ModelName() const { return "NameUndefined"; }
virtual bool InitRuntime();
virtual bool CreateCpuBackend();
virtual bool CreateGpuBackend();
virtual bool Infer(std::vector<FDTensor>& input_tensors,
std::vector<FDTensor>* output_tensors);
RuntimeOption runtime_option;
std::vector<Backend> valid_cpu_backends = {Backend::ORT};
std::vector<Backend> valid_gpu_backends = {Backend::ORT};
std::vector<Backend> valid_external_backends;
bool initialized = false;
virtual int NumInputsOfRuntime() { return runtime_->NumInputs(); }
virtual int NumOutputsOfRuntime() { return runtime_->NumOutputs(); }
virtual TensorInfo InputInfoOfRuntime(int index) {
return runtime_->GetInputInfo(index);
}
virtual TensorInfo OutputInfoOfRuntime(int index) {
return runtime_->GetOutputInfo(index);
}
virtual bool Initialized() const {
return runtime_initialized_ && initialized;
}
virtual void EnableDebug();
virtual bool DebugEnabled();
private:
std::unique_ptr<Runtime> runtime_;
bool runtime_initialized_ = false;
bool debug_ = false;
};
#define TIMERECORD_START(id) \
TimeCounter tc_##id; \
tc_##id.Start();
#define TIMERECORD_END(id, prefix) \
if (DebugEnabled()) { \
tc_##id.End(); \
FDLogger() << __FILE__ << "(" << __LINE__ << "):" << __FUNCTION__ << " " \
<< prefix << " duration = " << tc_##id.Duration() << "s." \
<< std::endl; \
}
} // namespace fastdeploy
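A hypothetical sketch of how a concrete model wrapper builds on the FastDeployModel base class above; the class name and backend lists are assumptions for illustration only.
// Sketch: a derived model sets its runtime option and candidate backends,
// then lets the base class pick an available backend in InitRuntime().
#include <string>
#include "fastdeploy/fastdeploy_model.h"

namespace fastdeploy {
class DemoModel : public FastDeployModel {
 public:
  DemoModel(const std::string& model_file, const std::string& params_file) {
    runtime_option.SetModelPath(model_file, params_file, "paddle");
    valid_cpu_backends = {Backend::ORT, Backend::PDINFER};
    valid_gpu_backends = {Backend::ORT, Backend::TRT};
    initialized = InitRuntime();
  }
  std::string ModelName() const override { return "DemoModel"; }
};
}  // namespace fastdeploy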

View File

@@ -1,365 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/fastdeploy_runtime.h"
#include "fastdeploy/utils/unique_ptr.h"
#include "fastdeploy/utils/utils.h"
#ifdef ENABLE_ORT_BACKEND
#include "fastdeploy/backends/ort/ort_backend.h"
#endif
#ifdef ENABLE_TRT_BACKEND
#include "fastdeploy/backends/tensorrt/trt_backend.h"
#endif
#ifdef ENABLE_PADDLE_BACKEND
#include "fastdeploy/backends/paddle/paddle_backend.h"
#endif
namespace fastdeploy {
std::vector<Backend> GetAvailableBackends() {
std::vector<Backend> backends;
#ifdef ENABLE_ORT_BACKEND
backends.push_back(Backend::ORT);
#endif
#ifdef ENABLE_TRT_BACKEND
backends.push_back(Backend::TRT);
#endif
#ifdef ENABLE_PADDLE_BACKEND
backends.push_back(Backend::PDINFER);
#endif
return backends;
}
bool IsBackendAvailable(const Backend& backend) {
std::vector<Backend> backends = GetAvailableBackends();
for (size_t i = 0; i < backends.size(); ++i) {
if (backend == backends[i]) {
return true;
}
}
return false;
}
std::string Str(const Backend& b) {
if (b == Backend::ORT) {
return "Backend::ORT";
} else if (b == Backend::TRT) {
return "Backend::TRT";
} else if (b == Backend::PDINFER) {
return "Backend::PDINFER";
}
return "UNKNOWN-Backend";
}
std::string Str(const Frontend& f) {
if (f == Frontend::PADDLE) {
return "Frontend::PADDLE";
} else if (f == Frontend::ONNX) {
return "Frontend::ONNX";
}
return "UNKNOWN-Frontend";
}
bool CheckModelFormat(const std::string& model_file,
const Frontend& model_format) {
if (model_format == Frontend::PADDLE) {
if (model_file.size() < 8 ||
model_file.substr(model_file.size() - 8, 8) != ".pdmodel") {
FDERROR << "With model format of Frontend::PADDLE, the model file "
"should ends with `.pdmodel`, but now it's "
<< model_file << std::endl;
return false;
}
} else if (model_format == Frontend::ONNX) {
if (model_file.size() < 5 ||
model_file.substr(model_file.size() - 5, 5) != ".onnx") {
FDERROR << "With model format of Frontend::ONNX, the model file "
"should ends with `.onnx`, but now it's "
<< model_file << std::endl;
return false;
}
} else {
FDERROR << "Only support model format with frontend Frontend::PADDLE / "
"Frontend::ONNX."
<< std::endl;
return false;
}
return true;
}
Frontend GuessModelFormat(const std::string& model_file) {
if (model_file.size() > 8 &&
model_file.substr(model_file.size() - 8, 8) == ".pdmodel") {
FDLogger() << "Model Format: PaddlePaddle." << std::endl;
return Frontend::PADDLE;
} else if (model_file.size() > 5 &&
model_file.substr(model_file.size() - 5, 5) == ".onnx") {
FDLogger() << "Model Format: ONNX." << std::endl;
return Frontend::ONNX;
}
FDERROR << "Cannot guess which model format you are using, please set "
"RuntimeOption::model_format manually."
<< std::endl;
return Frontend::PADDLE;
}
void RuntimeOption::SetModelPath(const std::string& model_path,
const std::string& params_path,
const std::string& _model_format) {
if (_model_format == "paddle") {
model_file = model_path;
params_file = params_path;
model_format = Frontend::PADDLE;
} else if (_model_format == "onnx") {
model_file = model_path;
model_format = Frontend::ONNX;
} else {
FDASSERT(false, "The model format only can be 'paddle' or 'onnx'.");
}
}
void RuntimeOption::UseGpu(int gpu_id) {
#ifdef WITH_GPU
device = Device::GPU;
device_id = gpu_id;
#else
FDWARNING << "The FastDeploy didn't compile with GPU, will force to use CPU."
<< std::endl;
device = Device::CPU;
#endif
}
void RuntimeOption::UseCpu() { device = Device::CPU; }
void RuntimeOption::SetCpuThreadNum(int thread_num) {
FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
cpu_thread_num = thread_num;
}
// use paddle inference backend
void RuntimeOption::UsePaddleBackend() {
#ifdef ENABLE_PADDLE_BACKEND
backend = Backend::PDINFER;
#else
FDASSERT(false, "The FastDeploy didn't compile with Paddle Inference.");
#endif
}
// use onnxruntime backend
void RuntimeOption::UseOrtBackend() {
#ifdef ENABLE_ORT_BACKEND
backend = Backend::ORT;
#else
FDASSERT(false, "The FastDeploy didn't compile with OrtBackend.");
#endif
}
void RuntimeOption::UseTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
backend = Backend::TRT;
#else
FDASSERT(false, "The FastDeploy didn't compile with TrtBackend.");
#endif
}
void RuntimeOption::EnablePaddleMKLDNN() { pd_enable_mkldnn = true; }
void RuntimeOption::DisablePaddleMKLDNN() { pd_enable_mkldnn = false; }
void RuntimeOption::SetPaddleMKLDNNCacheSize(int size) {
FDASSERT(size > 0, "Parameter size must greater than 0.");
pd_mkldnn_cache_size = size;
}
void RuntimeOption::SetTrtInputShape(const std::string& input_name,
const std::vector<int32_t>& min_shape,
const std::vector<int32_t>& opt_shape,
const std::vector<int32_t>& max_shape) {
trt_min_shape[input_name].clear();
trt_max_shape[input_name].clear();
trt_opt_shape[input_name].clear();
trt_min_shape[input_name].assign(min_shape.begin(), min_shape.end());
if (opt_shape.size() == 0) {
trt_opt_shape[input_name].assign(min_shape.begin(), min_shape.end());
} else {
trt_opt_shape[input_name].assign(opt_shape.begin(), opt_shape.end());
}
if (max_shape.size() == 0) {
trt_max_shape[input_name].assign(min_shape.begin(), min_shape.end());
} else {
trt_max_shape[input_name].assign(max_shape.begin(), max_shape.end());
}
}
void RuntimeOption::EnableTrtFP16() { trt_enable_fp16 = true; }
void RuntimeOption::DisableTrtFP16() { trt_enable_fp16 = false; }
void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) {
trt_serialize_file = cache_file_path;
}
bool Runtime::Init(const RuntimeOption& _option) {
option = _option;
if (option.model_format == Frontend::AUTOREC) {
option.model_format = GuessModelFormat(_option.model_file);
}
if (option.backend == Backend::UNKNOWN) {
if (IsBackendAvailable(Backend::ORT)) {
option.backend = Backend::ORT;
} else if (IsBackendAvailable(Backend::PDINFER)) {
option.backend = Backend::PDINFER;
} else {
FDERROR << "Please define backend in RuntimeOption, current it's "
"Backend::UNKNOWN."
<< std::endl;
return false;
}
}
if (option.backend == Backend::ORT) {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
"Backend::TRT only supports Device::CPU/Device::GPU.");
CreateOrtBackend();
} else if (option.backend == Backend::TRT) {
FDASSERT(option.device == Device::GPU,
"Backend::TRT only supports Device::GPU.");
CreateTrtBackend();
} else if (option.backend == Backend::PDINFER) {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
"Backend::TRT only supports Device::CPU/Device::GPU.");
FDASSERT(
option.model_format == Frontend::PADDLE,
"Backend::PDINFER only supports model format of Frontend::PADDLE.");
CreatePaddleBackend();
} else {
FDERROR << "Runtime only support "
"Backend::ORT/Backend::TRT/Backend::PDINFER as backend now."
<< std::endl;
return false;
}
return true;
}
TensorInfo Runtime::GetInputInfo(int index) {
return backend_->GetInputInfo(index);
}
TensorInfo Runtime::GetOutputInfo(int index) {
return backend_->GetOutputInfo(index);
}
bool Runtime::Infer(std::vector<FDTensor>& input_tensors,
std::vector<FDTensor>* output_tensors) {
return backend_->Infer(input_tensors, output_tensors);
}
void Runtime::CreatePaddleBackend() {
#ifdef ENABLE_PADDLE_BACKEND
auto pd_option = PaddleBackendOption();
pd_option.enable_mkldnn = option.pd_enable_mkldnn;
pd_option.mkldnn_cache_size = option.pd_mkldnn_cache_size;
pd_option.use_gpu = (option.device == Device::GPU) ? true : false;
pd_option.gpu_id = option.device_id;
pd_option.cpu_thread_num = option.cpu_thread_num;
FDASSERT(option.model_format == Frontend::PADDLE,
"PaddleBackend only support model format of Frontend::PADDLE.");
backend_ = utils::make_unique<PaddleBackend>();
auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
pd_option),
"Load model from Paddle failed while initliazing PaddleBackend.");
#else
FDASSERT(false,
"PaddleBackend is not available, please compiled with "
"ENABLE_PADDLE_BACKEND=ON.");
#endif
}
void Runtime::CreateOrtBackend() {
#ifdef ENABLE_ORT_BACKEND
auto ort_option = OrtBackendOption();
ort_option.graph_optimization_level = option.ort_graph_opt_level;
ort_option.intra_op_num_threads = option.cpu_thread_num;
ort_option.inter_op_num_threads = option.ort_inter_op_num_threads;
ort_option.execution_mode = option.ort_execution_mode;
ort_option.use_gpu = (option.device == Device::GPU) ? true : false;
ort_option.gpu_id = option.device_id;
// TODO(jiangjiajun): inside usage, maybe remove this later
ort_option.remove_multiclass_nms_ = option.remove_multiclass_nms_;
ort_option.custom_op_info_ = option.custom_op_info_;
FDASSERT(option.model_format == Frontend::PADDLE ||
option.model_format == Frontend::ONNX,
"OrtBackend only support model format of Frontend::PADDLE / "
"Frontend::ONNX.");
backend_ = utils::make_unique<OrtBackend>();
auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get());
if (option.model_format == Frontend::ONNX) {
FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option),
"Load model from ONNX failed while initliazing OrtBackend.");
} else {
FDASSERT(casted_backend->InitFromPaddle(option.model_file,
option.params_file, ort_option),
"Load model from Paddle failed while initliazing OrtBackend.");
}
#else
FDASSERT(false,
"OrtBackend is not available, please compiled with "
"ENABLE_ORT_BACKEND=ON.");
#endif
}
void Runtime::CreateTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
auto trt_option = TrtBackendOption();
trt_option.gpu_id = option.device_id;
trt_option.enable_fp16 = option.trt_enable_fp16;
trt_option.enable_int8 = option.trt_enable_int8;
trt_option.max_batch_size = option.trt_max_batch_size;
trt_option.max_workspace_size = option.trt_max_workspace_size;
trt_option.max_shape = option.trt_max_shape;
trt_option.min_shape = option.trt_min_shape;
trt_option.opt_shape = option.trt_opt_shape;
trt_option.serialize_file = option.trt_serialize_file;
// TODO(jiangjiajun): inside usage, maybe remove this later
trt_option.remove_multiclass_nms_ = option.remove_multiclass_nms_;
trt_option.custom_op_info_ = option.custom_op_info_;
FDASSERT(option.model_format == Frontend::PADDLE ||
option.model_format == Frontend::ONNX,
"TrtBackend only support model format of Frontend::PADDLE / "
"Frontend::ONNX.");
backend_ = utils::make_unique<TrtBackend>();
auto casted_backend = dynamic_cast<TrtBackend*>(backend_.get());
if (option.model_format == Frontend::ONNX) {
FDASSERT(casted_backend->InitFromOnnx(option.model_file, trt_option),
"Load model from ONNX failed while initliazing TrtBackend.");
} else {
FDASSERT(casted_backend->InitFromPaddle(option.model_file,
option.params_file, trt_option),
"Load model from Paddle failed while initliazing TrtBackend.");
}
#else
FDASSERT(false,
"TrtBackend is not available, please compiled with "
"ENABLE_TRT_BACKEND=ON.");
#endif
}
} // namespace fastdeploy

View File

@@ -1,159 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/utils/perf.h"
namespace fastdeploy {
enum FASTDEPLOY_DECL Backend { UNKNOWN, ORT, TRT, PDINFER };
// AUTOREC will decide which Frontend to use
// according to the name of the model file
enum FASTDEPLOY_DECL Frontend { AUTOREC, PADDLE, ONNX };
FASTDEPLOY_DECL std::string Str(const Backend& b);
FASTDEPLOY_DECL std::string Str(const Frontend& f);
FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
bool CheckModelFormat(const std::string& model_file,
const Frontend& model_format);
Frontend GuessModelFormat(const std::string& model_file);
struct FASTDEPLOY_DECL RuntimeOption {
  // set the path of the model file and the params file
  // for onnx, only model_file needs to be set, but model_format
  // still needs to be specified
  // model_format supports 'paddle' / 'onnx' now
void SetModelPath(const std::string& model_path,
const std::string& params_path = "",
const std::string& _model_format = "paddle");
  // set model inference to run on CPU
  void UseCpu();
  // set model inference to run on GPU
void UseGpu(int gpu_id = 0);
  // set the number of threads for inference on CPU
void SetCpuThreadNum(int thread_num);
// use paddle inference backend
void UsePaddleBackend();
// use onnxruntime backend
void UseOrtBackend();
// use tensorrt backend
void UseTrtBackend();
  // enable mkldnn while using paddle inference on CPU
  void EnablePaddleMKLDNN();
  // disable mkldnn while using paddle inference on CPU
  void DisablePaddleMKLDNN();
  // set the size of the cached shapes while mkldnn is enabled with the paddle
  // inference backend
void SetPaddleMKLDNNCacheSize(int size);
  // set tensorrt input shapes while the inputs of the model contain dynamic
  // shapes
  // min_shape: the minimum shape
  // opt_shape: the most common shape during inference, empty by default
  // max_shape: the maximum shape, empty by default
  // if opt_shape and max_shape are empty, they will be kept the same as
  // min_shape, which means the shape will be fixed to min_shape during
  // inference
void SetTrtInputShape(
const std::string& input_name, const std::vector<int32_t>& min_shape,
const std::vector<int32_t>& opt_shape = std::vector<int32_t>(),
const std::vector<int32_t>& max_shape = std::vector<int32_t>());
  // enable half precision while using the tensorrt backend
  void EnableTrtFP16();
  // disable half precision, change back to full precision (float32)
void DisableTrtFP16();
void SetTrtCacheFile(const std::string& cache_file_path);
Backend backend = Backend::UNKNOWN;
// for cpu inference and preprocess
int cpu_thread_num = 8;
int device_id = 0;
Device device = Device::CPU;
// ======Only for ORT Backend========
// -1 means use default value by ort
// 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3:
// ORT_ENABLE_ALL
int ort_graph_opt_level = -1;
int ort_inter_op_num_threads = -1;
// 0: ORT_SEQUENTIAL 1: ORT_PARALLEL
int ort_execution_mode = -1;
// ======Only for Paddle Backend=====
bool pd_enable_mkldnn = true;
int pd_mkldnn_cache_size = 1;
// ======Only for Trt Backend=======
std::map<std::string, std::vector<int32_t>> trt_max_shape;
std::map<std::string, std::vector<int32_t>> trt_min_shape;
std::map<std::string, std::vector<int32_t>> trt_opt_shape;
std::string trt_serialize_file = "";
bool trt_enable_fp16 = false;
bool trt_enable_int8 = false;
size_t trt_max_batch_size = 32;
size_t trt_max_workspace_size = 1 << 30;
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
Frontend model_format = Frontend::AUTOREC; // format of input model
  // internal parameters, only for internal usage
// remove multiclass_nms in Paddle2ONNX
bool remove_multiclass_nms_ = false;
// for Paddle2ONNX to export custom operators
std::map<std::string, std::string> custom_op_info_;
};
struct FASTDEPLOY_DECL Runtime {
public:
// explicit Runtime(const RuntimeOption& _option = RuntimeOption());
bool Init(const RuntimeOption& _option);
bool Infer(std::vector<FDTensor>& input_tensors,
std::vector<FDTensor>* output_tensors);
void CreateOrtBackend();
void CreatePaddleBackend();
void CreateTrtBackend();
int NumInputs() { return backend_->NumInputs(); }
int NumOutputs() { return backend_->NumOutputs(); }
TensorInfo GetInputInfo(int index);
TensorInfo GetOutputInfo(int index);
RuntimeOption option;
private:
std::unique_ptr<BaseBackend> backend_;
};
} // namespace fastdeploy
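A minimal end-to-end sketch of the RuntimeOption/Runtime API above; the model path, backend choice and input shape are placeholder assumptions.
// Sketch: configure a runtime for an ONNX model on CPU with the ORT
// backend, then run a single inference with one input tensor.
#include <vector>
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/fastdeploy_runtime.h"

bool RunOnceSketch() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.onnx", "", "onnx");  // placeholder path
  option.UseCpu();
  option.SetCpuThreadNum(4);
  option.UseOrtBackend();

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) return false;

  // Name the input after the model's first input and allocate a buffer
  // with a placeholder shape.
  fastdeploy::TensorInfo info = runtime.GetInputInfo(0);
  std::vector<fastdeploy::FDTensor> inputs(1);
  inputs[0].Allocate({1, 3, 224, 224}, fastdeploy::FDDataType::FP32,
                     info.name);

  std::vector<fastdeploy::FDTensor> outputs;
  return runtime.Infer(inputs, &outputs);
}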

View File

@@ -1,32 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/function/eigen.h"
namespace fastdeploy {
std::shared_ptr<EigenDeviceWrapper> EigenDeviceWrapper::instance_ = nullptr;
std::shared_ptr<EigenDeviceWrapper> EigenDeviceWrapper::GetInstance() {
if (instance_ == nullptr) {
instance_ = std::make_shared<EigenDeviceWrapper>();
}
return instance_;
}
const Eigen::DefaultDevice* EigenDeviceWrapper::GetDevice() const {
return &device_;
}
} // namespace fastdeploy

View File

@@ -1,109 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <memory>
#include <vector>
#include "fastdeploy/core/fd_tensor.h"
#include "unsupported/Eigen/CXX11/Tensor"
namespace fastdeploy {
// EigenDim converts shape into Eigen::DSizes.
template <int D>
struct EigenDim {
using Type = Eigen::DSizes<Eigen::DenseIndex, D>;
static Type From(const std::vector<int64_t>& dims) {
Type ret;
for (int64_t d = 0; d < dims.size(); d++) {
ret[d] = dims[d];
}
return ret;
}
};
// Interpret FDTensor as EigenTensor and EigenConstTensor.
template <typename T, size_t D, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
struct EigenTensor {
using Type = Eigen::TensorMap<Eigen::Tensor<T, D, MajorType, IndexType>>;
using ConstType =
Eigen::TensorMap<Eigen::Tensor<const T, D, MajorType, IndexType>>;
static Type From(FDTensor& tensor,
const std::vector<int64_t>& dims) { // NOLINT
return Type(reinterpret_cast<T*>(tensor.Data()), EigenDim<D>::From(dims));
}
static Type From(FDTensor& tensor) { // NOLINT
return From(tensor, tensor.shape);
} // NOLINT
static ConstType From(const FDTensor& tensor,
const std::vector<int64_t>& dims) {
return ConstType(reinterpret_cast<const T*>(tensor.Data()),
EigenDim<D>::From(dims));
}
static ConstType From(const FDTensor& tensor) {
return From(tensor, tensor.shape);
}
};
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
struct EigenScalar {
// Scalar tensor (implemented as a rank-0 tensor) of scalar type T.
using Type = Eigen::TensorMap<
Eigen::TensorFixedSize<T, Eigen::Sizes<>, MajorType, IndexType>>;
using ConstType = Eigen::TensorMap<
Eigen::TensorFixedSize<const T, Eigen::Sizes<>, MajorType, IndexType>>;
static Type From(FDTensor& tensor) {
return Type(reinterpret_cast<T*>(tensor.Data()));
} // NOLINT
static ConstType From(const FDTensor& tensor) {
return ConstType(reinterpret_cast<const T*>(tensor.Data()));
}
};
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
struct EigenVector : public EigenTensor<T, 1, MajorType, IndexType> {
// Flatten reshapes a Tensor into an EigenVector.
static typename EigenVector::Type Flatten(FDTensor& tensor) { // NOLINT
return EigenVector::From(tensor, {tensor.Numel()});
}
static typename EigenVector::ConstType Flatten(
const FDTensor& tensor) { // NOLINT
return EigenVector::From(tensor, {tensor.Numel()});
}
};
class EigenDeviceWrapper {
public:
static std::shared_ptr<EigenDeviceWrapper> GetInstance();
const Eigen::DefaultDevice* GetDevice() const;
private:
Eigen::DefaultDevice device_;
static std::shared_ptr<EigenDeviceWrapper> instance_;
};
} // namespace fastdeploy
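A small sketch of mapping an FDTensor onto Eigen expressions with the helpers above; the shape and values are illustrative.
// Sketch: create rank-2 and rank-1 views over the same FDTensor buffer
// and update it in place through the Eigen default device.
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/function/eigen.h"

void EigenMapSketch() {
  using namespace fastdeploy;
  FDTensor t;
  t.Allocate({2, 3}, FDDataType::FP32);
  auto x = EigenTensor<float, 2>::From(t);     // rank-2 map over t's buffer
  x.setConstant(1.0f);                         // writes through to t's data
  auto flat = EigenVector<float>::Flatten(t);  // rank-1 view of the same data
  const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice();
  flat.device(dev) = flat * 2.0f;              // elementwise update in place
}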

View File

@@ -1,246 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <set>
#include "fastdeploy/function/eigen.h"
#include "fastdeploy/function/reduce.h"
#include "fastdeploy/function/reduce_functor.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
#ifdef ENABLE_FDTENSOR_FUNC
template <typename T, size_t D, size_t R_D, typename Functor>
void ReduceFunctor(const FDTensor& input, FDTensor* output,
const std::vector<int64_t>& dims, bool keep_dim) {
auto x = EigenTensor<T, D>::From(input);
auto x_rank = static_cast<int>(x.dimensions().size());
auto reduce_dim = Eigen::array<int, R_D>();
std::vector<int64_t> dims_ref = dims;
auto out_dims = input.shape;
for (size_t i = 0; i < dims_ref.size(); ++i) {
if (dims_ref[i] < 0) dims_ref[i] = x_rank + dims_ref[i];
reduce_dim[i] = dims_ref[i];
out_dims[dims_ref[i]] = 1;
}
auto origin_output_dims = out_dims;
output->Allocate(origin_output_dims, TypeToDataType<T>::dtype);
// construct the squeezed output tensor
if (x_rank > 1) {
const int kDelFlag = -2;
for (size_t i = 0; i < dims_ref.size(); ++i) {
out_dims[dims_ref[i]] = kDelFlag;
}
out_dims.erase(remove(out_dims.begin(), out_dims.end(), kDelFlag),
out_dims.end());
}
auto& place = *EigenDeviceWrapper::GetInstance()->GetDevice();
Functor functor;
if (D == 1) {
auto out = EigenScalar<T>::From(*output);
functor(place, &x, &out, reduce_dim);
} else {
auto out = EigenTensor<T, (D - R_D)>::From(*output, out_dims);
functor(place, &x, &out, reduce_dim);
if (!keep_dim) {
output->shape = std::move(out_dims);
}
}
}
#define HANDLE_REDUCE_DIM(NDIM, RDIM) \
if (ndim == NDIM && rdim == RDIM) { \
ReduceFunctor<OutT, NDIM, RDIM, Functor>(input, output, dims, keep_dim); \
}
inline void GetShuffledDim(const std::vector<int64_t>& src_dims,
std::vector<int64_t>* dst_dims,
const std::vector<int64_t>& reduced_dims,
std::vector<int>* perm_axis) {
// check if it's a reduced dim
std::vector<bool> src_dims_check(src_dims.size(), false);
size_t src_size = src_dims.size();
size_t reduce_size = reduced_dims.size();
std::vector<int64_t> regular_reduced_dims = reduced_dims;
for (size_t i = 0; i < regular_reduced_dims.size(); i++) {
if (regular_reduced_dims[i] < 0) {
regular_reduced_dims[i] = src_size + regular_reduced_dims[i];
}
}
for (size_t i = 0; i < reduce_size; ++i) {
dst_dims->at(src_size - reduce_size + i) =
src_dims[regular_reduced_dims[i]];
(*perm_axis)[src_size - reduce_size + i] = regular_reduced_dims[i];
src_dims_check[regular_reduced_dims[i]] = true;
}
size_t offset = 0;
for (size_t i = 0; i < src_dims_check.size(); ++i) {
bool is_reduced = src_dims_check[i];
if (!is_reduced) {
(*perm_axis)[offset] = i;
dst_dims->at(offset++) = src_dims[i];
}
}
}
template <typename OutT>
void GetShuffledInput(const FDTensor& input, FDTensor* shuffled_input,
const std::vector<int64_t>& dims) {
auto shuffled_dims = input.shape;
std::vector<int> perm_axis(input.shape.size());
GetShuffledDim(input.shape, &shuffled_dims, dims, &perm_axis);
shuffled_input->Allocate(shuffled_dims, input.dtype);
// TODO(zhoushunjie) : Need to implement trans function
// phi::funcs::TransposeNormal<DeviceContext, OutT> trans;
// trans(dev_ctx, input, shuffled_input, perm_axis);
}
//////////////// HandleLargeDim
template <typename OutT, typename Functor>
void HandleLargeDim(const FDTensor& input, FDTensor* output,
const std::vector<int64_t>& dims, bool keep_dim) {
// shuffle the reduced dim to the end
FDTensor shuffled_input;
GetShuffledInput<OutT>(input, &shuffled_input, dims);
// transpose to 2D tensor whose shape is {unreduced, reduced}.
const int64_t unreduced = output->Numel();
const int64_t reduced = shuffled_input.Numel() / unreduced;
shuffled_input.Allocate({unreduced, reduced}, TypeToDataType<OutT>::dtype);
auto output_dim = output->shape;
output->Allocate({unreduced}, TypeToDataType<OutT>::dtype);
ReduceFunctor<OutT, 2, 1, Functor>(shuffled_input, output, {1}, keep_dim);
output->shape = output_dim;
}
////////////// ReduceKernel
template <typename OutT, typename Functor>
void ReduceKernelImpl(const FDTensor& input, FDTensor* output,
const std::vector<int64_t>& dims, bool keep_dim,
bool reduce_all) {
output->Allocate({1}, TypeToDataType<OutT>::dtype);
const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice();
if (reduce_all) {
// Flatten and reduce 1-D tensor
auto x = EigenVector<OutT>::Flatten(input);
auto out = EigenScalar<OutT>::From(*output);
auto reduce_dim = Eigen::array<int, 1>({{0}});
Functor functor;
functor(dev, &x, &out, reduce_dim);
} else {
int ndim = input.shape.size();
int rdim = dims.size();
if (ndim > 3) {
HandleLargeDim<OutT, Functor>(input, output, dims, keep_dim);
} else {
HANDLE_REDUCE_DIM(4, 3);
HANDLE_REDUCE_DIM(4, 2);
HANDLE_REDUCE_DIM(4, 1);
HANDLE_REDUCE_DIM(3, 2);
HANDLE_REDUCE_DIM(3, 1);
HANDLE_REDUCE_DIM(2, 1);
HANDLE_REDUCE_DIM(1, 1);
}
}
}
template <typename OutT, typename Functor>
void BoolReduceKernel(const FDTensor& input, FDTensor* output,
const std::vector<int64_t>& dims, bool keep_dim,
bool reduce_all) {
  // If dims covers all dimensions, set reduce_all to true
const auto& input_dim_size = input.shape.size();
std::set<int> dims_set(dims.begin(), dims.end());
bool full_dim = true;
for (auto i = 0; i < input_dim_size; i++) {
if (dims_set.find(i) == dims_set.end()) {
full_dim = false;
break;
}
}
reduce_all = (reduce_all || full_dim);
ReduceKernelImpl<bool, Functor>(input, output, dims, keep_dim, reduce_all);
}
template <typename Functor>
void Reduce(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
bool keep_dim, bool reduce_all) {
  // If dims covers all dimensions, set reduce_all to true
const int& input_dim_size = x.shape.size();
std::set<int> dims_set(dims.begin(), dims.end());
bool full_dim = true;
for (int i = 0; i < input_dim_size; ++i) {
if (dims_set.find(i) == dims_set.end() &&
dims_set.find(i - input_dim_size) == dims_set.end()) {
full_dim = false;
break;
}
}
reduce_all = (reduce_all || full_dim);
FD_VISIT_ALL_TYPES(x.dtype, "ReduceKernelImpl", ([&] {
ReduceKernelImpl<data_t, Functor>(x, out, dims, keep_dim,
reduce_all);
}));
}
void Max(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
bool keep_dim, bool reduce_all) {
Reduce<MaxFunctor>(x, out, dims, keep_dim, reduce_all);
}
void Min(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
bool keep_dim, bool reduce_all) {
Reduce<MinFunctor>(x, out, dims, keep_dim, reduce_all);
}
void Sum(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
bool keep_dim, bool reduce_all) {
Reduce<SumFunctor>(x, out, dims, keep_dim, reduce_all);
}
void All(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
bool keep_dim, bool reduce_all) {
BoolReduceKernel<bool, AllFunctor>(x, out, dims, keep_dim, reduce_all);
}
void Any(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
bool keep_dim, bool reduce_all) {
BoolReduceKernel<bool, AnyFunctor>(x, out, dims, keep_dim, reduce_all);
}
void Mean(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
bool keep_dim, bool reduce_all) {
Reduce<MeanFunctor>(x, out, dims, keep_dim, reduce_all);
}
void Prod(const FDTensor& x, FDTensor* out, const std::vector<int64_t>& dims,
bool keep_dim, bool reduce_all) {
Reduce<ProdFunctor>(x, out, dims, keep_dim, reduce_all);
}
#endif
} // namespace fastdeploy
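A worked example of what GetShuffledDim above computes for a concrete case; since the helper is file-local, this check is only a sketch that assumes it runs in the same translation unit and that ENABLE_FDTENSOR_FUNC is defined.
// Sketch: for src_dims = {2, 3, 4, 5} and reduced_dims = {1, 3}, unreduced
// axes move to the front and reduced axes to the back, so HandleLargeDim can
// view the data as a {2 * 4, 3 * 5} matrix and reduce its trailing axis.
#include <cassert>
#include <cstdint>
#include <vector>

void ShuffledDimSketch() {
  std::vector<int64_t> src = {2, 3, 4, 5};
  std::vector<int64_t> dst = src;  // filled in place, same rank
  std::vector<int> perm(src.size());
  fastdeploy::GetShuffledDim(src, &dst, {1, 3}, &perm);
  assert((dst == std::vector<int64_t>{2, 4, 3, 5}));
  assert((perm == std::vector<int>{0, 2, 1, 3}));
}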

View File

@@ -1,100 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/core/fd_tensor.h"
namespace fastdeploy {
#ifdef ENABLE_FDTENSOR_FUNC
/** Execute the maximum operation for the input FDTensor along the given dims.
    @param x The input tensor.
    @param out The output tensor which stores the result.
    @param dims The vector of axes which will be reduced.
@param keep_dim Whether to keep the reduced dims, default false.
@param reduce_all Whether to reduce all dims, default false.
*/
FASTDEPLOY_DECL void Max(const FDTensor& x, FDTensor* out,
const std::vector<int64_t>& dims,
bool keep_dim = false, bool reduce_all = false);
/** Execute the minimum operation for the input FDTensor along the given dims.
    @param x The input tensor.
    @param out The output tensor which stores the result.
    @param dims The vector of axes which will be reduced.
@param keep_dim Whether to keep the reduced dims, default false.
@param reduce_all Whether to reduce all dims, default false.
*/
FASTDEPLOY_DECL void Min(const FDTensor& x, FDTensor* out,
const std::vector<int64_t>& dims,
bool keep_dim = false, bool reduce_all = false);
/** Execute the sum operation for the input FDTensor along the given dims.
    @param x The input tensor.
    @param out The output tensor which stores the result.
    @param dims The vector of axes which will be reduced.
@param keep_dim Whether to keep the reduced dims, default false.
@param reduce_all Whether to reduce all dims, default false.
*/
FASTDEPLOY_DECL void Sum(const FDTensor& x, FDTensor* out,
const std::vector<int64_t>& dims,
bool keep_dim = false, bool reduce_all = false);
/** Execute the all operation for the input FDTensor along the given dims.
    @param x The input tensor.
    @param out The output tensor which stores the result.
    @param dims The vector of axes which will be reduced.
@param keep_dim Whether to keep the reduced dims, default false.
@param reduce_all Whether to reduce all dims, default false.
*/
FASTDEPLOY_DECL void All(const FDTensor& x, FDTensor* out,
const std::vector<int64_t>& dims,
bool keep_dim = false, bool reduce_all = false);
/** Execute the any operation for the input FDTensor along the given dims.
    @param x The input tensor.
    @param out The output tensor which stores the result.
    @param dims The vector of axes which will be reduced.
@param keep_dim Whether to keep the reduced dims, default false.
@param reduce_all Whether to reduce all dims, default false.
*/
FASTDEPLOY_DECL void Any(const FDTensor& x, FDTensor* out,
const std::vector<int64_t>& dims,
bool keep_dim = false, bool reduce_all = false);
/** Execute the mean operation for the input FDTensor along the given dims.
    @param x The input tensor.
    @param out The output tensor which stores the result.
    @param dims The vector of axes which will be reduced.
@param keep_dim Whether to keep the reduced dims, default false.
@param reduce_all Whether to reduce all dims, default false.
*/
FASTDEPLOY_DECL void Mean(const FDTensor& x, FDTensor* out,
const std::vector<int64_t>& dims,
bool keep_dim = false, bool reduce_all = false);
/** Execute the product operation for the input FDTensor along the given dims.
    @param x The input tensor.
    @param out The output tensor which stores the result.
    @param dims The vector of axes which will be reduced.
@param keep_dim Whether to keep the reduced dims, default false.
@param reduce_all Whether to reduce all dims, default false.
*/
FASTDEPLOY_DECL void Prod(const FDTensor& x, FDTensor* out,
const std::vector<int64_t>& dims,
bool keep_dim = false, bool reduce_all = false);
#endif
} // namespace fastdeploy
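A brief usage sketch of the reduction API documented above; the shape and values are illustrative, and the build is assumed to enable ENABLE_FDTENSOR_FUNC.
// Sketch: reduce a {2, 3} float tensor along axis 1 with and without
// keeping the reduced dimension.
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/function/reduce.h"

void ReduceSketch() {
  using namespace fastdeploy;
  FDTensor x;
  x.Allocate({2, 3}, FDDataType::FP32);
  float* data = reinterpret_cast<float*>(x.MutableData());
  for (int i = 0; i < x.Numel(); ++i) data[i] = static_cast<float>(i);

  FDTensor row_sum;
  Sum(x, &row_sum, {1});                     // row_sum.shape == {2}
  FDTensor row_max;
  Max(x, &row_max, {1}, /*keep_dim=*/true);  // row_max.shape == {2, 1}
}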

View File

@@ -1,76 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/function/eigen.h"
namespace fastdeploy {
//////// Max Functor ///////
struct MaxFunctor {
template <typename X, typename Y, typename Dim>
void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
y->device(dev) = x->maximum(dim);
}
};
//////// Min Functor ///////
struct MinFunctor {
template <typename X, typename Y, typename Dim>
void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
y->device(dev) = x->minimum(dim);
}
};
//////// Sum Functor ///////
struct SumFunctor {
template <typename X, typename Y, typename Dim>
void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
y->device(dev) = x->sum(dim);
}
};
//////// All Functor ///////
struct AllFunctor {
template <typename X, typename Y, typename Dim>
void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
y->device(dev) = x->all(dim);
}
};
//////// Any Functor ///////
struct AnyFunctor {
template <typename X, typename Y, typename Dim>
void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
y->device(dev) = x->any(dim);
}
};
//////// Mean Functor ///////
struct MeanFunctor {
template <typename X, typename Y, typename Dim>
void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
y->device(dev) = x->mean(dim);
}
};
//////// Prod Functor ///////
struct ProdFunctor {
template <typename X, typename Y, typename Dim>
void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) {
y->device(dev) = x->prod(dim);
}
};
} // namespace fastdeploy
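For orientation, a sketch of how one of the functors above is invoked directly on Eigen maps; it mirrors the call pattern used by ReduceFunctor in reduce.cc, with illustrative shapes.
// Sketch: apply SumFunctor to a rank-2 map, reducing the trailing axis
// into a rank-1 map.
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/function/eigen.h"
#include "fastdeploy/function/reduce_functor.h"

void SumFunctorSketch() {
  using namespace fastdeploy;
  FDTensor in, out;
  in.Allocate({2, 3}, FDDataType::FP32);
  out.Allocate({2}, FDDataType::FP32);
  auto x = EigenTensor<float, 2>::From(in);
  auto y = EigenTensor<float, 1>::From(out);
  Eigen::array<int, 1> reduce_dim = {{1}};  // reduce along the last axis
  const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice();
  SumFunctor()(dev, &x, &y, reduce_dim);    // y.device(dev) = x.sum(dim)
}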

View File

@@ -1,35 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/pybind/main.h"
#include "fastdeploy/fastdeploy_model.h"
namespace fastdeploy {
void BindFDModel(pybind11::module& m) {
pybind11::class_<FastDeployModel>(m, "FastDeployModel")
.def(pybind11::init<>(), "Default Constructor")
.def("model_name", &FastDeployModel::ModelName)
.def("num_inputs_of_runtime", &FastDeployModel::NumInputsOfRuntime)
.def("num_outputs_of_runtime", &FastDeployModel::NumOutputsOfRuntime)
.def("input_info_of_runtime", &FastDeployModel::InputInfoOfRuntime)
.def("output_info_of_runtime", &FastDeployModel::OutputInfoOfRuntime)
.def("initialized", &FastDeployModel::Initialized)
.def_readwrite("runtime_option", &FastDeployModel::runtime_option)
.def_readwrite("valid_cpu_backends", &FastDeployModel::valid_cpu_backends)
.def_readwrite("valid_gpu_backends",
&FastDeployModel::valid_gpu_backends);
}
} // namespace fastdeploy

View File

@@ -1,134 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/pybind/main.h"
namespace fastdeploy {
void BindRuntime(pybind11::module& m) {
pybind11::class_<RuntimeOption>(m, "RuntimeOption")
.def(pybind11::init())
.def("set_model_path", &RuntimeOption::SetModelPath)
.def("use_gpu", &RuntimeOption::UseGpu)
.def("use_cpu", &RuntimeOption::UseCpu)
.def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
.def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
.def("use_ort_backend", &RuntimeOption::UseOrtBackend)
.def("use_trt_backend", &RuntimeOption::UseTrtBackend)
.def("enable_paddle_mkldnn", &RuntimeOption::EnablePaddleMKLDNN)
.def("disable_paddle_mkldnn", &RuntimeOption::DisablePaddleMKLDNN)
.def("set_paddle_mkldnn_cache_size",
&RuntimeOption::SetPaddleMKLDNNCacheSize)
.def("set_trt_input_shape", &RuntimeOption::SetTrtInputShape)
.def("enable_trt_fp16", &RuntimeOption::EnableTrtFP16)
.def("disable_trt_fp16", &RuntimeOption::DisableTrtFP16)
.def("set_trt_cache_file", &RuntimeOption::SetTrtCacheFile)
.def_readwrite("model_file", &RuntimeOption::model_file)
.def_readwrite("params_file", &RuntimeOption::params_file)
.def_readwrite("model_format", &RuntimeOption::model_format)
.def_readwrite("backend", &RuntimeOption::backend)
.def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
.def_readwrite("device_id", &RuntimeOption::device_id)
.def_readwrite("device", &RuntimeOption::device)
.def_readwrite("ort_graph_opt_level", &RuntimeOption::ort_graph_opt_level)
.def_readwrite("ort_inter_op_num_threads",
&RuntimeOption::ort_inter_op_num_threads)
.def_readwrite("ort_execution_mode", &RuntimeOption::ort_execution_mode)
.def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape)
.def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape)
.def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape)
.def_readwrite("trt_serialize_file", &RuntimeOption::trt_serialize_file)
.def_readwrite("trt_enable_fp16", &RuntimeOption::trt_enable_fp16)
.def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8)
.def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size)
.def_readwrite("trt_max_workspace_size",
&RuntimeOption::trt_max_workspace_size);
pybind11::class_<TensorInfo>(m, "TensorInfo")
.def_readwrite("name", &TensorInfo::name)
.def_readwrite("shape", &TensorInfo::shape)
.def_readwrite("dtype", &TensorInfo::dtype);
pybind11::class_<Runtime>(m, "Runtime")
.def(pybind11::init())
.def("init", &Runtime::Init)
.def("infer",
[](Runtime& self, std::map<std::string, pybind11::array>& data) {
std::vector<FDTensor> inputs(data.size());
int index = 0;
for (auto iter = data.begin(); iter != data.end(); ++iter) {
inputs[index].dtype =
NumpyDataTypeToFDDataType(iter->second.dtype());
inputs[index].shape.insert(
inputs[index].shape.begin(), iter->second.shape(),
iter->second.shape() + iter->second.ndim());
             // TODO(jiangjiajun) Maybe skipping the memory copy is a better
             // choice, use SetExternalData
inputs[index].data.resize(iter->second.nbytes());
memcpy(inputs[index].data.data(), iter->second.mutable_data(),
iter->second.nbytes());
inputs[index].name = iter->first;
index += 1;
}
std::vector<FDTensor> outputs(self.NumOutputs());
self.Infer(inputs, &outputs);
std::vector<pybind11::array> results;
results.reserve(outputs.size());
for (size_t i = 0; i < outputs.size(); ++i) {
auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
results.emplace_back(
pybind11::array(numpy_dtype, outputs[i].shape));
memcpy(results[i].mutable_data(), outputs[i].data.data(),
outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
}
return results;
})
.def("num_inputs", &Runtime::NumInputs)
.def("num_outputs", &Runtime::NumOutputs)
.def("get_input_info", &Runtime::GetInputInfo)
.def("get_output_info", &Runtime::GetOutputInfo)
.def_readonly("option", &Runtime::option);
pybind11::enum_<Backend>(m, "Backend", pybind11::arithmetic(),
"Backend for inference.")
.value("UNKOWN", Backend::UNKNOWN)
.value("ORT", Backend::ORT)
.value("TRT", Backend::TRT)
.value("PDINFER", Backend::PDINFER);
pybind11::enum_<Frontend>(m, "Frontend", pybind11::arithmetic(),
"Frontend for inference.")
.value("PADDLE", Frontend::PADDLE)
.value("ONNX", Frontend::ONNX);
pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(),
"Device for inference.")
.value("CPU", Device::CPU)
.value("GPU", Device::GPU);
pybind11::enum_<FDDataType>(m, "FDDataType", pybind11::arithmetic(),
"Data type of FastDeploy.")
.value("BOOL", FDDataType::BOOL)
.value("INT8", FDDataType::INT8)
.value("INT16", FDDataType::INT16)
.value("INT32", FDDataType::INT32)
.value("INT64", FDDataType::INT64)
.value("FP32", FDDataType::FP32)
.value("FP64", FDDataType::FP64)
.value("UINT8", FDDataType::UINT8);
m.def("get_available_backends", []() { return GetAvailableBackends(); });
}
} // namespace fastdeploy

View File

@@ -1,127 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/pybind/main.h"
namespace fastdeploy {
void BindRuntime(pybind11::module&);
void BindFDModel(pybind11::module&);
void BindVision(pybind11::module&);
pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype) {
pybind11::dtype dt;
if (fd_dtype == FDDataType::INT32) {
dt = pybind11::dtype::of<int32_t>();
} else if (fd_dtype == FDDataType::INT64) {
dt = pybind11::dtype::of<int64_t>();
} else if (fd_dtype == FDDataType::FP32) {
dt = pybind11::dtype::of<float>();
} else if (fd_dtype == FDDataType::FP64) {
dt = pybind11::dtype::of<double>();
} else if (fd_dtype == FDDataType::UINT8) {
dt = pybind11::dtype::of<uint8_t>();
} else {
FDASSERT(false, "The function doesn't support data type of " +
Str(fd_dtype) + ".");
}
return dt;
}
FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype& np_dtype) {
if (np_dtype.is(pybind11::dtype::of<int32_t>())) {
return FDDataType::INT32;
} else if (np_dtype.is(pybind11::dtype::of<int64_t>())) {
return FDDataType::INT64;
} else if (np_dtype.is(pybind11::dtype::of<float>())) {
return FDDataType::FP32;
} else if (np_dtype.is(pybind11::dtype::of<double>())) {
return FDDataType::FP64;
} else if (np_dtype.is(pybind11::dtype::of<uint8_t>())) {
return FDDataType::UINT8;
}
FDASSERT(false,
"NumpyDataTypeToFDDataType() only support "
"int32/int64/float32/float64 now.");
return FDDataType::FP32;
}
void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor,
bool share_buffer) {
tensor->dtype = NumpyDataTypeToFDDataType(pyarray.dtype());
tensor->shape.insert(tensor->shape.begin(), pyarray.shape(),
pyarray.shape() + pyarray.ndim());
if (share_buffer) {
tensor->external_data_ptr = pyarray.mutable_data();
} else {
tensor->data.resize(pyarray.nbytes());
memcpy(tensor->data.data(), pyarray.mutable_data(), pyarray.nbytes());
}
}
pybind11::array TensorToPyArray(const FDTensor& tensor) {
auto numpy_dtype = FDDataTypeToNumpyDataType(tensor.dtype);
auto out = pybind11::array(numpy_dtype, tensor.shape);
memcpy(out.mutable_data(), tensor.Data(), tensor.Numel() * FDDataTypeSize(tensor.dtype));
return out;
}
#ifdef ENABLE_VISION
int NumpyDataTypeToOpenCvType(const pybind11::dtype& np_dtype) {
if (np_dtype.is(pybind11::dtype::of<int32_t>())) {
return CV_32S;
} else if (np_dtype.is(pybind11::dtype::of<int8_t>())) {
return CV_8U;
} else if (np_dtype.is(pybind11::dtype::of<uint8_t>())) {
return CV_8U;
} else if (np_dtype.is(pybind11::dtype::of<float>())) {
return CV_32F;
} else {
FDASSERT(
false,
"NumpyDataTypeToOpenCvType() only support int32/int8/uint8/float32 "
"now.");
}
return CV_8U;
}
cv::Mat PyArrayToCvMat(pybind11::array& pyarray) {
auto cv_type = NumpyDataTypeToOpenCvType(pyarray.dtype());
FDASSERT(
pyarray.ndim() == 3,
"Require rank of array to be 3 with HWC format while converting it to "
"cv::Mat.");
int channel = *(pyarray.shape() + 2);
int height = *(pyarray.shape());
int width = *(pyarray.shape() + 1);
return cv::Mat(height, width, CV_MAKETYPE(cv_type, channel),
pyarray.mutable_data());
}
#endif
PYBIND11_MODULE(@PY_LIBRARY_NAME@, m) {
m.doc() =
"Make programer easier to deploy deeplearning model, save time to save "
"the world!";
BindRuntime(m);
BindFDModel(m);
#ifdef ENABLE_VISION
auto vision_module =
m.def_submodule("vision", "Vision module of FastDeploy.");
BindVision(vision_module);
#endif
}
} // namespace fastdeploy
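
A minimal round-trip sketch of the conversion helpers above (illustrative only, not part of the original sources): it wraps a small float32 numpy array into an FDTensor with PyArrayToTensor and converts it back with TensorToPyArray. The embedded-interpreter guard and the 2x2 test array are assumptions made just for this example.

#include <vector>
#include <pybind11/embed.h>            // scoped_interpreter; numpy arrays need a live Python
#include "fastdeploy/pybind/main.h"

int main() {
  pybind11::scoped_interpreter guard{};                 // start an embedded interpreter for this sketch
  std::vector<float> raw = {1.f, 2.f, 3.f, 4.f};
  pybind11::array_t<float> arr({2, 2}, raw.data());     // 2x2 float32 array (pybind11 copies the data)

  fastdeploy::FDTensor tensor;
  fastdeploy::PyArrayToTensor(arr, &tensor);            // copies the bytes into tensor.data (share_buffer=false)
  pybind11::array back = fastdeploy::TensorToPyArray(tensor);  // copies them back into a fresh numpy array
  return back.size() == 4 ? 0 : 1;
}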

View File

@@ -1,90 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <type_traits>
#include "fastdeploy/fastdeploy_runtime.h"
#ifdef ENABLE_VISION
#include "fastdeploy/vision.h"
#endif
namespace fastdeploy {
void BindBackend(pybind11::module&);
void BindVision(pybind11::module&);
pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype);
FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype& np_dtype);
void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor,
bool share_buffer = false);
pybind11::array TensorToPyArray(const FDTensor& tensor);
#ifdef ENABLE_VISION
cv::Mat PyArrayToCvMat(pybind11::array& pyarray);
#endif
template <typename T>
FDDataType CTypeToFDDataType() {
if (std::is_same<T, int32_t>::value) {
return FDDataType::INT32;
} else if (std::is_same<T, int64_t>::value) {
return FDDataType::INT64;
} else if (std::is_same<T, float>::value) {
return FDDataType::FP32;
} else if (std::is_same<T, double>::value) {
return FDDataType::FP64;
}
FDASSERT(false,
"CTypeToFDDataType only support int32/int64/float32/float64 now.");
return FDDataType::FP32;
}
template <typename T>
std::vector<pybind11::array> PyBackendInfer(
T& self, const std::vector<std::string>& names,
std::vector<pybind11::array>& data) {
std::vector<FDTensor> inputs(data.size());
for (size_t i = 0; i < data.size(); ++i) {
// TODO(jiangjiajun) Consider using the user's memory directly here
inputs[i].dtype = NumpyDataTypeToFDDataType(data[i].dtype());
inputs[i].shape.insert(inputs[i].shape.begin(), data[i].shape(),
data[i].shape() + data[i].ndim());
inputs[i].data.resize(data[i].nbytes());
memcpy(inputs[i].data.data(), data[i].mutable_data(), data[i].nbytes());
inputs[i].name = names[i];
}
std::vector<FDTensor> outputs(self.NumOutputs());
self.Infer(inputs, &outputs);
std::vector<pybind11::array> results;
results.reserve(outputs.size());
for (size_t i = 0; i < outputs.size(); ++i) {
auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
results.emplace_back(pybind11::array(numpy_dtype, outputs[i].shape));
memcpy(results[i].mutable_data(), outputs[i].data.data(),
outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
}
return results;
}
} // namespace fastdeploy
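
A hedged sketch of how PyBackendInfer is meant to be used: any type that exposes NumOutputs() and Infer(inputs, &outputs) can be bound this way. EchoBackend and the module name echo_backend are hypothetical stand-ins invented for the example.

#include "fastdeploy/pybind/main.h"

// Hypothetical toy backend that just echoes its first input; it exists only
// to satisfy the NumOutputs()/Infer() contract that PyBackendInfer relies on.
struct EchoBackend {
  int NumOutputs() { return 1; }
  bool Infer(std::vector<fastdeploy::FDTensor>& inputs,
             std::vector<fastdeploy::FDTensor>* outputs) {
    (*outputs)[0] = inputs[0];   // copy the first input tensor to the output
    return true;
  }
};

PYBIND11_MODULE(echo_backend, m) {
  pybind11::class_<EchoBackend>(m, "EchoBackend")
      .def(pybind11::init<>())
      .def("infer", [](EchoBackend& self, const std::vector<std::string>& names,
                       std::vector<pybind11::array>& data) {
        // Copies the numpy inputs into FDTensors, runs Infer, copies outputs back.
        return fastdeploy::PyBackendInfer(self, names, data);
      });
}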

View File

@@ -1,19 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/core/config.h"
#ifdef ENABLE_TEXT
#include "fastdeploy/text/text_model.h"
#endif

View File

@@ -1,26 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
namespace text {
struct FASTDEPLOY_DECL TextPreprocessOption {};
struct FASTDEPLOY_DECL TextPostprocessOption {};
struct FASTDEPLOY_DECL PredictionOption {};
} // namespace text
} // namespace fastdeploy

View File

@@ -1,18 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/text/common/result.h"
namespace fastdeploy {
namespace text {} // namespace text
} // namespace fastdeploy

View File

@@ -1,23 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
namespace text {
struct FASTDEPLOY_DECL Result {};
} // namespace text
} // namespace fastdeploy

View File

@@ -1,31 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/text/postprocessor/postprocessor.h"
namespace fastdeploy {
namespace text {
bool Postprocessor::Decode(const std::vector<FDTensor>& model_result,
Result* decoded_result) const {
return true;
}
bool Postprocessor::DecodeBatch(const std::vector<FDTensor>& model_result,
Result* decoded_result) const {
return true;
}
} // namespace text
} // namespace fastdeploy

View File

@@ -1,34 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/text/common/result.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
namespace text {
class Postprocessor {
public:
virtual bool Decode(const std::vector<FDTensor>& model_result,
Result* decoded_result) const;
virtual bool DecodeBatch(const std::vector<FDTensor>& model_result,
Result* decoded_result) const;
};
} // namespace text
} // namespace fastdeploy

View File

@@ -1,32 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/text/preprocessor/preprocessor.h"
namespace fastdeploy {
namespace text {
bool Preprocessor::Encode(const std::string& raw_text,
std::vector<FDTensor>* encoded_tensor) const {
return true;
}
bool Preprocessor::EncodeBatch(const std::vector<std::string>& raw_texts,
std::vector<FDTensor>* encoded_tensor) const {
return true;
}
} // namespace text
} // namespace fastdeploy

View File

@@ -1,34 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <vector>
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
namespace text {
class Preprocessor {
public:
virtual bool Encode(const std::string& raw_text,
std::vector<FDTensor>* encoded_tensor) const;
virtual bool EncodeBatch(const std::vector<std::string>& raw_texts,
std::vector<FDTensor>* encoded_tensor) const;
};
} // namespace text
} // namespace fastdeploy
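
A hypothetical Preprocessor subclass, sketched here only to show what the Encode contract expects from an implementation; the class name, the tensor name "input_ids" and the byte-to-id encoding are all made up for the example.

#include <cstdint>
#include "fastdeploy/text/preprocessor/preprocessor.h"

namespace fastdeploy {
namespace text {

// Maps every byte of the input text to an int32 id in a single [1, len] tensor.
class CharPreprocessor : public Preprocessor {
 public:
  bool Encode(const std::string& raw_text,
              std::vector<FDTensor>* encoded_tensor) const override {
    encoded_tensor->resize(1);
    auto& t = (*encoded_tensor)[0];
    t.name = "input_ids";
    t.dtype = FDDataType::INT32;
    t.shape = {1, static_cast<int>(raw_text.size())};
    t.data.resize(raw_text.size() * sizeof(int32_t));
    auto* ids = reinterpret_cast<int32_t*>(t.data.data());
    for (size_t i = 0; i < raw_text.size(); ++i) {
      ids[i] = static_cast<int32_t>(static_cast<unsigned char>(raw_text[i]));
    }
    return true;
  }
};

}  // namespace text
}  // namespace fastdeploy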

View File

@@ -1,79 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/text/text_model.h"
#include "fastdeploy/text/common/option.h"
#include "fastdeploy/text/common/result.h"
#include "fastdeploy/text/postprocessor/postprocessor.h"
#include "fastdeploy/text/preprocessor/preprocessor.h"
namespace fastdeploy {
namespace text {
bool TextModel::Predict(const std::string& raw_text, Result* result,
const PredictionOption& option) {
// Preprocess
std::vector<FDTensor> input_tensor;
std::vector<FDTensor> output_tensor;
if (!preprocessor_->Encode(raw_text, &input_tensor)) {
FDERROR << "Failed to preprocess input data while using model:"
<< ModelName() << "." << std::endl;
return false;
}
// Inference Runtime
if (!Infer(input_tensor, &output_tensor)) {
FDERROR << "Failed to inference while using model:" << ModelName() << "."
<< std::endl;
return false;
}
// Postprocess
if (!postprocessor_->Decode(output_tensor, result)) {
FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
<< std::endl;
return false;
}
return true;
}
bool TextModel::PredictBatch(const std::vector<std::string>& raw_text_array,
Result* results, const PredictionOption& option) {
// Preprocess
std::vector<FDTensor> input_tensor;
std::vector<FDTensor> output_tensor;
if (!preprocessor_->EncodeBatch(raw_text_array, &input_tensor)) {
FDERROR << "Failed to preprocess input data while using model:"
<< ModelName() << "." << std::endl;
return false;
}
// Inference Runtime
if (!Infer(input_tensor, &output_tensor)) {
FDERROR << "Failed to inference while using model:" << ModelName() << "."
<< std::endl;
return false;
}
// Postprocess
if (!postprocessor_->DecodeBatch(output_tensor, results)) {
FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
<< std::endl;
return false;
}
return true;
}
} // namespace text
} // namespace fastdeploy

View File

@@ -1,51 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/utils/unique_ptr.h"
namespace fastdeploy {
namespace text {
class Preprocessor;
class Postprocessor;
class Result;
class PredictionOption;
class FASTDEPLOY_DECL TextModel : public FastDeployModel {
public:
virtual std::string ModelName() const { return "TextModel"; }
virtual bool Predict(const std::string& raw_text, Result* result,
const PredictionOption& option);
virtual bool PredictBatch(const std::vector<std::string>& raw_text_array,
Result* result, const PredictionOption& option);
template <typename T, typename... Args>
void SetPreprocessor(Args&&... args) {
preprocessor_ = utils::make_unique<T>(std::forward<Args>(args)...);
}
template <typename T, typename... Args>
void SetPostprocessor(Args&&... args) {
postprocessor_ = utils::make_unique<T>(std::forward<Args>(args)...);
}
private:
std::unique_ptr<Preprocessor> preprocessor_;
std::unique_ptr<Postprocessor> postprocessor_;
};
} // namespace text
} // namespace fastdeploy
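
A usage sketch of the TextModel wiring above. It plugs in the stub Preprocessor/Postprocessor (a real deployment would use concrete subclasses such as the hypothetical CharPreprocessor sketched earlier), and it deliberately skips runtime/model initialization, which FastDeployModel would require before Infer can succeed.

#include <iostream>
#include "fastdeploy/text/common/option.h"
#include "fastdeploy/text/common/result.h"
#include "fastdeploy/text/postprocessor/postprocessor.h"
#include "fastdeploy/text/preprocessor/preprocessor.h"
#include "fastdeploy/text/text_model.h"

void TextModelSketch() {
  fastdeploy::text::TextModel model;   // assumes the underlying runtime was initialized elsewhere
  model.SetPreprocessor<fastdeploy::text::Preprocessor>();     // no-op stub encoder
  model.SetPostprocessor<fastdeploy::text::Postprocessor>();   // no-op stub decoder

  fastdeploy::text::Result result;
  fastdeploy::text::PredictionOption option;
  if (!model.Predict("FastDeploy makes deployment easy.", &result, option)) {
    std::cerr << "Predict failed for " << model.ModelName() << std::endl;
  }
}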

View File

@@ -1,13 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

View File

@@ -1,49 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/utils/utils.h"
#include <chrono> // NOLINT
namespace fastdeploy {
class FASTDEPLOY_DECL TimeCounter {
public:
void Start() { begin_ = std::chrono::system_clock::now(); }
void End() { end_ = std::chrono::system_clock::now(); }
double Duration() {
auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(end_ - begin_);
return static_cast<double>(duration.count()) *
std::chrono::microseconds::period::num /
std::chrono::microseconds::period::den;
}
void PrintInfo(const std::string& prefix = "TimeCounter: ",
bool print_out = true) {
if (!print_out) {
return;
}
FDLogger() << prefix << " duration = " << Duration() << "s." << std::endl;
}
private:
std::chrono::time_point<std::chrono::system_clock> begin_;
std::chrono::time_point<std::chrono::system_clock> end_;
};
} // namespace fastdeploy
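
A short usage sketch of TimeCounter; the include path is an assumption, and the 50 ms sleep just stands in for real work.

#include <chrono>
#include <thread>
#include "fastdeploy/utils/perf.h"   // assumed path of the header that declares TimeCounter

int main() {
  fastdeploy::TimeCounter tc;
  tc.Start();
  std::this_thread::sleep_for(std::chrono::milliseconds(50));  // stand-in for real work
  tc.End();
  tc.PrintInfo("SleepTest: ");   // prints roughly "SleepTest:  duration = 0.05s."
  return 0;
}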

View File

@@ -1,58 +0,0 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
namespace fastdeploy {
namespace utils {
// Trait to select overloads and return types for MakeUnique.
template <typename T>
struct MakeUniqueResult {
using scalar = std::unique_ptr<T>;
};
template <typename T>
struct MakeUniqueResult<T[]> {
using array = std::unique_ptr<T[]>;
};
template <typename T, size_t N>
struct MakeUniqueResult<T[N]> {
using invalid = void;
};
// make_unique<T>(...) is an early implementation of C++14 std::make_unique.
// It is designed to be 100% compatible with std::make_unique so that the
// eventual switchover will be a simple renaming operation.
template <typename T, typename... Args>
typename MakeUniqueResult<T>::scalar make_unique(Args &&... args) { // NOLINT
return std::unique_ptr<T>(
new T(std::forward<Args>(args)...)); // NOLINT(build/c++11)
}
// Overload for array of unknown bound.
// The allocation of arrays needs to use the array form of new,
// and cannot take element constructor arguments.
template <typename T>
typename MakeUniqueResult<T>::array make_unique(size_t n) {
return std::unique_ptr<T>(new typename std::remove_extent<T>::type[n]());
}
// Reject arrays of known bound.
template <typename T, typename... Args>
typename MakeUniqueResult<T>::invalid make_unique(Args &&... /* args */) =
delete; // NOLINT
} // namespace utils
} // namespace fastdeploy
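
A small usage sketch of the helpers above; Foo is a throwaway type defined only for the example.

#include "fastdeploy/utils/unique_ptr.h"

struct Foo {
  Foo(int a, int b) : sum(a + b) {}
  int sum;
};

int main() {
  // Scalar overload: forwards the constructor arguments.
  auto foo = fastdeploy::utils::make_unique<Foo>(1, 2);
  // Array-of-unknown-bound overload: value-initializes n elements.
  auto buf = fastdeploy::utils::make_unique<float[]>(16);
  buf[0] = static_cast<float>(foo->sum);
  // make_unique<float[16]>(...) would be rejected at compile time (known bound).
  return 0;
}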

View File

@@ -1,49 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
FDLogger::FDLogger(bool verbose, const std::string& prefix) {
verbose_ = verbose;
line_ = "";
prefix_ = prefix;
}
FDLogger& FDLogger::operator<<(std::ostream& (*os)(std::ostream&)) {
if (!verbose_) {
return *this;
}
std::cout << prefix_ << " " << line_ << std::endl;
line_ = "";
return *this;
}
bool ReadBinaryFromFile(const std::string& file, std::string* contents) {
std::ifstream fin(file, std::ios::in | std::ios::binary);
if (!fin.is_open()) {
FDERROR << "Failed to open file: " << file << " to read." << std::endl;
return false;
}
fin.seekg(0, std::ios::end);
contents->clear();
contents->resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(contents->at(0)), contents->size());
fin.close();
return true;
}
} // namespace fastdeploy

View File

@@ -1,150 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdlib.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#if defined(_WIN32)
#ifdef FASTDEPLOY_LIB
#define FASTDEPLOY_DECL __declspec(dllexport)
#else
#define FASTDEPLOY_DECL __declspec(dllimport)
#endif // FASTDEPLOY_LIB
#else
#define FASTDEPLOY_DECL __attribute__((visibility("default")))
#endif // _WIN32
namespace fastdeploy {
class FASTDEPLOY_DECL FDLogger {
public:
FDLogger() {
line_ = "";
prefix_ = "[FastDeploy]";
verbose_ = true;
}
explicit FDLogger(bool verbose, const std::string& prefix = "[FastDeploy]");
template <typename T>
FDLogger& operator<<(const T& val) {
if (!verbose_) {
return *this;
}
std::stringstream ss;
ss << val;
line_ += ss.str();
return *this;
}
FDLogger& operator<<(std::ostream& (*os)(std::ostream&));
~FDLogger() {
if (!verbose_ && line_ != "") {
std::cout << line_ << std::endl;
}
}
private:
std::string line_;
std::string prefix_;
bool verbose_ = true;
};
FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file,
std::string* contents);
#ifndef __REL_FILE__
#define __REL_FILE__ __FILE__
#endif
#define FDERROR \
FDLogger(true, "[ERROR]") << __REL_FILE__ << "(" << __LINE__ \
<< ")::" << __FUNCTION__ << "\t"
#define FDWARNING \
FDLogger(true, "[WARNING]") << __REL_FILE__ << "(" << __LINE__ \
<< ")::" << __FUNCTION__ << "\t"
#define FDINFO \
FDLogger(true, "[INFO]") << __REL_FILE__ << "(" << __LINE__ \
<< ")::" << __FUNCTION__ << "\t"
#define FDASSERT(condition, message) \
if (!(condition)) { \
FDERROR << message << std::endl; \
std::abort(); \
}
///////// Basic Marco ///////////
#define FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, HINT, ...) \
case enum_type: { \
using HINT = type; \
__VA_ARGS__(); \
break; \
}
#define FD_PRIVATE_CASE_TYPE(NAME, enum_type, type, ...) \
FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, data_t, __VA_ARGS__)
#define FD_VISIT_ALL_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::BOOL, bool, \
__VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \
__VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \
__VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float, \
__VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \
__VA_ARGS__) \
default: \
FDASSERT(false, "Invalid enum data type.") \
} \
}()
#define FD_VISIT_FLOAT_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float, \
__VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \
__VA_ARGS__) \
default: \
FDASSERT(false, "Invalid enum data type.") \
} \
}()
#define FD_VISIT_INT_TYPES(TYPE, NAME, ...) \
[&] { \
const auto& __dtype__ = TYPE; \
switch (__dtype__) { \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \
__VA_ARGS__) \
FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \
__VA_ARGS__) \
default: \
FDASSERT(false, "Invalid enum data type.") \
} \
}()
} // namespace fastdeploy
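
A usage sketch of the FD_VISIT_ALL_TYPES visitor above: it stamps out one typed loop per supported dtype and exposes the concrete element type as data_t. The include of fd_tensor.h is an assumption about where FDDataType is declared.

#include "fastdeploy/core/fd_tensor.h"   // assumed to declare FDDataType
#include "fastdeploy/utils/utils.h"

// Sums `numel` elements of a raw buffer whose runtime dtype is `dtype`.
// Unsupported dtypes (e.g. UINT8) fall into the default branch and trigger FDASSERT.
double SumBuffer(const void* data, int numel, fastdeploy::FDDataType dtype) {
  double total = 0.0;
  FD_VISIT_ALL_TYPES(dtype, "SumBuffer", ([&] {
    const data_t* ptr = reinterpret_cast<const data_t*>(data);
    for (int i = 0; i < numel; ++i) {
      total += static_cast<double>(ptr[i]);
    }
  }));
  return total;
}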

View File

@@ -1,41 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/core/config.h"
#ifdef ENABLE_VISION
#include "fastdeploy/vision/detection/contrib/nanodet_plus.h"
#include "fastdeploy/vision/detection/contrib/scaledyolov4.h"
#include "fastdeploy/vision/detection/contrib/yolor.h"
#include "fastdeploy/vision/detection/contrib/yolov5.h"
#include "fastdeploy/vision/detection/contrib/yolov5lite.h"
#include "fastdeploy/vision/detection/contrib/yolov6.h"
#include "fastdeploy/vision/detection/contrib/yolov7.h"
#include "fastdeploy/vision/detection/contrib/yolox.h"
#include "fastdeploy/vision/facedet/contrib/retinaface.h"
#include "fastdeploy/vision/facedet/contrib/scrfd.h"
#include "fastdeploy/vision/facedet/contrib/ultraface.h"
#include "fastdeploy/vision/facedet/contrib/yolov5face.h"
#include "fastdeploy/vision/faceid/contrib/arcface.h"
#include "fastdeploy/vision/faceid/contrib/cosface.h"
#include "fastdeploy/vision/faceid/contrib/insightface_rec.h"
#include "fastdeploy/vision/faceid/contrib/partial_fc.h"
#include "fastdeploy/vision/faceid/contrib/vpl.h"
#include "fastdeploy/vision/matting/contrib/modnet.h"
#include "fastdeploy/vision/ppcls/model.h"
#include "fastdeploy/vision/detection/ppdet/model.h"
#include "fastdeploy/vision/ppseg/model.h"
#endif
#include "fastdeploy/vision/visualize/visualize.h"

View File

@@ -1,3 +0,0 @@
# How to Add a New Model
This document uses [yolov5](https://github.com/ultralytics/yolov5) as an example to show how to add support for a new model.

View File

@@ -1,61 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/base.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
namespace vision {
ProcLib Processor::default_lib = ProcLib::DEFAULT;
bool Processor::CpuRun(Mat* mat) {
FDERROR << "Unimplemented CpuRun." << std::endl;
return false;
}
#ifdef ENABLE_OPENCV_CUDA
bool Processor::GpuRun(Mat* mat) {
FDERROR << "Unimplemented GpuRun." << std::endl;
return false;
}
#endif
bool Processor::operator()(Mat* mat, ProcLib lib) {
// if default_lib is set
// then use default_lib
ProcLib target = lib;
if (default_lib != ProcLib::DEFAULT) {
target = default_lib;
}
if (target == ProcLib::OPENCV_CUDA) {
#ifdef ENABLE_OPENCV_CUDA
bool ret = GpuRun(mat);
mat->device = Device::GPU;
return ret;
#else
FDERROR
<< "OpenCV is not compiled with CUDA, cannot process image with CUDA."
<< std::endl;
return false;
#endif
}
bool ret = CpuRun(mat);
mat->device = Device::CPU;
return ret;
}
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,48 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/utils/utils.h"
#include "fastdeploy/vision/common/processors/mat.h"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
namespace fastdeploy {
namespace vision {
enum ProcLib { DEFAULT, OPENCV_CPU, OPENCV_CUDA };
class Processor {
public:
// default_lib has the highest priority:
// if this flag is set, every function in `Processor` is forced to
// use default_lib.
// DEFAULT means this flag is not set.
static ProcLib default_lib;
// virtual bool ShapeInfer(const std::vector<int>& in_shape,
// std::vector<int>* out_shape) = 0;
virtual std::string Name() = 0;
virtual bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
virtual bool GpuRun(Mat* mat);
#endif
virtual bool operator()(Mat* mat,
ProcLib lib = ProcLib::OPENCV_CPU);
};
} // namespace vision
} // namespace fastdeploy
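
A hypothetical processor, shown only to illustrate how the Processor interface is meant to be extended; HorizontalFlip is not part of the original sources and implements the CPU path only.

#include "fastdeploy/vision/common/processors/base.h"

namespace fastdeploy {
namespace vision {

class HorizontalFlip : public Processor {
 public:
  std::string Name() override { return "HorizontalFlip"; }

  bool CpuRun(Mat* mat) override {
    cv::Mat* im = mat->GetCpuMat();
    cv::flip(*im, *im, 1);   // flip around the vertical axis, in place
    return true;
  }

  static bool Run(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU) {
    auto flip = HorizontalFlip();
    return flip(mat, lib);   // dispatches through Processor::operator()
  }
};

}  // namespace vision
}  // namespace fastdeploy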

View File

@@ -1,64 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/cast.h"
namespace fastdeploy {
namespace vision {
bool Cast::CpuRun(Mat* mat) {
cv::Mat* im = mat->GetCpuMat();
int c = im->channels();
if (dtype_ == "float") {
if (im->type() != CV_32FC(c)) {
im->convertTo(*im, CV_32FC(c));
}
} else if (dtype_ == "double") {
if (im->type() != CV_64FC(c)) {
im->convertTo(*im, CV_64FC(c));
}
} else {
FDWARNING << "Cast not support for " << dtype_
<< " now! will skip this operation." << std::endl;
}
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool Cast::GpuRun(Mat* mat) {
cv::cuda::GpuMat* im = mat->GetGpuMat();
int c = im->channels();
if (dtype_ == "float") {
if (im->type() != CV_32FC(c)) {
im->convertTo(*im, CV_32FC(c));
}
} else if (dtype_ == "double") {
if (im->type() != CV_64FC(c)) {
im->convertTo(*im, CV_64FC(c));
}
} else {
FDWARNING << "Cast not support for " << dtype_
<< " now! will skip this operation." << std::endl;
}
return true;
}
#endif
bool Cast::Run(Mat* mat, const std::string& dtype, ProcLib lib) {
auto c = Cast(dtype);
return c(mat, lib);
}
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,37 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
namespace fastdeploy {
namespace vision {
class Cast : public Processor {
public:
explicit Cast(const std::string& dtype = "float") : dtype_(dtype) {}
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
std::string Name() { return "Cast"; }
static bool Run(Mat* mat, const std::string& dtype,
ProcLib lib = ProcLib::OPENCV_CPU);
private:
std::string dtype_;
};
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,63 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/center_crop.h"
namespace fastdeploy {
namespace vision {
bool CenterCrop::CpuRun(Mat* mat) {
cv::Mat* im = mat->GetCpuMat();
int height = static_cast<int>(im->rows);
int width = static_cast<int>(im->cols);
if (height < height_ || width < width_) {
FDERROR << "[CenterCrop] Image size less than crop size" << std::endl;
return false;
}
int offset_x = static_cast<int>((width - width_) / 2);
int offset_y = static_cast<int>((height - height_) / 2);
cv::Rect crop_roi(offset_x, offset_y, width_, height_);
*im = (*im)(crop_roi);
mat->SetWidth(width_);
mat->SetHeight(height_);
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool CenterCrop::GpuRun(Mat* mat) {
cv::cuda::GpuMat* im = mat->GetGpuMat();
int height = static_cast<int>(im->rows);
int width = static_cast<int>(im->cols);
if (height < height_ || width < width_) {
FDERROR << "[CenterCrop] Image size less than crop size" << std::endl;
return false;
}
int offset_x = static_cast<int>((width - width_) / 2);
int offset_y = static_cast<int>((height - height_) / 2);
cv::Rect crop_roi(offset_x, offset_y, width_, height_);
*im = (*im)(crop_roi);
mat->SetWidth(width_);
mat->SetHeight(height_);
return true;
}
#endif
bool CenterCrop::Run(Mat* mat, const int& width, const int& height,
ProcLib lib) {
auto c = CenterCrop(width, height);
return c(mat, lib);
}
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,40 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
namespace fastdeploy {
namespace vision {
class CenterCrop : public Processor {
public:
CenterCrop(int width, int height) : height_(height), width_(width) {}
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
std::string Name() { return "CenterCrop"; }
static bool Run(Mat* mat, const int& width, const int& height,
ProcLib lib = ProcLib::OPENCV_CPU);
private:
int height_;
int width_;
};
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,58 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/color_space_convert.h"
namespace fastdeploy {
namespace vision {
bool BGR2RGB::CpuRun(Mat* mat) {
cv::Mat* im = mat->GetCpuMat();
cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool BGR2RGB::GpuRun(Mat* mat) {
cv::cuda::GpuMat* im = mat->GetGpuMat();
cv::cuda::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
return true;
}
#endif
bool RGB2BGR::CpuRun(Mat* mat) {
cv::Mat* im = mat->GetCpuMat();
cv::cvtColor(*im, *im, cv::COLOR_RGB2BGR);
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool RGB2BGR::GpuRun(Mat* mat) {
cv::cuda::GpuMat* im = mat->GetGpuMat();
cv::cuda::cvtColor(*im, *im, cv::COLOR_RGB2BGR);
return true;
}
#endif
bool BGR2RGB::Run(Mat* mat, ProcLib lib) {
auto b = BGR2RGB();
return b(mat, lib);
}
bool RGB2BGR::Run(Mat* mat, ProcLib lib) {
auto r = RGB2BGR();
return r(mat, lib);
}
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,44 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
namespace fastdeploy {
namespace vision {
class BGR2RGB : public Processor {
public:
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
virtual std::string Name() { return "BGR2RGB"; }
static bool Run(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU);
};
class RGB2BGR : public Processor {
public:
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
std::string Name() { return "RGB2BGR"; }
static bool Run(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU);
};
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,62 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/convert.h"
namespace fastdeploy {
namespace vision {
Convert::Convert(const std::vector<float>& alpha,
const std::vector<float>& beta) {
FDASSERT(alpha.size() == beta.size(),
"Convert: requires the size of alpha equal to the size of beta.");
FDASSERT(alpha.size() != 0,
"Convert: requires the size of alpha and beta > 0.");
alpha_.assign(alpha.begin(), alpha.end());
beta_.assign(beta.begin(), beta.end());
}
bool Convert::CpuRun(Mat* mat) {
cv::Mat* im = mat->GetCpuMat();
std::vector<cv::Mat> split_im;
cv::split(*im, split_im);
for (int c = 0; c < im->channels(); c++) {
split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
}
cv::merge(split_im, *im);
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool Convert::GpuRun(Mat* mat) {
cv::cuda::GpuMat* im = mat->GetGpuMat();
std::vector<cv::cuda::GpuMat> split_im;
cv::cuda::split(*im, split_im);
for (int c = 0; c < im->channels(); c++) {
split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
}
cv::cuda::merge(split_im, *im);
return true;
}
#endif
bool Convert::Run(Mat* mat, const std::vector<float>& alpha,
const std::vector<float>& beta, ProcLib lib) {
auto c = Convert(alpha, beta);
return c(mat, lib);
}
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,42 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
namespace fastdeploy {
namespace vision {
class Convert : public Processor {
public:
Convert(const std::vector<float>& alpha, const std::vector<float>& beta);
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
std::string Name() { return "Convert"; }
// Compute `result = mat * alpha + beta` directly by channel.
// The default behavior is the same as OpenCV's convertTo method.
static bool Run(Mat* mat, const std::vector<float>& alpha,
const std::vector<float>& beta,
ProcLib lib = ProcLib::OPENCV_CPU);
private:
std::vector<float> alpha_;
std::vector<float> beta_;
};
} // namespace vision
} // namespace fastdeploy
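
A usage sketch of Convert: per channel it computes result = mat * alpha + beta, so alpha = 1/255 and beta = 0 maps an 8-bit BGR image into [0, 1] floats. The image path is a placeholder.

#include <vector>
#include "fastdeploy/vision/common/processors/convert.h"
#include "opencv2/imgcodecs.hpp"

void ConvertSketch() {
  cv::Mat frame = cv::imread("test.jpg");                  // placeholder 3-channel image
  fastdeploy::vision::Mat mat(frame);
  std::vector<float> alpha = {1.f / 255.f, 1.f / 255.f, 1.f / 255.f};
  std::vector<float> beta = {0.f, 0.f, 0.f};
  fastdeploy::vision::Convert::Run(&mat, alpha, beta);     // CPU path by default
}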

View File

@@ -1,75 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/hwc2chw.h"
namespace fastdeploy {
namespace vision {
bool HWC2CHW::CpuRun(Mat* mat) {
if (mat->layout != Layout::HWC) {
FDERROR << "HWC2CHW: The input data is not Layout::HWC format!"
<< std::endl;
return false;
}
cv::Mat* im = mat->GetCpuMat();
cv::Mat im_clone = im->clone();
int rh = im->rows;
int rw = im->cols;
int rc = im->channels();
// float* data = reinterpret_cast<float*>(im->data);
for (int i = 0; i < rc; ++i) {
// cv::extractChannel(im_clone, cv::Mat(rh, rw, im->type() % 8, data + i
// * rh * rw),
// i);
cv::extractChannel(
im_clone,
cv::Mat(rh, rw, im->type() % 8,
im->ptr() + i * rh * rw * FDDataTypeSize(mat->Type())),
i);
}
mat->layout = Layout::CHW;
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool HWC2CHW::GpuRun(Mat* mat) {
if (mat->layout != Layout::HWC) {
FDERROR << "HWC2CHW: The input data is not Layout::HWC format!"
<< std::endl;
return false;
}
cv::cuda::GpuMat* im = mat->GetGpuMat();
cv::cuda::GpuMat im_clone = im->clone();
int rh = im->rows;
int rw = im->cols;
int rc = im->channels();
int num_pixels = rh * rw;
std::vector<cv::cuda::GpuMat> channels{
cv::cuda::GpuMat(rh, rw, im->type() % 8, &(im->ptr()[0])),
cv::cuda::GpuMat(rh, rw, im->type() % 8, &(im->ptr()[num_pixels])),
cv::cuda::GpuMat(rh, rw, im->type() % 8, &(im->ptr()[num_pixels * 2]))};
cv::cuda::split(im_clone, channels);
mat->layout = Layout::CHW;
return true;
}
#endif
bool HWC2CHW::Run(Mat* mat, ProcLib lib) {
auto h = HWC2CHW();
return h(mat, lib);
}
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,33 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
namespace fastdeploy {
namespace vision {
class HWC2CHW : public Processor {
public:
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
std::string Name() { return "HWC2CHW"; }
static bool Run(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU);
};
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,117 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/mat.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
namespace vision {
#ifdef ENABLE_OPENCV_CUDA
cv::cuda::GpuMat* Mat::GetGpuMat() {
if (device == Device::CPU) {
gpu_mat.upload(cpu_mat);
}
return &gpu_mat;
}
#endif
cv::Mat* Mat::GetCpuMat() {
#ifdef ENABLE_OPENCV_CUDA
if (device == Device::GPU) {
gpu_mat.download(cpu_mat);
}
#endif
return &cpu_mat;
}
void Mat::ShareWithTensor(FDTensor* tensor) {
if (device == Device::GPU) {
#ifdef ENABLE_OPENCV_CUDA
tensor->SetExternalData({Channels(), Height(), Width()}, Type(),
GetGpuMat()->ptr());
tensor->device = Device::GPU;
#endif
} else {
tensor->SetExternalData({Channels(), Height(), Width()}, Type(),
GetCpuMat()->ptr());
tensor->device = Device::CPU;
}
if (layout == Layout::HWC) {
tensor->shape = {Height(), Width(), Channels()};
}
}
bool Mat::CopyToTensor(FDTensor* tensor) {
cv::Mat* im = GetCpuMat();
int total_bytes = im->total() * im->elemSize();
if (total_bytes != tensor->Nbytes()) {
FDERROR << "While copy Mat to Tensor, requires the memory size be same, "
"but now size of Tensor = "
<< tensor->Nbytes() << ", size of Mat = " << total_bytes << "."
<< std::endl;
return false;
}
memcpy(tensor->MutableData(), im->ptr(), im->total() * im->elemSize());
return true;
}
void Mat::PrintInfo(const std::string& flag) {
cv::Mat* im = GetCpuMat();
cv::Scalar mean = cv::mean(*im);
std::cout << flag << ": "
<< "Channel=" << Channels() << ", height=" << Height()
<< ", width=" << Width() << ", mean=";
for (int i = 0; i < Channels(); ++i) {
std::cout << mean[i] << " ";
}
std::cout << std::endl;
}
FDDataType Mat::Type() {
int type = -1;
if (device == Device::GPU) {
#ifdef ENABLE_OPENCV_CUDA
type = gpu_mat.type();
#endif
} else {
type = cpu_mat.type();
}
if (type < 0) {
FDASSERT(false,
"While calling Mat::Type(), get negative value, which is not "
"expected!.");
}
type = type % 8;
if (type == 0) {
return FDDataType::UINT8;
} else if (type == 1) {
return FDDataType::INT8;
} else if (type == 2) {
FDASSERT(false, "While calling Mat::Type(), get UINT16 type which is not "
"supported now.");
} else if (type == 3) {
return FDDataType::INT16;
} else if (type == 4) {
return FDDataType::INT32;
} else if (type == 5) {
return FDDataType::FP32;
} else if (type == 6) {
return FDDataType::FP64;
} else {
FDASSERT(false, "While calling Mat::Type(), get type = " +
std::to_string(type) + ", which is not expected!.");
}
}
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,80 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/core/fd_tensor.h"
#include "opencv2/core/core.hpp"
#ifdef ENABLE_OPENCV_CUDA
#include "opencv2/core/cuda.hpp"
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudaimgproc.hpp"
#include "opencv2/cudawarping.hpp"
#endif
namespace fastdeploy {
namespace vision {
enum Layout { HWC, CHW };
struct FASTDEPLOY_DECL Mat {
explicit Mat(cv::Mat& mat) {
cpu_mat = mat;
device = Device::CPU;
layout = Layout::HWC;
height = cpu_mat.rows;
width = cpu_mat.cols;
channels = cpu_mat.channels();
}
private:
int channels;
int height;
int width;
cv::Mat cpu_mat;
#ifdef ENABLE_OPENCV_CUDA
cv::cuda::GpuMat gpu_mat;
#endif
public:
#ifdef ENABLE_OPENCV_CUDA
cv::cuda::GpuMat* GetGpuMat();
#endif
cv::Mat* GetCpuMat();
FDDataType Type();
int Channels() const { return channels; }
int Width() const { return width; }
int Height() const { return height; }
void SetChannels(int s) { channels = s; }
void SetWidth(int w) { width = w; }
void SetHeight(int h) { height = h; }
// Transfer the vision::Mat to FDTensor
void ShareWithTensor(FDTensor* tensor);
// Only support copy to cpu tensor now
bool CopyToTensor(FDTensor* tensor);
// debug functions
// TODO(jiangjiajun) Developing a proper processing pipeline in C++ is not
// easy; more debug functions will be added here to help debug the processed
// image.
// This function prints the shape and per-channel mean of the Mat.
void PrintInfo(const std::string& flag);
Layout layout = Layout::HWC;
Device device = Device::CPU;
};
} // namespace vision
} // namespace fastdeploy
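
A short sketch of the intended Mat-to-FDTensor flow (the image path is a placeholder): ShareWithTensor exposes the underlying cv::Mat buffer to the runtime without a copy, while CopyToTensor would memcpy the pixels into an already-allocated tensor.

#include "fastdeploy/vision/common/processors/mat.h"
#include "opencv2/imgcodecs.hpp"

void MatToTensorSketch() {
  cv::Mat frame = cv::imread("test.jpg");   // placeholder path; BGR, HWC, uint8
  fastdeploy::vision::Mat mat(frame);
  mat.PrintInfo("original");                // prints channels / height / width / per-channel mean

  fastdeploy::FDTensor tensor;
  mat.ShareWithTensor(&tensor);             // zero-copy: the tensor points at the cv::Mat data
}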

View File

@@ -1,88 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/normalize.h"
namespace fastdeploy {
namespace vision {
Normalize::Normalize(const std::vector<float>& mean,
const std::vector<float>& std, bool is_scale,
const std::vector<float>& min,
const std::vector<float>& max) {
FDASSERT(mean.size() == std.size(),
"Normalize: requires the size of mean equal to the size of std.");
std::vector<double> mean_(mean.begin(), mean.end());
std::vector<double> std_(std.begin(), std.end());
std::vector<double> min_(mean.size(), 0.0);
std::vector<double> max_(mean.size(), 255.0);
if (min.size() != 0) {
FDASSERT(
min.size() == mean.size(),
"Normalize: while min is defined, requires the size of min equal to "
"the size of mean.");
min_.assign(min.begin(), min.end());
}
if (max.size() != 0) {
FDASSERT(
max.size() == mean.size(),
"Normalize: while max is defined, requires the size of max equal to "
"the size of mean.");
max_.assign(max.begin(), max.end());
}
for (auto c = 0; c < mean_.size(); ++c) {
double alpha = 1.0;
if (is_scale) {
alpha /= (max_[c] - min_[c]);
}
double beta = -1.0 * (mean_[c] + min_[c] * alpha) / std_[c];
alpha /= std_[c];
alpha_.push_back(alpha);
beta_.push_back(beta);
}
}
bool Normalize::CpuRun(Mat* mat) {
cv::Mat* im = mat->GetCpuMat();
std::vector<cv::Mat> split_im;
cv::split(*im, split_im);
for (int c = 0; c < im->channels(); c++) {
split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
}
cv::merge(split_im, *im);
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool Normalize::GpuRun(Mat* mat) {
cv::cuda::GpuMat* im = mat->GetGpuMat();
std::vector<cv::cuda::GpuMat> split_im;
cv::cuda::split(*im, split_im);
for (int c = 0; c < im->channels(); c++) {
split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
}
cv::cuda::merge(split_im, *im);
return true;
}
#endif
bool Normalize::Run(Mat* mat, const std::vector<float>& mean,
const std::vector<float>& std, bool is_scale,
const std::vector<float>& min,
const std::vector<float>& max, ProcLib lib) {
auto n = Normalize(mean, std, is_scale, min, max);
return n(mat, lib);
}
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,53 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
namespace fastdeploy {
namespace vision {
class Normalize : public Processor {
public:
Normalize(const std::vector<float>& mean, const std::vector<float>& std,
bool is_scale = true,
const std::vector<float>& min = std::vector<float>(),
const std::vector<float>& max = std::vector<float>());
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
std::string Name() { return "Normalize"; }
// When normalization is used repeatedly, it is recommended not to call this
// static function, since it needs to compute
// result = ((mat / 255) - mean) / std on every call.
// If we use the following method instead
// ```
// auto norm = Normalize(...)
// norm(mat)
// ```
// the precomputation is done once in the constructor, and `norm(mat)` only
// needs to compute result = mat * alpha + beta, which saves a lot of time.
static bool Run(Mat* mat, const std::vector<float>& mean,
const std::vector<float>& std, bool is_scale = true,
const std::vector<float>& min = std::vector<float>(),
const std::vector<float>& max = std::vector<float>(),
ProcLib lib = ProcLib::OPENCV_CPU);
private:
std::vector<float> alpha_;
std::vector<float> beta_;
};
} // namespace vision
} // namespace fastdeploy

View File

@@ -1,100 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/pad.h"
namespace fastdeploy {
namespace vision {
bool Pad::CpuRun(Mat* mat) {
  if (mat->layout != Layout::HWC) {
    FDERROR << "Pad: The input data must be in Layout::HWC format!"
            << std::endl;
    return false;
  }
  if (mat->Channels() > 4) {
    FDERROR << "Pad: Only supports channels <= 4." << std::endl;
    return false;
  }
  if (mat->Channels() != static_cast<int>(value_.size())) {
    FDERROR << "Pad: Requires the number of input channels to equal the size "
               "of the padding values, but now channels = "
            << mat->Channels()
            << ", the size of padding values = " << value_.size() << "."
            << std::endl;
return false;
}
cv::Mat* im = mat->GetCpuMat();
cv::Scalar value;
if (value_.size() == 1) {
value = cv::Scalar(value_[0]);
} else if (value_.size() == 2) {
value = cv::Scalar(value_[0], value_[1]);
} else if (value_.size() == 3) {
value = cv::Scalar(value_[0], value_[1], value_[2]);
} else {
value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]);
}
cv::copyMakeBorder(*im, *im, top_, bottom_, left_, right_,
cv::BORDER_CONSTANT, value);
mat->SetHeight(im->rows);
mat->SetWidth(im->cols);
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool Pad::GpuRun(Mat* mat) {
  if (mat->layout != Layout::HWC) {
    FDERROR << "Pad: The input data must be in Layout::HWC format!"
            << std::endl;
    return false;
  }
  if (mat->Channels() > 4) {
    FDERROR << "Pad: Only supports channels <= 4." << std::endl;
    return false;
  }
  if (mat->Channels() != static_cast<int>(value_.size())) {
    FDERROR << "Pad: Requires the number of input channels to equal the size "
               "of the padding values, but now channels = "
            << mat->Channels()
            << ", the size of padding values = " << value_.size() << "."
            << std::endl;
return false;
}
cv::cuda::GpuMat* im = mat->GetGpuMat();
cv::Scalar value;
if (value_.size() == 1) {
value = cv::Scalar(value_[0]);
} else if (value_.size() == 2) {
value = cv::Scalar(value_[0], value_[1]);
} else if (value_.size() == 3) {
value = cv::Scalar(value_[0], value_[1], value_[2]);
} else {
value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]);
}
cv::cuda::copyMakeBorder(*im, *im, top_, bottom_, left_, right_,
cv::BORDER_CONSTANT, value);
mat->SetHeight(im->rows);
mat->SetWidth(im->cols);
return true;
}
#endif
bool Pad::Run(Mat* mat, const int& top, const int& bottom, const int& left,
const int& right, const std::vector<float>& value,
ProcLib lib) {
auto p = Pad(top, bottom, left, right, value);
return p(mat, lib);
}
} // namespace vision
} // namespace fastdeploy
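
For reference, a hypothetical usage sketch of Pad::Run above. The image path and padding value are illustrative; note that the size of `value` must match the image's channel count, otherwise CpuRun/GpuRun rejects the input:
```
#include <opencv2/opencv.hpp>
#include <vector>

#include "fastdeploy/vision/common/processors/pad.h"

int main() {
  cv::Mat frame = cv::imread("test.jpg");  // assumed 3-channel BGR input
  fastdeploy::vision::Mat mat(frame);      // assumes Mat can wrap a cv::Mat
  // One padding value per channel; here a constant gray border of 114.
  std::vector<float> value = {114.0f, 114.0f, 114.0f};
  // Add a 10-pixel constant border on every side (top, bottom, left, right).
  fastdeploy::vision::Pad::Run(&mat, 10, 10, 10, 10, value);
  return 0;
}
```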


@@ -1,50 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
namespace fastdeploy {
namespace vision {
class Pad : public Processor {
public:
Pad(int top, int bottom, int left, int right,
const std::vector<float>& value) {
top_ = top;
bottom_ = bottom;
left_ = left;
right_ = right;
value_ = value;
}
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
std::string Name() { return "Pad"; }
static bool Run(Mat* mat, const int& top, const int& bottom, const int& left,
const int& right, const std::vector<float>& value,
ProcLib lib = ProcLib::OPENCV_CPU);
private:
int top_;
int bottom_;
int left_;
int right_;
std::vector<float> value_;
};
} // namespace vision
} // namespace fastdeploy


@@ -1,141 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/pad_to_size.h"
namespace fastdeploy {
namespace vision {
bool PadToSize::CpuRun(Mat* mat) {
  if (mat->layout != Layout::HWC) {
    FDERROR << "PadToSize: The input data must be in Layout::HWC format!"
            << std::endl;
    return false;
  }
  if (mat->Channels() > 4) {
    FDERROR << "PadToSize: Only supports channels <= 4." << std::endl;
    return false;
  }
  if (mat->Channels() != static_cast<int>(value_.size())) {
    FDERROR
        << "PadToSize: Requires the number of input channels to equal the "
           "size of the padding values, but now channels = "
        << mat->Channels() << ", the size of padding values = " << value_.size()
        << "." << std::endl;
    return false;
  }
  int origin_w = mat->Width();
  int origin_h = mat->Height();
  if (origin_w > width_) {
    FDERROR << "PadToSize: the input width: " << origin_w
            << " is greater than the target width: " << width_ << "."
            << std::endl;
    return false;
  }
  if (origin_h > height_) {
    FDERROR << "PadToSize: the input height: " << origin_h
            << " is greater than the target height: " << height_ << "."
            << std::endl;
    return false;
  }
if (origin_w == width_ && origin_h == height_) {
return true;
}
cv::Mat* im = mat->GetCpuMat();
cv::Scalar value;
if (value_.size() == 1) {
value = cv::Scalar(value_[0]);
} else if (value_.size() == 2) {
value = cv::Scalar(value_[0], value_[1]);
} else if (value_.size() == 3) {
value = cv::Scalar(value_[0], value_[1], value_[2]);
} else {
value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]);
}
// top, bottom, left, right
cv::copyMakeBorder(*im, *im, 0, height_ - origin_h, 0, width_ - origin_w,
cv::BORDER_CONSTANT, value);
mat->SetHeight(height_);
mat->SetWidth(width_);
return true;
}
#ifdef ENABLE_OPENCV_CUDA
bool PadToSize::GpuRun(Mat* mat) {
  if (mat->layout != Layout::HWC) {
    FDERROR << "PadToSize: The input data must be in Layout::HWC format!"
            << std::endl;
    return false;
  }
  if (mat->Channels() > 4) {
    FDERROR << "PadToSize: Only supports channels <= 4." << std::endl;
    return false;
  }
  if (mat->Channels() != static_cast<int>(value_.size())) {
    FDERROR
        << "PadToSize: Requires the number of input channels to equal the "
           "size of the padding values, but now channels = "
        << mat->Channels() << ", the size of padding values = " << value_.size()
        << "." << std::endl;
    return false;
  }
  int origin_w = mat->Width();
  int origin_h = mat->Height();
  if (origin_w > width_) {
    FDERROR << "PadToSize: the input width: " << origin_w
            << " is greater than the target width: " << width_ << "."
            << std::endl;
    return false;
  }
  if (origin_h > height_) {
    FDERROR << "PadToSize: the input height: " << origin_h
            << " is greater than the target height: " << height_ << "."
            << std::endl;
    return false;
  }
if (origin_w == width_ && origin_h == height_) {
return true;
}
cv::cuda::GpuMat* im = mat->GetGpuMat();
cv::Scalar value;
if (value_.size() == 1) {
value = cv::Scalar(value_[0]);
} else if (value_.size() == 2) {
value = cv::Scalar(value_[0], value_[1]);
} else if (value_.size() == 3) {
value = cv::Scalar(value_[0], value_[1], value_[2]);
} else {
value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]);
}
// top, bottom, left, right
cv::cuda::copyMakeBorder(*im, *im, 0, height_ - origin_h, 0,
width_ - origin_w, cv::BORDER_CONSTANT, value);
mat->SetHeight(height_);
mat->SetWidth(width_);
return true;
}
#endif
bool PadToSize::Run(Mat* mat, int width, int height,
const std::vector<float>& value, ProcLib lib) {
auto p = PadToSize(width, height, value);
return p(mat, lib);
}
} // namespace vision
} // namespace fastdeploy
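
And a hypothetical sketch of PadToSize::Run above; the target size and image path are assumptions. Because only the left-top padding mode is supported, the original pixels stay anchored at the top-left corner and the border is added to the bottom and right:
```
#include <opencv2/opencv.hpp>
#include <vector>

#include "fastdeploy/vision/common/processors/pad_to_size.h"

int main() {
  cv::Mat frame = cv::imread("test.jpg");  // assumed smaller than 640x640
  fastdeploy::vision::Mat mat(frame);      // assumes Mat can wrap a cv::Mat
  std::vector<float> value = {0.0f, 0.0f, 0.0f};  // one value per channel
  // Pads the bottom and right borders so the output becomes 640x640,
  // leaving the original content at the top-left corner.
  fastdeploy::vision::PadToSize::Run(&mat, /*width=*/640, /*height=*/640,
                                     value);
  return 0;
}
```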


@@ -1,46 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/base.h"
namespace fastdeploy {
namespace vision {
class PadToSize : public Processor {
public:
  // Only supports the left-top padding mode: the original content stays at
  // the top-left corner and padding is added to the bottom and right.
PadToSize(int width, int height, const std::vector<float>& value) {
width_ = width;
height_ = height;
value_ = value;
}
bool CpuRun(Mat* mat);
#ifdef ENABLE_OPENCV_CUDA
bool GpuRun(Mat* mat);
#endif
std::string Name() { return "PadToSize"; }
static bool Run(Mat* mat, int width, int height,
const std::vector<float>& value,
ProcLib lib = ProcLib::OPENCV_CPU);
private:
int width_;
int height_;
std::vector<float> value_;
};
} // namespace vision
} // namespace fastdeploy

Some files were not shown because too many files have changed in this diff.