first commit

This commit is contained in:
jiangjiajun
2022-07-05 09:30:15 +00:00
parent 4df7366d62
commit 9d87046d78
781 changed files with 225888 additions and 6184 deletions

178
.clang-format Normal file

@@ -0,0 +1,178 @@
---
Language: Cpp
# BasedOnStyle: LLVM
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignArrayOfStructures: None
AlignConsecutiveMacros: None
AlignConsecutiveAssignments: None
AlignConsecutiveBitFields: None
AlignConsecutiveDeclarations: None
AlignEscapedNewlines: Right
AlignOperands: Align
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortEnumsOnASingleLine: true
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Never
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: MultiLine
AttributeMacros:
- __capability
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: Never
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
BeforeLambdaBody: false
BeforeWhile: false
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeConceptDeclarations: true
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: false
DisableFormat: false
EmptyLineAfterAccessModifier: Never
EmptyLineBeforeAccessModifier: LogicalBlock
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IfMacros:
- KJ_IF_MAYBE
IncludeBlocks: Preserve
IncludeCategories:
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
Priority: 2
SortPriority: 0
CaseSensitive: false
- Regex: '^(<|"(gtest|gmock|isl|json)/)'
Priority: 3
SortPriority: 0
CaseSensitive: false
- Regex: '.*'
Priority: 1
SortPriority: 0
CaseSensitive: false
IncludeIsMainRegex: '(Test)?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseLabels: false
IndentCaseBlocks: false
IndentGotoLabels: true
IndentPPDirectives: None
IndentExternBlock: AfterExternBlock
IndentRequires: false
IndentWidth: 2
IndentWrappedFunctionNames: false
InsertTrailingCommas: None
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: true
LambdaBodyIndentation: Signature
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Auto
ObjCBlockIndentWidth: 2
ObjCBreakBeforeNestedBlockParam: true
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
PenaltyIndentedWhitespace: 0
PointerAlignment: Left
PPIndentWidth: -1
ReferenceAlignment: Pointer
ReflowComments: true
ShortNamespaceLines: 1
SortIncludes: CaseSensitive
SortJavaStaticImport: Before
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceAroundPointerQualifiers: Default
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: Never
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInLineCommentPrefix:
Minimum: 1
Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
BitFieldColonSpacing: Both
Standard: Latest
StatementAttributeLikeMacros:
- Q_EMIT
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
TabWidth: 8
UseCRLF: false
UseTab: Never
WhitespaceSensitiveMacros:
- STRINGIZE
- PP_STRINGIZE
- BOOST_PP_STRINGIZE
- NS_SWIFT_NAME
- CF_SWIFT_NAME
...

15
.clang_format.hook Normal file

@@ -0,0 +1,15 @@
#!/bin/bash
set -e
readonly VERSION="3.8"
version=$(clang-format -version)
if ! [[ $version == *"$VERSION"* ]]; then
  echo "clang-format version check failed."
  echo "a version containing '$VERSION' is needed, but got '$version'"
  echo "you can install the right version, and make a soft-link to it in your '$PATH' env"
  exit -1
fi
clang-format -style=google "$@"
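A quick way to exercise this hook outside of pre-commit is to run it by hand over staged sources. This is only an illustrative sketch; the staged-file pattern and the use of xargs are assumptions, not part of the hook itself:

```
# Hypothetical manual run: format the staged C/C++ sources in place using the
# same wrapper script that the pre-commit hook invokes (clang-format must be on PATH).
git diff --cached --name-only | grep -E '\.(c|cc|cxx|cpp|cu|h|hpp|hxx)$' | xargs bash .clang_format.hook -i
```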

60
.cpplint_pre_commit.hook Normal file

@@ -0,0 +1,60 @@
#!/bin/bash
#TOTAL_ERRORS=0
#echo "HAHAHAHAHHA"
#exit 5
#
#files=$(
#
#if [[ ! $TRAVIS_BRANCH ]]; then
# # install cpplint on local machine.
# if [[ ! $(which cpplint) ]]; then
# pip install cpplint
# fi
# # diff files on local machine.
# files=$(git diff --cached --name-status | awk '$1 != "D" {print $2}')
#else
# # diff files between PR and latest commit on Travis CI.
# branch_ref=$(git rev-parse "$TRAVIS_BRANCH")
# head_ref=$(git rev-parse HEAD)
# files=$(git diff --name-status $branch_ref $head_ref | awk '$1 != "D" {print $2}')
#fi
## The trick to remove deleted files: https://stackoverflow.com/a/2413151
#for file in $files; do
# echo $file
# if [[ $file =~ ^(patches/.*) ]]; then
# continue;
# else
# cpplint --filter=-readability/fn_size $file;
# TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
# fi
#done
#
#exit $TOTAL_ERRORS
if git rev-parse --verify HEAD >/dev/null 2>&1
then
against=HEAD
else
# Initial commit: diff against an empty tree object
against=4b825dc642cb6eb9a060e54bf8d69288fbee4904
fi
# Redirect output to stderr.
exec 1>&2
cpplint=cpplint
sum=0
filters='-build/include_order,-build/namespaces,-legal/copyright,-runtime/references,-build/include_what_you_use'
# for cpp
for file in $(git diff-index --name-status $against -- | grep -E '\.[ch](pp)?$' | awk '{print $2}'); do
$cpplint --filter=$filters $file
sum=$(expr ${sum} + $?)
done
if [ ${sum} -eq 0 ]; then
exit 0
else
exit 1
fi
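For reference, the same check can be reproduced by hand on a single file. This is a sketch only; the file path is a placeholder:

```
# Hypothetical manual invocation of cpplint with the same filters used by the hook.
pip install cpplint
cpplint --filter='-build/include_order,-build/namespaces,-legal/copyright,-runtime/references,-build/include_what_you_use' path/to/some_file.cc
```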

1
.gitignore vendored Normal file

@@ -0,0 +1 @@
fastdeploy/libs/lib*

43
.pre-commit-config.yaml Normal file

@@ -0,0 +1,43 @@
repos:
- repo: https://github.com/PaddlePaddle/mirrors-yapf.git
rev: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
hooks:
- id: yapf
files: \.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: a11d9314b22d8f8c7556443875b731ef05965464
hooks:
- id: check-merge-conflict
- id: check-symlinks
- id: end-of-file-fixer
- id: trailing-whitespace
- id: detect-private-key
- id: check-symlinks
- id: check-added-large-files
- repo: local
hooks:
- id: copyright_checker
name: copyright_checker
entry: python ./.copyright.hook
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
exclude: (?!.*third_party)^.*$
- repo: local
hooks:
- id: clang-format-with-version-check
name: clang-format
description: Format files with ClangFormat.
entry: bash .clang_format.hook -i
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$
- repo: local
hooks:
- id: cpplint-cpp-source
name: cpplint
description: Check C++ code style using cpplint.py.
entry: bash .cpplint_pre_commit.hook
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$
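This configuration is consumed by the pre-commit tool. A minimal setup sketch follows; the commit-prepare.sh script referenced in the README presumably performs an equivalent setup, so treat these commands as an assumption rather than the project's official workflow:

```
# Install pre-commit, register the git hooks defined in .pre-commit-config.yaml,
# and optionally run all hooks once against the whole tree.
pip install pre-commit
pre-commit install
pre-commit run --all-files
```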

261
CMakeLists.txt Normal file

@@ -0,0 +1,261 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
PROJECT(fastdeploy C CXX)
CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
include(ExternalProject)
add_subdirectory(fastdeploy)
include(external/utils.cmake)
# Set C++11 as standard for the whole project
if(NOT MSVC)
set(CMAKE_CXX_STANDARD 11)
endif(NOT MSVC)
#############################CMAKE FOR FASTDEPLOY################################
option(ENABLE_PADDLE_FRONTEND "Whether to enable the PaddlePaddle frontend so FastDeploy can load Paddle models." ON)
option(WITH_GPU "If WITH_GPU=ON, onnxruntime-gpu/paddle-inference-gpu will be enabled." OFF)
option(ENABLE_ORT_BACKEND "Whether to enable the ONNX Runtime backend." OFF)
option(ENABLE_TRT_BACKEND "Whether to enable the TensorRT backend." OFF)
option(CUDA_DIRECTORY "Path of the CUDA library, required when building the TensorRT backend.")
option(TRT_DIRECTORY "Path of the TensorRT library, required when building the TensorRT backend.")
option(ENABLE_VISION "Whether to enable the vision models." OFF)
option(ENABLE_VISION_VISUALIZE "Whether to enable the visualization toolbox for vision model results." ON)
# Please don't enable this flag for now; it still has bugs.
option(ENABLE_OPENCV_CUDA "Whether to enable OpenCV with CUDA so images can be processed on the GPU." OFF)
option(ENABLE_DEBUG "Whether to print debug information; this may reduce performance." OFF)
if(ENABLE_DEBUG)
add_definitions(-DFASTDEPLOY_DEBUG)
endif()
if(NOT CUDA_DIRECTORY)
set(CUDA_DIRECTORY "/usr/local/cuda")
endif()
option(BUILD_FASTDEPLOY_PYTHON "if build python lib for fastdeploy." OFF)
include_directories(${PROJECT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
add_definitions(-DFASTDEPLOY_LIB)
file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/*.cc)
file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/backends/ort/*.cc)
file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/fastdeploy/backends/tensorrt/*.cpp)
file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/vision/*.cc)
file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/pybind/*.cc ${PROJECT_SOURCE_DIR}/fastdeploy/*_pybind.cc)
list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_VISION_SRCS})
set(DEPEND_LIBS "")
file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" FASTDEPLOY_VERSION)
string(STRIP "${FASTDEPLOY_VERSION}" FASTDEPLOY_VERSION)
set(THIRD_PARTY_PATH ${CMAKE_CURRENT_BINARY_DIR}/third_libs)
if(ENABLE_PADDLE_FRONTEND)
add_definitions(-DENABLE_PADDLE_FRONTEND)
include(${PROJECT_SOURCE_DIR}/external/paddle2onnx.cmake)
list(APPEND DEPEND_LIBS external_paddle2onnx)
endif(ENABLE_PADDLE_FRONTEND)
if(ENABLE_ORT_BACKEND)
add_definitions(-DENABLE_ORT_BACKEND)
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS})
include(external/onnxruntime.cmake)
list(APPEND DEPEND_LIBS external_onnxruntime)
endif()
if(WITH_GPU)
if(APPLE)
message(FATAL_ERROR "Cannot enable GPU while compiling on Mac OSX.")
set(WITH_GPU OFF)
else()
add_definitions(-DWITH_GPU)
include_directories(${CUDA_DIRECTORY}/include)
find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64)
list(APPEND DEPEND_LIBS ${CUDA_LIB})
endif()
endif()
if(ENABLE_TRT_BACKEND)
if(APPLE)
message(FATAL_ERROR "Cannot enable the TensorRT backend on Mac OSX, please set -DENABLE_TRT_BACKEND=OFF.")
endif()
if(NOT WITH_GPU)
message(FATAL_ERROR "When -DENABLE_TRT_BACKEND=ON, -DWITH_GPU=ON must also be set, but it is currently OFF.")
endif()
add_definitions(-DENABLE_TRT_BACKEND)
include_directories(${TRT_DIRECTORY}/include)
include_directories(${PROJECT_SOURCE_DIR}/fastdeploy/backends/tensorrt/common)
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TRT_SRCS})
find_library(TRT_INFER_LIB nvinfer ${TRT_DIRECTORY}/lib)
find_library(TRT_ONNX_LIB nvonnxparser ${TRT_DIRECTORY}/lib)
find_library(TRT_CAFFE_LIB nvcaffe_parser ${TRT_DIRECTORY}/lib)
find_library(TRT_PLUGIN_LIB nvinfer_plugin ${TRT_DIRECTORY}/lib)
list(APPEND DEPEND_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_CAFFE_LIB} ${TRT_PLUGIN_LIB})
# copy tensorrt libraries to third lib
if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt")
file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib")
endif()
file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib")
file(COPY ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_CAFFE_LIB} ${TRT_PLUGIN_LIB} DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib" FOLLOW_SYMLINK_CHAIN)
endif()
if(ENABLE_VISION)
add_definitions(-DENABLE_VISION)
if(ENABLE_OPENCV_CUDA)
add_definitions(-DENABLE_OPENCV_CUDA)
if(APPLE)
message(FATAL_ERROR "Cannot enable OpenCV with CUDA on Mac OSX, please set -DENABLE_OPENCV_CUDA=OFF.")
endif()
endif()
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp)
list(APPEND DEPEND_LIBS yaml-cpp)
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_VISION_SRCS})
include_directories(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp/include)
include(external/opencv.cmake)
if(ENABLE_VISION_VISUALIZE)
add_definitions(-DENABLE_VISION_VISUALIZE)
endif()
endif()
configure_file(${PROJECT_SOURCE_DIR}/fastdeploy/core/config.h.in ${PROJECT_SOURCE_DIR}/fastdeploy/core/config.h)
configure_file(${PROJECT_SOURCE_DIR}/FastDeploy.cmake.in ${PROJECT_SOURCE_DIR}/FastDeploy.cmake @ONLY)
list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_PYBIND_SRCS})
add_library(fastdeploy SHARED ${ALL_DEPLOY_SRCS})
redefine_file_macro(fastdeploy)
set_target_properties(fastdeploy PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
if(NOT APPLE)
set_target_properties(fastdeploy PROPERTIES LINK_FLAGS "-Wl,--start-group,--exclude-libs,ALL")
endif()
set_target_properties(fastdeploy PROPERTIES LINK_FLAGS_RELEASE -s)
file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" FASTDEPLOY_VERSION)
string(STRIP "${FASTDEPLOY_VERSION}" FASTDEPLOY_VERSION)
if (APPLE)
# set_target_properties(fastdeploy PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
set_target_properties(fastdeploy PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
elseif(MSVC)
else()
set_target_properties(fastdeploy PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
set_target_properties(fastdeploy PROPERTIES LINK_FLAGS "-Wl,--exclude-libs,ALL")
set_target_properties(fastdeploy PROPERTIES LINK_FLAGS_RELEASE -s)
endif()
find_package(OpenMP)
if(OpenMP_CXX_FOUND)
list(APPEND DEPEND_LIBS OpenMP::OpenMP_CXX)
endif()
set_target_properties(fastdeploy PROPERTIES VERSION ${FASTDEPLOY_VERSION})
target_link_libraries(fastdeploy ${DEPEND_LIBS})
include(external/summary.cmake)
fastdeploy_summary()
install(
TARGETS fastdeploy
LIBRARY DESTINATION lib
)
install(
DIRECTORY ${PROJECT_SOURCE_DIR}/fastdeploy
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
FILES_MATCHING
PATTERN "*.h"
PATTERN "${PROJECT_SOURCE_DIR}/fastdeploy/backends/*/*.h"
)
install(
DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install
DESTINATION ${CMAKE_INSTALL_PREFIX}/third_libs
)
install(
FILES
${PROJECT_SOURCE_DIR}/LICENSE
${PROJECT_SOURCE_DIR}/ThirdPartyNotices.txt
${PROJECT_SOURCE_DIR}/VERSION_NUMBER
${PROJECT_SOURCE_DIR}/FastDeploy.cmake
DESTINATION ${CMAKE_INSTALL_PREFIX}
)
# Build demo cpp
if(ENABLE_VISION)
add_executable(yolov5_exe ${PROJECT_SOURCE_DIR}/demo/cpp/vision/yolov5.cc)
target_link_libraries(yolov5_exe PUBLIC fastdeploy)
endif()
if(BUILD_FASTDEPLOY_PYTHON)
add_definitions(-DBUILD_FASTDEPLOY_PYTHON)
if("${PY_EXT_SUFFIX}" STREQUAL "")
if(MSVC)
set(PY_EXT_SUFFIX ".pyd")
else()
set(PY_EXT_SUFFIX ".so")
endif()
endif()
# find_package Python has replaced PythonInterp and PythonLibs since cmake 3.12
# Use the following command in the future; now this is only compatible with the latest pybind11
# find_package(Python ${PY_VERSION} COMPONENTS Interpreter Development REQUIRED)
find_package(PythonInterp ${PY_VERSION} REQUIRED)
find_package(PythonLibs ${PY_VERSION})
if(CMAKE_SYSTEM_NAME STREQUAL "AIX")
set(CMAKE_NO_SYSTEM_FROM_IMPORTED 1)
endif()
add_library(fastdeploy_main MODULE ${DEPLOY_PYBIND_SRCS})
redefine_file_macro(fastdeploy_main)
set_target_properties(fastdeploy_main PROPERTIES PREFIX "")
set_target_properties(fastdeploy_main
PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
set_target_properties(fastdeploy_main PROPERTIES SUFFIX ${PY_EXT_SUFFIX})
set_target_properties(fastdeploy_main
PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
target_include_directories(fastdeploy_main PRIVATE
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
$<INSTALL_INTERFACE:include>
${PYTHON_INCLUDE_DIR})
target_include_directories(fastdeploy_main PUBLIC ${PROJECT_SOURCE_DIR}/third_party/pybind11/include)
if(APPLE)
set_target_properties(fastdeploy_main
PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
endif()
if(APPLE)
target_link_libraries(fastdeploy_main PUBLIC fastdeploy)
elseif(WIN32)
target_link_libraries(fastdeploy_main PUBLIC fastdeploy)
else()
target_link_libraries(fastdeploy_main PUBLIC fastdeploy)
endif()
if(MSVC)
target_link_libraries(fastdeploy_main PRIVATE ${PYTHON_LIBRARIES})
target_compile_options(fastdeploy_main
PRIVATE /MP
/wd4244 # 'argument': conversion from 'google::
# protobuf::uint64' to 'int', possible
# loss of data
/wd4267 # Conversion from 'size_t' to 'int',
# possible loss of data
/wd4996 # The second parameter is ignored.
${EXTRA_FLAGS})
target_compile_options(fastdeploy_main PRIVATE $<$<NOT:$<CONFIG:Debug>>:/MT> $<$<CONFIG:Debug>:/MTd>)
endif()
endif(BUILD_FASTDEPLOY_PYTHON)
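The options declared at the top of this file drive the build. Below is an illustrative configure/build sketch using only those options; all paths are placeholders and should be adapted to the local environment:

```
# Out-of-source build with the ONNX Runtime and TensorRT backends and the
# vision module enabled; CUDA/TensorRT paths are placeholders.
mkdir build && cd build
cmake .. -DENABLE_ORT_BACKEND=ON \
         -DENABLE_VISION=ON \
         -DWITH_GPU=ON \
         -DENABLE_TRT_BACKEND=ON \
         -DCUDA_DIRECTORY=/usr/local/cuda \
         -DTRT_DIRECTORY=/usr/downloads/TensorRT-8.4.1.0 \
         -DCMAKE_INSTALL_PREFIX=${PWD}/installed_fastdeploy
make -j8
make install
```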

59
FastDeploy.cmake.in Normal file

@@ -0,0 +1,59 @@
CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
set(WITH_GPU @WITH_GPU@)
set(ENABLE_ORT_BACKEND @ENABLE_ORT_BACKEND@)
set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@)
set(ENABLE_PADDLE_FRONTEND @ENABLE_PADDLE_FRONTEND@)
set(ENABLE_VISION @ENABLE_VISION@)
set(ENABLE_OPENCV_CUDA @ENABLE_OPENCV_CUDA@)
set(FASTDEPLOY_LIBS "")
set(FASTDEPLOY_INCS "")
list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/include)
find_library(FDLIB fastdeploy ${CMAKE_CURRENT_LIST_DIR}/lib)
list(APPEND FASTDEPLOY_LIBS ${FDLIB})
if(ENABLE_ORT_BACKEND)
find_library(ORT_LIB onnxruntime ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/onnxruntime/lib)
list(APPEND FASTDEPLOY_LIBS ${ORT_LIB})
endif()
if(WITH_GPU)
if (NOT CUDA_DIRECTORY)
message(FATAL_ERROR "[FastDeploy] Please define CUDA_DIRECTORY, e.g -DCUDA_DIRECTORY=/usr/local/cuda")
endif()
find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64)
list(APPEND FASTDEPLOY_LIBS ${CUDA_LIB})
if (ENABLE_TRT_BACKEND)
if (NOT TRT_DIRECTORY)
message(FATAL_ERROR "[FastDeploy] Please define TRT_DIRECTORY, e.g -DTRT_DIRECTORY=/usr/downloads/TensorRT-8.4.1.0")
endif()
find_library(TRT_INFER_LIB nvinfer ${TRT_DIRECTORY}/lib)
find_library(TRT_ONNX_LIB nvonnxparser ${TRT_DIRECTORY}/lib)
find_library(TRT_CAFFE_LIB nvcaffe_parser ${TRT_DIRECTORY}/lib)
find_library(TRT_PLUGIN_LIB nvinfer_plugin ${TRT_DIRECTORY}/lib)
list(APPEND FASTDEPLOY_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_CAFFE_LIB} ${TRT_PLUGIN_LIB})
endif()
endif()
if(ENABLE_VISION)
find_library(OPENCV_CORE_LIB opencv_core ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
find_library(OPENCV_HIGHGUI_LIB opencv_highgui ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
find_library(OPENCV_IMGPROC_LIB opencv_imgproc ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
find_library(OPENCV_IMGCODESC_LIB opencv_imgcodecs ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
list(APPEND FASTDEPLOY_LIBS ${OPENCV_CORE_LIB} ${OPENCV_HIGHGUI_LIB} ${OPENCV_IMGPROC_LIB} ${OPENCV_IMGCODESC_LIB})
list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/include)
if(ENABLE_OPENCV_CUDA)
find_library(OPENCV_CUDAARITHM_LIB opencv_cudaarithm ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
find_library(OPENCV_CUDAIMGPROC_LIB opencv_cudaimgproc ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
find_library(OPENCV_CUDAWARPING_LIB opencv_cudawarping ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
list(APPEND FASTDEPLOY_LIBS ${OPENCV_CUDAARITHM_LIB} ${OPENCV_CUDAIMGPROC_LIB} ${OPENCV_CUDAWARPING_LIB})
endif()
endif()
if(ENABLE_PADDLE_FRONTEND)
find_library(PADDLE2ONNX_LIB paddle2onnx ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle2onnx/lib)
list(APPEND FASTDEPLOY_LIBS ${PADDLE2ONNX_LIB})
endif()

208
README.md

@@ -1,202 +1,28 @@
# ⚡️FastDeploy
# FastDeploy
</p>
Use FastDeploy for model inference!
------------------------------------------------------------------------------------------
## Requirements
- python >= 3.6
- cmake >= 3.18
- gcc >= 8.2
- cuda >= 11.0 (only if GPU support is enabled)
- tensorrt >= 8.4 (only if the TensorRT backend is enabled)
<p align="center">
<a href="./LICENSE"><img src="https://img.shields.io/badge/license-Apache%202-dfd.svg"></a>
<a href="https://github.com/PaddlePaddle/FastDeploy/releases"><img src="https://img.shields.io/github/v/release/PaddlePaddle/FastDeploy?color=ffa"></a>
<a href=""><img src="https://img.shields.io/badge/python-3.7+-aff.svg"></a>
<a href=""><img src="https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-pink.svg"></a>
<a href="https://github.com/PaddlePaddle/FastDeploy/graphs/contributors"><img src="https://img.shields.io/github/contributors/PaddlePaddle/FastDeploy?color=9ea"></a>
<a href="https://github.com/PaddlePaddle/FastDeploy/commits"><img src="https://img.shields.io/github/commit-activity/m/PaddlePaddle/FastDeploy?color=3af"></a>
<a href="https://pypi.org/project/FastDeploy-python/"><img src="https://img.shields.io/pypi/dm/FastDeploy-python?color=9cf"></a>
<a href="https://github.com/PaddlePaddle/FastDeploy/issues"><img src="https://img.shields.io/github/issues/PaddlePaddle/FastDeploy?color=9cc"></a>
<a href="https://github.com/PaddlePaddle/FastDeploy/stargazers"><img src="https://img.shields.io/github/stars/PaddlePaddle/FastDeploy?color=ccf"></a>
</p>
## Deploy models quickly with FastDeploy
- [C++ deployment guide](docs/cpp/README.md)
- [Python deployment guide](docs/python/README.md)
<h4 align="center">
<a href=#特性> Features </a> |
<a href=#SDK安装> Installation </a> |
<a href=#SDK使用> Quick Start </a> |
<a href=#社区交流> Community </a>
</h4>
## Build FastDeploy from source
**⚡️FastDeploy** is an **easy-to-use** inference deployment toolbox. It covers the industry's mainstream **high-quality pre-trained models** and provides an **out-of-the-box** development experience across tasks such as image classification, object detection, image segmentation, face detection, human keypoint detection, and text recognition, meeting developers' needs for fast deployment across **multiple scenarios**, **multiple hardware targets**, and **multiple platforms**.
- [FastDeploy build guide](docs/compile/README.md)
## News 📢
* 🔥 2022.6.30, 20:30 (GMT+8): FastDeploy angel-user beta-test meetup to discuss inference deployment pain points with developers; scan the QR code to sign up, join the group, and get the meeting link.
<div align="center">
<img src="https://user-images.githubusercontent.com/54695910/175854075-2c0f9997-ed18-4b17-9aaf-1b43266d3996.jpeg" width = "150" height = "150" />
</div>
* 🔥 2022.6.27 [**⚡FastDeploy v0.1.0**](https://github.com/PaddlePaddle/FastDeploy/releases/tag/release%2F0.1.0) beta released! 🎉
* 💎 Released SDKs covering 40 key models on 8 key software/hardware environments
* 😊 Available both as web-page downloads and as a pip package
## Features
### 📦**Out-of-the-box deployment toolchain supporting cloud, edge, and device targets across multiple hardware and platforms**
- Click-to-download on the web page, or a one-line pip install, to quickly obtain SDK packages of multiple types
- Cloud (including servers and data centers):
- Start a Serving service with a single command (including a graphical web UI)
- Run prediction on images, local video streams, local cameras, and network video streams with a single command
- Supports Windows and Linux
- Supports Python and C++
- Edge:
- Supports edge devices such as NVIDIA Jetson, with video-stream prediction services
- Device side (including mobile):
- Supports iOS and Android
- Supports ARM CPU devices
- Supports mainstream hardware:
- Intel CPU family (including Core, Xeon, etc.)
- The full ARM CPU range (including Qualcomm, MTK, RK, etc.)
- The full NVIDIA GPU range (including V100, T4, Jetson, etc.)
### 🤗**Rich pre-trained models: download the SDK and inference deployment is done**
<font size=0.5>
|<font size=2> Model | <font size=2> Task |<font size=2> Size (MB) | <font size=2> Device |<font size=2> Mobile |<font size=2> Mobile |<font size=2> Edge |<font size=2> Server + Cloud | <font size=2> Server + Cloud |<font size=2> Server + Cloud |<font size=2> Server + Cloud |
|---|---|---|---|---|---|---|---|---|---|---|
|----- | ---- |----- |<font size=2> Linux | <font size=2> Android |<font size=2> iOS | <font size=2> Linux |<font size=2> Linux |<font size=2> Linux |<font size=2> Windows |<font size=2> Windows |
|----- | ---- |--- | <font size=2> ARM CPU |<font size=2> ARM CPU | <font size=2> ARM CPU |<font size=2> Jetson |<font size=2> X86 CPU |<font size=2> GPU |<font size=2> X86 CPU |<font size=2> GPU |
| <font size=2> [PP-LCNet](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 11.9 |✅|✅|✅|✅|✅|✅|✅|✅|
| <font size=2> [PP-LCNetv2](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 26.6 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [EfficientNet](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification |31.4 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [GhostNet](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 20.8 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [MobileNetV1](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 17 |✅|✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [MobileNetV2](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 14.2 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [MobileNetV3](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 22 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [ShuffleNetV2](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md)|Classification | 9.2 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [SqueezeNetV1.1](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification |5 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [Inceptionv3](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification |95.5 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [PP-HGNet](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 59 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [ResNet50_vd](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 102.5 |❌|❌|❌|✅|✅|✅|✅|✅|
|<font size=2> [SwinTransformer_224_win7](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 352.7 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [PP-PicoDet_s_320_coco](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 4.1 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [PP-PicoDet_s_320_lcnet](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 4.9 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [CenterNet](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection |4.8 |✅|✅|✅|✅ |✅ |✅|✅|✅|
|<font size=2> [YOLOv3_MobileNetV3](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 94.6 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [PP-YOLO_tiny_650e_coco](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection |4.4 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [SSD_MobileNetV1_300_120e_voc](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 23.3 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [YOLOX_Nano_300e_coco](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 3.7 |❌|❌|❌|✅|✅ |✅|✅|✅|
|<font size=2> [PP-YOLO_ResNet50vd](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 188.5|✅ |✅ |✅ |✅ |✅ |✅|✅|✅|
|<font size=2> [PP-YOLOv2_ResNet50vd](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 218.7 |✅|✅|✅|✅|✅ |✅|✅|✅|
|<font size=2> [PP-YOLO_crn_l_300e_coco](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 209.1 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [YOLOv5s](https://github.com/ultralytics/yolov5) |Detection | 29.3|✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [Faster R-CNN_r50_fpn_1x_coco](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 167.2 |❌|❌|❌|✅|✅|✅|✅|✅|
|<font size=2> [BlazeFace](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Face Detection |1.5|✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [RetinaFace](https://github.com/biubug6/Pytorch_Retinaface) |Face Localisation |1.7| ✅|❌|❌|✅|✅|✅|✅|✅|
|<font size=2> [PP-TinyPose](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Keypoint Detection| 5.5 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [PP-LiteSeg(STDC1)](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md)|Segmentation | 32.2|✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [PP-HumanSeg-Lite](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/contrib/PP-HumanSeg/README_cn.md) |Segmentation | 0.556|✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [HRNet-w18](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/train/train_cn.md) |Segmentation | 38.7|✅|✅|✅|❌|✅|✅|✅|✅|
|<font size=2> [Mask R-CNN_r50_fpn_1x_coco](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/contrib/PP-HumanSeg/README_cn.md)|Segmentation| 107.2|❌|❌|❌|✅|✅|✅|✅|✅|
|<font size=2> [PP-HumanSeg-Server](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/contrib/PP-HumanSeg/README_cn.md)|Segmentation | 107.2|✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [Unet](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/train/train_cn.md) |Segmentation | 53.7|❌|✅|❌|❌|✅|✅|✅|❌|
|<font size=2> [Deeplabv3-ResNet50](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/train/train_cn.md)|Segmentation |156.5|❌|❌|❌|❌|✅|✅|✅|✅|
|<font size=2> [PP-OCRv1](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/doc/doc_ch/ppocr_introduction.md) |OCR | 2.3+4.4 |✅|✅|✅|✅|✅|✅|✅|✅|
|<font size=2> [PP-OCRv2](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/doc/doc_ch/ppocr_introduction.md) |OCR | 2.3+4.4 |✅|✅|✅|✅|✅|✅|✅|✅|
| <font size=2> [PP-OCRv3](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/doc/doc_ch/PP-OCRv3_introduction.md) |OCR | 2.4+10.6 |✅|✅|✅|✅|✅|✅|✅|✅|
| <font size=2> [PP-OCRv3-tiny](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/doc/doc_ch/models_list.md) |OCR |2.4+10.7 |✅|✅|✅|✅|✅|✅|✅|✅|
</font>
## SDK Installation
### Option 1: install via web download
- Log in to the [EasyEdge web portal](https://ai.baidu.com/easyedge/app/openSource) to download the SDK
### Option 2: install via pip
Developers can install `fastdeploy-python` via pip to get the latest download links
- Requirements
python >= 3.6
- Installation
## Committing code
Before committing code, initialize the commit environment. After cloning the repository, run:
```
pip install fastdeploy-python --upgrade
sh commit-prepare.sh
```
- Usage
- List all models currently supported by FastDeploy
```
fastdeploy --list_models
```
- Download the deployment SDK and examples for a given model on a specific platform and hardware
```
fastdeploy --download_sdk \
--model PP-PicoDet-s_320 \
--platform Linux \
--soc x86 \
--save_dir .
```
- Parameters
- `list_models`: List all models currently supported by the latest FastDeploy
- `download_sdk`: Download the deployment SDK and examples for a given model on a specific platform and hardware
- `model`: Model name, e.g. "PP-PicoDet-s_320"; all available options can be listed with `list_models`
- `platform`: Deployment platform; supports Windows/Linux/Android/iOS
- `soc`: Deployment hardware; supports x86/x86-NVIDIA-GPU/ARM/Jetson
- `save_dir`: Directory where the downloaded SDK is saved
## Using the SDK
### 1 Cloud and server deployment
- Linux (x86 CPU, NVIDIA GPU)
- [C++ inference deployment (including video streams)](./docs/Linux-CPP-SDK-Inference.md)
- [C++ serving deployment](./docs/Linux-CPP-SDK-Serving.md)
- [Python inference deployment](./docs/Linux-Python-SDK-Inference.md)
- [Python serving deployment](./docs/Linux-Python-SDK-Serving.md)
- Windows (x86 CPU, NVIDIA GPU)
- [C++ inference deployment (including video streams)](./docs/Windows-CPP-SDK-Inference.md)
- [C++ serving deployment](./docs/Windows-CPP-SDK-Serving.md)
- [Python inference deployment](./docs/Windows-Python-SDK-Inference.md)
- [Python serving deployment](./docs/Windows-Python-SDK-Serving.md)
### 2 Edge deployment
- ARM Linux (NVIDIA Jetson Nano/TX2/Xavier)
- [C++ inference deployment (including video streams)](./docs/Jetson-Linux-CPP-SDK-Inference.md)
- [C++ serving deployment](./docs/Jetson-Linux-CPP-SDK-Serving.md)
### 3 On-device deployment
- ARM Linux (ARM CPU)
- [C++ inference deployment (including video streams)](./docs/ARM-Linux-CPP-SDK-Inference.md)
- [C++ serving deployment](./docs/ARM-Linux-CPP-SDK-Serving.md)
- [Python inference deployment](./docs/ARM-Linux-Python-SDK-Inference.md)
- [Python serving deployment](./docs/ARM-Linux-Python-SDK-Serving.md)
### 4 Mobile deployment
- [iOS deployment](./docs/iOS-SDK.md)
- [Android deployment](./docs/Android-SDK.md)
### 5 Custom model deployment
- [Quickly swap in your own model](./docs/Replace-Model-With-Anther-One.md)
## Community
- **Join the community 👬:** Scan the QR code with WeChat, fill in the questionnaire to join the discussion group, and discuss inference deployment pain points with other developers
<div align="center">
<img src="https://user-images.githubusercontent.com/54695910/175854075-2c0f9997-ed18-4b17-9aaf-1b43266d3996.jpeg" width = "200" height = "200" />
</div>
## Acknowledgements
This project uses the free, open capabilities of [EasyEdge](https://ai.baidu.com/easyedge/app/openSource) for SDK generation and download. Many thanks.
## License
FastDeploy is licensed under the [Apache-2.0 License](./LICENSE).
Code style checks will then run automatically each time you commit.

734
ThirdPartyNotices.txt Normal file

@@ -0,0 +1,734 @@
This project depends on some open source projects, list as below
--------
1. https://github.com/protocolbuffers/protobuf
Copyright 2008 Google Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Code generated by the Protocol Buffer compiler is owned by the owner
of the input file used when generating it. This code is not
standalone and requires a support library to be linked with it. This
support library is itself covered by the above license.
--------
2. https://github.com/onnx/onnx
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--------
3. https://github.com/microsoft/onnxruntime
MIT License
Copyright (c) Microsoft Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------
4. https://github.com/pybind/pybind11
Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing of
external contributions to this project including patches, pull requests, etc.
--------
5. https://github.com/onnx/onnx-tensorrt
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2021 NVIDIA Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--------
5. https://github.com/opencv/opencv
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--------
6. https://github.com/jbeder/yaml-cpp
Copyright (c) 2008-2015 Jesse Beder.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

1
VERSION_NUMBER Normal file
View File

@@ -0,0 +1 @@
0.3.0

View File

@@ -3,4 +3,5 @@ cd $path
pip install pre-commit
pip install yapf
pip install cpplint
pre-commit install

49
demo/cpp/vision/yolov5.cc Normal file
View File

@@ -0,0 +1,49 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision.h"
int main() {
auto model = fastdeploy::vision::ultralytics::YOLOv5("yolov5s.onnx");
model.EnableDebug();
if (!model.Initialized()) {
std::cout << "Init Failed." << std::endl;
return -1;
}
cv::Mat im = cv::imread("bus.jpg");
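// The following loop runs inference repeatedly on cloned inputs
// (e.g. as a simple warm-up / timing check); its results are discarded.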
for (size_t i = 0; i < 10; ++i) {
auto im1 = im.clone();
fastdeploy::vision::DetectionResult res;
if (!model.Predict(&im1, &res)) {
std::cout << "Predict Failed." << std::endl;
return -1;
}
}
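// Final run: predict once more, then visualize and save the result.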
{
fastdeploy::vision::DetectionResult res;
auto vis_im = im.clone();
if (!model.Predict(&im, &res)) {
std::cout << "Predict Failed." << std::endl;
return -1;
}
fastdeploy::vision::Visualize::VisDetection(&vis_im, res);
cv::imwrite("vis.jpg", vis_im);
// Print Detection Result
std::cout << res.Str() << std::endl;
}
return 0;
}

View File

@@ -0,0 +1,10 @@
import fastdeploy as fd
import cv2
# 获取模型 和 测试图片
# wget https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5s.onnx
# wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg
model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx")
im = cv2.imread("bus.jpg")
result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)
print(result)

View File

@@ -1,404 +0,0 @@
# 简介
本文档介绍FastDeploy中的模型SDK在ARM Linux C++环境下1推理部署步骤2介绍模型推理全流程API方便开发者了解项目后二次开发。
其中ARM Linux Python请参考[ARM Linux Python环境下的推理部署](./ARM-Linux-Python-SDK-Inference.md)文档。
**注意**部分模型如Tinypose、OCR等仅支持图像推理不支持视频推理。
<!--ts-->
* [简介](#简介)
* [环境准备](#环境准备)
* [1. 硬件支持](#1-硬件支持)
* [2. 软件环境](#2-软件环境)
* [快速开始](#快速开始)
* [1. 项目结构说明](#1-项目结构说明)
* [2. 测试Demo](#2-测试demo)
* [2.1 预测图像](#21-预测图像)
* [2.2 预测视频流](#22-预测视频流)
* [预测API流程详解](#预测api流程详解)
* [1. SDK参数运行配置](#1-sdk参数运行配置)
* [2. 初始化Predictor](#2-初始化predictor)
* [3. 预测推理](#3-预测推理)
* [3.1 预测图像](#31-预测图像)
* [3.2 预测视频](#32-预测视频)
* [FAQ](#faq)
<!--te-->
# 环境准备
## 1. 硬件支持
目前支持的ARM架构aarch64 、armv7hf
## 2. 软件环境
1.运行二进制文件-环境要求
* gcc: 5.4 以上 (GLIBCXX_3.4.22)
* Linux下查看gcc版本的命令可能因系统差异而不同`gcc --version`
* Linux下C++基础库GLIBCXX的命令因系统差异库路径会有不同`strings /usr/lib64/libstdc++.so.6 | grep GLIBCXX`
* glibc2.23以上
* Linux查看命令`ldd --version`
2.二次开发编译-环境要求
编译源代码时除gcc、GLIBCXX、glibc需满足`1.运行二进制文件-环境要求`外cmake还需满足
* cmake: 3.0 以上
* Linux查看命令`cmake --version`
# 快速开始
## 1. 项目结构说明
根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GitHub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。SDK目录结构如下
```
.EasyEdge-Linux-m43157-b97741-x86
├── RES                 # 模型资源文件夹一套模型适配不同硬件、OS和部署方式
│ ├── conf.json        # Android、iOS系统APP名字需要
│ ├── model # 模型结构文件
│ ├── params # 模型参数文件
│ ├── label_list.txt # 模型标签文件
│ ├── infer_cfg.json # 模型前后处理等配置文件
├── ReadMe.txt
├── cpp # C++ SDK 文件结构
└── baidu_easyedge_ocr_linux_cpp_aarch64_ARM_gcc5.4_v1.5.1_20220530.tar.gz #armv8架构硬件的C++包,根据自己硬件,选择对应的压缩包解压即可
├── ReadMe.txt
├── bin # 可直接运行的二进制文件
├── include # 二次开发用的头文件
├── lib # 二次开发用的所依赖的库
├── src # 二次开发用的示例工程
└── thirdparty # 第三方依赖
└── baidu_easyedge_ocr_linux_cpp_armv7l_armv7hf_ARM_gcc5.4_v1.5.1_20220530.tar.gz #armv7架构硬件的C++包,根据自己硬件,选择对应的压缩包解压即可
└── python # Python SDK 文件
```
**注意**
1. 【OCR需要编译】因为OCR任务的特殊性本次SDK没有提供bin文件夹可执行文件。开发者在满足文档中gcc和cmake要求后可在`src/demo*`路径下编译获取可执行文件。
2. 【OCR仅支持图像推理不支持视频流推理】
3. ARM-Linux-Python的环境要求和使用请参考[ARM Linux Python环境下的推理部署](./ARM-Linux-Python-SDK.md)文档。
## 2. 测试Demo
> 模型资源文件即压缩包中的RES文件夹默认已经打包在开发者下载的SDK包中请先将tar包整体拷贝到具体运行的设备中再解压缩使用。
SDK中已经包含预先编译的二进制可直接运行。以下运行示例均是`cd cpp/bin`路径下执行的结果。
### 2.1 预测图像
```bash
./easyedge_image_inference {模型RES文件夹路径} {测试图片路径}
```
运行效果示例:
<div align=center><img src="https://user-images.githubusercontent.com/54695910/175855351-68d1a4f0-6226-4484-b190-65f1ac2c7128.png" width="400"></div>
```bash
> ./easyedge_image_inference ../../../../RES 2.jpeg
2019-02-13 16:46:12,659 INFO [EasyEdge] [easyedge.cpp:34] 140606189016192 Baidu EasyEdge Linux Development Kit 0.2.1(20190213)
2019-02-13 16:46:14,083 INFO [EasyEdge] [paddlev2_edge_predictor.cpp:60] 140606189016192 Allocate graph success.
2019-02-13 16:46:14,326 DEBUG [EasyEdge] [paddlev2_edge_predictor.cpp:143] 140606189016192 Inference costs 168 ms
1, 1:txt_frame, p:0.994905 loc: 0.168161, 0.153654, 0.920856, 0.779621
Done
```
### 2.2 预测视频流
```
./easyedge_video_inference {模型RES文件夹路径} {video_type} {video_src_path}
```
其中 video_type 支持三种:
```
video_type : 1 // 本地视频文件
video_type : 2 // 摄像头的index
video_type : 3 // 网络视频流
```
video_src_path: 为 video_type 数值所对应的本地视频路径 、本地摄像头id、网络视频流地址
```
本地视频文件: ./easyedge_video_inference {模型RES文件夹路径} 1 /my_video_file.mp4
本地摄像头: ./easyedge_video_inference {模型RES文件夹路径} 2 1 #/dev/video1
网络视频流: ./easyedge_video_inference {模型RES文件夹路径} 3 rtmp://192.168.x.x:8733/live/src
```
注:以上路径是假模拟路径,开发者需要根据自己实际图像/视频,准备测试图像,并填写正确的测试路径。
# 预测API流程详解
本章节主要结合[2.测试Demo](#2-测试demo)的Demo示例介绍推理API方便开发者学习后二次开发。更详细的API请参考`include/easyedge/easyedge*.h`文件。图像、视频的推理包含以下3个API如下代码片段`step`注释所示。
> ❗注意:<br>
> 1`src`文件夹中包含完整可编译的cmake工程实例建议开发者先行了解[cmake工程基本知识](https://cmake.org/cmake/help/latest/guide/tutorial/index.html)。 <br>
> 2请优先参考SDK中自带的Demo工程的使用流程和说明。遇到错误请优先参考文件中的注释、解释、日志说明。
```cpp
// step 1: SDK配置运行参数
EdgePredictorConfig config;
config.model_dir = {模型文件目录};
// step 2: 创建并初始化Predictor在这里选择合适的引擎
auto predictor = global_controller()->CreateEdgePredictor(config);
// step 3-1: 预测图像
auto img = cv::imread({图片路径});
std::vector<EdgeResultData> results;
predictor->infer(img, results);
// step 3-2: 预测视频
std::vector<EdgeResultData> results;
FrameTensor frame_tensor;
VideoConfig video_config;
video_config.source_type = static_cast<SourceType>(video_type); // source_type 定义参考头文件 easyedge_video.h
video_config.source_value = video_src;
/*
... more video_configs, 根据需要配置video_config的各选项
*/
auto video_decoding = CreateVideoDecoding(video_config);
while (video_decoding->next(frame_tensor) == EDGE_OK) {
results.clear();
if (frame_tensor.is_needed) {
predictor->infer(frame_tensor.frame, results);
render(frame_tensor.frame, results, predictor->model_info().kind);
}
//video_decoding->display(frame_tensor); // 显示当前frame需在video_config中开启配置
//video_decoding->save(frame_tensor); // 存储当前frame到视频需在video_config中开启配置
}
```
若需自定义library search path或者gcc路径修改对应Demo工程下的CMakeList.txt即可。
## 1. SDK参数运行配置
SDK的参数通过`EdgePredictorConfig::set_config``global_controller()->set_config`配置。本Demo 中设置了模型路径,其他参数保留默认参数。更详细的支持运行参数等,可以参考开发工具包中的头文件(`include/easyedge/easyedge_xxxx_config.h`)的详细说明。
配置参数使用方法如下:
```
EdgePredictorConfig config;
config.model_dir = {模型文件目录};
```
## 2. 初始化Predictor
* 接口
```cpp
auto predictor = global_controller()->CreateEdgePredictor(config);
predictor->init();
```
若返回非0请查看输出日志排查错误原因。
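下面给出一个简单的检查示意在上文代码基础上补充返回值判断假设返回0表示初始化成功具体请以头文件说明为准

```cpp
// 示意:检查 init() 的返回值非0时结合输出日志排查
auto predictor = global_controller()->CreateEdgePredictor(config);
int ret = predictor->init();
if (ret != 0) {
  std::cerr << "Predictor init failed, ret = " << ret << std::endl;  // 具体原因请查看输出日志
  return -1;
}
```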
## 3. 预测推理
### 3.1 预测图像
> 在Demo中展示了预测接口infer()传入cv::Mat& image图像内容并将推理结果赋值给std::vector<EdgeResultData>& result。更多关于infer()的使用,可以参考`easyedge.h`头文件中的参数说明,根据实际情况自行传入需要的内容做推理。
* 接口输入
```cpp
/**
* @brief
* 通用接口
* @param image: must be BGR , HWC format (opencv default)
* @param result
* @return
*/
virtual int infer(cv::Mat& image, std::vector<EdgeResultData>& result) = 0;
```
图片的格式务必为opencv默认的BGR, HWC格式。
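如果图像来源不是`cv::imread`例如来自某些解码库的RGB数据可以先转换为BGR再推理。以下为一个简单示意其中`rgb_image`为假设的RGB输入变量

```cpp
// 示意将RGB排布的图像转换为OpenCV默认的BGR后再调用infer()
cv::Mat bgr_image;
cv::cvtColor(rgb_image, bgr_image, cv::COLOR_RGB2BGR);  // rgb_image仅为示例变量名
predictor->infer(bgr_image, results);
```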
* 接口返回
`EdgeResultData`中可以获取对应的分类信息、位置信息。
```cpp
struct EdgeResultData {
int index; // 分类结果的index
std::string label; // 分类结果的label
float prob; // 置信度
// 物体检测 或 图像分割时使用:
float x1, y1, x2, y2; // (x1, y1): 左上角, (x2, y2): 右下角; 均为0~1的长宽比例值。
// 图像分割时使用:
cv::Mat mask; // 0, 1 的mask
std::string mask_rle; // Run Length Encoding游程编码的mask
};
```
*** 关于矩形坐标 ***
x1 * 图片宽度 = 检测框的左上角的横坐标
y1 * 图片高度 = 检测框的左上角的纵坐标
x2 * 图片宽度 = 检测框的右下角的横坐标
y2 * 图片高度 = 检测框的右下角的纵坐标
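按上述换算关系可以把比例坐标转换为像素坐标并用OpenCV绘制检测框。以下为一个示意片段非SDK自带接口`img`为前文读入的cv::Mat`results`为infer()的输出):

```cpp
// 示意将0~1的比例坐标换算为像素坐标并绘制矩形框
for (const auto& item : results) {
  int left   = static_cast<int>(item.x1 * img.cols);  // x1 * 图片宽度
  int top    = static_cast<int>(item.y1 * img.rows);  // y1 * 图片高度
  int right  = static_cast<int>(item.x2 * img.cols);  // x2 * 图片宽度
  int bottom = static_cast<int>(item.y2 * img.rows);  // y2 * 图片高度
  cv::rectangle(img, cv::Point(left, top), cv::Point(right, bottom),
                cv::Scalar(0, 255, 0), 2);
}
```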
*** 关于图像分割mask ***
```
cv::Mat mask为图像掩码的二维数组
{
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
其中1代表为目标区域0代表非目标区域
```
*** 关于图像分割mask_rle ***
该字段返回了mask的游程编码解析方式可参考 [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)
以上字段可以参考demo文件中使用opencv绘制的逻辑进行解析
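mask_rle的具体编码格式请以上述demo为准。这里给出一个常见游程编码的解码示意仅作参考**假设**编码为逗号分隔的计数序列、各段在0和1之间交替且从0开始实际格式可能与此不同

```cpp
// 示意解码一种假设格式的游程编码返回长度为 宽*高 的0/1掩码
#include <sstream>
#include <string>
#include <vector>

std::vector<uint8_t> DecodeRleExample(const std::string& rle, size_t total) {
  std::vector<uint8_t> mask;
  mask.reserve(total);
  std::stringstream ss(rle);
  std::string count_str;
  uint8_t value = 0;  // 假设第一段计数对应0非目标区域
  while (std::getline(ss, count_str, ',')) {
    int count = std::stoi(count_str);
    mask.insert(mask.end(), count, value);
    value = 1 - value;  // 0/1交替
  }
  return mask;
}
```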
### 3.2 预测视频
SDK 提供了支持摄像头读取、视频文件和网络视频流的解析工具类`VideoDecoding`,此类提供了获取视频帧数据的便利函数。通过`VideoConfig`结构体可以控制视频/摄像头的解析策略、抽帧策略、分辨率调整、结果视频存储等功能。对于抽取到的视频帧可以直接作为SDK infer 接口的参数进行预测。
* 接口输入
class`VideoDecoding`
```
/**
* @brief 获取输入源的下一帧
* @param frame_tensor
* @return
*/
virtual int next(FrameTensor &frame_tensor) = 0;
/**
* @brief 显示当前frame_tensor中的视频帧
* @param frame_tensor
* @return
*/
virtual int display(const FrameTensor &frame_tensor) = 0;
/**
* @brief 将当前frame_tensor中的视频帧写为本地视频文件
* @param frame_tensor
* @return
*/
virtual int save(FrameTensor &frame_tensor) = 0;
/**
* @brief 获取视频的fps属性
* @return
*/
virtual int get_fps() = 0;
/**
* @brief 获取视频的width属性
* @return
*/
virtual int get_width() = 0;
/**
* @brief 获取视频的height属性
* @return
*/
virtual int get_height() = 0;
```
struct `VideoConfig`
```
/**
* @brief 视频源、抽帧策略、存储策略的设置选项
*/
struct VideoConfig {
SourceType source_type; // 输入源类型
std::string source_value; // 输入源地址如视频文件路径、摄像头index、网络流地址
int skip_frames{0}; // 设置跳帧每隔skip_frames帧抽取一帧并把该抽取帧的is_needed置为true
int retrieve_all{false}; // 是否抽取所有frame以便于作为显示和存储对于不满足skip_frames策略的frame把所抽取帧的is_needed置为false
int input_fps{0}; // 在采取抽帧之前设置视频的fps
Resolution resolution{Resolution::kAuto}; // 采样分辨率只对camera有效
bool enable_display{false}; // 默认不支持。
std::string window_name{"EasyEdge"};
bool display_all{false}; // 是否显示所有frame若为false仅显示根据skip_frames抽取的frame
bool enable_save{false};
std::string save_path; // frame存储为视频文件的路径
bool save_all{false}; // 是否存储所有frame若为false仅存储根据skip_frames抽取的frame
std::map<std::string, std::string> conf;
};
```
| 序号 | 字段 | 含义 |
| --- | -------------- | ---------------------------------------------------------------------------------------------------------------------------------- |
| 1 | `source_type` | 输入源类型支持视频文件、摄像头、网络视频流三种值分别为1、2、3 |
| 2 | `source_value` | 若`source_type`为视频文件,该值为指向视频文件的完整路径;若`source_type`为摄像头该值为摄像头的index如对于`/dev/video0`的摄像头则index为0若`source_type`为网络视频流,则为该视频流的完整地址。 |
| 3 | `skip_frames` | 设置跳帧每隔skip_frames帧抽取一帧并把该抽取帧的is_needed置为true标记为is_needed的帧是用来做预测的帧。反之直接跳过该帧不经过预测。 |
| 4 | `retrieve_all` | 若置该项为true则无论是否设置跳帧所有的帧都会被抽取返回以作为显示或存储用。 |
| 5 | `input_fps` | 用于抽帧前设置fps |
| 6 | `resolution` | 设置摄像头采样的分辨率,其值请参考`easyedge_video.h`中的定义,注意该分辨率调整仅对输入源为摄像头时有效 |
| 7 | `conf` | 高级选项。部分配置会通过该map来设置 |
*** 注意:***
1. `VideoConfig`不支持`display`功能。如果需要使用`VideoConfig`的`display`功能需要自行编译带有GTK选项的OpenCV。
2. 使用摄像头抽帧时,如果通过`resolution`设置了分辨率调整,但是不起作用,请添加如下选项:
```
video_config.conf["backend"] = "2";
```
3. 部分设备上的CSI摄像头尚未兼容如遇到问题可以通过工单、QQ交流群或微信交流群反馈。
具体接口调用流程可以参考SDK中的`demo_video_inference`。
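结合上表字段,下面给出一个`VideoConfig`的配置示意以本地视频文件、隔帧抽取并保存结果视频为例字段取值仅供参考source_type的取值含义见前文的1/2/3

```cpp
// 示意本地视频文件每隔4帧抽取一帧用于预测并把抽取帧保存为视频
VideoConfig video_config;
video_config.source_type = static_cast<SourceType>(1);  // 1本地视频文件
video_config.source_value = "my_video_file.mp4";
video_config.skip_frames = 4;           // 抽取帧的is_needed会被置为true
video_config.enable_save = true;
video_config.save_path = "result.mp4";  // 抽取帧存储为视频文件的路径
auto video_decoding = CreateVideoDecoding(video_config);
```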
# FAQ
1. 如何处理一些 undefined reference / error while loading shared libraries?
> 如:./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory
遇到该问题时请找到具体的库的位置设置LD_LIBRARY_PATH或者安装缺少的库。
> 示例一libverify.so.1: cannot open shared object file: No such file or directory
> 链接找不到libverify.so文件一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib 解决(实际冒号后面添加的路径以libverify.so文件所在的路径为准)
> 示例二libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory
> 链接找不到libopencv_videoio.so文件一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib 解决(实际冒号后面添加的路径以libopencv_videoio.so所在路径为准)
> 示例三GLIBCXX_X.X.X not found
> 链接无法找到glibc版本请确保系统gcc版本>=SDK的gcc版本。升级gcc/glibc可以百度搜索相关文献。
2. 运行二进制时,提示 libverify.so cannot open shared object file
可能cmake没有正确设置rpath, 可以设置LD_LIBRARY_PATH为sdk的lib文件夹后再运行
```bash
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo
```
3. 编译时报错file format not recognized
可能是因为在复制SDK时文件信息丢失。请将整个压缩包复制到目标设备中再解压缩、编译。

View File

@@ -1,318 +0,0 @@
# 简介
本文档介绍FastDeploy中的模型SDK在ARM Linux C++环境下1服务化推理部署步骤2介绍模型推理全流程API方便开发者了解项目后二次开发。
其中ARM Linux Python请参考[ARM Linux Python环境下的HTTP推理部署](./ARM-Linux-Python-SDK-Serving.md)文档。
**注意**部分模型如OCR等不支持服务化推理。
<!--ts-->
* [简介](#简介)
* [安装准备](#安装准备)
* [1. 硬件支持](#1-硬件支持)
* [2. 软件环境](#2-软件环境)
* [快速开始](#快速开始)
* [1. 项目结构说明](#1-项目结构说明)
* [2. 测试 HTTP Demo](#2-测试-http-demo)
* [2.1 启动HTTP预测服务](#21-启动http预测服务)
* [HTTP API流程详解](#http-api流程详解)
* [1. 开启http服务](#1-开启http服务)
* [2. 请求http服务](#2-请求http服务)
* [2.1 http 请求方式一:不使用图片base64格式](#21-http-请求方式一不使用图片base64格式)
* [2.2 http 请求方法二:使用图片base64格式](#22-http-请求方法二使用图片base64格式)
* [3. http返回数据](#3-http返回数据)
* [FAQ](#faq)
<!--te-->
# 安装准备
## 1. 硬件支持
目前支持的ARM架构aarch64 、armv7hf
## 2. 软件环境
1.运行二进制文件-环境要求
* gcc: 5.4 以上 (GLIBCXX_3.4.22)
* Linux下查看gcc版本的命令可能因系统差异而不同`gcc --version`
* Linux下C++基础库GLIBCXX的命令可能因系统差异路径会有不同可检测自己环境下的情况`strings /usr/lib64/libstdc++.so.6 | grep GLIBCXX`
* glibc2.23以上
* Linux查看命令`ldd --version`
2.二次开发编译-环境要求
编译源代码时除了gcc、GLIBCXX、glibc满足`1.运行二进制文件-环境要求`还需要cmake满足要求。
* cmake: 3.0 以上
* Linux查看命令`cmake --version`
# 快速开始
## 1. 项目结构说明
根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GitHub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压后SDK目录结构如下
```
.EasyEdge-Linux-m43157-b97741-x86
├── RES                 # 模型资源文件夹一套模型适配不同硬件、OS和部署方式
│ ├── conf.json        # Android、iOS系统APP名字需要
│ ├── model # 模型结构文件
│ ├── params # 模型参数文件
│ ├── label_list.txt # 模型标签文件
│ ├── infer_cfg.json # 模型前后处理等配置文件
├── ReadMe.txt
├── cpp                 # C++ SDK 文件结构
└── baidu_easyedge_linux_cpp_x86_64_CPU.Generic_gcc5.4_v1.4.0_20220325.tar.gz
├── bin         # 可直接运行的二进制文件
├── include     # 二次开发用的头文件
├── lib         # 二次开发用的所依赖的库
├── src         # 二次开发用的示例工程
└── thirdparty  # 第三方依赖
└── python # Python SDK 文件
```
## 2. 测试 HTTP Demo
> 模型资源文件即压缩包中的RES文件夹默认已经打包在开发者下载的SDK包中请先将tar包整体拷贝到具体运行的设备中再解压缩使用。
SDK中已经包含预先编译的二进制可直接运行。以下运行示例均是`cd cpp/bin`路径下执行的结果。
### 2.1 启动HTTP预测服务
```
./easyedge_serving {模型RES文件夹路径}
```
启动后日志中会显示如下设备IP和24401端口号信息
```
HTTP is now serving at 0.0.0.0:24401
```
此时,开发者可以打开浏览器,输入链接地址`http://0.0.0.0:24401`(这里的`设备IP和24401端口号`根据开发者电脑显示修改),选择图片来进行测试。
<div align=center><img src="https://user-images.githubusercontent.com/54695910/175855495-cd8d46ec-2492-4297-b3e4-2bda4cd6727c.png" width="600"></div>
同时可以调用HTTP接口来访问服务具体参考下文的[二次开发](#10)接口说明。
# HTTP API流程详解
本章节主要结合[2.1 HTTP Demo](#21-启动http预测服务)的API介绍方便开发者学习并将运行库嵌入到开发者的程序当中更详细的API请参考`include/easyedge/easyedge*.h`文件。http服务包含服务端和客户端目前支持的能力包括以下几种方式Demo中提供了不使用图片base64格式的`方式一:浏览器请求的方式`,其他几种方式开发者根据个人需要,选择开发。
## 1. 开启http服务
http服务的启动可直接使用`bin/easyedge_serving`,或参考`src/demo_serving.cpp`文件修改相关逻辑
```cpp
/**
* @brief 开启一个简单的demo http服务。
* 该方法会block直到收到sigint/sigterm。
* http服务里图片的解码运行在cpu之上可能会降低推理速度。
* @tparam ConfigT
* @param config
* @param host
* @param port
* @param service_id service_id user parameter, uri '/get/service_id' will respond this value with 'text/plain'
* @param instance_num 实例数量,根据内存/显存/时延要求调整
* @return
*/
template<typename ConfigT>
int start_http_server(
const ConfigT &config,
const std::string &host,
int port,
const std::string &service_id,
int instance_num = 1);
```
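以下为基于上述接口的一个调用示意(假设配置类沿用前文的`EdgePredictorConfig`service_id仅为占位示例实际用法请以`src/demo_serving.cpp`为准):

```cpp
// 示意加载模型并在24401端口启动HTTP服务该调用会阻塞直到收到sigint/sigterm
EdgePredictorConfig config;
config.model_dir = "../../../../RES";  // 模型RES文件夹路径示例
int ret = start_http_server(config, "0.0.0.0", 24401, "my_service_id", 1);
```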
## 2. 请求http服务
> 开发者可以打开浏览器,`http://{设备ip}:24401`,选择图片来进行测试。
### 2.1 http 请求方式一:不使用图片base64格式
URL中的get参数
| 参数 | 说明 | 默认值 |
| --------- | --------- | ---------------- |
| threshold | 阈值过滤, 0~1 | 如不提供,则会使用模型的推荐阈值 |
HTTP POST Body即为图片的二进制内容(无需base64, 无需json)
Python请求示例
```Python
import requests
with open('./1.jpg', 'rb') as f:
    img = f.read()
result = requests.post(
    'http://127.0.0.1:24401/',
    params={'threshold': 0.1},
    data=img).json()
```
### 2.2 http 请求方法二:使用图片base64格式
HTTP方法POST
Header如下
| 参数 | 值 |
| ------------ | ---------------- |
| Content-Type | application/json |
**Body请求填写**
* 分类网络:
body 中请求示例
```
{
"image": "<base64数据>"
"top_num": 5
}
```
body中参数详情
| 参数 | 是否必选 | 类型 | 可选值范围 | 说明 |
| ------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- |
| image | 是 | string | - | 图像数据base64编码要求base64图片编码后大小不超过4M,最短边至少15px最长边最大4096px支持jpg/png/bmp格式 **注意去掉头部** |
| top_num | 否 | number | - | 返回分类数量,不填该参数,则默认返回全部分类结果 |
* 检测和分割网络:
Body请求示例
```
{
"image": "<base64数据>"
}
```
body中参数详情
| 参数 | 是否必选 | 类型 | 可选值范围 | 说明 |
| --------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- |
| image | 是 | string | - | 图像数据base64编码要求base64图片编码后大小不超过4M,最短边至少15px最长边最大4096px支持jpg/png/bmp格式 **注意去掉头部** |
| threshold | 否 | number | - | 默认为推荐阈值,也可自行根据需要进行设置 |
Python请求示例
```Python
import base64
import requests
def main():
with open("图像路径", 'rb') as f:
result = requests.post("http://{服务ip地址}:24401/", json={
"image": base64.b64encode(f.read()).decode("utf8")
})
# print(result.request.body)
# print(result.request.headers)
print(result.content)
if __name__ == '__main__':
main()
```
## 3. http返回数据
| 字段 | 类型说明 | 其他 |
| ---------- | ------ | ------------------------------------ |
| error_code | Number | 0为成功,非0参考message获得具体错误信息 |
| results | Array | 内容为具体的识别结果。其中字段的具体含义请参考`预测图像-返回格式`一节 |
| cost_ms | Number | 预测耗时ms不含网络交互时间 |
返回示例
```json
{
"cost_ms": 52,
"error_code": 0,
"results": [
{
"confidence": 0.94482421875,
"index": 1,
"label": "IronMan",
"x1": 0.059185408055782318,
"x2": 0.18795496225357056,
"y1": 0.14762254059314728,
"y2": 0.52510076761245728,
"mask": "...", // 图像分割模型字段
"trackId": 0, // 目标追踪模型字段
},
]
}
```
*** 关于矩形坐标 ***
x1 * 图片宽度 = 检测框的左上角的横坐标
y1 * 图片高度 = 检测框的左上角的纵坐标
x2 * 图片宽度 = 检测框的右下角的横坐标
y2 * 图片高度 = 检测框的右下角的纵坐标
*** 关于图像分割mask ***
```
cv::Mat mask为图像掩码的二维数组
{
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
其中1代表为目标区域0代表非目标区域
```
# FAQ
1. 如何处理一些 undefined reference / error while loading shared libraries?
> 如:./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory
遇到该问题时请找到具体的库的位置设置LD_LIBRARY_PATH或者安装缺少的库。
> 示例一libverify.so.1: cannot open shared object file: No such file or directory
> 链接找不到libverify.so文件一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib 解决(实际冒号后面添加的路径以libverify.so文件所在的路径为准)
> 示例二libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory
> 链接找不到libopencv_videoio.so文件一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib 解决(实际冒号后面添加的路径以libopencv_videoio.so所在路径为准)
> 示例三GLIBCXX_X.X.X not found
> 链接无法找到glibc版本请确保系统gcc版本>=SDK的gcc版本。升级gcc/glibc可以百度搜索相关文献。
2. 使用libcurl请求http服务时速度明显变慢
这是因为libcurl请求continue导致server等待数据的问题添加空的header即可
```cpp
headers = curl_slist_append(headers, "Expect:");
```
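下面是一个结合该header的libcurl调用示意均为libcurl标准接口URL、`img_data`/`img_size`等仅为示例变量):

```cpp
// 示意用libcurl POST图片二进制并通过空的"Expect:"头避免100-continue等待
struct curl_slist* headers = nullptr;
headers = curl_slist_append(headers, "Expect:");  // 关键添加空的Expect头
CURL* curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, "http://127.0.0.1:24401/?threshold=0.1");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, img_data);     // img_data图片二进制示例变量
curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, img_size);  // img_size二进制长度示例变量
CURLcode res = curl_easy_perform(curl);
curl_slist_free_all(headers);
curl_easy_cleanup(curl);
```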
3. 运行二进制时,提示 libverify.so cannot open shared object file
可能cmake没有正确设置rpath, 可以设置LD_LIBRARY_PATH为sdk的lib文件夹后再运行
```bash
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo
```
4. 编译时报错file format not recognized
可能是因为在复制SDK时文件信息丢失。请将整个压缩包复制到目标设备中再解压缩、编译。

View File

@@ -1,371 +0,0 @@
# 简介
本文档以[千分类模型_MobileNetV3](https://ai.baidu.com/easyedge/app/openSource)为例介绍FastDeploy中的模型SDK 在**ARM Linux Python** 环境下:1)图像推理部署步骤; 2介绍模型推理全流程API方便开发者了解项目后二次开发。其中ARM Linux C++请参考[ARM Linux C++环境下的推理部署](./ARM-Linux-CPP-SDK-Inference.md)文档。
**注意**部分模型如Tinypose、OCR等仅支持图像推理不支持视频推理。
<!--ts-->
* [简介](#简介)
* [环境准备](#环境准备)
* [1.SDK下载](#1sdk下载)
* [2.硬件支持](#2硬件支持)
* [3.python环境](#3python环境)
* [4.安装依赖](#4安装依赖)
* [4.1.安装paddlepaddle](#41安装paddlepaddle)
* [4.2.安装EasyEdge Python Wheel 包](#42安装easyedge-python-wheel-包)
* [快速开始](#快速开始)
* [1.文件结构说明](#1文件结构说明)
* [2.测试Demo](#2测试demo)
* [2.1预测图像](#21预测图像)
* [Demo API介绍](#demo-api介绍)
* [1.基础流程](#1基础流程)
* [2.初始化](#2初始化)
* [3.SDK参数配置](#3sdk参数配置)
* [4.预测图像](#4预测图像)
* [FAQ](#faq)
<!--te-->
# 环境准备
## 1.SDK下载
根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GitHub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。
```shell
EasyEdge-Linux-x86--[部署芯片]
├──...
├──python # Linux Python SDK
├── # 特定Python版本的EasyEdge Wheel包, 二次开发可使用
├── BaiduAI_EasyEdge_SDK-1.3.1-cp36-cp36m-linux_aarch64.whl
├── infer_demo # demo体验完整文件
│ ├── demo_xxx.py # 包含前后处理的端到端推理demo文件
│ └── demo_serving.py # 提供http服务的demo文件
├── tensor_demo # 学习自定义算法前后处理时使用
│ └── demo_xxx.py
```
## 2.硬件支持
目前支持的ARM架构aarch64 、armv7hf
## 3.python环境
> ARM Linux SDK仅支持Python 3.6
使用如下命令获取已安装的Python版本号。如果本机的版本不匹配建议使用[pyenv](https://github.com/pyenv/pyenv)、[anaconda](https://www.anaconda.com/)等Python版本管理工具对SDK所在目录进行配置。
```shell
$python3 --version
```
接着使用如下命令确认pip的版本是否满足要求要求pip版本为20.2.2或更高版本。详细的pip安装过程可以参考[官网教程](https://pip.pypa.io/en/stable/installation/)。
```shell
$python3 -m pip --version
```
## 4.安装依赖
### 4.1.安装paddlepaddle
根据具体的部署芯片CPU/GPU安装对应的PaddlePaddle的whl包。
`armv8 CPU平台`可以使用如下命令进行安装:
```shell
python3 -m pip install http://aipe-easyedge-public.bj.bcebos.com/easydeploy/paddlelite-2.11-cp36-cp36m-linux_aarch64.whl
```
### 4.2.安装EasyEdge Python Wheel 包
`python`目录下安装特定Python版本的EasyEdge Wheel包。`armv8 CPU平台`可以使用如下命令进行安装:
```shell
python3 -m pip install -U BaiduAI_EasyEdge_SDK-1.3.1-cp36-cp36m-linux_aarch64.whl
```
# 快速开始
## 1.文件结构说明
Python SDK文件结构如下
```shell
.EasyEdge-Linux-x86--[部署芯片]
├── RES # 模型资源文件夹一套模型适配不同硬件、OS和部署方式
│ ├── conf.json # Android、iOS系统APP名字需要
│ ├── label_list.txt # 模型标签文件
│ ├── model # 模型结构文件
│ ├── params # 模型参数文件
│ └── infer_cfg.json # 模型前后处理等配置文件
├── ReadMe.txt
├── cpp # C++ SDK 文件结构
└── python # Python SDK 文件
├── BaiduAI_EasyEdge_SDK-1.3.1-cp36-cp36m-linux_aarch64.whl #EasyEdge Python Wheel 包
├── infer_demo
├── demo_armv8_cpu.py # 图像推理
├── demo_serving.py # HTTP服务化推理
└── tensor_demo # 学习自定义算法前后处理时使用
├── demo_armv8_cpu.py
```
## 2.测试Demo
> 模型资源文件默认已经打包在开发者下载的SDK包中 默认为`RES`目录。
### 2.1预测图像
使用infer_demo文件夹下的demo文件。
```bash
python3 demo_x86_cpu.py {模型RES文件夹} {测试图片路径}
```
运行效果示例:
<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854068-28d27c0a-ef83-43ee-9e89-b65eed99b476.jpg" width="300"></div>
```shell
2022-06-14 14:40:16 INFO [EasyEdge] [demo_nvidia_gpu.py:38] 140518522509120: Init paddlefluid engine...
2022-06-14 14:40:20 INFO [EasyEdge] [demo_nvidia_gpu.py:38] 140518522509120: Paddle version: 2.2.2
{'confidence': 0.9012349843978882, 'index': 8, 'label': 'n01514859 hen'}
```
可以看到,运行结果为`index: 8, label: hen`通过imagenet [类别映射表](https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a),可以找到对应的类别,即 'hen',由此说明我们的预测结果正确。
# Demo API介绍
本章节主要结合[测试Demo](#2测试Demo)的Demo示例介绍推理API方便开发者学习后二次开发。
## 1.基础流程
> ❗注意请优先参考SDK中自带demo的使用流程和说明。遇到错误请优先参考文件中的注释、解释、日志说明。
`infer_demo/demo_xx_xx.py`
```python
# 引入EasyEdge运行库
import BaiduAI.EasyEdge as edge
# 创建并初始化一个预测Progam选择合适的引擎
pred = edge.Program()
pred.init(model_dir={RES文件夹路径}, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 CPU
# pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 Nvidia GPU
# pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE) # armv8 CPU
# 预测图像
res = pred.infer_image({numpy.ndarray的图片})
# 关闭结束预测Progam
pred.close()
```
`infer_demo/demo_serving.py`
```python
import BaiduAI.EasyEdge as edge
from BaiduAI.EasyEdge.serving import Serving
# 创建并初始化Http服务
server = Serving(model_dir={RES文件夹路径}, license=serial_key)
# 运行Http服务
# 请参考同级目录下demo_xx_xx.py里:
# pred.init(model_dir=xx, device=xx, engine=xx, device_id=xx)
# 对以下参数device\device_id和engine进行修改
server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 CPU
# server.run(host=host, port=port, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 Nvidia GPU
# server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE) # armv8 CPU
```
## 2.初始化
* 接口
```python
def init(self,
model_dir,
device=Device.CPU,
engine=Engine.PADDLE_FLUID,
config_file='conf.json',
preprocess_file='preprocess_args.json',
model_file='model',
params_file='params',
label_file='label_list.txt',
infer_cfg_file='infer_cfg.json',
device_id=0,
thread_num=1
):
"""
Args:
model_dir: str
device: BaiduAI.EasyEdge.Device比如Device.CPU
engine: BaiduAI.EasyEdge.Engine 比如: Engine.PADDLE_FLUID
config_file: str
preprocess_file: str
model_file: str
params_file: str
label_file: str 标签文件
infer_cfg_file: 包含预处理、后处理信息的文件
device_id: int 设备ID
thread_num: int CPU的线程数
Raises:
RuntimeError, IOError
Returns:
bool: True if success
"""
```
若返回不是True请查看输出日志排查错误原因。
## 3.SDK参数配置
使用 CPU 预测时,可以通过在 init 中设置 thread_num 使用多线程预测。如:
```python
pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID, thread_num=4)
```
使用 GPU 预测时,可以通过在 init 中设置 device_id 指定需要的GPU device id。如
```python
pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID, device_id=0)
```
## 4.预测图像
* 接口
```python
def infer_image(self, img,
threshold=0.3,
channel_order='HWC',
color_format='BGR',
data_type='numpy'):
"""
Args:
img: np.ndarray or bytes
threshold: float
only return result with confidence larger than threshold
channel_order: string
channel order HWC or CHW
color_format: string
color format order RGB or BGR
data_type: string
仅在图像分割时有意义。 'numpy' or 'string'
'numpy': 返回已解析的mask
'string': 返回未解析的mask游程编码
Returns:
list
"""
```
* 返回格式: `[dict1, dict2, ...]`
| 字段 | 类型 | 取值 | 说明 |
| ---------- | -------------------- | --------- | ------------------------ |
| confidence | float | 0~1 | 分类或检测的置信度 |
| label | string | | 分类或检测的类别 |
| index | number | | 分类或检测的类别 |
| x1, y1 | float | 0~1 | 物体检测,矩形的左上角坐标 (相对长宽的比例值) |
| x2, y2 | float | 0~1 | 物体检测,矩形的右下角坐标(相对长宽的比例值) |
| mask | string/numpy.ndarray | 图像分割的mask | |
***关于矩形坐标***
x1 * 图片宽度 = 检测框的左上角的横坐标
y1 * 图片高度 = 检测框的左上角的纵坐标
x2 * 图片宽度 = 检测框的右下角的横坐标
y2 * 图片高度 = 检测框的右下角的纵坐标
可以参考 demo 文件中使用 opencv 绘制矩形的逻辑。
***结果示例***
i) 图像分类
```json
{
"index": 736,
"label": "table",
"confidence": 0.9
}
```
ii) 物体检测
```json
{
"index": 8,
"label": "cat",
"confidence": 1.0,
"x1": 0.21289,
"y1": 0.12671,
"x2": 0.91504,
"y2": 0.91211,
}
```
iii) 图像分割
```json
{
"name": "cat",
"score": 1.0,
"location": {
"left": ...,
"top": ...,
"width": ...,
"height": ...,
},
"mask": ...
}
```
mask字段中data_type为`numpy`时,返回图像掩码的二维数组
```
{
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
其中1代表为目标区域0代表非目标区域
```
data_type为`string`时mask的游程编码解析方式可参考 [demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)
# FAQ
1.执行infer_demo文件时提示your generated code is out of date and must be regenerated with protoc >= 3.19.0
进入当前项目首先卸载protobuf
```shell
python3 -m pip uninstall protobuf
```
安装低版本protobuf
```shell
python3 -m pip install protobuf==3.19.0
```

View File

@@ -1,266 +0,0 @@
# 简介
本文档以[千分类模型_MobileNetV3](https://ai.baidu.com/easyedge/app/openSource)为例介绍FastDeploy中的模型SDK 在**ARM Linux Python** 环境下: 1)**服务化**推理部署步骤; 2介绍模型推理全流程API方便开发者了解项目后二次开发。其中ARM Linux C++请参考[ARM Linux C++环境下的HTTP推理部署](./ARM-Linux-CPP-SDK-Serving.md)文档。
**注意**部分模型如OCR等不支持服务化推理。
<!--ts-->
* [简介](#简介)
* [环境准备](#环境准备)
* [1.SDK下载](#1sdk下载)
* [2.硬件支持](#2硬件支持)
* [3.Python环境](#3python环境)
* [4.安装依赖](#4安装依赖)
* [4.1.安装paddlepaddle](#41安装paddlepaddle)
* [4.2.安装EasyEdge Python Wheel 包](#42安装easyedge-python-wheel-包)
* [快速开始](#快速开始)
* [1.文件结构说明](#1文件结构说明)
* [2.测试Serving服务](#2测试serving服务)
* [2.1 启动HTTP预测服务](#21-启动http预测服务)
* [HTTP API流程详解](#http-api流程详解)
* [1. 开启http服务](#1-开启http服务)
* [2. 请求http服务](#2-请求http服务)
* [2.1 http 请求方式不使用图片base64格式](#21-http-请求方式不使用图片base64格式)
* [3. http返回数据](#3-http返回数据)
* [FAQ](#faq)
<!--te-->
# 环境准备
## 1.SDK下载
根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GitHub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压缩后的文件结构如下。
```shell
EasyEdge-Linux-x86-[部署芯片]
├── RES # 模型文件资源文件夹,可替换为其他模型
├── README.md
├── cpp # C++ SDK
└── python # Python SDK
```
## 2.硬件支持
目前支持的ARM架构aarch64 、armv7hf
## 3.Python环境
> ARM Linux SDK仅支持Python 3.6
使用如下命令获取已安装的Python版本号。如果本机的版本不匹配需要根据ARM Linux下Python安装方式进行安装。不建议在ARM Linux下使用conda因为ARM Linux场景通常资源很有限
```shell
$python3 --version
```
接着使用如下命令确认pip的版本是否满足要求要求pip版本为20.2.2或更高版本。详细的pip安装过程可以参考[官网教程](https://pip.pypa.io/en/stable/installation/)。
```shell
$python3 -m pip --version
```
## 4.安装依赖
### 4.1.安装paddlepaddle
根据具体的部署芯片CPU/GPU安装对应的PaddlePaddle的whl包。
`armv8 CPU平台`可以使用如下命令进行安装:
```shell
python3 -m pip install http://aipe-easyedge-public.bj.bcebos.com/easydeploy/paddlelite-2.11-cp36-cp36m-linux_aarch64.whl
```
### 4.2.安装EasyEdge Python Wheel 包
`python`目录下安装特定Python版本的EasyEdge Wheel包。`armv8 CPU平台`可以使用如下命令进行安装:
```shell
python3 -m pip install -U BaiduAI_EasyEdge_SDK-1.3.1-cp36-cp36m-linux_aarch64.whl
```
# 快速开始
## 1.文件结构说明
Python SDK文件结构如下
```shell
EasyEdge-Linux-x86--[部署芯片]
├──...
├──python # Linux Python SDK
├── # 特定Python版本的EasyEdge Wheel包, 二次开发可使用
├── BaiduAI_EasyEdge_SDK-1.3.1-cp36-cp36m-linux_aarch64.whl
├── infer_demo # demo体验完整文件
│ ├── demo_xxx.py # 包含前后处理的端到端推理demo文件
│ └── demo_serving.py # 提供http服务的demo文件
├── tensor_demo # 学习自定义算法前后处理时使用
│ └── demo_xxx.py
```
## 2.测试Serving服务
> 模型资源文件默认已经打包在开发者下载的SDK包中 默认为`RES`目录。
### 2.1 启动HTTP预测服务
指定对应的模型文件夹(默认为`RES`、设备ip和指定端口号运行如下命令。
```shell
python3 demo_serving.py {模型RES文件夹} {host, default 0.0.0.0} {port, default 24401}
```
成功启动后,终端中会显示如下字样。
```shell
...
* Running on {host ip}:24401
```
如果是在局域网内的机器上部署,开发者此时可以打开浏览器,输入`http://{host ip}:24401`,选择图片来进行测试,运行效果如下。
<img src="https://user-images.githubusercontent.com/54695910/175854073-fb8189e5-0ffb-472c-a17d-0f35aa6a8418.png" style="zoom:50%;" />
如果是在远程机器上部署,那么可以参考`demo_serving.py`中的 `http_client_test()函数`请求http服务来执行推理。
# HTTP API流程详解
## 1. 开启http服务
http服务的启动使用`demo_serving.py`文件
```python
class Serving(object):
"""
SDK local serving
"""
def __init__(self, model_dir, license='', model_filename='model', params_filename='params'):
self.program = None
self.model_dir = model_dir
self.model_filename = model_filename
self.params_filename = params_filename
self.program_lock = threading.Lock()
self.license_key = license
# 只有ObjectTracking会初始化video_processor
self.video_processor = None
def run(self, host, port, device, engine=Engine.PADDLE_FLUID, service_id=0, device_id=0, **kwargs):
"""
Args:
host : str
port : str
device : BaiduAI.EasyEdge.Device比如Device.CPU
engine : BaiduAI.EasyEdge.Engine 比如: Engine.PADDLE_FLUID
"""
self.run_serving_with_flask(host, port, device, engine, service_id, device_id, **kwargs)
```
## 2. 请求http服务
> 开发者可以打开浏览器,`http://{设备ip}:24401`,选择图片来进行测试。
### 2.1 http 请求方式不使用图片base64格式
URL中的get参数
| 参数 | 说明 | 默认值 |
| --------- | --------- | ---------------- |
| threshold | 阈值过滤, 0~1 | 如不提供,则会使用模型的推荐阈值 |
HTTP POST Body即为图片的二进制内容
Python请求示例
```python
import requests
with open('./1.jpg', 'rb') as f:
    img = f.read()
result = requests.post(
    'http://127.0.0.1:24401/',
    params={'threshold': 0.1},
    data=img).json()
```
## 3. http返回数据
| 字段 | 类型说明 | 其他 |
| ---------- | ------ | ------------------------------------ |
| error_code | Number | 0为成功,非0参考message获得具体错误信息 |
| results | Array | 内容为具体的识别结果。其中字段的具体含义请参考`预测图像-返回格式`一节 |
| cost_ms | Number | 预测耗时ms不含网络交互时间 |
返回示例
```json
{
"cost_ms": 52,
"error_code": 0,
"results": [
{
"confidence": 0.94482421875,
"index": 1,
"label": "IronMan",
"x1": 0.059185408055782318,
"x2": 0.18795496225357056,
"y1": 0.14762254059314728,
"y2": 0.52510076761245728,
"mask": "...", // 图像分割模型字段
"trackId": 0, // 目标追踪模型字段
},
]
}
```
***关于矩形坐标***
x1 * 图片宽度 = 检测框的左上角的横坐标
y1 * 图片高度 = 检测框的左上角的纵坐标
x2 * 图片宽度 = 检测框的右下角的横坐标
y2 * 图片高度 = 检测框的右下角的纵坐标
*** 关于图像分割mask ***
```
cv::Mat mask为图像掩码的二维数组
{
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
其中1代表为目标区域0代表非目标区域
```
# FAQ
1.执行infer_demo文件时提示your generated code is out of date and must be regenerated with protoc >= 3.19.0
进入当前项目首先卸载protobuf
```shell
python3 -m pip uninstall protobuf
```
安装低版本protobuf
```shell
python3 -m pip install protobuf==3.19.0
```

View File

@@ -1,404 +0,0 @@
# 简介
本文档介绍FastDeploy中的模型SDK在Android环境下1推理操作步骤2介绍模型SDK使用说明方便开发者了解项目后二次开发。
<!--ts-->
* [简介](#简介)
* [系统支持说明](#系统支持说明)
* [快速开始](#快速开始)
* [1. 项目结构说明](#1-项目结构说明)
* [2. APP 标准版测试](#2-app-标准版测试)
* [2.1 扫码体验](#21-扫码体验)
* [2.2 源码运行](#22-源码运行)
* [3. 精简版测试](#3-精简版测试)
* [SDK使用说明](#sdk使用说明)
* [1. 集成指南](#1-集成指南)
* [1.1 依赖库集成](#11-依赖库集成)
* [1.2 添加权限](#12-添加权限)
* [1.3 混淆规则(可选)](#13-混淆规则可选)
* [2. API调用流程示例](#2-api调用流程示例)
* [2.1 初始化](#21-初始化)
* [2.2 预测图像](#22-预测图像)
* [错误码](#错误码)
<!--te-->
# 系统支持说明
1. Android 版本支持范围Android 5.0API 21<= Android < Android 10API 29。
2. 硬件支持情况:支持 arm64-v8a、armeabi-v7a暂不支持模拟器。
* 官网测试机型红米k30Vivo v1981a华为oxp-an00华为cdy-an90华为pct-al10荣耀yal-al00OPPO Reno5 Pro 5G
3. 其他说明
* 【图像分割类算法】1图像分割类算法暂未提供实时摄像头推理功能开发者可根据自己需要进行安卓开发2PP-Humanseg-Lite模型设计初衷为横屏视频会议等场景本次安卓SDK仅支持竖屏场景开发者可根据自己需要开发横屏功能。
* 【OCR模型】OCR任务第一次启动时第一张图片推理时间较久属于正常情况因为涉及到模型加载、预处理等工作。
> 预测图像时运行内存不能过小一般大于模型资源文件夹大小的3倍。
# 快速开始
## 1. 项目结构说明
根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GitHub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载SDK目录结构如下
```
.EasyEdge-Android-SDK
├── app
│ ├── src/main
│ │ ├── assets
│ │ │ ├── demo
│ │ │ │ └── conf.json # APP名字
│ │ │ ├── infer # 模型资源文件夹一套模型适配不同硬件、OS和部署方式
│ │ │ │ ├── model # 模型结构文件
│ │ │ │ ├── params # 模型参数文件
│ │ │ │ ├── label_list.txt # 模型标签文件
│ │ │ │ └── infer_cfg.json # 模型前后处理等配置文件
│ │ ├── java/com.baidu.ai.edge/demo
│ │ │ ├── infertest # 通用ARM精简版测试
│ │ │ │ ├── TestInferClassifyTask.java # 图像分类
│ │ │ │ ├── TestInferDetectionTask.java # 物体检测
│ │ │ │ ├── TestInferSegmentTask.java # 实例分割
│ │ │ │ ├── TestInferPoseTask.java # 姿态估计
│ │ │ │ ├── TestInferOcrTask.java # OCR
│ │ │ │ └── MainActivity.java # 精简版启动 Activity
│ │ │ ├── MainActivity.java # Demo APP 启动 Activity
│ │ │ ├── CameraActivity.java # 摄像头UI逻辑
│ │ │ └── ...
│ │ └── ...
│ ├── libs
│ │ ├── armeabi-v7a # v7a的依赖库
│ │ ├── arm64-v8a # v8a的依赖库
│ │ └── easyedge-sdk.jar # jar文件
│ └── ...
├── camera_ui # UI模块包含相机逻辑
├── README.md
└── ... # 其他 gradle 等工程文件
```
## 2. APP 标准版测试
考虑到部分Android开发板没有摄像头本项目开发了标准版和精简版两种标准版会调用Android系统的摄像头采集图像来进行AI模型推理精简版可在没有摄像头的开发板上运行需要开发者自行准备测试图像。开发者可根据硬件情况选择对应的版本。
### 2.1 扫码体验
扫描二维码二维码见下载网页`体验Demo`无需任何依赖手机上下载即可直接体验
<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854064-a31755d1-52b9-416d-b35d-885b7338a6cc.png" width="600"></div>
### 2.2 源码运行
1下载对应的SDK解压工程。</br>
<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854071-f4c17de8-83c2-434e-882d-c175f4202a2d.png" width="600"></div>
2打开Android Studio 点击 "Import Project..."File->New-> "Import Project...", 选择解压后的目录。</br>
3手机连接Android Studio并打开开发者模式。不了解开发者模式的开发者可通过浏览器搜索相关教程。</br>
4此时点击运行按钮手机上会有新app安装完毕运行效果和二维码扫描的一样。</br>
<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854049-988414c7-116a-4261-a0c7-2705cc199538.png" width="400"></div>
## 3. 精简版测试
* 考虑部分Android开发板没有摄像头本项目提供了精简版本精简版忽略摄像头等UI逻辑可兼容如无摄像头的开发板测试。
* 精简版对应的测试图像路径,在代码`src/main/java/com.baidu.ai.edge/demo/TestInfer*.java`中进行了设置开发者可以准备图像到对应路径测试也可以修改java代码测试。
* 支持以下硬件环境的精简版测试通用ARM图像分类、物体检测、实例分割、姿态估计、文字识别。
示例代码位于 app 模块下 infertest 目录,修改 app/src/main/AndroidManifest.xml 中的启动 Activity 开启测试。
修改前:
```
<activity android:name=".MainActivity">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
<activity
android:name=".CameraActivity"
android:screenOrientation="portrait" >
</activity>
```
修改后:
```
<!-- 以通用ARM为例 -->
<activity android:name=".infertest.MainActivity">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
```
注意:修改后,因为没有测试数据,需要开发者准备一张测试图像,放到 `app/src/main/assets/` 路径下,并按照`app/src/main/java/com/baidu/ai/edge/demo/infertest/TestInfer*.java`中的图像命名要求对图像进行命名。
<div align="center">
| Demo APP 检测模型运行示例 | 精简版检测模型运行示例 |
| --------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------- |
| ![Demo APP](https://user-images.githubusercontent.com/54695910/175855181-595fd449-7351-4ec6-a3b8-68c021b152f6.jpeg) | ![精简版](https://user-images.githubusercontent.com/54695910/175855176-075f0c8a-b05d-4d60-a2a1-3f0204c6386e.jpeg) |
</div>
# SDK使用说明
本节介绍如何将 SDK 接入开发者的项目中使用。
## 1. 集成指南
步骤一:依赖库集成
步骤二:添加必要权限
步骤三:混淆配置(可选)
### 1.1 依赖库集成
A. 项目中未集成其他 jar 包和 so 文件:
```
// 1. 复制 app/libs 至项目的 app/libs 目录
// 2. 参考 app/build.gradle 配置 NDK 可用架构和 so 依赖库目录
android {
...
defaultConfig {
ndk {
abiFilters 'armeabi-v7a', 'arm64-v8a'
}
}
sourceSets {
main {
jniLibs.srcDirs = ['libs']
}
}
}
```
B. 项目中已集成其他 jar 包,未集成 so 文件:
```
// 1. 复制 app/libs/easyedge-sdk.jar 与其他 jar 包同目录
// 2. 复制 app/libs 下 armeabi-v7a 和 arm64-v8a 目录至 app/src/main/jniLibs 目录下
// 3. 参考 app/build.gradle 配置 NDK 可用架构
android {
...
defaultConfig {
ndk {
abiFilters 'armeabi-v7a', 'arm64-v8a'
}
}
}
```
C. 项目中已集成其他 jar 包和 so 文件:
```
// 1. 复制 app/libs/easyedge-sdk.jar 与其他 jar 包同目录
// 2. 融合 app/libs 下 armeabi-v7a 和 arm64-v8a 下的 so 文件与其他同架构 so 文件同目录
// 3. 参考 app/build.gradle 配置 NDK 可用架构
android {
...
defaultConfig {
ndk {
abiFilters 'armeabi-v7a', 'arm64-v8a' // 只支持 v7a 和 v8a 两种架构,有其他架构需删除
}
}
}
```
### 1.2 添加权限
参考 app/src/main/AndroidManifest.xml 中配置的权限。
```
<uses-permission android:name="android.permission.ACCESS_NETWORK_STATE"/>
<uses-permission android:name="android.permission.INTERNET"/>
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"/>
```
### 1.3 混淆规则(可选)
请不要混淆 jar 包文件,参考 app/proguard-rules.pro 配置。
```
-keep class com.baidu.ai.edge.core.*.*{ *; }
```
## 2. API调用流程示例
以通用ARM的图像分类预测流程为例详细说明请参考后续章节
```
try {
// step 1-1: 准备配置类
InferConfig config = new InferConfig(context.getAssets(), "infer");
// step 1-2: 准备预测 Manager
InferManager manager = new InferManager(context, config, "");
// step 2-1: 准备待预测的图像,必须为 Bitmap.Config.ARGB_8888 格式,一般为默认格式
Bitmap image = getFromSomeWhere();
// step 2-2: 预测图像
List<ClassificationResultModel> results = manager.classify(image, 0.3f);
// step 3: 解析结果
for (ClassificationResultModel resultModel : results) {
Log.i(TAG, "labelIndex=" + resultModel.getLabelIndex()
+ ", labelName=" + resultModel.getLabel()
+ ", confidence=" + resultModel.getConfidence());
}
// step 4: 释放资源。预测完毕请及时释放资源
manager.destroy();
} catch (Exception e) {
Log.e(TAG, e.getMessage());
}
```
### 2.1 初始化
**准备配置类**
芯片与配置类对应关系:
- 通用ARMInferConfig
```
// 示例
// 参数二为芯片对应的模型资源文件夹名称
InferConfig config = new InferConfig(context.getAssets(), "infer");
```
**准备预测 Manager**
芯片与 Manager 对应关系:
- 通用ARMInferManager
```
// 示例
// 参数二为配置类对象
// 参数三保持空字符串即可
InferManager manager = new InferManager(context, config, "");
```
> **注意**
>
> 1. 同一时刻只能有且唯一有效的 Manager若要新建一个 Manager之前创建的 Manager 需先调用 destroy() 销毁;
> 2. Manager 的任何方法都不能在 UI 线程调用;
> 3. Manager 的任何成员变量及方法由于线程同步问题,都必须在同一个线程中执行;
### 2.2 预测图像
本节介绍各种模型类型的预测函数及结果解析。
> **注意**
> 预测函数可以多次调用,但必须在同一个线程中,不支持并发
> 预测函数中的 confidence 非必需,默认使用模型推荐值。填 0 可返回所有结果
> 待预测的图像必须为 Bitmap.Config.ARGB_8888 格式的 Bitmap
**图像分类**
```
// 预测函数
List<ClassificationResultModel> classify(Bitmap bitmap) throws BaseException;
List<ClassificationResultModel> classify(Bitmap bitmap, float confidence) throws BaseException;
// 返回结果
ClassificationResultModel
- label: 分类标签定义在label_list.txt中
- labelIndex: 分类标签对应的序号
- confidence: 置信度0-1
```
**物体检测**
```
// 预测函数
List<DetectionResultModel> detect(Bitmap bitmap) throws BaseException;
List<DetectionResultModel> detect(Bitmap bitmap, float confidence) throws BaseException;
// 返回结果
DetectionResultModel
- label: 标签定义在label_list.txt中
- confidence: 置信度0-1
- bounds: Rect包含左上角和右下角坐标指示物体在图像中的位置
```
**实例分割**
```
// 预测函数
List<SegmentationResultModel> segment(Bitmap bitmap) throws BaseException;
List<SegmentationResultModel> segment(Bitmap bitmap, float confidence) throws BaseException;
// 返回结果
SegmentationResultModel
- label: 标签定义在label_list.txt中
- confidence: 置信度0-1
- lableIndex: 标签对应的序号
- box: Rect指示物体在图像中的位置
- mask: byte[]表示原图大小的01掩码绘制1的像素即可得到当前对象区域
- maskLEcode: mask的游程编码
```
> 关于 maskLEcode 的解析方式可参考 [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)
**姿态估计**
```
// 预测函数
List<PoseResultModel> pose(Bitmap bitmap) throws BaseException;
// 返回结果
PoseResultModel
- label: 标签定义在label_list.txt中
- confidence: 置信度0-1
- points: Pair<Point, Point>, 2个点构成一条线
```
**文字识别**
```
// 预测函数
List<OcrResultModel> ocr(Bitmap bitmap) throws BaseException;
List<OcrResultModel> ocr(Bitmap bitmap, float confidence) throws BaseException;
// 返回结果
OcrResultModel
- label: 识别出的文字
- confidence: 置信度0-1
- points: List<Point>, 文字所在区域的点位
```
# 错误码
| 错误码 | 错误描述 | 详细描述及解决方法 |
| ---- | ------------------------------ | ------------------------------------------------------------------------------------ |
| 1001 | assets 目录下用户指定的配置文件不存在 | SDK可以使用assets目录下config.json作为配置文件。如果传入的config.json不在assets目录下则有此报错 |
| 1002 | 用户传入的配置文件作为json解析格式不准确如缺少某些字段 | 正常情况下demo中的config.json不要修改 |
| 19xx | Sdk内部错误 | 请与百度人员联系 |
| 2001 | XxxxMANAGER 只允许一个实例 | 如已有XxxxMANAGER对象请调用destroy方法 |
| 2002 | XxxxMANAGER 已经调用过destroy方法 | 在一个已经调用destroy方法的DETECT_MANAGER对象上不允许再调用任何方法 |
| 2003 | 传入的assets下模型文件路径为null | XxxxConfig.getModelFileAssetPath() 返回为null。由setModelFileAssetPath(null导致 |
| 2011 | libedge-xxxx.so 加载失败 | System.loadLibrary("edge-xxxx"); libedge-xxxx.so 没有在apk中。CPU架构仅支持armeabi-v7a、arm64-v8a |
| 2012 | JNI内存错误 | heap的内存不够 |
| 2103 | license过期 | license失效或者系统时间有异常 |
| 2601 | assets 目录下模型文件打开失败 | 请根据报错信息检查模型文件是否存在 |
| 2611 | 检测图片时,传递至引擎的图片二进制与长宽不符合 | 具体见报错信息 |
| 27xx | Sdk内部错误 | 请与百度人员联系 |
| 28xx | 引擎内部错误 | 请与百度人员联系 |
| 29xx | Sdk内部错误 | 请与百度人员联系 |
| 3000 | so加载错误 | 请确认所有so文件存在于apk中 |
| 3001 | 模型加载错误 | 请确认模型放置于能被加载到的合法路径中并确保config.json配置正确 |
| 3002 | 模型卸载错误 | 请与百度人员联系 |
| 3003 | 调用模型错误 | 在模型未加载正确或者so库未加载正确的情况下调用了分类接口 |
| 50xx | 在线模式调用异常 | 请与百度人员联系 |

View File

@@ -1,382 +0,0 @@
# 简介
本文档介绍FastDeploy中的模型SDK 在**Jetson Linux C++** 环境下1 图像和视频 推理部署步骤, 2介绍推理全流程API方便开发者了解项目后二次开发。如果开发者对Jetson的服务化部署感兴趣可以参考[Jetson CPP Serving](./Jetson-Linux-CPP-SDK-Serving.md)文档。
**注意**OCR目前只支持**图像**推理部署。
<!--ts-->
* [简介](#简介)
* [环境要求](#环境要求)
* [快速开始](#快速开始)
* [1. 项目结构说明](#1-项目结构说明)
* [2. 测试Demo](#2-测试demo)
* [2.1 预测图像](#21-预测图像)
* [2.2 预测视频流](#22-预测视频流)
* [预测API流程详解](#预测api流程详解)
* [1. SDK参数运行配置](#1-sdk参数运行配置)
* [2. 初始化Predictor](#2-初始化predictor)
* [3. 预测推理](#3-预测推理)
* [3.1 预测图像](#31-预测图像)
* [3.2 预测视频](#32-预测视频)
* [FAQ](#faq)
<!--te-->
# 环境要求
* Jetpack: 4.6安装Jetpack参考[NVIDIA 官网-Jetpack4.6安装指南](https://developer.nvidia.com/jetpack-sdk-46),或者参考采购的硬件厂商提供的安装方式进行安装。
| 序号 | 硬件 | Jetpack安装方式 | 下载链接 |
| --- | ---------------- | ------------------ | -------- |
| 1 | Jetson Xavier NX | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jetson_xavier_nx/jetson-nx-jp46-sd-card-image.zip) |
| 2 | Jetson Nano | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jeston_nano/jetson-nano-jp46-sd-card-image.zip) |
| 3 | Jetson Nano 2GB | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jeston_nano_2gb/jetson-nano-2gb-jp46-sd-card-image.zip) |
| 4 | agx xavier等 | NVIDIA SDK Manager | [Download NVIDIA SDK](https://developer.nvidia.com/nvsdk-manager) |
| 5 | 非官方版本如emmc版 | 参考采购的硬件公司提供的安装指南 | - |
注意本项目SDK要求 `CUDA=10.2`、`cuDNN=8.2`、`TensorRT=8.0`、`gcc>=7.5`、`cmake 3.0以上`安装 Jetpack4.6系统包后CUDA、cuDNN、TensorRT、gcc和cmake版本就已经满足要求无需再进行安装。
# 快速开始
## 1. 项目结构说明
根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GitHub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压后SDK目录结构如下
```
.EasyEdge-Linux-硬件芯片
├── RES # 模型资源文件夹一套模型适配不同硬件、OS和部署方式
│ ├── conf.json        # Android、iOS系统APP名字需要
│ ├── model # 模型结构文件
│ ├── params # 模型参数文件
│ ├── label_list.txt # 模型标签文件
│ ├── infer_cfg.json # 模型前后处理等配置文件
├── ReadMe.txt
├── cpp # C++ SDK 文件结构
└── baidu_easyedge_linux_cpp_x86_64_CPU.Generic_gcc5.4_v1.4.0_20220325.tar.gz
├── ReadMe.txt
├── bin # 可直接运行的二进制文件
├── include # 二次开发用的头文件
├── lib # 二次开发用的所依赖的库
├── src # 二次开发用的示例工程
└── thirdparty # 第三方依赖
```
## 2. 测试Demo
> 模型资源文件即压缩包中的RES文件夹默认已经打包在开发者下载的SDK包中请先将tar包整体拷贝到具体运行的设备中再解压缩使用。
SDK中已经包含预先编译的二进制可直接运行。以下运行示例均是`cd cpp/bin`路径下执行的结果。
### 2.1 预测图像
```bash
./easyedge_image_inference {模型RES文件夹路径} {测试图片路径}
```
运行效果示例:
<div align=center><img src="https://user-images.githubusercontent.com/54695910/175855351-68d1a4f0-6226-4484-b190-65f1ac2c7128.png" width="400"></div>
```bash
> ./easyedge_image_inference ../../../../RES 2.jpeg
2019-02-13 16:46:12,659 INFO [EasyEdge] [easyedge.cpp:34] 140606189016192 Baidu EasyEdge Linux Development Kit 0.2.1(20190213)
2019-02-13 16:46:14,083 INFO [EasyEdge] [paddlev2_edge_predictor.cpp:60] 140606189016192 Allocate graph success.
2019-02-13 16:46:14,326 DEBUG [EasyEdge] [paddlev2_edge_predictor.cpp:143] 140606189016192 Inference costs 168 ms
1, 1:txt_frame, p:0.994905 loc: 0.168161, 0.153654, 0.920856, 0.779621
Done
```
### 2.2 预测视频流
```
./easyedge_video_inference {模型RES文件夹路径} {video_type} {video_src_path}
```
其中 video_type 支持三种:
```
video_type : 1 // 本地视频文件
video_type : 2 // 摄像头的index
video_type : 3 // 网络视频流
```
video_src_path: 为 video_type 数值所对应的本地视频路径 、本地摄像头id、网络视频流地址
```
本地视频文件: ./easyedge_video_inference {模型RES文件夹路径} 1 /my_video_file.mp4
本地摄像头: ./easyedge_video_inference {模型RES文件夹路径} 2 1 #/dev/video1
网络视频流: ./easyedge_video_inference {模型RES文件夹路径} 3 rtmp://192.168.x.x:8733/live/src
```
注:以上路径仅为示例路径,开发者需要根据自己实际的图像/视频准备测试数据,并填写正确的测试路径。
# 预测API流程详解
本章节主要结合[2.测试Demo](#4)的Demo示例介绍推理API方便开发者学习并将运行库嵌入到开发者的程序当中更详细的API请参考`include/easyedge/easyedge*.h`文件。图像、视频的推理包含以下3个API如代码step注释所示
> ❗注意:<br>
> 1`src`文件夹中包含完整可编译的cmake工程实例建议开发者先行了解[cmake工程基本知识](https://cmake.org/cmake/help/latest/guide/tutorial/index.html)。 <br>
> 2请优先参考SDK中自带的Demo工程的使用流程和说明。遇到错误请优先参考文件中的注释、解释、日志说明。
```cpp
// step 1: SDK配置运行参数
EdgePredictorConfig config;
config.model_dir = {模型文件目录};
// step 2: 创建并初始化Predictor在这里选择合适的引擎
auto predictor = global_controller()->CreateEdgePredictor(config);
// step 3-1: 预测图像
auto img = cv::imread({图片路径});
std::vector<EdgeResultData> results;
predictor->infer(img, results);
// step 3-2: 预测视频
std::vector<EdgeResultData> results;
FrameTensor frame_tensor;
VideoConfig video_config;
video_config.source_type = static_cast<SourceType>(video_type); // source_type 定义参考头文件 easyedge_video.h
video_config.source_value = video_src;
/*
... more video_configs, 根据需要配置video_config的各选项
*/
auto video_decoding = CreateVideoDecoding(video_config);
while (video_decoding->next(frame_tensor) == EDGE_OK) {
results.clear();
if (frame_tensor.is_needed) {
predictor->infer(frame_tensor.frame, results);
render(frame_tensor.frame, results, predictor->model_info().kind);
}
//video_decoding->display(frame_tensor); // 显示当前frame需在video_config中开启配置
//video_decoding->save(frame_tensor); // 存储当前frame到视频需在video_config中开启配置
}
```
若需自定义library search path或者gcc路径修改对应Demo工程下的CMakeList.txt即可。
## 1. SDK参数运行配置
SDK的参数通过`EdgePredictorConfig::set_config`和`global_controller()->set_config`配置。本Demo 中设置了模型路径,其他参数保留默认参数。更详细的支持运行参数等,可以参考开发工具包中的头文件(`include/easyedge/easyedge_xxxx_config.h`)的详细说明。
配置参数使用方法如下:
```
EdgePredictorConfig config;
config.model_dir = {模型文件目录};
```
## 2. 初始化Predictor
* 接口
```cpp
auto predictor = global_controller()->CreateEdgePredictor(config);
predictor->init();
```
若返回非0请查看输出日志排查错误原因。
## 3. 预测推理
### 3.1 预测图像
> 在Demo中展示了预测接口infer()传入cv::Mat& image图像内容并将推理结果赋值给std::vector<EdgeResultData>& result。更多关于infer()的使用,可以参考`easyedge.h`头文件中的参数说明,自行传入需要的内容做推理。
* 接口输入
```cpp
/**
* @brief
* 通用接口
* @param image: must be BGR , HWC format (opencv default)
* @param result
* @return
*/
virtual int infer(cv::Mat& image, std::vector<EdgeResultData>& result) = 0;
```
图片的格式务必为opencv默认的BGR, HWC格式。
* 接口返回
`EdgeResultData`中可以获取对应的分类信息、位置信息。
```cpp
struct EdgeResultData {
int index; // 分类结果的index
std::string label; // 分类结果的label
float prob; // 置信度
// 物体检测 或 图像分割时使用:
float x1, y1, x2, y2; // (x1, y1): 左上角, (x2, y2): 右下角; 均为0~1的长宽比例值。
// 图像分割时使用:
cv::Mat mask; // 0, 1 的mask
std::string mask_rle; // Run Length Encoding游程编码的mask
};
```
*** 关于矩形坐标 ***
x1 * 图片宽度 = 检测框的左上角的横坐标
y1 * 图片高度 = 检测框的左上角的纵坐标
x2 * 图片宽度 = 检测框的右下角的横坐标
y2 * 图片高度 = 检测框的右下角的纵坐标
*** 关于图像分割mask ***
```
cv::Mat mask为图像掩码的二维数组
{
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
其中1代表为目标区域0代表非目标区域
```
*** 关于图像分割mask_rle ***
该字段返回了mask的游程编码解析方式可参考 [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)
以上字段可以参考demo文件中使用opencv绘制的逻辑进行解析
### 3.2 预测视频
SDK 提供了支持摄像头读取、视频文件和网络视频流的解析工具类`VideoDecoding`,此类提供了获取视频帧数据的便利函数。通过`VideoConfig`结构体可以控制视频/摄像头的解析策略、抽帧策略、分辨率调整、结果视频存储等功能。对于抽取到的视频帧可以直接作为SDK infer 接口的参数进行预测。
* 接口输入
class`VideoDecoding`
```
/**
* @brief 获取输入源的下一帧
* @param frame_tensor
* @return
*/
virtual int next(FrameTensor &frame_tensor) = 0;
/**
* @brief 显示当前frame_tensor中的视频帧
* @param frame_tensor
* @return
*/
virtual int display(const FrameTensor &frame_tensor) = 0;
/**
* @brief 将当前frame_tensor中的视频帧写为本地视频文件
* @param frame_tensor
* @return
*/
virtual int save(FrameTensor &frame_tensor) = 0;
/**
* @brief 获取视频的fps属性
* @return
*/
virtual int get_fps() = 0;
/**
* @brief 获取视频的width属性
* @return
*/
virtual int get_width() = 0;
/**
* @brief 获取视频的height属性
* @return
*/
virtual int get_height() = 0;
```
struct `VideoConfig`
```
/**
* @brief 视频源、抽帧策略、存储策略的设置选项
*/
struct VideoConfig {
SourceType source_type; // 输入源类型
std::string source_value; // 输入源地址如视频文件路径、摄像头index、网络流地址
int skip_frames{0}; // 设置跳帧每隔skip_frames帧抽取一帧并把该抽取帧的is_needed置为true
int retrieve_all{false}; // 是否抽取所有frame以便于作为显示和存储对于不满足skip_frames策略的frame把所抽取帧的is_needed置为false
int input_fps{0}; // 在采取抽帧之前设置视频的fps
Resolution resolution{Resolution::kAuto}; // 采样分辨率只对camera有效
bool enable_display{false}; // 默认不支持。
std::string window_name{"EasyEdge"};
bool display_all{false}; // 是否显示所有frame若为false仅显示根据skip_frames抽取的frame
bool enable_save{false};
std::string save_path; // frame存储为视频文件的路径
bool save_all{false}; // 是否存储所有frame若为false仅存储根据skip_frames抽取的frame
std::map<std::string, std::string> conf;
};
```
| 序号 | 字段 | 含义 |
| --- | -------------- | ---------------------------------------------------------------------------------------------------------------------------------- |
| 1 | `source_type` | 输入源类型支持视频文件、摄像头、网络视频流三种值分别为1、2、3 |
| 2 | `source_value` | 若`source_type`为视频文件,该值为指向视频文件的完整路径;若`source_type`为摄像头该值为摄像头的index如对于`/dev/video0`的摄像头则index为0若`source_type`为网络视频流,则为该视频流的完整地址。 |
| 3 | `skip_frames` | 设置跳帧每隔skip_frames帧抽取一帧并把该抽取帧的is_needed置为true标记为is_needed的帧是用来做预测的帧。反之直接跳过该帧不经过预测。 |
| 4 | `retrieve_all` | 若置该项为true则无论是否设置跳帧所有的帧都会被抽取返回以作为显示或存储用。 |
| 5 | `input_fps` | 用于抽帧前设置fps |
| 6 | `resolution` | 设置摄像头采样的分辨率,其值请参考`easyedge_video.h`中的定义,注意该分辨率调整仅对输入源为摄像头时有效 |
| 7 | `conf` | 高级选项。部分配置会通过该map来设置 |
*** 注意:***
1. `VideoConfig`不支持`display`功能。如果需要使用`VideoConfig`的`display`功能需要自行编译带有GTK选项的OpenCV。
2. 使用摄像头抽帧时,如果通过`resolution`设置了分辨率调整,但是不起作用,请添加如下选项:
```
video_config.conf["backend"] = "2";
```
3.部分设备上的CSI摄像头尚未兼容如遇到问题可以通过工单、QQ交流群或微信交流群反馈。
具体接口调用流程可以参考SDK中的`demo_video_inference`。
# FAQ
1. 如何处理一些 undefined reference / error while loading shared libraries?
> 如:./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory
遇到该问题时请找到具体的库的位置设置LD_LIBRARY_PATH或者安装缺少的库。
> 示例一libverify.so.1: cannot open shared object file: No such file or directory
> 链接找不到libverify.so文件一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib 解决(实际冒号后面添加的路径以libverify.so文件所在的路径为准)
> 示例二libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory
> 链接找不到libopencv_videoio.so文件一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib 解决(实际冒号后面添加的路径以libopencv_videoio.so所在路径为准)
> 示例三GLIBCXX_X.X.X not found
> 链接无法找到glibc版本请确保系统gcc版本>=SDK的gcc版本。升级gcc/glibc可以百度搜索相关文献。
2. 运行二进制时,提示 libverify.so cannot open shared object file
可能cmake没有正确设置rpath, 可以设置LD_LIBRARY_PATH为sdk的lib文件夹后再运行
```bash
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo
```
3. 编译时报错file format not recognized
可能是因为在复制SDK时文件信息丢失。请将整个压缩包复制到目标设备中再解压缩、编译。


@@ -1,293 +0,0 @@
# 简介
本文档介绍FastDeploy中的模型SDK在**Jetson Linux C++** 环境下1 **服务化**推理部署步骤2介绍推理全流程API方便开发者了解项目后二次开发。如果开发者对Jetson图像/视频部署感兴趣,可以参考[Jetson CPP Inference](./Jetson-Linux-CPP-SDK-Inference.md)文档。
**注意**OCR目前不支持服务化推理部署。
<!--ts-->
* [简介](#简介)
* [环境准备](#环境准备)
* [快速开始](#快速开始)
* [1. 项目结构说明](#1-项目结构说明)
* [2. 测试 HTTP Demo](#2-测试-http-demo)
* [2.1 启动HTTP预测服务](#21-启动http预测服务)
* [HTTP API介绍](#http-api介绍)
* [1. 开启http服务](#1-开启http服务)
* [2. 请求http服务](#2-请求http服务)
* [2.1 http 请求方式一:不使用图片base64格式](#21-http-请求方式一不使用图片base64格式)
* [2.2 http 请求方法二:使用图片base64格式](#22-http-请求方法二使用图片base64格式)
* [3. http 返回数据](#3-http-返回数据)
* [FAQ](#faq)
<!--te-->
# 环境准备
* Jetpack: 4.6。安装Jetpack 4.6,参考[NVIDIA 官网-Jetpack4.6安装指南](https://developer.nvidia.com/jetpack-sdk-46),或者参考采购的硬件厂商提供的安装方式进行安装。
| 序号 | 硬件 | Jetpack安装方式 | 下载链接 |
| --- | ---------------- | ------------------ | -------- |
| 1 | Jetson Xavier NX | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jetson_xavier_nx/jetson-nx-jp46-sd-card-image.zip) |
| 2 | Jetson Nano | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jeston_nano/jetson-nano-jp46-sd-card-image.zip) |
| 3 | Jetson Nano 2GB | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jeston_nano_2gb/jetson-nano-2gb-jp46-sd-card-image.zip) |
| 4 | AGX Xavier等 | NVIDIA SDK Manager | [Download NVIDIA SDK](https://developer.nvidia.com/nvsdk-manager) |
| 5 | 非官方版本如emmc版 | 参考采购的硬件公司提供的安装指南 | - |
注意本项目SDK要求 `CUDA=10.2`、`cuDNN=8.2`、`TensorRT=8.0`、`gcc>=7.5`、`cmake 3.0 以上`。安装 Jetpack 4.6 系统包后CUDA、cuDNN、TensorRT、gcc和cmake版本已经满足要求无需再单独安装。
# 快速开始
## 1. 项目结构说明
根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压后SDK目录结构如下
```
.EasyEdge-Linux-硬件芯片
├── RES # 模型资源文件夹一套模型适配不同硬件、OS和部署方式
│ ├── conf.json        # Android、iOS系统APP名字需要
│ ├── model # 模型结构文件
│ ├── params # 模型参数文件
│ ├── label_list.txt # 模型标签文件
│ ├── infer_cfg.json # 模型前后处理等配置文件
├── ReadMe.txt
├── cpp # C++ SDK 文件结构
└── baidu_easyedge_linux_cpp_x86_64_CPU.Generic_gcc5.4_v1.4.0_20220325.tar.gz
├── ReadMe.txt
├── bin # 可直接运行的二进制文件
├── include # 二次开发用的头文件
├── lib # 二次开发用的所依赖的库
├── src # 二次开发用的示例工程
└── thirdparty # 第三方依赖
```
## 2. 测试 HTTP Demo
> 模型资源文件即压缩包中的RES文件夹默认已经打包在开发者下载的SDK包中请先将tar包整体拷贝到具体运行的设备中再解压缩使用。
SDK中已经包含预先编译的二进制可直接运行。以下运行示例均是`cd cpp/bin`路径下执行的结果。
### 2.1 启动HTTP预测服务
```
./easyedge_serving {模型RES文件夹路径}
```
启动后日志中会显示如下设备IP和24401端口号信息
```
HTTP is now serving at 0.0.0.0:24401
```
此时,开发者可以打开浏览器,输入链接地址`http://0.0.0.0:24401`(这里的`设备IP和24401端口号`根据开发者电脑显示修改),选择图片来进行测试。
<div align=center><img src="https://user-images.githubusercontent.com/54695910/175855495-cd8d46ec-2492-4297-b3e4-2bda4cd6727c.png" width="600"></div>
同时可以调用HTTP接口来访问服务具体参考下文的[二次开发](#10)接口说明。
# HTTP API介绍
本章节主要结合[2.1 HTTP Demo]()的API介绍方便开发者学习并将运行库嵌入到开发者的程序当中更详细的API请参考`include/easyedge/easyedge*.h`文件。http服务包含服务端和客户端目前支持的能力包括以下几种方式Demo中提供了不使用图片base64格式的`方式一:浏览器请求的方式`,其他几种方式开发者根据个人需要,选择开发。
## 1. 开启http服务
http服务的启动可直接使用`bin/easyedge_serving`,或参考`src/demo_serving.cpp`文件修改相关逻辑
```cpp
/**
* @brief 开启一个简单的demo http服务。
* 该方法会block直到收到sigint/sigterm。
* http服务里图片的解码运行在cpu之上可能会降低推理速度。
* @tparam ConfigT
* @param config
* @param host
* @param port
* @param service_id service_id user parameter, uri '/get/service_id' will respond this value with 'text/plain'
* @param instance_num 实例数量,根据内存/显存/时延要求调整
* @return
*/
template<typename ConfigT>
int start_http_server(
const ConfigT &config,
const std::string &host,
int port,
const std::string &service_id,
int instance_num = 1);
```
## 2. 请求http服务
> 开发者可以打开浏览器,`http://{设备ip}:24401`,选择图片来进行测试。
### 2.1 http 请求方式一:不使用图片base64格式
URL中的get参数
| 参数 | 说明 | 默认值 |
| --------- | --------- | ---------------- |
| threshold | 阈值过滤, 0~1 | 如不提供,则会使用模型的推荐阈值 |
HTTP POST Body即为图片的二进制内容(无需base64, 无需json)
Python请求示例
```Python
import requests
with open('./1.jpg', 'rb') as f:
img = f.read()
result = requests.post(
'http://127.0.0.1:24401/',
params={'threshold': 0.1},
data=img).json()
```
### 2.2 http 请求方法二:使用图片base64格式
HTTP方法POST
Header如下
| 参数 | 值 |
| ------------ | ---------------- |
| Content-Type | application/json |
**Body请求填写**
* 分类网络:
body 中请求示例
```
{
"image": "<base64数据>"
"top_num": 5
}
```
body中参数详情
| 参数 | 是否必选 | 类型 | 可选值范围 | 说明 |
| ------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- |
| image | 是 | string | - | 图像数据base64编码要求base64图片编码后大小不超过4M,最短边至少15px最长边最大4096px支持jpg/png/bmp格式 **注意去掉头部** |
| top_num | 否 | number | - | 返回分类数量,不填该参数,则默认返回全部分类结果 |
* 检测和分割网络:
Body请求示例
```
{
"image": "<base64数据>"
}
```
body中参数详情
| 参数 | 是否必选 | 类型 | 可选值范围 | 说明 |
| --------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- |
| image | 是 | string | - | 图像数据base64编码要求base64图片编码后大小不超过4M,最短边至少15px最长边最大4096px支持jpg/png/bmp格式 **注意去掉头部** |
| threshold | 否 | number | - | 默认为推荐阈值,也可自行根据需要进行设置 |
Python请求示例
```Python
import base64
import requests
def main():
with open("图像路径", 'rb') as f:
result = requests.post("http://{服务ip地址}:24401/", json={
"image": base64.b64encode(f.read()).decode("utf8")
})
# print(result.request.body)
# print(result.request.headers)
print(result.content)
if __name__ == '__main__':
main()
```
## 3. http 返回数据
| 字段 | 类型说明 | 其他 |
| ---------- | ------ | ------------------------------------ |
| error_code | Number | 0为成功,非0参考message获得具体错误信息 |
| results | Array | 内容为具体的识别结果。其中字段的具体含义请参考`预测图像-返回格式`一节 |
| cost_ms | Number | 预测耗时ms不含网络交互时间 |
返回示例
```json
{
"cost_ms": 52,
"error_code": 0,
"results": [
{
"confidence": 0.94482421875,
"index": 1,
"label": "IronMan",
"x1": 0.059185408055782318,
"x2": 0.18795496225357056,
"y1": 0.14762254059314728,
"y2": 0.52510076761245728,
"mask": "...", // 图像分割模型字段
"trackId": 0, // 目标追踪模型字段
},
]
}
```
*** 关于矩形坐标 ***
x1 * 图片宽度 = 检测框的左上角的横坐标
y1 * 图片高度 = 检测框的左上角的纵坐标
x2 * 图片宽度 = 检测框的右下角的横坐标
y2 * 图片高度 = 检测框的右下角的纵坐标
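为方便理解上述坐标换算,下面给出一个示意性的 Python 片段(非 SDK 自带代码其中图片路径、服务地址均为示例请求与返回字段以上文2.1节和返回示例为准:
```python
import cv2
import requests

# 示意:复用上文 2.1 节的请求方式,将返回 JSON 中的归一化坐标换算成像素坐标
img_path = "1.jpg"                                   # 测试图片路径,请替换为实际文件
with open(img_path, "rb") as f:
    response = requests.post("http://127.0.0.1:24401/", data=f.read()).json()

img = cv2.imread(img_path)
h, w = img.shape[:2]
for r in response.get("results", []):
    x1, y1 = int(r["x1"] * w), int(r["y1"] * h)      # 左上角像素坐标
    x2, y2 = int(r["x2"] * w), int(r["y2"] * h)      # 右下角像素坐标
    print(r["label"], r["confidence"], (x1, y1), (x2, y2))
```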
*** 关于分割模型 ***
其中mask为分割模型的游程编码解析方式可参考 [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)
# FAQ
1. 如何处理一些 undefined reference / error while loading shared libraries?
> 如:./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory
遇到该问题时请找到具体的库的位置设置LD_LIBRARY_PATH或者安装缺少的库。
> 示例一libverify.so.1: cannot open shared object file: No such file or directory
> 链接找不到libverify.so文件一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib 解决(实际冒号后面添加的路径以libverify.so文件所在的路径为准)
> 示例二libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory
> 链接找不到libopencv_videoio.so文件一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib 解决(实际冒号后面添加的路径以libopencv_videoio.so所在路径为准)
> 示例三GLIBCXX_X.X.X not found
> 链接无法找到glibc版本请确保系统gcc版本>=SDK的gcc版本。升级gcc/glibc可以百度搜索相关文献。
2. 使用libcurl请求http服务时速度明显变慢
这是因为libcurl请求continue导致server等待数据的问题添加空的header即可
```bash
headers = curl_slist_append(headers, "Expect:");
```
3. 运行二进制时,提示 libverify.so cannot open shared object file
可能cmake没有正确设置rpath, 可以设置LD_LIBRARY_PATH为sdk的lib文件夹后再运行
```bash
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo
```
4. 编译时报错file format not recognized
可能是因为在复制SDK时文件信息丢失。请将整个压缩包复制到目标设备中再解压缩、编译。


@@ -1,412 +0,0 @@
# 简介
本文档介绍FastDeploy中的模型SDK**Intel X86-CPU/ NVIDIA GPU、Linux** 操作系统下的C++ 1图像和视频的推理部署步骤2介绍推理全流程API方便了解项目后二次开发。如果对Linux操作系统下的 Python部署感兴趣请参考[Linux Python环境下的推理部署](./Linux-Python-SDK-Inference.md)文档。
<!--ts-->
* [简介](#简介)
* [环境准备](#环境准备)
* [1. 硬件支持](#1-硬件支持)
* [2. 软件环境](#2-软件环境)
* [快速开始](#快速开始)
* [1. 项目结构说明](#1-项目结构说明)
* [2. 测试Demo](#2-测试demo)
* [2.1. 预测图像](#21-预测图像)
* [2.2. 预测视频流](#22-预测视频流)
* [3. 编译Demo](#3-编译demo)
* [预测API流程详解](#预测api流程详解)
* [1. SDK参数运行配置](#1-sdk参数运行配置)
* [2. 初始化Predictor](#2-初始化predictor)
* [3. 预测推理](#3-预测推理)
* [3.1 预测图像](#31-预测图像)
* [3.2 预测视频](#32-预测视频)
* [FAQ](#faq)
<!--te-->
# 环境准备
## 1. 硬件支持
* NVIDIA GPU: x86_64
* cuda支持版本CUDA10.0/10.1/10.2 + cuDNN 7 (cuDNN版本>=7.6.5)
* cuda支持版本CUDA11.0 + cuDNN v8.0.4
* CPUIntel x86_64
## 2. 软件环境
1.运行二进制文件-环境要求
* gcc: 5.4 以上 (GLIBCXX_3.4.22)
* Linux下查看gcc版本的命令可能因系统差异而不同`gcc --version`
* Linux下查看C++基础库GLIBCXX版本的命令可能因系统差异路径会有不同可用如下命令检测自己环境下的情况`strings /usr/lib64/libstdc++.so.6 | grep GLIBCXX`
* glibc2.23以上
* Linux查看命令`ldd --version`
2.二次开发编译-环境要求
编译源代码时除了gcc、GLIBCXX、glibc满足`1.运行二进制文件-环境要求`还需要cmake满足要求。
* cmake: 3.0 以上
* Linux查看命令`cmake --version`
# 快速开始
## 1. 项目结构说明
根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。SDK目录结构如下
```
.EasyEdge-Linux-硬件芯片
├── RES # 模型资源文件夹一套模型适配不同硬件、OS和部署方式
│ ├── conf.json        # Android、iOS系统APP名字需要
│ ├── model # 模型结构文件
│ ├── params # 模型参数文件
│ ├── label_list.txt # 模型标签文件
│ ├── infer_cfg.json # 模型前后处理等配置文件
├── ReadMe.txt
├── cpp # C++ SDK 文件结构
└── baidu_easyedge_linux_cpp_x86_64_CPU.Generic_gcc5.4_v1.4.0_20220325.tar.gz
├── ReadMe.txt
├── bin # 可直接运行的二进制文件
├── include # 二次开发用的头文件
├── lib # 二次开发用的所依赖的库
├── src # 二次开发用的示例工程
└── thirdparty # 第三方依赖
└── python # Python SDK 文件
```
## 2. 测试Demo
**注意**OCR算法目前没有提供该测试Demo。
> 模型资源文件即压缩包中的RES文件夹默认已经打包在开发者下载的SDK包中请先将tar包整体拷贝到具体运行的设备中再解压缩使用。
SDK中已经包含预先编译的二进制可直接运行。以下运行示例均是`cd cpp/bin`路径下执行的结果。
### 2.1. 预测图像
```bash
./easyedge_image_inference {模型RES文件夹路径} {测试图片路径}
```
运行效果示例:
<div align=center><img src="https://user-images.githubusercontent.com/54695910/175855351-68d1a4f0-6226-4484-b190-65f1ac2c7128.png" width="400"></div>
```bash
> ./easyedge_image_inference ../../../../RES 2.jpeg
2019-02-13 16:46:12,659 INFO [EasyEdge] [easyedge.cpp:34] 140606189016192 Baidu EasyEdge Linux Development Kit 0.2.1(20190213)
2019-02-13 16:46:14,083 INFO [EasyEdge] [paddlev2_edge_predictor.cpp:60] 140606189016192 Allocate graph success.
2019-02-13 16:46:14,326 DEBUG [EasyEdge] [paddlev2_edge_predictor.cpp:143] 140606189016192 Inference costs 168 ms
1, 1:txt_frame, p:0.994905 loc: 0.168161, 0.153654, 0.920856, 0.779621
Done
```
### 2.2. 预测视频流
```
./easyedge_video_inference {模型RES文件夹路径} {video_type} {video_src_path}
```
其中 video_type 支持三种:
```
video_type : 1 // 本地视频文件
video_type : 2 // 摄像头的index
video_type : 3 // 网络视频流
```
video_src_path: 为 video_type 数值所对应的本地视频路径 、本地摄像头id、网络视频流地址
```
本地视频文件: ./easyedge_video_inference {模型RES文件夹路径} 1 /my_video_file.mp4
本地摄像头: ./easyedge_video_inference {模型RES文件夹路径} 2 1 #/dev/video1
网络视频流: ./easyedge_video_inference {模型RES文件夹路径} 3 rtmp://192.168.x.x:8733/live/src
```
注:以上路径仅为示例路径,开发者需要根据自己实际的图像/视频准备测试数据,并填写正确的测试路径。
## 3. 编译Demo
通过[项目结构说明](#3)了解到,`bin`路径下的可执行文件 由`src`下的对应文件编译得到。 通过以下命令,即可完成`src`下的源码编译。
```
cd src
mkdir build && cd build
cmake .. && make
```
至此会在build文件夹下生成编译好的可执行文件如图像推理的二进制文件`build/demo_image_inference/easyedge_image_inference`
# 预测API流程详解
本章节主要结合[2.测试Demo](#4)的Demo示例介绍推理API方便开发者学习并将运行库嵌入到开发者的程序当中更详细的API请参考`include/easyedge/easyedge*.h`文件。图像、视频的推理包含以下3个API查看下面的cpp代码中的step注释说明。
> ❗注意:<br>
> 1`src`文件夹中包含完整可编译的cmake工程实例建议开发者先行了解[cmake工程基本知识](https://cmake.org/cmake/help/latest/guide/tutorial/index.html)。 <br>
> 2请优先参考SDK中自带的Demo工程的使用流程和说明。遇到错误请优先参考文件中的注释、解释、日志说明。
```cpp
// step 1: SDK配置运行参数
EdgePredictorConfig config;
config.model_dir = {模型文件目录};
// step 2: 创建并初始化Predictor在这里选择合适的引擎
auto predictor = global_controller()->CreateEdgePredictor(config);
// step 3-1: 预测图像
auto img = cv::imread({图片路径});
std::vector<EdgeResultData> results;
predictor->infer(img, results);
// step 3-2: 预测视频
std::vector<EdgeResultData> results;
FrameTensor frame_tensor;
VideoConfig video_config;
video_config.source_type = static_cast<SourceType>(video_type); // source_type 定义参考头文件 easyedge_video.h
video_config.source_value = video_src;
/*
... more video_configs, 根据需要配置video_config的各选项
*/
auto video_decoding = CreateVideoDecoding(video_config);
while (video_decoding->next(frame_tensor) == EDGE_OK) {
results.clear();
if (frame_tensor.is_needed) {
predictor->infer(frame_tensor.frame, results);
render(frame_tensor.frame, results, predictor->model_info().kind);
}
//video_decoding->display(frame_tensor); // 显示当前frame需在video_config中开启配置
//video_decoding->save(frame_tensor); // 存储当前frame到视频需在video_config中开启配置
}
```
若需自定义library search path或者gcc路径修改对应Demo工程下的CMakeList.txt即可。
## 1. SDK参数运行配置
SDK的参数通过`EdgePredictorConfig::set_config`和`global_controller()->set_config`配置。本Demo 中设置了模型路径,其他参数保留默认参数。更详细的支持运行参数等,可以参考开发工具包中的头文件(`include/easyedge/easyedge_xxxx_config.h`)的详细说明。
配置参数使用方法如下:
```
EdgePredictorConfig config;
config.model_dir = {模型文件目录};
```
## 2. 初始化Predictor
* 接口
```cpp
auto predictor = global_controller()->CreateEdgePredictor(config);
predictor->init();
```
若返回非0请查看输出日志排查错误原因。
## 3. 预测推理
### 3.1 预测图像
> 在Demo中展示了预测接口infer()传入cv::Mat& image图像内容并将推理结果赋值给std::vector<EdgeResultData>& result。更多关于infer()的使用,可以参考`easyedge.h`头文件中的参数说明,自行传入需要的内容做推理。
* 接口输入
```cpp
/**
* @brief
* 通用接口
* @param image: must be BGR , HWC format (opencv default)
* @param result
* @return
*/
virtual int infer(cv::Mat& image, std::vector<EdgeResultData>& result) = 0;
```
图片的格式务必为opencv默认的BGR, HWC格式。
* 接口返回
`EdgeResultData`中可以获取对应的分类信息、位置信息。
```cpp
struct EdgeResultData {
int index; // 分类结果的index
std::string label; // 分类结果的label
float prob; // 置信度
// 物体检测 或 图像分割时使用:
float x1, y1, x2, y2; // (x1, y1): 左上角, (x2, y2): 右下角; 均为0~1的长宽比例值。
// 图像分割时使用:
cv::Mat mask; // 0, 1 的mask
std::string mask_rle; // Run Length Encoding游程编码的mask
};
```
*** 关于矩形坐标 ***
x1 * 图片宽度 = 检测框的左上角的横坐标
y1 * 图片高度 = 检测框的左上角的纵坐标
x2 * 图片宽度 = 检测框的右下角的横坐标
y2 * 图片高度 = 检测框的右下角的纵坐标
*** 关于图像分割mask ***
```
cv::Mat mask为图像掩码的二维数组
{
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
其中1代表为目标区域0代表非目标区域
```
*** 关于图像分割mask_rle ***
该字段返回了mask的游程编码解析方式可参考 [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)
以上字段可以参考demo文件中使用opencv绘制的逻辑进行解析
### 3.2 预测视频
SDK 提供了支持摄像头读取、视频文件和网络视频流的解析工具类`VideoDecoding`,此类提供了获取视频帧数据的便利函数。通过`VideoConfig`结构体可以控制视频/摄像头的解析策略、抽帧策略、分辨率调整、结果视频存储等功能。对于抽取到的视频帧可以直接作为SDK infer 接口的参数进行预测。
* 接口输入
class`VideoDecoding`
```
/**
* @brief 获取输入源的下一帧
* @param frame_tensor
* @return
*/
virtual int next(FrameTensor &frame_tensor) = 0;
/**
* @brief 显示当前frame_tensor中的视频帧
* @param frame_tensor
* @return
*/
virtual int display(const FrameTensor &frame_tensor) = 0;
/**
* @brief 将当前frame_tensor中的视频帧写为本地视频文件
* @param frame_tensor
* @return
*/
virtual int save(FrameTensor &frame_tensor) = 0;
/**
* @brief 获取视频的fps属性
* @return
*/
virtual int get_fps() = 0;
/**
* @brief 获取视频的width属性
* @return
*/
virtual int get_width() = 0;
/**
* @brief 获取视频的height属性
* @return
*/
virtual int get_height() = 0;
```
struct `VideoConfig`
```
/**
* @brief 视频源、抽帧策略、存储策略的设置选项
*/
struct VideoConfig {
SourceType source_type; // 输入源类型
std::string source_value; // 输入源地址如视频文件路径、摄像头index、网络流地址
int skip_frames{0}; // 设置跳帧每隔skip_frames帧抽取一帧并把该抽取帧的is_needed置为true
int retrieve_all{false}; // 是否抽取所有frame以便于作为显示和存储对于不满足skip_frames策略的frame把所抽取帧的is_needed置为false
int input_fps{0}; // 在采取抽帧之前设置视频的fps
Resolution resolution{Resolution::kAuto}; // 采样分辨率只对camera有效
bool enable_display{false}; // 默认不支持。
std::string window_name{"EasyEdge"};
bool display_all{false}; // 是否显示所有frame若为false仅显示根据skip_frames抽取的frame
bool enable_save{false};
std::string save_path; // frame存储为视频文件的路径
bool save_all{false}; // 是否存储所有frame若为false仅存储根据skip_frames抽取的frame
std::map<std::string, std::string> conf;
};
```
| 序号 | 字段 | 含义 |
| --- | -------------- | ---------------------------------------------------------------------------------------------------------------------------------- |
| 1 | `source_type` | 输入源类型支持视频文件、摄像头、网络视频流三种值分别为1、2、3 |
| 2 | `source_value` | 若`source_type`为视频文件,该值为指向视频文件的完整路径;若`source_type`为摄像头该值为摄像头的index如对于`/dev/video0`的摄像头则index为0若`source_type`为网络视频流,则为该视频流的完整地址。 |
| 3 | `skip_frames` | 设置跳帧每隔skip_frames帧抽取一帧并把该抽取帧的is_needed置为true标记为is_needed的帧是用来做预测的帧。反之直接跳过该帧不经过预测。 |
| 4 | `retrieve_all` | 若置该项为true则无论是否设置跳帧所有的帧都会被抽取返回以作为显示或存储用。 |
| 5 | `input_fps` | 用于抽帧前设置fps |
| 6 | `resolution` | 设置摄像头采样的分辨率,其值请参考`easyedge_video.h`中的定义,注意该分辨率调整仅对输入源为摄像头时有效 |
| 7 | `conf` | 高级选项。部分配置会通过该map来设置 |
*** 注意:***
1. `VideoConfig`不支持`display`功能。如果需要使用`VideoConfig`的`display`功能需要自行编译带有GTK选项的OpenCV。
2. 使用摄像头抽帧时,如果通过`resolution`设置了分辨率调整,但是不起作用,请添加如下选项:
```
video_config.conf["backend"] = "2";
```
3.部分设备上的CSI摄像头尚未兼容如遇到问题可以通过工单、QQ交流群或微信交流群反馈。
具体接口调用流程可以参考SDK中的`demo_video_inference`。
# FAQ
1. 如何处理一些 undefined reference / error while loading shared libraries?
> 如:./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory
遇到该问题时请找到具体的库的位置设置LD_LIBRARY_PATH或者安装缺少的库。
> 示例一libverify.so.1: cannot open shared object file: No such file or directory
> 链接找不到libverify.so文件一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib 解决(实际冒号后面添加的路径以libverify.so文件所在的路径为准)
> 示例二libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory
> 链接找不到libopencv_videoio.so文件一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib 解决(实际冒号后面添加的路径以libopencv_videoio.so所在路径为准)
> 示例三GLIBCXX_X.X.X not found
> 链接无法找到glibc版本请确保系统gcc版本>=SDK的gcc版本。升级gcc/glibc可以百度搜索相关文献。
2. 运行二进制时,提示 libverify.so cannot open shared object file
可能cmake没有正确设置rpath, 可以设置LD_LIBRARY_PATH为sdk的lib文件夹后再运行
```bash
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo
```
3. 编译时报错file format not recognized
可能是因为在复制SDK时文件信息丢失。请将整个压缩包复制到目标设备中再解压缩、编译。


@@ -1,329 +0,0 @@
# 简介
本文档介绍FastDeploy中的模型SDK在**X86 CPU/ NVIDIA GPU、Linux操作系统** 的C++环境1HTTP服务化推理部署步骤2介绍推理全流程API方便开发者了解项目后二次开发。
如果开发者对Python语言的相关能力感兴趣请参考[Linux Python环境下的服务化推理部署](./Linux-Python-SDK-Serving.md)文档。
**【注意】**OCR Demo 暂不支持服务化部署。
<!--ts-->
* [简介](#简介)
* [安装准备](#安装准备)
* [1. 硬件支持](#1-硬件支持)
* [2. 软件环境](#2-软件环境)
* [快速开始](#快速开始)
* [1. 项目结构说明](#1-项目结构说明)
* [2. 测试 HTTP Demo](#2-测试-http-demo)
* [2.1 启动HTTP预测服务](#21-启动http预测服务)
* [3. 编译Demo](#3-编译demo)
* [HTTP API流程详解](#http-api流程详解)
* [1. 开启http服务](#1-开启http服务)
* [2. 请求http服务](#2-请求http服务)
* [2.1 http 请求方式一:不使用图片base64格式](#21-http-请求方式一不使用图片base64格式)
* [2.2 http 请求方法二:使用图片base64格式](#22-http-请求方法二使用图片base64格式)
* [3. http返回数据](#3-http返回数据)
* [FAQ](#faq)
<!--te-->
# 安装准备
## 1. 硬件支持
- NVIDIA GPU: x86_64
- cuda支持版本CUDA10.0/10.1/10.2 + cuDNN 7 (cuDNN版本>=7.6.5)
- cuda支持版本CUDA11.0 + cuDNN v8.0.4
- CPUIntel x86_64
## 2. 软件环境
1.运行二进制文件-环境要求
- gcc: 5.4 以上 (GLIBCXX_3.4.22)
- Linux下查看gcc版本的命令可能因系统差异而不同`gcc --version`
- Linux下查看C++基础库GLIBCXX版本的命令可能因系统差异路径会有不同可用如下命令检测自己环境下的情况`strings /usr/lib64/libstdc++.so.6 | grep GLIBCXX`
- glibc2.23以上
- Linux查看命令`ldd --version`
2.二次开发编译-环境要求
编译源代码时除了gcc、GLIBCXX、glibc满足`1.运行二进制文件-环境要求`还需要cmake满足要求。
- cmake: 3.0 以上
- Linux查看命令`cmake --version`
# 快速开始
## 1. 项目结构说明
根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。SDK目录结构如下
```
.EasyEdge-Linux-硬件芯片
├── RES # 模型资源文件夹一套模型适配不同硬件、OS和部署方式
│ ├── conf.json        # Android、iOS系统APP名字需要
│ ├── model # 模型结构文件
│ ├── params # 模型参数文件
│ ├── label_list.txt # 模型标签文件
│ ├── infer_cfg.json # 模型前后处理等配置文件
├── ReadMe.txt
├── cpp # C++ SDK 文件结构
└── baidu_easyedge_linux_cpp_x86_64_CPU.Generic_gcc5.4_v1.4.0_20220325.tar.gz
├── ReadMe.txt
├── bin # 可直接运行的二进制文件
├── include # 二次开发用的头文件
├── lib # 二次开发用的所依赖的库
├── src # 二次开发用的示例工程
└── thirdparty # 第三方依赖
└── python # Python SDK 文件
```
## 2. 测试 HTTP Demo
> 模型资源文件即压缩包中的RES文件夹默认已经打包在开发者下载的SDK包中请先将tar包整体拷贝到具体运行的设备中再解压缩使用。
SDK中已经包含预先编译的二进制可直接运行。以下运行示例均是`cd cpp/bin`路径下执行的结果。
### 2.1. 启动HTTP预测服务
```
./easyedge_serving {模型RES文件夹路径}
```
启动后日志中会显示如下设备IP和24401端口号信息
```
HTTP is now serving at 0.0.0.0:24401
```
此时,开发者可以打开浏览器,输入链接地址`http://0.0.0.0:24401`(这里的`设备IP和24401端口号`根据开发者电脑显示修改),选择图片来进行测试。
<div align=center><img src="https://user-images.githubusercontent.com/54695910/175855495-cd8d46ec-2492-4297-b3e4-2bda4cd6727c.png" width="600"></div>
同时可以调用HTTP接口来访问服务具体参考下文的[二次开发](#10)接口说明。
## 3. 编译Demo
通过[项目结构说明](#3)了解到,`bin`路径下的可执行文件是由`src`下的对应文件编译得到的。 该部分说明C++编译命令。
```
cd src
mkdir build && cd build
cmake .. && make
```
至此会在build文件夹下生成编译好的可执行文件如HTTP服务的二进制文件`build/demo_serving/easyedge_serving`
# HTTP API流程详解
本章节主要结合[2.1 HTTP Demo](#4)的API介绍方便开发者学习并将运行库嵌入到开发者的程序当中更详细的API请参考`include/easyedge/easyedge*.h`文件。http服务包含服务端和客户端目前支持的能力包括以下几种方式Demo中提供了不使用图片base64格式的`方式一:浏览器请求的方式`,其他几种方式开发者根据个人需要,选择开发。
## 1. 开启http服务
http服务的启动可直接使用`bin/easyedge_serving`,或参考`src/demo_serving.cpp`文件修改相关逻辑
```cpp
/**
* @brief 开启一个简单的demo http服务。
* 该方法会block直到收到sigint/sigterm。
* http服务里图片的解码运行在cpu之上可能会降低推理速度。
* @tparam ConfigT
* @param config
* @param host
* @param port
* @param service_id service_id user parameter, uri '/get/service_id' will respond this value with 'text/plain'
* @param instance_num 实例数量,根据内存/显存/时延要求调整
* @return
*/
template<typename ConfigT>
int start_http_server(
const ConfigT &config,
const std::string &host,
int port,
const std::string &service_id,
int instance_num = 1);
```
## 2. 请求http服务
> 开发者可以打开浏览器,`http://{设备ip}:24401`,选择图片来进行测试。
### 2.1 http 请求方式一:不使用图片base64格式
URL中的get参数
| 参数 | 说明 | 默认值 |
| --------- | --------- | ---------------- |
| threshold | 阈值过滤, 0~1 | 如不提供,则会使用模型的推荐阈值 |
HTTP POST Body即为图片的二进制内容(无需base64, 无需json)
Python请求示例
```Python
import requests
with open('./1.jpg', 'rb') as f:
img = f.read()
result = requests.post(
'http://127.0.0.1:24401/',
params={'threshold': 0.1},
data=img).json()
```
### 2.2 http 请求方法二:使用图片base64格式
HTTP方法POST
Header如下
| 参数 | 值 |
| ------------ | ---------------- |
| Content-Type | application/json |
**Body请求填写**
* 分类网络:
body 中请求示例
```
{
"image": "<base64数据>"
"top_num": 5
}
```
body中参数详情
| 参数 | 是否必选 | 类型 | 可选值范围 | 说明 |
| ------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- |
| image | 是 | string | - | 图像数据base64编码要求base64图片编码后大小不超过4M,最短边至少15px最长边最大4096px支持jpg/png/bmp格式 **注意去掉头部** |
| top_num | 否 | number | - | 返回分类数量,不填该参数,则默认返回全部分类结果 |
* 检测和分割网络:
Body请求示例
```
{
"image": "<base64数据>"
}
```
body中参数详情
| 参数 | 是否必选 | 类型 | 可选值范围 | 说明 |
| --------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- |
| image | 是 | string | - | 图像数据base64编码要求base64图片编码后大小不超过4M,最短边至少15px最长边最大4096px支持jpg/png/bmp格式 **注意去掉头部** |
| threshold | 否 | number | - | 默认为推荐阈值,也可自行根据需要进行设置 |
Python请求示例
```python
import base64
import requests
def main():
with open("图像路径", 'rb') as f:
result = requests.post("http://{服务ip地址}:24401/", json={
"image": base64.b64encode(f.read()).decode("utf8")
})
# print(result.request.body)
# print(result.request.headers)
print(result.content)
if __name__ == '__main__':
main()
```
## 3. http返回数据
| 字段 | 类型说明 | 其他 |
| ---------- | ------ | ------------------------------------ |
| error_code | Number | 0为成功,非0参考message获得具体错误信息 |
| results | Array | 内容为具体的识别结果。其中字段的具体含义请参考`预测图像-返回格式`一节 |
| cost_ms | Number | 预测耗时ms不含网络交互时间 |
返回示例
```json
{
"cost_ms": 52,
"error_code": 0,
"results": [
{
"confidence": 0.94482421875,
"index": 1,
"label": "IronMan",
"x1": 0.059185408055782318,
"x2": 0.18795496225357056,
"y1": 0.14762254059314728,
"y2": 0.52510076761245728,
"mask": "...", // 图像分割模型字段
"trackId": 0, // 目标追踪模型字段
},
]
}
```
*** 关于矩形坐标 ***
x1 * 图片宽度 = 检测框的左上角的横坐标
y1 * 图片高度 = 检测框的左上角的纵坐标
x2 * 图片宽度 = 检测框的右下角的横坐标
y2 * 图片高度 = 检测框的右下角的纵坐标
*** 关于分割模型 ***
其中mask为分割模型的游程编码解析方式可参考 [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)
# FAQ
1. 如何处理一些 undefined reference / error while loading shared libraries?
> 如:./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory
遇到该问题时请找到具体的库的位置设置LD_LIBRARY_PATH或者安装缺少的库。
> 示例一libverify.so.1: cannot open shared object file: No such file or directory
> 链接找不到libverify.so文件一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib 解决(实际冒号后面添加的路径以libverify.so文件所在的路径为准)
> 示例二libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory
> 链接找不到libopencv_videoio.so文件一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib 解决(实际冒号后面添加的路径以libopencv_videoio.so所在路径为准)
> 示例三GLIBCXX_X.X.X not found
> 链接无法找到glibc版本请确保系统gcc版本>=SDK的gcc版本。升级gcc/glibc可以百度搜索相关文献。
2. 使用libcurl请求http服务时速度明显变慢
这是因为libcurl请求continue导致server等待数据的问题添加空的header即可
```bash
headers = curl_slist_append(headers, "Expect:");
```
3. 运行二进制时,提示 libverify.so cannot open shared object file
可能cmake没有正确设置rpath, 可以设置LD_LIBRARY_PATH为sdk的lib文件夹后再运行
```bash
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo
```
4. 编译时报错file format not recognized
可能是因为在复制SDK时文件信息丢失。请将整个压缩包复制到目标设备中再解压缩、编译。


@@ -1,369 +0,0 @@
# 简介
本文档介绍FastDeploy中的模型SDK在**Intel x86_64 / NVIDIA GPU Linux Python** 环境下: 1)图像推理部署步骤; 2介绍模型推理全流程API方便开发者了解项目后二次开发。
其中Linux C++请参考[Linux CPP环境下的推理部署](./Linux-CPP-SDK-Inference.md)文档。
<!--ts-->
* [简介](#简介)
* [环境准备](#环境准备)
* [1. SDK下载](#1-sdk下载)
* [2. Python环境](#2-python环境)
* [3. 安装依赖](#3-安装依赖)
* [3.1 安装paddlepaddle](#31-安装paddlepaddle)
* [3.2 安装EasyEdge Python Wheel 包](#32-安装easyedge-python-wheel-包)
* [快速开始](#快速开始)
* [1. 文件结构说明](#1-文件结构说明)
* [2. 测试Demo](#2-测试demo)
* [2.1 预测图像](#21-预测图像)
* [预测API流程详解](#预测api流程详解)
* [1. 基础流程](#1-基础流程)
* [2. 初始化](#2-初始化)
* [3. SDK参数配置](#3-sdk参数配置)
* [4. 预测图像](#4-预测图像)
* [FAQ](#faq)
<!--te-->
# 环境准备
## 1. SDK下载
根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压后SDK目录结构如下
```shell
EasyEdge-Linux-x86-[部署芯片]
├── RES # 模型文件资源文件夹,可替换为其他模型
├── README.md
├── cpp # C++ SDK
└── python # Python SDK
```
## 2. Python环境
> 当前SDK仅支持Python 3.5, 3.6, 3.7
使用如下命令获取已安装的Python版本号。如果本机的版本不匹配建议使用[pyenv](https://github.com/pyenv/pyenv)、[anaconda](https://www.anaconda.com/)等Python版本管理工具对SDK所在目录进行配置。
```shell
$python3 --version
```
接着使用如下命令确认pip的版本是否满足要求要求pip版本为20.2.2或更高版本。详细的pip安装过程可以参考[官网教程](https://pip.pypa.io/en/stable/installation/)。
```shell
$python3 -m pip --version
```
## 3. 安装依赖
### 3.1 安装paddlepaddle
根据具体的部署芯片CPU/GPU安装对应的PaddlePaddle的whl包。
`x86_64 CPU` 平台可以使用如下命令进行安装:
```shell
python3 -m pip install paddlepaddle==2.2.2 -i https://mirror.baidu.com/pypi/simple
```
NVIDIA GPU平台的详细安装教程可以参考[官网Paddle安装教程](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)。
> 使用 NVIDIA GPU 预测时,必须满足:
>
> 1. 机器已安装 cuda, cudnn
> 2. 已正确安装对应 cuda 版本的paddle 版本
> 3. 通过设置环境变量`FLAGS_fraction_of_gpu_memory_to_use`设置合理的初始内存使用比例
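上面第3点提到的`FLAGS_fraction_of_gpu_memory_to_use`可以在初始化前通过环境变量设置。下面是一个示意性片段比例数值0.3与模型目录仅为示例,请按实际显存情况调整):
```python
import os

# 示意:在导入并初始化 SDK 之前设置 GPU 初始显存占用比例(取值 0~1
os.environ["FLAGS_fraction_of_gpu_memory_to_use"] = "0.3"

import BaiduAI.EasyEdge as edge

pred = edge.Program()
# 模型目录 "RES" 为示例路径device_id 按实际 GPU 编号填写
pred.init(model_dir="RES", device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID, device_id=0)
```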
### 3.2 安装EasyEdge Python Wheel 包
在`python`目录下安装特定Python版本的EasyEdge Wheel包。对`x86_64 CPU` 或 `x86_64 Nvidia GPU`平台,可以使用如下命令进行安装,具体名称以 Python SDK 包中的 whl 为准。
```shell
python3 -m pip install -U BaiduAI_EasyEdge_SDK-{SDK版本号}-cp{Python版本号}-cp{Python版本号}m-linux_x86_64.whl
```
`armv8 CPU平台`可以使用如下命令进行安装:
```shell
python3 -m pip install -U BaiduAI_EasyEdge_SDK-{版本号}-cp36-cp36m-linux_aarch64.whl
```
# 快速开始
## 1. 文件结构说明
Python SDK文件结构如下
```shell
EasyEdge-Linux-x86--[部署芯片]
├──...
├──python # Linux Python SDK
├── # 特定Python版本的EasyEdge Wheel包, 二次开发可使用
├── BaiduAI_EasyEdge_SDK-1.2.8-cp35-cp35m-linux_x86_64.whl
├── BaiduAI_EasyEdge_SDK-1.2.8-cp36-cp36m-linux_x86_64.whl
├── BaiduAI_EasyEdge_SDK-1.2.8-cp37-cp37m-linux_x86_64.whl
├── infer_demo # demo体验完整文件
│ ├── demo_xxx.py # 包含前后处理的端到端推理demo文件
│ └── demo_serving.py # 提供http服务的demo文件
├── tensor_demo # tensor in/out demo文件
│ └── demo_xxx.py
```
## 2. 测试Demo
> 模型资源文件默认已经打包在开发者下载的SDK包中 默认为`RES`目录。
### 2.1 预测图像
使用infer_demo文件夹下的demo文件。
```bash
python3 demo_x86_cpu.py {模型RES文件夹} {测试图片路径}
```
运行效果示例:
<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854068-28d27c0a-ef83-43ee-9e89-b65eed99b476.jpg" width="400"></div>
```shell
2022-06-14 14:40:16 INFO [EasyEdge] [demo_nvidia_gpu.py:38] 140518522509120: Init paddlefluid engine...
2022-06-14 14:40:20 INFO [EasyEdge] [demo_nvidia_gpu.py:38] 140518522509120: Paddle version: 2.2.2
{'confidence': 0.9012349843978882, 'index': 8, 'label': 'n01514859 hen'}
```
可以看到,运行结果为`index8labelhen`通过imagenet [类别映射表](https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a),可以找到对应的类别,即 'hen',由此说明我们的预测结果正确。
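如果希望在代码里直接由 index 得到类别名,也可以读取`RES`目录下的`label_list.txt`。下面是一个示意片段(假设该文件按类别索引逐行存放标签,具体格式以实际 SDK 包为准):
```python
# 示意:按行读取 RES/label_list.txt假设第 i 行对应 index 为 i 的类别)
index = 8
with open("RES/label_list.txt", "r", encoding="utf-8") as f:
    labels = [line.strip() for line in f]
print(index, labels[index])   # 预期与 demo 输出中的 label 一致,如 'n01514859 hen'
```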
# 预测API流程详解
本章节主要结合前文的Demo示例来介绍推理API方便开发者学习并将运行库嵌入到开发者的程序当中更详细的API请参考`infer_demo/demo_xx_xx.py`文件查看下面的Python代码中的step注释说明。
## 1. 基础流程
> ❗注意请优先参考SDK中自带demo的使用流程和说明。遇到错误请优先参考文件中的注释、解释、日志说明。
`infer_demo/demo_xx_xx.py`
```python
# 引入EasyEdge运行库
import BaiduAI.EasyEdge as edge
# 创建并初始化一个预测Program选择合适的引擎
pred = edge.Program()
pred.init(model_dir={RES文件夹路径}, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 CPU
# pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 Nvidia GPU
# pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE) # armv8 CPU
# 预测图像
res = pred.infer_image({numpy.ndarray的图片})
# 关闭结束预测Program
pred.close()
```
`infer_demo/demo_serving.py`
```python
import BaiduAI.EasyEdge as edge
from BaiduAI.EasyEdge.serving import Serving
# 创建并初始化Http服务
server = Serving(model_dir={RES文件夹路径}, license=serial_key)
# 运行Http服务
# 请参考同级目录下demo_xx_xx.py里:
# pred.init(model_dir=xx, device=xx, engine=xx, device_id=xx)
# 对以下参数device\device_id和engine进行修改
server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 CPU
# server.run(host=host, port=port, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 Nvidia GPU
# server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE) # armv8 CPU
```
## 2. 初始化
- 接口
```python
def init(self,
model_dir,
device=Device.CPU,
engine=Engine.PADDLE_FLUID,
config_file='conf.json',
preprocess_file='preprocess_args.json',
model_file='model',
params_file='params',
label_file='label_list.txt',
infer_cfg_file='infer_cfg.json',
device_id=0,
thread_num=1
):
"""
Args:
model_dir: str
device: BaiduAI.EasyEdge.Device比如Device.CPU
engine: BaiduAI.EasyEdge.Engine 比如: Engine.PADDLE_FLUID
config_file: str
preprocess_file: str
model_file: str
params_file: str
label_file: str 标签文件
infer_cfg_file: 包含预处理、后处理信息的文件
device_id: int 设备ID
thread_num: int CPU的线程数
Raises:
RuntimeError, IOError
Returns:
bool: True if success
"""
```
若返回不是True请查看输出日志排查错误原因。
## 3. SDK参数配置
使用 CPU 预测时,可以通过在 init 中设置 thread_num 使用多线程预测。如:
```python
pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID, thread_num=4)
```
使用 GPU 预测时,可以通过在 init 中设置 device_id 指定需要的GPU device id。如
```python
pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID, device_id=0)
```
## 4. 预测图像
- 接口
```python
def infer_image(self, img,
threshold=0.3,
channel_order='HWC',
color_format='BGR',
data_type='numpy')
"""
Args:
img: np.ndarray or bytes
threshold: float
only return result with confidence larger than threshold
channel_order: string
channel order HWC or CHW
color_format: string
color format order RGB or BGR
data_type: string
仅在图像分割时有意义。 'numpy' or 'string'
'numpy': 返回已解析的mask
'string': 返回未解析的mask游程编码
Returns:
list
"""
```
| 字段 | 类型 | 取值 | 说明 |
| ---------- | -------------------- | --------- | ------------------------ |
| confidence | float | 0~1 | 分类或检测的置信度 |
| label | string | | 分类或检测的类别 |
| index | number | | 分类或检测的类别索引 |
| x1, y1 | float | 0~1 | 物体检测,矩形的左上角坐标 (相对长宽的比例值) |
| x2, y2 | float | 0~1 | 物体检测,矩形的右下角坐标(相对长宽的比例值) |
| mask | string/numpy.ndarray | | 图像分割的mask |
***关于矩形坐标***
x1 * 图片宽度 = 检测框的左上角的横坐标
y1 * 图片高度 = 检测框的左上角的纵坐标
x2 * 图片宽度 = 检测框的右下角的横坐标
y2 * 图片高度 = 检测框的右下角的纵坐标
可以参考 demo 文件中使用 opencv 绘制矩形的逻辑。
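下面给出一个示意性的绘制片段(非 demo 原代码,模型目录与图片路径均为示例),展示如何把归一化坐标换算成像素坐标并用 OpenCV 画框:
```python
import cv2
import BaiduAI.EasyEdge as edge

# 示意:对检测结果做坐标换算并绘制(路径与阈值均为示例)
pred = edge.Program()
pred.init(model_dir="RES", device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID)

img = cv2.imread("test.jpg")
results = pred.infer_image(img, threshold=0.3)
h, w = img.shape[:2]
for r in results:
    if "x1" not in r:                                  # 分类结果没有坐标字段,跳过
        continue
    p1 = (int(r["x1"] * w), int(r["y1"] * h))          # 左上角像素坐标
    p2 = (int(r["x2"] * w), int(r["y2"] * h))          # 右下角像素坐标
    cv2.rectangle(img, p1, p2, (0, 255, 0), 2)
    cv2.putText(img, r["label"], p1, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)
cv2.imwrite("result.jpg", img)
pred.close()
```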
***结果示例***
i) 图像分类
```json
{
"index": 736,
"label": "table",
"confidence": 0.9
}
```
ii) 物体检测
```json
{
"index": 8,
"label": "cat",
"confidence": 1.0,
"x1": 0.21289,
"y1": 0.12671,
"x2": 0.91504,
"y2": 0.91211,
}
```
iii) 图像分割
```json
{
"name": "cat",
"score": 1.0,
"location": {
"left": ...,
"top": ...,
"width": ...,
"height": ...,
},
"mask": ...
}
```
mask字段中data_type为`numpy`时,返回图像掩码的二维数组
```text
{
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
其中1代表为目标区域0代表非目标区域
```
data_type为`string`mask的游程编码解析方式可参考 [demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)。
# FAQ
1. 执行infer_demo文件时提示your generated code is out of date and must be regenerated with protoc >= 3.19.0
进入当前项目首先卸载protobuf
```shell
python3 -m pip uninstall protobuf
```
       安装低版本protobuf
```shell
python3 -m pip install protobuf==3.19.0
```


@@ -1,268 +0,0 @@
# 简介
本文档以[千分类模型_MobileNetV3](https://ai.baidu.com/easyedge/app/openSource)为例,介绍 FastDeploy中的模型SDK在**Intel x86_64 / NVIDIA GPU Linux Python** 环境下: 1)SDK **服务化**推理部署步骤; 2介绍模型推理全流程API方便开发者了解项目后二次开发。
其中Linux C++请参考[Linux C++环境下的服务化推理部署](./Linux-CPP-SDK-Serving.md)文档。
**【注意】**OCR Demo 暂不支持服务化部署。
<!--ts-->
* [简介](#简介)
* [环境准备](#环境准备)
* [1. SDK下载](#1-sdk下载)
* [2. Python环境](#2-python环境)
* [3. 安装依赖](#3-安装依赖)
* [3.1 安装paddlepaddle](#31-安装paddlepaddle)
* [3.2 安装EasyEdge Python Wheel 包](#32-安装easyedge-python-wheel-包)
* [快速开始](#快速开始)
* [1. 文件结构说明](#1-文件结构说明)
* [2. 测试Serving服务](#2-测试serving服务)
* [2.1 启动HTTP预测服务](#21-启动http预测服务)
* [HTTP API流程详解](#http-api流程详解)
* [1. 开启http服务](#1-开启http服务)
* [2. 请求http服务](#2-请求http服务)
* [2.1 http 请求方式:不使用图片base64格式](#21-http-请求方式不使用图片base64格式)
* [3. http 返回数据](#3-http-返回数据)
* [FAQ](#faq)
<!--te-->
# 环境准备
## 1. SDK下载
根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压后SDK目录结构如下
```shell
EasyEdge-Linux-x86-[部署芯片]
├── RES # 模型文件资源文件夹,可替换为其他模型
├── README.md
├── cpp # C++ SDK
└── python # Python SDK
```
## 2. Python环境
> 当前SDK仅支持Python 3.5, 3.6, 3.7
使用如下命令获取已安装的Python版本号。如果本机的版本不匹配建议使用[pyenv](https://github.com/pyenv/pyenv)、[anaconda](https://www.anaconda.com/)等Python版本管理工具对SDK所在目录进行配置。
```shell
$python3 --version
```
接着使用如下命令确认pip的版本是否满足要求要求pip版本为20.2.2或更高版本。详细的pip安装过程可以参考[官网教程](https://pip.pypa.io/en/stable/installation/)。
```shell
$python3 -m pip --version
```
## 3. 安装依赖
### 3.1 安装paddlepaddle
根据具体的部署芯片CPU/GPU安装对应的PaddlePaddle的whl包。
1.`x86_64 CPU` 平台可以使用如下命令进行安装:
```shell
python3 -m pip install paddlepaddle==2.2.2 -i https://mirror.baidu.com/pypi/simple
```
2.`x86_64 NVIDIA GPU` 支持的CUDA和CUDNN版本与PaddlePaddle框架保持一致如下
* CUDA 工具包10.1/10.2配合cuDNN 7 (cuDNN版本>=7.6.5, 如需多卡支持需配合NCCL2.7及更高)
* CUDA 工具包11.0配合cuDNN v8.0.4(如需多卡支持需配合NCCL2.7及更高)
* CUDA 工具包11.1配合cuDNN v8.1.1(如需多卡支持需配合NCCL2.7及更高)
* CUDA 工具包11.2配合cuDNN v8.1.1(如需多卡支持需配合NCCL2.7及更高)
具体安装命令,参考[官网Paddle安装教程](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)。
> 使用 NVIDIA GPU 预测时,必须满足:
>
> 1. 机器已安装 cuda, cudnn
> 2. 已正确安装对应 cuda 版本的paddle 版本
> 3. 通过设置环境变量`FLAGS_fraction_of_gpu_memory_to_use`设置合理的初始内存使用比例
### 3.2 安装EasyEdge Python Wheel 包
在`python`目录下安装特定Python版本的EasyEdge Wheel包。对`x86_64 CPU` 或 `x86_64 Nvidia GPU平台 `可以使用如下命令进行安装,具体名称以 Python SDK 包中的 whl 为准。
```shell
python3 -m pip install -U BaiduAI_EasyEdge_SDK-{SDK版本号}-cp{Python版本号}-cp{Python版本号}m-linux_x86_64.whl
```
`armv8 CPU平台`可以使用如下命令进行安装:
```shell
python3 -m pip install -U BaiduAI_EasyEdge_SDK-{SDK版本号}-cp36-cp36m-linux_aarch64.whl
```
# 快速开始
## 1. 文件结构说明
Python SDK文件结构如下
```shell
EasyEdge-Linux-x86--[部署芯片]
├──...
├──python # Linux Python SDK
├── # 特定Python版本的EasyEdge Wheel包, 二次开发可使用
├── BaiduAI_EasyEdge_SDK-1.2.8-cp35-cp35m-linux_x86_64.whl
├── BaiduAI_EasyEdge_SDK-1.2.8-cp36-cp36m-linux_x86_64.whl
├── BaiduAI_EasyEdge_SDK-1.2.8-cp37-cp37m-linux_x86_64.whl
├── infer_demo # demo体验完整文件
│ ├── demo_xxx.py # 包含前后处理的端到端推理demo文件
│ └── demo_serving.py # 提供http服务的demo文件
├── tensor_demo # 学习自定义算法前后处理时使用
│ └── demo_xxx.py
```
## 2. 测试Serving服务
> 模型资源文件默认已经打包在开发者下载的SDK包中 默认为`RES`目录。
### 2.1 启动HTTP预测服务
指定对应的模型文件夹(默认为`RES`、设备ip和指定端口号运行如下命令。
```shell
python3 demo_serving.py {模型RES文件夹} {host, default 0.0.0.0} {port, default 24401}
```
成功启动后,终端中会显示如下字样。
```shell
...
* Running on {host ip}:24401
```
如果是在局域网内的机器上部署,开发者此时可以打开浏览器,输入`http://{host ip}:24401`,选择图片来进行测试,运行效果如下。
<img src="https://user-images.githubusercontent.com/54695910/175854073-fb8189e5-0ffb-472c-a17d-0f35aa6a8418.png" style="zoom:50%;" />
如果是在远程机器上部署,那么可以参考`demo_serving.py`中的 `http_client_test()函数`请求http服务来执行推理。
# HTTP API流程详解
本章节主要结合前文的Demo示例来对API进行介绍方便开发者学习并将运行库嵌入到开发者的程序当中更详细的API请参考对应的Python文件。http服务包含服务端和客户端Demo中提供了不使用图片base64格式的`方式一:浏览器请求的方式`,其他几种方式开发者根据个人需要,选择开发。
## 1. 开启http服务
http服务的启动使用`demo_serving.py`文件
```python
class Serving(object):
""" SDK local serving """
def __init__(self, model_dir, license='', model_filename='model', params_filename='params'):
self.program = None
self.model_dir = model_dir
self.model_filename = model_filename
self.params_filename = params_filename
self.program_lock = threading.Lock()
self.license_key = license
# 只有ObjectTracking会初始化video_processor
self.video_processor = None
def run(self, host, port, device, engine=Engine.PADDLE_FLUID, service_id=0, device_id=0, **kwargs):
""" Args: host : str port : str device : BaiduAI.EasyEdge.Device比如Device.CPU engine : BaiduAI.EasyEdge.Engine 比如: Engine.PADDLE_FLUID """
self.run_serving_with_flask(host, port, device, engine, service_id, device_id, **kwargs)
```
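下面是一个示意性的启动片段(非 demo 原代码model_dir、host、port、license 均为示例值),用法与前文推理部署文档中的 demo_serving 示例一致:
```python
import BaiduAI.EasyEdge as edge
from BaiduAI.EasyEdge.serving import Serving

# 示意:用上述 Serving 类启动本地 HTTP 服务license 按实际情况填写)
server = Serving(model_dir="RES", license="")
server.run(host="0.0.0.0", port=24401, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID)
# GPU 示例:
# server.run(host="0.0.0.0", port=24401, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID, device_id=0)
```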
## 2. 请求http服务
> 开发者可以打开浏览器,`http://{设备ip}:24401`,选择图片来进行测试。
### 2.1 http 请求方式:不使用图片base64格式
URL中的get参数
| 参数 | 说明 | 默认值 |
| --------- | --------- | ---------------- |
| threshold | 阈值过滤, 0~1 | 如不提供,则会使用模型的推荐阈值 |
HTTP POST Body即为图片的二进制内容。
Python请求示例
```python
import requests
with open('./1.jpg', 'rb') as f:
img = f.read()
result = requests.post(
'http://127.0.0.1:24401/',
params={'threshold': 0.1},
data=img).json()
```
## 3. http 返回数据
| 字段 | 类型说明 | 其他 |
| ---------- | ------ | ------------------------------------ |
| error_code | Number | 0为成功,非0参考message获得具体错误信息 |
| results | Array | 内容为具体的识别结果。其中字段的具体含义请参考`预测图像-返回格式`一节 |
| cost_ms | Number | 预测耗时ms不含网络交互时间 |
返回示例
```json
{
"cost_ms": 52,
"error_code": 0,
"results": [
{
"confidence": 0.94482421875,
"index": 1,
"label": "IronMan",
"x1": 0.059185408055782318,
"x2": 0.18795496225357056,
"y1": 0.14762254059314728,
"y2": 0.52510076761245728,
"mask": "...", // 图像分割模型字段
"trackId": 0, // 目标追踪模型字段
},
]
}
```
***关于矩形坐标***
x1 * 图片宽度 = 检测框的左上角的横坐标
y1 * 图片高度 = 检测框的左上角的纵坐标
x2 * 图片宽度 = 检测框的右下角的横坐标
y2 * 图片高度 = 检测框的右下角的纵坐标
***关于分割模型***
其中mask为分割模型的游程编码解析方式可参考 [demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)。
# FAQ
1. 执行infer_demo文件时提示your generated code is out of date and must be regenerated with protoc >= 3.19.0
进入当前项目首先卸载protobuf
```shell
python3 -m pip uninstall protobuf
```
安装低版本protobuf
```shell
python3 -m pip install protobuf==3.19.0
```


@@ -1,266 +0,0 @@
<a name="0"></a>
# 简介
本文档介绍如何将FastDeploy的Demo模型替换成开发者自己训练的AI模型。**注意**FastDeploy下载的SDK和Demo仅支持相同算法模型的替换。本文档要求开发者已经将Demo和SDK运行跑通如果要了解运行跑通Demo和SDK指导文档可以参考[SDK使用文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/README.md#sdk使用)
* [简介](#0)<br>
* [模型替换](#1)<br>
* [1.模型准备](#2)<br>
* [1.1 Paddle模型](#3)<br>
* [1.2 Paddle OCR模型增加一步特殊转换](#4)<br>
* [1.2.1 下载模型转换工具](#5)<br>
* [1.2.2 模型转换](#6)<br>
* [1.3 其他框架模型](#7)<br>
* [2.模型名修改和label文件准备](#8)<br>
* [2.1 非OCR模型名修改](#9)<br>
* [2.2 OCR模型名修改](#10)<br>
* [2.3 模型label文件](#11)<br>
* [3.修改配置文件](#12)<br>
* [测试效果](#13)<br>
* [完整配置文件说明](#14)<br>
* [1.配置文件字段含义](#15)<br>
* [2.预处理顺序](#16)<br>
* [FAQ](#17)<br>
**注意事项:**
1. PP-PicoDet模型 在FastDeploy中支持PP-Picodet模型是将后处理写到网络里面的方式即后处理+NMS都在网络结构里面。Paddle Detection导出静态模型时有3种方法选择将后处理和NMS导入到网络里面即可参考[导出部分](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/picodet#%E5%AF%BC%E5%87%BA%E5%8F%8A%E8%BD%AC%E6%8D%A2%E6%A8%A1%E5%9E%8B)。详细网络区别可以通过netron工具对比。
2. PP-Picodet模型在FastDeploy中支持PP-Picodet模型是将前处理写在网络外面的方式。Paddle Detection中的TinyPose算法中会将PP-PicoDet模型的前处理写入网络中。如果要使用FastDeploy的SDK进行模型替换需要将前处理写到网络外面。参考[Detection中的导出命令](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/keypoint/tiny_pose#%E5%B0%86%E8%AE%AD%E7%BB%83%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%AE%9E%E7%8E%B0%E7%AB%AF%E4%BE%A7%E9%83%A8%E7%BD%B2)将TestReader.fuse_normalize=False即可
<a name="1"></a>
# 模型替换
开发者将从PaddleDetection、PaddleClas、PaddleOCR、PaddleSeg等飞桨开发套件导出来的对应模型完成 [1.模型准备](#)、[2.模型名修改和label文件准备](#)、[3.修改配置文件](#) 这3步操作需要相同算法才可替换即可完成自定义模型的替换运行时指定新的模型文件即可在自己训练的模型上实现相应的预测推理任务。
* Linux下模型资源文件夹路径`EasyEdge-Linux-**/RES/`
* Windows下模型资源文件夹路径`EasyEdge-Windows-**/data/model/`
* Android下模型资源文件夹路径`EasyEdge-Android-**/app/src/assets/infer/` 和 `app/src/assets/demo/conf.json`
* iOS下模型资源文件夹路径`EasyEdge-iOS-**/RES/easyedge/`
主要涉及到下面4个模型相关的文件model、params、label_list.txt、infer_cfg.json和一个APP名相关的配置文件仅Android、iOS、HTTP需要APP名字非必需。
* ```
├── RES、model、infer # 模型资源文件夹一套模型适配不同硬件、OS和部署方式
│ ├── conf.json        # Android、iOS系统APP名字需要
│ ├── model # 模型结构文件
│ ├── params # 模型参数文件
│ ├── label_list.txt # 模型标签文件
│ ├── infer_cfg.json # 模型前后处理等配置文件
```
> ❗注意OCR模型在ARM CPU硬件上包括Android、Linux、iOS 三款操作系统),因为任务的特殊性,替换步骤中的 [1.模型准备](#)、[2.模型名修改和label文件准备](#) 不同于其他任务模型,详细参考下面步骤。
<a name="2"></a>
## 1.模型准备
<a name="3"></a>
### 1.1 Paddle模型
* 通过PaddleDetection、PaddleClas、PaddleOCR、PaddleSeg等套件导出的飞桨模型文件包括如下文件可能存在导出时修改了名字的情况后缀`.pdmodel`为模型网络结构文件,后缀`.pdiparams`为模型权重文件):
```
model.pdmodel # 模型网络结构
model.pdiparams # 模型权重
model.yml # 模型的配置文件(包括预处理参数、模型定义等)
```
<a name="4"></a>
### 1.2 OCR模型特殊转换仅在ARM CPU上需要
因为推理引擎版本的问题OCR模型需要在[1.1 Paddle模型](#3)导出`.pdmodel`和`.pdiparams`模型后多增加一步模型转换的特殊处理主要执行下面2步
<a name="5"></a>
#### 1.2.1 下载模型转换工具
Linux 模型转换工具下载链接:[opt_linux](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.11/opt_linux)</br>
M1 模型转换工具下载链接:[opt_m1](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.11/opt_m1)</br>
mac 模型转换工具下载链接:[opt_mac](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.11/opt_mac)</br>
<a name="6"></a>
#### 1.2.2 模型转换
以下命令以mac为例完成模型转换。
```
* 转换 OCR 检测模型命名:
./opt_mac --model_dir=./ch_PP-OCRv3_det_infer/ --valid_targets=arm --optimize_out_type=naive_buffer --optimize_out=./ocr_det
* 转换 OCR 识别模型命名:
./opt_mac --model_dir=./ch_PP-OCRv3_rec_infer/ --valid_targets=arm --optimize_out_type=naive_buffer --optimize_out=./ocr_rec
```
产出:
<div align=center><img src="https://user-images.githubusercontent.com/54695910/175856746-501b05ad-6fba-482e-8e72-fdd68fe52101.png" width="400"></div>
<a name="7"></a>
### 1.3 其他框架模型
* 如果开发者使用的是PyTorch、TensorFlow、Caffe、ONNX等其他框架的模型可以参考[X2Paddle](https://github.com/PaddlePaddle/X2Paddle)官网完成模型转换,即可得到对应的`model.pdmodel`和`model.pdiparams`模型文件。
<a name="8"></a>
## 2.模型名修改和label文件准备
<a name="9"></a>
### 2.1 非OCR模型名修改
按照下面的规则,修改套件导出来的模型名和标签文件,并替换到模型资源文件中。
```
1. model.pdmodel 修改成 model
2. model.pdiparams 修改成 params
```
<a name="10"></a>
### 2.2 OCR模型名修改
```
1. ocr_det.nb 修改成 model # 将 检测模型 修改名称成 model
2. ocr_rec.nb 修改成 params # 将 识别模型 修改名称成 params
```
<a name="11"></a>
### 2.3 模型label文件
同时需要准备模型文件对应的label文件`label_list.txt`。label文件可以参考原Demo中`label_list.txt`的格式准备。
<a name="12"></a>
## 3. 修改模型相关配置文件
1infer_cfg.json 文件修改
所有程序开发者都需要关注该配置文件。开发者在自己数据/任务中训练模型,可能会修改输入图像尺寸、修改阈值等操作,因此需要根据训练情况修改`RES`文件夹下`infer_cfg.json`文件中的对应字段。CV任务涉及到的配置文件修改主要包括如下字段修改方式可参考下方示例
```
1. "best_threshold": 0.3, #网络输出的阈值,根据开发者模型实际情况修改
2. "resize": [512, 512], #[w, h]网络输入图像尺寸,用户根据实际情况修改。
```
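如果希望用脚本批量修改这些字段,可以参考如下示意性的 Python 片段(非官方工具,字段层级以实际`infer_cfg.json`为准,示例中的取值需按训练情况填写):
```python
import json

# 示意:更新 RES/infer_cfg.json 中的阈值与输入尺寸(路径与数值均为示例)
cfg_path = "RES/infer_cfg.json"
with open(cfg_path, "r", encoding="utf-8") as f:
    cfg = json.load(f)

cfg["model_info"]["best_threshold"] = 0.3     # 网络输出阈值,按实际模型修改
cfg["pre_process"]["resize"] = [512, 512]     # [w, h] 网络输入尺寸,按实际模型修改

with open(cfg_path, "w", encoding="utf-8") as f:
    json.dump(cfg, f, ensure_ascii=False, indent=2)
```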
2conf.json 文件修改
仅Android、iOS、HTTP服务应用开发者需要关注该配置文件。开发者根据自己应用程序命名需要参考已有`conf.json`即可。
通常开发者修改FastDeploy项目中的模型时主要涉及这几个配置信息的修改。FastDeploy详细的配置文件介绍参考[完整配置文件说明](#14)。
<a name="13"></a>
# 测试效果
将自定义准备的`RES`文件按照第2、3步完成修改后可以参考[SDK使用文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/README.md#sdk%E4%BD%BF%E7%94%A8)完成自己模型上的不同预测体验。
<a name="14"></a>
# 完整配置文件说明
<a name="15"></a>
## 1. 配置文件字段含义
模型资源文件`infer_cfg.json`涉及到大量不同算法的前后处理等信息,下面是相关的字段介绍,通常开发者如果没有修改算法前后处理,不需要关心这些字段。非标记【必须】的可不填。
```json
{
"version": 1,
"model_info": {
"best_threshold": 0.3, // 默认0.3
"model_kind": 1, // 【必须】 1-分类2-检测6-实例分割12-追踪14-语义分割401-人脸402-姿态10001-决策
},
"pre_process": { // 【必须】
// 归一化, 预处理会把图像 (origin_img - mean) * scale
"skip_norm": false, // 默认为false, 如果设置为true不做mean scale处理
"mean": [123, 123, 123], // 【必须一般不需要动】图像均值已经根据Paddle套件均值做了转换处理开发者如果没有修改套件参数可以不用关注。X-mean/ scale
"scale": [0.017, 0.017, 0.017], // 【必须,一般不需要动】
"color_format": "RGB", // BGR 【必须,一般不需要动】
"channel_order": "CHW", // HWC
// 大小相关
"resize": [300, 300], // w, h 【必须】
"rescale_mode": "keep_size", // 默认keep_size keep_ratio, keep_ratio2, keep_raw_size, warp_affine
"max_size": 1366, // keep_ratio 用。如果没有提供,则用 resize[0]
"target_size": 800, // keep_ratio 用。如果没有提供,则用 resize[1]
"raw_size_range": [100, 10000], // keep_raw_size 用
"warp_affine_keep_res": // warp_affine模式使用默认为false
"center_crop_size": [224, 224] // w, h, 如果需要做center_crop则提供否则无需提供该字段
"padding": false,
"padding_mode": "padding_align32", // 【非必须】默认padding_align32, 其他可指定padding_fill_size
"padding_fill_size": [416, 416], // 【非必须】仅padding_fill_size模式下需要提供, [fill_size_w, fill_size_h], 这里padding fill对齐paddle detection实现在bottom和right方向实现补齐
"padding_fill_value": [114, 114, 114] // 【非必须】仅padding_fill_size模式下需要提供
// 其他
"letterbox": true,
},
"post_process": {
"box_normed": true, // 默认为true, 如果为false 则表示该模型的box坐标输出不是归一化的
}
}
```
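为帮助理解 pre_process 中 mean/scale 的作用,下面用一个示意性的 numpy 片段演示 (origin_img - mean) * scale 的归一化过程(数值取自上面的示例配置,输入用随机图代替,仅作演示):
```python
import numpy as np

# 示意:按 pre_process 中的 mean/scale 做归一化skip_norm 为 true 时跳过这一步
mean = np.array([123, 123, 123], dtype=np.float32)
scale = np.array([0.017, 0.017, 0.017], dtype=np.float32)

img = np.random.randint(0, 256, size=(300, 300, 3)).astype(np.float32)  # HWC 的示例输入
normalized = (img - mean) * scale
print(normalized.shape, normalized.min(), normalized.max())
```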
<a name="16"></a>
## 2. 预处理顺序(没有的流程自动略过)
1. 灰度图 -> rgb图变换
2. resize 尺寸变换
3. center_crop
4. rgb/bgr变换
5. padding_fill_size
6. letterbox画个厚边框填上黑色
7. chw/hwc变换
8. 归一化mean, scale
9. padding_align32
rescale_mode说明
* keep_size: 将图片缩放到resize指定的大小
* keep_ratio:将图片按比例缩放长边不超过max_size短边不超过target_size
* keep_raw_size:保持原图尺寸但必须在raw_size_range之间
* warp_affine: 仿射变换可以设置warp_affine_keep_res指定是否keep_res在keep_res为false场景下宽高通过resize字段指定
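下面用一个简化的 Python 片段示意 keep_ratio 模式下缩放尺寸的一种计算方式(仅为帮助理解,函数名与默认值为示例,具体以 SDK 内部实现为准):
```python
# 示意keep_ratio 模式下,短边贴近 target_size、长边不超过 max_size 的缩放计算
def keep_ratio_size(img_w, img_h, target_size=800, max_size=1366):
    short_side, long_side = min(img_w, img_h), max(img_w, img_h)
    scale = min(target_size / short_side, max_size / long_side)  # 同时满足两个约束的最大比例
    return int(round(img_w * scale)), int(round(img_h * scale))

print(keep_ratio_size(1920, 1080))   # 例如 1080p 输入时得到 (1366, 768)
```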
<a name="17"></a>
# FAQ
### 1. 如何处理一些 undefined reference / error while loading shared libraries?
> 如:./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory
遇到该问题时请找到具体的库的位置设置LD_LIBRARY_PATH或者安装缺少的库。
> 示例一libverify.so.1: cannot open shared object file: No such file or directory
> 链接找不到libverify.so文件一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib 解决(实际冒号后面添加的路径以libverify.so文件所在的路径为准)
> 示例二libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory
> 链接找不到libopencv_videoio.so文件一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib 解决(实际冒号后面添加的路径以libopencv_videoio.so所在路径为准)
> 示例三GLIBCXX_X.X.X not found
> 链接无法找到glibc版本请确保系统gcc版本>=SDK的gcc版本。升级gcc/glibc可以百度搜索相关文献。
### 2. 使用libcurl请求http服务时速度明显变慢
这是因为libcurl请求continue导致server等待数据的问题添加空的header即可
```bash
headers = curl_slist_append(headers, "Expect:");
```
### 3. 运行二进制时,提示 libverify.so cannot open shared object file
可能cmake没有正确设置rpath, 可以设置LD_LIBRARY_PATH为sdk的lib文件夹后再运行
```bash
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo
```
### 4. 编译时报错file format not recognized
可能是因为在复制SDK时文件信息丢失。请将整个压缩包复制到目标设备中再解压缩、编译


@@ -1,389 +0,0 @@
# 简介
本文档以[千分类模型_MobileNetV3](https://ai.baidu.com/easyedge/app/openSource)为例介绍FastDeploy中的模型SDK在**Intel x86_64 / NVIDIA GPU Windows C++** 环境下1SDK 图像和视频推理部署步骤2介绍模型推理全流程API方便开发者了解项目后二次开发。
其中Windows Python请参考[Windows Python环境下的推理部署](./Windows-Python-SDK-Inference.md)文档。
<!--ts-->
* [简介](#简介)
* [环境准备](#环境准备)
* [1. SDK下载](#1-sdk下载)
* [2. CPP环境](#2-cpp环境)
* [快速开始](#快速开始)
* [1. 项目结构说明](#1-项目结构说明)
* [2. 测试EasyEdge服务](#2-测试easyedge服务)
* [3. 预测图像](#3-预测图像)
* [4. 预测视频流](#4-预测视频流)
* [5. 编译Demo](#5-编译demo)
* [预测API流程详解](#预测api流程详解)
* [1. SDK参数运行配置](#1-sdk参数运行配置)
* [2. 初始化Predictor](#2-初始化predictor)
* [3. 预测推理](#3-预测推理)
* [3.1 预测图像](#31-预测图像)
* [3.2 预测视频](#32-预测视频)
* [FAQ](#faq)
<!--te-->
# 环境准备
## 1. SDK下载
根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压缩后的文件结构如`快速开始`中[1项目介绍说明](#1-项目结构说明)介绍。
## 2. CPP环境
> 建议使用Microsoft Visual Studio 2015及以上版本获取核心 C 和 C++ 支持,安装时请选择“使用 C++ 的桌面开发”工作负载。
# 快速开始
## 1. 项目结构说明
```shell
EasyEdge-win-xxx
├── data
│ ├── model # 模型文件资源文件夹,可替换为其他模型
│ └── config # 配置文件
├── bin # demo二进制程序
│ ├── xxx_image # 预测图像demo
│ ├── xxx_video # 预测视频流demo
│ └── xxx_serving # 启动http预测服务demo
├── dll # demo二进制程序依赖的动态库
├── ... # 二次开发依赖的文件
├── python # Python SDK文件
├── EasyEdge.exe # EasyEdge服务
└── README.md # 环境说明
```
## 2. 测试EasyEdge服务
> 模型资源文件默认已经打包在开发者下载的SDK包中请先将zip包整体拷贝到具体运行的设备中再解压缩使用。
SDK下载完成后双击打开EasyEdge.exe启动推理服务输入要绑定的Host ip及端口号Port点击启动服务。
<div align="center">
<img src="https://user-images.githubusercontent.com/54695910/175854086-d507c288-56c8-4fa9-a00c-9d3cfeaac1c8.png" alt="图片" style="zoom: 67%;" />
</div>
服务启动后,打开浏览器输入`http://{Host ip}:{Port}`,添加图片或者视频来进行测试。
<div align="center">
<img src="https://user-images.githubusercontent.com/54695910/175854073-fb8189e5-0ffb-472c-a17d-0f35aa6a8418.png" style="zoom:67%;" />
</div>
## 3. 预测图像
除了通过上述方式外您还可以使用bin目录下的可执行文件来体验单一的功能。在dll目录下点击右键选择"在终端打开",执行如下命令。
> 需要将bin目录下的可执行文件移动到dll目录下执行或者将dll目录添加到系统环境变量中。
```bash
.\easyedge_image_inference {模型model文件夹} {测试图片路径}
```
运行效果示例:
<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854068-28d27c0a-ef83-43ee-9e89-b65eed99b476.jpg" width="400"></div>
```shell
2022-06-20 10:36:57,602 INFO [EasyEdge] 9788 EasyEdge Windows Development Kit 1.5.2(Build CPU.Generic 20220607) Release
--- Fused 0 subgraphs into layer_norm op.
2022-06-20 10:36:58,008 INFO [EasyEdge] 9788 Allocate graph success.
Results of image ..\demo.jpg:
8, n01514859 hen, p:0.953429
save result image to ..\demo.jpg.result-cpp.jpg
Done
```
可以看到,运行结果为`index8labelhen`通过imagenet [类别映射表](https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a),可以找到对应的类别,即 'hen',由此说明我们的预测结果正确。
## 4. 预测视频流
```
.\easyedge_video_inference {模型model文件夹} {video_type} {video_src}
```
其中video_type支持三种视频流类型分别是1本地视频文件2本地摄像头id3网络视频流地址。
```
/**
* @brief 输入源类型
*/
enum class SourceType {
kVideoFile = 1, // 本地视频文件
kCameraId = 2, // 摄像头的index
kNetworkStream = 3, // 网络视频流
};
```
video_src 即为对应的本地视频文件路径、摄像头id或网络视频流地址。
## 5. 编译Demo
在[项目结构说明](#1-项目结构说明)中,`bin`路径下的可执行文件是由`src`下的对应文件编译得到的,具体的编译命令如下。
```
cd src
mkdir build && cd build
cmake .. && make
```
编译完成后在build文件夹下会生成编译好的可执行文件如HTTP预测服务的二进制文件`build/demo_serving/easyedge_serving`
# 预测API流程详解
本章节主要结合前文的Demo示例来介绍推理API方便开发者学习并将运行库嵌入到开发者的程序当中更详细的API请参考`include/easyedge/easyedge*.h`文件。图像、视频的推理包含以下3个API查看下面的cpp代码中的step注释说明。
> ❗注意:
> 1`src`文件夹中包含完整可编译的cmake工程实例建议开发者先行了解[cmake工程基本知识](https://cmake.org/cmake/help/latest/guide/tutorial/index.html)。
> 2请优先参考SDK中自带的Demo工程的使用流程和说明。遇到错误请优先参考文件中的注释、解释、日志说明。
```cpp
// step 1: SDK配置运行参数
EdgePredictorConfig config;
config.model_dir = {模型文件目录};
// step 2: 创建并初始化Predictor在这里选择合适的引擎
auto predictor = global_controller()->CreateEdgePredictor(config);
// step 3-1: 预测图像
auto img = cv::imread({图片路径});
std::vector<EdgeResultData> results;
predictor->infer(img, results);
// step 3-2: 预测视频
std::vector<EdgeResultData> results;
FrameTensor frame_tensor;
VideoConfig video_config;
video_config.source_type = static_cast<SourceType>(video_type); // source_type 定义参考头文件 easyedge_video.h
video_config.source_value = video_src;
/*
... more video_configs, 根据需要配置video_config的各选项
*/
auto video_decoding = CreateVideoDecoding(video_config);
while (video_decoding->next(frame_tensor) == EDGE_OK) {
results.clear();
if (frame_tensor.is_needed) {
predictor->infer(frame_tensor.frame, results);
render(frame_tensor.frame, results, predictor->model_info().kind);
}
//video_decoding->display(frame_tensor); // 显示当前frame需在video_config中开启配置
//video_decoding->save(frame_tensor); // 存储当前frame到视频需在video_config中开启配置
}
```
若需自定义library search path或者gcc路径修改对应Demo工程下的CMakeLists.txt即可。
## 1. SDK参数运行配置
SDK的参数通过`EdgePredictorConfig::set_config`及`global_controller()->set_config`配置。本Demo 中设置了模型路径,其他参数保留默认参数。更详细的支持运行参数等,可以参考开发工具包中的头文件(`include/easyedge/easyedge_xxxx_config.h`)的详细说明。
配置参数使用方法如下:
```
EdgePredictorConfig config;
config.model_dir = {模型文件目录};
```
## 2. 初始化Predictor
- 接口
```cpp
auto predictor = global_controller()->CreateEdgePredictor(config);
predictor->init();
```
若返回非0请查看输出日志排查错误原因。
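一个简单的检查方式如下(仅为示意代码,错误信息文案为示例):
```cpp
// 示意检查初始化返回值非0时结合输出日志排查
int ret = predictor->init();
if (ret != 0) {
  std::cerr << "Predictor init failed, error code: " << ret << std::endl;
  return -1;
}
```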
## 3. 预测推理
### 3.1 预测图像
> 在Demo中展示了预测接口infer()传入cv::Mat& image图像内容并将推理结果赋值给std::vector<EdgeResultData>& result。更多关于infer()的使用,可以参考`easyedge.h`头文件中的参数说明,自行传入需要的内容做推理。
- 接口输入
```cpp
/**
* @brief
* 通用接口
* @param image: must be BGR , HWC format (opencv default)
* @param result
* @return
*/
virtual int infer(cv::Mat& image, std::vector<EdgeResultData>& result) = 0;
```
图片的格式务必为opencv默认的BGR, HWC格式。
- 接口返回
`EdgeResultData`中可以获取对应的分类信息、位置信息。
```cpp
struct EdgeResultData {
int index; // 分类结果的index
std::string label; // 分类结果的label
float prob; // 置信度
// 物体检测 或 图像分割时使用:
float x1, y1, x2, y2; // (x1, y1): 左上角, (x2, y2): 右下角; 均为0~1的长宽比例值。
// 图像分割时使用:
cv::Mat mask; // 0, 1 的mask
std::string mask_rle; // Run Length Encoding游程编码的mask
};
```
*** 关于矩形坐标 ***
x1 * 图片宽度 = 检测框的左上角的横坐标
y1 * 图片高度 = 检测框的左上角的纵坐标
x2 * 图片宽度 = 检测框的右下角的横坐标
y2 * 图片高度 = 检测框的右下角的纵坐标
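以检测结果绘制为例,下面给出一个将比例坐标换算为像素坐标并画框的简单示意(假设 img 为 cv::imread 读取的原图results 为 infer() 的输出):
```cpp
// 示意:将 EdgeResultData 中的比例坐标换算为像素坐标并绘制矩形框
for (const auto& r : results) {
  cv::Point left_top(static_cast<int>(r.x1 * img.cols),
                     static_cast<int>(r.y1 * img.rows));      // 左上角像素坐标
  cv::Point right_bottom(static_cast<int>(r.x2 * img.cols),
                         static_cast<int>(r.y2 * img.rows));  // 右下角像素坐标
  cv::rectangle(img, left_top, right_bottom, cv::Scalar(0, 255, 0), 2);
}
```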
*** 关于图像分割mask ***
```
cv::Mat mask为图像掩码的二维数组
{
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
其中1代表为目标区域0代表非目标区域
```
*** 关于图像分割mask_rle ***
该字段返回了mask的游程编码解析方式可参考 [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)。
以上字段可以参考demo文件中使用opencv绘制的逻辑进行解析。
### 3.2 预测视频
SDK 提供了支持摄像头读取、视频文件和网络视频流的解析工具类`VideoDecoding`,此类提供了获取视频帧数据的便利函数。通过`VideoConfig`结构体可以控制视频/摄像头的解析策略、抽帧策略、分辨率调整、结果视频存储等功能。对于抽取到的视频帧可以直接作为SDK infer 接口的参数进行预测。
- 接口输入
class`VideoDecoding`
```
/**
* @brief 获取输入源的下一帧
* @param frame_tensor
* @return
*/
virtual int next(FrameTensor &frame_tensor) = 0;
/**
* @brief 显示当前frame_tensor中的视频帧
* @param frame_tensor
* @return
*/
virtual int display(const FrameTensor &frame_tensor) = 0;
/**
* @brief 将当前frame_tensor中的视频帧写为本地视频文件
* @param frame_tensor
* @return
*/
virtual int save(FrameTensor &frame_tensor) = 0;
/**
* @brief 获取视频的fps属性
* @return
*/
virtual int get_fps() = 0;
/**
* @brief 获取视频的width属性
* @return
*/
virtual int get_width() = 0;
/**
* @brief 获取视频的height属性
* @return
*/
virtual int get_height() = 0;
```
struct `VideoConfig`
```
/**
* @brief 视频源、抽帧策略、存储策略的设置选项
*/
struct VideoConfig {
SourceType source_type; // 输入源类型
std::string source_value; // 输入源地址如视频文件路径、摄像头index、网络流地址
int skip_frames{0}; // 设置跳帧每隔skip_frames帧抽取一帧并把该抽取帧的is_needed置为true
int retrieve_all{false}; // 是否抽取所有frame以便于作为显示和存储对于不满足skip_frames策略的frame把所抽取帧的is_needed置为false
int input_fps{0}; // 在采取抽帧之前设置视频的fps
Resolution resolution{Resolution::kAuto}; // 采样分辨率只对camera有效
bool enable_display{false}; // 默认不支持。
std::string window_name{"EasyEdge"};
bool display_all{false}; // 是否显示所有frame若为false仅显示根据skip_frames抽取的frame
bool enable_save{false};
std::string save_path; // frame存储为视频文件的路径
bool save_all{false}; // 是否存储所有frame若为false仅存储根据skip_frames抽取的frame
std::map<std::string, std::string> conf;
};
```
| 序号 | 字段 | 含义 |
| --- | -------------- | ---------------------------------------------------------------------------------------------------------------------------------- |
| 1 | `source_type` | 输入源类型支持视频文件、摄像头、网络视频流三种值分别为1、2、3 |
| 2 | `source_value` | 若`source_type`为视频文件,该值为指向视频文件的完整路径;若`source_type`为摄像头该值为摄像头的index如对于`/dev/video0`的摄像头则index为0若`source_type`为网络视频流,则为该视频流的完整地址。 |
| 3 | `skip_frames` | 设置跳帧每隔skip_frames帧抽取一帧并把该抽取帧的is_needed置为true标记为is_needed的帧是用来做预测的帧。反之直接跳过该帧不经过预测。 |
| 4 | `retrieve_all` | 若置该项为true则无论是否设置跳帧所有的帧都会被抽取返回以作为显示或存储用。 |
| 5 | `input_fps` | 用于抽帧前设置fps |
| 6 | `resolution` | 设置摄像头采样的分辨率,其值请参考`easyedge_video.h`中的定义,注意该分辨率调整仅对输入源为摄像头时有效 |
| 7 | `conf` | 高级选项。部分配置会通过该map来设置 |
*** 注意:***
1. `VideoConfig`不支持`display`功能。如果需要使用`VideoConfig`的`display`功能需要自行编译带有GTK选项的OpenCV。
2. 使用摄像头抽帧时,如果通过`resolution`设置了分辨率调整,但是不起作用,请添加如下选项:
```
video_config.conf["backend"] = "2";
```
3. 部分设备上的CSI摄像头尚未兼容如遇到问题可以通过工单、QQ交流群或微信交流群反馈。
具体接口调用流程可以参考SDK中的`demo_video_inference`。
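下面给出一个按上表字段配置`VideoConfig`的简单示意(路径等均为示例值,具体字段以`easyedge_video.h`为准):
```cpp
// 示意:读取本地视频文件,每隔 4 帧抽取一帧做预测,并将结果帧存储为本地视频
VideoConfig video_config;
video_config.source_type = SourceType::kVideoFile;  // 输入源为本地视频文件
video_config.source_value = "./test.mp4";           // 示例路径,请替换为实际视频文件
video_config.skip_frames = 4;                       // 每隔 4 帧抽取一帧抽取帧的is_needed置为true
video_config.retrieve_all = true;                   // 返回所有帧,便于存储完整视频
video_config.enable_save = true;                    // 开启结果视频存储
video_config.save_path = "./result.mp4";            // 结果视频存储路径(示例)
video_config.save_all = true;                       // 存储所有帧,而不仅是抽取帧
auto video_decoding = CreateVideoDecoding(video_config);
```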
# FAQ
1. 执行infer_demo文件时提示your generated code is out of date and must be regenerated with protoc >= 3.19.0
进入当前项目首先卸载protobuf
```shell
python3 -m pip uninstall protobuf
```
安装低版本protobuf
```shell
python3 -m pip install protobuf==3.19.0
```
@@ -1,275 +0,0 @@
# 简介
本文档以[千分类模型_MobileNetV3](https://ai.baidu.com/easyedge/app/openSource)为例介绍FastDeploy中的模型SDK在**Intel x86_64 / NVIDIA GPU、Windows操作系统** 的C++环境下1HTTP服务化推理部署步骤2介绍推理全流程API方便开发者了解项目后二次开发。
如果开发者对Python语言的相关能力感兴趣请参考[Windows Python环境下的推理部署](./Windows-Python-SDK-Serving.md)文档。
<!--ts-->
* [简介](#简介)
* [环境准备](#环境准备)
* [1. SDK下载](#1-sdk下载)
* [2. CPP环境](#2-cpp环境)
* [快速开始](#快速开始)
* [1. 项目结构说明](#1-项目结构说明)
* [2. 测试EasyEdge服务](#2-测试easyedge服务)
* [3. 启动HTTP预测服务](#3-启动http预测服务)
* [4. 编译Demo](#4-编译demo)
* [HTTP API流程详解](#http-api流程详解)
* [1. 开启http服务](#1-开启http服务)
* [2. 请求http服务](#2-请求http服务)
* [2.1 http 请求方式一:不使用图片base64格式](#21-http-请求方式一不使用图片base64格式)
* [2.2 http 请求方法二:使用图片base64格式](#22-http-请求方法二使用图片base64格式)
* [3. http 返回数据](#3-http-返回数据)
* [FAQ](#faq)
<!--te-->
# 环境准备
## 1. SDK下载
根据开发者模型、部署芯片、操作系统需要,在图形界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GitHub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压缩后的文件结构如`快速开始`中[1项目结构说明](#1-%E9%A1%B9%E7%9B%AE%E7%BB%93%E6%9E%84%E8%AF%B4%E6%98%8E)介绍。
## 2. CPP环境
> 建议使用Microsoft Visual Studio 2015及以上版本获取核心 C 和 C++ 支持,安装时请选择“使用 C++ 的桌面开发”工作负载。
# 快速开始
## 1. 项目结构说明
```shell
EasyEdge-win-xxx
├── data
│ ├── model # 模型文件资源文件夹,可替换为其他模型
│ └── config # 配置文件
├── bin # demo二进制程序
│ ├── xxx_image # 预测图像demo
│ ├── xxx_video # 预测视频流demo
│ └── xxx_serving # 启动http预测服务demo
├── dll # demo二进制程序依赖的动态库
├── ... # 二次开发依赖的文件
├── python # Python SDK文件
├── EasyEdge.exe # EasyEdge服务
└── README.md # 环境说明
```
## 2. 测试EasyEdge服务
> 模型资源文件默认已经打包在开发者下载的SDK包中请先将zip包整体拷贝到具体运行的设备中再解压缩使用。
SDK下载完成后双击打开EasyEdge.exe启动推理服务输入要绑定的Host ip及端口号Port点击启动服务。
<div align="center">
<img src="https://user-images.githubusercontent.com/54695910/175854086-d507c288-56c8-4fa9-a00c-9d3cfeaac1c8.png" alt="图片" style="zoom: 67%;" />
</div>
服务启动后,打开浏览器输入`http://{Host ip}:{Port}`,添加图片或者视频来进行测试。
<div align="center">
<img src="https://user-images.githubusercontent.com/54695910/175854073-fb8189e5-0ffb-472c-a17d-0f35aa6a8418.png" style="zoom:67%;" />
</div>
## 3. 启动HTTP预测服务
除了通过上述方式外您还可以使用bin目录下的可执行文件来体验单一的功能。在dll目录下点击右键选择"在终端打开",执行如下命令。
> 需要将bin目录下的可执行文件移动到dll目录下执行或者将dll目录添加到系统环境变量中。
```
.\easyedge_serving {模型model文件夹路径}
```
启动后,日志中会显示如下字样。
```
HTTP is now serving at 0.0.0.0:24401
```
此时,开发者可以打开浏览器,输入`http://127.0.0.1:24401`,执行和之前一样的操作即可。
![](https://user-images.githubusercontent.com/54695910/175854073-fb8189e5-0ffb-472c-a17d-0f35aa6a8418.png)
## 4. 编译Demo
在[项目结构说明](#1-项目结构说明)中,`bin`路径下的可执行文件是由`src`下的对应文件编译得到的,具体的编译命令如下。
```
cd src
mkdir build && cd build
cmake .. && make
```
编译完成后在build文件夹下会生成编译好的可执行文件如图像推理的二进制文件`build/demo_serving/easyedge_serving`
# HTTP API流程详解
本章节主要结合前文的Demo示例介绍HTTP API方便开发者学习并将运行库嵌入到开发者的程序当中更详细的API请参考`include/easyedge/easyedge*.h`文件。http服务包含服务端和客户端目前支持的能力包括以下几种方式Demo中提供了不使用图片base64格式的`方式一:浏览器请求的方式`,其他几种方式开发者根据个人需要,选择开发。
## 1. 开启http服务
http服务的启动可直接使用`bin/easyedge_serving`,或参考`src/demo_serving.cpp`文件修改相关逻辑
```cpp
/**
* @brief 开启一个简单的demo http服务。
* 该方法会block直到收到sigint/sigterm。
* http服务里图片的解码运行在cpu之上可能会降低推理速度。
* @tparam ConfigT
* @param config
* @param host
* @param port
* @param service_id service_id user parameter, uri '/get/service_id' will respond this value with 'text/plain'
* @param instance_num 实例数量,根据内存/显存/时延要求调整
* @return
*/
template<typename ConfigT>
int start_http_server(
const ConfigT &config,
const std::string &host,
int port,
const std::string &service_id,
int instance_num = 1);
```
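以下为一个调用`start_http_server`的简单示意(此处假设模板参数为`EdgePredictorConfig`路径与service_id均为示例值具体以SDK头文件和Demo工程为准
```cpp
// 示意:加载模型并在 0.0.0.0:24401 上启动 HTTP 预测服务
EdgePredictorConfig config;
config.model_dir = "../data/model";  // 示例路径,请替换为实际模型目录
int ret = start_http_server(config, "0.0.0.0", 24401, "serving_demo", 1);
if (ret != 0) {
  std::cerr << "start_http_server failed, code: " << ret << std::endl;
}
```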
## 2. 请求http服务
> 开发者可以打开浏览器,`http://{设备ip}:24401`,选择图片来进行测试。
### 2.1 http 请求方式一:不使用图片base64格式
URL中的get参数
| 参数 | 说明 | 默认值 |
| --------- | --------- | ---------------- |
| threshold | 阈值过滤, 0~1 | 如不提供,则会使用模型的推荐阈值 |
HTTP POST Body即为图片的二进制内容(无需base64, 无需json)
Python请求示例
```Python
import requests
with open('./1.jpg', 'rb') as f:
img = f.read()
result = requests.post(
'http://127.0.0.1:24401/',
params={'threshold': 0.1},
data=img).json()
```
### 2.2 http 请求方法二:使用图片base64格式
HTTP方法POST
Header如下
| 参数 | 值 |
| ------------ | ---------------- |
| Content-Type | application/json |
**Body请求填写**
- 分类网络:
body 中请求示例
```
{
"image": "<base64数据>"
"top_num": 5
}
```
body中参数详情
| 参数 | 是否必选 | 类型 | 可选值范围 | 说明 |
| ------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- |
| image | 是 | string | - | 图像数据base64编码要求base64图片编码后大小不超过4M,最短边至少15px最长边最大4096px支持jpg/png/bmp格式 **注意去掉头部** |
| top_num | 否 | number | - | 返回分类数量,不填该参数,则默认返回全部分类结果 |
- 检测和分割网络:
Body请求示例
```
{
"image": "<base64数据>"
}
```
body中参数详情
| 参数 | 是否必选 | 类型 | 可选值范围 | 说明 |
| --------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- |
| image | 是 | string | - | 图像数据base64编码要求base64图片编码后大小不超过4M,最短边至少15px最长边最大4096px支持jpg/png/bmp格式 **注意去掉头部** |
| threshold | 否 | number | - | 默认为推荐阈值,也可自行根据需要进行设置 |
## 3. http 返回数据
| 字段 | 类型说明 | 其他 |
| ---------- | ------ | ------------------------------------ |
| error_code | Number | 0为成功,非0参考message获得具体错误信息 |
| results | Array | 内容为具体的识别结果。其中字段的具体含义请参考`预测图像-返回格式`一节 |
| cost_ms | Number | 预测耗时ms不含网络交互时间 |
返回示例
```json
{
"cost_ms": 52,
"error_code": 0,
"results": [
{
"confidence": 0.94482421875,
"index": 1,
"label": "IronMan",
"x1": 0.059185408055782318,
"x2": 0.18795496225357056,
"y1": 0.14762254059314728,
"y2": 0.52510076761245728,
"mask": "...", // 图像分割模型字段
"trackId": 0, // 目标追踪模型字段
},
]
}
```
*** 关于矩形坐标 ***
x1 * 图片宽度 = 检测框的左上角的横坐标
y1 * 图片高度 = 检测框的左上角的纵坐标
x2 * 图片宽度 = 检测框的右下角的横坐标
y2 * 图片高度 = 检测框的右下角的纵坐标
*** 关于分割模型 ***
其中mask为分割模型的游程编码解析方式可参考 [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)。
# FAQ
1. 执行infer_demo文件时提示your generated code is out of date and must be regenerated with protoc >= 3.19.0
进入当前项目首先卸载protobuf
```shell
python3 -m pip uninstall protobuf
```
安装低版本protobuf
```shell
python3 -m pip install protobuf==3.19.0
```
@@ -1,381 +0,0 @@
# 简介
本文档以[千分类模型_MobileNetV3](https://ai.baidu.com/easyedge/app/openSource)为例介绍FastDeploy中的模型SDK在**Intel x86_64 / NVIDIA GPU Windows Python** 环境下1图像推理部署步骤2介绍模型推理全流程API方便开发者了解项目后二次开发。
其中Windows C++请参考[Windows C++环境下的推理部署](./Windows-CPP-SDK-Inference.md)文档。
<!--ts-->
* [简介](#简介)
* [环境准备](#环境准备)
* [1. SDK下载](#1-sdk下载)
* [2. Python环境](#2-python环境)
* [3. 安装依赖](#3-安装依赖)
* [3.1 安装paddlepaddle](#31-安装paddlepaddle)
* [3.2 安装EasyEdge Python Wheel 包](#32-安装easyedge-python-wheel-包)
* [快速开始](#快速开始)
* [1. 文件结构说明](#1-文件结构说明)
* [2. 测试Demo](#2-测试demo)
* [2.1 预测图像](#21-预测图像)
* [预测API流程详解](#预测api流程详解)
* [1. 基础流程](#1-基础流程)
* [2. 初始化](#2-初始化)
* [3. SDK参数配置](#3-sdk参数配置)
* [4. 预测图像](#4-预测图像)
* [FAQ](#faq)
<!--te-->
# 环境准备
## 1. SDK下载
根据开发者模型、部署芯片、操作系统需要,在图形界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GitHub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压缩后的文件结构如下所示
```shell
EasyEdge-win-[部署芯片]
├── data # 模型文件资源文件夹,可替换为其他模型
├── ... # C++/C# 相关文件
├── python # Python SDK文件
├── EasyEdge.exe # 主程序
└── README.md # 环境说明
```
<a name="3"></a>
## 2. Python环境
> 当前SDK仅支持Python 3.7
打开命令行工具使用如下命令获取已安装的Python版本号。如果还没有安装Python环境可以前往[官网](https://www.python.org/)下载Python 3.7对应的安装程序,特别要注意勾上`Add Python 3.7 to PATH`然后点“Install Now”即可完成安装。
```shell
python --version
```
如果本机的版本不匹配,建议使用[pyenv](https://github.com/pyenv/pyenv)、[anaconda](https://www.anaconda.com/)等Python版本管理工具对Python SDK所在目录进行配置。
接着使用如下命令确认pip的版本是否满足要求要求pip版本为20.2.2或更高版本。详细的pip安装过程可以参考[官网教程](https://pip.pypa.io/en/stable/installation/)。
```shell
python -m pip --version
```
## 3. 安装依赖
### 3.1 安装paddlepaddle
根据具体的部署芯片CPU/GPU安装对应的PaddlePaddle的whl包。`x86_64 CPU` 平台可以使用如下命令进行安装:
```shell
python -m pip install paddlepaddle==2.2.2 -i https://mirror.baidu.com/pypi/simple
```
`NVIDIA GPU平台`的详细安装教程可以参考[官网Paddle安装教程](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)。
> 使用 NVIDIA GPU 预测时,必须满足:
>
> 1. 机器已安装 cuda, cudnn
>
> 2. 已正确安装对应 cuda 版本的paddle 版本
> 3. 通过设置环境变量`FLAGS_fraction_of_gpu_memory_to_use`设置合理的初始内存使用比例
<a name="6"></a>
### 3.2 安装EasyEdge Python Wheel 包
`python`目录下安装Python3.7版本对应的EasyEdge Wheel包。对`x86_64 CPU``x86_64 Nvidia GPU平台 `可以使用如下命令进行安装,具体名称以 Python SDK 包中的 whl 为准。
```shell
python -m pip install -U BaiduAI_EasyEdge_SDK-{SDK版本号}-cp37-cp37m-win_amd64.whl
```
<a name="7"></a>
# 快速开始
<a name="8"></a>
## 1. 文件结构说明
Python SDK文件结构如下
```shell
EasyEdge-win-[部署芯片]
├── data # 模型文件资源文件夹,可替换为其他模型
│ ├── model # 模型文件资源文件夹,可替换为其他模型
│ └── config # 配置文件
├── ... # C++/C# 相关文件
├── python # Python SDK文件
│ ├── # 特定Python 3.7版本的EasyEdge Wheel包, 二次开发可使用
│ ├── BaiduAI_EasyEdge_SDK-${SDK版本号}-cp37-cp37m-win_amd64.whl
│ ├── requirements.txt #
│ ├── infer_demo # demo体验完整文件
│ │ ├── demo_xxx.py # 包含前后处理的端到端推理demo文件
│ │ └── demo_serving.py # 提供http服务的demo文件
│ └── tensor_demo # tensor in/out demo文件
```
<a name="9"></a>
## 2. 测试Demo
<a name="10"></a>
### 2.1 预测图像
根据部署平台使用infer_demo文件夹下的demo文件执行如下命令。
```shell
python demo_x86_cpu.py {模型model文件夹} {测试图片路径}
```
运行效果示例:
<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854068-28d27c0a-ef83-43ee-9e89-b65eed99b476.jpg" width="400"></div>
```shell
2022-06-14 18:35:44 DEBUG [EasyEdge] [demo_x86_cpu.py:41] 19424: Config:: w: 256, h: 256; mean: [123.675, 116.28, 103.53]; scale: [0.01712475 0.017507 0.01742919]
2022-06-14 18:35:44 INFO [EasyEdge] [demo_x86_cpu.py:41] 19424: Init paddlefluid engine...
2022-06-14 18:35:45 INFO [EasyEdge] [demo_x86_cpu.py:41] 19424: Paddle version: 2.2.2
2022-06-14 18:35:45 DEBUG [EasyEdge] [demo_x86_cpu.py:41] 19424: CPU thread num set to 1
2022-06-14 18:35:45 DEBUG [EasyEdge] [demo_x86_cpu.py:55] 19424: resize to w257, h256
2022-06-14 18:35:45 DEBUG [EasyEdge] [demo_x86_cpu.py:55] 19424: Switch to CHW
2022-06-14 18:35:45 DEBUG [EasyEdge] [demo_x86_cpu.py:55] 19424: Infer cost: 70.1(66.1) ms
{'confidence': 0.9012351036071777, 'index': 8, 'label': 'n01514859 hen'}
```
可以看到,运行结果为`index: 8, label: hen`通过imagenet [类别映射表](https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a),可以找到对应的类别,即 'hen',由此说明我们的预测结果正确。
# 预测API流程详解
本章节主要结合前文的Demo示例来介绍推理API方便开发者学习并将运行库嵌入到开发者的程序当中更详细的API请参考`infer_demo/demo_xx_xx.py`文件查看下面的Python代码中的step注释说明。
## 1. 基础流程
> ❗注意请优先参考SDK中自带demo的使用流程和说明。遇到错误请优先参考文件中的注释、解释、日志说明。
`infer_demo/demo_xx_xx.py`
```python
# 引入EasyEdge运行库
import BaiduAI.EasyEdge as edge
# 创建并初始化一个预测Program选择合适的引擎
pred = edge.Program()
pred.init(model_dir={RES文件夹路径}, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 CPU
# pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 Nvidia GPU
# pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE) # armv8 CPU
# 预测图像
res = pred.infer_image({numpy.ndarray的图片})
# 关闭结束预测Program
pred.close()
```
`infer_demo/demo_serving.py`
```python
import BaiduAI.EasyEdge as edge
from BaiduAI.EasyEdge.serving import Serving
# 创建并初始化Http服务
server = Serving(model_dir={RES文件夹路径}, license=serial_key)
# 运行Http服务
# 请参考同级目录下demo_xx_xx.py里:
# pred.init(model_dir=xx, device=xx, engine=xx, device_id=xx)
# 对以下参数device\device_id和engine进行修改
server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 CPU
# server.run(host=host, port=port, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 Nvidia GPU
# server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE) # armv8 CPU
```
## 2. 初始化
- 接口
```python
def init(self,
model_dir,
device=Device.CPU,
engine=Engine.PADDLE_FLUID,
config_file='conf.json',
preprocess_file='preprocess_args.json',
model_file='model',
params_file='params',
label_file='label_list.txt',
infer_cfg_file='infer_cfg.json',
device_id=0,
thread_num=1
):
"""
Args:
model_dir: str
device: BaiduAI.EasyEdge.Device比如Device.CPU
engine: BaiduAI.EasyEdge.Engine 比如: Engine.PADDLE_FLUID
config_file: str
preprocess_file: str
model_file: str
params_file: str
label_file: str 标签文件
infer_cfg_file: 包含预处理、后处理信息的文件
device_id: int 设备ID
thread_num: int CPU的线程数
Raises:
RuntimeError, IOError
Returns:
bool: True if success
"""
```
若返回不是True请查看输出日志排查错误原因。
## 3. SDK参数配置
使用 CPU 预测时,可以通过在 init 中设置 thread_num 使用多线程预测。如:
```python
pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID, thread_num=4)
```
使用 GPU 预测时,可以通过在 init 中设置 device_id 指定需要的GPU device id。如
```python
pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID, device_id=0)
```
## 4. 预测图像
- 接口
```python
def infer_image(self, img,
threshold=0.3,
channel_order='HWC',
color_format='BGR',
data_type='numpy')
"""
Args:
img: np.ndarray or bytes
threshold: float
only return result with confidence larger than threshold
channel_order: string
channel order HWC or CHW
color_format: string
color format order RGB or BGR
data_type: string
仅在图像分割时有意义。 'numpy' or 'string'
'numpy': 返回已解析的mask
'string': 返回未解析的mask游程编码
Returns:
list
"""
```
| 字段 | 类型 | 取值 | 说明 |
| ---------- | -------------------- | --------- | ------------------------ |
| confidence | float | 0~1 | 分类或检测的置信度 |
| label | string | | 分类或检测的类别 |
| index | number | | 分类或检测的类别 |
| x1, y1 | float | 0~1 | 物体检测,矩形的左上角坐标 (相对长宽的比例值) |
| x2, y2 | float | 0~1 | 物体检测,矩形的右下角坐标(相对长宽的比例值) |
| mask | string/numpy.ndarray | | 图像分割的mask |
***关于矩形坐标***
x1 * 图片宽度 = 检测框的左上角的横坐标
y1 * 图片高度 = 检测框的左上角的纵坐标
x2 * 图片宽度 = 检测框的右下角的横坐标
y2 * 图片高度 = 检测框的右下角的纵坐标
可以参考 demo 文件中使用 opencv 绘制矩形的逻辑。
***结果示例***
i) 图像分类
```json
{
"index": 736,
"label": "table",
"confidence": 0.9
}
```
ii) 物体检测
```json
{
"index": 8,
"label": "cat",
"confidence": 1.0,
"x1": 0.21289,
"y1": 0.12671,
"x2": 0.91504,
"y2": 0.91211,
}
```
iii) 图像分割
```json
{
"name": "cat",
"score": 1.0,
"location": {
"left": ...,
"top": ...,
"width": ...,
"height": ...,
},
"mask": ...
}
```
mask字段中data_type为`numpy`时,返回图像掩码的二维数组
```text
{
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
其中1代表为目标区域0代表非目标区域
```
data_type为`string`mask的游程编码解析方式可参考 [demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)。
# FAQ
1. 执行infer_demo文件时提示your generated code is out of date and must be regenerated with protoc >= 3.19.0
进入当前项目首先卸载protobuf
```shell
python3 -m pip uninstall protobuf
```
安装低版本protobuf
```shell
python3 -m pip install protobuf==3.19.0
```
@@ -1,262 +0,0 @@
# 简介
本文档以[千分类模型_MobileNetV3](https://ai.baidu.com/easyedge/app/openSource)为例介绍FastDeploy中的模型SDK在**Intel x86_64 / NVIDIA GPU、Windows操作系统** 的Python环境下1HTTP服务化推理部署步骤2介绍推理全流程API方便开发者了解项目后二次开发。
如果开发者对C++语言的相关能力感兴趣请参考[Windows C++环境下的推理部署](./Windows-CPP-SDK-Serving.md)文档。
<!--ts-->
* [简介](#简介)
* [环境准备](#环境准备)
* [1. SDK下载](#1-sdk下载)
* [2. Python环境](#2-python环境)
* [3. 安装依赖](#3-安装依赖)
* [3.1 安装paddlepaddle](#31-安装paddlepaddle)
* [3.2 安装EasyEdge Python Wheel 包](#32-安装easyedge-python-wheel-包)
* [快速开始](#快速开始)
* [1. 文件结构说明](#1-文件结构说明)
* [2. 测试Demo](#2-测试demo)
* [2.1 启动HTTP预测服务](#21-启动http预测服务)
* [HTTP API流程详解](#http-api流程详解)
* [1. 开启http服务](#1-开启http服务)
* [2. 请求http服务](#2-请求http服务)
* [2.1 http 请求方式:不使用图片base64格式](#21-http-请求方式不使用图片base64格式)
* [3. http返回数据](#3-http返回数据)
<!--te-->
# 环境准备
## 1. SDK下载
根据开发者模型、部署芯片、操作系统需要,在图形界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GitHub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压缩后的文件结构如下所示
```shell
EasyEdge-win-[部署芯片]
├── data # 模型文件资源文件夹,可替换为其他模型
├── ... # C++/C# 相关文件
├── python # Python SDK文件
├── EasyEdge.exe # 主程序
└── README.md # 环境说明
```
## 2. Python环境
> 当前SDK仅支持Python 3.7
打开命令行工具使用如下命令获取已安装的Python版本号。如果还没有安装Python环境可以前往[官网](https://www.python.org/)下载Python 3.7对应的安装程序,特别要注意勾上`Add Python 3.7 to PATH`然后点“Install Now”即可完成安装。
```shell
python --version
```
如果本机的版本不匹配,建议使用[pyenv](https://github.com/pyenv/pyenv)、[anaconda](https://www.anaconda.com/)等Python版本管理工具对Python SDK所在目录进行配置。
接着使用如下命令确认pip的版本是否满足要求要求pip版本为20.2.2或更高版本。详细的pip安装过程可以参考[官网教程](https://pip.pypa.io/en/stable/installation/)。
```shell
python -m pip --version
```
## 3. 安装依赖
### 3.1 安装paddlepaddle
根据具体的部署芯片CPU/GPU安装对应的PaddlePaddle的whl包。`x86_64 CPU` 平台可以使用如下命令进行安装:
```shell
python -m pip install paddlepaddle==2.2.2 -i https://mirror.baidu.com/pypi/simple
```
`NVIDIA GPU平台`的详细安装教程可以参考[官网Paddle安装教程](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)。
> 使用 NVIDIA GPU 预测时,必须满足:
>
> 1. 机器已安装 cuda, cudnn
>
> 2. 已正确安装对应 cuda 版本的paddle 版本
> 3. 通过设置环境变量`FLAGS_fraction_of_gpu_memory_to_use`设置合理的初始内存使用比例
### 3.2 安装EasyEdge Python Wheel 包
`python`目录下安装Python3.7版本对应的EasyEdge Wheel包。对`x86_64 CPU``x86_64 Nvidia GPU平台 `可以使用如下命令进行安装,具体名称以 Python SDK 包中的 whl 为准。
```shell
python -m pip install -U BaiduAI_EasyEdge_SDK-{SDK版本号}-cp37-cp37m-win_amd64.whl
```
# 快速开始
## 1. 文件结构说明
Python SDK文件结构如下
```shell
EasyEdge-win-[部署芯片]
├── data # 模型文件资源文件夹,可替换为其他模型
│ ├── model # 模型文件资源文件夹,可替换为其他模型
│ └── config # 配置文件
├── ... # C++/C# 相关文件
├── python # Python SDK文件
│ ├── # 特定Python 3.7版本的EasyEdge Wheel包, 二次开发可使用
│ ├── BaiduAI_EasyEdge_SDK-${SDK版本号}-cp37-cp37m-win_amd64.whl
│ ├── requirements.txt #
│ ├── infer_demo # demo体验完整文件
│ │ ├── demo_xxx.py # 包含前后处理的端到端推理demo文件
│ │ └── demo_serving.py # 提供http服务的demo文件
│ └── tensor_demo # tensor in/out demo文件
```
## 2. 测试Demo
### 2.1 启动HTTP预测服务
```shell
python demo_serving.py {模型model文件夹} {host, default 0.0.0.0} {port, default 24401}
```
成功启动后,终端中会显示如下字样。
```shell
2022-06-14 18:45:15 INFO [EasyEdge] [demo_serving.py:50] 21212: Init paddlefluid engine...
2022-06-14 18:45:16 INFO [EasyEdge] [demo_serving.py:50] 21212: Paddle version: 2.2.2
* Serving Flask app 'Serving' (lazy loading)
* Environment: production
WARNING: This is a development server. Do not use it in a production deployment.
Use a production WSGI server instead.
* Debug mode: off
* Running on all addresses (0.0.0.0)
WARNING: This is a development server. Do not use it in a production deployment.
* Running on http://127.0.0.1:24401
* Running on http://192.168.3.17:24401 (Press CTRL+C to quit)
```
开发者此时可以打开浏览器,输入`http://{host ip}:24401`,选择图片或者视频来进行测试,运行效果如下。
<div align="center">
<img src="https://user-images.githubusercontent.com/54695910/175854073-fb8189e5-0ffb-472c-a17d-0f35aa6a8418.png" style="zoom:50%;" />
</div>
# HTTP API流程详解
本章节主要结合前文的Demo示例来对API进行介绍方便开发者学习并将运行库嵌入到开发者的程序当中更详细的API请参考对应的Python文件。http服务包含服务端和客户端Demo中提供了不使用图片base64格式的`方式一:浏览器请求的方式`,其他几种方式开发者根据个人需要,选择开发。
## 1. 开启http服务
http服务的启动使用`demo_serving.py`文件
```python
class Serving(object):
""" SDK local serving """
def __init__(self, model_dir, license='', model_filename='model', params_filename='params'):
self.program = None
self.model_dir = model_dir
self.model_filename = model_filename
self.params_filename = params_filename
self.program_lock = threading.Lock()
self.license_key = license
# 只有ObjectTracking会初始化video_processor
self.video_processor = None
def run(self, host, port, device, engine=Engine.PADDLE_FLUID, service_id=0, device_id=0, **kwargs):
""" Args: host : str port : str device : BaiduAI.EasyEdge.Device比如Device.CPU engine : BaiduAI.EasyEdge.Engine 比如: Engine.PADDLE_FLUID """
self.run_serving_with_flask(host, port, device, engine, service_id, device_id, **kwargs)
```
## 2. 请求http服务
> 开发者可以打开浏览器,`http://{设备ip}:24401`,选择图片来进行测试。
### 2.1 http 请求方式:不使用图片base64格式
URL中的get参数
| 参数 | 说明 | 默认值 |
| --------- | --------- | ---------------- |
| threshold | 阈值过滤, 0~1 | 如不提供,则会使用模型的推荐阈值 |
HTTP POST Body即为图片的二进制内容。
Python请求示例
```python
import requests
with open('./1.jpg', 'rb') as f:
img = f.read()
result = requests.post(
'http://127.0.0.1:24401/',
params={'threshold': 0.1},
data=img).json()
```
## 3. http返回数据
| 字段 | 类型说明 | 其他 |
| ---------- | ------ | ------------------------------------ |
| error_code | Number | 0为成功,非0参考message获得具体错误信息 |
| results | Array | 内容为具体的识别结果。其中字段的具体含义请参考`预测图像-返回格式`一节 |
| cost_ms | Number | 预测耗时ms不含网络交互时间 |
返回示例
```json
{
"cost_ms": 52,
"error_code": 0,
"results": [
{
"confidence": 0.94482421875,
"index": 1,
"label": "IronMan",
"x1": 0.059185408055782318,
"x2": 0.18795496225357056,
"y1": 0.14762254059314728,
"y2": 0.52510076761245728,
"mask": "...", // 图像分割模型字段
"trackId": 0, // 目标追踪模型字段
},
]
}
```
***关于矩形坐标***
x1 * 图片宽度 = 检测框的左上角的横坐标
y1 * 图片高度 = 检测框的左上角的纵坐标
x2 * 图片宽度 = 检测框的右下角的横坐标
y2 * 图片高度 = 检测框的右下角的纵坐标
***关于分割模型***
其中mask为分割模型的游程编码解析方式可参考 [demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)。
# FAQ
1. 执行infer_demo文件时提示your generated code is out of date and must be regenerated with protoc >= 3.19.0
进入当前项目首先卸载protobuf
```shell
python3 -m pip uninstall protobuf
```
安装低版本protobuf
```shell
python3 -m pip install protobuf==3.19.0
```

docs/compile/README.md Normal file

@@ -0,0 +1,17 @@
# FastDeploy编译
本文档说明编译C++预测库、Python预测库两种编译过程根据编译的平台参考如下文档
- [Linux & Mac 编译](linux_and_mac.md)
- [Windows编译](windows.md)
其中编译过程中,各平台上编译选项如下表所示
| 选项 | 作用 | 备注 |
|:---- | :--- | :--- |
| ENABLE_ORT_BACKEND | 启用ONNXRuntime推理后端默认ON | - |
| WITH_GPU | 是否开启GPU使用默认OFF | 当设为TRUE时须通过CUDA_DIRECTORY指定cuda目录如/usr/local/cuda; Mac上不支持设为ON |
| ENABLE_TRT_BACKEND | 启用TensorRT推理后端默认OFF | 当设为TRUE时需通过TRT_DIRECTORY指定tensorrt目录如/usr/downloads/TensorRT-8.4.0.1; Mac上不支持设为ON|
| ENABLE_VISION | 编译集成视觉模型模块包括OpenCV的编译集成默认OFF | - |
| ENABLE_PADDLE_FRONTEND | 编译集成Paddle2ONNX默认ON | - |
| ENABLE_DEBUG | 当为ON时支持输出DEBUG信息但可能会有性能损耗默认OFF | - |


@@ -0,0 +1,32 @@
# Linux & Mac编译
## 编译C++
```
git clone https://gitee.com/jiangjiajun/FastDeploy.git
cd FastDeploy
git submodule init
git submodule update
mkdir build && cd build
cmake .. -DENABLE_ORT_BACKEND=ON \
-DENABLE_VISION=ON \
-DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3
make -j8
make install
```
编译后的预测库即在当前目录下的`fastdeploy-0.0.3`
## 编译Python安装包
```
git clone https://gitee.com/jiangjiajun/FastDeploy.git
cd FastDeploy
git submodule init
git submodule update
# Python通过export环境变量设置编译选项
export ENABLE_ORT_BACKEND=ON
export ENABLE_VISION=ON
python setup.py build
python setup.py bdist_wheel
```
编译后的wheel包即在当前目录下的`dist`目录中
编译选项说明参考[编译指南](./README.md)

docs/compile/windows.md Normal file

@@ -0,0 +1,3 @@
# Windows编译
还没写

docs/cpp/README.md Normal file

@@ -0,0 +1,110 @@
# C++部署
## 准备预测库
参考编译文档[FastDeploy编译](../compile/README.md)进行编译,或直接使用如下预编译库
| 编译库 | 平台 | 支持设备 | 说明 |
|:------ | :---- | :------- | :----- |
|[fastdeploy-linux-x64-0.0.3.tgz](https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz) | Linux | CPU | 集成ONNXRuntime |
|[fastdeploy-linux-x64-gpu-0.0.3.tgz](https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-gpu-0.0.3.tgz) | Linux | CPU/GPU | 集成ONNXRuntime, TensorRT |
|[fastdeploy-osx-x86_64-0.0.3.tgz](https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-osx-x86_64-0.0.3.tgz) | Mac OSX Intel CPU | CPU | 集成ONNXRuntime |
|[fastdeploy-osx-arm64-0.0.3.tgz](https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-osx-arm64-0.0.3.tgz) | Mac OSX M1 CPU | CPU | 集成ONNXRuntime |
## 使用
FastDeploy提供了多种领域内的模型可快速完成模型的部署本文档以YOLOv5在Linux上的部署为例
```
# 下载库并解压
wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
tar xvf fastdeploy-linux-x64-0.0.3.tgz
# 下载模型和测试图片
wget https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5s.onnx
wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg
```
### YOLOv5预测代码
准备如下`yolov5.cc`代码
```
#include "fastdeploy/vision.h"
int main() {
namespace vis = fastdeploy::vision;
auto model = vis::ultralytics::YOLOv5("yolov5s.onnx"); // 加载模型
if (!model.Initialized()) { // 判断模型是否初始化成功
std::cerr << "Initialize failed." << std::endl;
return -1;
}
cv::Mat im = cv::imread("bus.jpg"); // 读入图片
vis::DetectionResult res;
if (!model.Predict(&im, &res)) { // 预测图片
std::cerr << "Prediction failed." << std::endl;
return -1;
}
std::cout << res.Str() << std::endl; // 输出检测结果
return 0;
}
```
### 编译代码
编译前先完成CMakeLists.txt的开发`yolov5.cc`同级目录创建`CMakeLists.txt`文件,内容如下
```
PROJECT(yolov5_demo C CXX)
CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
# 在低版本ABI环境中可通过如下代码进行兼容性编译
# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
# 在下面指定下载解压后的fastdeploy库路径
set(FASTDEPLOY_INSTALL_DIR /ssd1/download/fastdeploy-linux-x64-0.0.3/)
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# 添加FastDeploy依赖头文件
include_directories(${FASTDEPLOY_INCS})
add_executable(yolov5_demo ${PROJECT_SOURCE_DIR}/yolov5.cc)
message(${FASTDEPLOY_LIBS})
# 添加FastDeploy库依赖
target_link_libraries(yolov5_demo ${FASTDEPLOY_LIBS})
```
此时当前目录结构如下所示
```
- demo_directory
|___fastdeploy-linux-x64-0.0.3/ # 预测库解压
|___yolov5.cc # 示例代码
|___CMakeLists.txt # cmake文件
|___yolov5s.onnx # 模型文件
|___bus.jpg # 测试图片
```
执行如下命令进行编译
```
cmake .
make -j
```
编译后可执行二进制即为当前目录下的`yolov5_demo`,使用如下命令执行
```
./yolov5_demo
```
即会加载模型进行推理,得到结果如下
```
DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
223.395126,403.948669, 345.337189, 867.339050, 0.856906, 0
668.301758,400.781372, 808.441772, 882.534973, 0.829716, 0
50.210758,398.571289, 243.123383, 905.016846, 0.805375, 0
23.768217,214.979355, 802.627869, 778.840820, 0.756311, 5
0.737200,552.281006, 78.617218, 890.945007, 0.363471, 0
```
@@ -1,212 +0,0 @@
# 简介
本文档介绍FastDeploy中的模型SDK在iOS环境下1推理部署步骤2介绍SDK使用说明方便开发者了解项目后二次开发。
<!--ts-->
* [简介](#简介)
* [系统支持说明](#系统支持说明)
* [1. 系统支持说明](#1-系统支持说明)
* [2. SDK大小说明](#2-sdk大小说明)
* [快速开始](#快速开始)
* [1. 项目结构说明](#1-项目结构说明)
* [2. 测试Demo](#2-测试demo)
* [SDK使用说明](#sdk使用说明)
* [1. 集成指南](#1-集成指南)
* [1.1 依赖库集成](#11-依赖库集成)
* [2. 调用流程示例](#2-调用流程示例)
* [2.1 初始化](#21-初始化)
* [2.2 预测图像](#22-预测图像)
* [FAQ](#faq)
<!--te-->
# 系统支持说明
## 1. 系统支持说明
1. 系统支持iOS 9.0及以上。
2. 硬件支持:支持 arm64 (Standard architectures),暂不支持模拟器。
* 官方验证过的手机机型大部分ARM 架构的手机、平板及开发板。
3. 其他说明
* 3.1 【图像分割类模型】1图像分割类Demo暂未提供实时摄像头录制拍摄的能力开发者可根据自己需要进行二次开发完成2PP-Humanseg-Lite模型设计初衷为横屏视频会议等场景本Demo仅支持竖屏场景开发者可根据自己需要开发横屏功能。<br>
* 3.2 【OCR模型】OCR任务第一次启动任务第一张推理时间久属于正常情况因为涉及到模型加载、预处理等工作<br>
## 2. SDK大小说明
1. 模型资源文件大小影响 SDK 大小
2. SDK 包及 IPA 安装包虽然比较大,但最终安装到设备后所占大小会缩小很多。这与 multi architectures、bitcode 和 AppStore 的优化有关。
# 快速开始
## 1. 项目结构说明
根据开发者模型、部署芯片、操作系统需要,在图形界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GitHub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。SDK目录结构如下
```
.EasyEdge-iOS-SDK
├── EasyDLDemo # Demo工程文件
├── LIB # 依赖库
├── RES
│ ├── easyedge      # 模型资源文件夹一套模型适配不同硬件、OS和部署方式
│ ├── conf.json        # Android、iOS系统APP名字需要
│ ├── model # 模型结构文件
│ ├── params # 模型参数文件
│ ├── label_list.txt # 模型标签文件
│ ├── infer_cfg.json # 模型前后处理等配置文件
└── DOC # 文档
```
## 2. 测试Demo
按如下步骤可直接运行 SDK 体验 Demo
步骤一:用 Xcode 打开 `EasyDLDemo/EasyDLDemo.xcodeproj`
步骤二配置开发者自己的签名不了解签名机制的可以看FAQ [iOS签名介绍](#100)</br>
步骤三:连接手机运行,不支持模拟器
检测模型运行示例:
<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854078-4f1f761d-0629-411a-92cc-6f4180164ca5.png" width="400"></div>
# SDK使用说明
本节介绍如何将 SDK 接入开发者的项目中使用。
## 1. 集成指南
步骤一:依赖库集成
步骤二:`import <EasyDL/EasyDL.h>`
### 1.1 依赖库集成
1. 复制 LIB 目录至项目合适的位置
2. 配置 Build Settings 中 Search paths: 以 SDK 中 LIB 目录路径为例
- Framework Search Paths`${PROJECT_DIR}/../LIB/lib`
- Header Search Paths`${PROJECT_DIR}/../LIB/include`
- Library Search Paths`${PROJECT_DIR}/../LIB/lib`
> 集成过程如出现错误,请参考 Demo 工程对依赖库的引用
## 2. 调用流程示例
以通用ARM的图像分类预测流程为例详细说明请参考后续章节
```
NSError *err;
// step 1: 初始化模型
EasyDLModel *model = [[EasyDLModel alloc] initModelFromResourceDirectory:@"easyedge" withError:&err];
// step 2: 准备待预测的图像
UIImage *image = ...;
// step 3: 预测图像
NSArray *results = [model detectUIImage:image withFilterScore:0 andError:&err];
// step 4: 解析结果
for (id res in results) {
EasyDLClassfiData *clsData = (EasyDLClassfiData *) res;
NSLog(@"labelIndex=%d, labelName=%@, confidence=%f", clsData.category, clsData.label, clsData.accuracy);
}
```
### 2.1 初始化
```
// 示例
// 参数一为模型资源文件夹名称
EasyDLModel *model = [[EasyDLModel alloc] initModelFromResourceDirectory:@"easyedge" withError:&err];
```
> 模型资源文件夹需以 folder reference 方式加入 Xcode 工程,如 `RES/easyedge` 文件夹在 Demo 工程中表现为蓝色
### 2.2 预测图像
所有模型类型通过以下接口获取预测结果:
```
// 返回的数组类型不定
NSArray *results = [model detectUIImage:image withFilterScore:0 andError:&err];
```
返回的数组类型如下,具体可参考 `EasyDLResultData.h` 中的定义:
| 模型类型 | 类型 |
| --- | ---- |
| 图像分类 | EasyDLClassfiData |
| 物体检测/人脸检测 | EasyDLObjectDetectionData |
| 实例分割 | EasyDLObjSegmentationData |
| 姿态估计 | EasyDLPoseData |
| 文字识别 | EasyDLOcrData |
# FAQ
1. 如何多线程并发预测?
SDK内部已经能充分利用多核的计算能力。不建议使用并发来预测。
如果开发者想并发使用,请务必注意`EasyDLModel`所有的方法都不是线程安全的。请初始化多个实例进行并发使用,如
```c
- (void)testMultiThread {
UIImage *img = [UIImage imageNamed:@"1.jpeg"];
NSError *err;
EasyDLModel * model1 = [[EasyDLModel alloc] initModelFromResourceDirectory:@"easyedge" withError:&err];
EasyDLModel * model2 = [[EasyDLModel alloc] initModelFromResourceDirectory:@"easyedge" withError:&err];
dispatch_queue_t queue1 = dispatch_queue_create("testQueue", DISPATCH_QUEUE_CONCURRENT);
dispatch_queue_t queue2 = dispatch_queue_create("testQueue2", DISPATCH_QUEUE_CONCURRENT);
dispatch_async(queue1, ^{
NSError *detectErr;
for(int i = 0; i < 1000; ++i) {
NSArray * res = [model1 detectUIImage:img withFilterScore:0 andError:&detectErr];
NSLog(@"1: %@", res[0]);
}
});
dispatch_async(queue2, ^{
NSError *detectErr;
for(int i = 0; i < 1000; ++i) {
NSArray * res = [model2 detectUIImage:img withFilterScore:0 andError:&detectErr];
NSLog(@"2: %@", res[0]);
}
});
}
```
2. 编译时出现 Undefined symbols for architecture arm64: ...
* 出现 `cxx11, vtable` 字样:请引入 `libc++.tbd`
* 出现 `cv::Mat` 字样:请引入 `opencv2.framework`
* 出现 `CoreML`, `VNRequest` 字样:请引入`CoreML.framework` 并务必`#import <CoreML/CoreML.h> `
3. 运行时报错 Image not found: xxx ...
请Embed具体报错的库。
4. 编译时报错Invalid bitcode version
这个可能是开发者使用的 Xcode 低于12导致可以升级至12版本。
5. 错误说明
SDK 的方法会返回 NSError直接返回的 NSError 的错误码定义在 `EasyDLDefine.h - EEasyDLErrorCode` 中。NSError 附带 message (有时候会附带 NSUnderlyingError开发者可根据 code 和 message 进行错误判断和处理。
6. iOS签名说明
iOS 签名是苹果生态对 APP 开发者做的限定对于个人开发者是免费的对于企业开发者譬如APP要上架应用市场是收费的。此处仅简单说明作为普通开发者第一次尝试使用 Xcode编译代码需要进行的签名操作。<br>
1在Xcode/Preferences/Accounts 中添加个人Apple ID;<br>
2在对应的EasyDLDemo中做如下图设置<br>
<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854089-aa1d1af8-7daa-43ae-868d-32041c27ad86.jpg" width="600"></div>
3完成步骤2后会在手机上安装好对应APP还需要在手机上`设置/通用/设备管理/开发者应用/信任appleID`,才能运行该 APP。

docs/tech/design.md Normal file

@@ -0,0 +1,24 @@
# FastDeploy
FastDeploy分为`Runtime``应用`模块。
## Runtime
`Runtime`对应于不同硬件上的不同后端大部分情况下一种硬件对应于一种后端但对于CPU、GPU, 存在多种后端,用户可根据自己的需求进行选择。
| Runtime | 后端 |
| :------ | :---- |
| CPU(x86_64) | `fastdeploy::Backend::ORT` |
| GPU(Nvidia) | `fastdeploy::Backend::ORT` / `fastdeploy::Backend::TRT` |
具体文档参考 [Runtime文档](runtime.md)
## 应用
应用是基于`Runtime`提供的上层模型推理,集成了模型端到端的推理功能
- Vision
- Text
- Audio
具体文档参考 [Vision文档](vision.md)

docs/tech/models.md Normal file

@@ -0,0 +1,63 @@
# 模型开发
以`ultralytics/yolov5`为例,在`fastdeploy/vision`目录下新增`ultralytics`目录,并创建代码`yolov5.h`
定义`YOLOv5`
```
class YOLOv5 : public FastDeployModel {
public:
// 构造函数指定模型路径并默认为ONNX格式
YOLOv5(const std::string& model_file)
: FastDeployModel(model_file, "", Frontend::ONNX) {
size = {640, 640}; // 图像预处理resize大小
// 图像填充值
padding_value = {114.0, 114.0, 114.0};
// 是否只填充到满足stride的最小方框即可
is_mini_pad = false;
// 是否支持图像resize超过原图尺寸
is_scale_up = true;
// 步长padding到长宽为stride的倍数
stride = 32;
// 通过下面的两个参数来说明模型在CPU/GPU上支持的后端种类
// 指定Device后默认情况下会优先选择最前的后端
valid_cpu_backends = {Backend::ORT};
valid_gpu_backends = {Backend::ORT, Backend::TRT};
}
std::string ModelName() const; // 返回模型名
// 模型初始化, 须在此函数中主动调用基类的`InitBackend()`函数
// 来初始化runtime
// 一些模型前后处理的初始化也可在此函数中如ppdet/ppcls创建一个
// 数据预处理pipeline
bool Init();
// 预处理其中输入是vision::Mat结构输出是FDTensor
// 输出提供给runtime进行推理使用
bool Preprocess(Mat* mat, FDTensor* output);
// 后处理输入是runtime推理输出的FDTensor
// 以及一些跟模型相关的后处理参数如conf_thresh、nms_iou_thresh
bool Postprocess(FDTensor& tensor, DetectionResult* res, float conf_thresh, float nms_iou_thresh);
// 端到端的推理函数,包含前后处理
// 因此一般也建议将后处理的部分参数放在这个接口中
bool Predict(cv::Mat* im, DetectionResult* result, float conf_thresh = 0.25, float nms_iou_thresh = 0.5);
};
```
模型的实现上,并没有特别强的规范约束,但是
- 1. 一定要继承`FastDeployModel`
- 2. 确定可用的`valid_cpu_backends``valid_gpu_backends`
- 3. 要实现`Init()`/`ModelName()`/`Predict()`三个接口
- 4. 建议统一为`Preprocess``Postprocess`两个接口作为前后处理所用
## 其它
`vision`中,会提供几类基础的数据结构使用,包括`vision::ClassifyResult``vision::DetectionResult``vision::SegmentationResult`等作为模型常见的输出结构。 但难免会遇到新的输出结构不在这几类中,对于一定要定制化的数据结构,默认按照下面方式处理
- 1. 如果是大量模型通用的结构,仍然实现在`vision/common.h`中,作为通用的输出结构
- 2. 如果只是某个模型需要,则实现在如`vision/ultralytics/yolov5.h`同时需要自行为此结构体进行pybind封装

docs/tech/runtime.md Normal file

@@ -0,0 +1,135 @@
# fastdeploy::Runtime
## FDTensor Runtime的输入输出数据结构
```
struct FDTensor {
std::vector<int64_t> shape; // 形状
std::string name; // 命名
FDDataType dtype; // 数据类型
Device device = Device::CPU; // 数据存放设备
void* MutableData(); // 获取tensor内存buffer指针
// 获取tensor数据如若tensor数据在其它设备
// 此函数会先将数据拷贝至CPU再返回指向
// CPU内存buffer的指针
void* Data();
// 初始化Tensor并复用外部数据指针
// Tensor的内存buffer将由外部的调用者来创建或释放
void SetExternalData(const std::vector<int>& new_shape,
const FDDataType& data_type,
void* data_buffer,
const Device& dev);
int Nbytes() const; // 返回tensor数据字节大小
int Numel() const; // 返回tensor元素个数
// Debug函数打印tensor的信息包含mean、max、min等
void PrintInfo(const std::string& prefix = "TensorInfo");
};
```
FDTensor是前后处理与`Runtime`进行对接的数据结构,大多情况下建议通过`SetExternalData`来共享用户传入的数据,减小内存拷贝带来的开销。
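下面给出一个通过`SetExternalData`复用外部内存构造输入tensor的简单示意数据、shape与`FDDataType::FP32`等枚举取值仅为示例,具体以头文件定义为准):
```
// 示意:复用外部 float 数组作为输入 tensor避免额外的内存拷贝
std::vector<float> image_data(1 * 3 * 224 * 224, 0.5f);  // 数据由外部调用者创建和释放
FDTensor input;
input.name = "image";  // 名称须与模型输入名一致
input.SetExternalData({1, 3, 224, 224}, FDDataType::FP32,
                      image_data.data(), Device::CPU);
input.PrintInfo("input");  // 打印 shape、mean、max 等调试信息
```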
## Runtime 多后端推理引擎
### RuntimeOption 引擎配置
```
struct RuntimeOption {
// 模型文件和权重文件
std::string model_file;
std::string params_file;
// 模型格式当前可支持Frontend::PADDLE / Frontend::ONNX
Frontend model_format = Frontend::PADDLE;
Backend backend = Backend::ORT;
// CPU上运行时的线程数
int cpu_thread_num = 8;
// 推理硬件当前支持Device::CPU / Device::GPU
// 在CPU/GPU上需与backend进行搭配选择
Device device;
// Backend::ORT的参数
int ort_graph_opt_level;
int ort_inter_op_num_threads;
int ort_execution_mode;
// Backend::TRT的参数
std::map<std::string, std::vector<int32_t>> trt_fixed_shape;
std::map<std::string, std::vector<int32_t>> trt_max_shape;
std::map<std::string, std::vector<int32_t>> trt_min_shape;
std::map<std::string, std::vector<int32_t>> trt_opt_shape;
std::string trt_serialize_file = "";
bool trt_enable_fp16 = false;
bool trt_enable_int8 = false;
size_t trt_max_batch_size = 32;
};
```
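以下为一个配置GPU + TensorRT后端的简单示意字段取值与输入名`inputs`仅为示例):
```
// 示意选择GPU上的TensorRT后端并开启FP16
RuntimeOption option;
option.model_file = "resnet50/inference.pdmodel";
option.params_file = "resnet50/inference.pdiparams";
option.model_format = Frontend::PADDLE;
option.device = Device::GPU;
option.backend = Backend::TRT;
option.trt_enable_fp16 = true;                        // 开启FP16推理
option.trt_fixed_shape["inputs"] = {1, 3, 224, 224};  // "inputs"为示例输入名
```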
### Runtime 引擎
```
struct Runtime {
// 加载模型,引擎初始化
bool Init(const RuntimeOption& _option);
// 进行推理
// 其中输入须正确配置tensor中的name
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
int NumInputs(); // 输入个数
int NumOutputs(); // 输出个数
TensorInfo GetInputInfo(int index); // 获取输入信息包括shape, dtype, name
TensorInfo GetOutputInfo(int index); // 获取输出信息包括shape, dtype, name
RuntimeOption option; // 引擎的配置信息
};
```
## Runtime使用示例
### C++
```
#include "fastdeploy/fastdeploy_runtime.h"
int main() {
auto option = fastdeploy::RuntimeOption();
option.model_file = "resnet50/inference.pdmodel";
option.params_file = "resnet50/inference.pdiparams";
auto runtime = fastdeploy::Runtime();
assert(runtime.Init(option));
// 需准备好输入tensor
std::vector<FDTensor> inputs;
std::vector<FDTensor> outputs;
assert(runtime.Infer(inputs, &outputs));
// 输出tensor的debug信息查看
outputs[0].PrintInfo();
}
```
### Python
```
import fastdeploy as fd
import numpy as np
option = fd.RuntimeOption()
option.model_file = "resnet50/inference.pdmodel"
option.params_file = "resnet50/inference.pdiparams"
runtime = fd.Runtime(option)
result = runtime.infer({"image": np.random.rand(1, 3, 224, 224)})
```

docs/tech/vision.md Normal file

@@ -0,0 +1,74 @@
# Vision
Vision是FastDeploy中的视觉模型模块包含`processors``utils`两个公共模块,以及模型模块。
## processors 图像处理模块
`processors`提供了常见的图像处理操作并为各操作实现不同的后端如当前支持的CPU以及GPU两种处理方式。在模型的预处理运算中开发者调用`processors`提供的API即可快速在不同的处理后端进行切换。
默认在CPU上进行处理
```
namespace vis = fastdeploy::vision;
cv::Mat im = cv::imread("test.jpg");
vis::Mat mat(im);
assert(vis::Resize::Run(&mat, 224, 224));
assert(vis::Normalize::Run(&mat, {0.5, 0.5, 0.5}, {0.5, 0.5, 0.5}));
assert(vis::HWC2CHW::Run(&mat));
```
切换为CUDA GPU进行处理
```
namespace vis = fastdeploy::vision;
vis::Processor::default_lib = vis::ProcessorLib::OPENCV_CUDA;
cv::Mat im = cv::imread("test.jpg");
vis::Mat mat(im);
assert(vis::Resize::Run(&mat, 224, 224));
assert(vis::Normalize::Run(&mat, {0.5, 0.5, 0.5}, {0.5, 0.5, 0.5}));
assert(vis::HWC2CHW::Run(&mat));
```
在处理过程中,通过`fastdeploy::vision::Mat`作为传递的数据结构
```
struct Mat {
Mat(cv::Mat); // 通过`cv::Mat`进行构造
FDDataType Type(); // 数值类型
int Channels(); // 通道数
int Width(); // 宽
int Height(); // 高
// 获取图像如Mat在GPU上则会拷贝到CPU上再返回
cv::Mat GetCpuMat();
// 获取图像如Mat在CPU上则会拷贝到GPU上再返回
cv::cuda::GpuMat GetGpuMat();
void ShareWithTensor(FDTensor* tensor); // 构造一个FDTensor并共享内存
bool CopyToTensor(FDTensor* tensor); // 构造一个CPU上的FDTensor并将数据拷贝过去
Layout layout; // 数据排布支持Layout::HWC / Layout::CHW
Device device; // 数据存放设备支持Device::CPU / Device::GPU
};
```
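下面给出一个构造`Mat`并与`FDTensor`共享内存的简单示意(仅为参考):
```
// 示意:将 cv::Mat 包装为 vision::Mat并与 FDTensor 共享内存
cv::Mat im = cv::imread("test.jpg");
fastdeploy::vision::Mat mat(im);
FDTensor tensor;
mat.ShareWithTensor(&tensor);      // tensor 与 mat 共享同一块内存,不发生拷贝
tensor.PrintInfo("preprocessed");  // 打印tensor调试信息
```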
## utilities模块 工具模块
提供一些常见的函数,如分类模型常用的`TopK`选择,检测模型的`NMS`操作。后续同样可以考虑为这些后处理实现提供不同的后端。
## visualize 可视化模块
提供一些可视化函数,检测、分割、OCR等任务都需要这类函数来查看可视化效果
## 模型模块
这个是`Vision`中最重要的模块,所有的模块均通过`域名` + `模型名`来划分,如
- vision::ppdet::YOLOv3 // PaddleDetection的YOLOv3模型
- vision::ppdet::RCNN // PaddleDetection的RCNN类模型
- vision::ultralytics::YOLOv5 // https://github.com/ultralytics/yolov5 YOLOv5模型
模型的增加参考[模型开发](models.md)

docs/usage/model.md Normal file

@@ -0,0 +1,57 @@
# FastDeploy模型
目前支持的模型如下
- [fastdeploy.vision.ppcls.Model](vision/ppcls.md) PaddleClas里的所有分类模型
- [fastdeploy.vision.ultralytics.YOLOv5](vision/ultralytics.md) [ultralytics/yolov5](https://github.com/ultralytics/yolov5)模型
具体模型使用方式可参考各模型文档API和示例说明。 各模型在运行时均有默认的Runtime配置本文档说明如何修改模型的后端配置其中如下代码为跑YOLOv5的模型Python示例代码
```
import fastdeploy as fd
model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx")
import cv2
im = cv2.imread('bus.jpg')
result = model.predict(im)
print(model.runtime_option)
```
通过`print(model.runtime_option)`可以看到如下信息
```
RuntimeOption(
backend : Backend.ORT # 当前推理后端为ONNXRuntime
cpu_thread_num : 8 # 推理时CPU线程数设置仅当模型在CPU上推理时有效
device : Device.GPU # 当前推理设备为GPU
device_id : 0 # 当前推理设备id为0
model_file : yolov5s.onnx # 模型文件路径
model_format : Frontend.ONNX # 模型格式当前为ONNX格式
ort_execution_mode : -1 # ONNXRuntime后端的配置参数-1表示默认
ort_graph_opt_level : -1 # ONNXRuntime后端的配置参数, -1表示默认
ort_inter_op_num_threads : -1 # ONNXRuntime后端的配置参数-1表示默认
params_file : # 参数文件ONNX模型无此文件
trt_enable_fp16 : False # TensorRT参数
trt_enable_int8 : False # TensorRT参数
trt_fixed_shape : {} # TensorRT参数
trt_max_batch_size : 32 # TensorRT参数
trt_max_shape : {} # TensorRT参数
trt_max_workspace_size : 1073741824 # TensorRT参数
trt_min_shape : {} # TensorRT参数
trt_opt_shape : {} # TensorRT参数
trt_serialize_file : # TensorRT参数
)
```
会注意到参数名以`ort`开头的均为ONNXRuntime后端专有的参数`trt`的则为TensorRT后端专有的参数。各后端与参数的配置可参考[RuntimeOption](runtime_option.md)说明。
## 切换模型推理方式
一般而言用户只需关注推理是在哪种Device下即可。 当然有更进一步需求可以再为Device选择不同的Backend但配置时注意Device与Backend的搭配。 如Backend::TRT只支持Device为GPU, 而Backend::ORT则同时支持CPU和GPU
```
import fastdeploy as fd
option = fd.RuntimeOption()
option.device = fd.Device.CPU
option.cpu_thread_num = 12
model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx", option)
print(model.runtime_option)
```

docs/usage/vision/ppcls.md Normal file

@@ -0,0 +1,104 @@
# PaddleClas分类模型推理
PaddleClas模型导出参考[PaddleClas](https://github.com/PaddlePaddle/PaddleClas.git)
## Python API说明
### Model类
```
fastdeploy.vision.ppcls.Model(model_file, params_file, config_file, runtime_option=None, model_format=fastdeploy.Frontend.PADDLE)
```
**参数**
> * **model_file**(str): 模型文件如resnet50/inference.pdmodel
> * **params_file**(str): 参数文件如resnet50/inference.pdiparams
> * **config_file**(str): 配置文件来源于PaddleClas提供的推理配置文件如[inference_cls.yaml](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/deploy/configs/inference_cls.yaml)
> * **runtime_option**(fd.RuntimeOption): 后端推理的配置, 默认为None即采用默认配置
> * **model_format**(fd.Frontend): 模型格式说明PaddleClas的模型格式均为Frontend.PADDLE
#### predict接口
```
Model.predict(image_data, topk=1)
```
> **参数**
>
> > * **image_data**(np.ndarray): 输入数据, 注意需为HWC、RGB格式
> > * **topk**(int): 取前top的分类
> **返回结果**
>
> > * **result**(ClassifyResult):结构体包含`label_ids`和`scores`两个list成员变量表示类别和各类别对应的置信度
### 示例
> ```
> import fastdeploy.vision as vis
> import cv2
> model = vis.ppcls.Model("resnet50/inference.pdmodel", "resnet50/inference.pdiparams", "resnet50/inference_cls.yaml")
> im = cv2.imread("test.jpeg")
> result = model.predict(im, topk=5)
> print(result.label_ids[0], result.scores[0])
> ```
## C++ API说明
需添加头文件`#include "fastdeploy/vision.h"`
### Model类
```
fastdeploy::vision::ppcls::Model(
const std::string& model_file,
const std::string& params_file,
const std::string& config_file,
const RuntimeOption& custom_option = RuntimeOption(),
const Frontend& model_format = Frontend::PADDLE)
```
**参数**
> * **model_file**: 模型文件如resnet50/inference.pdmodel
> * **params_file**: 参数文件如resnet50/inference.pdiparams
> * **config_file**: 配置文件来源于PaddleClas提供的推理配置文件如[inference_cls.yaml](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/deploy/configs/inference_cls.yaml)
> * **runtime_option**: 后端推理的配置, 不设置的情况下,采用默认配置
> * **model_format**: 模型格式说明PaddleClas的模型格式均为Frontend.PADDLE
#### Predict接口
```
bool Model::Predict(cv::Mat* im, ClassifyResult* result, int topk = 1)
```
> **参数**
> > * **im**: 输入图像数据须为HWC、RGB格式(注意传入的im在预处理过程中会被修改)
> > * **result**: 分类结果
> > * **topk**: 取分类结果前topk
> **返回结果**
> > true或false表示预测成功与否
### 示例
> ```
> #include "fastdeploy/vision.h"
>
> int main() {
> namespace vis = fastdeploy::vision;
> auto model = vis::ppcls::Model("resnet50/inference.pdmodel", "resnet50/inference.pdiparams", "resnet50/inference_cls.yaml");
>
> if (!model.Initialized()) {
> std::cerr << "Initialize failed." << std::endl;
> return -1;
> }
>
> cv::Mat im = cv::imread("test.jpeg");
>
> vis::ClassifyResult res;
> if (!model.Predict(&im, &res, 5)) {
> std::cerr << "Prediction failed." << std::endl;
> return -1;
> }
>
> std::cout << res.label_ids[0] << " " << res.scores[0] << std::endl;
> return 0;
> }
> ```

external/onnxruntime.cmake vendored Normal file

@@ -0,0 +1,90 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
include(ExternalProject)
set(ONNXRUNTIME_PROJECT "extern_onnxruntime")
set(ONNXRUNTIME_PREFIX_DIR ${THIRD_PARTY_PATH}/onnxruntime)
set(ONNXRUNTIME_SOURCE_DIR
${THIRD_PARTY_PATH}/onnxruntime/src/${ONNXRUNTIME_PROJECT})
set(ONNXRUNTIME_INSTALL_DIR ${THIRD_PARTY_PATH}/install/onnxruntime)
set(ONNXRUNTIME_INC_DIR
"${ONNXRUNTIME_INSTALL_DIR}/include"
CACHE PATH "onnxruntime include directory." FORCE)
set(ONNXRUNTIME_LIB_DIR
"${ONNXRUNTIME_INSTALL_DIR}/lib"
CACHE PATH "onnxruntime lib directory." FORCE)
set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${ONNXRUNTIME_LIB_DIR}")
set(ONNXRUNTIME_VERSION "1.11.1")
set(ONNXRUNTIME_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs/")
if(WIN32)
if(WITH_GPU)
set(ONNXRUNTIME_FILENAME "onnxruntime-win-x64-gpu-${ONNXRUNTIME_VERSION}.zip")
else()
set(ONNXRUNTIME_FILENAME "onnxruntime-win-x64-${ONNXRUNTIME_VERSION}.zip")
endif()
elseif(APPLE)
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
set(ONNXRUNTIME_FILENAME "onnxruntime-osx-arm64-${ONNXRUNTIME_VERSION}.tgz")
else()
set(ONNXRUNTIME_FILENAME "onnxruntime-osx-x86_64-${ONNXRUNTIME_VERSION}.tgz")
endif()
else()
if(WITH_GPU)
set(ONNXRUNTIME_FILENAME "onnxruntime-linux-x64-gpu-${ONNXRUNTIME_VERSION}.tgz")
else()
set(ONNXRUNTIME_FILENAME "onnxruntime-linux-x64-${ONNXRUNTIME_VERSION}.tgz")
endif()
endif()
set(ONNXRUNTIME_URL "${ONNXRUNTIME_URL_PREFIX}${ONNXRUNTIME_FILENAME}")
include_directories(${ONNXRUNTIME_INC_DIR}
)# For ONNXRUNTIME code to include internal headers.
if(WIN32)
set(ONNXRUNTIME_LIB
"${ONNXRUNTIME_INSTALL_DIR}/lib/onnxruntime.lib"
CACHE FILEPATH "ONNXRUNTIME static library." FORCE)
elseif(APPLE)
set(ONNXRUNTIME_LIB
"${ONNXRUNTIME_INSTALL_DIR}/lib/libonnxruntime.dylib"
CACHE FILEPATH "ONNXRUNTIME static library." FORCE)
else()
set(ONNXRUNTIME_LIB
"${ONNXRUNTIME_INSTALL_DIR}/lib/libonnxruntime.so"
CACHE FILEPATH "ONNXRUNTIME static library." FORCE)
endif()
ExternalProject_Add(
${ONNXRUNTIME_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
URL ${ONNXRUNTIME_URL}
PREFIX ${ONNXRUNTIME_PREFIX_DIR}
DOWNLOAD_NO_PROGRESS 1
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND
${CMAKE_COMMAND} -E remove_directory ${ONNXRUNTIME_INSTALL_DIR} &&
${CMAKE_COMMAND} -E make_directory ${ONNXRUNTIME_INSTALL_DIR} &&
${CMAKE_COMMAND} -E rename ${ONNXRUNTIME_SOURCE_DIR}/lib/ ${ONNXRUNTIME_INSTALL_DIR}/lib &&
${CMAKE_COMMAND} -E copy_directory ${ONNXRUNTIME_SOURCE_DIR}/include
${ONNXRUNTIME_INC_DIR}
BUILD_BYPRODUCTS ${ONNXRUNTIME_LIB})
add_library(external_onnxruntime STATIC IMPORTED GLOBAL)
set_property(TARGET external_onnxruntime PROPERTY IMPORTED_LOCATION ${ONNXRUNTIME_LIB})
add_dependencies(external_onnxruntime ${ONNXRUNTIME_PROJECT})

external/opencv.cmake vendored Normal file

@@ -0,0 +1,121 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if(WIN32)
find_package(OpenCV REQUIRED PATHS ${OpenCV_DIR})
list(APPEND DEPEND_LIBS ${OpenCV_LIBS})
else()
include(ExternalProject)
set(OPENCV_PROJECT "extern_opencv")
set(OPENCV_PREFIX_DIR ${THIRD_PARTY_PATH}/opencv)
set(OPENCV_SOURCE_DIR
${THIRD_PARTY_PATH}/opencv/src/${OPENCV_PROJECT})
set(OPENCV_INSTALL_DIR ${THIRD_PARTY_PATH}/install/opencv)
set(OPENCV_INC_DIR
"${OPENCV_INSTALL_DIR}/include/"
CACHE PATH "opencv include directory." FORCE)
set(OPENCV_LIB_DIR
"${OPENCV_INSTALL_DIR}/lib"
CACHE PATH "opencv lib directory." FORCE)
set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${OPENCV_LIB_DIR}")
if(WIN32)
message(FATAL_ERROR "NOT SUPPORT WINDOWS NOW, OPENCV")
elseif(APPLE)
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
set(OPENCV_URL "https://bj.bcebos.com/paddle2onnx/libs/opencv-osx-arm64-3.4.16.tgz")
else()
set(OPENCV_URL "https://bj.bcebos.com/paddle2onnx/libs/opencv-osx-x86_64-3.4.16.tgz")
endif()
else()
set(OPENCV_URL "https://bj.bcebos.com/paddle2onnx/libs/opencv-linux-x64-3.4.16.tgz")
if(ENABLE_OPENCV_CUDA)
set(OPENCV_URL "https://bj.bcebos.com/paddle2onnx/libs/opencv-linux-x64-gpu-3.4.16.tgz")
endif()
endif()
include_directories(${OPENCV_INC_DIR}
)# For OPENCV code to include internal headers.
set(OPENCV_SOURCE_LIB ${OPENCV_SOURCE_DIR}/lib/)
if(WIN32)
message(FATAL_ERROR "NOT SUPPORT WEINDOWS, OPENCV")
elseif(APPLE)
set(OPENCV_CORE_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_core.dylib)
set(OPENCV_HIGHGUI_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_highgui.dylib)
set(OPENCV_IMGPROC_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_imgproc.dylib)
set(OPENCV_IMGCODESC_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_imgcodecs.dylib)
else()
set(OPENCV_SOURCE_LIB ${OPENCV_SOURCE_DIR}/lib64)
set(OPENCV_CORE_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_core.so)
set(OPENCV_HIGHGUI_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_highgui.so)
set(OPENCV_IMGPROC_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_imgproc.so)
set(OPENCV_IMGCODESC_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_imgcodecs.so)
set(OPENCV_CUDAARITHM_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_cudaarithm.so)
set(OPENCV_CUDAIMGPROC_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_cudaimgproc.so)
set(OPENCV_CUDAWARPING_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_cudawarping.so)
endif()
if(WIN32)
message(FATAL_ERROR "NOT SUPPORT WINDOWS, OPENCV")
else()
ExternalProject_Add(
${OPENCV_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
URL ${OPENCV_URL}
PREFIX ${OPENCV_PREFIX_DIR}
DOWNLOAD_NO_PROGRESS 1
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND
${CMAKE_COMMAND} -E remove_directory ${OPENCV_INSTALL_DIR} &&
${CMAKE_COMMAND} -E make_directory ${OPENCV_INSTALL_DIR} &&
${CMAKE_COMMAND} -E rename ${OPENCV_SOURCE_LIB} ${OPENCV_INSTALL_DIR}/lib &&
${CMAKE_COMMAND} -E copy_directory ${OPENCV_SOURCE_DIR}/include/
${OPENCV_INC_DIR}
BUILD_BYPRODUCTS ${OPENCV_CORE_LIB})
endif()
add_library(external_opencv_core STATIC IMPORTED GLOBAL)
set_property(TARGET external_opencv_core PROPERTY IMPORTED_LOCATION ${OPENCV_CORE_LIB})
add_library(external_opencv_highgui STATIC IMPORTED GLOBAL)
set_property(TARGET external_opencv_highgui PROPERTY IMPORTED_LOCATION ${OPENCV_HIGHGUI_LIB})
add_library(external_opencv_imgproc STATIC IMPORTED GLOBAL)
set_property(TARGET external_opencv_imgproc PROPERTY IMPORTED_LOCATION ${OPENCV_IMGPROC_LIB})
add_library(external_opencv_imgcodesc STATIC IMPORTED GLOBAL)
set_property(TARGET external_opencv_imgcodesc PROPERTY IMPORTED_LOCATION ${OPENCV_IMGCODESC_LIB})
add_dependencies(external_opencv_core ${OPENCV_PROJECT})
add_dependencies(external_opencv_highgui ${OPENCV_PROJECT})
add_dependencies(external_opencv_imgproc ${OPENCV_PROJECT})
add_dependencies(external_opencv_imgcodesc ${OPENCV_PROJECT})
list(APPEND DEPEND_LIBS external_opencv_core external_opencv_highgui external_opencv_imgproc external_opencv_imgcodesc)
if(ENABLE_OPENCV_CUDA)
add_library(extern_opencv_cudawarping STATIC IMPORTED GLOBAL)
set_property(TARGET extern_opencv_cudawarping PROPERTY IMPORTED_LOCATION ${OPENCV_CUDAWARPING_LIB})
add_dependencies(extern_opencv_cudawarping ${OPENCV_PROJECT})
add_library(extern_opencv_cudaarithm STATIC IMPORTED GLOBAL)
set_property(TARGET extern_opencv_cudaarithm PROPERTY IMPORTED_LOCATION ${OPENCV_CUDAARITHM_LIB})
add_dependencies(extern_opencv_cudaarithm ${OPENCV_PROJECT})
add_library(extern_opencv_cudaimgproc STATIC IMPORTED GLOBAL)
set_property(TARGET extern_opencv_cudaimgproc PROPERTY IMPORTED_LOCATION ${OPENCV_CUDAIMGPROC_LIB})
add_dependencies(extern_opencv_cudaimgproc ${OPENCV_PROJECT})
list(APPEND DEPEND_LIBS extern_opencv_cudawarping extern_opencv_cudaarithm extern_opencv_cudaimgproc)
endif()
endif(WIN32)

external/paddle2onnx.cmake vendored Normal file

@@ -0,0 +1,80 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
include(ExternalProject)
set(PADDLE2ONNX_PROJECT "extern_paddle2onnx")
set(PADDLE2ONNX_PREFIX_DIR ${THIRD_PARTY_PATH}/paddle2onnx)
set(PADDLE2ONNX_SOURCE_DIR
${THIRD_PARTY_PATH}/paddle2onnx/src/${PADDLE2ONNX_PROJECT})
set(PADDLE2ONNX_INSTALL_DIR ${THIRD_PARTY_PATH}/install/paddle2onnx)
set(PADDLE2ONNX_INC_DIR
"${PADDLE2ONNX_INSTALL_DIR}/include"
CACHE PATH "paddle2onnx include directory." FORCE)
set(PADDLE2ONNX_LIB_DIR
"${PADDLE2ONNX_INSTALL_DIR}/lib/"
CACHE PATH "onnxruntime lib directory." FORCE)
set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}"
"${PADDLE2ONNX_LIB_DIR}")
include_directories(${PADDLE2ONNX_INC_DIR})
if(WIN32)
set(PADDLE2ONNX_COMPILE_LIB
"${PADDLE2ONNX_INSTALL_DIR}/lib/paddle2onnx.lib"
CACHE FILEPATH "paddle2onnx compile library." FORCE)
elseif(APPLE)
set(PADDLE2ONNX_COMPILE_LIB
"${PADDLE2ONNX_INSTALL_DIR}/lib/libpaddle2onnx.dylib"
CACHE FILEPATH "paddle2onnx compile library." FORCE)
else()
set(PADDLE2ONNX_COMPILE_LIB
"${PADDLE2ONNX_INSTALL_DIR}/lib/libpaddle2onnx.so"
CACHE FILEPATH "paddle2onnx compile library." FORCE)
endif(WIN32)
set(PADDLE2ONNX_URL_BASE "https://bj.bcebos.com/paddle2onnx/libs/")
set(PADDLE2ONNX_VERSION "0.9.9")
if(WIN32)
set(PADDLE2ONNX_FILE "paddle2onnx-win-x64-${PADDLE2ONNX_VERSION}.zip")
elseif(APPLE)
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
set(PADDLE2ONNX_FILE "paddle2onnx-osx-arm64-${PADDLE2ONNX_VERSION}.tgz")
else()
set(PADDLE2ONNX_FILE "paddle2onnx-osx-x86_64-${PADDLE2ONNX_VERSION}.tgz")
endif()
else()
set(PADDLE2ONNX_FILE "paddle2onnx-linux-x64-${PADDLE2ONNX_VERSION}.tgz")
endif()
set(PADDLE2ONNX_URL "${PADDLE2ONNX_URL_BASE}${PADDLE2ONNX_FILE}")
ExternalProject_Add(
${PADDLE2ONNX_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
URL ${PADDLE2ONNX_URL}
PREFIX ${PADDLE2ONNX_PREFIX_DIR}
DOWNLOAD_NO_PROGRESS 1
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND
${CMAKE_COMMAND} -E remove_directory ${PADDLE2ONNX_INSTALL_DIR} &&
${CMAKE_COMMAND} -E make_directory ${PADDLE2ONNX_INSTALL_DIR} &&
${CMAKE_COMMAND} -E rename ${PADDLE2ONNX_SOURCE_DIR}/lib/
${PADDLE2ONNX_LIB_DIR} && ${CMAKE_COMMAND} -E copy_directory
${PADDLE2ONNX_SOURCE_DIR}/include ${PADDLE2ONNX_INC_DIR}
BUILD_BYPRODUCTS ${PADDLE2ONNX_COMPILE_LIB})
add_library(external_paddle2onnx STATIC IMPORTED GLOBAL)
set_property(TARGET external_paddle2onnx PROPERTY IMPORTED_LOCATION
${PADDLE2ONNX_COMPILE_LIB})
add_dependencies(external_paddle2onnx ${PADDLE2ONNX_PROJECT})

44
external/summary.cmake vendored Normal file

@@ -0,0 +1,44 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
function(fastdeploy_summary)
message(STATUS "")
message(STATUS "*************FastDeploy Building Summary**********")
message(STATUS " CMake version : ${CMAKE_VERSION}")
message(STATUS " CMake command : ${CMAKE_COMMAND}")
message(STATUS " System : ${CMAKE_SYSTEM_NAME}")
message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}")
message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}")
message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}")
message(STATUS " Build type : ${CMAKE_BUILD_TYPE}")
get_directory_property(tmp DIRECTORY ${PROJECT_SOURCE_DIR} COMPILE_DEFINITIONS)
message(STATUS " Compile definitions : ${tmp}")
message(STATUS " CMAKE_PREFIX_PATH : ${CMAKE_PREFIX_PATH}")
message(STATUS " CMAKE_INSTALL_PREFIX : ${CMAKE_INSTALL_PREFIX}")
message(STATUS " CMAKE_MODULE_PATH : ${CMAKE_MODULE_PATH}")
message(STATUS "")
message(STATUS " FastDeploy version : ${FASTDEPLOY_VERSION}")
message(STATUS " Paddle2ONNX version : ${PADDLE2ONNX_VERSION}")
message(STATUS " ONNXRuntime version : ${ONNXRUNTIME_VERSION}")
message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
if(WITH_GPU)
message(STATUS " WITH_GPU : ${WITH_GPU}")
message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")
message(STATUS " CUDA_DIRECTORY : ${CUDA_DIRECTORY}")
message(STATUS " TRT_DRECTORY : ${TRT_DIRECTORY}")
endif()
message(STATUS " ENABLE_VISION : ${ENABLE_VISION}")
message(STATUS " ENABLE_DEBUG : ${ENABLE_DEBUG}")
message(STATUS " ENABLE_VISION_VISUALIZE : ${ENABLE_VISION_VISUALIZE}")
endfunction()

15
external/utils.cmake vendored Normal file

@@ -0,0 +1,15 @@
# This function comes from https://blog.csdn.net/yindongjie1221/article/details/90614261
function(redefine_file_macro targetname)
get_target_property(source_files "${targetname}" SOURCES)
foreach(sourcefile ${source_files})
get_property(defs SOURCE "${sourcefile}"
PROPERTY COMPILE_DEFINITIONS)
get_filename_component(filepath "${sourcefile}" ABSOLUTE)
string(REPLACE ${PROJECT_SOURCE_DIR}/ "" relpath ${filepath})
list(APPEND defs "__REL_FILE__=\"${relpath}\"")
set_property(
SOURCE "${sourcefile}"
PROPERTY COMPILE_DEFINITIONS ${defs}
)
endforeach()
endfunction()
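redefine_file_macro injects a per-source __REL_FILE__ definition so that diagnostics can print paths relative to the project root instead of absolute build paths. A minimal C++ sketch of how such a macro might be consumed; the FD_LOG macro and the __FILE__ fallback below are illustrative assumptions, not part of this commit:

// Illustrative only: consuming the __REL_FILE__ definition added by
// redefine_file_macro(). The fallback keeps the snippet compilable even
// when the macro is not injected by CMake.
#include <iostream>

#ifndef __REL_FILE__
#define __REL_FILE__ __FILE__
#endif

#define FD_LOG(msg) \
  (std::cout << "[" << __REL_FILE__ << ":" << __LINE__ << "] " << (msg) \
             << std::endl)

int main() {
  FD_LOG("hello from a project-relative path");
  return 0;
}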


201
fastdeploy/LICENSE Normal file

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


@@ -0,0 +1,734 @@
This project depends on some open source projects, listed below
--------
1. https://github.com/protocolbuffers/protobuf
Copyright 2008 Google Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Code generated by the Protocol Buffer compiler is owned by the owner
of the input file used when generating it. This code is not
standalone and requires a support library to be linked with it. This
support library is itself covered by the above license.
--------
2. https://github.com/onnx/onnx
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--------
3. https://github.com/microsoft/onnxruntime
MIT License
Copyright (c) Microsoft Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------
4. https://github.com/pybind/pybind11
Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing of
external contributions to this project including patches, pull requests, etc.
--------
5. https://github.com/onnx/onnx-tensorrt
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2021 NVIDIA Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--------
6. https://github.com/opencv/opencv
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--------
7. https://github.com/jbeder/yaml-cpp
Copyright (c) 2008-2015 Jesse Beder.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.


@@ -11,189 +11,31 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from six import text_type as _text_type
from .download import download, download_and_decompress
import argparse
# Since the source code is not fully open sourced yet,
# we currently provide the prebuilt library
# and demo code
import os
__version__ = "0.1.0"
import logging
from .fastdeploy_main import Frontend, Backend, FDDataType, TensorInfo, RuntimeOption, Device
from .fastdeploy_runtime import *
from . import fastdeploy_main as C
from . import vision
def parse_arguments():
parser = argparse.ArgumentParser()
parser.add_argument(
'--model',
type=_text_type,
default=None,
help='Name of model, which can be listed by --list_models')
parser.add_argument(
'--platform',
type=_text_type,
default=None,
help='Define platform, supports Windows/Linux/Android/iOS.')
parser.add_argument(
'--soc',
type=_text_type,
default=None,
help='Define soc for the platform, supports x86/x86-NVIDIA_GPU/ARM/jetson.'
)
parser.add_argument(
'--save_dir',
type=_text_type,
default=".",
help='Path to download and extract deployment SDK.')
parser.add_argument(
'--list_models',
required=False,
action="store_true",
default=False,
help='List all the supported models.')
parser.add_argument(
'--download_sdk',
required=False,
action="store_true",
default=False,
help='Download the deployment SDK for the specified model.')
return parser.parse_args()
def TensorInfoStr(tensor_info):
message = "TensorInfo(name : '{}', dtype : '{}', shape : '{}')".format(
tensor_info.name, tensor_info.dtype, tensor_info.shape)
return message
def read_sources():
user_dir = os.path.expanduser('~')
print("Updating the newest sdk information...")
source_cfgs = "https://bj.bcebos.com/paddlehub/fastdeploy/fastdeploy_newest_sources.cfg.1"
if os.path.exists(os.path.join(user_dir, "fastdeploy_newest_sources.cfg.1")):
os.remove(os.path.join(user_dir, "fastdeploy_newest_sources.cfg.1"))
download(source_cfgs, user_dir)
categories = dict()
res = dict()
with open(os.path.join(user_dir, "fastdeploy_newest_sources.cfg.1")) as f:
for line in f:
if line.strip().startswith("#"):
continue
if line.strip() == "":
continue
category, model, plat, soc, url = line.strip().split('\t')
if category not in categories:
categories[category] = set()
categories[category].add(model)
if model not in res:
res[model] = dict()
if plat not in res[model]:
res[model][plat] = dict()
if soc not in res[model][plat]:
res[model][plat][soc] = dict()
res[model][plat][soc] = url
return categories, res
def RuntimeOptionStr(runtime_option):
attrs = dir(runtime_option)
message = "RuntimeOption(\n"
for attr in attrs:
if attr.startswith("__"):
continue
message += " {} : {}\t\n".format(attr, getattr(runtime_option, attr))
message = message.strip("\n")
message += ")"
return message
def main():
args = parse_arguments()
if not args.list_models and not args.download_sdk:
print(
"Please use flag --list_models to show all the supported models, or use flag --download_sdk to download the specify SDK to deploy you model."
)
return
categories, all_sources = read_sources()
all_models = list(all_sources.keys())
all_models.sort()
if args.list_models:
print("Currently, FastDeploy supports {} models, list as below,\n".format(
len(all_models)))
for k, v in categories.items():
print("\nModel Category: {}".format(k))
print("_"*100)
models = list(categories[k])
models.sort()
i = 0
while i < len(models):
if i == len(models) - 1:
print(models[i].center(30))
i += 1
elif i == len(models) - 2:
print(models[i].center(30), models[i+1].center(30))
i += 2
else:
print(models[i].center(30), models[i+1].center(30), models[i+2].center(30))
i += 3
return
if not os.path.exists(args.save_dir):
print("The specified save_dir: {} is not exist.".format(args.save_dir))
return
if args.model is None or args.model == "":
print(
"Please define --model to choose which kind of model to deploy, use --list_models to show all the supported models."
)
return
if args.model not in all_sources:
print(
"{} is not supported, use --list_models to list all the models FastDeploy supported.".
format(args.model))
return
if args.platform is None or args.platform == "":
print(
"Please define --platform to choose which platform to deploy, supports windows/linux/android/ios."
)
return
if args.platform not in all_sources[args.model]:
print(
"The model:{} only supports platform of {}, {} is not supported now.".
format(args.model,
list(all_sources[args.model].keys()), args.platform))
return
if args.soc is None or args.soc == "":
print(
"Please define --soc to choose which hardware to deploy, for model:{} and platform:{}, the available socs are {}.".
format(args.model, args.platform,
list(all_sources[args.model][args.platform].keys())))
return
if args.soc not in all_sources[args.model][args.platform]:
print(
"The model:{} in platform:{} only supports soc of {}, {} is not supported now.".
format(args.model, args.platform,
list(all_sources[args.model][args.platform].keys()),
args.soc))
return
print("\nDownloading SDK:",
all_sources[args.model][args.platform][args.soc])
save_dir = args.save_dir
sdk_name = os.path.split(all_sources[args.model][args.platform][args.soc])[
-1].strip()
if all_sources[args.model][args.platform][args.soc].count(".zip") > 0:
sdk_name = os.path.split(all_sources[args.model][args.platform][
args.soc])[-1].strip().split(".zip")[0]
new_save_dir = os.path.join(args.save_dir, sdk_name)
if not os.path.exists(new_save_dir):
os.mkdir(new_save_dir)
save_dir = new_save_dir
download_and_decompress(
all_sources[args.model][args.platform][args.soc],
new_save_dir,
rename=sdk_name + ".zip")
os.remove(os.path.join(new_save_dir, sdk_name + ".zip"))
print("Done. All the files of SDK have been extracted in {}.".format(
new_save_dir))
if __name__ == "__main__":
main()
C.TensorInfo.__repr__ = TensorInfoStr
C.RuntimeOption.__repr__ = RuntimeOptionStr

View File

@@ -0,0 +1,48 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/core/fd_tensor.h"
namespace fastdeploy {
struct TensorInfo {
std::string name;
std::vector<int> shape;
FDDataType dtype;
};
class BaseBackend {
public:
bool initialized_ = false;
BaseBackend() {}
virtual bool Initialized() const { return initialized_; }
virtual int NumInputs() const = 0;
virtual int NumOutputs() const = 0;
virtual TensorInfo GetInputInfo(int index) = 0;
virtual TensorInfo GetOutputInfo(int index) = 0;
virtual bool Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) = 0;
};
} // namespace fastdeploy

View File

@@ -0,0 +1,278 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/ort/ort_backend.h"
#include "fastdeploy/backends/ort/utils.h"
#include "fastdeploy/utils/utils.h"
#include <memory>
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"
#endif
namespace fastdeploy {
ONNXTensorElementDataType GetOrtDtype(FDDataType fd_dtype) {
if (fd_dtype == FDDataType::FP32) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
} else if (fd_dtype == FDDataType::FP64) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE;
} else if (fd_dtype == FDDataType::INT32) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32;
} else if (fd_dtype == FDDataType::INT64) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
}
FDERROR << "Unrecognized fastdeply data type:" << FDDataTypeStr(fd_dtype)
<< "." << std::endl;
return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
}
FDDataType GetFdDtype(ONNXTensorElementDataType ort_dtype) {
if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
return FDDataType::FP32;
} else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
return FDDataType::FP64;
} else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
return FDDataType::INT32;
} else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
return FDDataType::INT64;
}
FDERROR << "Unrecognized ort data type:" << ort_dtype << "." << std::endl;
return FDDataType::FP32;
}
void OrtBackend::BuildOption(const OrtBackendOption& option) {
option_ = option;
if (option.graph_optimization_level >= 0) {
session_options_.SetGraphOptimizationLevel(
GraphOptimizationLevel(option.graph_optimization_level));
}
if (option.intra_op_num_threads >= 0) {
session_options_.SetIntraOpNumThreads(option.intra_op_num_threads);
}
if (option.inter_op_num_threads >= 0) {
session_options_.SetInterOpNumThreads(option.inter_op_num_threads);
}
if (option.execution_mode >= 0) {
session_options_.SetExecutionMode(ExecutionMode(option.execution_mode));
}
if (option.use_gpu) {
auto all_providers = Ort::GetAvailableProviders();
bool support_cuda = false;
std::string providers_msg = "";
for (size_t i = 0; i < all_providers.size(); ++i) {
providers_msg = providers_msg + all_providers[i] + ", ";
if (all_providers[i] == "CUDAExecutionProvider") {
support_cuda = true;
}
}
if (!support_cuda) {
FDLogger() << "[WARN] Compiled fastdeploy with onnxruntime doesn't "
"support GPU, the available providers are "
<< providers_msg << "will fallback to CPUExecutionProvider."
<< std::endl;
option_.use_gpu = false;
} else {
FDASSERT(option.gpu_id == 0, "Requires gpu_id == 0, but now gpu_id = " +
std::to_string(option.gpu_id) + ".");
OrtCUDAProviderOptions cuda_options;
cuda_options.device_id = option.gpu_id;
session_options_.AppendExecutionProvider_CUDA(cuda_options);
}
}
}
bool OrtBackend::InitFromPaddle(const std::string& model_file,
const std::string& params_file,
const OrtBackendOption& option, bool verbose) {
if (initialized_) {
FDERROR << "OrtBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
#ifdef ENABLE_PADDLE_FRONTEND
char* model_content_ptr;
int model_content_size = 0;
if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
&model_content_ptr, &model_content_size, 11, true,
verbose, true, true, true)) {
FDERROR << "Error occured while export PaddlePaddle to ONNX format."
<< std::endl;
return false;
}
std::string onnx_model_proto(model_content_ptr,
model_content_ptr + model_content_size);
  delete[] model_content_ptr;
model_content_ptr = nullptr;
return InitFromOnnx(onnx_model_proto, option, true);
#else
FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
"call `InitFromOnnx` instead."
<< std::endl;
#endif
return false;
}
bool OrtBackend::InitFromOnnx(const std::string& model_file,
const OrtBackendOption& option,
bool from_memory_buffer) {
if (initialized_) {
FDERROR << "OrtBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
BuildOption(option);
if (from_memory_buffer) {
session_ = {env_, model_file.data(), model_file.size(), session_options_};
} else {
#ifdef _WIN32
session_ = {env_,
std::wstring(model_file.begin(), model_file.end()).c_str(),
session_options_};
#else
session_ = {env_, model_file.c_str(), session_options_};
#endif
}
binding_ = std::make_shared<Ort::IoBinding>(session_);
Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
Ort::Allocator allocator(session_, memory_info);
size_t n_inputs = session_.GetInputCount();
for (size_t i = 0; i < n_inputs; ++i) {
auto input_name = session_.GetInputName(i, allocator);
auto type_info = session_.GetInputTypeInfo(i);
std::vector<int64_t> shape =
type_info.GetTensorTypeAndShapeInfo().GetShape();
ONNXTensorElementDataType data_type =
type_info.GetTensorTypeAndShapeInfo().GetElementType();
inputs_desc_.emplace_back(OrtValueInfo{input_name, shape, data_type});
allocator.Free(input_name);
}
size_t n_outputs = session_.GetOutputCount();
for (size_t i = 0; i < n_outputs; ++i) {
auto output_name = session_.GetOutputName(i, allocator);
auto type_info = session_.GetOutputTypeInfo(i);
std::vector<int64_t> shape =
type_info.GetTensorTypeAndShapeInfo().GetShape();
ONNXTensorElementDataType data_type =
type_info.GetTensorTypeAndShapeInfo().GetElementType();
outputs_desc_.emplace_back(OrtValueInfo{output_name, shape, data_type});
Ort::MemoryInfo out_memory_info("Cpu", OrtDeviceAllocator, 0,
OrtMemTypeDefault);
binding_->BindOutput(output_name, out_memory_info);
allocator.Free(output_name);
}
initialized_ = true;
return true;
}
void OrtBackend::CopyToCpu(const Ort::Value& value, FDTensor* tensor) {
const auto info = value.GetTensorTypeAndShapeInfo();
const auto data_type = info.GetElementType();
size_t numel = info.GetElementCount();
tensor->shape = info.GetShape();
if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
tensor->data.resize(numel * sizeof(float));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
numel * sizeof(float));
tensor->dtype = FDDataType::FP32;
} else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
tensor->data.resize(numel * sizeof(int32_t));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
numel * sizeof(int32_t));
tensor->dtype = FDDataType::INT32;
} else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
tensor->data.resize(numel * sizeof(int64_t));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
numel * sizeof(int64_t));
tensor->dtype = FDDataType::INT64;
} else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
tensor->data.resize(numel * sizeof(double));
memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
numel * sizeof(double));
tensor->dtype = FDDataType::FP64;
} else {
FDASSERT(false, "Unrecognized data type of " + std::to_string(data_type) +
" while calling OrtBackend::CopyToCpu().");
}
}
bool OrtBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) {
if (inputs.size() != inputs_desc_.size()) {
FDERROR << "[OrtBackend] Size of the inputs(" << inputs.size()
<< ") should keep same with the inputs of this model("
<< inputs_desc_.size() << ")." << std::endl;
return false;
}
// from FDTensor to Ort Inputs
for (size_t i = 0; i < inputs.size(); ++i) {
auto ort_value = CreateOrtValue(inputs[i], option_.use_gpu);
binding_->BindInput(inputs[i].name.c_str(), ort_value);
}
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0,
OrtMemTypeDefault);
binding_->BindOutput(outputs_desc_[i].name.c_str(), memory_info);
}
// Inference with inputs
try {
session_.Run({}, *(binding_.get()));
} catch (const std::exception& e) {
FDERROR << "Failed to Infer: " << e.what() << std::endl;
return false;
}
// Copy result after inference
std::vector<Ort::Value> ort_outputs = binding_->GetOutputValues();
outputs->resize(ort_outputs.size());
for (size_t i = 0; i < ort_outputs.size(); ++i) {
(*outputs)[i].name = outputs_desc_[i].name;
CopyToCpu(ort_outputs[i], &((*outputs)[i]));
}
return true;
}
TensorInfo OrtBackend::GetInputInfo(int index) {
FDASSERT(index < NumInputs(), "The index:" + std::to_string(index) +
" should less than the number of inputs:" +
std::to_string(NumInputs()) + ".");
TensorInfo info;
info.name = inputs_desc_[index].name;
info.shape.assign(inputs_desc_[index].shape.begin(),
inputs_desc_[index].shape.end());
info.dtype = GetFdDtype(inputs_desc_[index].dtype);
return info;
}
TensorInfo OrtBackend::GetOutputInfo(int index) {
FDASSERT(index < NumOutputs(),
"The index:" + std::to_string(index) +
" should less than the number of outputs:" +
std::to_string(NumOutputs()) + ".");
TensorInfo info;
info.name = outputs_desc_[index].name;
info.shape.assign(outputs_desc_[index].shape.begin(),
outputs_desc_[index].shape.end());
info.dtype = GetFdDtype(outputs_desc_[index].dtype);
return info;
}
} // namespace fastdeploy

View File

@@ -0,0 +1,84 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "onnxruntime_cxx_api.h" // NOLINT
namespace fastdeploy {
struct OrtValueInfo {
std::string name;
std::vector<int64_t> shape;
ONNXTensorElementDataType dtype;
};
struct OrtBackendOption {
// -1 means default
// 0: ORT_DISABLE_ALL
// 1: ORT_ENABLE_BASIC
// 2: ORT_ENABLE_EXTENDED
  // 99: ORT_ENABLE_ALL (enables some custom optimizations, e.g. BERT)
int graph_optimization_level = -1;
int intra_op_num_threads = -1;
int inter_op_num_threads = -1;
// 0: ORT_SEQUENTIAL
// 1: ORT_PARALLEL
int execution_mode = -1;
bool use_gpu = false;
int gpu_id = 0;
};
class OrtBackend : public BaseBackend {
public:
OrtBackend() {}
void BuildOption(const OrtBackendOption& option);
bool InitFromPaddle(const std::string& model_file,
const std::string& params_file,
const OrtBackendOption& option = OrtBackendOption(),
bool verbose = false);
bool InitFromOnnx(const std::string& model_file,
const OrtBackendOption& option = OrtBackendOption(),
bool from_memory_buffer = false);
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
int NumInputs() const { return inputs_desc_.size(); }
int NumOutputs() const { return outputs_desc_.size(); }
TensorInfo GetInputInfo(int index);
TensorInfo GetOutputInfo(int index);
private:
Ort::Env env_;
Ort::Session session_{nullptr};
Ort::SessionOptions session_options_;
std::shared_ptr<Ort::IoBinding> binding_;
std::vector<OrtValueInfo> inputs_desc_;
std::vector<OrtValueInfo> outputs_desc_;
OrtBackendOption option_;
void CopyToCpu(const Ort::Value& value, FDTensor* tensor);
};
} // namespace fastdeploy
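The OrtBackendOption fields above map directly onto ONNX Runtime session settings, and OrtBackend consumes them in BuildOption(). Below is a minimal hedged sketch of configuring the backend and running one CPU inference; the model path, input tensor name, and shape are hypothetical, and FDTensor is used only through the members that appear elsewhere in this commit (name/shape/dtype/data), with the default device assumed to be CPU.

// Sketch only, not part of the SDK.
#include <vector>
#include "fastdeploy/backends/ort/ort_backend.h"

bool RunOnceOnCpu() {
  fastdeploy::OrtBackendOption option;
  option.graph_optimization_level = 99;  // ORT_ENABLE_ALL, see comments above
  option.intra_op_num_threads = 4;
  option.use_gpu = false;                // stay on CPUExecutionProvider

  fastdeploy::OrtBackend backend;
  if (!backend.InitFromOnnx("model.onnx", option)) {  // hypothetical path
    return false;
  }

  std::vector<fastdeploy::FDTensor> inputs(1);
  inputs[0].name = "x";                  // hypothetical input tensor name
  inputs[0].shape = {1, 3, 224, 224};
  inputs[0].dtype = fastdeploy::FDDataType::FP32;
  inputs[0].data.resize(1 * 3 * 224 * 224 * sizeof(float));  // zero-filled

  std::vector<fastdeploy::FDTensor> outputs;
  return backend.Infer(inputs, &outputs);
}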

View File

@@ -0,0 +1,67 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/ort/utils.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
ONNXTensorElementDataType GetOrtDtype(const FDDataType& fd_dtype) {
if (fd_dtype == FDDataType::FP32) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
} else if (fd_dtype == FDDataType::FP64) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE;
} else if (fd_dtype == FDDataType::INT32) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32;
} else if (fd_dtype == FDDataType::INT64) {
return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
}
FDERROR << "Unrecognized fastdeply data type:" << FDDataTypeStr(fd_dtype)
<< "." << std::endl;
return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
}
FDDataType GetFdDtype(const ONNXTensorElementDataType& ort_dtype) {
if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
return FDDataType::FP32;
} else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
return FDDataType::FP64;
} else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
return FDDataType::INT32;
} else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
return FDDataType::INT64;
}
FDERROR << "Unrecognized ort data type:" << ort_dtype << "." << std::endl;
return FDDataType::FP32;
}
Ort::Value CreateOrtValue(FDTensor& tensor, bool is_backend_cuda) {
FDASSERT(tensor.device == Device::GPU || tensor.device == Device::CPU,
"Only support tensor which device is CPU or GPU for OrtBackend.");
if (tensor.device == Device::GPU && is_backend_cuda) {
Ort::MemoryInfo memory_info("Cuda", OrtDeviceAllocator, 0,
OrtMemTypeDefault);
auto ort_value = Ort::Value::CreateTensor(
memory_info, tensor.MutableData(), tensor.Nbytes(), tensor.shape.data(),
tensor.shape.size(), GetOrtDtype(tensor.dtype));
return ort_value;
}
Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
auto ort_value = Ort::Value::CreateTensor(
memory_info, tensor.Data(), tensor.Nbytes(), tensor.shape.data(),
tensor.shape.size(), GetOrtDtype(tensor.dtype));
return ort_value;
}
} // namespace fastdeploy

View File

@@ -0,0 +1,39 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "onnxruntime_cxx_api.h" // NOLINT
namespace fastdeploy {
// Convert FDDataType to OrtDataType
ONNXTensorElementDataType GetOrtDtype(const FDDataType& fd_dtype);
// Convert OrtDataType to FDDataType
FDDataType GetFdDtype(const ONNXTensorElementDataType& ort_dtype);
// Create Ort::Value
// is_backend_cuda specifies whether onnxruntime uses the CUDAExecutionProvider.
// When is_backend_cuda = true and tensor.device = Device::GPU, the CUDA data
// in the tensor is shared directly with the OrtValue (no copy is made).
Ort::Value CreateOrtValue(FDTensor& tensor, bool is_backend_cuda = false);
} // namespace fastdeploy
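Since CreateOrtValue only wraps the tensor's existing buffer (neither branch copies data), the FDTensor must outlive the returned Ort::Value. A small hedged sketch of the CPU path; any FDTensor member not visible in this commit is an assumption, and the tensor name is hypothetical.

#include "fastdeploy/backends/ort/utils.h"

// Sketch only: wrap a 4-float CPU tensor. No data is copied, so `t` (and its
// buffer) must stay alive for as long as `value` is used.
void WrapCpuTensorExample() {
  fastdeploy::FDTensor t;
  t.name = "x";                      // hypothetical name
  t.shape = {1, 4};
  t.dtype = fastdeploy::FDDataType::FP32;
  t.data.resize(4 * sizeof(float));  // zero-filled payload
  Ort::Value value = fastdeploy::CreateOrtValue(t);  // is_backend_cuda = false
  (void)value;  // use the value here, while `t` is still in scope
}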

View File

@@ -0,0 +1,342 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef BATCH_STREAM_H
#define BATCH_STREAM_H
#include "NvInfer.h"
#include "common.h"
#include <algorithm>
#include <stdio.h>
#include <vector>
class IBatchStream {
public:
virtual void reset(int firstBatch) = 0;
virtual bool next() = 0;
virtual void skip(int skipCount) = 0;
virtual float* getBatch() = 0;
virtual float* getLabels() = 0;
virtual int getBatchesRead() const = 0;
virtual int getBatchSize() const = 0;
virtual nvinfer1::Dims getDims() const = 0;
};
class MNISTBatchStream : public IBatchStream {
public:
MNISTBatchStream(int batchSize, int maxBatches, const std::string& dataFile,
const std::string& labelsFile,
const std::vector<std::string>& directories)
: mBatchSize{batchSize}, mMaxBatches{maxBatches}, mDims{3, {1, 28, 28}}
//!< We already know the dimensions of MNIST images.
{
readDataFile(locateFile(dataFile, directories));
readLabelsFile(locateFile(labelsFile, directories));
}
void reset(int firstBatch) override { mBatchCount = firstBatch; }
bool next() override {
if (mBatchCount >= mMaxBatches) {
return false;
}
++mBatchCount;
return true;
}
void skip(int skipCount) override { mBatchCount += skipCount; }
float* getBatch() override {
return mData.data() +
(mBatchCount * mBatchSize * samplesCommon::volume(mDims));
}
float* getLabels() override {
return mLabels.data() + (mBatchCount * mBatchSize);
}
int getBatchesRead() const override { return mBatchCount; }
int getBatchSize() const override { return mBatchSize; }
nvinfer1::Dims getDims() const override {
return Dims{4, {mBatchSize, mDims.d[0], mDims.d[1], mDims.d[2]}};
}
private:
void readDataFile(const std::string& dataFilePath) {
std::ifstream file{dataFilePath.c_str(), std::ios::binary};
int magicNumber, numImages, imageH, imageW;
file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
// All values in the MNIST files are big endian.
magicNumber = samplesCommon::swapEndianness(magicNumber);
ASSERT(magicNumber == 2051 &&
"Magic Number does not match the expected value for an MNIST image "
"set");
// Read number of images and dimensions
file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
file.read(reinterpret_cast<char*>(&imageH), sizeof(imageH));
file.read(reinterpret_cast<char*>(&imageW), sizeof(imageW));
numImages = samplesCommon::swapEndianness(numImages);
imageH = samplesCommon::swapEndianness(imageH);
imageW = samplesCommon::swapEndianness(imageW);
// The MNIST data is made up of unsigned bytes, so we need to cast to float
// and normalize.
int numElements = numImages * imageH * imageW;
std::vector<uint8_t> rawData(numElements);
file.read(reinterpret_cast<char*>(rawData.data()),
numElements * sizeof(uint8_t));
mData.resize(numElements);
std::transform(rawData.begin(), rawData.end(), mData.begin(),
[](uint8_t val) { return static_cast<float>(val) / 255.f; });
}
void readLabelsFile(const std::string& labelsFilePath) {
std::ifstream file{labelsFilePath.c_str(), std::ios::binary};
int magicNumber, numImages;
file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
// All values in the MNIST files are big endian.
magicNumber = samplesCommon::swapEndianness(magicNumber);
ASSERT(magicNumber == 2049 &&
"Magic Number does not match the expected value for an MNIST labels "
"file");
file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
numImages = samplesCommon::swapEndianness(numImages);
std::vector<uint8_t> rawLabels(numImages);
file.read(reinterpret_cast<char*>(rawLabels.data()),
numImages * sizeof(uint8_t));
mLabels.resize(numImages);
std::transform(rawLabels.begin(), rawLabels.end(), mLabels.begin(),
[](uint8_t val) { return static_cast<float>(val); });
}
int mBatchSize{0};
int mBatchCount{
0}; //!< The batch that will be read on the next invocation of next()
int mMaxBatches{0};
Dims mDims{};
std::vector<float> mData{};
std::vector<float> mLabels{};
};
class BatchStream : public IBatchStream {
public:
BatchStream(int batchSize, int maxBatches, std::string prefix,
std::string suffix, std::vector<std::string> directories)
: mBatchSize(batchSize), mMaxBatches(maxBatches), mPrefix(prefix),
mSuffix(suffix), mDataDir(directories) {
FILE* file = fopen(
locateFile(mPrefix + std::string("0") + mSuffix, mDataDir).c_str(),
"rb");
ASSERT(file != nullptr);
int d[4];
size_t readSize = fread(d, sizeof(int), 4, file);
ASSERT(readSize == 4);
mDims.nbDims = 4; // The number of dimensions.
mDims.d[0] = d[0]; // Batch Size
mDims.d[1] = d[1]; // Channels
mDims.d[2] = d[2]; // Height
mDims.d[3] = d[3]; // Width
ASSERT(mDims.d[0] > 0 && mDims.d[1] > 0 && mDims.d[2] > 0 &&
mDims.d[3] > 0);
fclose(file);
mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
mBatch.resize(mBatchSize * mImageSize, 0);
mLabels.resize(mBatchSize, 0);
mFileBatch.resize(mDims.d[0] * mImageSize, 0);
mFileLabels.resize(mDims.d[0], 0);
reset(0);
}
BatchStream(int batchSize, int maxBatches, std::string prefix,
std::vector<std::string> directories)
: BatchStream(batchSize, maxBatches, prefix, ".batch", directories) {}
BatchStream(int batchSize, int maxBatches, nvinfer1::Dims dims,
std::string listFile, std::vector<std::string> directories)
: mBatchSize(batchSize), mMaxBatches(maxBatches), mDims(dims),
mListFile(listFile), mDataDir(directories) {
mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
mBatch.resize(mBatchSize * mImageSize, 0);
mLabels.resize(mBatchSize, 0);
mFileBatch.resize(mDims.d[0] * mImageSize, 0);
mFileLabels.resize(mDims.d[0], 0);
reset(0);
}
// Resets data members
void reset(int firstBatch) override {
mBatchCount = 0;
mFileCount = 0;
mFileBatchPos = mDims.d[0];
skip(firstBatch);
}
// Advance to next batch and return true, or return false if there is no batch
// left.
bool next() override {
if (mBatchCount == mMaxBatches) {
return false;
}
for (int csize = 1, batchPos = 0; batchPos < mBatchSize;
batchPos += csize, mFileBatchPos += csize) {
ASSERT(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]);
if (mFileBatchPos == mDims.d[0] && !update()) {
return false;
}
// copy the smaller of: elements left to fulfill the request, or elements
// left in the file buffer.
csize = std::min(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos);
std::copy_n(getFileBatch() + mFileBatchPos * mImageSize,
csize * mImageSize, getBatch() + batchPos * mImageSize);
std::copy_n(getFileLabels() + mFileBatchPos, csize,
getLabels() + batchPos);
}
mBatchCount++;
return true;
}
// Skips the batches
void skip(int skipCount) override {
if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 &&
mFileBatchPos == mDims.d[0]) {
mFileCount += skipCount * mBatchSize / mDims.d[0];
return;
}
int x = mBatchCount;
for (int i = 0; i < skipCount; i++) {
next();
}
mBatchCount = x;
}
float* getBatch() override { return mBatch.data(); }
float* getLabels() override { return mLabels.data(); }
int getBatchesRead() const override { return mBatchCount; }
int getBatchSize() const override { return mBatchSize; }
nvinfer1::Dims getDims() const override { return mDims; }
private:
float* getFileBatch() { return mFileBatch.data(); }
float* getFileLabels() { return mFileLabels.data(); }
bool update() {
if (mListFile.empty()) {
std::string inputFileName = locateFile(
mPrefix + std::to_string(mFileCount++) + mSuffix, mDataDir);
FILE* file = fopen(inputFileName.c_str(), "rb");
if (!file) {
return false;
}
int d[4];
size_t readSize = fread(d, sizeof(int), 4, file);
ASSERT(readSize == 4);
ASSERT(mDims.d[0] == d[0] && mDims.d[1] == d[1] && mDims.d[2] == d[2] &&
mDims.d[3] == d[3]);
size_t readInputCount =
fread(getFileBatch(), sizeof(float), mDims.d[0] * mImageSize, file);
ASSERT(readInputCount == size_t(mDims.d[0] * mImageSize));
size_t readLabelCount =
fread(getFileLabels(), sizeof(float), mDims.d[0], file);
ASSERT(readLabelCount == 0 || readLabelCount == size_t(mDims.d[0]));
fclose(file);
} else {
std::vector<std::string> fNames;
std::ifstream file(locateFile(mListFile, mDataDir), std::ios::binary);
if (!file) {
return false;
}
sample::gLogInfo << "Batch #" << mFileCount << std::endl;
file.seekg(((mBatchCount * mBatchSize)) * 7);
for (int i = 1; i <= mBatchSize; i++) {
std::string sName;
std::getline(file, sName);
sName = sName + ".ppm";
sample::gLogInfo << "Calibrating with file " << sName << std::endl;
fNames.emplace_back(sName);
}
mFileCount++;
const int imageC = 3;
const int imageH = 300;
const int imageW = 300;
std::vector<samplesCommon::PPM<imageC, imageH, imageW>> ppms(
fNames.size());
for (uint32_t i = 0; i < fNames.size(); ++i) {
readPPMFile(locateFile(fNames[i], mDataDir), ppms[i]);
}
std::vector<float> data(samplesCommon::volume(mDims));
const float scale = 2.0 / 255.0;
const float bias = 1.0;
long int volChl = mDims.d[2] * mDims.d[3];
// Normalize input data
for (int i = 0, volImg = mDims.d[1] * mDims.d[2] * mDims.d[3];
i < mBatchSize; ++i) {
for (int c = 0; c < mDims.d[1]; ++c) {
for (int j = 0; j < volChl; ++j) {
data[i * volImg + c * volChl + j] =
scale * float(ppms[i].buffer[j * mDims.d[1] + c]) - bias;
}
}
}
std::copy_n(data.data(), mDims.d[0] * mImageSize, getFileBatch());
}
mFileBatchPos = 0;
return true;
}
int mBatchSize{0};
int mMaxBatches{0};
int mBatchCount{0};
int mFileCount{0};
int mFileBatchPos{0};
int mImageSize{0};
std::vector<float> mBatch; //!< Data for the batch
std::vector<float> mLabels; //!< Labels for the batch
  std::vector<float> mFileBatch;  //!< Data read from the current batch file
  std::vector<float> mFileLabels; //!< Labels read from the current batch file
std::string mPrefix; //!< Batch file name prefix
std::string mSuffix; //!< Batch file name suffix
nvinfer1::Dims mDims; //!< Input dimensions
std::string mListFile; //!< File name of the list of image names
std::vector<std::string>
mDataDir; //!< Directories where the files can be found
};
#endif

View File

@@ -0,0 +1 @@
exclude_files=.*

View File

@@ -0,0 +1,118 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ENTROPY_CALIBRATOR_H
#define ENTROPY_CALIBRATOR_H
#include "BatchStream.h"
#include "NvInfer.h"
//! \class EntropyCalibratorImpl
//!
//! \brief Implements common functionality for Entropy calibrators.
//!
template <typename TBatchStream> class EntropyCalibratorImpl {
public:
EntropyCalibratorImpl(TBatchStream stream, int firstBatch,
std::string networkName, const char* inputBlobName,
bool readCache = true)
: mStream{stream},
mCalibrationTableName("CalibrationTable" + networkName),
mInputBlobName(inputBlobName), mReadCache(readCache) {
nvinfer1::Dims dims = mStream.getDims();
mInputCount = samplesCommon::volume(dims);
CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float)));
mStream.reset(firstBatch);
}
virtual ~EntropyCalibratorImpl() { CHECK(cudaFree(mDeviceInput)); }
int getBatchSize() const noexcept { return mStream.getBatchSize(); }
bool getBatch(void* bindings[], const char* names[],
int nbBindings) noexcept {
if (!mStream.next()) {
return false;
}
CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(),
mInputCount * sizeof(float), cudaMemcpyHostToDevice));
ASSERT(!strcmp(names[0], mInputBlobName));
bindings[0] = mDeviceInput;
return true;
}
const void* readCalibrationCache(size_t& length) noexcept {
mCalibrationCache.clear();
std::ifstream input(mCalibrationTableName, std::ios::binary);
input >> std::noskipws;
if (mReadCache && input.good()) {
std::copy(std::istream_iterator<char>(input),
std::istream_iterator<char>(),
std::back_inserter(mCalibrationCache));
}
length = mCalibrationCache.size();
return length ? mCalibrationCache.data() : nullptr;
}
void writeCalibrationCache(const void* cache, size_t length) noexcept {
std::ofstream output(mCalibrationTableName, std::ios::binary);
output.write(reinterpret_cast<const char*>(cache), length);
}
private:
TBatchStream mStream;
size_t mInputCount;
std::string mCalibrationTableName;
const char* mInputBlobName;
bool mReadCache{true};
void* mDeviceInput{nullptr};
std::vector<char> mCalibrationCache;
};
//! \class Int8EntropyCalibrator2
//!
//! \brief Implements Entropy calibrator 2.
//! CalibrationAlgoType is kENTROPY_CALIBRATION_2.
//!
template <typename TBatchStream>
class Int8EntropyCalibrator2 : public IInt8EntropyCalibrator2 {
public:
Int8EntropyCalibrator2(TBatchStream stream, int firstBatch,
const char* networkName, const char* inputBlobName,
bool readCache = true)
: mImpl(stream, firstBatch, networkName, inputBlobName, readCache) {}
int getBatchSize() const noexcept override { return mImpl.getBatchSize(); }
bool getBatch(void* bindings[], const char* names[],
int nbBindings) noexcept override {
return mImpl.getBatch(bindings, names, nbBindings);
}
const void* readCalibrationCache(size_t& length) noexcept override {
return mImpl.readCalibrationCache(length);
}
void writeCalibrationCache(const void* cache,
size_t length) noexcept override {
mImpl.writeCalibrationCache(cache, length);
}
private:
EntropyCalibratorImpl<TBatchStream> mImpl;
};
#endif // ENTROPY_CALIBRATOR_H
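A hedged sketch of how the pieces above are typically wired together for INT8 engine building: a batch stream feeds the calibrator, and the calibrator is registered on the builder config. The MNIST file names, data directory, and input blob name are assumptions, and the "EntropyCalibrator.h" header name is inferred from the include guard rather than shown in this diff.

#include "BatchStream.h"
#include "EntropyCalibrator.h"  // assumed header name (ENTROPY_CALIBRATOR_H)
#include "NvInfer.h"

// Sketch only: enable INT8 and attach an entropy calibrator to the config.
void EnableInt8Calibration(nvinfer1::IBuilderConfig* config) {
  MNISTBatchStream stream(/*batchSize=*/32, /*maxBatches=*/10,
                          "train-images-idx3-ubyte",  // assumed file name
                          "train-labels-idx1-ubyte",  // assumed file name
                          {"data/mnist/"});           // assumed data dir
  // Kept static so the calibrator outlives the engine build that happens
  // after this function returns (sketch-level lifetime handling only).
  static Int8EntropyCalibrator2<MNISTBatchStream> calibrator(
      stream, /*firstBatch=*/0, "MNIST", /*inputBlobName=*/"data");
  config->setFlag(nvinfer1::BuilderFlag::kINT8);
  config->setInt8Calibrator(&calibrator);
}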

View File

@@ -0,0 +1,115 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ERROR_RECORDER_H
#define ERROR_RECORDER_H
#include "NvInferRuntimeCommon.h"
#include "logger.h"
#include <atomic>
#include <cstdint>
#include <exception>
#include <mutex>
#include <vector>
using nvinfer1::ErrorCode;
using nvinfer1::IErrorRecorder;
//!
//! A simple implementation of the IErrorRecorder interface for
//! use by samples. This interface also can be used as a reference
//! implementation.
//! The sample Error recorder is based on a vector that pairs the error
//! code and the error string into a single element. It also uses
//! standard mutexes and atomics in order to make sure that the code
//! works in a multi-threaded environment.
//!
class SampleErrorRecorder : public IErrorRecorder {
using errorPair = std::pair<ErrorCode, std::string>;
using errorStack = std::vector<errorPair>;
public:
SampleErrorRecorder() = default;
virtual ~SampleErrorRecorder() noexcept {}
int32_t getNbErrors() const noexcept final { return mErrorStack.size(); }
ErrorCode getErrorCode(int32_t errorIdx) const noexcept final {
return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT
: (*this)[errorIdx].first;
};
IErrorRecorder::ErrorDesc
getErrorDesc(int32_t errorIdx) const noexcept final {
return invalidIndexCheck(errorIdx) ? "errorIdx out of range."
: (*this)[errorIdx].second.c_str();
}
// This class can never overflow since we have dynamic resize via std::vector
// usage.
bool hasOverflowed() const noexcept final { return false; }
// Empty the errorStack.
void clear() noexcept final {
try {
// grab a lock so that there is no addition while clearing.
std::lock_guard<std::mutex> guard(mStackLock);
mErrorStack.clear();
} catch (const std::exception& e) {
sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
}
};
  //! Simple helper function that returns true if the error stack is empty.
bool empty() const noexcept { return mErrorStack.empty(); }
bool reportError(ErrorCode val,
IErrorRecorder::ErrorDesc desc) noexcept final {
try {
std::lock_guard<std::mutex> guard(mStackLock);
sample::gLogError << "Error[" << static_cast<int32_t>(val)
<< "]: " << desc << std::endl;
mErrorStack.push_back(errorPair(val, desc));
} catch (const std::exception& e) {
sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
}
// All errors are considered fatal.
return true;
}
// Atomically increment or decrement the ref counter.
IErrorRecorder::RefCount incRefCount() noexcept final { return ++mRefCount; }
IErrorRecorder::RefCount decRefCount() noexcept final { return --mRefCount; }
private:
// Simple helper functions.
const errorPair& operator[](size_t index) const noexcept {
return mErrorStack[index];
}
bool invalidIndexCheck(int32_t index) const noexcept {
// By converting signed to unsigned, we only need a single check since
// negative numbers turn into large positive greater than the size.
size_t sIndex = index;
return sIndex >= mErrorStack.size();
}
// Mutex to hold when locking mErrorStack.
std::mutex mStackLock;
// Reference count of the class. Destruction of the class when mRefCount
// is not zero causes undefined behavior.
std::atomic<int32_t> mRefCount{0};
// The error stack that holds the errors recorded by TensorRT.
errorStack mErrorStack;
}; // class SampleErrorRecorder
#endif // ERROR_RECORDER_H
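As the class comment says, the recorder is meant to be handed to TensorRT objects so their errors land in one thread-safe place. A minimal hedged sketch follows; the "ErrorRecorder.h" header name is inferred from the include guard, and setErrorRecorder is the standard TensorRT attachment point.

#include "ErrorRecorder.h"  // assumed header name (ERROR_RECORDER_H)
#include "NvInfer.h"

// Sketch only: attach one recorder to a builder, then drain it after building.
void BuildWithErrorRecorder(nvinfer1::IBuilder* builder) {
  static SampleErrorRecorder recorder;  // must outlive the builder
  builder->setErrorRecorder(&recorder);
  // ... create the network definition and build the engine here ...
  for (int32_t i = 0; i < recorder.getNbErrors(); ++i) {
    sample::gLogError << "Recorded: " << recorder.getErrorDesc(i) << std::endl;
  }
  recorder.clear();
}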

View File

@@ -0,0 +1 @@
The code in this directory is taken from https://github.com/NVIDIA/TensorRT

View File

@@ -0,0 +1,169 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_ARGS_PARSER_H
#define TENSORRT_ARGS_PARSER_H
#include <string>
#include <vector>
#ifdef _MSC_VER
#include ".\windows\getopt.h"
#else
#include <getopt.h>
#endif
#include <iostream>
namespace samplesCommon {
//!
//! \brief The SampleParams structure groups the basic parameters required by
//! all sample networks.
//!
struct SampleParams {
int32_t batchSize{1}; //!< Number of inputs in a batch
int32_t dlaCore{-1}; //!< Specify the DLA core to run network on.
  bool int8{false}; //!< Allow running the network in Int8 mode.
bool fp16{false}; //!< Allow running the network in FP16 mode.
std::vector<std::string>
dataDirs; //!< Directory paths where sample data files are stored
std::vector<std::string> inputTensorNames;
std::vector<std::string> outputTensorNames;
};
//!
//! \brief The CaffeSampleParams structure groups the additional parameters
//! required by
//! networks that use caffe
//!
struct CaffeSampleParams : public SampleParams {
std::string
prototxtFileName; //!< Filename of prototxt design file of a network
std::string
weightsFileName; //!< Filename of trained weights file of a network
std::string meanFileName; //!< Filename of mean file of a network
};
//!
//! \brief The OnnxSampleParams structure groups the additional parameters
//! required by
//! networks that use ONNX
//!
struct OnnxSampleParams : public SampleParams {
std::string onnxFileName; //!< Filename of ONNX file of a network
};
//!
//! \brief The UffSampleParams structure groups the additional parameters
//! required by
//! networks that use Uff
//!
struct UffSampleParams : public SampleParams {
std::string uffFileName; //!< Filename of uff file of a network
};
//!
//! \brief Struct to maintain command-line arguments.
//!
struct Args {
bool runInInt8{false};
bool runInFp16{false};
bool help{false};
int32_t useDLACore{-1};
int32_t batch{1};
std::vector<std::string> dataDirs;
std::string saveEngine;
std::string loadEngine;
bool useILoop{false};
};
//!
//! \brief Populates the Args struct with the provided command-line parameters.
//!
//! \throw invalid_argument if any of the arguments are not valid
//!
//! \return boolean If return value is true, execution can continue, otherwise
//! program should exit
//!
inline bool parseArgs(Args& args, int32_t argc, char* argv[]) {
while (1) {
int32_t arg;
static struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"datadir", required_argument, 0, 'd'},
{"int8", no_argument, 0, 'i'},
{"fp16", no_argument, 0, 'f'},
{"useILoop", no_argument, 0, 'l'},
{"saveEngine", required_argument, 0, 's'},
{"loadEngine", no_argument, 0, 'o'},
{"useDLACore", required_argument, 0, 'u'},
{"batch", required_argument, 0, 'b'},
{nullptr, 0, nullptr, 0}};
int32_t option_index = 0;
arg = getopt_long(argc, argv, "hd:iu", long_options, &option_index);
if (arg == -1) {
break;
}
switch (arg) {
case 'h':
args.help = true;
return true;
case 'd':
if (optarg) {
args.dataDirs.push_back(optarg);
} else {
std::cerr << "ERROR: --datadir requires option argument" << std::endl;
return false;
}
break;
case 's':
if (optarg) {
args.saveEngine = optarg;
}
break;
case 'o':
if (optarg) {
args.loadEngine = optarg;
}
break;
case 'i':
args.runInInt8 = true;
break;
case 'f':
args.runInFp16 = true;
break;
case 'l':
args.useILoop = true;
break;
case 'u':
if (optarg) {
args.useDLACore = std::stoi(optarg);
}
break;
case 'b':
if (optarg) {
args.batch = std::stoi(optarg);
}
break;
default:
return false;
}
}
return true;
}
} // namespace samplesCommon
#endif // TENSORRT_ARGS_PARSER_H
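A hedged sketch of the call pattern parseArgs() is written for; the "argsParser.h" header name is inferred from the include guard, and the usage text and default data directory are illustrative only.

#include "argsParser.h"  // assumed header name (TENSORRT_ARGS_PARSER_H)
#include <cstdlib>
#include <iostream>

int main(int argc, char* argv[]) {
  samplesCommon::Args args;
  if (!samplesCommon::parseArgs(args, argc, argv)) {
    std::cerr << "Invalid arguments" << std::endl;
    return EXIT_FAILURE;
  }
  if (args.help) {
    std::cout << "Usage: sample [--datadir=<path>] [--int8] [--fp16] "
                 "[--useDLACore=<n>] [--batch=<n>]" << std::endl;
    return EXIT_SUCCESS;
  }
  if (args.dataDirs.empty()) {
    args.dataDirs.push_back("data/");  // assumed default search directory
  }
  // Hand `args` over to the sample-specific parameter struct from here on.
  return EXIT_SUCCESS;
}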

View File

@@ -0,0 +1,426 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_BUFFERS_H
#define TENSORRT_BUFFERS_H
#include "NvInfer.h"
#include "common.h"
#include "half.h"
#include <cassert>
#include <cuda_runtime_api.h>
#include <iostream>
#include <iterator>
#include <memory>
#include <new>
#include <numeric>
#include <string>
#include <vector>
namespace samplesCommon {
//!
//! \brief The GenericBuffer class is a templated class for buffers.
//!
//! \details This templated RAII (Resource Acquisition Is Initialization) class
//! handles the allocation,
//! deallocation, querying of buffers on both the device and the host.
//! It can handle data of arbitrary types because it stores byte
//! buffers.
//! The template parameters AllocFunc and FreeFunc are used for the
//! allocation and deallocation of the buffer.
//! AllocFunc must be a functor that takes in (void** ptr, size_t size)
//! and returns bool. ptr is a pointer to where the allocated buffer
//! address should be stored.
//! size is the amount of memory in bytes to allocate.
//! The boolean indicates whether or not the memory allocation was
//! successful.
//! FreeFunc must be a functor that takes in (void* ptr) and returns
//! void.
//! ptr is the allocated buffer address. It must work with nullptr
//! input.
//!
template <typename AllocFunc, typename FreeFunc> class GenericBuffer {
public:
//!
//! \brief Construct an empty buffer.
//!
GenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT)
: mSize(0), mCapacity(0), mType(type), mBuffer(nullptr) {}
//!
//! \brief Construct a buffer with the specified allocation size in bytes.
//!
GenericBuffer(size_t size, nvinfer1::DataType type)
: mSize(size), mCapacity(size), mType(type) {
if (!allocFn(&mBuffer, this->nbBytes())) {
throw std::bad_alloc();
}
}
GenericBuffer(GenericBuffer&& buf)
: mSize(buf.mSize), mCapacity(buf.mCapacity), mType(buf.mType),
mBuffer(buf.mBuffer) {
buf.mSize = 0;
buf.mCapacity = 0;
buf.mType = nvinfer1::DataType::kFLOAT;
buf.mBuffer = nullptr;
}
GenericBuffer& operator=(GenericBuffer&& buf) {
if (this != &buf) {
freeFn(mBuffer);
mSize = buf.mSize;
mCapacity = buf.mCapacity;
mType = buf.mType;
mBuffer = buf.mBuffer;
// Reset buf.
buf.mSize = 0;
buf.mCapacity = 0;
buf.mBuffer = nullptr;
}
return *this;
}
//!
//! \brief Returns pointer to underlying array.
//!
void* data() { return mBuffer; }
//!
//! \brief Returns pointer to underlying array.
//!
const void* data() const { return mBuffer; }
//!
//! \brief Returns the size (in number of elements) of the buffer.
//!
size_t size() const { return mSize; }
//!
//! \brief Returns the size (in bytes) of the buffer.
//!
size_t nbBytes() const {
return this->size() * samplesCommon::getElementSize(mType);
}
//!
//! \brief Resizes the buffer. This is a no-op if the new size is smaller than
//! or equal to the current capacity.
//!
void resize(size_t newSize) {
mSize = newSize;
if (mCapacity < newSize) {
freeFn(mBuffer);
if (!allocFn(&mBuffer, this->nbBytes())) {
throw std::bad_alloc{};
}
mCapacity = newSize;
}
}
//!
//! \brief Overload of resize that accepts Dims
//!
void resize(const nvinfer1::Dims& dims) {
return this->resize(samplesCommon::volume(dims));
}
~GenericBuffer() { freeFn(mBuffer); }
private:
size_t mSize{0}, mCapacity{0};
nvinfer1::DataType mType;
void* mBuffer;
AllocFunc allocFn;
FreeFunc freeFn;
};
class DeviceAllocator {
public:
bool operator()(void** ptr, size_t size) const {
return cudaMalloc(ptr, size) == cudaSuccess;
}
};
class DeviceFree {
public:
void operator()(void* ptr) const { cudaFree(ptr); }
};
class HostAllocator {
public:
bool operator()(void** ptr, size_t size) const {
*ptr = malloc(size);
return *ptr != nullptr;
}
};
class HostFree {
public:
void operator()(void* ptr) const { free(ptr); }
};
using DeviceBuffer = GenericBuffer<DeviceAllocator, DeviceFree>;
using HostBuffer = GenericBuffer<HostAllocator, HostFree>;
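// Illustrative extension (not part of the original sample): the
// AllocFunc/FreeFunc contract described above makes it easy to add other
// memory kinds. The PinnedHost* names below are new here; only the standard
// CUDA runtime calls cudaMallocHost/cudaFreeHost are assumed.
class PinnedHostAllocator {
 public:
  bool operator()(void** ptr, size_t size) const {
    return cudaMallocHost(ptr, size) == cudaSuccess;
  }
};
class PinnedHostFree {
 public:
  void operator()(void* ptr) const { cudaFreeHost(ptr); }
};
using PinnedHostBuffer = GenericBuffer<PinnedHostAllocator, PinnedHostFree>;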
//!
//! \brief The ManagedBuffer class groups together a pair of corresponding
//! device and host buffers.
//!
class ManagedBuffer {
public:
DeviceBuffer deviceBuffer;
HostBuffer hostBuffer;
};
//!
//! \brief The BufferManager class handles host and device buffer allocation
//! and deallocation.
//!
//! \details This RAII class handles host and device buffer allocation and
//! deallocation,
//! memcpy between host and device buffers to aid with inference,
//! and debugging dumps to validate inference. The BufferManager class
//! is meant to be
//! used to simplify buffer management and any interactions between
//! buffers and the engine.
//!
class BufferManager {
public:
static const size_t kINVALID_SIZE_VALUE = ~size_t(0);
//!
//! \brief Create a BufferManager for handling buffer interactions with
//! engine.
//!
BufferManager(std::shared_ptr<nvinfer1::ICudaEngine> engine,
const int batchSize = 0,
const nvinfer1::IExecutionContext* context = nullptr)
: mEngine(engine), mBatchSize(batchSize) {
// Full Dims implies no batch size.
assert(engine->hasImplicitBatchDimension() || mBatchSize == 0);
// Create host and device buffers
for (int i = 0; i < mEngine->getNbBindings(); i++) {
auto dims = context ? context->getBindingDimensions(i)
: mEngine->getBindingDimensions(i);
size_t vol = context || !mBatchSize ? 1 : static_cast<size_t>(mBatchSize);
nvinfer1::DataType type = mEngine->getBindingDataType(i);
int vecDim = mEngine->getBindingVectorizedDim(i);
if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector
{
int scalarsPerVec = mEngine->getBindingComponentsPerElement(i);
dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec);
vol *= scalarsPerVec;
}
vol *= samplesCommon::volume(dims);
std::unique_ptr<ManagedBuffer> manBuf{new ManagedBuffer()};
manBuf->deviceBuffer = DeviceBuffer(vol, type);
manBuf->hostBuffer = HostBuffer(vol, type);
mDeviceBindings.emplace_back(manBuf->deviceBuffer.data());
mManagedBuffers.emplace_back(std::move(manBuf));
}
}
//!
//! \brief Returns a vector of device buffers that you can use directly as
//! bindings for the execute and enqueue methods of IExecutionContext.
//!
std::vector<void*>& getDeviceBindings() { return mDeviceBindings; }
//!
//! \brief Returns a vector of device buffers.
//!
const std::vector<void*>& getDeviceBindings() const {
return mDeviceBindings;
}
//!
//! \brief Returns the device buffer corresponding to tensorName.
//! Returns nullptr if no such tensor can be found.
//!
void* getDeviceBuffer(const std::string& tensorName) const {
return getBuffer(false, tensorName);
}
//!
//! \brief Returns the host buffer corresponding to tensorName.
//! Returns nullptr if no such tensor can be found.
//!
void* getHostBuffer(const std::string& tensorName) const {
return getBuffer(true, tensorName);
}
//!
//! \brief Returns the size of the host and device buffers that correspond to
//! tensorName.
//! Returns kINVALID_SIZE_VALUE if no such tensor can be found.
//!
size_t size(const std::string& tensorName) const {
int index = mEngine->getBindingIndex(tensorName.c_str());
if (index == -1)
return kINVALID_SIZE_VALUE;
return mManagedBuffers[index]->hostBuffer.nbBytes();
}
//!
//! \brief Dump host buffer with specified tensorName to ostream.
//! Prints error message to std::ostream if no such tensor can be
//! found.
//!
void dumpBuffer(std::ostream& os, const std::string& tensorName) {
int index = mEngine->getBindingIndex(tensorName.c_str());
if (index == -1) {
os << "Invalid tensor name" << std::endl;
return;
}
void* buf = mManagedBuffers[index]->hostBuffer.data();
size_t bufSize = mManagedBuffers[index]->hostBuffer.nbBytes();
nvinfer1::Dims bufDims = mEngine->getBindingDimensions(index);
size_t rowCount = static_cast<size_t>(
bufDims.nbDims > 0 ? bufDims.d[bufDims.nbDims - 1] : mBatchSize);
int leadDim = mBatchSize;
int* trailDims = bufDims.d;
int nbDims = bufDims.nbDims;
// Fix explicit Dimension networks
if (!leadDim && nbDims > 0) {
leadDim = bufDims.d[0];
++trailDims;
--nbDims;
}
os << "[" << leadDim;
for (int i = 0; i < nbDims; i++)
os << ", " << trailDims[i];
os << "]" << std::endl;
switch (mEngine->getBindingDataType(index)) {
case nvinfer1::DataType::kINT32:
print<int32_t>(os, buf, bufSize, rowCount);
break;
case nvinfer1::DataType::kFLOAT:
print<float>(os, buf, bufSize, rowCount);
break;
case nvinfer1::DataType::kHALF:
print<half_float::half>(os, buf, bufSize, rowCount);
break;
case nvinfer1::DataType::kINT8:
assert(0 && "Int8 network-level input and output is not supported");
break;
case nvinfer1::DataType::kBOOL:
assert(0 && "Bool network-level input and output are not supported");
break;
}
}
//!
//! \brief Templated print function that dumps buffers of arbitrary type to
//! std::ostream.
//! rowCount parameter controls how many elements are on each line.
//! A rowCount of 1 means that there is only 1 element on each line.
//!
template <typename T>
void print(std::ostream& os, void* buf, size_t bufSize, size_t rowCount) {
assert(rowCount != 0);
assert(bufSize % sizeof(T) == 0);
T* typedBuf = static_cast<T*>(buf);
size_t numItems = bufSize / sizeof(T);
for (int i = 0; i < static_cast<int>(numItems); i++) {
// Handle rowCount == 1 case
if (rowCount == 1 && i != static_cast<int>(numItems) - 1)
os << typedBuf[i] << std::endl;
else if (rowCount == 1)
os << typedBuf[i];
// Handle rowCount > 1 case
else if (i % rowCount == 0)
os << typedBuf[i];
else if (i % rowCount == rowCount - 1)
os << " " << typedBuf[i] << std::endl;
else
os << " " << typedBuf[i];
}
}
//!
//! \brief Copy the contents of input host buffers to input device buffers
//! synchronously.
//!
void copyInputToDevice() { memcpyBuffers(true, false, false); }
//!
//! \brief Copy the contents of output device buffers to output host buffers
//! synchronously.
//!
void copyOutputToHost() { memcpyBuffers(false, true, false); }
//!
//! \brief Copy the contents of input host buffers to input device buffers
//! asynchronously.
//!
void copyInputToDeviceAsync(const cudaStream_t& stream = 0) {
memcpyBuffers(true, false, true, stream);
}
//!
//! \brief Copy the contents of output device buffers to output host buffers
//! asynchronously.
//!
void copyOutputToHostAsync(const cudaStream_t& stream = 0) {
memcpyBuffers(false, true, true, stream);
}
~BufferManager() = default;
private:
void* getBuffer(const bool isHost, const std::string& tensorName) const {
int index = mEngine->getBindingIndex(tensorName.c_str());
if (index == -1)
return nullptr;
return (isHost ? mManagedBuffers[index]->hostBuffer.data()
: mManagedBuffers[index]->deviceBuffer.data());
}
void memcpyBuffers(const bool copyInput, const bool deviceToHost,
const bool async, const cudaStream_t& stream = 0) {
for (int i = 0; i < mEngine->getNbBindings(); i++) {
void* dstPtr = deviceToHost ? mManagedBuffers[i]->hostBuffer.data()
: mManagedBuffers[i]->deviceBuffer.data();
const void* srcPtr = deviceToHost
? mManagedBuffers[i]->deviceBuffer.data()
: mManagedBuffers[i]->hostBuffer.data();
const size_t byteSize = mManagedBuffers[i]->hostBuffer.nbBytes();
const cudaMemcpyKind memcpyType =
deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice;
if ((copyInput && mEngine->bindingIsInput(i)) ||
(!copyInput && !mEngine->bindingIsInput(i))) {
if (async)
CHECK(cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream));
else
CHECK(cudaMemcpy(dstPtr, srcPtr, byteSize, memcpyType));
}
}
}
std::shared_ptr<nvinfer1::ICudaEngine> mEngine; //!< The pointer to the engine
int mBatchSize; //!< The batch size for legacy networks, 0 otherwise.
std::vector<std::unique_ptr<ManagedBuffer>>
mManagedBuffers; //!< The vector of pointers to managed buffers
std::vector<void*> mDeviceBindings; //!< The vector of device buffers needed
//! for engine execution
};
} // namespace samplesCommon
#endif // TENSORRT_BUFFERS_H
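A hedged sketch of the synchronous round trip BufferManager is designed for: engine and context creation are elided, the "buffers.h" header name is inferred from the include guard, the binding name "input" is hypothetical, and executeV2 is the standard explicit-batch execution call.

#include "buffers.h"  // assumed header name (TENSORRT_BUFFERS_H)

// Sketch only: copy host input in, run the context, copy the output back out.
bool RunSync(std::shared_ptr<nvinfer1::ICudaEngine> engine,
             nvinfer1::IExecutionContext* context) {
  samplesCommon::BufferManager buffers(engine, /*batchSize=*/0, context);
  float* input = static_cast<float*>(buffers.getHostBuffer("input"));
  if (input == nullptr) {  // "input" is an assumed binding name
    return false;
  }
  // ... fill the host input buffer with preprocessed data here ...
  buffers.copyInputToDevice();  // host -> device
  if (!context->executeV2(buffers.getDeviceBindings().data())) {
    return false;
  }
  buffers.copyOutputToHost();   // device -> host
  // Results are now readable via buffers.getHostBuffer("<output name>").
  return true;
}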

View File

@@ -0,0 +1,844 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_COMMON_H
#define TENSORRT_COMMON_H
// For loadLibrary
#ifdef _MSC_VER
// Needed so that the max/min definitions in windows.h do not conflict with
// std::max/min.
#define NOMINMAX
#include <windows.h>
#undef NOMINMAX
#else
#include <dlfcn.h>
#endif
#include "NvInfer.h"
#include "NvInferPlugin.h"
#include "logger.h"
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstring>
#include <cuda_runtime_api.h>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <new>
#include <numeric>
#include <ratio>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "safeCommon.h"
using namespace nvinfer1;
using namespace plugin;
#ifdef _MSC_VER
#define FN_NAME __FUNCTION__
#else
#define FN_NAME __func__
#endif
#if defined(__aarch64__) || defined(__QNX__)
#define ENABLE_DLA_API 1
#endif
#define CHECK_RETURN_W_MSG(status, val, errMsg) \
do { \
if (!(status)) { \
sample::gLogError << errMsg << " Error in " << __FILE__ << ", function " \
<< FN_NAME << "(), line " << __LINE__ << std::endl; \
return val; \
} \
} while (0)
#undef ASSERT
#define ASSERT(condition) \
do { \
if (!(condition)) { \
sample::gLogError << "Assertion failure: " << #condition << std::endl; \
abort(); \
} \
} while (0)
#define CHECK_RETURN(status, val) CHECK_RETURN_W_MSG(status, val, "")
#define OBJ_GUARD(A) std::unique_ptr<A, void (*)(A * t)>
template <typename T, typename T_> OBJ_GUARD(T) makeObjGuard(T_* t) {
CHECK(!(std::is_base_of<T, T_>::value || std::is_same<T, T_>::value));
auto deleter = [](T* t) { t->destroy(); };
return std::unique_ptr<T, decltype(deleter)>{static_cast<T*>(t), deleter};
}
constexpr long double operator"" _GiB(long double val) {
return val * (1 << 30);
}
constexpr long double operator"" _MiB(long double val) {
return val * (1 << 20);
}
constexpr long double operator"" _KiB(long double val) {
return val * (1 << 10);
}
// These are necessary if we want to be able to write 1_GiB instead of 1.0_GiB.
// Since the return type is signed, -1_GiB will work as expected.
constexpr long long int operator"" _GiB(unsigned long long val) {
return val * (1 << 30);
}
constexpr long long int operator"" _MiB(unsigned long long val) {
return val * (1 << 20);
}
constexpr long long int operator"" _KiB(unsigned long long val) {
return val * (1 << 10);
}
struct SimpleProfiler : public nvinfer1::IProfiler {
struct Record {
float time{0};
int count{0};
};
virtual void reportLayerTime(const char* layerName, float ms) noexcept {
mProfile[layerName].count++;
mProfile[layerName].time += ms;
if (std::find(mLayerNames.begin(), mLayerNames.end(), layerName) ==
mLayerNames.end()) {
mLayerNames.push_back(layerName);
}
}
SimpleProfiler(const char* name,
const std::vector<SimpleProfiler>& srcProfilers =
std::vector<SimpleProfiler>())
: mName(name) {
for (const auto& srcProfiler : srcProfilers) {
for (const auto& rec : srcProfiler.mProfile) {
auto it = mProfile.find(rec.first);
if (it == mProfile.end()) {
mProfile.insert(rec);
} else {
it->second.time += rec.second.time;
it->second.count += rec.second.count;
}
}
}
}
friend std::ostream& operator<<(std::ostream& out,
const SimpleProfiler& value) {
out << "========== " << value.mName << " profile ==========" << std::endl;
float totalTime = 0;
std::string layerNameStr = "TensorRT layer name";
int maxLayerNameLength =
std::max(static_cast<int>(layerNameStr.size()), 70);
for (const auto& elem : value.mProfile) {
totalTime += elem.second.time;
maxLayerNameLength =
std::max(maxLayerNameLength, static_cast<int>(elem.first.size()));
}
auto old_settings = out.flags();
auto old_precision = out.precision();
// Output header
{
out << std::setw(maxLayerNameLength) << layerNameStr << " ";
out << std::setw(12) << "Runtime, "
<< "%"
<< " ";
out << std::setw(12) << "Invocations"
<< " ";
out << std::setw(12) << "Runtime, ms" << std::endl;
}
for (size_t i = 0; i < value.mLayerNames.size(); i++) {
const std::string layerName = value.mLayerNames[i];
auto elem = value.mProfile.at(layerName);
out << std::setw(maxLayerNameLength) << layerName << " ";
out << std::setw(12) << std::fixed << std::setprecision(1)
<< (elem.time * 100.0F / totalTime) << "%"
<< " ";
out << std::setw(12) << elem.count << " ";
out << std::setw(12) << std::fixed << std::setprecision(2) << elem.time
<< std::endl;
}
out.flags(old_settings);
out.precision(old_precision);
out << "========== " << value.mName << " total runtime = " << totalTime
<< " ms ==========" << std::endl;
return out;
}
private:
std::string mName;
std::vector<std::string> mLayerNames;
std::map<std::string, Record> mProfile;
};
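// Illustrative usage of SimpleProfiler (a sketch, not part of the upstream
// sample; `context` is a hypothetical nvinfer1::IExecutionContext*):
//   SimpleProfiler profiler("MyEngine");
//   context->setProfiler(&profiler);
//   // ... run inference one or more times ...
//   sample::gLogInfo << profiler << std::endl;  // prints the per-layer table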
//! Locate path to file, given its filename or filepath suffix and possible dirs
//! it might lie in.
//! Function will also walk back MAX_DEPTH dirs from CWD to check for such a
//! file path.
inline std::string locateFile(const std::string& filepathSuffix,
const std::vector<std::string>& directories,
bool reportError = true) {
const int MAX_DEPTH{10};
bool found{false};
std::string filepath;
for (auto& dir : directories) {
if (!dir.empty() && dir.back() != '/') {
#ifdef _MSC_VER
filepath = dir + "\\" + filepathSuffix;
#else
filepath = dir + "/" + filepathSuffix;
#endif
} else {
filepath = dir + filepathSuffix;
}
for (int i = 0; i < MAX_DEPTH && !found; i++) {
const std::ifstream checkFile(filepath);
found = checkFile.is_open();
if (found) {
break;
}
filepath = "../" + filepath; // Try again in parent dir
}
if (found) {
break;
}
filepath.clear();
}
// Could not find the file
if (filepath.empty()) {
const std::string dirList = std::accumulate(
directories.begin() + 1, directories.end(), directories.front(),
[](const std::string& a, const std::string& b) {
return a + "\n\t" + b;
});
std::cout << "Could not find " << filepathSuffix
<< " in data directories:\n\t" << dirList << std::endl;
if (reportError) {
std::cout << "&&&& FAILED" << std::endl;
exit(EXIT_FAILURE);
}
}
return filepath;
}
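// Illustrative call (a sketch; the file name and directories are hypothetical):
//   std::string onnxPath =
//       locateFile("model.onnx", {"data/", "data/samples/"});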
inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int inH,
int inW) {
std::ifstream infile(fileName, std::ifstream::binary);
assert(infile.is_open() &&
"Attempting to read from a file that is not open.");
std::string magic, h, w, max;
infile >> magic >> h >> w >> max;
infile.seekg(1, infile.cur);
infile.read(reinterpret_cast<char*>(buffer), inH * inW);
}
namespace samplesCommon {
// Swaps endianness of an integral type.
template <typename T,
typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
inline T swapEndianness(const T& value) {
uint8_t bytes[sizeof(T)];
for (int i = 0; i < static_cast<int>(sizeof(T)); ++i) {
bytes[sizeof(T) - 1 - i] = *(reinterpret_cast<const uint8_t*>(&value) + i);
}
return *reinterpret_cast<T*>(bytes);
}
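// Example (illustrative): swapEndianness(uint32_t{0x12345678}) == 0x78563412.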
class HostMemory {
public:
HostMemory() = delete;
virtual void* data() const noexcept { return mData; }
virtual std::size_t size() const noexcept { return mSize; }
virtual DataType type() const noexcept { return mType; }
virtual ~HostMemory() {}
protected:
HostMemory(std::size_t size, DataType type)
: mData{nullptr}, mSize(size), mType(type) {}
void* mData;
std::size_t mSize;
DataType mType;
};
template <typename ElemType, DataType dataType>
class TypedHostMemory : public HostMemory {
public:
explicit TypedHostMemory(std::size_t size) : HostMemory(size, dataType) {
mData = new ElemType[size];
};
~TypedHostMemory() noexcept { delete[](ElemType*) mData; }
ElemType* raw() noexcept { return static_cast<ElemType*>(data()); }
};
using FloatMemory = TypedHostMemory<float, DataType::kFLOAT>;
using HalfMemory = TypedHostMemory<uint16_t, DataType::kHALF>;
using ByteMemory = TypedHostMemory<uint8_t, DataType::kINT8>;
inline void* safeCudaMalloc(size_t memSize) {
void* deviceMem;
CHECK(cudaMalloc(&deviceMem, memSize));
if (deviceMem == nullptr) {
std::cerr << "Out of memory" << std::endl;
exit(1);
}
return deviceMem;
}
inline bool isDebug() { return (std::getenv("TENSORRT_DEBUG") ? true : false); }
struct InferDeleter {
template <typename T> void operator()(T* obj) const { delete obj; }
};
template <typename T> using SampleUniquePtr = std::unique_ptr<T, InferDeleter>;
static auto StreamDeleter = [](cudaStream_t* pStream) {
if (pStream) {
cudaStreamDestroy(*pStream);
delete pStream;
}
};
inline std::unique_ptr<cudaStream_t, decltype(StreamDeleter)> makeCudaStream() {
std::unique_ptr<cudaStream_t, decltype(StreamDeleter)> pStream(
new cudaStream_t, StreamDeleter);
if (cudaStreamCreateWithFlags(pStream.get(), cudaStreamNonBlocking) !=
cudaSuccess) {
pStream.reset(nullptr);
}
return pStream;
}
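// Illustrative usage (a sketch, not part of the upstream sample):
//   auto stream = makeCudaStream();
//   if (!stream) { /* stream creation failed */ }
//   // pass *stream wherever a cudaStream_t is expected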
//! Return vector of indices that puts magnitudes of sequence in descending
//! order.
template <class Iter>
std::vector<size_t> argMagnitudeSort(Iter begin, Iter end) {
std::vector<size_t> indices(end - begin);
std::iota(indices.begin(), indices.end(), 0);
std::sort(indices.begin(), indices.end(), [&begin](size_t i, size_t j) {
return std::abs(begin[j]) < std::abs(begin[i]);
});
return indices;
}
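// Example (illustrative): for the sequence {0.1f, -3.0f, 2.0f} the returned
// indices are {1, 2, 0}, i.e. magnitudes 3.0, 2.0, 0.1 in descending order.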
inline bool readReferenceFile(const std::string& fileName,
std::vector<std::string>& refVector) {
std::ifstream infile(fileName);
if (!infile.is_open()) {
std::cout << "ERROR: readReferenceFile: Attempting to read from a file "
"that is not open."
<< std::endl;
return false;
}
std::string line;
while (std::getline(infile, line)) {
if (line.empty())
continue;
refVector.push_back(line);
}
infile.close();
return true;
}
template <typename T>
std::vector<std::string> classify(const std::vector<std::string>& refVector,
const std::vector<T>& output,
const size_t topK) {
const auto inds =
samplesCommon::argMagnitudeSort(output.cbegin(), output.cend());
std::vector<std::string> result;
result.reserve(topK);
for (size_t k = 0; k < topK; ++k) {
result.push_back(refVector[inds[k]]);
}
return result;
}
// Returns indices of highest K magnitudes in v.
template <typename T>
std::vector<size_t> topKMagnitudes(const std::vector<T>& v, const size_t k) {
std::vector<size_t> indices =
samplesCommon::argMagnitudeSort(v.cbegin(), v.cend());
indices.resize(k);
return indices;
}
template <typename T>
bool readASCIIFile(const std::string& fileName, const size_t size,
std::vector<T>& out) {
std::ifstream infile(fileName);
if (!infile.is_open()) {
std::cout << "ERROR readASCIIFile: Attempting to read from a file that is "
"not open."
<< std::endl;
return false;
}
out.clear();
out.reserve(size);
out.assign(std::istream_iterator<T>(infile), std::istream_iterator<T>());
infile.close();
return true;
}
template <typename T>
bool writeASCIIFile(const std::string& fileName, const std::vector<T>& in) {
std::ofstream outfile(fileName);
if (!outfile.is_open()) {
std::cout << "ERROR: writeASCIIFile: Attempting to write to a file that is "
"not open."
<< std::endl;
return false;
}
for (auto fn : in) {
outfile << fn << "\n";
}
outfile.close();
return true;
}
inline void print_version() {
std::cout << " TensorRT version: " << NV_TENSORRT_MAJOR << "."
<< NV_TENSORRT_MINOR << "." << NV_TENSORRT_PATCH << "."
<< NV_TENSORRT_BUILD << std::endl;
}
inline std::string getFileType(const std::string& filepath) {
return filepath.substr(filepath.find_last_of(".") + 1);
}
inline std::string toLower(const std::string& inp) {
std::string out = inp;
std::transform(out.begin(), out.end(), out.begin(), ::tolower);
return out;
}
inline float getMaxValue(const float* buffer, int64_t size) {
assert(buffer != nullptr);
assert(size > 0);
return *std::max_element(buffer, buffer + size);
}
// Ensures that every tensor used by a network has a dynamic range set.
//
// All tensors in a network must have a dynamic range specified if a calibrator
// is not used.
// This function is just a utility to globally fill in missing scales and
// zero-points for the entire network.
//
// If a tensor does not have a dynamic range set, it is assigned inRange or
// outRange as follows:
//
// * If the tensor is the input to a layer or the output of a pooling node, its
//   dynamic range is derived from inRange.
// * Otherwise its dynamic range is derived from outRange.
//
// The default parameter values are intended to demonstrate, for the final
// layers in the network, cases where dynamic ranges are asymmetric.
//
// The default parameter values were chosen arbitrarily. Range values should be
// chosen to avoid underflow or overflow, and should be non-zero to avoid a
// uniformly zero-scaled tensor.
inline void setAllDynamicRanges(INetworkDefinition* network,
float inRange = 2.0f, float outRange = 4.0f) {
// Ensure that all layer inputs have a scale.
for (int i = 0; i < network->getNbLayers(); i++) {
auto layer = network->getLayer(i);
for (int j = 0; j < layer->getNbInputs(); j++) {
ITensor* input{layer->getInput(j)};
// Optional inputs are nullptr here and are from RNN layers.
if (input != nullptr && !input->dynamicRangeIsSet()) {
ASSERT(input->setDynamicRange(-inRange, inRange));
}
}
}
// Ensure that all layer outputs have a scale.
  // Tensors that are also inputs to layers are ignored here
  // since the previous loop nest assigned scales to them.
for (int i = 0; i < network->getNbLayers(); i++) {
auto layer = network->getLayer(i);
for (int j = 0; j < layer->getNbOutputs(); j++) {
ITensor* output{layer->getOutput(j)};
// Optional outputs are nullptr here and are from RNN layers.
if (output != nullptr && !output->dynamicRangeIsSet()) {
// Pooling must have the same input and output scales.
if (layer->getType() == LayerType::kPOOLING) {
ASSERT(output->setDynamicRange(-inRange, inRange));
} else {
ASSERT(output->setDynamicRange(-outRange, outRange));
}
}
}
}
}
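// Illustrative usage (a sketch, not part of the upstream sample; `network` and
// `config` are hypothetical INetworkDefinition*/IBuilderConfig* pointers):
//   config->setFlag(BuilderFlag::kINT8);
//   setAllDynamicRanges(network, 2.0F, 4.0F); // no INT8 calibrator case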
inline void setDummyInt8DynamicRanges(const IBuilderConfig* c,
INetworkDefinition* n) {
// Set dummy per-tensor dynamic range if Int8 mode is requested.
if (c->getFlag(BuilderFlag::kINT8)) {
sample::gLogWarning << "Int8 calibrator not provided. Generating dummy "
"per-tensor dynamic range. Int8 accuracy is not "
"guaranteed."
<< std::endl;
setAllDynamicRanges(n);
}
}
inline void enableDLA(IBuilder* builder, IBuilderConfig* config, int useDLACore,
bool allowGPUFallback = true) {
if (useDLACore >= 0) {
if (builder->getNbDLACores() == 0) {
std::cerr << "Trying to use DLA core " << useDLACore
<< " on a platform that doesn't have any DLA cores"
<< std::endl;
assert(
"Error: use DLA core on a platfrom that doesn't have any DLA cores" &&
false);
}
if (allowGPUFallback) {
config->setFlag(BuilderFlag::kGPU_FALLBACK);
}
if (!config->getFlag(BuilderFlag::kINT8)) {
// User has not requested INT8 Mode.
// By default run in FP16 mode. FP32 mode is not permitted.
config->setFlag(BuilderFlag::kFP16);
}
config->setDefaultDeviceType(DeviceType::kDLA);
config->setDLACore(useDLACore);
}
}
inline int32_t parseDLA(int32_t argc, char** argv) {
for (int32_t i = 1; i < argc; i++) {
if (strncmp(argv[i], "--useDLACore=", 13) == 0) {
return std::stoi(argv[i] + 13);
}
}
return -1;
}
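// Illustrative usage (a sketch, not part of the upstream sample; `builder` and
// `config` are hypothetical smart pointers to IBuilder/IBuilderConfig):
//   const int32_t dlaCore = parseDLA(argc, argv);   // e.g. --useDLACore=0
//   enableDLA(builder.get(), config.get(), dlaCore); // no-op if dlaCore < 0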
inline uint32_t getElementSize(nvinfer1::DataType t) noexcept {
switch (t) {
case nvinfer1::DataType::kINT32:
return 4;
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kBOOL:
case nvinfer1::DataType::kINT8:
return 1;
}
return 0;
}
inline int64_t volume(const nvinfer1::Dims& d) {
return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int64_t>());
}
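// Example (illustrative): volume(Dims3{3, 224, 224}) == 150528.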
template <int C, int H, int W> struct PPM {
std::string magic, fileName;
int h, w, max;
uint8_t buffer[C * H * W];
};
// vPPM (variable-sized PPM) struct with run-time dimensions.
struct vPPM {
std::string magic, fileName;
int h, w, max;
std::vector<uint8_t> buffer;
};
struct BBox {
float x1, y1, x2, y2;
};
template <int C, int H, int W>
void readPPMFile(const std::string& filename,
samplesCommon::PPM<C, H, W>& ppm) {
ppm.fileName = filename;
std::ifstream infile(filename, std::ifstream::binary);
assert(infile.is_open() &&
"Attempting to read from a file that is not open.");
infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
infile.seekg(1, infile.cur);
infile.read(reinterpret_cast<char*>(ppm.buffer), ppm.w * ppm.h * 3);
}
inline void readPPMFile(const std::string& filename, vPPM& ppm,
std::vector<std::string>& input_dir) {
ppm.fileName = filename;
std::ifstream infile(locateFile(filename, input_dir), std::ifstream::binary);
infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
infile.seekg(1, infile.cur);
for (int i = 0; i < ppm.w * ppm.h * 3; ++i) {
ppm.buffer.push_back(0);
}
infile.read(reinterpret_cast<char*>(&ppm.buffer[0]), ppm.w * ppm.h * 3);
}
template <int C, int H, int W>
void writePPMFileWithBBox(const std::string& filename, PPM<C, H, W>& ppm,
const BBox& bbox) {
std::ofstream outfile("./" + filename, std::ofstream::binary);
assert(!outfile.fail());
outfile << "P6"
<< "\n"
<< ppm.w << " " << ppm.h << "\n"
<< ppm.max << "\n";
auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); };
const int x1 = std::min(std::max(0, round(int(bbox.x1))), W - 1);
const int x2 = std::min(std::max(0, round(int(bbox.x2))), W - 1);
const int y1 = std::min(std::max(0, round(int(bbox.y1))), H - 1);
const int y2 = std::min(std::max(0, round(int(bbox.y2))), H - 1);
for (int x = x1; x <= x2; ++x) {
// bbox top border
ppm.buffer[(y1 * ppm.w + x) * 3] = 255;
ppm.buffer[(y1 * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(y1 * ppm.w + x) * 3 + 2] = 0;
// bbox bottom border
ppm.buffer[(y2 * ppm.w + x) * 3] = 255;
ppm.buffer[(y2 * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(y2 * ppm.w + x) * 3 + 2] = 0;
}
for (int y = y1; y <= y2; ++y) {
// bbox left border
ppm.buffer[(y * ppm.w + x1) * 3] = 255;
ppm.buffer[(y * ppm.w + x1) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + x1) * 3 + 2] = 0;
// bbox right border
ppm.buffer[(y * ppm.w + x2) * 3] = 255;
ppm.buffer[(y * ppm.w + x2) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + x2) * 3 + 2] = 0;
}
outfile.write(reinterpret_cast<char*>(ppm.buffer), ppm.w * ppm.h * 3);
}
inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm,
std::vector<BBox>& dets) {
std::ofstream outfile("./" + filename, std::ofstream::binary);
assert(!outfile.fail());
outfile << "P6"
<< "\n"
<< ppm.w << " " << ppm.h << "\n"
<< ppm.max << "\n";
auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); };
for (auto bbox : dets) {
for (int x = int(bbox.x1); x < int(bbox.x2); ++x) {
// bbox top border
ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3] = 255;
ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 2] = 0;
// bbox bottom border
ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3] = 255;
ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 1] = 0;
ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 2] = 0;
}
for (int y = int(bbox.y1); y < int(bbox.y2); ++y) {
// bbox left border
ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3] = 255;
ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 2] = 0;
// bbox right border
ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3] = 255;
ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 1] = 0;
ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 2] = 0;
}
}
outfile.write(reinterpret_cast<char*>(&ppm.buffer[0]), ppm.w * ppm.h * 3);
}
class TimerBase {
public:
virtual void start() {}
virtual void stop() {}
float microseconds() const noexcept { return mMs * 1000.f; }
float milliseconds() const noexcept { return mMs; }
float seconds() const noexcept { return mMs / 1000.f; }
void reset() noexcept { mMs = 0.f; }
protected:
float mMs{0.0f};
};
class GpuTimer : public TimerBase {
public:
explicit GpuTimer(cudaStream_t stream) : mStream(stream) {
CHECK(cudaEventCreate(&mStart));
CHECK(cudaEventCreate(&mStop));
}
~GpuTimer() {
CHECK(cudaEventDestroy(mStart));
CHECK(cudaEventDestroy(mStop));
}
void start() { CHECK(cudaEventRecord(mStart, mStream)); }
void stop() {
CHECK(cudaEventRecord(mStop, mStream));
float ms{0.0f};
CHECK(cudaEventSynchronize(mStop));
CHECK(cudaEventElapsedTime(&ms, mStart, mStop));
mMs += ms;
}
private:
cudaEvent_t mStart, mStop;
cudaStream_t mStream;
}; // class GpuTimer
template <typename Clock> class CpuTimer : public TimerBase {
public:
using clock_type = Clock;
void start() { mStart = Clock::now(); }
void stop() {
mStop = Clock::now();
mMs += std::chrono::duration<float, std::milli>{mStop - mStart}.count();
}
private:
std::chrono::time_point<Clock> mStart, mStop;
}; // class CpuTimer
using PreciseCpuTimer = CpuTimer<std::chrono::high_resolution_clock>;
inline std::vector<std::string> splitString(std::string str,
char delimiter = ',') {
std::vector<std::string> splitVect;
std::stringstream ss(str);
std::string substr;
while (ss.good()) {
getline(ss, substr, delimiter);
splitVect.emplace_back(std::move(substr));
}
return splitVect;
}
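// Example (illustrative): splitString("3,224,224") returns {"3", "224", "224"}.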
// Return m rounded up to nearest multiple of n
inline int roundUp(int m, int n) { return ((m + n - 1) / n) * n; }
inline int getC(const Dims& d) { return d.nbDims >= 3 ? d.d[d.nbDims - 3] : 1; }
inline int getH(const Dims& d) { return d.nbDims >= 2 ? d.d[d.nbDims - 2] : 1; }
inline int getW(const Dims& d) { return d.nbDims >= 1 ? d.d[d.nbDims - 1] : 1; }
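// Examples (illustrative): roundUp(30, 8) == 32; for a Dims with
// d = {8, 3, 224, 224}, getC(d) == 3, getH(d) == 224, and getW(d) == 224.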
inline void loadLibrary(const std::string& path) {
#ifdef _MSC_VER
void* handle = LoadLibrary(path.c_str());
#else
int32_t flags{RTLD_LAZY};
#if ENABLE_ASAN
// https://github.com/google/sanitizers/issues/89
// asan doesn't handle module unloading correctly and there are no plans to do
// so. In order to get proper stack traces, don't delete the shared library on
// close so that asan can resolve the symbols correctly.
flags |= RTLD_NODELETE;
#endif // ENABLE_ASAN
void* handle = dlopen(path.c_str(), flags);
#endif
if (handle == nullptr) {
#ifdef _MSC_VER
sample::gLogError << "Could not load plugin library: " << path << std::endl;
#else
sample::gLogError << "Could not load plugin library: " << path
<< ", due to: " << dlerror() << std::endl;
#endif
}
}
inline int32_t getSMVersion() {
int32_t deviceIndex = 0;
CHECK(cudaGetDevice(&deviceIndex));
int32_t major, minor;
CHECK(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor,
deviceIndex));
CHECK(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor,
deviceIndex));
return ((major << 8) | minor);
}
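// Example (illustrative): on a GPU with compute capability 8.6 this returns
// (8 << 8) | 6 == 0x0806.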
inline bool isSMSafe() {
const int32_t smVersion = getSMVersion();
return smVersion == 0x0700 || smVersion == 0x0702 || smVersion == 0x0705 ||
smVersion == 0x0800 || smVersion == 0x0806 || smVersion == 0x0807;
}
inline bool isDataTypeSupported(DataType dataType) {
auto builder = SampleUniquePtr<nvinfer1::IBuilder>(
nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
if (!builder) {
return false;
}
if ((dataType == DataType::kINT8 && !builder->platformHasFastInt8()) ||
(dataType == DataType::kHALF && !builder->platformHasFastFp16())) {
return false;
}
return true;
}
} // namespace samplesCommon
inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) {
os << "(";
for (int i = 0; i < dims.nbDims; ++i) {
os << (i ? ", " : "") << dims.d[i];
}
return os << ")";
}
#endif // TENSORRT_COMMON_H

View File

@@ -0,0 +1,223 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "getOptions.h"
#include "logger.h"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstring>
#include <set>
namespace nvinfer1 {
namespace utility {
//! Matching for TRTOptions is defined as follows:
//!
//! If A and B both have longName set, A matches B if and only if A.longName ==
//! B.longName and (A.shortName == B.shortName if both have short name set).
//!
//! If A only has shortName set and B only has longName set, then A does not
//! match B. It is assumed that when 2 TRTOptions are compared, one of them is
//! the definition of a TRTOption in the input to getOptions. As such, if the
//! definition only has shortName set, it will never be equal to a TRTOption
//! that does not have shortName set (and same for longName).
//!
//! If A and B both have shortName set but B does not have longName set, A
//! matches B if and only if A.shortName == B.shortName.
//!
//! If A has neither long nor short name set, A matches B if and only if B has
//! neither long nor short name set.
bool matches(const TRTOption& a, const TRTOption& b) {
if (!a.longName.empty() && !b.longName.empty()) {
if (a.shortName && b.shortName) {
return (a.longName == b.longName) && (a.shortName == b.shortName);
}
return a.longName == b.longName;
}
// If only one of them is not set, this will return false anyway.
return a.shortName == b.shortName;
}
//! getTRTOptionIndex returns the index of a TRTOption in a vector of
//! TRTOptions, -1 if not found.
int getTRTOptionIndex(const std::vector<TRTOption>& options,
const TRTOption& opt) {
for (size_t i = 0; i < options.size(); ++i) {
if (matches(opt, options[i])) {
return i;
}
}
return -1;
}
//! validateTRTOption returns a string containing an error message if the
//! option's short or long name contains invalid characters (anything other
//! than alphanumerics, plus '-' and '_' for long names), or if the name
//! duplicates one already seen. Otherwise, it returns the empty string.
std::string validateTRTOption(const std::set<char>& seenShortNames,
const std::set<std::string>& seenLongNames,
const TRTOption& opt) {
if (opt.shortName != 0) {
if (!std::isalnum(opt.shortName)) {
return "Short name '" + std::to_string(opt.shortName) +
"' is non-alphanumeric";
}
if (seenShortNames.find(opt.shortName) != seenShortNames.end()) {
return "Short name '" + std::to_string(opt.shortName) +
"' is a duplicate";
}
}
if (!opt.longName.empty()) {
for (const char& c : opt.longName) {
if (!std::isalnum(c) && c != '-' && c != '_') {
return "Long name '" + opt.longName +
"' contains characters that are not '-', '_', or alphanumeric";
}
}
if (seenLongNames.find(opt.longName) != seenLongNames.end()) {
return "Long name '" + opt.longName + "' is a duplicate";
}
}
return "";
}
//! validateTRTOptions returns a string containing an error message if any
//! option contains invalid characters or if there are duplicate option names.
//! Otherwise, it returns the empty string.
std::string validateTRTOptions(const std::vector<TRTOption>& options) {
std::set<char> seenShortNames;
std::set<std::string> seenLongNames;
for (size_t i = 0; i < options.size(); ++i) {
const std::string errMsg =
validateTRTOption(seenShortNames, seenLongNames, options[i]);
if (!errMsg.empty()) {
return "Error '" + errMsg + "' at TRTOption " + std::to_string(i);
}
seenShortNames.insert(options[i].shortName);
seenLongNames.insert(options[i].longName);
}
return "";
}
//! parseArgs parses an argument list and returns a TRTParsedArgs with the
//! fields set accordingly. Assumes that options is validated.
//! ErrMsg will be set if:
//! - an argument is null
//! - an argument is empty
//! - an argument does not specify an option (i.e. it is just "-" or "--")
//! - a short argument has more than 1 character
//! - the last argument in the list requires a value
TRTParsedArgs parseArgs(int argc, const char* const* argv,
const std::vector<TRTOption>& options) {
TRTParsedArgs parsedArgs;
parsedArgs.values.resize(options.size());
for (int i = 1; i < argc; ++i) // index of current command-line argument
{
if (argv[i] == nullptr) {
return TRTParsedArgs{"Null argument at index " + std::to_string(i)};
}
const std::string argStr(argv[i]);
if (argStr.empty()) {
return TRTParsedArgs{"Empty argument at index " + std::to_string(i)};
}
// No starting hyphen means it is a positional argument
if (argStr[0] != '-') {
parsedArgs.positionalArgs.push_back(argStr);
continue;
}
if (argStr == "-" || argStr == "--") {
return TRTParsedArgs{"Argument does not specify an option at index " +
std::to_string(i)};
}
// If only 1 hyphen, char after is the flag.
TRTOption opt{' ', "", false, ""};
std::string value;
if (argStr[1] != '-') {
// Must only have 1 char after the hyphen
if (argStr.size() > 2) {
return TRTParsedArgs{
"Short arg contains more than 1 character at index " +
std::to_string(i)};
}
opt.shortName = argStr[1];
} else {
opt.longName = argStr.substr(2);
// We need to support --foo=bar syntax, so look for '='
const size_t eqIndex = opt.longName.find('=');
if (eqIndex < opt.longName.size()) {
value = opt.longName.substr(eqIndex + 1);
opt.longName = opt.longName.substr(0, eqIndex);
}
}
const int idx = getTRTOptionIndex(options, opt);
if (idx < 0) {
continue;
}
if (options[idx].valueRequired) {
if (!value.empty()) {
parsedArgs.values[idx].second.push_back(value);
parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
continue;
}
if (i + 1 >= argc) {
return TRTParsedArgs{"Last argument requires value, but none given"};
}
const std::string nextArg(argv[i + 1]);
if (nextArg.size() >= 1 && nextArg[0] == '-') {
sample::gLogWarning << "Warning: Using '" << nextArg
<< "' as a value for '" << argStr
<< "', Should this be its own flag?" << std::endl;
}
parsedArgs.values[idx].second.push_back(nextArg);
i += 1; // Next argument already consumed
parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
} else {
parsedArgs.values[idx].first += 1;
}
}
return parsedArgs;
}
TRTParsedArgs getOptions(int argc, const char* const* argv,
const std::vector<TRTOption>& options) {
const std::string errMsg = validateTRTOptions(options);
if (!errMsg.empty()) {
return TRTParsedArgs{errMsg};
}
return parseArgs(argc, argv, options);
}
} // namespace utility
} // namespace nvinfer1

View File

@@ -0,0 +1,128 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_GET_OPTIONS_H
#define TRT_GET_OPTIONS_H
#include <string>
#include <utility>
#include <vector>
namespace nvinfer1 {
namespace utility {
//! TRTOption defines a command line option. At least one of shortName and
//! longName must be defined.
//! Because the struct has no default member initializers, valueRequired must
//! also be set explicitly.
//! helpText is optional.
struct TRTOption {
char shortName; //!< Option name in short (single hyphen) form (i.e. -a, -b)
std::string longName; //!< Option name in long (double hyphen) form (i.e.
                        //! --foo, --bar)
bool valueRequired; //!< True if a value is needed for an option (i.e. -N 4,
                      //! --foo bar)
std::string helpText; //!< Text to show when printing out the command usage
};
//! TRTParsedArgs is returned by getOptions after it has parsed a command line
//! argument list (argv).
//!
//! errMsg is a string containing an error message if any errors occurred. If it
//! is empty, no errors occurred.
//!
//! values stores a vector of pairs for each option (ordered by order in the
//! input). Each pair contains an int (the number of occurrences) and a vector
//! of strings (a list of values). The user should know which of these to use,
//! and which options required values. For non-value options, only the
//! occurrence count is populated. For value-required options, the occurrence
//! count equals the number of values. Values do not need to be unique.
//!
//! positionalArgs stores additional arguments that are passed in without an
//! option (these must not start with a hyphen).
struct TRTParsedArgs {
std::string errMsg;
std::vector<std::pair<int, std::vector<std::string>>> values;
std::vector<std::string> positionalArgs;
};
//! Parse the input arguments passed to main() and extract options as well as
//! positional arguments.
//!
//! Options are supposed to be passed to main() with a preceding hyphen '-'.
//!
//! If there is a single preceding hyphen, there should be exactly 1 character
//! after the hyphen, which is interpreted as the option.
//!
//! If there are 2 preceding hyphens, the entire argument (without the hyphens)
//! is interpreted as the option.
//!
//! If the option requires a value, the next argument is used as the value.
//!
//! Positional arguments must not start with a hyphen.
//!
//! If an argument requires a value, the next argument is interpreted as the
//! value, even if it is in the form of a valid option (i.e. --foo --bar will
//! store "--bar" as a value for option "foo" if "foo" requires a value).
//! We also support --name=value syntax. In this case, 'value' would be used as
//! the value, NOT the next argument.
//!
//! For options:
//! { { 'a', "", false },
//! { 'b', "", false },
//! { 0, "cee", false },
//! { 'd', "", true },
//! { 'e', "", true },
//! { 'f', "foo", true } }
//!
//! ./main hello world -a -a --cee -d 12 -f 34
//! and
//! ./main hello world -a -a --cee -d 12 --foo 34
//!
//! will result in:
//!
//! TRTParsedArgs {
//! errMsg: "",
//! values: { { 2, {} },
//! { 0, {} },
//! { 1, {} },
//! { 1, {"12"} },
//! { 0, {} },
//! { 1, {"34"} } }
//! positionalArgs: {"hello", "world"},
//! }
//!
//! Non-POSIX behavior:
//! - Does not support "-abcde" as a shorthand for "-a -b -c -d -e". Each
//! option must have its own hyphen prefix.
//! - Does not support -e12 as a shorthand for "-e 12". Values MUST be
//! whitespace-separated from the option they are for.
//!
//! @param[in] argc The number of arguments passed to main (including the
//! file name, which is disregarded)
//! @param[in] argv The arguments passed to main (including the file name,
//! which is disregarded)
//! @param[in] options List of TRTOptions to parse
//! @return TRTParsedArgs. See TRTParsedArgs documentation for descriptions of
//! the fields.
TRTParsedArgs getOptions(int argc, const char* const* argv,
const std::vector<TRTOption>& options);
} // namespace utility
} // namespace nvinfer1
#endif // TRT_GET_OPTIONS_H

File diff suppressed because it is too large

View File

@@ -0,0 +1,38 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "logger.h"
#include "ErrorRecorder.h"
#include "logging.h"
SampleErrorRecorder gRecorder;
namespace sample {
Logger gLogger{Logger::Severity::kINFO};
LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)};
LogStreamConsumer gLogInfo{LOG_INFO(gLogger)};
LogStreamConsumer gLogWarning{LOG_WARN(gLogger)};
LogStreamConsumer gLogError{LOG_ERROR(gLogger)};
LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)};
void setReportableSeverity(Logger::Severity severity) {
gLogger.setReportableSeverity(severity);
gLogVerbose.setReportableSeverity(severity);
gLogInfo.setReportableSeverity(severity);
gLogWarning.setReportableSeverity(severity);
gLogError.setReportableSeverity(severity);
gLogFatal.setReportableSeverity(severity);
}
} // namespace sample

View File

@@ -0,0 +1,35 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LOGGER_H
#define LOGGER_H
#include "logging.h"
class SampleErrorRecorder;
extern SampleErrorRecorder gRecorder;
namespace sample {
extern Logger gLogger;
extern LogStreamConsumer gLogVerbose;
extern LogStreamConsumer gLogInfo;
extern LogStreamConsumer gLogWarning;
extern LogStreamConsumer gLogError;
extern LogStreamConsumer gLogFatal;
void setReportableSeverity(Logger::Severity severity);
} // namespace sample
#endif // LOGGER_H

View File

@@ -0,0 +1,573 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_LOGGING_H
#define TENSORRT_LOGGING_H
#include "NvInferRuntimeCommon.h"
#include "sampleOptions.h"
#include <cassert>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <ostream>
#include <sstream>
#include <string>
namespace sample {
using Severity = nvinfer1::ILogger::Severity;
class LogStreamConsumerBuffer : public std::stringbuf {
public:
LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix,
bool shouldLog)
: mOutput(stream), mPrefix(prefix), mShouldLog(shouldLog) {}
LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) noexcept
: mOutput(other.mOutput), mPrefix(other.mPrefix),
mShouldLog(other.mShouldLog) {}
LogStreamConsumerBuffer(const LogStreamConsumerBuffer& other) = delete;
LogStreamConsumerBuffer() = delete;
LogStreamConsumerBuffer& operator=(const LogStreamConsumerBuffer&) = delete;
LogStreamConsumerBuffer& operator=(LogStreamConsumerBuffer&&) = delete;
~LogStreamConsumerBuffer() override {
    // std::streambuf::pbase() gives a pointer to the beginning of the buffered
    // part of the output sequence.
    // std::streambuf::pptr() gives a pointer to the current position of the
    // output sequence.
    // If the two differ, call putOutput() to log the buffered output to the
    // stream.
if (pbase() != pptr()) {
putOutput();
}
}
//!
//! synchronizes the stream buffer and returns 0 on success
//! synchronizing the stream buffer consists of inserting the buffer contents
//! into the stream,
//! resetting the buffer and flushing the stream
//!
int32_t sync() override {
putOutput();
return 0;
}
void putOutput() {
if (mShouldLog) {
// prepend timestamp
std::time_t timestamp = std::time(nullptr);
tm* tm_local = std::localtime(&timestamp);
mOutput << "[";
mOutput << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon
<< "/";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
mOutput << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year
<< "-";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
// std::stringbuf::str() gets the string contents of the buffer
      // insert the buffer contents, prepended with the appropriate prefix,
      // into the stream
mOutput << mPrefix << str();
}
// set the buffer to empty
str("");
// flush the stream
mOutput.flush();
}
void setShouldLog(bool shouldLog) { mShouldLog = shouldLog; }
private:
std::ostream& mOutput;
std::string mPrefix;
bool mShouldLog{};
}; // class LogStreamConsumerBuffer
//!
//! \class LogStreamConsumerBase
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before
//! std::ostream in LogStreamConsumer
//!
class LogStreamConsumerBase {
public:
LogStreamConsumerBase(std::ostream& stream, const std::string& prefix,
bool shouldLog)
: mBuffer(stream, prefix, shouldLog) {}
protected:
std::mutex mLogMutex;
LogStreamConsumerBuffer mBuffer;
}; // class LogStreamConsumerBase
//!
//! \class LogStreamConsumer
//! \brief Convenience object used to facilitate use of C++ stream syntax when
//! logging messages.
//! Order of base classes is LogStreamConsumerBase and then std::ostream.
//! This is because the LogStreamConsumerBase class is used to initialize the
//! LogStreamConsumerBuffer member field
//! in LogStreamConsumer and then the address of the buffer is passed to
//! std::ostream.
//! This is necessary to prevent the address of an uninitialized buffer from
//! being passed to std::ostream.
//! Please do not change the order of the parent classes.
//!
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream {
public:
//!
//! \brief Creates a LogStreamConsumer which logs messages with level
//! severity.
//! Reportable severity determines if the messages are severe enough to be
//! logged.
//!
LogStreamConsumer(nvinfer1::ILogger::Severity reportableSeverity,
nvinfer1::ILogger::Severity severity)
: LogStreamConsumerBase(severityOstream(severity),
severityPrefix(severity),
severity <= reportableSeverity),
std::ostream(&mBuffer) // links the stream buffer with the stream
,
mShouldLog(severity <= reportableSeverity), mSeverity(severity) {}
LogStreamConsumer(LogStreamConsumer&& other) noexcept
: LogStreamConsumerBase(severityOstream(other.mSeverity),
severityPrefix(other.mSeverity),
other.mShouldLog),
std::ostream(&mBuffer) // links the stream buffer with the stream
,
mShouldLog(other.mShouldLog), mSeverity(other.mSeverity) {}
LogStreamConsumer(const LogStreamConsumer& other) = delete;
LogStreamConsumer() = delete;
~LogStreamConsumer() = default;
LogStreamConsumer& operator=(const LogStreamConsumer&) = delete;
LogStreamConsumer& operator=(LogStreamConsumer&&) = delete;
void setReportableSeverity(Severity reportableSeverity) {
mShouldLog = mSeverity <= reportableSeverity;
mBuffer.setShouldLog(mShouldLog);
}
std::mutex& getMutex() { return mLogMutex; }
bool getShouldLog() const { return mShouldLog; }
private:
static std::ostream& severityOstream(Severity severity) {
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
static std::string severityPrefix(Severity severity) {
switch (severity) {
case Severity::kINTERNAL_ERROR:
return "[F] ";
case Severity::kERROR:
return "[E] ";
case Severity::kWARNING:
return "[W] ";
case Severity::kINFO:
return "[I] ";
case Severity::kVERBOSE:
return "[V] ";
default:
assert(0);
return "";
}
}
bool mShouldLog;
Severity mSeverity;
}; // class LogStreamConsumer
template <typename T>
LogStreamConsumer& operator<<(LogStreamConsumer& logger, const T& obj) {
if (logger.getShouldLog()) {
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
os << obj;
}
return logger;
}
//!
//! Special handling std::endl
//!
inline LogStreamConsumer& operator<<(LogStreamConsumer& logger,
std::ostream& (*f)(std::ostream&)) {
if (logger.getShouldLog()) {
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
os << f;
}
return logger;
}
inline LogStreamConsumer& operator<<(LogStreamConsumer& logger,
const nvinfer1::Dims& dims) {
if (logger.getShouldLog()) {
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
for (int32_t i = 0; i < dims.nbDims; ++i) {
os << (i ? "x" : "") << dims.d[i];
}
}
return logger;
}
//!
//! \class Logger
//!
//! \brief Class which manages logging of TensorRT tools and samples
//!
//! \details This class provides a common interface for TensorRT tools and
//! samples to log information to the console,
//! and supports logging two types of messages:
//!
//! - Debugging messages with an associated severity (info, warning, error, or
//! internal error/fatal)
//! - Test pass/fail messages
//!
//! The advantage of having all samples use this class for logging as opposed to
//! emitting directly to stdout/stderr is
//! that the logic for controlling the verbosity and formatting of sample output
//! is centralized in one location.
//!
//! In the future, this class could be extended to support dumping test results
//! to a file in some standard format
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the
//! duration of a test run).
//!
//! TODO: For backwards compatibility with existing samples, this class inherits
//! directly from the nvinfer1::ILogger
//! interface, which is problematic since there isn't a clean separation between
//! messages coming from the TensorRT
//! library and messages coming from the sample.
//!
//! In the future (once all samples are updated to use Logger::getTRTLogger() to
//! access the ILogger) we can refactor the
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger
//! implementation a member of the Logger
//! object.
//!
class Logger : public nvinfer1::ILogger {
public:
explicit Logger(Severity severity = Severity::kWARNING)
: mReportableSeverity(severity) {}
//!
//! \enum TestResult
//! \brief Represents the state of a given test
//!
enum class TestResult {
kRUNNING, //!< The test is running
kPASSED, //!< The test passed
kFAILED, //!< The test failed
kWAIVED //!< The test was waived
};
//!
//! \brief Forward-compatible method for retrieving the nvinfer::ILogger
//! associated with this Logger
//! \return The nvinfer1::ILogger associated with this Logger
//!
//! TODO Once all samples are updated to use this method to register the
//! logger with TensorRT,
//! we can eliminate the inheritance of Logger from ILogger
//!
nvinfer1::ILogger& getTRTLogger() noexcept { return *this; }
//!
//! \brief Implementation of the nvinfer1::ILogger::log() virtual method
//!
//! Note samples should not be calling this function directly; it will
//! eventually go away once we eliminate the
//! inheritance from nvinfer1::ILogger
//!
void log(Severity severity, const char* msg) noexcept override {
LogStreamConsumer(mReportableSeverity, severity)
<< "[TRT] " << std::string(msg) << std::endl;
}
//!
//! \brief Method for controlling the verbosity of logging output
//!
//! \param severity The logger will only emit messages that have severity of
//! this level or higher.
//!
void setReportableSeverity(Severity severity) noexcept {
mReportableSeverity = severity;
}
//!
//! \brief Opaque handle that holds logging information for a particular test
//!
//! This object is an opaque handle to information used by the Logger to print
//! test results.
//! The sample must call Logger::defineTest() in order to obtain a TestAtom
//! that can be used
//! with Logger::reportTest{Start,End}().
//!
class TestAtom {
public:
TestAtom(TestAtom&&) = default;
private:
friend class Logger;
TestAtom(bool started, const std::string& name, const std::string& cmdline)
: mStarted(started), mName(name), mCmdline(cmdline) {}
bool mStarted;
std::string mName;
std::string mCmdline;
};
//!
//! \brief Define a test for logging
//!
  //! \param[in] name The name of the test. This should be a string starting
  //!                  with "TensorRT" and containing dot-separated strings
  //!                  of the characters [A-Za-z0-9_].
//! For example, "TensorRT.sample_googlenet"
//! \param[in] cmdline The command line used to reproduce the test
  //!
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
//!
static TestAtom defineTest(const std::string& name,
const std::string& cmdline) {
return TestAtom(false, name, cmdline);
}
//!
//! \brief A convenience overloaded version of defineTest() that accepts an
//! array of command-line arguments
//! as input
//!
//! \param[in] name The name of the test
//! \param[in] argc The number of command-line arguments
//! \param[in] argv The array of command-line arguments (given as C strings)
//!
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
//!
static TestAtom defineTest(const std::string& name, int32_t argc,
char const* const* argv) {
// Append TensorRT version as info
const std::string vname =
name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "]";
auto cmdline = genCmdlineString(argc, argv);
return defineTest(vname, cmdline);
}
//!
//! \brief Report that a test has started.
//!
//! \pre reportTestStart() has not been called yet for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has started
//!
static void reportTestStart(TestAtom& testAtom) {
reportTestResult(testAtom, TestResult::kRUNNING);
assert(!testAtom.mStarted);
testAtom.mStarted = true;
}
//!
//! \brief Report that a test has ended.
//!
//! \pre reportTestStart() has been called for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has ended
//! \param[in] result The result of the test. Should be one of
//! TestResult::kPASSED,
//! TestResult::kFAILED, TestResult::kWAIVED
//!
static void reportTestEnd(TestAtom const& testAtom, TestResult result) {
assert(result != TestResult::kRUNNING);
assert(testAtom.mStarted);
reportTestResult(testAtom, result);
}
static int32_t reportPass(TestAtom const& testAtom) {
reportTestEnd(testAtom, TestResult::kPASSED);
return EXIT_SUCCESS;
}
static int32_t reportFail(TestAtom const& testAtom) {
reportTestEnd(testAtom, TestResult::kFAILED);
return EXIT_FAILURE;
}
static int32_t reportWaive(TestAtom const& testAtom) {
reportTestEnd(testAtom, TestResult::kWAIVED);
return EXIT_SUCCESS;
}
static int32_t reportTest(TestAtom const& testAtom, bool pass) {
return pass ? reportPass(testAtom) : reportFail(testAtom);
}
Severity getReportableSeverity() const { return mReportableSeverity; }
private:
//!
//! \brief returns an appropriate string for prefixing a log message with the
//! given severity
//!
static const char* severityPrefix(Severity severity) {
switch (severity) {
case Severity::kINTERNAL_ERROR:
return "[F] ";
case Severity::kERROR:
return "[E] ";
case Severity::kWARNING:
return "[W] ";
case Severity::kINFO:
return "[I] ";
case Severity::kVERBOSE:
return "[V] ";
default:
assert(0);
return "";
}
}
//!
//! \brief returns an appropriate string for prefixing a test result message
//! with the given result
//!
static const char* testResultString(TestResult result) {
switch (result) {
case TestResult::kRUNNING:
return "RUNNING";
case TestResult::kPASSED:
return "PASSED";
case TestResult::kFAILED:
return "FAILED";
case TestResult::kWAIVED:
return "WAIVED";
default:
assert(0);
return "";
}
}
//!
//! \brief returns an appropriate output stream (cout or cerr) to use with the
//! given severity
//!
static std::ostream& severityOstream(Severity severity) {
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
//!
//! \brief method that implements logging test results
//!
static void reportTestResult(TestAtom const& testAtom, TestResult result) {
severityOstream(Severity::kINFO)
<< "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
<< testAtom.mCmdline << std::endl;
}
//!
//! \brief generate a command line string from the given (argc, argv) values
//!
static std::string genCmdlineString(int32_t argc, char const* const* argv) {
std::stringstream ss;
for (int32_t i = 0; i < argc; i++) {
if (i > 0) {
ss << " ";
}
ss << argv[i];
}
return ss.str();
}
Severity mReportableSeverity;
}; // class Logger
namespace {
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kVERBOSE
//!
//! Example usage:
//!
//! LOG_VERBOSE(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kINFO
//!
//! Example usage:
//!
//! LOG_INFO(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_INFO(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kWARNING
//!
//! Example usage:
//!
//! LOG_WARN(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_WARN(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kERROR
//!
//! Example usage:
//!
//! LOG_ERROR(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages
//! of severity kINTERNAL_ERROR
//! ("fatal" severity)
//!
//! Example usage:
//!
//! LOG_FATAL(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_FATAL(const Logger& logger) {
return LogStreamConsumer(logger.getReportableSeverity(),
Severity::kINTERNAL_ERROR);
}
} // anonymous namespace
} // namespace sample
#endif // TENSORRT_LOGGING_H

View File

@@ -0,0 +1,126 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef PARSER_ONNX_CONFIG_H
#define PARSER_ONNX_CONFIG_H
#include <cstring>
#include <iostream>
#include <string>
#include "NvInfer.h"
#include "NvOnnxConfig.h"
#include "NvOnnxParser.h"
#define ONNX_DEBUG 1
/**
* \class ParserOnnxConfig
* \brief Configuration Manager Class Concrete Implementation
*
* \note:
*
*/
using namespace std;
class ParserOnnxConfig : public nvonnxparser::IOnnxConfig {
protected:
string mModelFilename{};
string mTextFilename{};
string mFullTextFilename{};
nvinfer1::DataType mModelDtype;
nvonnxparser::IOnnxConfig::Verbosity mVerbosity;
bool mPrintLayercInfo;
public:
ParserOnnxConfig()
: mModelDtype(nvinfer1::DataType::kFLOAT),
mVerbosity(static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)),
mPrintLayercInfo(false) {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl;
}
#endif
}
protected:
~ParserOnnxConfig() {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl;
}
#endif
}
public:
virtual void setModelDtype(const nvinfer1::DataType modelDtype) noexcept {
mModelDtype = modelDtype;
}
virtual nvinfer1::DataType getModelDtype() const noexcept {
return mModelDtype;
}
virtual const char* getModelFileName() const noexcept {
return mModelFilename.c_str();
}
virtual void setModelFileName(const char* onnxFilename) noexcept {
mModelFilename = string(onnxFilename);
}
virtual nvonnxparser::IOnnxConfig::Verbosity
getVerbosityLevel() const noexcept {
return mVerbosity;
}
virtual void addVerbosity() noexcept { ++mVerbosity; }
virtual void reduceVerbosity() noexcept { --mVerbosity; }
virtual void
setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept {
mVerbosity = verbosity;
}
virtual const char* getTextFileName() const noexcept {
return mTextFilename.c_str();
}
virtual void setTextFileName(const char* textFilename) noexcept {
mTextFilename = string(textFilename);
}
virtual const char* getFullTextFileName() const noexcept {
return mFullTextFilename.c_str();
}
virtual void setFullTextFileName(const char* fullTextFilename) noexcept {
mFullTextFilename = string(fullTextFilename);
}
virtual bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; }
virtual void setPrintLayerInfo(bool src) noexcept {
mPrintLayercInfo = src;
} //!< get the boolean variable corresponding to the Layer Info, see
//! getPrintLayerInfo()
virtual bool isDebug() const noexcept {
#if ONNX_DEBUG
return (std::getenv("ONNX_DEBUG") ? true : false);
#else
return false;
#endif
}
virtual void destroy() noexcept { delete this; }
}; // class ParserOnnxConfig
#endif

View File

@@ -0,0 +1,65 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_SAFE_COMMON_H
#define TENSORRT_SAFE_COMMON_H
#include "NvInferRuntimeCommon.h"
#include <cstdlib>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#define CHECK(status) \
do { \
auto ret = (status); \
if (ret != 0) { \
std::cerr << "Cuda failure: " << ret << std::endl; \
abort(); \
} \
} while (0)
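// Illustrative usage (a sketch, not part of the upstream sample; dst, src,
// bytes, and stream are hypothetical):
//   CHECK(cudaMemcpyAsync(dst, src, bytes, cudaMemcpyHostToDevice, stream));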
namespace samplesCommon {
template <typename T> inline std::shared_ptr<T> infer_object(T* obj) {
if (!obj) {
throw std::runtime_error("Failed to create object");
}
return std::shared_ptr<T>(obj);
}
inline uint32_t elementSize(nvinfer1::DataType t) {
switch (t) {
case nvinfer1::DataType::kINT32:
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kINT8:
return 1;
case nvinfer1::DataType::kBOOL:
return 1;
}
return 0;
}
template <typename A, typename B> inline A divUp(A x, B n) {
return (x + n - 1) / n;
}
} // namespace samplesCommon
#endif // TENSORRT_SAFE_COMMON_H

View File

@@ -0,0 +1,251 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SampleConfig_H
#define SampleConfig_H
#include <cstring>
#include <iostream>
#include <string>
#include "NvInfer.h"
#include "NvOnnxConfig.h"
class SampleConfig : public nvonnxparser::IOnnxConfig {
public:
enum class InputDataFormat : int { kASCII = 0, kPPM = 1 };
private:
std::string mModelFilename;
std::string mEngineFilename;
std::string mTextFilename;
std::string mFullTextFilename;
std::string mImageFilename;
std::string mReferenceFilename;
std::string mOutputFilename;
std::string mCalibrationFilename;
std::string mTimingCacheFilename;
int64_t mLabel{-1};
int64_t mMaxBatchSize{32};
int64_t mCalibBatchSize{0};
int64_t mMaxNCalibBatch{0};
int64_t mFirstCalibBatch{0};
int64_t mUseDLACore{-1};
nvinfer1::DataType mModelDtype{nvinfer1::DataType::kFLOAT};
bool mTF32{true};
Verbosity mVerbosity{static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)};
bool mPrintLayercInfo{false};
bool mDebugBuilder{false};
InputDataFormat mInputDataFormat{InputDataFormat::kASCII};
uint64_t mTopK{0};
float mFailurePercentage{-1.0f};
float mTolerance{0.0f};
float mAbsTolerance{1e-5f};
public:
SampleConfig() {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << " SampleConfig::ctor(): " << this << "\t" << std::endl;
}
#endif
}
protected:
~SampleConfig() {
#ifdef ONNX_DEBUG
if (isDebug()) {
std::cout << "SampleConfig::dtor(): " << this << std::endl;
}
#endif
}
public:
void setModelDtype(const nvinfer1::DataType mdt) noexcept {
mModelDtype = mdt;
}
nvinfer1::DataType getModelDtype() const noexcept { return mModelDtype; }
bool getTF32() const noexcept { return mTF32; }
void setTF32(bool enabled) noexcept { mTF32 = enabled; }
const char* getModelFileName() const noexcept {
return mModelFilename.c_str();
}
void setModelFileName(const char* onnxFilename) noexcept {
mModelFilename = std::string(onnxFilename);
}
Verbosity getVerbosityLevel() const noexcept { return mVerbosity; }
void addVerbosity() noexcept { ++mVerbosity; }
void reduceVerbosity() noexcept { --mVerbosity; }
virtual void setVerbosityLevel(Verbosity v) noexcept { mVerbosity = v; }
const char* getEngineFileName() const noexcept {
return mEngineFilename.c_str();
}
void setEngineFileName(const char* engineFilename) noexcept {
mEngineFilename = std::string(engineFilename);
}
const char* getTextFileName() const noexcept { return mTextFilename.c_str(); }
void setTextFileName(const char* textFilename) noexcept {
mTextFilename = std::string(textFilename);
}
const char* getFullTextFileName() const noexcept {
return mFullTextFilename.c_str();
}
void setFullTextFileName(const char* fullTextFilename) noexcept {
mFullTextFilename = std::string(fullTextFilename);
}
void setLabel(int64_t label) noexcept { mLabel = label; } //!< set the Label
int64_t getLabel() const noexcept { return mLabel; } //!< get the Label
bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; }
void setPrintLayerInfo(bool b) noexcept {
mPrintLayercInfo = b;
  } //!< set the boolean variable corresponding to the Layer Info, see
    //! getPrintLayerInfo()
void setMaxBatchSize(int64_t maxBatchSize) noexcept {
mMaxBatchSize = maxBatchSize;
} //!< set the Max Batch Size
int64_t getMaxBatchSize() const noexcept {
return mMaxBatchSize;
} //!< get the Max Batch Size
void setCalibBatchSize(int64_t CalibBatchSize) noexcept {
mCalibBatchSize = CalibBatchSize;
} //!< set the calibration batch size
int64_t getCalibBatchSize() const noexcept {
return mCalibBatchSize;
} //!< get calibration batch size
void setMaxNCalibBatch(int64_t MaxNCalibBatch) noexcept {
mMaxNCalibBatch = MaxNCalibBatch;
} //!< set Max Number of Calibration Batches
int64_t getMaxNCalibBatch() const noexcept {
return mMaxNCalibBatch;
} //!< get the Max Number of Calibration Batches
void setFirstCalibBatch(int64_t FirstCalibBatch) noexcept {
mFirstCalibBatch = FirstCalibBatch;
} //!< set the first calibration batch
int64_t getFirstCalibBatch() const noexcept {
return mFirstCalibBatch;
} //!< get the first calibration batch
void setUseDLACore(int64_t UseDLACore) noexcept {
mUseDLACore = UseDLACore;
} //!< set the DLA core to use
int64_t getUseDLACore() const noexcept {
return mUseDLACore;
} //!< get the DLA core to use
void setDebugBuilder() noexcept {
mDebugBuilder = true;
  } //!< enable debug info while building the engine
bool getDebugBuilder() const noexcept {
return mDebugBuilder;
  } //!< get the boolean variable corresponding to the debug builder
const char*
  getImageFileName() const noexcept //!< get the Image file name (PPM or ASCII)
{
return mImageFilename.c_str();
}
void setImageFileName(
      const char* imageFilename) noexcept //!< set the Image file name
{
mImageFilename = std::string(imageFilename);
}
const char* getReferenceFileName() const noexcept {
return mReferenceFilename.c_str();
}
void setReferenceFileName(
const char* referenceFilename) noexcept //!< set reference file name
{
mReferenceFilename = std::string(referenceFilename);
}
void setInputDataFormat(InputDataFormat idt) noexcept {
mInputDataFormat = idt;
} //!< specifies expected data format of the image file (PPM or ASCII)
InputDataFormat getInputDataFormat() const noexcept {
return mInputDataFormat;
} //!< returns the expected data format of the image file.
  const char* getOutputFileName()
      const noexcept //!< get the file name used to save the results
{
return mOutputFilename.c_str();
}
  void setOutputFileName(
      const char* outputFilename) noexcept //!< set the output file name
{
mOutputFilename = std::string(outputFilename);
}
const char* getCalibrationFileName() const noexcept {
return mCalibrationFilename.c_str();
  } //!< get the file containing the list of image files used for int8
//! calibration
void setCalibrationFileName(
      const char* calibrationFilename) noexcept //!< set the int8 calibration
//! list file name
{
mCalibrationFilename = std::string(calibrationFilename);
}
uint64_t getTopK() const noexcept { return mTopK; }
void setTopK(uint64_t topK) noexcept {
mTopK = topK;
  } //!< If this option is specified, return the top K probabilities.
float getFailurePercentage() const noexcept { return mFailurePercentage; }
void setFailurePercentage(float f) noexcept { mFailurePercentage = f; }
float getAbsoluteTolerance() const noexcept { return mAbsTolerance; }
void setAbsoluteTolerance(float a) noexcept { mAbsTolerance = a; }
float getTolerance() const noexcept { return mTolerance; }
void setTolerance(float t) noexcept { mTolerance = t; }
const char* getTimingCacheFilename() const noexcept {
return mTimingCacheFilename.c_str();
}
void setTimingCacheFileName(const char* timingCacheFilename) noexcept {
mTimingCacheFilename = std::string(timingCacheFilename);
}
bool isDebug() const noexcept {
#if ONNX_DEBUG
return (std::getenv("ONNX_DEBUG") ? true : false);
#else
return false;
#endif
}
void destroy() noexcept { delete this; }
}; // class SampleConfig
#endif
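
A short, hypothetical usage sketch for the class above; the file name and values are placeholders, and destroy() is used because the destructor is protected:

void sampleConfigSketch() {
  auto* config = new SampleConfig();
  config->setModelFileName("model.onnx");  // placeholder path
  config->setModelDtype(nvinfer1::DataType::kHALF);
  config->setMaxBatchSize(8);
  config->setInputDataFormat(SampleConfig::InputDataFormat::kPPM);
  config->setTopK(5);
  // ... hand 'config' to a sample that consumes nvonnxparser::IOnnxConfig ...
  config->destroy();  // protected destructor, so release through destroy()
}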

View File

@@ -0,0 +1,397 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_DEVICE_H
#define TRT_SAMPLE_DEVICE_H
#include <cassert>
#include <cuda.h>
#include <cuda_runtime.h>
#include <iostream>
#include <thread>
namespace sample {
inline void cudaCheck(cudaError_t ret, std::ostream& err = std::cerr) {
if (ret != cudaSuccess) {
err << "Cuda failure: " << cudaGetErrorString(ret) << std::endl;
abort();
}
}
class TrtCudaEvent;
namespace {
void cudaSleep(void* sleep) {
std::this_thread::sleep_for(
std::chrono::duration<float, std::milli>(*static_cast<float*>(sleep)));
}
} // namespace
//!
//! \class TrtCudaStream
//! \brief Managed CUDA stream
//!
class TrtCudaStream {
public:
TrtCudaStream() { cudaCheck(cudaStreamCreate(&mStream)); }
TrtCudaStream(const TrtCudaStream&) = delete;
TrtCudaStream& operator=(const TrtCudaStream&) = delete;
TrtCudaStream(TrtCudaStream&&) = delete;
TrtCudaStream& operator=(TrtCudaStream&&) = delete;
~TrtCudaStream() { cudaCheck(cudaStreamDestroy(mStream)); }
cudaStream_t get() const { return mStream; }
void synchronize() { cudaCheck(cudaStreamSynchronize(mStream)); }
void wait(TrtCudaEvent& event);
void sleep(float* ms) {
cudaCheck(cudaLaunchHostFunc(mStream, cudaSleep, ms));
}
private:
cudaStream_t mStream{};
};
//!
//! \class TrtCudaEvent
//! \brief Managed CUDA event
//!
class TrtCudaEvent {
public:
explicit TrtCudaEvent(bool blocking = true) {
const uint32_t flags = blocking ? cudaEventBlockingSync : cudaEventDefault;
cudaCheck(cudaEventCreateWithFlags(&mEvent, flags));
}
TrtCudaEvent(const TrtCudaEvent&) = delete;
TrtCudaEvent& operator=(const TrtCudaEvent&) = delete;
TrtCudaEvent(TrtCudaEvent&&) = delete;
TrtCudaEvent& operator=(TrtCudaEvent&&) = delete;
~TrtCudaEvent() { cudaCheck(cudaEventDestroy(mEvent)); }
cudaEvent_t get() const { return mEvent; }
void record(const TrtCudaStream& stream) {
cudaCheck(cudaEventRecord(mEvent, stream.get()));
}
void synchronize() { cudaCheck(cudaEventSynchronize(mEvent)); }
  // Returns the elapsed time in milliseconds
float operator-(const TrtCudaEvent& e) const {
float time{0};
cudaCheck(cudaEventElapsedTime(&time, e.get(), get()));
return time;
}
private:
cudaEvent_t mEvent{};
};
inline void TrtCudaStream::wait(TrtCudaEvent& event) {
cudaCheck(cudaStreamWaitEvent(mStream, event.get(), 0));
}
//!
//! \class TrtCudaGraph
//! \brief Managed CUDA graph
//!
class TrtCudaGraph {
public:
explicit TrtCudaGraph() = default;
TrtCudaGraph(const TrtCudaGraph&) = delete;
TrtCudaGraph& operator=(const TrtCudaGraph&) = delete;
TrtCudaGraph(TrtCudaGraph&&) = delete;
TrtCudaGraph& operator=(TrtCudaGraph&&) = delete;
~TrtCudaGraph() {
if (mGraphExec) {
cudaGraphExecDestroy(mGraphExec);
}
}
void beginCapture(TrtCudaStream& stream) {
cudaCheck(
cudaStreamBeginCapture(stream.get(), cudaStreamCaptureModeThreadLocal));
}
bool launch(TrtCudaStream& stream) {
return cudaGraphLaunch(mGraphExec, stream.get()) == cudaSuccess;
}
void endCapture(TrtCudaStream& stream) {
cudaCheck(cudaStreamEndCapture(stream.get(), &mGraph));
cudaCheck(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0));
cudaCheck(cudaGraphDestroy(mGraph));
}
void endCaptureOnError(TrtCudaStream& stream) {
    // There are two reasons why stream capture could fail:
    // (1) The stream is in the cudaErrorStreamCaptureInvalidated state.
    // (2) TRT reports a failure.
    // In case (1), the returned mGraph should be nullptr.
    // In case (2), the returned mGraph is not nullptr, but it should not be
    // used.
const auto ret = cudaStreamEndCapture(stream.get(), &mGraph);
if (ret == cudaErrorStreamCaptureInvalidated) {
assert(mGraph == nullptr);
} else {
assert(ret == cudaSuccess);
assert(mGraph != nullptr);
cudaCheck(cudaGraphDestroy(mGraph));
mGraph = nullptr;
}
// Clean up any CUDA error.
cudaGetLastError();
sample::gLogWarning << "The CUDA graph capture on the stream has failed."
<< std::endl;
}
private:
cudaGraph_t mGraph{};
cudaGraphExec_t mGraphExec{};
};
//!
//! \class TrtCudaBuffer
//! \brief Managed buffer for host and device
//!
template <typename A, typename D> class TrtCudaBuffer {
public:
TrtCudaBuffer() = default;
TrtCudaBuffer(const TrtCudaBuffer&) = delete;
TrtCudaBuffer& operator=(const TrtCudaBuffer&) = delete;
TrtCudaBuffer(TrtCudaBuffer&& rhs) {
reset(rhs.mPtr);
rhs.mPtr = nullptr;
}
TrtCudaBuffer& operator=(TrtCudaBuffer&& rhs) {
if (this != &rhs) {
reset(rhs.mPtr);
rhs.mPtr = nullptr;
}
return *this;
}
~TrtCudaBuffer() { reset(); }
TrtCudaBuffer(size_t size) { A()(&mPtr, size); }
void allocate(size_t size) {
reset();
A()(&mPtr, size);
}
void reset(void* ptr = nullptr) {
if (mPtr) {
D()(mPtr);
}
mPtr = ptr;
}
void* get() const { return mPtr; }
private:
void* mPtr{nullptr};
};
struct DeviceAllocator {
void operator()(void** ptr, size_t size) { cudaCheck(cudaMalloc(ptr, size)); }
};
struct DeviceDeallocator {
void operator()(void* ptr) { cudaCheck(cudaFree(ptr)); }
};
struct ManagedAllocator {
void operator()(void** ptr, size_t size) {
cudaCheck(cudaMallocManaged(ptr, size));
}
};
struct HostAllocator {
void operator()(void** ptr, size_t size) {
cudaCheck(cudaMallocHost(ptr, size));
}
};
struct HostDeallocator {
void operator()(void* ptr) { cudaCheck(cudaFreeHost(ptr)); }
};
using TrtDeviceBuffer = TrtCudaBuffer<DeviceAllocator, DeviceDeallocator>;
using TrtManagedBuffer = TrtCudaBuffer<ManagedAllocator, DeviceDeallocator>;
using TrtHostBuffer = TrtCudaBuffer<HostAllocator, HostDeallocator>;
//!
//! \class MirroredBuffer
//! \brief Coupled host and device buffers
//!
class IMirroredBuffer {
public:
//!
  //! Allocate memory for the mirrored buffer given the size
//! of the allocation.
//!
virtual void allocate(size_t size) = 0;
//!
//! Get the pointer to the device side buffer.
//!
//! \return pointer to device memory or nullptr if uninitialized.
//!
virtual void* getDeviceBuffer() const = 0;
//!
//! Get the pointer to the host side buffer.
//!
//! \return pointer to host memory or nullptr if uninitialized.
//!
virtual void* getHostBuffer() const = 0;
//!
//! Copy the memory from host to device.
//!
virtual void hostToDevice(TrtCudaStream& stream) = 0;
//!
//! Copy the memory from device to host.
//!
virtual void deviceToHost(TrtCudaStream& stream) = 0;
//!
//! Interface to get the size of the memory
//!
//! \return the size of memory allocated.
//!
virtual size_t getSize() const = 0;
//!
  //! Virtual destructor declaration
//!
virtual ~IMirroredBuffer() = default;
}; // class IMirroredBuffer
//!
//! Class to have a separate memory buffer for discrete device and host
//! allocations.
//!
class DiscreteMirroredBuffer : public IMirroredBuffer {
public:
void allocate(size_t size) {
mSize = size;
mHostBuffer.allocate(size);
mDeviceBuffer.allocate(size);
}
void* getDeviceBuffer() const { return mDeviceBuffer.get(); }
void* getHostBuffer() const { return mHostBuffer.get(); }
void hostToDevice(TrtCudaStream& stream) {
cudaCheck(cudaMemcpyAsync(mDeviceBuffer.get(), mHostBuffer.get(), mSize,
cudaMemcpyHostToDevice, stream.get()));
}
void deviceToHost(TrtCudaStream& stream) {
cudaCheck(cudaMemcpyAsync(mHostBuffer.get(), mDeviceBuffer.get(), mSize,
cudaMemcpyDeviceToHost, stream.get()));
}
size_t getSize() const { return mSize; }
private:
size_t mSize{0};
TrtHostBuffer mHostBuffer;
TrtDeviceBuffer mDeviceBuffer;
}; // class DiscreteMirroredBuffer
//!
//! Class to have a unified memory buffer for embedded devices.
//!
class UnifiedMirroredBuffer : public IMirroredBuffer {
public:
void allocate(size_t size) {
mSize = size;
mBuffer.allocate(size);
}
void* getDeviceBuffer() const { return mBuffer.get(); }
void* getHostBuffer() const { return mBuffer.get(); }
void hostToDevice(TrtCudaStream& stream) {
// Does nothing since we are using unified memory.
}
void deviceToHost(TrtCudaStream& stream) {
// Does nothing since we are using unified memory.
}
size_t getSize() const { return mSize; }
private:
size_t mSize{0};
TrtManagedBuffer mBuffer;
}; // class UnifiedMirroredBuffer
inline void setCudaDevice(int device, std::ostream& os) {
cudaCheck(cudaSetDevice(device));
cudaDeviceProp properties;
cudaCheck(cudaGetDeviceProperties(&properties, device));
// clang-format off
os << "=== Device Information ===" << std::endl;
os << "Selected Device: " << properties.name << std::endl;
os << "Compute Capability: " << properties.major << "." << properties.minor << std::endl;
os << "SMs: " << properties.multiProcessorCount << std::endl;
os << "Compute Clock Rate: " << properties.clockRate / 1000000.0F << " GHz" << std::endl;
os << "Device Global Memory: " << (properties.totalGlobalMem >> 20) << " MiB" << std::endl;
os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB" << std::endl;
os << "Memory Bus Width: " << properties.memoryBusWidth << " bits"
<< " (ECC " << (properties.ECCEnabled != 0 ? "enabled" : "disabled") << ")" << std::endl;
os << "Memory Clock Rate: " << properties.memoryClockRate / 1000000.0F << " GHz" << std::endl;
// clang-format on
}
} // namespace sample
#endif // TRT_SAMPLE_DEVICE_H
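
A brief sketch (illustrative only; assumes a CUDA-capable device and the wrappers above) of combining the stream, event, and mirrored-buffer helpers:

#include <cstring>

void sampleDeviceSketch() {
  sample::TrtCudaStream stream;
  sample::TrtCudaEvent start;
  sample::TrtCudaEvent stop;

  sample::DiscreteMirroredBuffer buffer;
  buffer.allocate(1 << 20);  // 1 MiB host buffer plus 1 MiB device buffer
  std::memset(buffer.getHostBuffer(), 0, buffer.getSize());

  start.record(stream);
  buffer.hostToDevice(stream);  // asynchronous H2D copy on 'stream'
  buffer.deviceToHost(stream);  // asynchronous D2H copy on 'stream'
  stop.record(stream);
  stop.synchronize();

  const float elapsedMs = stop - start;  // operator- returns milliseconds
  std::cout << "Round trip took " << elapsedMs << " ms" << std::endl;
}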

File diff suppressed because it is too large

View File

@@ -0,0 +1,195 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_ENGINES_H
#define TRT_SAMPLE_ENGINES_H
#include <iostream>
#include <vector>
#include "NvCaffeParser.h"
#include "NvInfer.h"
#include "NvInferConsistency.h"
#include "NvInferSafeRuntime.h"
#include "NvOnnxParser.h"
#include "sampleOptions.h"
#include "sampleUtils.h"
namespace sample {
struct Parser {
TrtUniquePtr<nvcaffeparser1::ICaffeParser> caffeParser;
TrtUniquePtr<nvonnxparser::IParser> onnxParser;
operator bool() const { return caffeParser || onnxParser; }
};
struct BuildEnvironment {
TrtUniquePtr<INetworkDefinition> network;
  //! Parser that creates the network. Must be declared *after* network, so
  //! that when ~BuildEnvironment() executes, the parser is destroyed before
  //! the network is destroyed.
Parser parser;
TrtUniquePtr<nvinfer1::ICudaEngine> engine;
std::unique_ptr<nvinfer1::safe::ICudaEngine> safeEngine;
std::vector<uint8_t> engineBlob;
};
//!
//! \brief Generate a network definition for a given model
//!
//! \return Parser The parser used to initialize the network and that holds the
//! weights for the network, or an invalid parser (the returned parser
//! converts to false if tested)
//!
//! Constant input dimensions in the model must not be changed in the
//! corresponding network definition, because its correctness may rely on the
//! constants.
//!
//! \see Parser::operator bool()
//!
Parser modelToNetwork(const ModelOptions& model,
nvinfer1::INetworkDefinition& network, std::ostream& err);
//!
//! \brief Set up network and config
//!
//! \return boolean Return true if network and config were successfully set
//!
bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys,
IBuilder& builder, INetworkDefinition& network,
IBuilderConfig& config, std::ostream& err,
std::vector<std::vector<char>>& sparseWeights);
//!
//! \brief Log refittable layers and weights of a refittable engine
//!
void dumpRefittable(nvinfer1::ICudaEngine& engine);
//!
//! \brief Load a serialized engine
//!
//! \return Pointer to the engine loaded or nullptr if the operation failed
//!
nvinfer1::ICudaEngine* loadEngine(const std::string& engine, int DLACore,
std::ostream& err);
//!
//! \brief Save an engine into a file
//!
//! \return boolean Return true if the engine was successfully saved
//!
bool saveEngine(const nvinfer1::ICudaEngine& engine,
const std::string& fileName, std::ostream& err);
//!
//! \brief Create an engine from a model or a serialized file, and optionally
//! save the engine
//!
//! \return true if the engine was successfully created and stored in env
//!
bool getEngineBuildEnv(const ModelOptions& model, const BuildOptions& build,
const SystemOptions& sys, BuildEnvironment& env,
std::ostream& err);
//!
//! \brief Create an engine from model or serialized file, and optionally save
//! engine
//!
//! \return Pointer to the engine created or nullptr if the creation failed
//!
inline TrtUniquePtr<nvinfer1::ICudaEngine> getEngine(const ModelOptions& model,
const BuildOptions& build,
const SystemOptions& sys,
std::ostream& err) {
BuildEnvironment env;
TrtUniquePtr<nvinfer1::ICudaEngine> engine;
if (getEngineBuildEnv(model, build, sys, env, err)) {
engine.swap(env.engine);
}
return engine;
}
//!
//! \brief Create a serialized network
//!
//! \return Pointer to a host memory for a serialized network
//!
IHostMemory* networkToSerialized(const BuildOptions& build,
const SystemOptions& sys, IBuilder& builder,
INetworkDefinition& network,
std::ostream& err);
//!
//! \brief Transfer a model to a serialized network
//!
//! \return Pointer to a host memory for a serialized network
//!
IHostMemory* modelToSerialized(const ModelOptions& model,
const BuildOptions& build,
const SystemOptions& sys, std::ostream& err);
//!
//! \brief Serialize network and save it into a file
//!
//! \return boolean Return true if the network was successfully serialized and
//! saved
//!
bool serializeAndSave(const ModelOptions& model, const BuildOptions& build,
const SystemOptions& sys, std::ostream& err);
bool timeRefit(const INetworkDefinition& network, nvinfer1::ICudaEngine& engine,
bool multiThreading);
//!
//! \brief Set tensor scales from a calibration table
//!
void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network,
const std::vector<IOFormat>& inputFormats,
const std::vector<IOFormat>& outputFormats,
const std::string& calibrationFile);
//!
//! \brief Check if safe runtime is loaded.
//!
bool hasSafeRuntime();
//!
//! \brief Create a safe runtime object if the dynamic library is loaded.
//!
nvinfer1::safe::IRuntime*
createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept;
//!
//! \brief Check if consistency checker is loaded.
//!
bool hasConsistencyChecker();
//!
//! \brief Create a consistency checker object if the dynamic library is loaded.
//!
nvinfer1::consistency::IConsistencyChecker*
createConsistencyChecker(nvinfer1::ILogger& logger,
IHostMemory const* engine) noexcept;
//!
//! \brief Run consistency check on serialized engine.
//!
bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize);
} // namespace sample
#endif // TRT_SAMPLE_ENGINES_H
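
A hedged sketch of the typical call order for the declarations above; the option structs are default-constructed here purely for illustration (trtexec fills them from command-line flags), and the output path is a placeholder:

bool buildAndSaveSketch(std::ostream& err) {
  sample::ModelOptions model;  // normally populated by the options parser
  sample::BuildOptions build;
  sample::SystemOptions sys;

  auto engine = sample::getEngine(model, build, sys, err);
  if (!engine) {
    return false;
  }
  return sample::saveEngine(*engine, "engine.plan", err);  // placeholder name
}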

View File

@@ -0,0 +1,943 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <array>
#include <chrono>
#include <cuda_profiler_api.h>
#include <functional>
#include <limits>
#include <memory>
#include <mutex>
#include <numeric>
#include <thread>
#include <utility>
#include <vector>
#if defined(__QNX__)
#include <sys/neutrino.h>
#include <sys/syspage.h>
#endif
#include "NvInfer.h"
#include "ErrorRecorder.h"
#include "logger.h"
#include "sampleDevice.h"
#include "sampleEngines.h"
#include "sampleInference.h"
#include "sampleOptions.h"
#include "sampleReporting.h"
#include "sampleUtils.h"
namespace sample {
template <class MapType, class EngineType>
bool validateTensorNames(const MapType& map, const EngineType* engine,
const int32_t endBindingIndex) {
  // Check if the provided input tensor names match the input tensors of the
  // engine. Report an error if a provided input tensor name cannot be found,
  // because that implies a potential typo.
for (const auto& item : map) {
bool tensorNameFound{false};
for (int32_t b = 0; b < endBindingIndex; ++b) {
if (engine->bindingIsInput(b) &&
engine->getBindingName(b) == item.first) {
tensorNameFound = true;
break;
}
}
if (!tensorNameFound) {
sample::gLogError
<< "Cannot find input tensor with name \"" << item.first
<< "\" in the engine bindings! "
<< "Please make sure the input tensor names are correct."
<< std::endl;
return false;
}
}
return true;
}
template <class EngineType, class ContextType> class FillBindingClosure {
private:
using InputsMap = std::unordered_map<std::string, std::string>;
using BindingsVector = std::vector<std::unique_ptr<Bindings>>;
EngineType const* engine;
ContextType const* context;
InputsMap const& inputs;
BindingsVector& bindings;
int32_t batch;
int32_t endBindingIndex;
void fillOneBinding(int32_t bindingIndex, int64_t vol) {
auto const dims = getDims(bindingIndex);
auto const name = engine->getBindingName(bindingIndex);
auto const isInput = engine->bindingIsInput(bindingIndex);
auto const dataType = engine->getBindingDataType(bindingIndex);
auto const* bindingInOutStr = isInput ? "input" : "output";
for (auto& binding : bindings) {
const auto input = inputs.find(name);
if (isInput && input != inputs.end()) {
sample::gLogInfo << "Using values loaded from " << input->second
<< " for input " << name << std::endl;
binding->addBinding(bindingIndex, name, isInput, vol, dataType,
input->second);
} else {
sample::gLogInfo << "Using random values for " << bindingInOutStr << " "
<< name << std::endl;
binding->addBinding(bindingIndex, name, isInput, vol, dataType);
}
sample::gLogInfo << "Created " << bindingInOutStr << " binding for "
<< name << " with dimensions " << dims << std::endl;
}
}
bool fillAllBindings(int32_t batch, int32_t endBindingIndex) {
if (!validateTensorNames(inputs, engine, endBindingIndex)) {
sample::gLogError << "Invalid tensor names found in --loadInputs flag."
<< std::endl;
return false;
}
for (int32_t b = 0; b < endBindingIndex; b++) {
auto const dims = getDims(b);
auto const comps = engine->getBindingComponentsPerElement(b);
auto const strides = context->getStrides(b);
int32_t const vectorDimIndex = engine->getBindingVectorizedDim(b);
auto const vol = volume(dims, strides, vectorDimIndex, comps, batch);
fillOneBinding(b, vol);
}
return true;
}
Dims getDims(int32_t bindingIndex);
public:
FillBindingClosure(EngineType const* _engine, ContextType const* _context,
InputsMap const& _inputs, BindingsVector& _bindings,
int32_t _batch, int32_t _endBindingIndex)
: engine(_engine), context(_context), inputs(_inputs),
bindings(_bindings), batch(_batch), endBindingIndex(_endBindingIndex) {}
bool operator()() { return fillAllBindings(batch, endBindingIndex); }
};
template <>
Dims FillBindingClosure<nvinfer1::ICudaEngine, nvinfer1::IExecutionContext>::
getDims(int32_t bindingIndex) {
return context->getBindingDimensions(bindingIndex);
}
template <>
Dims FillBindingClosure<
nvinfer1::safe::ICudaEngine,
nvinfer1::safe::IExecutionContext>::getDims(int32_t bindingIndex) {
return engine->getBindingDimensions(bindingIndex);
}
bool setUpInference(InferenceEnvironment& iEnv,
const InferenceOptions& inference) {
int32_t device{};
cudaCheck(cudaGetDevice(&device));
cudaDeviceProp properties;
cudaCheck(cudaGetDeviceProperties(&properties, device));
// Use managed memory on integrated devices when transfers are skipped
// and when it is explicitly requested on the commandline.
bool useManagedMemory{(inference.skipTransfers && properties.integrated) ||
inference.useManaged};
using FillSafeBindings =
FillBindingClosure<nvinfer1::safe::ICudaEngine,
nvinfer1::safe::IExecutionContext>;
if (iEnv.safe) {
ASSERT(sample::hasSafeRuntime());
auto* safeEngine = iEnv.safeEngine.get();
for (int32_t s = 0; s < inference.streams; ++s) {
iEnv.safeContext.emplace_back(safeEngine->createExecutionContext());
iEnv.bindings.emplace_back(new Bindings(useManagedMemory));
}
const int32_t nBindings = safeEngine->getNbBindings();
auto const* safeContext = iEnv.safeContext.front().get();
    // batch is set to 1 because the safe runtime only supports explicit batch.
return FillSafeBindings(iEnv.safeEngine.get(), safeContext,
inference.inputs, iEnv.bindings, 1, nBindings)();
}
using FillStdBindings =
FillBindingClosure<nvinfer1::ICudaEngine, nvinfer1::IExecutionContext>;
for (int32_t s = 0; s < inference.streams; ++s) {
auto ec = iEnv.engine->createExecutionContext();
if (ec == nullptr) {
sample::gLogError << "Unable to create execution context for stream " << s
<< "." << std::endl;
return false;
}
iEnv.context.emplace_back(ec);
iEnv.bindings.emplace_back(new Bindings(useManagedMemory));
}
if (iEnv.profiler) {
iEnv.context.front()->setProfiler(iEnv.profiler.get());
// Always run reportToProfiler() after enqueue launch
iEnv.context.front()->setEnqueueEmitsProfile(false);
}
const int32_t nOptProfiles = iEnv.engine->getNbOptimizationProfiles();
const int32_t nBindings = iEnv.engine->getNbBindings();
const int32_t bindingsInProfile =
nOptProfiles > 0 ? nBindings / nOptProfiles : 0;
const int32_t endBindingIndex =
bindingsInProfile ? bindingsInProfile : iEnv.engine->getNbBindings();
if (nOptProfiles > 1) {
sample::gLogWarning << "Multiple profiles are currently not supported. "
"Running with one profile."
<< std::endl;
}
  // Make sure that the tensor names provided in command-line args actually
  // exist in the engine bindings to avoid silent typos.
if (!validateTensorNames(inference.shapes, iEnv.engine.get(),
endBindingIndex)) {
sample::gLogError << "Invalid tensor names found in --shapes flag."
<< std::endl;
return false;
}
// Set all input dimensions before all bindings can be allocated
for (int32_t b = 0; b < endBindingIndex; ++b) {
if (iEnv.engine->bindingIsInput(b)) {
auto dims = iEnv.context.front()->getBindingDimensions(b);
const bool isScalar = dims.nbDims == 0;
const bool isDynamicInput =
std::any_of(dims.d, dims.d + dims.nbDims,
[](int32_t dim) { return dim == -1; }) ||
iEnv.engine->isShapeBinding(b);
if (isDynamicInput) {
auto shape = inference.shapes.find(iEnv.engine->getBindingName(b));
std::vector<int32_t> staticDims;
if (shape == inference.shapes.end()) {
// If no shape is provided, set dynamic dimensions to 1.
constexpr int32_t DEFAULT_DIMENSION = 1;
if (iEnv.engine->isShapeBinding(b)) {
if (isScalar) {
staticDims.push_back(1);
} else {
staticDims.resize(dims.d[0]);
std::fill(staticDims.begin(), staticDims.end(),
DEFAULT_DIMENSION);
}
} else {
staticDims.resize(dims.nbDims);
std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(),
[&](int32_t dimension) {
return dimension >= 0 ? dimension
: DEFAULT_DIMENSION;
});
}
sample::gLogWarning << "Dynamic dimensions required for input: "
<< iEnv.engine->getBindingName(b)
<< ", but no shapes were provided. Automatically "
"overriding shape to: "
<< staticDims << std::endl;
} else if (inference.inputs.count(shape->first) &&
iEnv.engine->isShapeBinding(b)) {
if (isScalar || dims.nbDims == 1) {
// Load shape tensor from file.
size_t const size = isScalar ? 1 : dims.d[0];
staticDims.resize(size);
auto const& filename = inference.inputs.at(shape->first);
auto dst = reinterpret_cast<char*>(staticDims.data());
loadFromFile(filename, dst,
size * sizeof(decltype(staticDims)::value_type));
} else {
sample::gLogWarning << "Cannot load shape tensor " << shape->first
<< " from file, "
<< "ND-Shape isn't supported yet" << std::endl;
// Fallback
staticDims = shape->second;
}
} else {
staticDims = shape->second;
}
for (auto& c : iEnv.context) {
if (iEnv.engine->isShapeBinding(b)) {
if (!c->setInputShapeBinding(b, staticDims.data())) {
return false;
}
} else {
if (!c->setBindingDimensions(b, toDims(staticDims))) {
return false;
}
}
}
}
}
}
auto* engine = iEnv.engine.get();
auto const* context = iEnv.context.front().get();
int32_t const batch =
engine->hasImplicitBatchDimension() ? inference.batch : 1;
return FillStdBindings(engine, context, inference.inputs, iEnv.bindings,
batch, endBindingIndex)();
}
namespace {
#if defined(__QNX__)
using TimePoint = double;
#else
using TimePoint = std::chrono::time_point<std::chrono::high_resolution_clock>;
#endif
TimePoint getCurrentTime() {
#if defined(__QNX__)
uint64_t const currentCycles = ClockCycles();
uint64_t const cyclesPerSecond = SYSPAGE_ENTRY(qtime)->cycles_per_sec;
// Return current timestamp in ms.
return static_cast<TimePoint>(currentCycles) * 1000. / cyclesPerSecond;
#else
return std::chrono::high_resolution_clock::now();
#endif
}
//!
//! \struct SyncStruct
//! \brief Threads synchronization structure
//!
struct SyncStruct {
std::mutex mutex;
TrtCudaStream mainStream;
TrtCudaEvent gpuStart{cudaEventBlockingSync};
TimePoint cpuStart{};
float sleep{};
};
struct Enqueue {
explicit Enqueue(nvinfer1::IExecutionContext& context, void** buffers)
: mContext(context), mBuffers(buffers) {}
nvinfer1::IExecutionContext& mContext;
void** mBuffers{};
};
//!
//! \class EnqueueImplicit
//! \brief Functor to enqueue inference with implicit batch
//!
class EnqueueImplicit : private Enqueue {
public:
explicit EnqueueImplicit(nvinfer1::IExecutionContext& context, void** buffers,
int32_t batch)
: Enqueue(context, buffers), mBatch(batch) {}
bool operator()(TrtCudaStream& stream) const {
if (mContext.enqueue(mBatch, mBuffers, stream.get(), nullptr)) {
// Collecting layer timing info from current profile index of execution
// context
if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() &&
!mContext.reportToProfiler()) {
gLogWarning
<< "Failed to collect layer timing info from previous enqueue()"
<< std::endl;
}
return true;
}
return false;
}
private:
int32_t mBatch;
};
//!
//! \class EnqueueExplicit
//! \brief Functor to enqueue inference with explicit batch
//!
class EnqueueExplicit : private Enqueue {
public:
explicit EnqueueExplicit(nvinfer1::IExecutionContext& context, void** buffers)
: Enqueue(context, buffers) {}
bool operator()(TrtCudaStream& stream) const {
if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) {
// Collecting layer timing info from current profile index of execution
// context
if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() &&
!mContext.reportToProfiler()) {
gLogWarning
<< "Failed to collect layer timing info from previous enqueueV2()"
<< std::endl;
}
return true;
}
return false;
}
};
//!
//! \class EnqueueGraph
//! \brief Functor to enqueue inference from CUDA Graph
//!
class EnqueueGraph {
public:
explicit EnqueueGraph(nvinfer1::IExecutionContext& context,
TrtCudaGraph& graph)
: mGraph(graph), mContext(context) {}
bool operator()(TrtCudaStream& stream) const {
if (mGraph.launch(stream)) {
// Collecting layer timing info from current profile index of execution
// context
if (mContext.getProfiler() && !mContext.reportToProfiler()) {
gLogWarning << "Failed to collect layer timing info from previous CUDA "
"graph launch"
<< std::endl;
}
return true;
}
return false;
}
TrtCudaGraph& mGraph;
nvinfer1::IExecutionContext& mContext;
};
//!
//! \class EnqueueSafe
//! \brief Functor to enqueue safe execution context
//!
class EnqueueSafe {
public:
explicit EnqueueSafe(nvinfer1::safe::IExecutionContext& context,
void** buffers)
: mContext(context), mBuffers(buffers) {}
bool operator()(TrtCudaStream& stream) const {
if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) {
return true;
}
return false;
}
nvinfer1::safe::IExecutionContext& mContext;
void** mBuffers{};
};
using EnqueueFunction = std::function<bool(TrtCudaStream&)>;
enum class StreamType : int32_t {
kINPUT = 0,
kCOMPUTE = 1,
kOUTPUT = 2,
kNUM = 3
};
enum class EventType : int32_t {
kINPUT_S = 0,
kINPUT_E = 1,
kCOMPUTE_S = 2,
kCOMPUTE_E = 3,
kOUTPUT_S = 4,
kOUTPUT_E = 5,
kNUM = 6
};
using MultiStream =
std::array<TrtCudaStream, static_cast<int32_t>(StreamType::kNUM)>;
using MultiEvent = std::array<std::unique_ptr<TrtCudaEvent>,
static_cast<int32_t>(EventType::kNUM)>;
using EnqueueTimes = std::array<TimePoint, 2>;
//!
//! \class Iteration
//! \brief Inference iteration and streams management
//!
template <class ContextType> class Iteration {
public:
Iteration(int32_t id, const InferenceOptions& inference, ContextType& context,
Bindings& bindings)
: mBindings(bindings), mStreamId(id), mDepth(1 + inference.overlap),
mActive(mDepth), mEvents(mDepth), mEnqueueTimes(mDepth),
mContext(&context) {
for (int32_t d = 0; d < mDepth; ++d) {
for (int32_t e = 0; e < static_cast<int32_t>(EventType::kNUM); ++e) {
mEvents[d][e].reset(new TrtCudaEvent(!inference.spin));
}
}
createEnqueueFunction(inference, context, bindings);
}
bool query(bool skipTransfers) {
if (mActive[mNext]) {
return true;
}
if (!skipTransfers) {
record(EventType::kINPUT_S, StreamType::kINPUT);
mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
record(EventType::kINPUT_E, StreamType::kINPUT);
wait(EventType::kINPUT_E,
StreamType::kCOMPUTE); // Wait for input DMA before compute
}
record(EventType::kCOMPUTE_S, StreamType::kCOMPUTE);
recordEnqueueTime();
if (!mEnqueue(getStream(StreamType::kCOMPUTE))) {
return false;
}
recordEnqueueTime();
record(EventType::kCOMPUTE_E, StreamType::kCOMPUTE);
if (!skipTransfers) {
wait(EventType::kCOMPUTE_E,
StreamType::kOUTPUT); // Wait for compute before output DMA
record(EventType::kOUTPUT_S, StreamType::kOUTPUT);
mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
record(EventType::kOUTPUT_E, StreamType::kOUTPUT);
}
mActive[mNext] = true;
moveNext();
return true;
}
float sync(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart,
std::vector<InferenceTrace>& trace, bool skipTransfers) {
if (mActive[mNext]) {
if (skipTransfers) {
getEvent(EventType::kCOMPUTE_E).synchronize();
} else {
getEvent(EventType::kOUTPUT_E).synchronize();
}
trace.emplace_back(getTrace(cpuStart, gpuStart, skipTransfers));
mActive[mNext] = false;
return getEvent(EventType::kCOMPUTE_S) - gpuStart;
}
return 0;
}
void syncAll(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart,
std::vector<InferenceTrace>& trace, bool skipTransfers) {
for (int32_t d = 0; d < mDepth; ++d) {
sync(cpuStart, gpuStart, trace, skipTransfers);
moveNext();
}
}
void wait(TrtCudaEvent& gpuStart) {
getStream(StreamType::kINPUT).wait(gpuStart);
}
void setInputData() {
mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
}
void fetchOutputData() {
mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
}
private:
void moveNext() { mNext = mDepth - 1 - mNext; }
TrtCudaStream& getStream(StreamType t) {
return mStream[static_cast<int32_t>(t)];
}
TrtCudaEvent& getEvent(EventType t) {
return *mEvents[mNext][static_cast<int32_t>(t)];
}
void record(EventType e, StreamType s) { getEvent(e).record(getStream(s)); }
void recordEnqueueTime() {
mEnqueueTimes[mNext][enqueueStart] = getCurrentTime();
enqueueStart = 1 - enqueueStart;
}
TimePoint getEnqueueTime(bool start) {
return mEnqueueTimes[mNext][start ? 0 : 1];
}
void wait(EventType e, StreamType s) { getStream(s).wait(getEvent(e)); }
InferenceTrace getTrace(const TimePoint& cpuStart,
const TrtCudaEvent& gpuStart, bool skipTransfers) {
float is = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart
: getEvent(EventType::kINPUT_S) - gpuStart;
float ie = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart
: getEvent(EventType::kINPUT_E) - gpuStart;
float os = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart
: getEvent(EventType::kOUTPUT_S) - gpuStart;
float oe = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart
: getEvent(EventType::kOUTPUT_E) - gpuStart;
return InferenceTrace(mStreamId,
std::chrono::duration<float, std::milli>(
getEnqueueTime(true) - cpuStart)
.count(),
std::chrono::duration<float, std::milli>(
getEnqueueTime(false) - cpuStart)
.count(),
is, ie, getEvent(EventType::kCOMPUTE_S) - gpuStart,
getEvent(EventType::kCOMPUTE_E) - gpuStart, os, oe);
}
void createEnqueueFunction(const InferenceOptions& inference,
nvinfer1::IExecutionContext& context,
Bindings& bindings) {
if (inference.batch) {
mEnqueue = EnqueueFunction(EnqueueImplicit(
context, mBindings.getDeviceBuffers(), inference.batch));
} else {
mEnqueue = EnqueueFunction(
EnqueueExplicit(context, mBindings.getDeviceBuffers()));
}
if (inference.graph) {
TrtCudaStream& stream = getStream(StreamType::kCOMPUTE);
      // Avoid capturing initialization calls by executing the enqueue function
      // at least once before starting CUDA graph capture.
const auto ret = mEnqueue(stream);
assert(ret);
stream.synchronize();
mGraph.beginCapture(stream);
// The built TRT engine may contain operations that are not permitted
// under CUDA graph capture mode.
// When the stream is capturing, the enqueue call may return false if the
// current CUDA graph capture fails.
if (mEnqueue(stream)) {
mGraph.endCapture(stream);
mEnqueue = EnqueueFunction(EnqueueGraph(context, mGraph));
} else {
mGraph.endCaptureOnError(stream);
// Ensure any CUDA error has been cleaned up.
cudaCheck(cudaGetLastError());
sample::gLogWarning << "The built TensorRT engine contains operations "
"that are not permitted under "
"CUDA graph capture mode."
<< std::endl;
sample::gLogWarning << "The specified --useCudaGraph flag has been "
"ignored. The inference will be "
"launched without using CUDA graph launch."
<< std::endl;
}
}
}
void createEnqueueFunction(const InferenceOptions&,
nvinfer1::safe::IExecutionContext& context,
Bindings&) {
mEnqueue =
EnqueueFunction(EnqueueSafe(context, mBindings.getDeviceBuffers()));
}
Bindings& mBindings;
TrtCudaGraph mGraph;
EnqueueFunction mEnqueue;
int32_t mStreamId{0};
int32_t mNext{0};
int32_t mDepth{2}; // default to double buffer to hide DMA transfers
std::vector<bool> mActive;
MultiStream mStream;
std::vector<MultiEvent> mEvents;
int32_t enqueueStart{0};
std::vector<EnqueueTimes> mEnqueueTimes;
ContextType* mContext{nullptr};
};
template <class ContextType>
bool inferenceLoop(
std::vector<std::unique_ptr<Iteration<ContextType>>>& iStreams,
const TimePoint& cpuStart, const TrtCudaEvent& gpuStart, int iterations,
float maxDurationMs, float warmupMs, std::vector<InferenceTrace>& trace,
bool skipTransfers, float idleMs) {
float durationMs = 0;
int32_t skip = 0;
for (int32_t i = 0; i < iterations + skip || durationMs < maxDurationMs;
++i) {
for (auto& s : iStreams) {
if (!s->query(skipTransfers)) {
return false;
}
}
for (auto& s : iStreams) {
durationMs = std::max(durationMs,
s->sync(cpuStart, gpuStart, trace, skipTransfers));
}
if (durationMs < warmupMs) // Warming up
{
if (durationMs) // Skip complete iterations
{
++skip;
}
continue;
}
if (idleMs != 0.F) {
std::this_thread::sleep_for(
std::chrono::duration<float, std::milli>(idleMs));
}
}
for (auto& s : iStreams) {
s->syncAll(cpuStart, gpuStart, trace, skipTransfers);
}
return true;
}
template <class ContextType>
void inferenceExecution(const InferenceOptions& inference,
InferenceEnvironment& iEnv, SyncStruct& sync,
const int32_t threadIdx, const int32_t streamsPerThread,
int32_t device, std::vector<InferenceTrace>& trace) {
float warmupMs = inference.warmup;
float durationMs = inference.duration * 1000.F + warmupMs;
cudaCheck(cudaSetDevice(device));
std::vector<std::unique_ptr<Iteration<ContextType>>> iStreams;
for (int32_t s = 0; s < streamsPerThread; ++s) {
const int32_t streamId{threadIdx * streamsPerThread + s};
auto* iteration = new Iteration<ContextType>(
streamId, inference, *iEnv.template getContext<ContextType>(streamId),
*iEnv.bindings[streamId]);
if (inference.skipTransfers) {
iteration->setInputData();
}
iStreams.emplace_back(iteration);
}
for (auto& s : iStreams) {
s->wait(sync.gpuStart);
}
std::vector<InferenceTrace> localTrace;
if (!inferenceLoop(iStreams, sync.cpuStart, sync.gpuStart,
inference.iterations, durationMs, warmupMs, localTrace,
inference.skipTransfers, inference.idle)) {
iEnv.error = true;
}
if (inference.skipTransfers) {
for (auto& s : iStreams) {
s->fetchOutputData();
}
}
sync.mutex.lock();
trace.insert(trace.end(), localTrace.begin(), localTrace.end());
sync.mutex.unlock();
}
inline std::thread makeThread(const InferenceOptions& inference,
InferenceEnvironment& iEnv, SyncStruct& sync,
int32_t threadIdx, int32_t streamsPerThread,
int32_t device,
std::vector<InferenceTrace>& trace) {
if (iEnv.safe) {
ASSERT(sample::hasSafeRuntime());
return std::thread(inferenceExecution<nvinfer1::safe::IExecutionContext>,
std::cref(inference), std::ref(iEnv), std::ref(sync),
threadIdx, streamsPerThread, device, std::ref(trace));
}
return std::thread(inferenceExecution<nvinfer1::IExecutionContext>,
std::cref(inference), std::ref(iEnv), std::ref(sync),
threadIdx, streamsPerThread, device, std::ref(trace));
}
} // namespace
bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv,
int32_t device, std::vector<InferenceTrace>& trace) {
cudaCheck(cudaProfilerStart());
trace.resize(0);
SyncStruct sync;
sync.sleep = inference.sleep;
sync.mainStream.sleep(&sync.sleep);
sync.cpuStart = getCurrentTime();
sync.gpuStart.record(sync.mainStream);
// When multiple streams are used, trtexec can run inference in two modes:
// (1) if inference.threads is true, then run each stream on each thread.
// (2) if inference.threads is false, then run all streams on the same thread.
const int32_t numThreads = inference.threads ? inference.streams : 1;
const int32_t streamsPerThread = inference.threads ? 1 : inference.streams;
std::vector<std::thread> threads;
for (int32_t threadIdx = 0; threadIdx < numThreads; ++threadIdx) {
threads.emplace_back(makeThread(inference, iEnv, sync, threadIdx,
streamsPerThread, device, trace));
}
for (auto& th : threads) {
th.join();
}
cudaCheck(cudaProfilerStop());
auto cmpTrace = [](const InferenceTrace& a, const InferenceTrace& b) {
return a.h2dStart < b.h2dStart;
};
std::sort(trace.begin(), trace.end(), cmpTrace);
return !iEnv.error;
}
namespace {
size_t reportGpuMemory() {
static size_t prevFree{0};
size_t free{0};
size_t total{0};
size_t newlyAllocated{0};
cudaCheck(cudaMemGetInfo(&free, &total));
sample::gLogInfo << "Free GPU memory = " << free / 1024.0_MiB << " GiB";
if (prevFree != 0) {
newlyAllocated = (prevFree - free);
sample::gLogInfo << ", newly allocated GPU memory = "
<< newlyAllocated / 1024.0_MiB << " GiB";
}
sample::gLogInfo << ", total GPU memory = " << total / 1024.0_MiB << " GiB"
<< std::endl;
prevFree = free;
return newlyAllocated;
}
} // namespace
//! Returns true if deserialization is slower than expected or fails.
bool timeDeserialize(InferenceEnvironment& iEnv) {
constexpr int32_t kNB_ITERS{20};
std::unique_ptr<IRuntime> rt{
createInferRuntime(sample::gLogger.getTRTLogger())};
std::unique_ptr<ICudaEngine> engine;
std::unique_ptr<safe::IRuntime> safeRT{
sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())};
std::unique_ptr<safe::ICudaEngine> safeEngine;
if (iEnv.safe) {
ASSERT(sample::hasSafeRuntime() && safeRT != nullptr);
safeRT->setErrorRecorder(&gRecorder);
}
auto timeDeserializeFn = [&]() -> float {
bool deserializeOK{false};
engine.reset(nullptr);
safeEngine.reset(nullptr);
auto startClock = std::chrono::high_resolution_clock::now();
if (iEnv.safe) {
safeEngine.reset(safeRT->deserializeCudaEngine(iEnv.engineBlob.data(),
iEnv.engineBlob.size()));
deserializeOK = (safeEngine != nullptr);
} else {
engine.reset(rt->deserializeCudaEngine(iEnv.engineBlob.data(),
iEnv.engineBlob.size(), nullptr));
deserializeOK = (engine != nullptr);
}
auto endClock = std::chrono::high_resolution_clock::now();
// return NAN if deserialization failed.
return deserializeOK
? std::chrono::duration<float, std::milli>(endClock - startClock)
.count()
: NAN;
};
// Warmup the caches to make sure that cache thrashing isn't throwing off the
// results
{
sample::gLogInfo << "Begin deserialization warmup..." << std::endl;
for (int32_t i = 0, e = 2; i < e; ++i) {
timeDeserializeFn();
}
}
sample::gLogInfo << "Begin deserialization engine timing..." << std::endl;
float const first = timeDeserializeFn();
  // Check if the first deserialization succeeded.
if (std::isnan(first)) {
sample::gLogError << "Engine deserialization failed." << std::endl;
return true;
}
sample::gLogInfo << "First deserialization time = " << first
<< " milliseconds" << std::endl;
// Record initial gpu memory state.
reportGpuMemory();
float totalTime{0.F};
for (int32_t i = 0; i < kNB_ITERS; ++i) {
totalTime += timeDeserializeFn();
}
const auto averageTime = totalTime / kNB_ITERS;
  // reportGpuMemory sometimes reports zero after a single deserialization of a
  // small engine, so use the memory allocated across all the iterations.
const auto totalEngineSizeGpu = reportGpuMemory();
sample::gLogInfo << "Total deserialization time = " << totalTime
<< " milliseconds in " << kNB_ITERS
<< " iterations, average time = " << averageTime
<< " milliseconds, first time = " << first
<< " milliseconds." << std::endl;
sample::gLogInfo << "Deserialization Bandwidth = "
<< 1E-6 * totalEngineSizeGpu / totalTime << " GB/s"
<< std::endl;
  // If the first deserialization is more than 'tolerance' times slower than
  // the average deserialization, return true, which means an error occurred.
  // The tolerance is set to 2x since deserialization is quick and the first
  // timing is susceptible to caching effects.
const auto tolerance = 2.0F;
const bool isSlowerThanExpected = first > averageTime * tolerance;
if (isSlowerThanExpected) {
sample::gLogInfo << "First deserialization time divided by average time is "
<< (first / averageTime) << ". Exceeds tolerance of "
<< tolerance << "x." << std::endl;
}
return isSlowerThanExpected;
}
std::string getLayerInformation(const InferenceEnvironment& iEnv,
nvinfer1::LayerInformationFormat format) {
auto runtime = std::unique_ptr<IRuntime>(
createInferRuntime(sample::gLogger.getTRTLogger()));
auto inspector =
std::unique_ptr<IEngineInspector>(iEnv.engine->createEngineInspector());
if (!iEnv.context.empty()) {
inspector->setExecutionContext(iEnv.context.front().get());
}
std::string result = inspector->getEngineInformation(format);
return result;
}
} // namespace sample

View File

@@ -0,0 +1,88 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_INFERENCE_H
#define TRT_SAMPLE_INFERENCE_H
#include "sampleReporting.h"
#include "sampleUtils.h"
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "NvInfer.h"
#include "NvInferSafeRuntime.h"
namespace sample {
struct InferenceEnvironment {
TrtUniquePtr<nvinfer1::ICudaEngine> engine;
std::unique_ptr<Profiler> profiler;
std::vector<TrtUniquePtr<nvinfer1::IExecutionContext>> context;
std::vector<std::unique_ptr<Bindings>> bindings;
bool error{false};
std::vector<uint8_t> engineBlob;
bool safe{false};
std::unique_ptr<nvinfer1::safe::ICudaEngine> safeEngine;
std::vector<std::unique_ptr<nvinfer1::safe::IExecutionContext>> safeContext;
template <class ContextType>
inline ContextType* getContext(int32_t streamIdx);
};
template <>
inline nvinfer1::IExecutionContext*
InferenceEnvironment::getContext(int32_t streamIdx) {
return context[streamIdx].get();
}
template <>
inline nvinfer1::safe::IExecutionContext*
InferenceEnvironment::getContext(int32_t streamIdx) {
return safeContext[streamIdx].get();
}
//!
//! \brief Set up contexts and bindings for inference
//!
bool setUpInference(InferenceEnvironment& iEnv,
const InferenceOptions& inference);
//!
//! \brief Deserialize the engine and time how long it takes.
//!
bool timeDeserialize(InferenceEnvironment& iEnv);
//!
//! \brief Run inference and collect timing; return false if any error occurs
//! during inference
//!
bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv,
int32_t device, std::vector<InferenceTrace>& trace);
//!
//! \brief Get layer information of the engine.
//!
std::string getLayerInformation(const InferenceEnvironment& iEnv,
nvinfer1::LayerInformationFormat format);
} // namespace sample
#endif // TRT_SAMPLE_INFERENCE_H
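
A minimal sketch of driving these entry points once iEnv.engine has been populated (the overrides are hypothetical; real runs take them from the parsed InferenceOptions):

bool inferenceSketch(sample::InferenceEnvironment& iEnv, int32_t device) {
  sample::InferenceOptions inference;  // defaults from sampleOptions.h
  inference.streams = 2;               // hypothetical override
  inference.duration = 3.0F;           // run for ~3 seconds after warmup

  if (!sample::setUpInference(iEnv, inference)) {
    return false;
  }
  std::vector<sample::InferenceTrace> trace;
  return sample::runInference(inference, iEnv, device, trace);
}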

File diff suppressed because it is too large

View File

@@ -0,0 +1,311 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_OPTIONS_H
#define TRT_SAMPLE_OPTIONS_H
#include <algorithm>
#include <array>
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "NvInfer.h"
namespace sample {
// Build default params
constexpr int32_t maxBatchNotProvided{0};
constexpr int32_t defaultMinTiming{1};
constexpr int32_t defaultAvgTiming{8};
// System default params
constexpr int32_t defaultDevice{0};
// Inference default params
constexpr int32_t defaultBatch{1};
constexpr int32_t batchNotProvided{0};
constexpr int32_t defaultStreams{1};
constexpr int32_t defaultIterations{10};
constexpr float defaultWarmUp{200.F};
constexpr float defaultDuration{3.F};
constexpr float defaultSleep{};
constexpr float defaultIdle{};
// Reporting default params
constexpr int32_t defaultAvgRuns{10};
constexpr float defaultPercentile{99};
enum class PrecisionConstraints { kNONE, kOBEY, kPREFER };
enum class ModelFormat { kANY, kCAFFE, kONNX, kUFF };
enum class SparsityFlag { kDISABLE, kENABLE, kFORCE };
enum class TimingCacheMode { kDISABLE, kLOCAL, kGLOBAL };
using Arguments = std::unordered_multimap<std::string, std::string>;
using IOFormat = std::pair<nvinfer1::DataType, nvinfer1::TensorFormats>;
using ShapeRange =
std::array<std::vector<int32_t>,
nvinfer1::EnumMax<nvinfer1::OptProfileSelector>()>;
using LayerPrecisions = std::unordered_map<std::string, nvinfer1::DataType>;
using LayerOutputTypes =
std::unordered_map<std::string, std::vector<nvinfer1::DataType>>;
struct Options {
virtual void parse(Arguments& arguments) = 0;
};
struct BaseModelOptions : public Options {
ModelFormat format{ModelFormat::kANY};
std::string model;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct UffInput : public Options {
std::vector<std::pair<std::string, nvinfer1::Dims>> inputs;
bool NHWC{false};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct ModelOptions : public Options {
BaseModelOptions baseModel;
std::string prototxt;
std::vector<std::string> outputs;
UffInput uffInputs;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct BuildOptions : public Options {
int32_t maxBatch{maxBatchNotProvided};
double workspace{-1.0};
double dlaSRAM{-1.0};
double dlaLocalDRAM{-1.0};
double dlaGlobalDRAM{-1.0};
int32_t minTiming{defaultMinTiming};
int32_t avgTiming{defaultAvgTiming};
bool tf32{true};
bool fp16{false};
bool int8{false};
bool directIO{false};
PrecisionConstraints precisionConstraints{PrecisionConstraints::kNONE};
LayerPrecisions layerPrecisions;
LayerOutputTypes layerOutputTypes;
bool safe{false};
bool consistency{false};
bool restricted{false};
bool save{false};
bool load{false};
bool refittable{false};
SparsityFlag sparsity{SparsityFlag::kDISABLE};
nvinfer1::ProfilingVerbosity profilingVerbosity{
nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY};
std::string engine;
std::string calibration;
std::unordered_map<std::string, ShapeRange> shapes;
std::unordered_map<std::string, ShapeRange> shapesCalib;
std::vector<IOFormat> inputFormats;
std::vector<IOFormat> outputFormats;
nvinfer1::TacticSources enabledTactics{0};
nvinfer1::TacticSources disabledTactics{0};
TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL};
std::string timingCacheFile{};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct SystemOptions : public Options {
int32_t device{defaultDevice};
int32_t DLACore{-1};
bool fallback{false};
std::vector<std::string> plugins;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct InferenceOptions : public Options {
int32_t batch{batchNotProvided};
int32_t iterations{defaultIterations};
int32_t streams{defaultStreams};
float warmup{defaultWarmUp};
float duration{defaultDuration};
float sleep{defaultSleep};
float idle{defaultIdle};
bool overlap{true};
bool skipTransfers{false};
bool useManaged{false};
bool spin{false};
bool threads{false};
bool graph{false};
bool skip{false};
bool rerun{false};
bool timeDeserialize{false};
bool timeRefit{false};
std::unordered_map<std::string, std::string> inputs;
std::unordered_map<std::string, std::vector<int32_t>> shapes;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct ReportingOptions : public Options {
bool verbose{false};
int32_t avgs{defaultAvgRuns};
float percentile{defaultPercentile};
bool refit{false};
bool output{false};
bool profile{false};
bool layerInfo{false};
std::string exportTimes;
std::string exportOutput;
std::string exportProfile;
std::string exportLayerInfo;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
struct SafeBuilderOptions : public Options {
std::string serialized{};
std::string onnxModelFile{};
bool help{false};
bool verbose{false};
std::vector<IOFormat> inputFormats;
std::vector<IOFormat> outputFormats;
bool int8{false};
std::string calibFile{};
std::vector<std::string> plugins;
bool consistency{false};
bool standard{false};
void parse(Arguments& arguments) override;
static void printHelp(std::ostream& out);
};
struct AllOptions : public Options {
ModelOptions model;
BuildOptions build;
SystemOptions system;
InferenceOptions inference;
ReportingOptions reporting;
bool helps{false};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
Arguments argsToArgumentsMap(int32_t argc, char* argv[]);
bool parseHelp(Arguments& arguments);
void helpHelp(std::ostream& out);
// Functions to print options
std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options);
std::ostream& operator<<(std::ostream& os, const UffInput& input);
std::ostream& operator<<(std::ostream& os, const IOFormat& format);
std::ostream& operator<<(std::ostream& os, const ShapeRange& dims);
std::ostream& operator<<(std::ostream& os, const ModelOptions& options);
std::ostream& operator<<(std::ostream& os, const BuildOptions& options);
std::ostream& operator<<(std::ostream& os, const SystemOptions& options);
std::ostream& operator<<(std::ostream& os, const InferenceOptions& options);
std::ostream& operator<<(std::ostream& os, const ReportingOptions& options);
std::ostream& operator<<(std::ostream& os, const AllOptions& options);
std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options);
inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) {
for (int32_t i = 0; i < dims.nbDims; ++i) {
os << (i ? "x" : "") << dims.d[i];
}
return os;
}
inline std::ostream& operator<<(std::ostream& os,
const nvinfer1::WeightsRole role) {
switch (role) {
case nvinfer1::WeightsRole::kKERNEL: {
os << "Kernel";
break;
}
case nvinfer1::WeightsRole::kBIAS: {
os << "Bias";
break;
}
case nvinfer1::WeightsRole::kSHIFT: {
os << "Shift";
break;
}
case nvinfer1::WeightsRole::kSCALE: {
os << "Scale";
break;
}
case nvinfer1::WeightsRole::kCONSTANT: {
os << "Constant";
break;
}
case nvinfer1::WeightsRole::kANY: {
os << "Any";
break;
}
}
return os;
}
inline std::ostream& operator<<(std::ostream& os,
const std::vector<int32_t>& vec) {
for (int32_t i = 0, e = static_cast<int32_t>(vec.size()); i < e; ++i) {
os << (i ? "x" : "") << vec[i];
}
return os;
}
} // namespace sample
#endif // TRT_SAMPLES_OPTIONS_H

View File

@@ -0,0 +1,480 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <cmath>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <sstream>
#include <utility>
#include "sampleInference.h"
#include "sampleOptions.h"
#include "sampleReporting.h"
using namespace nvinfer1;
namespace sample {
namespace {
//!
//! \brief Find percentile in an ascending sequence of timings
//! \note percentile must be in [0, 100]. Otherwise, an exception is thrown.
//!
template <typename T>
float findPercentile(float percentile,
std::vector<InferenceTime> const& timings,
T const& toFloat) {
int32_t const all = static_cast<int32_t>(timings.size());
int32_t const exclude = static_cast<int32_t>((1 - percentile / 100) * all);
if (timings.empty()) {
return std::numeric_limits<float>::infinity();
}
if (percentile < 0.0f || percentile > 100.0f) {
throw std::runtime_error("percentile is not in [0, 100]!");
}
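// timings is sorted in ascending order, so the requested percentile lies
// `exclude` elements from the end of the sequence.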
return toFloat(timings[std::max(all - 1 - exclude, 0)]);
}
//!
//! \brief Find median in a sorted sequence of timings
//!
template <typename T>
float findMedian(std::vector<InferenceTime> const& timings, T const& toFloat) {
if (timings.empty()) {
return std::numeric_limits<float>::infinity();
}
int32_t const m = timings.size() / 2;
if (timings.size() % 2) {
return toFloat(timings[m]);
}
return (toFloat(timings[m - 1]) + toFloat(timings[m])) / 2;
}
//!
//! \brief Find the coefficient of variation (std dev / mean, in percent) in a
//! sorted sequence of timings, given the mean
//!
template <typename T>
float findCoeffOfVariance(std::vector<InferenceTime> const& timings,
T const& toFloat, float mean) {
if (timings.empty()) {
return 0;
}
if (mean == 0.F) {
return std::numeric_limits<float>::infinity();
}
auto const metricAccumulator = [toFloat, mean](float acc,
InferenceTime const& a) {
float const diff = toFloat(a) - mean;
return acc + diff * diff;
};
float const variance =
std::accumulate(timings.begin(), timings.end(), 0.F, metricAccumulator) /
timings.size();
return std::sqrt(variance) / mean * 100.F;
}
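// Convert a trace of timestamps into per-phase durations; the end-to-end time
// spans from the start of H2D to the end of D2H for the same query.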
inline InferenceTime traceToTiming(const InferenceTrace& a) {
return InferenceTime((a.enqEnd - a.enqStart), (a.h2dEnd - a.h2dStart),
(a.computeEnd - a.computeStart), (a.d2hEnd - a.d2hStart),
(a.d2hEnd - a.h2dStart));
}
} // namespace
void printProlog(int32_t warmups, int32_t timings, float warmupMs,
float benchTimeMs, std::ostream& os) {
os << "Warmup completed " << warmups << " queries over " << warmupMs << " ms"
<< std::endl;
os << "Timing trace has " << timings << " queries over " << benchTimeMs / 1000
<< " s" << std::endl;
}
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg,
std::ostream& os) {
int32_t count = 0;
InferenceTime sum;
os << std::endl;
os << "=== Trace details ===" << std::endl;
os << "Trace averages of " << runsPerAvg << " runs:" << std::endl;
for (auto const& t : timings) {
sum += t;
if (++count == runsPerAvg) {
// clang-format off
os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / runsPerAvg
<< " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (end to end " << sum.e2e / runsPerAvg
<< " ms, enqueue " << sum.enq / runsPerAvg << " ms)" << std::endl;
// clang-format on
count = 0;
sum.enq = 0;
sum.h2d = 0;
sum.compute = 0;
sum.d2h = 0;
sum.e2e = 0;
}
}
}
void printMetricExplanations(std::ostream& os) {
os << std::endl;
os << "=== Explanations of the performance metrics ===" << std::endl;
os << "Total Host Walltime: the host walltime from when the first query "
"(after warmups) is enqueued to when the "
"last query is completed."
<< std::endl;
os << "GPU Compute Time: the GPU latency to execute the kernels for a query."
<< std::endl;
os << "Total GPU Compute Time: the summation of the GPU Compute Time of all "
"the queries. If this is significantly "
"shorter than Total Host Walltime, the GPU may be under-utilized "
"because of host-side overheads or data "
"transfers."
<< std::endl;
os << "Throughput: the observed throughput computed by dividing the number "
"of queries by the Total Host Walltime. "
"If this is significantly lower than the reciprocal of GPU Compute "
"Time, the GPU may be under-utilized "
"because of host-side overheads or data transfers."
<< std::endl;
os << "Enqueue Time: the host latency to enqueue a query. If this is longer "
"than GPU Compute Time, the GPU may be "
"under-utilized."
<< std::endl;
os << "H2D Latency: the latency for host-to-device data transfers for input "
"tensors of a single query."
<< std::endl;
os << "D2H Latency: the latency for device-to-host data transfers for output "
"tensors of a single query."
<< std::endl;
os << "Latency: the summation of H2D Latency, GPU Compute Time, and D2H "
"Latency. This is the latency to infer a "
"single query."
<< std::endl;
os << "End-to-End Host Latency: the duration from when the H2D of a query is "
"called to when the D2H of the same "
"query is completed, which includes the latency to wait for the "
"completion of the previous query. This is "
"the latency of a query if multiple queries are enqueued consecutively."
<< std::endl;
}
PerformanceResult
getPerformanceResult(std::vector<InferenceTime> const& timings,
std::function<float(InferenceTime const&)> metricGetter,
float percentile) {
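// Sort a copy of the timings by the requested metric so that min, max,
// median, and percentile can be read off directly.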
auto const metricComparator = [metricGetter](InferenceTime const& a,
InferenceTime const& b) {
return metricGetter(a) < metricGetter(b);
};
auto const metricAccumulator = [metricGetter](float acc,
InferenceTime const& a) {
return acc + metricGetter(a);
};
std::vector<InferenceTime> newTimings = timings;
std::sort(newTimings.begin(), newTimings.end(), metricComparator);
PerformanceResult result;
result.min = metricGetter(newTimings.front());
result.max = metricGetter(newTimings.back());
result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0f,
metricAccumulator) /
newTimings.size();
result.median = findMedian(newTimings, metricGetter);
result.percentile = findPercentile(percentile, newTimings, metricGetter);
result.coeffVar = findCoeffOfVariance(newTimings, metricGetter, result.mean);
return result;
}
void printEpilog(std::vector<InferenceTime> const& timings, float walltimeMs,
float percentile, int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose) {
float const throughput = batchSize * timings.size() / walltimeMs * 1000;
auto const getLatency = [](InferenceTime const& t) { return t.latency(); };
auto const latencyResult =
getPerformanceResult(timings, getLatency, percentile);
auto const getEndToEnd = [](InferenceTime const& t) { return t.e2e; };
auto const e2eLatencyResult =
getPerformanceResult(timings, getEndToEnd, percentile);
auto const getEnqueue = [](InferenceTime const& t) { return t.enq; };
auto const enqueueResult =
getPerformanceResult(timings, getEnqueue, percentile);
auto const getH2d = [](InferenceTime const& t) { return t.h2d; };
auto const h2dResult = getPerformanceResult(timings, getH2d, percentile);
auto const getCompute = [](InferenceTime const& t) { return t.compute; };
auto const gpuComputeResult =
getPerformanceResult(timings, getCompute, percentile);
auto const getD2h = [](InferenceTime const& t) { return t.d2h; };
auto const d2hResult = getPerformanceResult(timings, getD2h, percentile);
auto const toPerfString = [percentile](const PerformanceResult& r) {
std::stringstream s;
s << "min = " << r.min << " ms, max = " << r.max << " ms, mean = " << r.mean
<< " ms, "
<< "median = " << r.median << " ms, percentile(" << percentile
<< "%) = " << r.percentile << " ms";
return s.str();
};
osInfo << std::endl;
osInfo << "=== Performance summary ===" << std::endl;
osInfo << "Throughput: " << throughput << " qps" << std::endl;
osInfo << "Latency: " << toPerfString(latencyResult) << std::endl;
osInfo << "End-to-End Host Latency: " << toPerfString(e2eLatencyResult)
<< std::endl;
osInfo << "Enqueue Time: " << toPerfString(enqueueResult) << std::endl;
osInfo << "H2D Latency: " << toPerfString(h2dResult) << std::endl;
osInfo << "GPU Compute Time: " << toPerfString(gpuComputeResult) << std::endl;
osInfo << "D2H Latency: " << toPerfString(d2hResult) << std::endl;
osInfo << "Total Host Walltime: " << walltimeMs / 1000 << " s" << std::endl;
osInfo << "Total GPU Compute Time: "
<< gpuComputeResult.mean * timings.size() / 1000 << " s" << std::endl;
// Report warnings if the throughput is bound by other factors than GPU
// Compute Time.
constexpr float kENQUEUE_BOUND_REPORTING_THRESHOLD{0.8F};
if (enqueueResult.median >
kENQUEUE_BOUND_REPORTING_THRESHOLD * gpuComputeResult.median) {
osWarning << "* Throughput may be bound by Enqueue Time rather than GPU "
"Compute and the GPU may be under-utilized."
<< std::endl;
osWarning << " If not already in use, --useCudaGraph (utilize CUDA graphs "
"where possible) may increase the "
"throughput."
<< std::endl;
}
if (h2dResult.median >= gpuComputeResult.median) {
osWarning << "* Throughput may be bound by host-to-device transfers for "
"the inputs rather than GPU Compute and "
"the GPU may be under-utilized."
<< std::endl;
osWarning << " Add --noDataTransfers flag to disable data transfers."
<< std::endl;
}
if (d2hResult.median >= gpuComputeResult.median) {
osWarning << "* Throughput may be bound by device-to-host transfers for "
"the outputs rather than GPU Compute "
"and the GPU may be under-utilized."
<< std::endl;
osWarning << " Add --noDataTransfers flag to disable data transfers."
<< std::endl;
}
// Report warnings if the GPU Compute Time is unstable.
constexpr float kUNSTABLE_PERF_REPORTING_THRESHOLD{1.0F};
if (gpuComputeResult.coeffVar > kUNSTABLE_PERF_REPORTING_THRESHOLD) {
osWarning
<< "* GPU compute time is unstable, with coefficient of variance = "
<< gpuComputeResult.coeffVar << "%." << std::endl;
osWarning << " If not already in use, locking GPU clock frequency or "
"adding --useSpinWait may improve the "
<< "stability." << std::endl;
}
// Explain what the metrics mean.
osInfo << "Explanations of the performance metrics are printed in the "
"verbose logs."
<< std::endl;
printMetricExplanations(osVerbose);
osInfo << std::endl;
}
void printPerformanceReport(std::vector<InferenceTrace> const& trace,
const ReportingOptions& reporting, float warmupMs,
int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose) {
auto const isNotWarmup = [&warmupMs](const InferenceTrace& a) {
return a.computeStart >= warmupMs;
};
auto const noWarmup = std::find_if(trace.begin(), trace.end(), isNotWarmup);
int32_t const warmups = noWarmup - trace.begin();
float const benchTime = trace.back().d2hEnd - noWarmup->h2dStart;
// With implicit batch, batchSize equals options.inference.batch (parsed from --batch).
// With explicit batch, options.inference.batch is 0, so treat each inference as a
// single query when reporting throughput.
batchSize = batchSize ? batchSize : 1;
printProlog(warmups * batchSize, (trace.size() - warmups) * batchSize,
warmupMs, benchTime, osInfo);
std::vector<InferenceTime> timings(trace.size() - warmups);
std::transform(noWarmup, trace.end(), timings.begin(), traceToTiming);
printTiming(timings, reporting.avgs, osInfo);
printEpilog(timings, benchTime, reporting.percentile, batchSize, osInfo,
osWarning, osVerbose);
if (!reporting.exportTimes.empty()) {
exportJSONTrace(trace, reporting.exportTimes);
}
}
//! Printed format:
//! [ value, ...]
//! value ::= { "startEnqMs" : time, "endEnqMs" : time, "startH2dMs" : time,
//! "endH2dMs" : time, "startComputeMs" : time, "endComputeMs" : time,
//! "startD2hMs" : time, "endD2hMs" : time, "h2dMs" : time, "computeMs" : time,
//! "d2hMs" : time, "latencyMs" : time, "endToEndMs" : time }
//!
void exportJSONTrace(std::vector<InferenceTrace> const& trace,
std::string const& fileName) {
std::ofstream os(fileName, std::ofstream::trunc);
os << "[" << std::endl;
char const* sep = " ";
for (auto const& t : trace) {
InferenceTime const it(traceToTiming(t));
os << sep << "{ ";
sep = ", ";
// clang-format off
os << "\"startEnqMs\" : " << t.enqStart << sep << "\"endEnqMs\" : " << t.enqEnd << sep
<< "\"startH2dMs\" : " << t.h2dStart << sep << "\"endH2dMs\" : " << t.h2dEnd << sep
<< "\"startComputeMs\" : " << t.computeStart << sep << "\"endComputeMs\" : " << t.computeEnd << sep
<< "\"startD2hMs\" : " << t.d2hStart << sep << "\"endD2hMs\" : " << t.d2hEnd << sep
<< "\"h2dMs\" : " << it.h2d << sep << "\"computeMs\" : " << it.compute << sep
<< "\"d2hMs\" : " << it.d2h << sep << "\"latencyMs\" : " << it.latency() << sep
<< "\"endToEndMs\" : " << it.e2e << " }" << std::endl;
// clang-format on
}
os << "]" << std::endl;
}
void Profiler::reportLayerTime(char const* layerName, float timeMs) noexcept {
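// Reaching the end of the recorded layers means either a new profiling
// iteration has started (the first layer's name repeats) or a layer is being
// seen for the first time and must be appended.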
if (mIterator == mLayers.end()) {
bool const first = !mLayers.empty() && mLayers.begin()->name == layerName;
mUpdatesCount += mLayers.empty() || first;
if (first) {
mIterator = mLayers.begin();
} else {
mLayers.emplace_back();
mLayers.back().name = layerName;
mIterator = mLayers.end() - 1;
}
}
mIterator->timeMs += timeMs;
++mIterator;
}
void Profiler::print(std::ostream& os) const noexcept {
std::string const nameHdr("Layer");
std::string const timeHdr(" Time (ms)");
std::string const avgHdr(" Avg. Time (ms)");
std::string const percentageHdr(" Time %");
float const totalTimeMs = getTotalTime();
auto const cmpLayer = [](LayerProfile const& a, LayerProfile const& b) {
return a.name.size() < b.name.size();
};
auto const longestName =
std::max_element(mLayers.begin(), mLayers.end(), cmpLayer);
auto const nameLength =
std::max(longestName->name.size() + 1, nameHdr.size());
auto const timeLength = timeHdr.size();
auto const avgLength = avgHdr.size();
auto const percentageLength = percentageHdr.size();
os << std::endl
<< "=== Profile (" << mUpdatesCount << " iterations ) ===" << std::endl
<< std::setw(nameLength) << nameHdr << timeHdr << avgHdr << percentageHdr
<< std::endl;
for (auto const& p : mLayers) {
// clang-format off
os << std::setw(nameLength) << p.name << std::setw(timeLength) << std::fixed << std::setprecision(2) << p.timeMs
<< std::setw(avgLength) << std::fixed << std::setprecision(4) << p.timeMs / mUpdatesCount
<< std::setw(percentageLength) << std::fixed << std::setprecision(1) << p.timeMs / totalTimeMs * 100
<< std::endl;
}
{
os << std::setw(nameLength) << "Total" << std::setw(timeLength) << std::fixed << std::setprecision(2)
<< totalTimeMs << std::setw(avgLength) << std::fixed << std::setprecision(4) << totalTimeMs / mUpdatesCount
<< std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0 << std::endl;
// clang-format on
}
os << std::endl;
}
void Profiler::exportJSONProfile(std::string const& fileName) const noexcept {
std::ofstream os(fileName, std::ofstream::trunc);
os << "[" << std::endl
<< " { \"count\" : " << mUpdatesCount << " }" << std::endl;
auto const totalTimeMs = getTotalTime();
for (auto const& l : mLayers) {
// clang-format off
os << ", {" << " \"name\" : \"" << l.name << "\""
", \"timeMs\" : " << l.timeMs
<< ", \"averageMs\" : " << l.timeMs / mUpdatesCount
<< ", \"percentage\" : " << l.timeMs / totalTimeMs * 100
<< " }" << std::endl;
// clang-format on
}
os << "]" << std::endl;
}
void dumpInputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os) {
os << "Input Tensors:" << std::endl;
bindings.dumpInputs(context, os);
}
void dumpOutputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os) {
os << "Output Tensors:" << std::endl;
bindings.dumpOutputs(context, os);
}
void exportJSONOutput(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::string const& fileName,
int32_t batch) {
std::ofstream os(fileName, std::ofstream::trunc);
std::string sep = " ";
auto const output = bindings.getOutputBindings();
os << "[" << std::endl;
for (auto const& binding : output) {
// clang-format off
os << sep << "{ \"name\" : \"" << binding.first << "\"" << std::endl;
sep = ", ";
os << " " << sep << "\"dimensions\" : \"";
bindings.dumpBindingDimensions(binding.second, context, os);
os << "\"" << std::endl;
os << " " << sep << "\"values\" : [ ";
bindings.dumpBindingValues(context, binding.second, os, sep, batch);
os << " ]" << std::endl << " }" << std::endl;
// clang-format on
}
os << "]" << std::endl;
}
} // namespace sample

View File

@@ -0,0 +1,211 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_REPORTING_H
#define TRT_SAMPLE_REPORTING_H
#include <functional>
#include <iostream>
#include "NvInfer.h"
#include "sampleOptions.h"
#include "sampleUtils.h"
namespace sample {
//!
//! \struct InferenceTime
//! \brief Measurement times in milliseconds
//!
struct InferenceTime {
InferenceTime(float q, float i, float c, float o, float e)
: enq(q), h2d(i), compute(c), d2h(o), e2e(e) {}
InferenceTime() = default;
InferenceTime(InferenceTime const&) = default;
InferenceTime(InferenceTime&&) = default;
InferenceTime& operator=(InferenceTime const&) = default;
InferenceTime& operator=(InferenceTime&&) = default;
~InferenceTime() = default;
float enq{0}; // Enqueue
float h2d{0}; // Host to Device
float compute{0}; // Compute
float d2h{0}; // Device to Host
float e2e{0}; // end to end
// ideal latency
float latency() const { return h2d + compute + d2h; }
};
//!
//! \struct InferenceTrace
//! \brief Measurement points in milliseconds
//!
struct InferenceTrace {
InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs,
float ce, float os, float oe)
: stream(s), enqStart(es), enqEnd(ee), h2dStart(is), h2dEnd(ie),
computeStart(cs), computeEnd(ce), d2hStart(os), d2hEnd(oe) {}
InferenceTrace() = default;
InferenceTrace(InferenceTrace const&) = default;
InferenceTrace(InferenceTrace&&) = default;
InferenceTrace& operator=(InferenceTrace const&) = default;
InferenceTrace& operator=(InferenceTrace&&) = default;
~InferenceTrace() = default;
int32_t stream{0};
float enqStart{0};
float enqEnd{0};
float h2dStart{0};
float h2dEnd{0};
float computeStart{0};
float computeEnd{0};
float d2hStart{0};
float d2hEnd{0};
};
inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b) {
return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute,
a.d2h + b.d2h, a.e2e + b.e2e);
}
inline InferenceTime operator+=(InferenceTime& a, InferenceTime const& b) {
return a = a + b;
}
//!
//! \struct PerformanceResult
//! \brief Performance result of a performance metric
//!
struct PerformanceResult {
float min{0};
float max{0};
float mean{0};
float median{0};
float percentile{0};
float coeffVar{0}; // coefficient of variation
};
//!
//! \brief Print benchmarking time and number of traces collected
//!
void printProlog(int32_t warmups, int32_t timings, float warmupMs,
float walltime, std::ostream& os);
//!
//! \brief Print a timing trace
//!
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg,
std::ostream& os);
//!
//! \brief Print the performance summary of a trace
//!
void printEpilog(std::vector<InferenceTime> const& timings, float percentile,
int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose);
//!
//! \brief Get the result of a specific performance metric from a trace
//!
PerformanceResult
getPerformanceResult(std::vector<InferenceTime> const& timings,
std::function<float(InferenceTime const&)> metricGetter,
float percentile);
//!
//! \brief Print the explanations of the performance metrics printed in
//! printEpilog() function.
//!
void printMetricExplanations(std::ostream& os);
//!
//! \brief Print and summarize a timing trace
//!
void printPerformanceReport(std::vector<InferenceTrace> const& trace,
ReportingOptions const& reporting, float warmupMs,
int32_t batchSize, std::ostream& osInfo,
std::ostream& osWarning, std::ostream& osVerbose);
//!
//! \brief Export a timing trace to JSON file
//!
void exportJSONTrace(std::vector<InferenceTrace> const& trace,
std::string const& fileName);
//!
//! \brief Print input tensors to stream
//!
void dumpInputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os);
//!
//! \brief Print output tensors to stream
//!
void dumpOutputs(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::ostream& os);
//!
//! \brief Export output tensors to JSON file
//!
void exportJSONOutput(nvinfer1::IExecutionContext const& context,
Bindings const& bindings, std::string const& fileName,
int32_t batch);
//!
//! \struct LayerProfile
//! \brief Layer profile information
//!
struct LayerProfile {
std::string name;
float timeMs{0};
};
//!
//! \class Profiler
//! \brief Collect per-layer profile information, assuming times are reported in
//! the same order
//!
class Profiler : public nvinfer1::IProfiler {
public:
void reportLayerTime(char const* layerName, float timeMs) noexcept override;
void print(std::ostream& os) const noexcept;
//!
//! \brief Export a profile to JSON file
//!
void exportJSONProfile(std::string const& fileName) const noexcept;
private:
float getTotalTime() const noexcept {
auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) {
return accumulator + lp.timeMs;
};
return std::accumulate(mLayers.begin(), mLayers.end(), 0.0, plusLayerTime);
}
std::vector<LayerProfile> mLayers;
std::vector<LayerProfile>::iterator mIterator{mLayers.begin()};
int32_t mUpdatesCount{0};
};
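// Typical usage (sketch): attach an instance via
// IExecutionContext::setProfiler(&profiler) before running the timed
// iterations, then call print() or exportJSONProfile() afterwards.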
} // namespace sample
#endif // TRT_SAMPLE_REPORTING_H

View File

@@ -0,0 +1,494 @@
/*
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_UTILS_H
#define TRT_SAMPLE_UTILS_H
#include <algorithm>
#include <fstream>
#include <iostream>
#include <memory>
#include <numeric>
#include <random>
#include <sstream>
#include <unordered_map>
#include <vector>
#include <cuda.h>
#include <cuda_fp16.h>
#include "NvInfer.h"
#include "common.h"
#include "logger.h"
#include "sampleDevice.h"
#include "sampleOptions.h"
namespace sample {
inline int dataTypeSize(nvinfer1::DataType dataType) {
switch (dataType) {
case nvinfer1::DataType::kINT32:
case nvinfer1::DataType::kFLOAT:
return 4;
case nvinfer1::DataType::kHALF:
return 2;
case nvinfer1::DataType::kBOOL:
case nvinfer1::DataType::kINT8:
return 1;
}
return 0;
}
template <typename T> inline T roundUp(T m, T n) {
return ((m + n - 1) / n) * n;
}
inline int volume(const nvinfer1::Dims& d) {
return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int>());
}
//! comps is the number of components in a vector. Ignored if vecDim < 0.
inline int64_t volume(const nvinfer1::Dims& dims, const nvinfer1::Dims& strides,
int vecDim, int comps, int batch) {
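// The memory footprint is the largest (length * stride) product over all
// axes, scaled by the batch size and, for vectorized formats, the number of
// components per vector.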
int maxNbElems = 1;
for (int i = 0; i < dims.nbDims; ++i) {
// Get effective length of axis.
int d = dims.d[i];
// If any dimension is 0, the tensor is empty.
if (d == 0) {
return 0;
}
if (i == vecDim) {
d = samplesCommon::divUp(d, comps);
}
maxNbElems = std::max(maxNbElems, d * strides.d[i]);
}
return static_cast<int64_t>(maxNbElems) * batch * (vecDim < 0 ? 1 : comps);
}
inline int64_t volume(nvinfer1::Dims dims, int vecDim, int comps, int batch) {
if (vecDim != -1) {
dims.d[vecDim] = roundUp(dims.d[vecDim], comps);
}
return volume(dims) * std::max(batch, 1);
}
inline nvinfer1::Dims toDims(const std::vector<int>& vec) {
int limit = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
if (static_cast<int>(vec.size()) > limit) {
sample::gLogWarning
<< "Vector too long, only first 8 elements are used in dimension."
<< std::endl;
}
// Pick first nvinfer1::Dims::MAX_DIMS elements
nvinfer1::Dims dims{std::min(static_cast<int>(vec.size()), limit), {}};
std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d));
return dims;
}
template <typename T>
inline void fillBuffer(void* buffer, int64_t volume, T min, T max) {
T* typedBuffer = static_cast<T*>(buffer);
std::default_random_engine engine;
if (std::is_integral<T>::value) {
std::uniform_int_distribution<int> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<T>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
} else {
std::uniform_real_distribution<float> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<T>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
}
}
// Specialization needed for custom type __half
template <typename H>
inline void fillBufferHalf(void* buffer, int64_t volume, H min, H max) {
H* typedBuffer = static_cast<H*>(buffer);
std::default_random_engine engine;
std::uniform_real_distribution<float> distribution(min, max);
auto generator = [&engine, &distribution]() {
return static_cast<H>(distribution(engine));
};
std::generate(typedBuffer, typedBuffer + volume, generator);
}
template <>
inline void fillBuffer<__half>(void* buffer, int64_t volume, __half min,
__half max) {
fillBufferHalf(buffer, volume, min, max);
}
template <typename T>
inline void dumpBuffer(const void* buffer, const std::string& separator,
std::ostream& os, const Dims& dims, const Dims& strides,
int32_t vectorDim, int32_t spv) {
const int64_t volume = std::accumulate(dims.d, dims.d + dims.nbDims, 1,
std::multiplies<int64_t>());
const T* typedBuffer = static_cast<const T*>(buffer);
std::string sep;
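// Walk the tensor in row-major logical order and map each logical index back
// to its offset in the strided (and possibly vectorized) memory layout.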
for (int64_t v = 0; v < volume; ++v) {
int64_t curV = v;
int32_t dataOffset = 0;
for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex) {
int32_t dimVal = curV % dims.d[dimIndex];
if (dimIndex == vectorDim) {
dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv;
} else {
dataOffset +=
dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 1 : spv);
}
curV /= dims.d[dimIndex];
ASSERT(curV >= 0);
}
os << sep << typedBuffer[dataOffset];
sep = separator;
}
}
inline void loadFromFile(std::string const& fileName, char* dst, size_t size) {
ASSERT(dst);
std::ifstream file(fileName, std::ios::in | std::ios::binary);
if (file.is_open()) {
file.read(dst, size);
file.close();
} else {
std::stringstream msg;
msg << "Cannot open file " << fileName << "!";
throw std::invalid_argument(msg.str());
}
}
struct Binding {
bool isInput{false};
std::unique_ptr<IMirroredBuffer> buffer;
int64_t volume{0};
nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT};
void fill(const std::string& fileName) {
loadFromFile(fileName, static_cast<char*>(buffer->getHostBuffer()),
buffer->getSize());
}
void fill() {
switch (dataType) {
case nvinfer1::DataType::kBOOL: {
fillBuffer<bool>(buffer->getHostBuffer(), volume, 0, 1);
break;
}
case nvinfer1::DataType::kINT32: {
fillBuffer<int32_t>(buffer->getHostBuffer(), volume, -128, 127);
break;
}
case nvinfer1::DataType::kINT8: {
fillBuffer<int8_t>(buffer->getHostBuffer(), volume, -128, 127);
break;
}
case nvinfer1::DataType::kFLOAT: {
fillBuffer<float>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
break;
}
case nvinfer1::DataType::kHALF: {
fillBuffer<__half>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
break;
}
}
}
void dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim,
int32_t spv, const std::string separator = " ") const {
switch (dataType) {
case nvinfer1::DataType::kBOOL: {
dumpBuffer<bool>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kINT32: {
dumpBuffer<int32_t>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kINT8: {
dumpBuffer<int8_t>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kFLOAT: {
dumpBuffer<float>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
case nvinfer1::DataType::kHALF: {
dumpBuffer<__half>(buffer->getHostBuffer(), separator, os, dims, strides,
vectorDim, spv);
break;
}
}
}
};
class Bindings {
public:
Bindings() = delete;
explicit Bindings(bool useManaged) : mUseManaged(useManaged) {}
void addBinding(int b, const std::string& name, bool isInput, int64_t volume,
nvinfer1::DataType dataType,
const std::string& fileName = "") {
while (mBindings.size() <= static_cast<size_t>(b)) {
mBindings.emplace_back();
mDevicePointers.emplace_back();
}
mNames[name] = b;
if (mBindings[b].buffer == nullptr) {
if (mUseManaged) {
mBindings[b].buffer.reset(new UnifiedMirroredBuffer);
} else {
mBindings[b].buffer.reset(new DiscreteMirroredBuffer);
}
}
mBindings[b].isInput = isInput;
// Some memory allocators return nullptr when allocating zero bytes, but
// TensorRT requires a non-null pointer even for empty tensors, so allocate
// a dummy byte.
if (volume == 0) {
mBindings[b].buffer->allocate(1);
} else {
mBindings[b].buffer->allocate(
static_cast<size_t>(volume) *
static_cast<size_t>(dataTypeSize(dataType)));
}
mBindings[b].volume = volume;
mBindings[b].dataType = dataType;
mDevicePointers[b] = mBindings[b].buffer->getDeviceBuffer();
if (isInput) {
if (fileName.empty()) {
fill(b);
} else {
fill(b, fileName);
}
}
}
void** getDeviceBuffers() { return mDevicePointers.data(); }
void transferInputToDevice(TrtCudaStream& stream) {
for (auto& b : mNames) {
if (mBindings[b.second].isInput) {
mBindings[b.second].buffer->hostToDevice(stream);
}
}
}
void transferOutputToHost(TrtCudaStream& stream) {
for (auto& b : mNames) {
if (!mBindings[b.second].isInput) {
mBindings[b.second].buffer->deviceToHost(stream);
}
}
}
void fill(int binding, const std::string& fileName) {
mBindings[binding].fill(fileName);
}
void fill(int binding) { mBindings[binding].fill(); }
void dumpBindingDimensions(int binding,
const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
const auto dims = context.getBindingDimensions(binding);
// Do not add a newline terminator, because the caller may be outputting a
// JSON string.
os << dims;
}
void dumpBindingValues(const nvinfer1::IExecutionContext& context,
int binding, std::ostream& os,
const std::string& separator = " ",
int32_t batch = 1) const {
Dims dims = context.getBindingDimensions(binding);
Dims strides = context.getStrides(binding);
int32_t vectorDim = context.getEngine().getBindingVectorizedDim(binding);
const int32_t spv =
context.getEngine().getBindingComponentsPerElement(binding);
if (context.getEngine().hasImplicitBatchDimension()) {
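// With implicit batch, the reported dims/strides exclude the batch
// dimension; prepend it here so the dump covers every batch element.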
auto insertN = [](Dims& d, int32_t bs) {
const int32_t nbDims = d.nbDims;
ASSERT(nbDims < Dims::MAX_DIMS);
std::copy_backward(&d.d[0], &d.d[nbDims], &d.d[nbDims + 1]);
d.d[0] = bs;
d.nbDims = nbDims + 1;
};
int32_t batchStride = 0;
for (int32_t i = 0; i < strides.nbDims; ++i) {
if (strides.d[i] * dims.d[i] > batchStride) {
batchStride = strides.d[i] * dims.d[i];
}
}
insertN(dims, batch);
insertN(strides, batchStride);
vectorDim = (vectorDim == -1) ? -1 : vectorDim + 1;
}
mBindings[binding].dump(os, dims, strides, vectorDim, spv, separator);
}
void dumpInputs(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto isInput = [](const Binding& b) { return b.isInput; };
dumpBindings(context, isInput, os);
}
void dumpOutputs(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto isOutput = [](const Binding& b) { return !b.isInput; };
dumpBindings(context, isOutput, os);
}
void dumpBindings(const nvinfer1::IExecutionContext& context,
std::ostream& os) const {
auto all = [](const Binding& b) { return true; };
dumpBindings(context, all, os);
}
void dumpBindings(const nvinfer1::IExecutionContext& context,
bool (*predicate)(const Binding& b),
std::ostream& os) const {
for (const auto& n : mNames) {
const auto binding = n.second;
if (predicate(mBindings[binding])) {
os << n.first << ": (";
dumpBindingDimensions(binding, context, os);
os << ")" << std::endl;
dumpBindingValues(context, binding, os);
os << std::endl;
}
}
}
std::unordered_map<std::string, int> getInputBindings() const {
auto isInput = [](const Binding& b) { return b.isInput; };
return getBindings(isInput);
}
std::unordered_map<std::string, int> getOutputBindings() const {
auto isOutput = [](const Binding& b) { return !b.isInput; };
return getBindings(isOutput);
}
std::unordered_map<std::string, int> getBindings() const {
auto all = [](const Binding& b) { return true; };
return getBindings(all);
}
std::unordered_map<std::string, int>
getBindings(bool (*predicate)(const Binding& b)) const {
std::unordered_map<std::string, int> bindings;
for (const auto& n : mNames) {
const auto binding = n.second;
if (predicate(mBindings[binding])) {
bindings.insert(n);
}
}
return bindings;
}
private:
std::unordered_map<std::string, int32_t> mNames;
std::vector<Binding> mBindings;
std::vector<void*> mDevicePointers;
bool mUseManaged{false};
};
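// Deleter and unique_ptr alias for TensorRT objects, which are released with
// destroy() instead of operator delete.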
template <typename T> struct TrtDestroyer {
void operator()(T* t) { t->destroy(); }
};
template <typename T> using TrtUniquePtr = std::unique_ptr<T, TrtDestroyer<T>>;
inline bool broadcastIOFormats(const std::vector<IOFormat>& formats,
size_t nbBindings, bool isInput = true) {
bool broadcast = formats.size() == 1;
bool validFormatsCount = broadcast || (formats.size() == nbBindings);
if (!formats.empty() && !validFormatsCount) {
if (isInput) {
throw std::invalid_argument(
"The number of inputIOFormats must match network's inputs or be one "
"for broadcasting.");
} else {
throw std::invalid_argument(
"The number of outputIOFormats must match network's outputs or be "
"one for broadcasting.");
}
}
return broadcast;
}
inline std::vector<char> loadTimingCacheFile(const std::string inFileName) {
std::ifstream iFile(inFileName, std::ios::in | std::ios::binary);
if (!iFile) {
sample::gLogWarning << "Could not read timing cache from: " << inFileName
<< ". A new timing cache will be generated and written."
<< std::endl;
return std::vector<char>();
}
iFile.seekg(0, std::ifstream::end);
size_t fsize = iFile.tellg();
iFile.seekg(0, std::ifstream::beg);
std::vector<char> content(fsize);
iFile.read(content.data(), fsize);
iFile.close();
sample::gLogInfo << "Loaded " << fsize << " bytes of timing cache from "
<< inFileName << std::endl;
return content;
}
inline void saveTimingCacheFile(const std::string outFileName,
const IHostMemory* blob) {
std::ofstream oFile(outFileName, std::ios::out | std::ios::binary);
if (!oFile) {
sample::gLogWarning << "Could not write timing cache to: " << outFileName
<< std::endl;
return;
}
oFile.write((char*)blob->data(), blob->size());
oFile.close();
sample::gLogInfo << "Saved " << blob->size() << " bytes of timing cache to "
<< outFileName << std::endl;
}
inline int32_t getCudaDriverVersion() {
int32_t version{-1};
cudaCheck(cudaDriverGetVersion(&version));
return version;
}
inline int32_t getCudaRuntimeVersion() {
int32_t version{-1};
cudaCheck(cudaRuntimeGetVersion(&version));
return version;
}
} // namespace sample
#endif // TRT_SAMPLE_UTILS_H

View File

@@ -0,0 +1,453 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/tensorrt/trt_backend.h"
#include "fastdeploy/utils/utils.h"
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"
#endif
namespace fastdeploy {
size_t TrtDataTypeSize(const nvinfer1::DataType& dtype) {
if (dtype == nvinfer1::DataType::kFLOAT) {
return sizeof(float);
} else if (dtype == nvinfer1::DataType::kHALF) {
return sizeof(float) / 2;
} else if (dtype == nvinfer1::DataType::kINT8) {
return sizeof(int8_t);
} else if (dtype == nvinfer1::DataType::kINT32) {
return sizeof(int32_t);
}
// kBOOL
return sizeof(bool);
}
FDDataType GetFDDataType(const nvinfer1::DataType& dtype) {
if (dtype == nvinfer1::DataType::kFLOAT) {
return FDDataType::FP32;
} else if (dtype == nvinfer1::DataType::kHALF) {
return FDDataType::FP16;
} else if (dtype == nvinfer1::DataType::kINT8) {
return FDDataType::INT8;
} else if (dtype == nvinfer1::DataType::kINT32) {
return FDDataType::INT32;
}
// kBOOL
return FDDataType::BOOL;
}
std::vector<int> toVec(const nvinfer1::Dims& dim) {
std::vector<int> out(dim.d, dim.d + dim.nbDims);
return out;
}
bool TrtBackend::InitFromTrt(const std::string& trt_engine_file) {
if (initialized_) {
FDERROR << "TrtBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
std::ifstream fin(trt_engine_file, std::ios::binary | std::ios::in);
if (!fin) {
FDERROR << "Failed to open TensorRT Engine file " << trt_engine_file
<< std::endl;
return false;
}
fin.seekg(0, std::ios::end);
std::string engine_buffer;
engine_buffer.resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(engine_buffer.at(0)), engine_buffer.size());
fin.close();
SampleUniquePtr<IRuntime> runtime{
createInferRuntime(sample::gLogger.getTRTLogger())};
if (!runtime) {
FDERROR << "Failed to call createInferRuntime()." << std::endl;
return false;
}
engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
runtime->deserializeCudaEngine(engine_buffer.data(),
engine_buffer.size()),
samplesCommon::InferDeleter());
if (!engine_) {
FDERROR << "Failed to call deserializeCudaEngine()." << std::endl;
return false;
}
context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
engine_->createExecutionContext());
FDASSERT(cudaStreamCreate(&stream_) == 0,
"[ERROR] Error occurs while calling cudaStreamCreate().");
GetInputOutputInfo();
initialized_ = true;
return true;
}
bool TrtBackend::InitFromPaddle(const std::string& model_file,
const std::string& params_file,
const TrtBackendOption& option, bool verbose) {
if (initialized_) {
FDERROR << "TrtBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
#ifdef ENABLE_PADDLE_FRONTEND
char* model_content_ptr;
int model_content_size = 0;
if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
&model_content_ptr, &model_content_size, 11, true,
verbose, true, true, true)) {
FDERROR << "Error occured while export PaddlePaddle to ONNX format."
<< std::endl;
return false;
}
std::string onnx_model_proto(model_content_ptr,
model_content_ptr + model_content_size);
delete[] model_content_ptr;
model_content_ptr = nullptr;
return InitFromOnnx(onnx_model_proto, option, true);
#else
FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
"call `InitFromOnnx` instead."
<< std::endl;
return false;
#endif
}
bool TrtBackend::InitFromOnnx(const std::string& model_file,
const TrtBackendOption& option,
bool from_memory_buffer) {
if (initialized_) {
FDERROR << "TrtBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
cudaSetDevice(option.gpu_id);
if (option.serialize_file != "") {
std::ifstream fin(option.serialize_file, std::ios::binary | std::ios::in);
if (fin) {
FDLogger() << "Detect serialized TensorRT Engine file in "
<< option.serialize_file << ", will load it directly."
<< std::endl;
fin.close();
return InitFromTrt(option.serialize_file);
}
}
std::string onnx_content = "";
if (!from_memory_buffer) {
std::ifstream fin(model_file.c_str(), std::ios::binary | std::ios::in);
if (!fin) {
FDERROR << "[ERROR] Failed to open ONNX model file: " << model_file
<< std::endl;
return false;
}
fin.seekg(0, std::ios::end);
onnx_content.resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(onnx_content.at(0)), onnx_content.size());
fin.close();
} else {
onnx_content = model_file;
}
if (!CreateTrtEngine(onnx_content, option)) {
return false;
}
context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
engine_->createExecutionContext());
FDASSERT(cudaStreamCreate(&stream_) == 0,
"[ERROR] Error occurs while calling cudaStreamCreate().");
GetInputOutputInfo();
initialized_ = true;
return true;
}
bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) {
AllocateBufferInDynamicShape(inputs, outputs);
std::vector<void*> input_binds(inputs.size());
for (size_t i = 0; i < inputs.size(); ++i) {
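// TensorRT has no INT64 tensor type, so INT64 inputs are narrowed to INT32
// on the host before the host-to-device copy.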
if (inputs[i].dtype == FDDataType::INT64) {
int64_t* data = static_cast<int64_t*>(inputs[i].Data());
std::vector<int32_t> casted_data(data, data + inputs[i].Numel());
FDASSERT(cudaMemcpyAsync(inputs_buffer_[inputs[i].name].data(),
static_cast<void*>(casted_data.data()),
inputs[i].Nbytes() / 2, cudaMemcpyHostToDevice,
stream_) == 0,
"[ERROR] Error occurs while copy memory from CPU to GPU.");
} else {
FDASSERT(cudaMemcpyAsync(inputs_buffer_[inputs[i].name].data(),
inputs[i].Data(), inputs[i].Nbytes(),
cudaMemcpyHostToDevice, stream_) == 0,
"[ERROR] Error occurs while copy memory from CPU to GPU.");
}
// FDASSERT(cudaMemcpy(inputs_buffer_[inputs[i].name].data(),
// inputs[i].GetData(), inputs[i].Nbytes(),
// cudaMemcpyHostToDevice) == 0,
// "[ERROR] Error occurs while copy memory from CPU to GPU.");
}
if (!context_->enqueueV2(bindings_.data(), stream_, nullptr)) {
FDERROR << "Failed to Infer with TensorRT." << std::endl;
return false;
}
for (size_t i = 0; i < outputs->size(); ++i) {
FDASSERT(cudaMemcpyAsync((*outputs)[i].Data(),
outputs_buffer_[(*outputs)[i].name].data(),
(*outputs)[i].Nbytes(), cudaMemcpyDeviceToHost,
stream_) == 0,
"[ERROR] Error occurs while copy memory from GPU to CPU.");
// FDASSERT(cudaMemcpy((*outputs)[i].data.data(),
// outputs_buffer_[(*outputs)[i].name].data(),
// (*outputs)[i].Nbytes(),
// cudaMemcpyDeviceToHost) == 0,
// "[ERROR] Error occurs while copy memory from GPU to CPU.");
}
// FDASSERT(cudaStreamSynchronize(stream_) == 0,
// "[ERROR] Error occurs while calling cudaStreamSynchronize().");
return true;
}
void TrtBackend::GetInputOutputInfo() {
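// Record every engine binding and create a device buffer slot for it.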
inputs_desc_.clear();
outputs_desc_.clear();
auto num_binds = engine_->getNbBindings();
for (auto i = 0; i < num_binds; ++i) {
std::string name = std::string(engine_->getBindingName(i));
auto shape = toVec(engine_->getBindingDimensions(i));
auto dtype = engine_->getBindingDataType(i);
if (engine_->bindingIsInput(i)) {
inputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
inputs_buffer_[name] = DeviceBuffer(dtype);
} else {
outputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
outputs_buffer_[name] = DeviceBuffer(dtype);
}
}
bindings_.resize(num_binds);
}
void TrtBackend::AllocateBufferInDynamicShape(
const std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs) {
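// With dynamic shapes the binding dimensions must be refreshed before every
// inference; device buffers are grown lazily and rebound only when they are
// reallocated.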
for (const auto& item : inputs) {
auto idx = engine_->getBindingIndex(item.name.c_str());
std::vector<int> shape(item.shape.begin(), item.shape.end());
auto dims = sample::toDims(shape);
context_->setBindingDimensions(idx, dims);
if (item.Nbytes() > inputs_buffer_[item.name].nbBytes()) {
inputs_buffer_[item.name].resize(dims);
bindings_[idx] = inputs_buffer_[item.name].data();
}
}
if (outputs->size() != outputs_desc_.size()) {
outputs->resize(outputs_desc_.size());
}
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
auto output_dims = context_->getBindingDimensions(idx);
(*outputs)[i].dtype = GetFDDataType(outputs_desc_[i].dtype);
(*outputs)[i].shape.assign(output_dims.d,
output_dims.d + output_dims.nbDims);
(*outputs)[i].name = outputs_desc_[i].name;
(*outputs)[i].data.resize(volume(output_dims) *
TrtDataTypeSize(outputs_desc_[i].dtype));
if ((*outputs)[i].Nbytes() >
outputs_buffer_[outputs_desc_[i].name].nbBytes()) {
outputs_buffer_[outputs_desc_[i].name].resize(output_dims);
bindings_[idx] = outputs_buffer_[outputs_desc_[i].name].data();
}
}
}
bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
const TrtBackendOption& option) {
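// The ONNX parser only works with networks created in explicit-batch mode.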
const auto explicitBatch =
1U << static_cast<uint32_t>(
nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
auto builder = SampleUniquePtr<nvinfer1::IBuilder>(
nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
if (!builder) {
FDERROR << "Failed to call createInferBuilder()." << std::endl;
return false;
}
auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(
builder->createNetworkV2(explicitBatch));
if (!network) {
FDERROR << "Failed to call createNetworkV2()." << std::endl;
return false;
}
auto config =
SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
if (!config) {
FDERROR << "Failed to call createBuilderConfig()." << std::endl;
return false;
}
if (option.enable_fp16) {
if (!builder->platformHasFastFp16()) {
FDLogger() << "[WARN] Detected FP16 is not supported in the current GPU, "
"will use FP32 instead."
<< std::endl;
} else {
config->setFlag(nvinfer1::BuilderFlag::kFP16);
}
}
auto parser = SampleUniquePtr<nvonnxparser::IParser>(
nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
if (!parser) {
FDERROR << "Failed to call createParser()." << std::endl;
return false;
}
if (!parser->parse(onnx_model.data(), onnx_model.size())) {
FDERROR << "Failed to parse ONNX model by TensorRT." << std::endl;
return false;
}
FDLogger() << "Start to building TensorRT Engine..." << std::endl;
bool fp16 = builder->platformHasFastFp16();
builder->setMaxBatchSize(option.max_batch_size);
config->setMaxWorkspaceSize(option.max_workspace_size);
if (option.fixed_shape.size() > 0) {
auto profile = builder->createOptimizationProfile();
for (auto& item : option.fixed_shape) {
FDASSERT(profile->setDimensions(item.first.c_str(),
nvinfer1::OptProfileSelector::kMIN,
sample::toDims(item.second)),
"[TrtBackend] Failed to set min_shape for input: " + item.first +
" in TrtBackend.");
FDASSERT(profile->setDimensions(item.first.c_str(),
nvinfer1::OptProfileSelector::kOPT,
sample::toDims(item.second)),
"[TrtBackend] Failed to set min_shape for input: " + item.first +
" in TrtBackend.");
FDASSERT(profile->setDimensions(item.first.c_str(),
nvinfer1::OptProfileSelector::kMAX,
sample::toDims(item.second)),
"[TrtBackend] Failed to set min_shape for input: " + item.first +
" in TrtBackend.");
}
config->addOptimizationProfile(profile);
} else if (option.max_shape.size() > 0) {
auto profile = builder->createOptimizationProfile();
FDASSERT(option.max_shape.size() == option.min_shape.size() &&
option.min_shape.size() == option.opt_shape.size(),
"[TrtBackend] Size of max_shape/opt_shape/min_shape in "
"TrtBackendOption should keep same.");
for (const auto& item : option.min_shape) {
// set min shape
FDASSERT(profile->setDimensions(item.first.c_str(),
nvinfer1::OptProfileSelector::kMIN,
sample::toDims(item.second)),
"[TrtBackend] Failed to set min_shape for input: " + item.first +
" in TrtBackend.");
// set optimization shape
auto iter = option.opt_shape.find(item.first);
FDASSERT(iter != option.opt_shape.end(),
"[TrtBackend] Cannot find input name: " + item.first +
" in TrtBackendOption::opt_shape.");
FDASSERT(profile->setDimensions(item.first.c_str(),
nvinfer1::OptProfileSelector::kOPT,
sample::toDims(iter->second)),
"[TrtBackend] Failed to set opt_shape for input: " + item.first +
" in TrtBackend.");
// set max shape
iter = option.max_shape.find(item.first);
FDASSERT(iter != option.max_shape.end(),
"[TrtBackend] Cannot find input name: " + item.first +
" in TrtBackendOption::max_shape.");
FDASSERT(profile->setDimensions(item.first.c_str(),
nvinfer1::OptProfileSelector::kMAX,
sample::toDims(iter->second)),
"[TrtBackend] Failed to set max_shape for input: " + item.first +
" in TrtBackend.");
}
config->addOptimizationProfile(profile);
}
SampleUniquePtr<IHostMemory> plan{
builder->buildSerializedNetwork(*network, *config)};
if (!plan) {
FDERROR << "Failed to call buildSerializedNetwork()." << std::endl;
return false;
}
SampleUniquePtr<IRuntime> runtime{
createInferRuntime(sample::gLogger.getTRTLogger())};
if (!runtime) {
FDERROR << "Failed to call createInferRuntime()." << std::endl;
return false;
}
engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
runtime->deserializeCudaEngine(plan->data(), plan->size()),
samplesCommon::InferDeleter());
if (!engine_) {
FDERROR << "Failed to call deserializeCudaEngine()." << std::endl;
return false;
}
FDLogger() << "TensorRT Engine is built succussfully." << std::endl;
if (option.serialize_file != "") {
FDLogger() << "Serialize TensorRTEngine to local file "
<< option.serialize_file << "." << std::endl;
std::ofstream engine_file(option.serialize_file.c_str(),
std::ios::binary | std::ios::out);
if (!engine_file) {
FDERROR << "Failed to open " << option.serialize_file << " to write."
<< std::endl;
return false;
}
engine_file.write(static_cast<char*>(plan->data()), plan->size());
engine_file.close();
FDLogger() << "TensorRTEngine is serialized to local file "
<< option.serialize_file
<< ", we can load this model from the seralized engine "
"directly next time."
<< std::endl;
}
return true;
}
TensorInfo TrtBackend::GetInputInfo(int index) {
FDASSERT(index < NumInputs(), "The index:" + std::to_string(index) +
" should less than the number of inputs:" +
std::to_string(NumInputs()) + ".");
TensorInfo info;
info.name = inputs_desc_[index].name;
info.shape.assign(inputs_desc_[index].shape.begin(),
inputs_desc_[index].shape.end());
info.dtype = GetFDDataType(inputs_desc_[index].dtype);
return info;
}
TensorInfo TrtBackend::GetOutputInfo(int index) {
FDASSERT(index < NumOutputs(),
"The index:" + std::to_string(index) +
" should less than the number of outputs:" +
std::to_string(NumOutputs()) + ".");
TensorInfo info;
info.name = outputs_desc_[index].name;
info.shape.assign(outputs_desc_[index].shape.begin(),
outputs_desc_[index].shape.end());
info.dtype = GetFDDataType(outputs_desc_[index].dtype);
return info;
}
} // namespace fastdeploy

View File

@@ -0,0 +1,98 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/backends/tensorrt/common/argsParser.h"
#include "fastdeploy/backends/tensorrt/common/buffers.h"
#include "fastdeploy/backends/tensorrt/common/common.h"
#include "fastdeploy/backends/tensorrt/common/logger.h"
#include "fastdeploy/backends/tensorrt/common/parserOnnxConfig.h"
#include "fastdeploy/backends/tensorrt/common/sampleUtils.h"
#include "NvInfer.h"
#include <cuda_runtime_api.h>
namespace fastdeploy {
using namespace samplesCommon;
struct TrtValueInfo {
std::string name;
std::vector<int> shape;
nvinfer1::DataType dtype;
};
struct TrtBackendOption {
int gpu_id = 0;
bool enable_fp16 = false;
bool enable_int8 = false;
size_t max_batch_size = 32;
size_t max_workspace_size = 1 << 30;
std::map<std::string, std::vector<int32_t>> fixed_shape;
std::map<std::string, std::vector<int32_t>> max_shape;
std::map<std::string, std::vector<int32_t>> min_shape;
std::map<std::string, std::vector<int32_t>> opt_shape;
std::string serialize_file = "";
};
std::vector<int> toVec(const nvinfer1::Dims& dim);
size_t TrtDataTypeSize(const nvinfer1::DataType& dtype);
FDDataType GetFDDataType(const nvinfer1::DataType& dtype);
class TrtBackend : public BaseBackend {
public:
TrtBackend() : engine_(nullptr), context_(nullptr) {}
void BuildOption(const TrtBackendOption& option);
bool InitFromPaddle(const std::string& model_file,
const std::string& params_file,
const TrtBackendOption& option = TrtBackendOption(),
bool verbose = false);
bool InitFromOnnx(const std::string& model_file,
const TrtBackendOption& option = TrtBackendOption(),
bool from_memory_buffer = false);
bool InitFromTrt(const std::string& trt_engine_file);
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
int NumInputs() const { return inputs_desc_.size(); }
int NumOutputs() const { return outputs_desc_.size(); }
TensorInfo GetInputInfo(int index);
TensorInfo GetOutputInfo(int index);
private:
std::shared_ptr<nvinfer1::ICudaEngine> engine_;
std::shared_ptr<nvinfer1::IExecutionContext> context_;
cudaStream_t stream_{};
std::vector<void*> bindings_;
std::vector<TrtValueInfo> inputs_desc_;
std::vector<TrtValueInfo> outputs_desc_;
std::map<std::string, DeviceBuffer> inputs_buffer_;
std::map<std::string, DeviceBuffer> outputs_buffer_;
void GetInputOutputInfo();
void AllocateBufferInDynamicShape(const std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs);
bool CreateTrtEngine(const std::string& onnx_model,
const TrtBackendOption& option);
};
} // namespace fastdeploy

View File

@@ -0,0 +1,50 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#ifndef FASTDEPLOY_DEBUG
#cmakedefine FASTDEPLOY_DEBUG
#endif
#ifndef FASTDEPLOY_LIB
#cmakedefine FASTDEPLOY_LIB
#endif
#ifndef ENABLE_PADDLE_FRONTEND
#cmakedefine ENABLE_PADDLE_FRONTEND
#endif
#ifndef ENABLE_ORT_BACKEND
#cmakedefine ENABLE_ORT_BACKEND
#endif
#ifndef WITH_GPU
#cmakedefine WITH_GPU
#endif
#ifndef ENABLE_TRT_BACKEND
#cmakedefine ENABLE_TRT_BACKEND
#endif
#ifndef ENABLE_VISION
#cmakedefine ENABLE_VISION
#endif
#ifndef ENABLE_OPENCV_CUDA
#cmakedefine ENABLE_OPENCV_CUDA
#endif
#ifndef ENABLE_VISION_VISUALIZE
#cmakedefine ENABLE_VISION_VISUALIZE
#endif

View File

@@ -0,0 +1,127 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/utils/utils.h"
#ifdef WITH_GPU
#include <cuda_runtime_api.h>
#endif
namespace fastdeploy {
void* FDTensor::MutableData() {
if (external_data_ptr != nullptr) {
return external_data_ptr;
}
return data.data();
}
void* FDTensor::Data() {
if (external_data_ptr != nullptr) {
if (device == Device::GPU) {
#ifdef WITH_GPU
// need to copy cuda mem to cpu first
temporary_cpu_buffer.resize(Nbytes());
FDASSERT(cudaMemcpy(temporary_cpu_buffer.data(), external_data_ptr,
Nbytes(), cudaMemcpyDeviceToHost) == 0,
"[ERROR] Error occurs while copy memory from GPU to CPU");
return temporary_cpu_buffer.data();
#else
FDASSERT(false,
"The FastDeploy didn't compile under -DWITH_GPU=ON, so this is "
"an unexpected problem happend.");
#endif
} else {
return external_data_ptr;
}
}
return data.data();
}
void FDTensor::SetExternalData(const std::vector<int>& new_shape,
const FDDataType& data_type, void* data_buffer) {
dtype = data_type;
shape.assign(new_shape.begin(), new_shape.end());
external_data_ptr = data_buffer;
}
void FDTensor::Allocate(const std::vector<int>& new_shape,
const FDDataType& data_type,
const std::string& tensor_name) {
dtype = data_type;
name = tensor_name;
shape.assign(new_shape.begin(), new_shape.end());
int unit = FDDataTypeSize(data_type);
int total_size =
std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
data.resize(total_size * unit);
}
int FDTensor::Nbytes() const { return Numel() * FDDataTypeSize(dtype); }
int FDTensor::Numel() const {
return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
}
template <typename T>
void CalculateStatisInfo(void* src_ptr, int size, double* mean, double* max,
double* min) {
T* ptr = static_cast<T*>(src_ptr);
*mean = 0;
*max = -99999999;
*min = 99999999;
for (int i = 0; i < size; ++i) {
if (*(ptr + i) > *max) {
*max = *(ptr + i);
}
if (*(ptr + i) < *min) {
*min = *(ptr + i);
}
*mean += *(ptr + i);
}
*mean = *mean / size;
}
void FDTensor::PrintInfo(const std::string& prefix) {
double mean = 0;
double max = -99999999;
double min = 99999999;
if (dtype == FDDataType::FP32) {
CalculateStatisInfo<float>(Data(), Numel(), &mean, &max, &min);
} else if (dtype == FDDataType::FP64) {
CalculateStatisInfo<double>(Data(), Numel(), &mean, &max, &min);
} else if (dtype == FDDataType::INT8) {
CalculateStatisInfo<int8_t>(Data(), Numel(), &mean, &max, &min);
} else if (dtype == FDDataType::UINT8) {
CalculateStatisInfo<uint8_t>(Data(), Numel(), &mean, &max, &min);
} else if (dtype == FDDataType::INT32) {
CalculateStatisInfo<int32_t>(Data(), Numel(), &mean, &max, &min);
} else if (dtype == FDDataType::INT64) {
CalculateStatisInfo<int64_t>(Data(), Numel(), &mean, &max, &min);
} else {
FDASSERT(false,
"PrintInfo function doesn't support current situation, maybe you "
"need enhance this function now.")
}
std::cout << prefix << ": shape=";
for (int i = 0; i < shape.size(); ++i) {
std::cout << shape[i] << " ";
}
std::cout << ", dtype=" << FDDataTypeStr(dtype) << ", mean=" << mean
<< ", max=" << max << ", min=" << min << std::endl;
}
FDTensor::FDTensor(const std::string& tensor_name) { name = tensor_name; }
} // namespace fastdeploy

View File

@@ -0,0 +1,84 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#include "fastdeploy/core/fd_type.h"
namespace fastdeploy {
struct FASTDEPLOY_DECL FDTensor {
std::vector<int8_t> data;
std::vector<int64_t> shape;
std::string name = "";
FDDataType dtype;
// This is used to skip the memory copy step; external_data_ptr will point
// to user-allocated memory, and the user is responsible for allocating and
// releasing that memory
void* external_data_ptr = nullptr;
// The internal data is always kept on the CPU
// Sometimes the external data is on the GPU and the model also runs on the
// GPU, so the data transfer can be skipped, which may improve efficiency
Device device = Device::CPU;
// If the external data is not on the CPU, this temporary buffer is used to
// copy it to the CPU whenever data from another device has to be visited
std::vector<int8_t> temporary_cpu_buffer;
// Get data buffer pointer
void* MutableData();
// Use this function to get a pointer to the tensor data for processing.
// Since the most common scenario is processing data on the CPU, this
// function always returns a pointer to a CPU memory buffer.
// If the original data lives on another device, it is copied to the CPU
// and stored in `temporary_cpu_buffer`
void* Data();
// Set a user-provided memory buffer for the tensor. The memory is managed
// by the user, and the tensor only shares it, so keep the buffer alive for
// as long as the tensor uses it
void SetExternalData(const std::vector<int>& new_shape,
const FDDataType& data_type, void* data_buffer);
// Initialize the tensor: set its attributes and allocate a CPU memory buffer
void Allocate(const std::vector<int>& new_shape, const FDDataType& data_type,
const std::string& tensor_name = "");
// Total size of tensor memory buffer in bytes
int Nbytes() const;
// Total number of elements in this tensor
int Numel() const;
// Debug function
// Use this function to print shape, dtype, mean, max, min
// prefix will also be printed as tag
void PrintInfo(const std::string& prefix = "TensorInfo: ");
FDTensor() {}
explicit FDTensor(const std::string& tensor_name);
};
} // namespace fastdeploy
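To make the ownership comments in FDTensor concrete, here is a minimal sketch (names and values are illustrative assumptions, not part of this commit) contrasting a tensor that owns its buffer via Allocate() with one that wraps caller-owned memory via SetExternalData().

// Minimal sketch; names and values are illustrative assumptions.
#include <vector>
#include "fastdeploy/core/fd_tensor.h"

int main() {
  using namespace fastdeploy;

  // 1) The tensor owns its buffer: Allocate() resizes the internal `data` vector.
  FDTensor owned;
  owned.Allocate({1, 3}, FDDataType::FP32, "owned_input");
  float* ptr = static_cast<float*>(owned.MutableData());
  ptr[0] = 1.f; ptr[1] = 2.f; ptr[2] = 3.f;

  // 2) The tensor wraps user memory: the caller keeps ownership and must keep
  //    the buffer alive while the tensor is in use.
  std::vector<float> user_buffer = {4.f, 5.f, 6.f};
  FDTensor shared("shared_input");
  shared.SetExternalData({1, 3}, FDDataType::FP32, user_buffer.data());

  owned.PrintInfo("owned");   // prints shape, dtype, mean, max, min
  shared.PrintInfo("shared");
  return 0;
}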

122
fastdeploy/core/fd_type.cc Normal file
View File

@@ -0,0 +1,122 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/core/fd_type.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
int FDDataTypeSize(FDDataType data_type) {
FDASSERT(data_type != FDDataType::FP16, "Float16 is not supported.");
if (data_type == FDDataType::BOOL) {
return sizeof(bool);
} else if (data_type == FDDataType::INT16) {
return sizeof(int16_t);
} else if (data_type == FDDataType::INT32) {
return sizeof(int32_t);
} else if (data_type == FDDataType::INT64) {
return sizeof(int64_t);
} else if (data_type == FDDataType::FP32) {
return sizeof(float);
} else if (data_type == FDDataType::FP64) {
return sizeof(double);
} else if (data_type == FDDataType::UINT8) {
return sizeof(uint8_t);
} else if (data_type == FDDataType::INT8) {
return sizeof(int8_t);
} else {
FDASSERT(false, "Unexpected data type: " + Str(data_type));
}
return -1;
}
std::string FDDataTypeStr(FDDataType data_type) {
FDASSERT(data_type != FDDataType::FP16, "Float16 is not supported.");
if (data_type == FDDataType::BOOL) {
return "bool";
} else if (data_type == FDDataType::INT16) {
return "int16";
} else if (data_type == FDDataType::INT32) {
return "int32";
} else if (data_type == FDDataType::INT64) {
return "int64";
} else if (data_type == FDDataType::FP16) {
return "float16";
} else if (data_type == FDDataType::FP32) {
return "float32";
} else if (data_type == FDDataType::FP64) {
return "float64";
} else if (data_type == FDDataType::UINT8) {
return "uint8";
} else if (data_type == FDDataType::INT8) {
return "int8";
} else {
FDASSERT(false, "Unexpected data type: " + FDDataTypeStr(data_type));
}
return "UNKNOWN!";
}
std::string Str(Device& d) {
std::string out;
switch (d) {
case Device::DEFAULT:
out = "Device::DEFAULT";
break;
case Device::CPU:
out = "Device::CPU";
break;
case Device::GPU:
out = "Device::GPU";
break;
default:
out = "Device::UNKOWN";
}
return out;
}
std::string Str(FDDataType& fdt) {
std::string out;
switch (fdt) {
case FDDataType::BOOL:
out = "FDDataType::BOOL";
break;
case FDDataType::INT16:
out = "FDDataType::INT16";
break;
case FDDataType::INT32:
out = "FDDataType::INT32";
break;
case FDDataType::INT64:
out = "FDDataType::INT64";
break;
case FDDataType::FP32:
out = "FDDataType::FP32";
break;
case FDDataType::FP64:
out = "FDDataType::FP64";
break;
case FDDataType::FP16:
out = "FDDataType::FP16";
break;
case FDDataType::UINT8:
out = "FDDataType::UINT8";
break;
case FDDataType::INT8:
out = "FDDataType::INT8";
break;
default:
out = "FDDataType::UNKNOWN";
}
return out;
}
} // namespace fastdeploy

59
fastdeploy/core/fd_type.h Normal file
View File

@@ -0,0 +1,59 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ostream>
#include <sstream>
#include <string>
#include "fastdeploy/core/config.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
enum class Device { DEFAULT, CPU, GPU };
FASTDEPLOY_DECL std::string Str(Device& d);
enum class FDDataType {
BOOL,
INT16,
INT32,
INT64,
FP16,
FP32,
FP64,
UNKNOWN1,
UNKNOWN2,
UNKNOWN3,
UNKNOWN4,
UNKNOWN5,
UNKNOWN6,
UNKNOWN7,
UNKNOWN8,
UNKNOWN9,
UNKNOWN10,
UNKNOWN11,
UNKNOWN12,
UNKNOWN13,
UINT8,
INT8
};
FASTDEPLOY_DECL std::string Str(FDDataType& fdt);
FASTDEPLOY_DECL int32_t FDDataTypeSize(FDDataType data_dtype);
FASTDEPLOY_DECL std::string FDDataTypeStr(FDDataType data_dtype);
} // namespace fastdeploy

View File

@@ -1,186 +0,0 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import os.path as osp
import shutil
import requests
import time
import zipfile
import tarfile
import hashlib
import tqdm
import logging
DOWNLOAD_RETRY_LIMIT = 3
def md5check(fullname, md5sum=None):
if md5sum is None:
return True
logging.info("File {} md5 checking...".format(fullname))
md5 = hashlib.md5()
with open(fullname, 'rb') as f:
for chunk in iter(lambda: f.read(4096), b""):
md5.update(chunk)
calc_md5sum = md5.hexdigest()
if calc_md5sum != md5sum:
logging.info("File {} md5 check failed, {}(calc) != "
"{}(base)".format(fullname, calc_md5sum, md5sum))
return False
return True
def move_and_merge_tree(src, dst):
"""
Move src directory to dst, if dst is already exists,
merge src to dst
"""
if not osp.exists(dst):
shutil.move(src, dst)
else:
if not osp.isdir(src):
shutil.move(src, dst)
return
for fp in os.listdir(src):
src_fp = osp.join(src, fp)
dst_fp = osp.join(dst, fp)
if osp.isdir(src_fp):
if osp.isdir(dst_fp):
move_and_merge_tree(src_fp, dst_fp)
else:
shutil.move(src_fp, dst_fp)
elif osp.isfile(src_fp) and \
not osp.isfile(dst_fp):
shutil.move(src_fp, dst_fp)
def download(url, path, rename=None, md5sum=None, show_progress=False):
"""
Download from url, save to path.
url (str): download url
path (str): download to given path
"""
if not osp.exists(path):
os.makedirs(path)
fname = osp.split(url)[-1]
fullname = osp.join(path, fname)
if rename is not None:
fullname = osp.join(path, rename)
retry_cnt = 0
while not (osp.exists(fullname) and md5check(fullname, md5sum)):
if retry_cnt < DOWNLOAD_RETRY_LIMIT:
retry_cnt += 1
else:
logging.debug("{} download failed.".format(fname))
raise RuntimeError("Download from {} failed. "
"Retry limit reached".format(url))
logging.info("Downloading {} from {}".format(fname, url))
req = requests.get(url, stream=True)
if req.status_code != 200:
raise RuntimeError("Downloading from {} failed with code "
"{}!".format(url, req.status_code))
# To protect against interrupted downloads, download to
# tmp_fullname first, then move tmp_fullname to fullname
# after the download finishes
tmp_fullname = fullname + "_tmp"
total_size = req.headers.get('content-length')
with open(tmp_fullname, 'wb') as f:
if total_size and show_progress:
for chunk in tqdm.tqdm(
req.iter_content(chunk_size=1024),
total=(int(total_size) + 1023) // 1024,
unit='KB'):
f.write(chunk)
else:
for chunk in req.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
shutil.move(tmp_fullname, fullname)
logging.debug("{} download completed.".format(fname))
return fullname
def decompress(fname):
"""
Decompress for zip and tar file
"""
logging.info("Decompressing {}...".format(fname))
# To protect against interrupted decompression,
# decompress to the fpath_tmp directory first; if decompression
# succeeds, move the decompressed files to fpath, delete
# fpath_tmp and remove the downloaded archive.
fpath = osp.split(fname)[0]
fpath_tmp = osp.join(fpath, 'tmp')
if osp.isdir(fpath_tmp):
shutil.rmtree(fpath_tmp)
os.makedirs(fpath_tmp)
if fname.find('.tar') >= 0 or fname.find('.tgz') >= 0:
with tarfile.open(fname) as tf:
tf.extractall(path=fpath_tmp)
elif fname.find('.zip') >= 0:
with zipfile.ZipFile(fname) as zf:
zf.extractall(path=fpath_tmp)
else:
raise TypeError("Unsupport compress file type {}".format(fname))
for f in os.listdir(fpath_tmp):
src_dir = osp.join(fpath_tmp, f)
dst_dir = osp.join(fpath, f)
move_and_merge_tree(src_dir, dst_dir)
shutil.rmtree(fpath_tmp)
logging.debug("{} decompressed.".format(fname))
return dst_dir
def url2dir(url, path, rename=None):
full_name = download(url, path, rename, show_progress=True)
print("SDK is donwloaded, now extracting...")
if url.count(".tgz") > 0 or url.count(".tar") > 0 or url.count("zip") > 0:
return decompress(full_name)
def download_and_decompress(url, path='.', rename=None):
fname = osp.split(url)[-1]
fullname = osp.join(path, fname)
# if url.endswith(('tgz', 'tar.gz', 'tar', 'zip')):
# fullname = osp.join(path, fname.split('.')[0])
nranks = 0
if nranks <= 1:
dst_dir = url2dir(url, path, rename)
if dst_dir is not None:
fullname = dst_dir
else:
lock_path = fullname + '.lock'
if not os.path.exists(fullname):
with open(lock_path, 'w'):
os.utime(lock_path, None)
if local_rank == 0:
dst_dir = url2dir(url, path, rename)
if dst_dir is not None:
fullname = dst_dir
os.remove(lock_path)
else:
while os.path.exists(lock_path):
time.sleep(1)
return

View File

@@ -0,0 +1,167 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
bool FastDeployModel::InitRuntime() {
FDASSERT(
ModelFormatCheck(runtime_option.model_file, runtime_option.model_format),
"ModelFormatCheck Failed.");
if (runtime_initialized_) {
FDERROR << "The model is already initialized, cannot be initliazed again."
<< std::endl;
return false;
}
if (runtime_option.backend != Backend::UNKNOWN) {
if (runtime_option.backend == Backend::ORT) {
if (!IsBackendAvailable(Backend::ORT)) {
FDERROR
<< "Backend::ORT is not complied with current FastDeploy library."
<< std::endl;
return false;
}
} else if (runtime_option.backend == Backend::TRT) {
if (!IsBackendAvailable(Backend::TRT)) {
FDERROR
<< "Backend:TRT is not complied with current FastDeploy library."
<< std::endl;
return false;
}
} else {
FDERROR << "Only support Backend::ORT / Backend::TRT now." << std::endl;
return false;
}
runtime_ = new Runtime();
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
}
if (runtime_option.device == Device::CPU) {
return CreateCpuBackend();
} else if (runtime_option.device == Device::GPU) {
#ifdef WITH_GPU
return CreateGpuBackend();
#else
FDERROR << "The compiled FastDeploy library doesn't support GPU now."
<< std::endl;
return false;
#endif
}
FDERROR << "Only support CPU/GPU now." << std::endl;
return false;
}
bool FastDeployModel::CreateCpuBackend() {
if (valid_cpu_backends.size() == 0) {
FDERROR << "There's no valid cpu backends for model: " << ModelName()
<< std::endl;
return false;
}
for (auto& b : valid_cpu_backends) {
if (b == Backend::ORT) {
if (!IsBackendAvailable(Backend::ORT)) {
FDERROR << "OrtBackend is not complied with current FastDeploy library."
<< std::endl;
continue;
}
runtime_option.backend = b;
runtime_ = new Runtime();
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
} else {
FDERROR << "Only Backend::ORT as cpu backend is supported now."
<< std::endl;
return false;
}
}
FDERROR << "Cannot find an available cpu backend to load this model."
<< std::endl;
return false;
}
bool FastDeployModel::CreateGpuBackend() {
if (valid_gpu_backends.size() == 0) {
FDERROR << "There's no valid gpu backends for model: " << ModelName()
<< std::endl;
return false;
}
for (auto& b : valid_gpu_backends) {
if (b == Backend::ORT) {
if (!IsBackendAvailable(Backend::ORT)) {
FDERROR << "OrtBackend is not complied with current FastDeploy library."
<< std::endl;
continue;
}
runtime_option.backend = b;
runtime_ = new Runtime();
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
} else if (b == Backend::TRT) {
if (!IsBackendAvailable(Backend::TRT)) {
FDERROR << "TrtBackend is not complied with current FastDeploy library."
<< std::endl;
continue;
}
runtime_option.backend = b;
runtime_ = new Runtime();
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
} else {
FDERROR << "Only Backend::ORT / Backend::TRT as gpu backends are "
"supported now."
<< std::endl;
return false;
}
}
FDERROR << "Cannot find an available gpu backend to load this model."
<< std::endl;
return false;
}
bool FastDeployModel::Infer(std::vector<FDTensor>& input_tensors,
std::vector<FDTensor>* output_tensors) {
return runtime_->Infer(input_tensors, output_tensors);
}
void FastDeployModel::EnableDebug() {
#ifdef FASTDEPLOY_DEBUG
debug_ = true;
#else
FDLogger() << "The compile FastDeploy is not with -DENABLE_DEBUG=ON, so "
"cannot enable debug mode."
<< std::endl;
debug_ = false;
#endif
}
bool FastDeployModel::DebugEnabled() { return debug_; }
} // namespace fastdeploy

View File

@@ -0,0 +1,67 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/fastdeploy_runtime.h"
namespace fastdeploy {
class FASTDEPLOY_DECL FastDeployModel {
public:
virtual std::string ModelName() const { return "NameUndefined"; };
virtual bool InitRuntime();
virtual bool CreateCpuBackend();
virtual bool CreateGpuBackend();
virtual bool Infer(std::vector<FDTensor>& input_tensors,
std::vector<FDTensor>* output_tensors);
RuntimeOption runtime_option;
std::vector<Backend> valid_cpu_backends = {Backend::ORT};
std::vector<Backend> valid_gpu_backends = {Backend::ORT};
std::vector<Backend> valid_external_backends;
bool initialized = false;
virtual int NumInputsOfRuntime() { return runtime_->NumInputs(); }
virtual int NumOutputsOfRuntime() { return runtime_->NumOutputs(); }
virtual TensorInfo InputInfoOfRuntime(int index) {
return runtime_->GetInputInfo(index);
}
virtual TensorInfo OutputInfoOfRuntime(int index) {
return runtime_->GetOutputInfo(index);
}
virtual bool Initialized() const {
return runtime_initialized_ && initialized;
}
virtual void EnableDebug();
virtual bool DebugEnabled();
private:
Runtime* runtime_ = nullptr;
bool runtime_initialized_ = false;
bool debug_ = false;
};
#define TIMERECORD_START(id) \
TimeCounter tc_##id; \
tc_##id.Start();
#define TIMERECORD_END(id, prefix) \
if (DebugEnabled()) { \
tc_##id.End(); \
FDLogger() << __FILE__ << "(" << __LINE__ << "):" << __FUNCTION__ << " " \
<< prefix << " duration = " << tc_##id.Duration() << "s." \
<< std::endl; \
}
} // namespace fastdeploy
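The TIMERECORD_START / TIMERECORD_END macros above are intended to be used inside FastDeployModel member functions, since TIMERECORD_END calls DebugEnabled(). The sketch below shows that pattern; the class MyDetector and its Predict() method are hypothetical names, not part of this commit.

// Minimal sketch; "MyDetector" and "Predict" are hypothetical.
#include <vector>
#include "fastdeploy/fastdeploy_model.h"

namespace fastdeploy {

class MyDetector : public FastDeployModel {
 public:
  std::string ModelName() const override { return "MyDetector"; }

  bool Predict(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs) {
    TIMERECORD_START(0)
    bool ret = Infer(inputs, outputs);
    // Logs only when EnableDebug() has been called and the library was built
    // with FASTDEPLOY_DEBUG defined.
    TIMERECORD_END(0, "Inference")
    return ret;
  }
};

}  // namespace fastdeploy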

View File

@@ -0,0 +1,163 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/fastdeploy_runtime.h"
#include "fastdeploy/utils/utils.h"
#ifdef ENABLE_ORT_BACKEND
#include "fastdeploy/backends/ort/ort_backend.h"
#endif
#ifdef ENABLE_TRT_BACKEND
#include "fastdeploy/backends/tensorrt/trt_backend.h"
#endif
namespace fastdeploy {
std::vector<Backend> GetAvailableBackends() {
std::vector<Backend> backends;
#ifdef ENABLE_ORT_BACKEND
backends.push_back(Backend::ORT);
#endif
#ifdef ENABLE_TRT_BACKEND
backends.push_back(Backend::TRT);
#endif
return backends;
}
bool IsBackendAvailable(const Backend& backend) {
std::vector<Backend> backends = GetAvailableBackends();
for (size_t i = 0; i < backends.size(); ++i) {
if (backend == backends[i]) {
return true;
}
}
return false;
}
bool ModelFormatCheck(const std::string& model_file,
const Frontend& model_format) {
if (model_format == Frontend::PADDLE) {
if (model_file.size() < 8 ||
model_file.substr(model_file.size() - 8, 8) != ".pdmodel") {
FDLogger() << "With model format of Frontend::PADDLE, the model file "
"should ends with `.pdmodel`, but now it's "
<< model_file << std::endl;
return false;
}
} else if (model_format == Frontend::ONNX) {
if (model_file.size() < 5 ||
model_file.substr(model_file.size() - 5, 5) != ".onnx") {
FDLogger() << "With model format of Frontend::ONNX, the model file "
"should ends with `.onnx`, but now it's "
<< model_file << std::endl;
return false;
}
} else {
FDLogger() << "Only support model format with frontend Frontend::PADDLE / "
"Frontend::ONNX."
<< std::endl;
return false;
}
return true;
}
bool Runtime::Init(const RuntimeOption& _option) {
option = _option;
if (option.backend == Backend::ORT) {
CreateOrtBackend();
} else if (option.backend == Backend::TRT) {
CreateTrtBackend();
} else {
FDERROR << "Runtime only support Backend::ORT/Backend::TRT as backend now."
<< std::endl;
return false;
}
return true;
}
TensorInfo Runtime::GetInputInfo(int index) {
return backend_->GetInputInfo(index);
}
TensorInfo Runtime::GetOutputInfo(int index) {
return backend_->GetOutputInfo(index);
}
bool Runtime::Infer(std::vector<FDTensor>& input_tensors,
std::vector<FDTensor>* output_tensors) {
return backend_->Infer(input_tensors, output_tensors);
}
void Runtime::CreateOrtBackend() {
#ifdef ENABLE_ORT_BACKEND
auto ort_option = OrtBackendOption();
ort_option.graph_optimization_level = option.ort_graph_opt_level;
ort_option.intra_op_num_threads = option.cpu_thread_num;
ort_option.inter_op_num_threads = option.ort_inter_op_num_threads;
ort_option.execution_mode = option.ort_execution_mode;
ort_option.use_gpu = (option.device == Device::GPU) ? true : false;
ort_option.gpu_id = option.device_id;
FDASSERT(option.model_format == Frontend::PADDLE ||
option.model_format == Frontend::ONNX,
"OrtBackend only support model format of Frontend::PADDLE / "
"Frontend::ONNX.");
backend_ = new OrtBackend();
auto casted_backend = dynamic_cast<OrtBackend*>(backend_);
if (option.model_format == Frontend::ONNX) {
FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option),
"Load model from ONNX failed while initliazing OrtBackend.");
} else {
FDASSERT(casted_backend->InitFromPaddle(option.model_file,
option.params_file, ort_option),
"Load model from Paddle failed while initliazing OrtBackend.");
}
#else
FDASSERT(false, "OrtBackend is not available, please compiled with "
"ENABLE_ORT_BACKEND=ON.");
#endif
}
void Runtime::CreateTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
auto trt_option = TrtBackendOption();
trt_option.gpu_id = option.device_id;
trt_option.enable_fp16 = option.trt_enable_fp16;
trt_option.enable_int8 = option.trt_enable_int8;
trt_option.max_batch_size = option.trt_max_batch_size;
trt_option.max_workspace_size = option.trt_max_workspace_size;
trt_option.fixed_shape = option.trt_fixed_shape;
trt_option.max_shape = option.trt_max_shape;
trt_option.min_shape = option.trt_min_shape;
trt_option.opt_shape = option.trt_opt_shape;
trt_option.serialize_file = option.trt_serialize_file;
FDASSERT(option.model_format == Frontend::PADDLE ||
option.model_format == Frontend::ONNX,
"TrtBackend only support model format of Frontend::PADDLE / "
"Frontend::ONNX.");
backend_ = new TrtBackend();
auto casted_backend = dynamic_cast<TrtBackend*>(backend_);
if (option.model_format == Frontend::ONNX) {
FDASSERT(casted_backend->InitFromOnnx(option.model_file, trt_option),
"Load model from ONNX failed while initliazing TrtBackend.");
} else {
FDASSERT(casted_backend->InitFromPaddle(option.model_file,
option.params_file, trt_option),
"Load model from Paddle failed while initliazing TrtBackend.");
}
#else
FDASSERT(false, "TrtBackend is not available, please compiled with "
"ENABLE_TRT_BACKEND=ON.");
#endif
}
} // namespace fastdeploy

View File

@@ -0,0 +1,94 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/utils/perf.h"
#include <map>
#include <vector>
namespace fastdeploy {
enum class Backend { UNKNOWN, ORT, TRT, PDRT };
enum class Frontend { PADDLE, ONNX };
FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
bool ModelFormatCheck(const std::string& model_file,
const Frontend& model_format);
struct FASTDEPLOY_DECL RuntimeOption {
Backend backend = Backend::ORT;
// for cpu inference and preprocess
int cpu_thread_num = 8;
int device_id = 0;
#ifdef WITH_GPU
Device device = Device::GPU;
#else
Device device = Device::CPU;
#endif
// ======Only for ORT Backend========
// -1 means use default value by ort
// 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3:
// ORT_ENABLE_ALL
int ort_graph_opt_level = -1;
int ort_inter_op_num_threads = -1;
// 0: ORT_SEQUENTIAL 1: ORT_PARALLEL
int ort_execution_mode = -1;
// ======Only for Trt Backend=======
std::map<std::string, std::vector<int32_t>> trt_fixed_shape;
std::map<std::string, std::vector<int32_t>> trt_max_shape;
std::map<std::string, std::vector<int32_t>> trt_min_shape;
std::map<std::string, std::vector<int32_t>> trt_opt_shape;
std::string trt_serialize_file = "";
bool trt_enable_fp16 = false;
bool trt_enable_int8 = false;
size_t trt_max_batch_size = 32;
size_t trt_max_workspace_size = 1 << 30;
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
Frontend model_format = Frontend::PADDLE; // format of input model
};
struct FASTDEPLOY_DECL Runtime {
public:
// explicit Runtime(const RuntimeOption& _option = RuntimeOption());
bool Init(const RuntimeOption& _option);
bool Infer(std::vector<FDTensor>& input_tensors,
std::vector<FDTensor>* output_tensors);
void CreateOrtBackend();
void CreateTrtBackend();
int NumInputs() { return backend_->NumInputs(); }
int NumOutputs() { return backend_->NumOutputs(); }
TensorInfo GetInputInfo(int index);
TensorInfo GetOutputInfo(int index);
RuntimeOption option;
private:
BaseBackend* backend_ = nullptr;
};
} // namespace fastdeploy
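As a usage reference for Runtime and RuntimeOption, here is a minimal sketch of loading an ONNX model with the ORT backend and running one inference. The model path, input shape and backend choice are illustrative assumptions; the output vector is pre-sized to NumOutputs(), mirroring how the Python binding in this commit calls Infer().

// Minimal sketch; "model.onnx" and the input shape are placeholders.
#include <vector>
#include "fastdeploy/fastdeploy_runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.model_file = "model.onnx";                 // placeholder path
  option.model_format = fastdeploy::Frontend::ONNX;
  option.backend = fastdeploy::Backend::ORT;        // requires ENABLE_ORT_BACKEND=ON

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    return 1;
  }

  // Feed one zero-filled input tensor named after the model's first input.
  fastdeploy::TensorInfo info = runtime.GetInputInfo(0);
  std::vector<fastdeploy::FDTensor> inputs(1);
  inputs[0].Allocate({1, 3, 224, 224}, fastdeploy::FDDataType::FP32, info.name);

  std::vector<fastdeploy::FDTensor> outputs(runtime.NumOutputs());
  runtime.Infer(inputs, &outputs);
  return outputs.empty() ? 1 : 0;
}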

View File

@@ -0,0 +1,55 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import logging
from . import fastdeploy_main as C
class FastDeployModel:
def __init__(self, option):
self._model = None
self._runtime_option = option
if self._runtime_option is None:
self._runtime_option = C.RuntimeOption()
def model_name(self):
return self._model.model_name()
def num_inputs(self):
return self._model.num_inputs()
def num_outputs(self):
return self._model.num_outputs()
def get_input_info(self, index):
assert index < self.num_inputs(
), "The index:{} must be less than number of inputs:{}.".format(
index, self.num_inputs())
return self._model.get_input_info(index)
def get_output_info(self, index):
assert index < self.num_outputs(
), "The index:{} must be less than number of outputs:{}.".format(
index, self.num_outputs())
return self._model.get_output_info(index)
@property
def runtime_option(self):
return self._model.runtime_option if self._model is not None else None
@property
def initialized(self):
if self._model is None:
return False
return self._model.initialized()

View File

View File

@@ -0,0 +1,34 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/pybind/main.h"
namespace fastdeploy {
void BindFDModel(pybind11::module& m) {
pybind11::class_<FastDeployModel>(m, "FastDeployModel")
.def(pybind11::init<>(), "Default Constructor")
.def("model_name", &FastDeployModel::ModelName)
.def("num_inputs_of_runtime", &FastDeployModel::NumInputsOfRuntime)
.def("num_outputs_of_runtime", &FastDeployModel::NumOutputsOfRuntime)
.def("input_info_of_runtime", &FastDeployModel::InputInfoOfRuntime)
.def("output_info_of_runtime", &FastDeployModel::OutputInfoOfRuntime)
.def("initialized", &FastDeployModel::Initialized)
.def_readwrite("runtime_option", &FastDeployModel::runtime_option)
.def_readwrite("valid_cpu_backends", &FastDeployModel::valid_cpu_backends)
.def_readwrite("valid_gpu_backends",
&FastDeployModel::valid_gpu_backends);
}
} // namespace fastdeploy

View File

@@ -0,0 +1,114 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/pybind/main.h"
namespace fastdeploy {
void BindRuntime(pybind11::module& m) {
pybind11::class_<RuntimeOption>(m, "RuntimeOption")
.def(pybind11::init())
.def_readwrite("model_file", &RuntimeOption::model_file)
.def_readwrite("params_file", &RuntimeOption::params_file)
.def_readwrite("model_format", &RuntimeOption::model_format)
.def_readwrite("backend", &RuntimeOption::backend)
.def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
.def_readwrite("device_id", &RuntimeOption::device_id)
.def_readwrite("device", &RuntimeOption::device)
.def_readwrite("ort_graph_opt_level", &RuntimeOption::ort_graph_opt_level)
.def_readwrite("ort_inter_op_num_threads",
&RuntimeOption::ort_inter_op_num_threads)
.def_readwrite("ort_execution_mode", &RuntimeOption::ort_execution_mode)
.def_readwrite("trt_fixed_shape", &RuntimeOption::trt_fixed_shape)
.def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape)
.def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape)
.def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape)
.def_readwrite("trt_serialize_file", &RuntimeOption::trt_serialize_file)
.def_readwrite("trt_enable_fp16", &RuntimeOption::trt_enable_fp16)
.def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8)
.def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size)
.def_readwrite("trt_max_workspace_size",
&RuntimeOption::trt_max_workspace_size);
pybind11::class_<Runtime>(m, "Runtime")
.def(pybind11::init([](RuntimeOption& option) {
Runtime* runtime = new Runtime();
runtime->Init(option);
return runtime;
}))
.def("infer", [](Runtime& self,
std::map<std::string, pybind11::array>& data) {
std::vector<FDTensor> inputs(data.size());
int index = 0;
for (auto iter = data.begin(); iter != data.end(); ++iter) {
inputs[index].dtype = NumpyDataTypeToFDDataType(iter->second.dtype());
inputs[index].shape.insert(
inputs[index].shape.begin(), iter->second.shape(),
iter->second.shape() + iter->second.ndim());
// TODO(jiangjiajun) Maybe skip memory copy is a better choice
// use SetExternalData
inputs[index].data.resize(iter->second.nbytes());
memcpy(inputs[index].data.data(), iter->second.mutable_data(),
iter->second.nbytes());
inputs[index].name = iter->first;
}
std::vector<FDTensor> outputs(self.NumOutputs());
self.Infer(inputs, &outputs);
std::vector<pybind11::array> results;
results.reserve(outputs.size());
for (size_t i = 0; i < outputs.size(); ++i) {
auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
results.emplace_back(pybind11::array(numpy_dtype, outputs[i].shape));
memcpy(results[i].mutable_data(), outputs[i].data.data(),
outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
}
return results;
});
pybind11::enum_<Backend>(m, "Backend", pybind11::arithmetic(),
"Backend for inference.")
.value("UNKOWN", Backend::UNKNOWN)
.value("ORT", Backend::ORT)
.value("TRT", Backend::TRT)
.value("PDRT", Backend::PDRT);
pybind11::enum_<Frontend>(m, "Frontend", pybind11::arithmetic(),
"Frontend for inference.")
.value("PADDLE", Frontend::PADDLE)
.value("ONNX", Frontend::ONNX);
pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(),
"Device for inference.")
.value("CPU", Device::CPU)
.value("GPU", Device::GPU);
pybind11::enum_<FDDataType>(m, "FDDataType", pybind11::arithmetic(),
"Data type of FastDeploy.")
.value("BOOL", FDDataType::BOOL)
.value("INT8", FDDataType::INT8)
.value("INT16", FDDataType::INT16)
.value("INT32", FDDataType::INT32)
.value("INT64", FDDataType::INT64)
.value("FP32", FDDataType::FP32)
.value("FP64", FDDataType::FP64)
.value("UINT8", FDDataType::UINT8);
pybind11::class_<TensorInfo>(m, "TensorInfo")
.def_readwrite("name", &TensorInfo::name)
.def_readwrite("shape", &TensorInfo::shape)
.def_readwrite("dtype", &TensorInfo::dtype);
m.def("get_available_backends", []() { return GetAvailableBackends(); });
}
} // namespace fastdeploy

115
fastdeploy/pybind/main.cc Normal file
View File

@@ -0,0 +1,115 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/pybind/main.h"
namespace fastdeploy {
void BindRuntime(pybind11::module&);
void BindFDModel(pybind11::module&);
void BindVision(pybind11::module&);
pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype) {
pybind11::dtype dt;
if (fd_dtype == FDDataType::INT32) {
dt = pybind11::dtype::of<int32_t>();
} else if (fd_dtype == FDDataType::INT64) {
dt = pybind11::dtype::of<int64_t>();
} else if (fd_dtype == FDDataType::FP32) {
dt = pybind11::dtype::of<float>();
} else if (fd_dtype == FDDataType::FP64) {
dt = pybind11::dtype::of<double>();
} else {
FDASSERT(false, "The function doesn't support data type of " +
FDDataTypeStr(fd_dtype) + ".");
}
return dt;
}
FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype& np_dtype) {
if (np_dtype.is(pybind11::dtype::of<int32_t>())) {
return FDDataType::INT32;
} else if (np_dtype.is(pybind11::dtype::of<int64_t>())) {
return FDDataType::INT64;
} else if (np_dtype.is(pybind11::dtype::of<float>())) {
return FDDataType::FP32;
} else if (np_dtype.is(pybind11::dtype::of<double>())) {
return FDDataType::FP64;
}
FDASSERT(false, "NumpyDataTypeToFDDataType() only support "
"int32/int64/float32/float64 now.");
return FDDataType::FP32;
}
void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor,
bool share_buffer) {
tensor->dtype = NumpyDataTypeToFDDataType(pyarray.dtype());
tensor->shape.insert(tensor->shape.begin(), pyarray.shape(),
pyarray.shape() + pyarray.ndim());
if (share_buffer) {
tensor->external_data_ptr = pyarray.mutable_data();
} else {
tensor->data.resize(pyarray.nbytes());
memcpy(tensor->data.data(), pyarray.mutable_data(), pyarray.nbytes());
}
}
#ifdef ENABLE_VISION
int NumpyDataTypeToOpenCvType(const pybind11::dtype& np_dtype) {
if (np_dtype.is(pybind11::dtype::of<int32_t>())) {
return CV_32S;
} else if (np_dtype.is(pybind11::dtype::of<int8_t>())) {
return CV_8U;
} else if (np_dtype.is(pybind11::dtype::of<uint8_t>())) {
return CV_8U;
} else if (np_dtype.is(pybind11::dtype::of<float>())) {
return CV_32F;
} else {
FDASSERT(
false,
"NumpyDataTypeToOpenCvType() only support int32/int8/uint8/float32 "
"now.");
}
return CV_8U;
}
cv::Mat PyArrayToCvMat(pybind11::array& pyarray) {
auto cv_type = NumpyDataTypeToOpenCvType(pyarray.dtype());
FDASSERT(
pyarray.ndim() == 3,
"Require rank of array to be 3 with HWC format while converting it to "
"cv::Mat.");
int channel = *(pyarray.shape() + 2);
int height = *(pyarray.shape());
int width = *(pyarray.shape() + 1);
return cv::Mat(height, width, CV_MAKETYPE(cv_type, channel),
pyarray.mutable_data());
}
#endif
PYBIND11_MODULE(fastdeploy_main, m) {
m.doc() =
"Make programer easier to deploy deeplearning model, save time to save "
"the world!";
BindRuntime(m);
BindFDModel(m);
#ifdef ENABLE_VISION
auto vision_module =
m.def_submodule("vision", "Vision module of FastDeploy.");
BindVision(vision_module);
#endif
}
} // namespace fastdeploy

88
fastdeploy/pybind/main.h Normal file
View File

@@ -0,0 +1,88 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <type_traits>
#include "fastdeploy/fastdeploy_runtime.h"
#ifdef ENABLE_VISION
#include "fastdeploy/vision.h"
#endif
namespace fastdeploy {
void BindBackend(pybind11::module&);
void BindVision(pybind11::module&);
pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype);
FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype& np_dtype);
void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor,
bool share_buffer = false);
#ifdef ENABLE_VISION
cv::Mat PyArrayToCvMat(pybind11::array& pyarray);
#endif
template <typename T> FDDataType CTypeToFDDataType() {
if (std::is_same<T, int32_t>::value) {
return FDDataType::INT32;
} else if (std::is_same<T, int64_t>::value) {
return FDDataType::INT64;
} else if (std::is_same<T, float>::value) {
return FDDataType::FP32;
} else if (std::is_same<T, double>::value) {
return FDDataType::FP64;
}
FDASSERT(false,
"CTypeToFDDataType only support int32/int64/float32/float64 now.");
return FDDataType::FP32;
}
template <typename T>
std::vector<pybind11::array>
PyBackendInfer(T& self, const std::vector<std::string>& names,
std::vector<pybind11::array>& data) {
std::vector<FDTensor> inputs(data.size());
for (size_t i = 0; i < data.size(); ++i) {
// TODO(jiangjiajun) here is considered to use user memory directly
inputs[i].dtype = NumpyDataTypeToFDDataType(data[i].dtype());
inputs[i].shape.insert(inputs[i].shape.begin(), data[i].shape(),
data[i].shape() + data[i].ndim());
inputs[i].data.resize(data[i].nbytes());
memcpy(inputs[i].data.data(), data[i].mutable_data(), data[i].nbytes());
inputs[i].name = names[i];
}
std::vector<FDTensor> outputs(self.NumOutputs());
self.Infer(inputs, &outputs);
std::vector<pybind11::array> results;
results.reserve(outputs.size());
for (size_t i = 0; i < outputs.size(); ++i) {
auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
results.emplace_back(pybind11::array(numpy_dtype, outputs[i].shape));
memcpy(results[i].mutable_data(), outputs[i].data.data(),
outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
}
return results;
}
} // namespace fastdeploy

49
fastdeploy/utils/perf.h Normal file
View File

@@ -0,0 +1,49 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/utils/utils.h"
#include <chrono> // NOLINT
namespace fastdeploy {
class FASTDEPLOY_DECL TimeCounter {
public:
void Start() { begin_ = std::chrono::system_clock::now(); }
void End() { end_ = std::chrono::system_clock::now(); }
double Duration() {
auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(end_ - begin_);
return static_cast<double>(duration.count()) *
std::chrono::microseconds::period::num /
std::chrono::microseconds::period::den;
}
void PrintInfo(const std::string& prefix = "TimeCounter: ",
bool print_out = true) {
if (!print_out) {
return;
}
FDLogger() << prefix << " duration = " << Duration() << "s." << std::endl;
}
private:
std::chrono::time_point<std::chrono::system_clock> begin_;
std::chrono::time_point<std::chrono::system_clock> end_;
};
} // namespace fastdeploy
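A quick sketch of using TimeCounter on its own; the timed loop below is only a stand-in workload, not part of this commit.

// Minimal sketch; the loop is a placeholder workload.
#include "fastdeploy/utils/perf.h"

int main() {
  fastdeploy::TimeCounter tc;
  tc.Start();
  double acc = 0.0;
  for (int i = 0; i < 1000000; ++i) {
    acc = acc + i * 0.5;
  }
  tc.End();
  tc.PrintInfo("demo loop");  // prints "demo loop duration = <seconds>s."
  return acc > 0 ? 0 : 1;     // use acc so the loop is not optimized away
}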

Some files were not shown because too many files have changed in this diff