Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-12-24 13:28:13 +08:00
first commit
178
.clang-format
Normal file
@@ -0,0 +1,178 @@
---
Language: Cpp
# BasedOnStyle: LLVM
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignArrayOfStructures: None
AlignConsecutiveMacros: None
AlignConsecutiveAssignments: None
AlignConsecutiveBitFields: None
AlignConsecutiveDeclarations: None
AlignEscapedNewlines: Right
AlignOperands: Align
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortEnumsOnASingleLine: true
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Never
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: MultiLine
AttributeMacros:
  - __capability
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
  AfterCaseLabel: false
  AfterClass: false
  AfterControlStatement: Never
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  BeforeLambdaBody: false
  BeforeWhile: false
  IndentBraces: false
  SplitEmptyFunction: true
  SplitEmptyRecord: true
  SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeConceptDeclarations: true
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: false
DisableFormat: false
EmptyLineAfterAccessModifier: Never
EmptyLineBeforeAccessModifier: LogicalBlock
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
  - foreach
  - Q_FOREACH
  - BOOST_FOREACH
IfMacros:
  - KJ_IF_MAYBE
IncludeBlocks: Preserve
IncludeCategories:
  - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
    Priority: 2
    SortPriority: 0
    CaseSensitive: false
  - Regex: '^(<|"(gtest|gmock|isl|json)/)'
    Priority: 3
    SortPriority: 0
    CaseSensitive: false
  - Regex: '.*'
    Priority: 1
    SortPriority: 0
    CaseSensitive: false
IncludeIsMainRegex: '(Test)?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseLabels: false
IndentCaseBlocks: false
IndentGotoLabels: true
IndentPPDirectives: None
IndentExternBlock: AfterExternBlock
IndentRequires: false
IndentWidth: 2
IndentWrappedFunctionNames: false
InsertTrailingCommas: None
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: true
LambdaBodyIndentation: Signature
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Auto
ObjCBlockIndentWidth: 2
ObjCBreakBeforeNestedBlockParam: true
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
PenaltyIndentedWhitespace: 0
PointerAlignment: Left
PPIndentWidth: -1
ReferenceAlignment: Pointer
ReflowComments: true
ShortNamespaceLines: 1
SortIncludes: CaseSensitive
SortJavaStaticImport: Before
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceAroundPointerQualifiers: Default
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: Never
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInLineCommentPrefix:
  Minimum: 1
  Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
BitFieldColonSpacing: Both
Standard: Latest
StatementAttributeLikeMacros:
  - Q_EMIT
StatementMacros:
  - Q_UNUSED
  - QT_REQUIRE_VERSION
TabWidth: 8
UseCRLF: false
UseTab: Never
WhitespaceSensitiveMacros:
  - STRINGIZE
  - PP_STRINGIZE
  - BOOST_PP_STRINGIZE
  - NS_SWIFT_NAME
  - CF_SWIFT_NAME
...
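For reference, one way to apply this style file by hand — a minimal sketch, assuming `clang-format` is on the PATH and the source path is hypothetical:

```
# --style=file makes clang-format search upward from the input file
# for the nearest .clang-format; -i rewrites the file in place.
clang-format -i --style=file fastdeploy/fastdeploy_model.cc
```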
15
.clang_format.hook
Normal file
@@ -0,0 +1,15 @@
#!/bin/bash
set -e

readonly VERSION="3.8"

version=$(clang-format -version)

if ! [[ $version == *"$VERSION"* ]]; then
    echo "clang-format version check failed."
    echo "a version contains '$VERSION' is needed, but get '$version'"
    echo "you can install the right version, and make a soft-link to '$PATH' env"
    exit -1
fi

clang-format -style=google $@
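A quick manual invocation of the hook above, as a sketch (the source path is hypothetical); this is the same command that pre-commit runs via the `clang-format-with-version-check` entry in the .pre-commit-config.yaml below:

```
# Version-check clang-format, then format the given file in place.
bash .clang_format.hook -i fastdeploy/core/fd_tensor.cc
```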
60
.cpplint_pre_commit.hook
Normal file
@@ -0,0 +1,60 @@
#!/bin/bash

#TOTAL_ERRORS=0
#echo "HAHAHAHAHHA"
#exit 5
#
#files=$(
#
#if [[ ! $TRAVIS_BRANCH ]]; then
#  # install cpplint on local machine.
#  if [[ ! $(which cpplint) ]]; then
#    pip install cpplint
#  fi
#  # diff files on local machine.
#  files=$(git diff --cached --name-status | awk '{print $2}')
#else
#  # diff files between PR and latest commit on Travis CI.
#  branch_ref=$(git rev-parse "$TRAVIS_BRANCH")
#  head_ref=$(git rev-parse HEAD)
#  files=$(git diff --name-status $branch_ref $head_ref | awk '{print $2}')
#fi
## The trick to remove deleted files: https://stackoverflow.com/a/2413151
#for file in $files; do
#  echo $file
#  if [[ $file =~ ^(patches/.*) ]]; then
#    continue;
#  else
#    cpplint --filter=-readability/fn_size $file;
#    TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
#  fi
#done
#
#exit $TOTAL_ERRORS

if git rev-parse --verify HEAD >/dev/null 2>&1
then
    against=HEAD
else
    # Initial commit: diff against an empty tree object
    against=4b825dc642cb6eb9a060e54bf8d69288fbee4904
fi

# Redirect output to stderr.
exec 1>&2

cpplint=cpplint
sum=0
filters='-build/include_order,-build/namespaces,-legal/copyright,-runtime/references,-build/include_what_you_use'

# for cpp
for file in $(git diff-index --name-status $against -- | grep -E '\.[ch](pp)?$' | awk '{print $2}'); do
    $cpplint --filter=$filters $file
    sum=$(expr ${sum} + $?)
done

if [ ${sum} -eq 0 ]; then
    exit 0
else
    exit 1
fi
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
fastdeploy/libs/lib*
43
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,43 @@
repos:
- repo: https://github.com/PaddlePaddle/mirrors-yapf.git
  rev: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
  hooks:
  - id: yapf
    files: \.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: a11d9314b22d8f8c7556443875b731ef05965464
  hooks:
  - id: check-merge-conflict
  - id: check-symlinks
  - id: end-of-file-fixer
  - id: trailing-whitespace
  - id: detect-private-key
  - id: check-symlinks
  - id: check-added-large-files
- repo: local
  hooks:
  - id: copyright_checker
    name: copyright_checker
    entry: python ./.copyright.hook
    language: system
    files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
    exclude: (?!.*third_party)^.*$
- repo: local
  hooks:
  - id: clang-format-with-version-check
    name: clang-format
    description: Format files with ClangFormat.
    entry: bash .clang_format.hook -i
    language: system
    files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$
- repo: local
  hooks:
  - id: cpplint-cpp-source
    name: cpplint
    description: Check C++ code style using cpplint.py.
    entry: bash .cpplint_pre_commit.hook
    language: system
    files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$
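With this configuration in place, the hooks can be wired up and exercised locally; a minimal sketch, assuming the `pre-commit` package is installed (the repository's own `commit-prepare.sh`, mentioned in the README below, presumably performs the equivalent setup):

```
pip install pre-commit        # one-time setup
pre-commit install            # register the git hook in .git/hooks
pre-commit run --all-files    # run yapf, clang-format, and cpplint on the whole tree
```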
261
CMakeLists.txt
Normal file
@@ -0,0 +1,261 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

PROJECT(fastdeploy C CXX)
CMAKE_MINIMUM_REQUIRED(VERSION 3.16)

include(ExternalProject)
add_subdirectory(fastdeploy)
include(external/utils.cmake)

# Set C++11 as standard for the whole project
if(NOT MSVC)
  set(CMAKE_CXX_STANDARD 11)
endif(NOT MSVC)

############################# CMAKE FOR FASTDEPLOY #############################
option(ENABLE_PADDLE_FRONTEND "Whether to enable the PaddlePaddle frontend to load Paddle models in FastDeploy." ON)
option(WITH_GPU "If WITH_GPU=ON, onnxruntime-gpu/paddle-inference-gpu will be enabled." OFF)
option(ENABLE_ORT_BACKEND "Whether to enable the ONNX Runtime backend." OFF)
option(ENABLE_TRT_BACKEND "Whether to enable the TensorRT backend." OFF)
option(CUDA_DIRECTORY "Path of the CUDA library, required when building the TensorRT backend.")
option(TRT_DIRECTORY "Path of the TensorRT library, required when building the TensorRT backend.")
option(ENABLE_VISION "Whether to enable vision models." OFF)
option(ENABLE_VISION_VISUALIZE "Whether to enable the toolbox that visualizes vision model results." ON)

# Please don't turn this flag on now, some bugs exist.
option(ENABLE_OPENCV_CUDA "Whether to enable OpenCV with CUDA, which allows processing images on the GPU." OFF)
option(ENABLE_DEBUG "Whether to print debug information, which may reduce performance." OFF)

if(ENABLE_DEBUG)
  add_definitions(-DFASTDEPLOY_DEBUG)
endif()
if(NOT CUDA_DIRECTORY)
  set(CUDA_DIRECTORY "/usr/local/cuda")
endif()

option(BUILD_FASTDEPLOY_PYTHON "Whether to build the Python library for FastDeploy." OFF)

include_directories(${PROJECT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})

add_definitions(-DFASTDEPLOY_LIB)
file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/*.cc)
file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/backends/ort/*.cc)
file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/fastdeploy/backends/tensorrt/*.cpp)
file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/vision/*.cc)
file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/pybind/*.cc ${PROJECT_SOURCE_DIR}/fastdeploy/*_pybind.cc)
list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_VISION_SRCS})

set(DEPEND_LIBS "")

file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" FASTDEPLOY_VERSION)
string(STRIP "${FASTDEPLOY_VERSION}" FASTDEPLOY_VERSION)

set(THIRD_PARTY_PATH ${CMAKE_CURRENT_BINARY_DIR}/third_libs)
if(ENABLE_PADDLE_FRONTEND)
  add_definitions(-DENABLE_PADDLE_FRONTEND)
  include(${PROJECT_SOURCE_DIR}/external/paddle2onnx.cmake)
  list(APPEND DEPEND_LIBS external_paddle2onnx)
endif(ENABLE_PADDLE_FRONTEND)

if(ENABLE_ORT_BACKEND)
  add_definitions(-DENABLE_ORT_BACKEND)
  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS})
  include(external/onnxruntime.cmake)
  list(APPEND DEPEND_LIBS external_onnxruntime)
endif()

if(WITH_GPU)
  if(APPLE)
    message(FATAL_ERROR "Cannot enable GPU while compiling on Mac OSX.")
    set(WITH_GPU OFF)
  else()
    add_definitions(-DWITH_GPU)
    include_directories(${CUDA_DIRECTORY}/include)
    find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64)
    list(APPEND DEPEND_LIBS ${CUDA_LIB})
  endif()
endif()

if(ENABLE_TRT_BACKEND)
  if(APPLE)
    message(FATAL_ERROR "Cannot enable the TensorRT backend on Mac OS, please set -DENABLE_TRT_BACKEND=OFF.")
  endif()
  if(NOT WITH_GPU)
    message(FATAL_ERROR "While -DENABLE_TRT_BACKEND=ON, -DWITH_GPU=ON must also be set, but now it's OFF.")
  endif()
  add_definitions(-DENABLE_TRT_BACKEND)
  include_directories(${TRT_DIRECTORY}/include)
  include_directories(${PROJECT_SOURCE_DIR}/fastdeploy/backends/tensorrt/common)
  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TRT_SRCS})
  find_library(TRT_INFER_LIB nvinfer ${TRT_DIRECTORY}/lib)
  find_library(TRT_ONNX_LIB nvonnxparser ${TRT_DIRECTORY}/lib)
  find_library(TRT_CAFFE_LIB nvcaffe_parser ${TRT_DIRECTORY}/lib)
  find_library(TRT_PLUGIN_LIB nvinfer_plugin ${TRT_DIRECTORY}/lib)
  list(APPEND DEPEND_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_CAFFE_LIB} ${TRT_PLUGIN_LIB})

  # copy tensorrt libraries to third lib
  if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt")
    file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib")
  endif()
  file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib")
  file(COPY ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_CAFFE_LIB} ${TRT_PLUGIN_LIB} DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib" FOLLOW_SYMLINK_CHAIN)
endif()

if(ENABLE_VISION)
  add_definitions(-DENABLE_VISION)
  if(ENABLE_OPENCV_CUDA)
    add_definitions(-DENABLE_OPENCV_CUDA)
    if(APPLE)
      message(FATAL_ERROR "Cannot enable OpenCV with CUDA on Mac OS, please set -DENABLE_OPENCV_CUDA=OFF.")
    endif()
  endif()
  add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp)
  list(APPEND DEPEND_LIBS yaml-cpp)
  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_VISION_SRCS})
  include_directories(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp/include)
  include(external/opencv.cmake)

  if(ENABLE_VISION_VISUALIZE)
    add_definitions(-DENABLE_VISION_VISUALIZE)
  endif()
endif()

configure_file(${PROJECT_SOURCE_DIR}/fastdeploy/core/config.h.in ${PROJECT_SOURCE_DIR}/fastdeploy/core/config.h)
configure_file(${PROJECT_SOURCE_DIR}/FastDeploy.cmake.in ${PROJECT_SOURCE_DIR}/FastDeploy.cmake @ONLY)

list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_PYBIND_SRCS})

add_library(fastdeploy SHARED ${ALL_DEPLOY_SRCS})
redefine_file_macro(fastdeploy)
set_target_properties(fastdeploy PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
if(NOT APPLE)
  set_target_properties(fastdeploy PROPERTIES LINK_FLAGS "-Wl,--start-group,--exclude-libs,ALL")
endif()
set_target_properties(fastdeploy PROPERTIES LINK_FLAGS_RELEASE -s)

file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" FASTDEPLOY_VERSION)
string(STRIP "${FASTDEPLOY_VERSION}" FASTDEPLOY_VERSION)
if(APPLE)
  # set_target_properties(fastdeploy PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
  set_target_properties(fastdeploy PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
elseif(MSVC)
else()
  set_target_properties(fastdeploy PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
  set_target_properties(fastdeploy PROPERTIES LINK_FLAGS "-Wl,--exclude-libs,ALL")
  set_target_properties(fastdeploy PROPERTIES LINK_FLAGS_RELEASE -s)
endif()

find_package(OpenMP)
if(OpenMP_CXX_FOUND)
  list(APPEND DEPEND_LIBS OpenMP::OpenMP_CXX)
endif()
set_target_properties(fastdeploy PROPERTIES VERSION ${FASTDEPLOY_VERSION})
target_link_libraries(fastdeploy ${DEPEND_LIBS})

include(external/summary.cmake)
fastdeploy_summary()

install(
  TARGETS fastdeploy
  LIBRARY DESTINATION lib
)
install(
  DIRECTORY ${PROJECT_SOURCE_DIR}/fastdeploy
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
  FILES_MATCHING
  PATTERN "*.h"
  PATTERN "${PROJECT_SOURCE_DIR}/fastdeploy/backends/*/*.h"
)
install(
  DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install
  DESTINATION ${CMAKE_INSTALL_PREFIX}/third_libs
)
install(
  FILES
  ${PROJECT_SOURCE_DIR}/LICENSE
  ${PROJECT_SOURCE_DIR}/ThirdPartyNotices.txt
  ${PROJECT_SOURCE_DIR}/VERSION_NUMBER
  ${PROJECT_SOURCE_DIR}/FastDeploy.cmake
  DESTINATION ${CMAKE_INSTALL_PREFIX}
)

# Build demo cpp
if(ENABLE_VISION)
  add_executable(yolov5_exe ${PROJECT_SOURCE_DIR}/demo/cpp/vision/yolov5.cc)
  target_link_libraries(yolov5_exe PUBLIC fastdeploy)
endif()

if(BUILD_FASTDEPLOY_PYTHON)
  add_definitions(-DBUILD_FASTDEPLOY_PYTHON)
  if("${PY_EXT_SUFFIX}" STREQUAL "")
    if(MSVC)
      set(PY_EXT_SUFFIX ".pyd")
    else()
      set(PY_EXT_SUFFIX ".so")
    endif()
  endif()

  # find_package(Python) has replaced PythonInterp and PythonLibs since CMake 3.12.
  # Use the following command in the future; for now this is only compatible with the latest pybind11.
  # find_package(Python ${PY_VERSION} COMPONENTS Interpreter Development REQUIRED)
  find_package(PythonInterp ${PY_VERSION} REQUIRED)
  find_package(PythonLibs ${PY_VERSION})
  if(CMAKE_SYSTEM_NAME STREQUAL "AIX")
    set(CMAKE_NO_SYSTEM_FROM_IMPORTED 1)
  endif()

  add_library(fastdeploy_main MODULE ${DEPLOY_PYBIND_SRCS})
  redefine_file_macro(fastdeploy_main)
  set_target_properties(fastdeploy_main PROPERTIES PREFIX "")
  set_target_properties(fastdeploy_main
                        PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
  set_target_properties(fastdeploy_main PROPERTIES SUFFIX ${PY_EXT_SUFFIX})
  set_target_properties(fastdeploy_main
                        PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
  target_include_directories(fastdeploy_main PRIVATE
                             $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
                             $<INSTALL_INTERFACE:include>
                             ${PYTHON_INCLUDE_DIR})

  target_include_directories(fastdeploy_main PUBLIC ${PROJECT_SOURCE_DIR}/third_party/pybind11/include)

  if(APPLE)
    set_target_properties(fastdeploy_main
                          PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
  endif()

  if(APPLE)
    target_link_libraries(fastdeploy_main PUBLIC fastdeploy)
  elseif(WIN32)
    target_link_libraries(fastdeploy_main PUBLIC fastdeploy)
  else()
    target_link_libraries(fastdeploy_main PUBLIC fastdeploy)
  endif()

  if(MSVC)
    target_link_libraries(fastdeploy_main PRIVATE ${PYTHON_LIBRARIES})
    target_compile_options(fastdeploy_main
                           PRIVATE /MP
                           /wd4244 # 'argument': conversion from 'google::
                                   # protobuf::uint64' to 'int', possible
                                   # loss of data
                           /wd4267 # Conversion from 'size_t' to 'int',
                                   # possible loss of data
                           /wd4996 # The second parameter is ignored.
                           ${EXTRA_FLAGS})
    target_compile_options(fastdeploy_main PRIVATE $<$<NOT:$<CONFIG:Debug>>:/MT> $<$<CONFIG:Debug>:/MTd>)
  endif()
endif(BUILD_FASTDEPLOY_PYTHON)
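A typical out-of-source configure and build against this CMakeLists — a sketch only, with all paths hypothetical; every `-D` flag comes from the `option()` calls above:

```
mkdir build && cd build
cmake .. \
  -DENABLE_ORT_BACKEND=ON \
  -DENABLE_VISION=ON \
  -DWITH_GPU=ON \
  -DENABLE_TRT_BACKEND=ON \
  -DCUDA_DIRECTORY=/usr/local/cuda \
  -DTRT_DIRECTORY=/usr/downloads/TensorRT-8.4.1.0 \
  -DCMAKE_INSTALL_PREFIX=/opt/fastdeploy
make -j8 && make install
```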
59
FastDeploy.cmake.in
Normal file
@@ -0,0 +1,59 @@
CMAKE_MINIMUM_REQUIRED(VERSION 3.16)

set(WITH_GPU @WITH_GPU@)
set(ENABLE_ORT_BACKEND @ENABLE_ORT_BACKEND@)
set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@)
set(ENABLE_PADDLE_FRONTEND @ENABLE_PADDLE_FRONTEND@)
set(ENABLE_VISION @ENABLE_VISION@)
set(ENABLE_OPENCV_CUDA @ENABLE_OPENCV_CUDA@)

set(FASTDEPLOY_LIBS "")
set(FASTDEPLOY_INCS "")
list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/include)

find_library(FDLIB fastdeploy ${CMAKE_CURRENT_LIST_DIR}/lib)
list(APPEND FASTDEPLOY_LIBS ${FDLIB})
if(ENABLE_ORT_BACKEND)
  find_library(ORT_LIB onnxruntime ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/onnxruntime/lib)
  list(APPEND FASTDEPLOY_LIBS ${ORT_LIB})
endif()

if(WITH_GPU)
  if(NOT CUDA_DIRECTORY)
    message(FATAL_ERROR "[FastDeploy] Please define CUDA_DIRECTORY, e.g. -DCUDA_DIRECTORY=/usr/local/cuda")
  endif()
  find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64)
  list(APPEND FASTDEPLOY_LIBS ${CUDA_LIB})

  if(ENABLE_TRT_BACKEND)
    if(NOT TRT_DIRECTORY)
      message(FATAL_ERROR "[FastDeploy] Please define TRT_DIRECTORY, e.g. -DTRT_DIRECTORY=/usr/downloads/TensorRT-8.4.1.0")
    endif()
    find_library(TRT_INFER_LIB nvinfer ${TRT_DIRECTORY}/lib)
    find_library(TRT_ONNX_LIB nvonnxparser ${TRT_DIRECTORY}/lib)
    find_library(TRT_CAFFE_LIB nvcaffe_parser ${TRT_DIRECTORY}/lib)
    find_library(TRT_PLUGIN_LIB nvinfer_plugin ${TRT_DIRECTORY}/lib)
    list(APPEND FASTDEPLOY_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_CAFFE_LIB} ${TRT_PLUGIN_LIB})
  endif()
endif()

if(ENABLE_VISION)
  find_library(OPENCV_CORE_LIB opencv_core ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
  find_library(OPENCV_HIGHGUI_LIB opencv_highgui ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
  find_library(OPENCV_IMGPROC_LIB opencv_imgproc ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
  find_library(OPENCV_IMGCODESC_LIB opencv_imgcodecs ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
  list(APPEND FASTDEPLOY_LIBS ${OPENCV_CORE_LIB} ${OPENCV_HIGHGUI_LIB} ${OPENCV_IMGPROC_LIB} ${OPENCV_IMGCODESC_LIB})
  list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/include)

  if(ENABLE_OPENCV_CUDA)
    find_library(OPENCV_CUDAARITHM_LIB opencv_core ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
    find_library(OPENCV_CUDAIMGPROC_LIB opencv_core ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
    find_library(OPENCV_CUDAWARPING_LIB opencv_core ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
    list(APPEND FASTDEPLOY_LIBS ${OPENCV_CUDAARITHM_LIB} ${OPENCV_CUDAIMGPROC_LIB} ${OPENCV_CUDAWARPING_LIB})
  endif()
endif()

if(ENABLE_PADDLE_FRONTEND)
  find_library(PADDLE2ONNX_LIB paddle2onnx ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle2onnx/lib)
  list(APPEND FASTDEPLOY_LIBS ${PADDLE2ONNX_LIB})
endif()
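How a downstream project would consume the generated FastDeploy.cmake — a sketch under stated assumptions (the install prefix and target name are hypothetical); `FASTDEPLOY_LIBS` and `FASTDEPLOY_INCS` are the variables populated above:

```
# In the consumer's CMakeLists.txt one would typically add:
#   include(/opt/fastdeploy/FastDeploy.cmake)
#   include_directories(${FASTDEPLOY_INCS})
#   target_link_libraries(my_app ${FASTDEPLOY_LIBS})
# then configure with the CUDA/TensorRT paths this script validates:
cmake .. -DCUDA_DIRECTORY=/usr/local/cuda \
         -DTRT_DIRECTORY=/usr/downloads/TensorRT-8.4.1.0
```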
208
README.md
@@ -1,202 +1,28 @@
# ⚡️FastDeploy

Use FastDeploy for model inference!

------------------------------------------------------------------------------------------

**⚡️FastDeploy** is an **easy-to-use** inference deployment toolbox. It covers the industry's mainstream **high-quality pre-trained models** and provides an **out-of-the-box** development experience, spanning image classification, object detection, image segmentation, face detection, human keypoint detection, text recognition, and more, to meet developers' needs for rapid deployment across **multiple scenarios**, **multiple kinds of hardware**, and **multiple platforms**.

<p align="center">
    <a href="./LICENSE"><img src="https://img.shields.io/badge/license-Apache%202-dfd.svg"></a>
    <a href="https://github.com/PaddlePaddle/FastDeploy/releases"><img src="https://img.shields.io/github/v/release/PaddlePaddle/FastDeploy?color=ffa"></a>
    <a href=""><img src="https://img.shields.io/badge/python-3.7+-aff.svg"></a>
    <a href=""><img src="https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-pink.svg"></a>
    <a href="https://github.com/PaddlePaddle/FastDeploy/graphs/contributors"><img src="https://img.shields.io/github/contributors/PaddlePaddle/FastDeploy?color=9ea"></a>
    <a href="https://github.com/PaddlePaddle/FastDeploy/commits"><img src="https://img.shields.io/github/commit-activity/m/PaddlePaddle/FastDeploy?color=3af"></a>
    <a href="https://pypi.org/project/FastDeploy-python/"><img src="https://img.shields.io/pypi/dm/FastDeploy-python?color=9cf"></a>
    <a href="https://github.com/PaddlePaddle/FastDeploy/issues"><img src="https://img.shields.io/github/issues/PaddlePaddle/FastDeploy?color=9cc"></a>
    <a href="https://github.com/PaddlePaddle/FastDeploy/stargazers"><img src="https://img.shields.io/github/stars/PaddlePaddle/FastDeploy?color=ccf"></a>
</p>

<h4 align="center">
  <a href=#features> Features </a> |
  <a href=#sdk-installation> Installation </a> |
  <a href=#sdk-usage> Quick Start </a> |
  <a href=#community> Community </a>
</h4>

## Requirements

- python >= 3.6
- cmake >= 3.18
- gcc >= 8.2
- cuda >= 11.0 (if GPU support is needed)
- tensorrt >= 8.4 (if the TensorRT backend is needed)

## How to deploy models quickly with FastDeploy

- [C++ deployment guide](docs/cpp/README.md)
- [Python deployment guide](docs/python/README.md)

## How to build FastDeploy yourself

- [FastDeploy build guide](docs/compile/README.md)

## Code contribution

Before submitting code, initialize the development environment: after cloning the repository, run

```
sh commit-prepare.sh
```

Code-format checks will then run automatically on every subsequent commit.

## News 📢

* 🔥 2022.6.30, 20:30: ⚡️FastDeploy angel-user beta meetup to discuss inference-deployment pain points with developers. Scan the QR code below to sign up and join the group for the meeting link.
<div align="center">
<img src="https://user-images.githubusercontent.com/54695910/175854075-2c0f9997-ed18-4b17-9aaf-1b43266d3996.jpeg" width = "150" height = "150" />
</div>

* 🔥 2022.6.27: [**⚡️FastDeploy v0.1.0**](https://github.com/PaddlePaddle/FastDeploy/releases/tag/release%2F0.1.0) beta released! 🎉
  * 💎 SDKs released for 40 key models on 8 key software/hardware environments
  * 😊 Available both from the web page and as a pip package

## Features

### 📦 **Out-of-the-box inference deployment toolchain for cloud, edge, and device, across hardware and platforms**

- Point-and-click web downloads or a one-line pip install to quickly fetch SDK packages of many kinds
- Cloud (incl. servers and data centers):
    - Start a Serving service with one command (with a graphical web display)
    - Run prediction on images, local video streams, local cameras, and network video streams with one command
    - Supports Windows and Linux
    - Supports Python and C++
- Edge:
    - Supports NVIDIA Jetson and other edge devices, with video-stream prediction services
- Device (incl. mobile):
    - Supports iOS and Android
    - Supports ARM CPU devices
- Mainstream hardware:
    - Intel CPUs (incl. Core, Xeon, etc.)
    - ARM CPUs (incl. Qualcomm, MTK, RK, etc.)
    - NVIDIA GPUs (incl. V100, T4, Jetson, etc.)

### 🤗 **Rich pre-trained models: download an SDK and inference deployment is done**

| Model | Task | Size (MB) | Device | Mobile | Mobile | Edge | Server+Cloud | Server+Cloud | Server+Cloud | Server+Cloud |
|---|---|---|---|---|---|---|---|---|---|---|
|----- | ---- |----- | Linux | Android | iOS | Linux | Linux | Linux | Windows | Windows |
|----- | ---- |--- | ARM CPU | ARM CPU | ARM CPU | Jetson | X86 CPU | GPU | X86 CPU | GPU |
| [PP-LCNet](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) | Classification | 11.9 |✅|✅|✅|✅|✅|✅|✅|✅|
| [PP-LCNetv2](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) | Classification | 26.6 |✅|✅|✅|✅|✅|✅|✅|✅|
| [EfficientNet](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) | Classification | 31.4 |✅|✅|✅|✅|✅|✅|✅|✅|
| [GhostNet](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) | Classification | 20.8 |✅|✅|✅|✅|✅|✅|✅|✅|
| [MobileNetV1](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) | Classification | 17 |✅|✅|✅|✅|✅|✅|✅|✅|
| [MobileNetV2](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) | Classification | 14.2 |✅|✅|✅|✅|✅|✅|✅|✅|
| [MobileNetV3](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) | Classification | 22 |✅|✅|✅|✅|✅|✅|✅|✅|
| [ShuffleNetV2](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) | Classification | 9.2 |✅|✅|✅|✅|✅|✅|✅|✅|
| [SqueezeNetV1.1](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) | Classification | 5 |✅|✅|✅|✅|✅|✅|✅|✅|
| [Inceptionv3](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) | Classification | 95.5 |✅|✅|✅|✅|✅|✅|✅|✅|
| [PP-HGNet](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) | Classification | 59 |✅|✅|✅|✅|✅|✅|✅|✅|
| [ResNet50_vd](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) | Classification | 102.5 |❌|❌|❌|✅|✅|✅|✅|✅|
| [SwinTransformer_224_win7](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) | Classification | 352.7 |✅|✅|✅|✅|✅|✅|✅|✅|
| [PP-PicoDet_s_320_coco](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) | Detection | 4.1 |✅|✅|✅|✅|✅|✅|✅|✅|
| [PP-PicoDet_s_320_lcnet](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) | Detection | 4.9 |✅|✅|✅|✅|✅|✅|✅|✅|
| [CenterNet](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) | Detection | 4.8 |✅|✅|✅|✅|✅|✅|✅|✅|
| [YOLOv3_MobileNetV3](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) | Detection | 94.6 |✅|✅|✅|✅|✅|✅|✅|✅|
| [PP-YOLO_tiny_650e_coco](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) | Detection | 4.4 |✅|✅|✅|✅|✅|✅|✅|✅|
| [SSD_MobileNetV1_300_120e_voc](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) | Detection | 23.3 |✅|✅|✅|✅|✅|✅|✅|✅|
| [YOLOX_Nano_300e_coco](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) | Detection | 3.7 |❌|❌|❌|✅|✅|✅|✅|✅|
| [PP-YOLO_ResNet50vd](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) | Detection | 188.5 |✅|✅|✅|✅|✅|✅|✅|✅|
| [PP-YOLOv2_ResNet50vd](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) | Detection | 218.7 |✅|✅|✅|✅|✅|✅|✅|✅|
| [PP-YOLO_crn_l_300e_coco](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) | Detection | 209.1 |✅|✅|✅|✅|✅|✅|✅|✅|
| [YOLOv5s](https://github.com/ultralytics/yolov5) | Detection | 29.3 |✅|✅|✅|✅|✅|✅|✅|✅|
| [Faster R-CNN_r50_fpn_1x_coco](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) | Detection | 167.2 |❌|❌|❌|✅|✅|✅|✅|✅|
| [BlazeFace](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) | Face Detection | 1.5 |✅|✅|✅|✅|✅|✅|✅|✅|
| [RetinaFace](https://github.com/biubug6/Pytorch_Retinaface) | Face Localisation | 1.7 |✅|❌|❌|✅|✅|✅|✅|✅|
| [PP-TinyPose](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) | Keypoint Detection | 5.5 |✅|✅|✅|✅|✅|✅|✅|✅|
| [PP-LiteSeg(STDC1)](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) | Segmentation | 32.2 |✅|✅|✅|✅|✅|✅|✅|✅|
| [PP-HumanSeg-Lite](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/contrib/PP-HumanSeg/README_cn.md) | Segmentation | 0.556 |✅|✅|✅|✅|✅|✅|✅|✅|
| [HRNet-w18](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/train/train_cn.md) | Segmentation | 38.7 |✅|✅|✅|❌|✅|✅|✅|✅|
| [Mask R-CNN_r50_fpn_1x_coco](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/contrib/PP-HumanSeg/README_cn.md) | Segmentation | 107.2 |❌|❌|❌|✅|✅|✅|✅|✅|
| [PP-HumanSeg-Server](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/contrib/PP-HumanSeg/README_cn.md) | Segmentation | 107.2 |✅|✅|✅|✅|✅|✅|✅|✅|
| [Unet](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/train/train_cn.md) | Segmentation | 53.7 |❌|✅|❌|❌|✅|✅|✅|❌|
| [Deeplabv3-ResNet50](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/train/train_cn.md) | Segmentation | 156.5 |❌|❌|❌|❌|✅|✅|✅|✅|
| [PP-OCRv1](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/doc/doc_ch/ppocr_introduction.md) | OCR | 2.3+4.4 |✅|✅|✅|✅|✅|✅|✅|✅|
| [PP-OCRv2](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/doc/doc_ch/ppocr_introduction.md) | OCR | 2.3+4.4 |✅|✅|✅|✅|✅|✅|✅|✅|
| [PP-OCRv3](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/doc/doc_ch/PP-OCRv3_introduction.md) | OCR | 2.4+10.6 |✅|✅|✅|✅|✅|✅|✅|✅|
| [PP-OCRv3-tiny](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/doc/doc_ch/models_list.md) | OCR | 2.4+10.7 |✅|✅|✅|✅|✅|✅|✅|✅|

## SDK Installation

### Option 1: Download from the web page

- Log in to the [EasyEdge web console](https://ai.baidu.com/easyedge/app/openSource) to download the SDK

### Option 2: Install via pip

Developers can install `fastdeploy-python` via pip to get the latest download links.

- Requirements

  python >= 3.6

- Installation

```
pip install fastdeploy-python --upgrade
```

- Usage

  - List all models currently supported by FastDeploy
```
fastdeploy --list_models
```
  - Download the deployment SDK and examples for a model on a given platform and hardware
```
fastdeploy --download_sdk \
           --model PP-PicoDet-s_320 \
           --platform Linux \
           --soc x86 \
           --save_dir .
```

- Parameters
  - `list_models`: list all models currently supported by FastDeploy
  - `download_sdk`: download the deployment SDK and examples for a model on a given platform and hardware
  - `model`: model name, e.g. "PP-PicoDet-s_320"; see `list_models` for all options
  - `platform`: deployment platform; supports Windows/Linux/Android/iOS
  - `soc`: deployment hardware; supports x86/x86-NVIDIA-GPU/ARM/Jetson
  - `save_dir`: directory in which to save the downloaded SDK

## SDK Usage

### 1 Cloud + server deployment
- Linux (X86 CPU, NVIDIA GPU)
    - [C++ inference deployment (incl. video streams)](./docs/Linux-CPP-SDK-Inference.md)
    - [C++ serving deployment](./docs/Linux-CPP-SDK-Serving.md)
    - [Python inference deployment](./docs/Linux-Python-SDK-Inference.md)
    - [Python serving deployment](./docs/Linux-Python-SDK-Serving.md)
- Windows (X86 CPU, NVIDIA GPU)
    - [C++ inference deployment (incl. video streams)](./docs/Windows-CPP-SDK-Inference.md)
    - [C++ serving deployment](./docs/Windows-CPP-SDK-Serving.md)
    - [Python inference deployment](./docs/Windows-Python-SDK-Inference.md)
    - [Python serving deployment](./docs/Windows-Python-SDK-Serving.md)

### 2 Edge deployment
- ArmLinux (NVIDIA Jetson Nano/TX2/Xavier)
    - [C++ inference deployment (incl. video streams)](./docs/Jetson-Linux-CPP-SDK-Inference.md)
    - [C++ serving deployment](./docs/Jetson-Linux-CPP-SDK-Serving.md)

### 3 Device deployment
- ArmLinux (ARM CPU)
    - [C++ inference deployment (incl. video streams)](./docs/ARM-Linux-CPP-SDK-Inference.md)
    - [C++ serving deployment](./docs/ARM-Linux-CPP-SDK-Serving.md)
    - [Python inference deployment](./docs/ARM-Linux-Python-SDK-Inference.md)
    - [Python serving deployment](./docs/ARM-Linux-Python-SDK-Serving.md)

### 4 Mobile deployment
- [iOS deployment](./docs/iOS-SDK.md)
- [Android deployment](./docs/Android-SDK.md)

### 5 Custom model deployment
- [Quickly swap in your own model](./docs/Replace-Model-With-Anther-One.md)

## Community
- **Join the community 👬:** Scan the QR code with WeChat and fill in the questionnaire to join the discussion group, and talk through inference-deployment pain points with other developers

<div align="center">
<img src="https://user-images.githubusercontent.com/54695910/175854075-2c0f9997-ed18-4b17-9aaf-1b43266d3996.jpeg" width = "200" height = "200" />
</div>

## Acknowledgements

SDK generation and download in this project use the free open capabilities of [EasyEdge](https://ai.baidu.com/easyedge/app/openSource); our thanks once again.

## License

FastDeploy is licensed under the [Apache-2.0 open-source license](./LICENSE).
734
ThirdPartyNotices.txt
Normal file
@@ -0,0 +1,734 @@
|
||||
This project depends on some open source projects, list as below
|
||||
|
||||
--------
|
||||
1. https://github.com/protocolbuffers/protobuf
|
||||
|
||||
Copyright 2008 Google Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Code generated by the Protocol Buffer compiler is owned by the owner
|
||||
of the input file used when generating it. This code is not
|
||||
standalone and requires a support library to be linked with it. This
|
||||
support library is itself covered by the above license.
|
||||
|
||||
--------
|
||||
2. https://github.com/onnx/onnx
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
--------
|
||||
3. https://github.com/microsoft/onnxruntime
|
||||
|
||||
MIT License
|
||||
|
||||
Copyright (c) Microsoft Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
--------
|
||||
4. https://github.com/pybind/pybind11
|
||||
|
||||
Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing of
|
||||
external contributions to this project including patches, pull requests, etc.
|
||||
|
||||
--------
|
||||
5. https://github.com/onnx/onnx-tensorrt

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.

"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:

(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.

You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright 2021 NVIDIA Corporation

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

--------
6. https://github.com/opencv/opencv

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.

"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:

(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.

You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

--------
7. https://github.com/jbeder/yaml-cpp

Copyright (c) 2008-2015 Jesse Beder.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
1 VERSION_NUMBER Normal file
@@ -0,0 +1 @@
0.3.0
@@ -3,4 +3,5 @@ cd $path

pip install pre-commit
pip install yapf
pip install cpplint
pre-commit install
49 demo/cpp/vision/yolov5.cc Normal file
@@ -0,0 +1,49 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/vision.h"

int main() {
  auto model = fastdeploy::vision::ultralytics::YOLOv5("yolov5s.onnx");
  model.EnableDebug();
  if (!model.Initialized()) {
    std::cout << "Init Failed." << std::endl;
    return -1;
  }
  cv::Mat im = cv::imread("bus.jpg");

  // Warm up: run prediction several times on clones of the input image.
  for (size_t i = 0; i < 10; ++i) {
    auto im1 = im.clone();
    fastdeploy::vision::DetectionResult res;
    if (!model.Predict(&im1, &res)) {
      std::cout << "Predict Failed." << std::endl;
      return -1;
    }
  }

  // Final run: predict once, visualize the detections, and save the image.
  {
    fastdeploy::vision::DetectionResult res;
    auto vis_im = im.clone();
    if (!model.Predict(&im, &res)) {
      std::cout << "Predict Failed." << std::endl;
      return -1;
    }

    fastdeploy::vision::Visualize::VisDetection(&vis_im, res);
    cv::imwrite("vis.jpg", vis_im);
    // Print Detection Result
    std::cout << res.Str() << std::endl;
  }
  return 0;
}
10 demo/python/vision/yolov5.py Normal file
@@ -0,0 +1,10 @@
import fastdeploy as fd
import cv2

# Fetch the model and a test image
# wget https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5s.onnx
# wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg
model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx")
im = cv2.imread("bus.jpg")
result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5)
print(result)
@@ -1,404 +0,0 @@
# Introduction

This document describes the model SDKs in FastDeploy for the ARM Linux C++ environment: (1) the inference deployment steps; (2) the full model-inference API, so that developers can understand the project before building on it.
For ARM Linux Python, refer to [Inference deployment in the ARM Linux Python environment](./ARM-Linux-Python-SDK-Inference.md).

**Note**: some models (e.g. Tinypose, OCR) support image inference only, not video inference.

<!--ts-->

* [Introduction](#introduction)

* [Environment Setup](#environment-setup)

  * [1. Supported Hardware](#1-supported-hardware)
  * [2. Software Environment](#2-software-environment)

* [Quick Start](#quick-start)

  * [1. Project Layout](#1-project-layout)
  * [2. Running the Demos](#2-running-the-demos)
    * [2.1 Image Prediction](#21-image-prediction)
    * [2.2 Video Stream Prediction](#22-video-stream-prediction)

* [Prediction API Walkthrough](#prediction-api-walkthrough)

  * [1. SDK Runtime Configuration](#1-sdk-runtime-configuration)
  * [2. Initializing the Predictor](#2-initializing-the-predictor)
  * [3. Running Inference](#3-running-inference)
    * [3.1 Image Prediction](#31-image-prediction)
    * [3.2 Video Prediction](#32-video-prediction)

* [FAQ](#faq)

<!--te-->
# Environment Setup

## 1. Supported Hardware

Supported ARM architectures: aarch64 and armv7hf.

## 2. Software Environment

1. Requirements for running the prebuilt binaries:

* gcc: 5.4 or later (GLIBCXX_3.4.22)
  * Check the gcc version on Linux (the command may differ across distributions): `gcc --version`
  * Check the GLIBCXX versions of the C++ runtime on Linux (the library path may differ across distributions): `strings /usr/lib64/libstdc++.so.6 | grep GLIBCXX`
* glibc: 2.23 or later
  * Check on Linux: `ldd --version`

2. Requirements for building from source (secondary development):

In addition to the gcc, GLIBCXX, and glibc requirements above, cmake must satisfy:

* cmake: 3.0 or later

  * Check on Linux: `cmake --version`
# Quick Start

## 1. Project Layout

Depending on your model, target chip, and operating system, download the corresponding SDK from the [PaddlePaddle open-source models page](https://ai.baidu.com/easyedge/app/openSource) or from [GitHub](https://github.com/PaddlePaddle/FastDeploy). The SDK layout is as follows:

```
.EasyEdge-Linux-m43157-b97741-x86
├── RES                  # Model resources; one model set adapts to different hardware, OSes, and deployment modes
│   ├── conf.json        # Needed for the app name on Android/iOS
│   ├── model            # Model structure file
│   ├── params           # Model parameter file
│   ├── label_list.txt   # Model label file
│   ├── infer_cfg.json   # Pre/post-processing configuration
├── ReadMe.txt
├── cpp                  # C++ SDK layout
│   ├── baidu_easyedge_ocr_linux_cpp_aarch64_ARM_gcc5.4_v1.5.1_20220530.tar.gz  # C++ package for armv8 hardware; extract the package matching your hardware
│   │   ├── ReadMe.txt
│   │   ├── bin          # Ready-to-run binaries
│   │   ├── include      # Headers for secondary development
│   │   ├── lib          # Libraries needed for secondary development
│   │   ├── src          # Sample projects for secondary development
│   │   └── thirdparty   # Third-party dependencies
│   └── baidu_easyedge_ocr_linux_cpp_armv7l_armv7hf_ARM_gcc5.4_v1.5.1_20220530.tar.gz  # C++ package for armv7 hardware; extract the package matching your hardware
└── python               # Python SDK files
```

**Notes**:

1. [OCR must be built from source] Because of the specifics of the OCR task, this SDK does not ship prebuilt executables in the `bin` folder. Once the gcc and cmake requirements in this document are satisfied, build the executables under `src/demo*` as needed.
2. [OCR supports image inference only, not video stream inference.]
3. For the environment requirements and usage of ARM-Linux-Python, refer to [Inference deployment in the ARM Linux Python environment](./ARM-Linux-Python-SDK.md).

## 2. Running the Demos

> The model resources (the RES folder inside the archive) are packaged into the downloaded SDK by default. Copy the whole tarball to the target device first, then extract it there.

The SDK ships with prebuilt binaries that can be run directly. All the examples below are executed from the `cpp/bin` directory.
### 2.1 Image Prediction

```bash
./easyedge_image_inference {path to model RES folder} {path to test image}
```

Sample output:

<div align=center><img src="https://user-images.githubusercontent.com/54695910/175855351-68d1a4f0-6226-4484-b190-65f1ac2c7128.png" width="400"></div>

```bash
> ./easyedge_image_inference ../../../../RES 2.jpeg
2019-02-13 16:46:12,659 INFO [EasyEdge] [easyedge.cpp:34] 140606189016192 Baidu EasyEdge Linux Development Kit 0.2.1(20190213)
2019-02-13 16:46:14,083 INFO [EasyEdge] [paddlev2_edge_predictor.cpp:60] 140606189016192 Allocate graph success.
2019-02-13 16:46:14,326 DEBUG [EasyEdge] [paddlev2_edge_predictor.cpp:143] 140606189016192 Inference costs 168 ms
1, 1:txt_frame, p:0.994905 loc: 0.168161, 0.153654, 0.920856, 0.779621
Done
```
### 2.2 Video Stream Prediction

```
./easyedge_video_inference {path to model RES folder} {video_type} {video_src_path}
```

`video_type` supports three values:

```
video_type : 1  // local video file
video_type : 2  // camera index
video_type : 3  // network video stream
```

`video_src_path` is the local video path, the local camera id, or the network stream address corresponding to the `video_type` value, e.g.:

```
local video file:      ./easyedge_video_inference {path to model RES folder} 1 ~/my_video_file.mp4
local camera:          ./easyedge_video_inference {path to model RES folder} 2 1  # /dev/video1
network video stream:  ./easyedge_video_inference {path to model RES folder} 3 rtmp://192.168.x.x:8733/live/src
```

Note: the paths above are placeholders; prepare your own test images/videos and fill in the real paths.
# Prediction API Walkthrough

This chapter introduces the inference API alongside the demo from [2. Running the Demos](#2-running-the-demos), so that developers can build on it. For the full API, refer to the `include/easyedge/easyedge*.h` headers. Image and video inference involve the three API calls marked by the `step` comments in the snippet below.

> ❗Note:<br>
> (1) The `src` folder contains complete, buildable cmake project examples; it is recommended to get familiar with [cmake basics](https://cmake.org/cmake/help/latest/guide/tutorial/index.html) first. <br>
> (2) Prefer the usage flow and notes of the demo projects shipped with the SDK. When you hit an error, consult the comments, explanations, and log messages first.

```cpp
// step 1: configure the SDK runtime parameters
EdgePredictorConfig config;
config.model_dir = {path to model folder};

// step 2: create and initialize a Predictor; choose a suitable engine here
auto predictor = global_controller()->CreateEdgePredictor(config);

// step 3-1: predict an image
auto img = cv::imread({image path});
std::vector<EdgeResultData> results;
predictor->infer(img, results);

// step 3-2: predict a video
std::vector<EdgeResultData> results;
FrameTensor frame_tensor;
VideoConfig video_config;
video_config.source_type = static_cast<SourceType>(video_type);  // see easyedge_video.h for the definition of source_type
video_config.source_value = video_src;
/*
... more video_configs: set the VideoConfig options as needed
*/
auto video_decoding = CreateVideoDecoding(video_config);
while (video_decoding->next(frame_tensor) == EDGE_OK) {
    results.clear();
    if (frame_tensor.is_needed) {
        predictor->infer(frame_tensor.frame, results);
        render(frame_tensor.frame, results, predictor->model_info().kind);
    }
    //video_decoding->display(frame_tensor);  // display the current frame; must be enabled in video_config
    //video_decoding->save(frame_tensor);     // write the current frame to a video file; must be enabled in video_config
}
```

To customize the library search path or the gcc path, edit the CMakeList.txt of the corresponding demo project.
## 1. SDK Runtime Configuration

SDK parameters are configured through `EdgePredictorConfig::set_config` and `global_controller()->set_config`. This demo only sets the model directory and keeps the defaults for everything else. For the full set of supported runtime parameters, see the detailed notes in the development kit headers (`include/easyedge/easyedge_xxxx_config.h`).

Parameters are configured as follows:

```
EdgePredictorConfig config;
config.model_dir = {path to model folder};
```
## 2. Initializing the Predictor

* Interface

```cpp
auto predictor = global_controller()->CreateEdgePredictor(config);
predictor->init();
```

If the return value is non-zero, check the output log to diagnose the error.
## 3. Running Inference

### 3.1 Image Prediction

> The demo shows how the prediction interface infer() takes the image as a cv::Mat& and writes the inference result into std::vector<EdgeResultData>& result. For more ways to use infer(), follow the parameter documentation in the `easyedge.h` header and pass in what you need.

* Interface input

```cpp
/**
 * @brief
 * generic interface
 * @param image: must be BGR , HWC format (opencv default)
 * @param result
 * @return
 */
virtual int infer(cv::Mat& image, std::vector<EdgeResultData>& result) = 0;
```

The image must be in OpenCV's default BGR, HWC format.

* Interface output

`EdgeResultData` carries the classification and location information:

```cpp
struct EdgeResultData {
    int index;          // index of the classification result
    std::string label;  // label of the classification result
    float prob;         // confidence

    // used for object detection and image segmentation:
    float x1, y1, x2, y2;  // (x1, y1): top-left corner; (x2, y2): bottom-right corner; all are ratios in 0~1 relative to width/height.

    // used for image segmentation:
    cv::Mat mask;          // 0/1 mask
    std::string mask_rle;  // run-length-encoded mask
};
```

*** About rectangle coordinates ***

x1 * image width = x coordinate of the detection box's top-left corner

y1 * image height = y coordinate of the detection box's top-left corner

x2 * image width = x coordinate of the detection box's bottom-right corner

y2 * image height = y coordinate of the detection box's bottom-right corner

*** About the segmentation mask ***

```
cv::Mat mask is a 2-D array of the image mask, e.g.
{
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
where 1 marks the target region and 0 the background
```

*** About the segmentation mask_rle ***

This field returns the run-length encoding of the mask; see the [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) for how to decode it (a decoding sketch also follows below).

The fields above can be parsed following the OpenCV drawing logic in the demo files.
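For reference, here is a minimal Python sketch of decoding a run-length-encoded mask back into a 2-D 0/1 array. It assumes a plain row-major encoding that alternates run lengths of background and target pixels, starting with background; that layout is an assumption on my part, so confirm the exact format against the demo linked above.

```python
import numpy as np

def decode_rle(counts, height, width):
    """Decode alternating 0/1 run lengths (row-major) into a mask.

    Assumes `counts` starts with the number of leading background pixels;
    this layout is an assumption, not the SDK's documented format.
    """
    flat = np.zeros(height * width, dtype=np.uint8)
    pos, value = 0, 0
    for run in counts:
        flat[pos:pos + run] = value
        pos += run
        value = 1 - value  # alternate between background (0) and target (1)
    return flat.reshape(height, width)

# Example: a 3x4 mask whose middle holds a 2-pixel run of 1s
print(decode_rle([5, 2, 5], 3, 4))
```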
### 3.2 Video Prediction

The SDK provides `VideoDecoding`, a utility class that parses camera input, video files, and network video streams and offers convenience functions for fetching video frames. The `VideoConfig` struct controls the parsing strategy, frame-sampling strategy, resolution adjustment, and result-video storage. A sampled frame can be passed directly to the SDK infer interface for prediction.

* Interface input

class `VideoDecoding`:

```
/**
 * @brief fetch the next frame of the input source
 * @param frame_tensor
 * @return
 */
virtual int next(FrameTensor &frame_tensor) = 0;

/**
 * @brief display the video frame held in frame_tensor
 * @param frame_tensor
 * @return
 */
virtual int display(const FrameTensor &frame_tensor) = 0;

/**
 * @brief write the video frame held in frame_tensor to a local video file
 * @param frame_tensor
 * @return
 */
virtual int save(FrameTensor &frame_tensor) = 0;

/**
 * @brief get the fps of the video
 * @return
 */
virtual int get_fps() = 0;
/**
 * @brief get the width of the video
 * @return
 */
virtual int get_width() = 0;

/**
 * @brief get the height of the video
 * @return
 */
virtual int get_height() = 0;
```

struct `VideoConfig`

```
/**
 * @brief options for the video source, frame-sampling strategy, and storage strategy
 */
struct VideoConfig {
    SourceType source_type;            // input source type
    std::string source_value;          // input source address, e.g. video file path, camera index, network stream address
    int skip_frames{0};                // frame skipping: sample one frame every skip_frames frames and set its is_needed to true
    int retrieve_all{false};           // whether to retrieve all frames for display/storage; frames not matching the skip_frames policy have is_needed set to false
    int input_fps{0};                  // set the video fps before frame sampling
    Resolution resolution{Resolution::kAuto};  // sampling resolution; only effective for cameras

    bool enable_display{false};        // not supported by default
    std::string window_name{"EasyEdge"};
    bool display_all{false};           // whether to display all frames; if false, only frames sampled per skip_frames are displayed

    bool enable_save{false};
    std::string save_path;             // path for saving frames as a video file
    bool save_all{false};              // whether to save all frames; if false, only frames sampled per skip_frames are saved

    std::map<std::string, std::string> conf;
};
```

| No. | Field | Meaning |
| --- | -------------- | ---------------------------------------------------------------------------------------------------------------------------------- |
| 1 | `source_type` | Input source type: video file, camera, or network video stream, with values 1, 2, and 3 respectively |
| 2 | `source_value` | If `source_type` is a video file, the full path to the file; if a camera, the camera index (e.g. 0 for `/dev/video0`); if a network stream, the full stream address. |
| 3 | `skip_frames` | Frame skipping: sample one frame every skip_frames frames and set its is_needed to true. Frames marked is_needed are the ones used for prediction; the rest are skipped without prediction. |
| 4 | `retrieve_all` | If true, all frames are retrieved and returned for display or storage, regardless of frame skipping. |
| 5 | `input_fps` | Sets the fps before frame sampling |
| 6 | `resolution` | Camera sampling resolution; see `easyedge_video.h` for its values. Note that the adjustment only takes effect when the input source is a camera |
| 7 | `conf` | Advanced options; some settings are passed through this map |

*** Notes: ***

1. `VideoConfig` does not support the `display` feature out of the box. To use it, build OpenCV with the GTK option enabled yourself.

2. When sampling from a camera, if the resolution adjustment set through `resolution` has no effect, add the following option:

```
video_config.conf["backend"] = "2";
```

3. CSI cameras on some devices are not yet supported; if you run into problems, report them through a ticket, the QQ group, or the WeChat group.

For the full call flow, refer to `demo_video_inference` in the SDK.
# FAQ

1. How do I handle "undefined reference" / "error while loading shared libraries"?

> e.g.: ./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory

When this happens, locate the library in question and set LD_LIBRARY_PATH, or install the missing library.

> Example 1: libverify.so.1: cannot open shared object file: No such file or directory
> The linker cannot find libverify.so; this can usually be fixed with export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib (use the actual directory containing libverify.so after the colon)

> Example 2: libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory
> The linker cannot find libopencv_videoio.so; this can usually be fixed with export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib (use the actual directory containing libopencv_videoio.so after the colon)

> Example 3: GLIBCXX_X.X.X not found
> The required glibc version cannot be found; make sure the system gcc version >= the SDK's gcc version. Search the web for how to upgrade gcc/glibc.

2. When running a binary, it reports "libverify.so cannot open shared object file"

cmake may not have set the rpath correctly; set LD_LIBRARY_PATH to the SDK's lib folder and run again:

```bash
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo
```

3. Build error: "file format not recognized"

This is usually caused by file metadata being lost while copying the SDK. Copy the whole archive to the target device, then extract and build there.
@@ -1,318 +0,0 @@
# Introduction

This document describes the model SDKs in FastDeploy for the ARM Linux C++ environment: (1) the serving-based inference deployment steps; (2) the full model-inference API, so that developers can understand the project before building on it.
For ARM Linux Python, refer to [HTTP inference deployment in the ARM Linux Python environment](./ARM-Linux-Python-SDK-Serving.md).

**Note**: some models (e.g. OCR) do not support serving-based inference.

<!--ts-->

* [Introduction](#introduction)

* [Installation](#installation)

  * [1. Supported Hardware](#1-supported-hardware)
  * [2. Software Environment](#2-software-environment)

* [Quick Start](#quick-start)

  * [1. Project Layout](#1-project-layout)
  * [2. Testing the HTTP Demo](#2-testing-the-http-demo)
    * [2.1 Starting the HTTP Prediction Service](#21-starting-the-http-prediction-service)

* [HTTP API Walkthrough](#http-api-walkthrough)

  * [1. Starting the http service](#1-starting-the-http-service)
  * [2. Calling the http service](#2-calling-the-http-service)
    * [2.1 http request, option 1: without base64-encoded images](#21-http-request-option-1-without-base64-encoded-images)
    * [2.2 http request, option 2: with base64-encoded images](#22-http-request-option-2-with-base64-encoded-images)
  * [3. http response data](#3-http-response-data)

* [FAQ](#faq)

<!--te-->
# Installation

## 1. Supported Hardware

Supported ARM architectures: aarch64 and armv7hf.

## 2. Software Environment

1. Requirements for running the prebuilt binaries:

* gcc: 5.4 or later (GLIBCXX_3.4.22)
  * Check the gcc version on Linux (the command may differ across distributions): `gcc --version`
  * Check the GLIBCXX versions of the C++ runtime on Linux (the library path may differ across distributions): `strings /usr/lib64/libstdc++.so.6 | grep GLIBCXX`
* glibc: 2.23 or later
  * Check on Linux: `ldd --version`

2. Requirements for building from source (secondary development):

In addition to the gcc, GLIBCXX, and glibc requirements above, cmake must also satisfy:

* cmake: 3.0 or later

  * Check on Linux: `cmake --version`
# Quick Start

## 1. Project Layout

Depending on your model, target chip, and operating system, download the corresponding SDK from the [PaddlePaddle open-source models page](https://ai.baidu.com/easyedge/app/openSource) or from [GitHub](https://github.com/PaddlePaddle/FastDeploy). After extraction, the SDK layout is as follows:

```
.EasyEdge-Linux-m43157-b97741-x86
├── RES                  # Model resources; one model set adapts to different hardware, OSes, and deployment modes
│   ├── conf.json        # Needed for the app name on Android/iOS
│   ├── model            # Model structure file
│   ├── params           # Model parameter file
│   ├── label_list.txt   # Model label file
│   ├── infer_cfg.json   # Pre/post-processing configuration
├── ReadMe.txt
├── cpp                  # C++ SDK layout
│   └── baidu_easyedge_linux_cpp_x86_64_CPU.Generic_gcc5.4_v1.4.0_20220325.tar.gz
│       ├── bin          # Ready-to-run binaries
│       ├── include      # Headers for secondary development
│       ├── lib          # Libraries needed for secondary development
│       ├── src          # Sample projects for secondary development
│       └── thirdparty   # Third-party dependencies
└── python               # Python SDK files
```

## 2. Testing the HTTP Demo

> The model resources (the RES folder inside the archive) are packaged into the downloaded SDK by default. Copy the whole tarball to the target device first, then extract it there.

The SDK ships with prebuilt binaries that can be run directly. All the examples below are executed from the `cpp/bin` directory.
### 2.1 Starting the HTTP Prediction Service

```
./easyedge_serving {path to model RES folder}
```

After startup, the log shows the device IP and the 24401 port, like:

```
HTTP is now serving at 0.0.0.0:24401
```

You can then open a browser at `http://0.0.0.0:24401` (replace the device IP and the 24401 port with what your machine shows) and pick an image to test with.

<div align=center><img src="https://user-images.githubusercontent.com/54695910/175855495-cd8d46ec-2492-4297-b3e4-2bda4cd6727c.png" width="600"></div>

Alternatively, call the HTTP interface to access the service directly; see the interface notes in the [HTTP API Walkthrough](#http-api-walkthrough) below.
# HTTP API Walkthrough

This chapter introduces the API around the [2.1 HTTP Demo](#21-starting-the-http-prediction-service), so that developers can embed the runtime into their own programs. For the full API, refer to the `include/easyedge/easyedge*.h` headers. The http service has a server side and a client side; the demo implements `option 1: browser-style requests` without base64-encoded images, and the remaining options below can be implemented as needed.

## 1. Starting the http service

The http service can be started directly with `bin/easyedge_serving`, or by adapting the logic in `src/demo_serving.cpp`:

```cpp
/**
 * @brief start a simple demo http server.
 * This method blocks until sigint/sigterm is received.
 * Inside the http server, image decoding runs on the CPU and may slow down inference.
 * @tparam ConfigT
 * @param config
 * @param host
 * @param port
 * @param service_id service_id user parameter, uri '/get/service_id' will respond this value with 'text/plain'
 * @param instance_num number of instances; tune it against memory/VRAM/latency requirements
 * @return
 */
template<typename ConfigT>
int start_http_server(
        const ConfigT &config,
        const std::string &host,
        int port,
        const std::string &service_id,
        int instance_num = 1);
```
## 2. Calling the http service

> You can open a browser at `http://{device ip}:24401` and pick an image to test with.

### 2.1 http request, option 1: without base64-encoded images

GET parameters in the URL:

| Parameter | Description | Default |
| --------- | ------------------------- | -------------------------------------------------- |
| threshold | confidence threshold, 0~1 | if omitted, the model's recommended threshold is used |

The HTTP POST body is the raw binary content of the image (no base64, no JSON).

Python request example:

```Python
import requests

with open('./1.jpg', 'rb') as f:
    img = f.read()
result = requests.post(
    'http://127.0.0.1:24401/',
    params={'threshold': 0.1},
    data=img).json()
```
### 2.2 http request, option 2: with base64-encoded images

HTTP method: POST
Headers:

| Parameter | Value |
| ------------ | ---------------- |
| Content-Type | application/json |

**Request body**:

* Classification networks:
  body example:

```
{
    "image": "<base64 data>",
    "top_num": 5
}
```

Body parameters:

| Parameter | Required | Type | Range | Description |
| ------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- |
| image | yes | string | - | Image data, base64 encoded. The base64-encoded image must be at most 4 MB, with the shortest side at least 15 px and the longest side at most 4096 px; jpg/png/bmp formats are supported. **Strip the data-URI header.** |
| top_num | no | number | - | Number of classes to return; if omitted, all classification results are returned |

* Detection and segmentation networks:
  body example:

```
{
    "image": "<base64 data>"
}
```

Body parameters:

| Parameter | Required | Type | Range | Description |
| --------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- |
| image | yes | string | - | Image data, base64 encoded. The base64-encoded image must be at most 4 MB, with the shortest side at least 15 px and the longest side at most 4096 px; jpg/png/bmp formats are supported. **Strip the data-URI header.** |
| threshold | no | number | - | Defaults to the recommended threshold; can also be set as needed |

Python request example:

```Python
import base64
import requests

def main():
    with open("path/to/image", 'rb') as f:
        result = requests.post("http://{server ip}:24401/", json={
            "image": base64.b64encode(f.read()).decode("utf8")
        })
    # print(result.request.body)
    # print(result.request.headers)
    print(result.content)


if __name__ == '__main__':
    main()
```
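Since the service rejects images that violate the size limits in the tables above, it can help to validate locally before encoding. Below is a minimal Python sketch under that assumption; the limits are taken from the tables, and OpenCV is used only to read the image dimensions:

```python
import base64
import cv2

def check_and_encode(path, max_bytes=4 * 1024 * 1024, min_side=15, max_side=4096):
    """Validate an image against the documented limits, then base64-encode it."""
    img = cv2.imread(path)
    if img is None:
        raise ValueError("not a readable jpg/png/bmp image: %s" % path)
    h, w = img.shape[:2]
    if min(h, w) < min_side or max(h, w) > max_side:
        raise ValueError("image side lengths out of range: %dx%d" % (w, h))
    with open(path, 'rb') as f:
        encoded = base64.b64encode(f.read()).decode("utf8")
    if len(encoded) > max_bytes:
        raise ValueError("base64 payload exceeds 4 MB")
    return encoded
```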
## 3. http response data

| Field | Type | Notes |
| ---------- | ------ | ------------------------------------------------------------------------ |
| error_code | Number | 0 on success; for non-zero values, see message for the specific error |
| results | Array | The recognition results; see the `Image Prediction - return format` section for the field meanings |
| cost_ms | Number | Prediction time in ms, excluding network round-trip time |

Example response:

```json
{
    "cost_ms": 52,
    "error_code": 0,
    "results": [
        {
            "confidence": 0.94482421875,
            "index": 1,
            "label": "IronMan",
            "x1": 0.059185408055782318,
            "x2": 0.18795496225357056,
            "y1": 0.14762254059314728,
            "y2": 0.52510076761245728,
            "mask": "...",
            "trackId": 0
        }
    ]
}
```

(`mask` is an image segmentation field; `trackId` is an object tracking field.)

*** About rectangle coordinates ***

x1 * image width = x coordinate of the detection box's top-left corner

y1 * image height = y coordinate of the detection box's top-left corner

x2 * image width = x coordinate of the detection box's bottom-right corner

y2 * image height = y coordinate of the detection box's bottom-right corner

A response-parsing sketch follows at the end of this section.

*** About the segmentation mask ***

```
cv::Mat mask is a 2-D array of the image mask, e.g.
{
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
where 1 marks the target region and 0 the background
```
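To tie the fields together, here is a minimal Python sketch that posts an image (option 1, raw binary body), checks `error_code`, and converts the normalized box coordinates back to pixels. The URL and image path are placeholders:

```python
import cv2
import requests

image_path = "1.jpg"  # placeholder test image
img = cv2.imread(image_path)
h, w = img.shape[:2]

with open(image_path, 'rb') as f:
    resp = requests.post('http://127.0.0.1:24401/', data=f.read()).json()

if resp["error_code"] != 0:
    raise RuntimeError("prediction failed: %s" % resp)

for det in resp["results"]:
    # x1/y1/x2/y2 are ratios in 0~1; scale them by the image size
    left, top = int(det["x1"] * w), int(det["y1"] * h)
    right, bottom = int(det["x2"] * w), int(det["y2"] * h)
    print(det["label"], det["confidence"], (left, top, right, bottom))
```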
# FAQ

1. How do I handle "undefined reference" / "error while loading shared libraries"?

> e.g.: ./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory

When this happens, locate the library in question and set LD_LIBRARY_PATH, or install the missing library.

> Example 1: libverify.so.1: cannot open shared object file: No such file or directory
> The linker cannot find libverify.so; this can usually be fixed with export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib (use the actual directory containing libverify.so after the colon)

> Example 2: libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory
> The linker cannot find libopencv_videoio.so; this can usually be fixed with export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib (use the actual directory containing libopencv_videoio.so after the colon)

> Example 3: GLIBCXX_X.X.X not found
> The required glibc version cannot be found; make sure the system gcc version >= the SDK's gcc version. Search the web for how to upgrade gcc/glibc.

2. Requests through libcurl to the http service are noticeably slow

This is caused by libcurl sending "Expect: 100-continue" and the server waiting for data; add an empty Expect header:

```cpp
headers = curl_slist_append(headers, "Expect:");
```

3. When running a binary, it reports "libverify.so cannot open shared object file"

cmake may not have set the rpath correctly; set LD_LIBRARY_PATH to the SDK's lib folder and run again:

```bash
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo
```

4. Build error: "file format not recognized"

This is usually caused by file metadata being lost while copying the SDK. Copy the whole archive to the target device, then extract and build there.
@@ -1,371 +0,0 @@
|
||||
# 简介
|
||||
|
||||
本文档以[千分类模型_MobileNetV3](https://ai.baidu.com/easyedge/app/openSource)为例,介绍FastDeploy中的模型SDK, 在**ARM Linux Python** 环境下:(1)图像推理部署步骤; (2)介绍模型推流全流程API,方便开发者了解项目后二次开发。其中ARM Linux C++请参考[ARM Linux C++环境下的推理部署](./ARM-Linux-CPP-SDK-Inference.md)文档。
|
||||
|
||||
**注意**:部分模型(如Tinypose、OCR等)仅支持图像推理,不支持视频推理。
|
||||
|
||||
<!--ts-->
|
||||
|
||||
* [简介](#简介)
|
||||
|
||||
* [环境准备](#环境准备)
|
||||
|
||||
* [1.SDK下载](#1sdk下载)
|
||||
* [2.硬件支持](#2硬件支持)
|
||||
* [3.python环境](#3python环境)
|
||||
* [4.安装依赖](#4安装依赖)
|
||||
* [4.1.安装paddlepaddle](#41安装paddlepaddle)
|
||||
* [4.2.安装EasyEdge Python Wheel 包](#42安装easyedge-python-wheel-包)
|
||||
|
||||
* [快速开始](#快速开始)
|
||||
|
||||
* [1.文件结构说明](#1文件结构说明)
|
||||
* [2.测试Demo](#2测试demo)
|
||||
* [2.1预测图像](#21预测图像)
|
||||
|
||||
* [Demo API介绍](#demo-api介绍)
|
||||
|
||||
* [1.基础流程](#1基础流程)
|
||||
* [2.初始化](#2初始化)
|
||||
* [3.SDK参数配置](#3sdk参数配置)
|
||||
* [4.预测图像](#4预测图像)
|
||||
|
||||
* [FAQ](#faq)
|
||||
|
||||
<!--te-->
|
||||
|
||||
# Environment Setup

## 1. SDK Download

Depending on your model, target chip, and operating system, download the corresponding SDK from the [PaddlePaddle open-source models page](https://ai.baidu.com/easyedge/app/openSource) or from [GitHub](https://github.com/PaddlePaddle/FastDeploy).

```shell
EasyEdge-Linux-x86--[chip]
├──...
├──python              # Linux Python SDK
    ├── # EasyEdge wheel for a specific Python version, usable for secondary development
    ├── BaiduAI_EasyEdge_SDK-1.3.1-cp36-cp36m-linux_aarch64.whl
    ├── infer_demo     # complete demo files
    │   ├── demo_xxx.py      # end-to-end inference demo with pre/post-processing
    │   └── demo_serving.py  # demo exposing an http service
    ├── tensor_demo    # for learning custom pre/post-processing
    │   └── demo_xxx.py
```

## 2. Supported Hardware

Supported ARM architectures: aarch64 and armv7hf.
## 3. Python Environment

> The ARM Linux SDK supports Python 3.6 only.

Use the following command to check the installed Python version. If the local version does not match, configure the SDK directory with a Python version manager such as [pyenv](https://github.com/pyenv/pyenv) or [anaconda](https://www.anaconda.com/).

```shell
$python3 --version
```

Then confirm that pip meets the requirement of version 20.2.2 or later. See the [official guide](https://pip.pypa.io/en/stable/installation/) for the detailed pip installation steps.

```shell
$python3 -m pip --version
```
## 4. Installing Dependencies

### 4.1 Installing paddlepaddle

Install the PaddlePaddle wheel matching your target chip (CPU/GPU).

On an `armv8 CPU platform`, install with:

```shell
python3 -m pip install http://aipe-easyedge-public.bj.bcebos.com/easydeploy/paddlelite-2.11-cp36-cp36m-linux_aarch64.whl
```

### 4.2 Installing the EasyEdge Python Wheel

In the `python` directory, install the EasyEdge wheel matching your Python version. On an `armv8 CPU platform`, install with:

```shell
python3 -m pip install -U BaiduAI_EasyEdge_SDK-1.3.1-cp36-cp36m-linux_aarch64.whl
```
# Quick Start

## 1. File Layout

The Python SDK is laid out as follows:

```shell
.EasyEdge-Linux-x86--[chip]
├── RES                  # Model resources; one model set adapts to different hardware, OSes, and deployment modes
│   ├── conf.json        # Needed for the app name on Android/iOS
│   ├── label_list.txt   # Model label file
│   ├── model            # Model structure file
│   ├── params           # Model parameter file
│   └── infer_cfg.json   # Pre/post-processing configuration
├── ReadMe.txt
├── cpp                  # C++ SDK layout
└── python               # Python SDK files
    ├── BaiduAI_EasyEdge_SDK-1.3.1-cp36-cp36m-linux_aarch64.whl  # EasyEdge Python wheel
    ├── infer_demo
        ├── demo_armv8_cpu.py  # image inference
        ├── demo_serving.py    # HTTP serving inference
    └── tensor_demo            # for learning custom pre/post-processing
        ├── demo_armv8_cpu.py
```

## 2. Running the Demos

> The model resources are packaged into the downloaded SDK by default, under the `RES` directory.
### 2.1 Image Prediction

Use the demo files under infer_demo:

```bash
python3 demo_armv8_cpu.py {model RES folder} {path to test image}
```

Sample output:

<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854068-28d27c0a-ef83-43ee-9e89-b65eed99b476.jpg" width="300"></div>

```shell
2022-06-14 14:40:16 INFO [EasyEdge] [demo_nvidia_gpu.py:38] 140518522509120: Init paddlefluid engine...
2022-06-14 14:40:20 INFO [EasyEdge] [demo_nvidia_gpu.py:38] 140518522509120: Paddle version: 2.2.2
{'confidence': 0.9012349843978882, 'index': 8, 'label': 'n01514859 hen'}
```

The result is `index: 8, label: hen`; looking it up in the imagenet [class mapping](https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a) gives the class 'hen', confirming that the prediction is correct.
# Demo API Overview

This chapter introduces the inference API alongside the demo from [Running the Demos](#2-running-the-demos), so that developers can build on it.

## 1. Basic Flow

> ❗Note: prefer the usage flow and notes of the demos shipped with the SDK. When you hit an error, consult the comments, explanations, and log messages first.

`infer_demo/demo_xx_xx.py`

```python
# import the EasyEdge runtime
import BaiduAI.EasyEdge as edge

# create and initialize a prediction Program; choose a suitable engine
pred = edge.Program()
pred.init(model_dir={path to RES folder}, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID)  # x86_64 CPU
# pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID)  # x86_64 Nvidia GPU
# pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE)  # armv8 CPU

# predict an image
res = pred.infer_image({image as numpy.ndarray})

# close the prediction Program when done
pred.close()
```

`infer_demo/demo_serving.py`

```python
import BaiduAI.EasyEdge as edge
from BaiduAI.EasyEdge.serving import Serving

# create and initialize the http service
server = Serving(model_dir={path to RES folder}, license=serial_key)

# run the http service
# see demo_xx_xx.py in the same directory for:
# pred.init(model_dir=xx, device=xx, engine=xx, device_id=xx)
# and adjust the device, device_id, and engine parameters below accordingly
server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID)  # x86_64 CPU
# server.run(host=host, port=port, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID)  # x86_64 Nvidia GPU
# server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE)  # armv8 CPU
```
## 2. Initialization

* Interface

```python
def init(self,
         model_dir,
         device=Device.CPU,
         engine=Engine.PADDLE_FLUID,
         config_file='conf.json',
         preprocess_file='preprocess_args.json',
         model_file='model',
         params_file='params',
         label_file='label_list.txt',
         infer_cfg_file='infer_cfg.json',
         device_id=0,
         thread_num=1
         ):
    """
    Args:
        model_dir: str
        device: BaiduAI.EasyEdge.Device, e.g. Device.CPU
        engine: BaiduAI.EasyEdge.Engine, e.g. Engine.PADDLE_FLUID
        config_file: str
        preprocess_file: str
        model_file: str
        params_file: str
        label_file: str, label file
        infer_cfg_file: file holding the pre/post-processing settings
        device_id: int, device ID
        thread_num: int, number of CPU threads

    Raises:
        RuntimeError, IOError
    Returns:
        bool: True if success
    """
```

If the return value is not True, check the output log to diagnose the error.
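Since init() returns True on success, a minimal guard in a script might look like the sketch below; the model directory value is a placeholder:

```python
import sys
import BaiduAI.EasyEdge as edge

pred = edge.Program()
# "RES" is a placeholder for the extracted model resource folder
if not pred.init(model_dir="RES", device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE):
    # the SDK logs the underlying cause; bail out instead of predicting with a broken Program
    sys.exit("EasyEdge init failed, see the log output above")
```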
## 3. SDK Configuration

When predicting on CPU, set thread_num in init to predict with multiple threads, e.g.:

```python
pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID, thread_num=4)
```

When predicting on GPU, set device_id in init to pick the desired GPU device id, e.g.:

```python
pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID, device_id=0)
```
## 4.预测图像
|
||||
|
||||
* 接口
|
||||
|
||||
```python
|
||||
def infer_image(self, img,
|
||||
threshold=0.3,
|
||||
channel_order='HWC',
|
||||
color_format='BGR',
|
||||
data_type='numpy'):
|
||||
"""
|
||||
|
||||
Args:
|
||||
img: np.ndarray or bytes
|
||||
threshold: float
|
||||
only return result with confidence larger than threshold
|
||||
channel_order: string
|
||||
channel order HWC or CHW
|
||||
color_format: string
|
||||
color format order RGB or BGR
|
||||
data_type: string
|
||||
仅在图像分割时有意义。 'numpy' or 'string'
|
||||
'numpy': 返回已解析的mask
|
||||
'string': 返回未解析的mask游程编码
|
||||
|
||||
Returns:
|
||||
list
|
||||
|
||||
"""
|
||||
```
|
||||
|
||||
* 返回格式: `[dict1, dict2, ...]`
|
||||
|
||||
| 字段 | 类型 | 取值 | 说明 |
|
||||
| ---------- | -------------------- | --------- | ------------------------ |
|
||||
| confidence | float | 0~1 | 分类或检测的置信度 |
|
||||
| label | string | | 分类或检测的类别 |
|
||||
| index | number | | 分类或检测的类别 |
|
||||
| x1, y1 | float | 0~1 | 物体检测,矩形的左上角坐标 (相对长宽的比例值) |
|
||||
| x2, y2 | float | 0~1 | 物体检测,矩形的右下角坐标(相对长宽的比例值) |
|
||||
| mask | string/numpy.ndarray | 图像分割的mask | |
|
||||
|
||||
***关于矩形坐标***
|
||||
|
||||
x1 * 图片宽度 = 检测框的左上角的横坐标
|
||||
|
||||
y1 * 图片高度 = 检测框的左上角的纵坐标
|
||||
|
||||
x2 * 图片宽度 = 检测框的右下角的横坐标
|
||||
|
||||
y2 * 图片高度 = 检测框的右下角的纵坐标
|
||||
|
||||
可以参考 demo 文件中使用 opencv 绘制矩形的逻辑。
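
As a hedged illustration of that drawing logic (not the demo's exact code; `results` is assumed to be the list returned by `infer_image` for a detection model):

```python
import cv2

def draw_boxes(img, results):
    """Draw detection results returned by infer_image onto a BGR image."""
    h, w = img.shape[:2]
    for r in results:
        if 'x1' not in r:
            continue  # classification results carry no box fields
        # x1/y1/x2/y2 are ratios of the image width/height
        pt1 = (int(r['x1'] * w), int(r['y1'] * h))
        pt2 = (int(r['x2'] * w), int(r['y2'] * h))
        cv2.rectangle(img, pt1, pt2, (0, 255, 0), 2)
        cv2.putText(img, '%s %.2f' % (r['label'], r['confidence']),
                    (pt1[0], max(pt1[1] - 4, 0)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    return img
```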

***Result examples***

i) Image classification

```json
{
    "index": 736,
    "label": "table",
    "confidence": 0.9
}
```

ii) Object detection

```json
{
    "index": 8,
    "label": "cat",
    "confidence": 1.0,
    "x1": 0.21289,
    "y1": 0.12671,
    "x2": 0.91504,
    "y2": 0.91211
}
```

iii) Image segmentation

```json
{
    "name": "cat",
    "score": 1.0,
    "location": {
        "left": ...,
        "top": ...,
        "width": ...,
        "height": ...
    },
    "mask": ...
}
```

In the mask field, when data_type is `numpy`, a 2-D array of the image mask is returned:

```
{
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
where 1 marks the target region and 0 the background
```

When data_type is `string`, the run-length encoding of the mask is returned; see the [demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) for how to decode it.
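
The exact run-length format is defined by that demo. Purely as a hypothetical illustration, a decoder for a simple alternating-run scheme (runs of 0s and 1s, starting with 0s) could look like this; do not rely on this layout without checking the demo:

```python
import numpy as np

def decode_rle(runs, height, width):
    """Hypothetical decoder: assumes `runs` lists run lengths that
    alternate between background (0) and foreground (1), starting with 0."""
    flat = np.zeros(height * width, dtype=np.uint8)
    pos, value = 0, 0
    for run in runs:
        if value:
            flat[pos:pos + run] = 1
        pos += run
        value ^= 1  # toggle between background and foreground
    return flat.reshape(height, width)
```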

# FAQ

1. When running the infer_demo files, you get: your generated code is out of date and must be regenerated with protoc >= 3.19.0

Inside the current project, first uninstall protobuf:

```shell
python3 -m pip uninstall protobuf
```

Then install a matching protobuf version:

```shell
python3 -m pip install protobuf==3.19.0
```

# Introduction

Using the [1000-class classification model MobileNetV3](https://ai.baidu.com/easyedge/app/openSource) as an example, this document describes the model SDK in FastDeploy under an **ARM Linux Python** environment: (1) the steps for **serving-based** inference deployment, and (2) the end-to-end inference API, so that developers can understand the project before doing secondary development. For ARM Linux C++, see [HTTP inference deployment under the ARM Linux C++ environment](./ARM-Linux-CPP-SDK-Serving.md).

**Note**: some models (such as OCR) do not support serving-based inference.

<!--ts-->

* [Introduction](#introduction)
* [Environment Setup](#environment-setup)
  * [1. SDK Download](#1-sdk-download)
  * [2. Hardware Support](#2-hardware-support)
  * [3. Python Environment](#3-python-environment)
  * [4. Install Dependencies](#4-install-dependencies)
    * [4.1 Install paddlepaddle](#41-install-paddlepaddle)
    * [4.2 Install the EasyEdge Python Wheel](#42-install-the-easyedge-python-wheel)
* [Quick Start](#quick-start)
  * [1. File Structure](#1-file-structure)
  * [2. Test the Serving Service](#2-test-the-serving-service)
    * [2.1 Start the HTTP Prediction Service](#21-start-the-http-prediction-service)
* [HTTP API Walkthrough](#http-api-walkthrough)
  * [1. Start the HTTP Service](#1-start-the-http-service)
  * [2. Request the HTTP Service](#2-request-the-http-service)
    * [2.1 HTTP Request Without base64 Images](#21-http-request-without-base64-images)
  * [3. HTTP Response Data](#3-http-response-data)
* [FAQ](#faq)

<!--te-->

# Environment Setup

## 1. SDK Download

Depending on your model, target chip and operating system, download the matching SDK from the [PaddlePaddle open-source models page](https://ai.baidu.com/easyedge/app/openSource) or [GitHub](https://github.com/PaddlePaddle/FastDeploy). After extraction the file structure is as follows:

```shell
EasyEdge-Linux-x86-[chip]
├── RES        # model resource folder; can be replaced with another model
├── README.md
├── cpp        # C++ SDK
└── python     # Python SDK
```

## 2. Hardware Support

Currently supported ARM architectures: aarch64 and armv7hf.

## 3. Python Environment

> The ARM Linux SDK only supports Python 3.6.

Use the following command to check the installed Python version. If your local version does not match, install Python following the usual ARM Linux installation methods. (conda is not recommended on ARM Linux, since resources are usually very limited in that scenario.)

```shell
$ python3 --version
```

Then confirm that pip is version 20.2.2 or newer. For detailed pip installation steps, see the [official guide](https://pip.pypa.io/en/stable/installation/).

```shell
$ python3 -m pip --version
```

## 4. Install Dependencies

### 4.1 Install paddlepaddle

Install the PaddlePaddle wheel that matches your target chip (CPU/GPU).

For the `armv8 CPU platform`, install with:

```shell
python3 -m pip install http://aipe-easyedge-public.bj.bcebos.com/easydeploy/paddlelite-2.11-cp36-cp36m-linux_aarch64.whl
```

### 4.2 Install the EasyEdge Python Wheel

In the `python` directory, install the EasyEdge wheel for your Python version. For the `armv8 CPU platform`, install with:

```shell
python3 -m pip install -U BaiduAI_EasyEdge_SDK-1.3.1-cp36-cp36m-linux_aarch64.whl
```

# Quick Start

## 1. File Structure

The Python SDK layout is as follows:

```shell
EasyEdge-Linux-x86-[chip]
├── ...
├── python                    # Linux Python SDK
    ├── # EasyEdge wheel for a specific Python version, usable for secondary development
    ├── BaiduAI_EasyEdge_SDK-1.3.1-cp36-cp36m-linux_aarch64.whl
    ├── infer_demo            # complete demo files
    │   ├── demo_xxx.py       # end-to-end inference demo incl. pre/post-processing
    │   └── demo_serving.py   # demo that provides an HTTP service
    ├── tensor_demo           # for learning custom pre/post-processing
    │   └── demo_xxx.py
```

## 2. Test the Serving Service

> The model resource files are packaged in the downloaded SDK by default, in the `RES` directory.

### 2.1 Start the HTTP Prediction Service

Specify the model folder (default `RES`), the device IP and a port, then run:

```shell
python3 demo_serving.py {model RES folder} {host, default 0.0.0.0} {port, default 24401}
```

After a successful start, the terminal shows:

```shell
...
* Running on {host ip}:24401
```

If the service is deployed on a machine inside the LAN, open a browser at `http://{host ip}:24401` and pick an image to test; it looks like this:

<img src="https://user-images.githubusercontent.com/54695910/175854073-fb8189e5-0ffb-472c-a17d-0f35aa6a8418.png" style="zoom:50%;" />

If it is deployed on a remote machine, you can request the HTTP service for inference as in the `http_client_test()` function in `demo_serving.py`; a minimal sketch follows below.
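
A minimal client sketch (it mirrors the request example later in this document; it assumes the service listens on 127.0.0.1:24401 and `./1.jpg` exists):

```python
import requests

with open('./1.jpg', 'rb') as f:
    img = f.read()

# POST the raw image bytes; `threshold` is an optional GET parameter
result = requests.post('http://127.0.0.1:24401/',
                       params={'threshold': 0.3},
                       data=img).json()
print(result)
```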

# HTTP API Walkthrough

## 1. Start the HTTP Service

The HTTP service is started via the `demo_serving.py` file:

```python
class Serving(object):
    """
    SDK local serving
    """

    def __init__(self, model_dir, license='', model_filename='model', params_filename='params'):

        self.program = None
        self.model_dir = model_dir
        self.model_filename = model_filename
        self.params_filename = params_filename
        self.program_lock = threading.Lock()
        self.license_key = license
        # only ObjectTracking initializes video_processor
        self.video_processor = None

    def run(self, host, port, device, engine=Engine.PADDLE_FLUID, service_id=0, device_id=0, **kwargs):
        """
        Args:
            host : str
            port : str
            device : BaiduAI.EasyEdge.Device, e.g. Device.CPU
            engine : BaiduAI.EasyEdge.Engine, e.g. Engine.PADDLE_FLUID
        """
        self.run_serving_with_flask(host, port, device, engine, service_id, device_id, **kwargs)
```

## 2. Request the HTTP Service

> You can open a browser at `http://{device ip}:24401` and pick an image to test.

### 2.1 HTTP Request Without base64 Images

GET parameters in the URL:

| Parameter | Description | Default |
| --------- | ---------------------- | --------------------------------------------------- |
| threshold | confidence filter, 0~1 | if omitted, the model's recommended threshold is used |

The HTTP POST body is the raw binary content of the image.

Python request example:

```python
import requests

with open('./1.jpg', 'rb') as f:
    img = f.read()
result = requests.post(
    'http://127.0.0.1:24401/',
    params={'threshold': 0.1},
    data=img).json()
```

## 3. HTTP Response Data

| Field | Type | Notes |
| ---------- | ------ | ---------------------------------------------------------------------------------------------- |
| error_code | Number | 0 on success; if non-zero, see message for the specific error |
| results | Array | the recognition results; for field meanings see the `Predict an Image - return format` section |
| cost_ms | Number | prediction time in ms, excluding network round-trip time |

Response example:

```json
{
    "cost_ms": 52,
    "error_code": 0,
    "results": [
        {
            "confidence": 0.94482421875,
            "index": 1,
            "label": "IronMan",
            "x1": 0.059185408055782318,
            "x2": 0.18795496225357056,
            "y1": 0.14762254059314728,
            "y2": 0.52510076761245728,
            "mask": "...",   // image segmentation models only
            "trackId": 0     // object tracking models only
        }
    ]
}
```
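
A small sketch of consuming this response (field names as in the example above; error handling kept minimal):

```python
import requests

with open('./1.jpg', 'rb') as f:
    resp = requests.post('http://127.0.0.1:24401/', data=f.read()).json()

if resp['error_code'] != 0:
    raise RuntimeError('inference failed, error_code=%d' % resp['error_code'])
for r in resp['results']:
    # x1/y1/x2/y2 are only present for detection models
    print(r['label'], r['confidence'], r.get('x1'), r.get('y1'))
```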

***About rectangle coordinates***

x1 * image width = x coordinate of the top-left corner of the detection box

y1 * image height = y coordinate of the top-left corner of the detection box

x2 * image width = x coordinate of the bottom-right corner of the detection box

y2 * image height = y coordinate of the bottom-right corner of the detection box

***About the segmentation mask***

```
cv::Mat mask is the 2-D array of the image mask
{
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
where 1 marks the target region and 0 the background
```

# FAQ

1. When running the infer_demo files, you get: your generated code is out of date and must be regenerated with protoc >= 3.19.0

Inside the current project, first uninstall protobuf:

```shell
python3 -m pip uninstall protobuf
```

Then install a matching protobuf version:

```shell
python3 -m pip install protobuf==3.19.0
```

# Introduction

This document describes the model SDK in FastDeploy under Android: (1) the inference steps, and (2) how to use the model SDK, so that developers can understand the project before doing secondary development.

<!--ts-->

* [Introduction](#introduction)
* [Supported Systems](#supported-systems)
* [Quick Start](#quick-start)
  * [1. Project Structure](#1-project-structure)
  * [2. Standard APP Test](#2-standard-app-test)
    * [2.1 Try It via QR Code](#21-try-it-via-qr-code)
    * [2.2 Run from Source](#22-run-from-source)
  * [3. Lite Version Test](#3-lite-version-test)
* [SDK Usage](#sdk-usage)
  * [1. Integration Guide](#1-integration-guide)
    * [1.1 Integrating the Dependencies](#11-integrating-the-dependencies)
    * [1.2 Adding Permissions](#12-adding-permissions)
    * [1.3 Proguard Rules (Optional)](#13-proguard-rules-optional)
  * [2. API Call Flow Example](#2-api-call-flow-example)
    * [2.1 Initialization](#21-initialization)
    * [2.2 Predicting Images](#22-predicting-images)
* [Error Codes](#error-codes)

<!--te-->

# Supported Systems

1. Supported Android versions: Android 5.0 (API 21) <= Android < Android 10 (API 29).

2. Hardware: arm64-v8a and armeabi-v7a are supported; emulators are not.
   * Officially tested devices: Redmi K30, Vivo v1981a, Huawei oxp-an00, Huawei cdy-an90, Huawei pct-al10, Honor yal-al00, OPPO Reno5 Pro 5G
3. Other notes
   * [Image segmentation models] (1) Real-time camera inference is not provided for segmentation models; developers can build it as needed. (2) The PP-Humanseg-Lite model was designed for landscape scenarios such as video conferencing; this Android SDK only supports portrait mode, and developers can add landscape support as needed.
   * [OCR models] On the first OCR run, the first image takes noticeably longer; this is expected (it involves model loading, preprocessing, etc.).

> Runtime memory for prediction should not be too small; in general allow more than 3x the size of the model resource folder.

# Quick Start

## 1. Project Structure

Depending on your model, target chip and operating system, download the matching SDK from the [PaddlePaddle open-source models page](https://ai.baidu.com/easyedge/app/openSource) or [GitHub](https://github.com/PaddlePaddle/FastDeploy). The SDK layout is:

```
.EasyEdge-Android-SDK
├── app
│   ├── src/main
│   │   ├── assets
│   │   │   ├── demo
│   │   │   │   └── conf.json        # APP name
│   │   │   ├── infer                # model resource folder; one model set fits different hardware, OSes and deployment modes
│   │   │   │   ├── model            # model structure file
│   │   │   │   ├── params           # model parameter file
│   │   │   │   ├── label_list.txt   # model label file
│   │   │   │   └── infer_cfg.json   # model pre/post-processing config
│   │   ├── java/com.baidu.ai.edge/demo
│   │   │   ├── infertest                        # generic ARM lite-version test
│   │   │   │   ├── TestInferClassifyTask.java   # image classification
│   │   │   │   ├── TestInferDetectionTask.java  # object detection
│   │   │   │   ├── TestInferSegmentTask.java    # instance segmentation
│   │   │   │   ├── TestInferPoseTask.java       # pose estimation
│   │   │   │   ├── TestInferOcrTask.java        # OCR
│   │   │   │   └── MainActivity.java            # lite-version launch Activity
│   │   │   ├── MainActivity.java    # demo APP launch Activity
│   │   │   ├── CameraActivity.java  # camera UI logic
│   │   │   └── ...
│   │   └── ...
│   ├── libs
│   │   ├── armeabi-v7a          # v7a dependency libraries
│   │   ├── arm64-v8a            # v8a dependency libraries
│   │   └── easyedge-sdk.jar     # jar file
│   └── ...
├── camera_ui        # UI module, contains the camera logic
├── README.md
└── ...              # other gradle etc. project files
```

## 2. Standard APP Test

Since some Android development boards have no camera, this project provides both a standard and a lite version. The standard version uses the Android camera and feeds camera frames to the AI model; the lite version runs on boards without a camera, with images supplied by the developer. Choose the version that matches your hardware.

### 2.1 Try It via QR Code

Scan the QR code (found under `体验Demo` on the download page); no dependencies are needed, just download on your phone and try it directly.

<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854064-a31755d1-52b9-416d-b35d-885b7338a6cc.png" width="600"></div>

### 2.2 Run from Source

(1) Download the corresponding SDK and extract the project.</br>
<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854071-f4c17de8-83c2-434e-882d-c175f4202a2d.png" width="600"></div>
(2) Open Android Studio and click "Import Project...", i.e. File -> New -> "Import Project...", then choose the extracted directory.</br>
(3) Connect the phone to Android Studio and enable developer mode. (If unfamiliar with developer mode, search for it in a browser.)</br>
(4) Click Run; a new app is installed on the phone, behaving the same as the QR-code version.</br>

<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854049-988414c7-116a-4261-a0c7-2705cc199538.png" width="400"></div>

## 3. Lite Version Test

* Since some Android development boards have no camera, this project provides a lite version that skips the camera and UI logic and works on camera-less boards.

* The test image paths for the lite version are set in `src/main/java/com.baidu.ai.edge/demo/TestInfer*.java`; you can place images at those paths, or modify the Java code.

* The lite test supports the following on generic ARM: image classification, object detection, instance segmentation, pose estimation, and text recognition.

The sample code lives in the infertest directory of the app module. To enable the test, change the launch Activity in app/src/main/AndroidManifest.xml.
Before:

```
<activity android:name=".MainActivity">
    <intent-filter>
        <action android:name="android.intent.action.MAIN" />
        <category android:name="android.intent.category.LAUNCHER" />
    </intent-filter>
</activity>
<activity
    android:name=".CameraActivity"
    android:screenOrientation="portrait" >
</activity>
```

After:

```
<!-- generic ARM as an example -->
<activity android:name=".infertest.MainActivity">
    <intent-filter>
        <action android:name="android.intent.action.MAIN" />
        <category android:name="android.intent.category.LAUNCHER" />
    </intent-filter>
</activity>
```

Note: after this change there is no bundled test data, so prepare a test image under `app/src/main/assets/` and name it according to the requirements in `app/src/main/java/com/baidu/ai/edge/demo/infertest/TestInfer*.java`.

<div align="center">

| Demo APP detection example | Lite-version detection example |
| ------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------- |
|  |  |
</div>

# SDK Usage

This section explains how to integrate the SDK into your own project.

## 1. Integration Guide

Step 1: integrate the dependency libraries
Step 2: add the required permissions
Step 3: configure proguard (optional)

### 1.1 Integrating the Dependencies

A. The project has no other jar packages or so files yet:

```
// 1. Copy app/libs into your project's app/libs directory
// 2. Following app/build.gradle, configure the NDK ABIs and the so library directory

android {
    ...
    defaultConfig {
        ndk {
            abiFilters 'armeabi-v7a', 'arm64-v8a'
        }
    }
    sourceSets {
        main {
            jniLibs.srcDirs = ['libs']
        }
    }
}
```

B. The project already has other jar packages, but no so files:

```
// 1. Copy app/libs/easyedge-sdk.jar next to the other jar packages
// 2. Copy the armeabi-v7a and arm64-v8a directories under app/libs into app/src/main/jniLibs
// 3. Following app/build.gradle, configure the NDK ABIs

android {
    ...
    defaultConfig {
        ndk {
            abiFilters 'armeabi-v7a', 'arm64-v8a'
        }
    }
}
```

C. The project already has other jar packages and so files:

```
// 1. Copy app/libs/easyedge-sdk.jar next to the other jar packages
// 2. Merge the so files under app/libs armeabi-v7a and arm64-v8a into the directories of the other so files with the same ABI
// 3. Following app/build.gradle, configure the NDK ABIs

android {
    ...
    defaultConfig {
        ndk {
            abiFilters 'armeabi-v7a', 'arm64-v8a' // only v7a and v8a are supported; remove other ABIs
        }
    }
}
```

### 1.2 Adding Permissions

See the permissions configured in app/src/main/AndroidManifest.xml:

```
<uses-permission android:name="android.permission.ACCESS_NETWORK_STATE"/>
<uses-permission android:name="android.permission.INTERNET"/>
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"/>
```

### 1.3 Proguard Rules (Optional)

Do not obfuscate the jar file; see the app/proguard-rules.pro configuration:

```
-keep class com.baidu.ai.edge.core.*.*{ *; }
```

## 2. API Call Flow Example

Using image classification on generic ARM as an example (details in the following sections):

```
try {
    // step 1-1: prepare the config
    InferConfig config = new InferConfig(context.getAssets(), "infer");

    // step 1-2: prepare the prediction Manager
    InferManager manager = new InferManager(context, config, "");

    // step 2-1: prepare the image to predict; must be Bitmap.Config.ARGB_8888 (usually the default)
    Bitmap image = getFromSomeWhere();

    // step 2-2: predict the image
    List<ClassificationResultModel> results = manager.classify(image, 0.3f);

    // step 3: parse the results
    for (ClassificationResultModel resultModel : results) {
        Log.i(TAG, "labelIndex=" + resultModel.getLabelIndex()
                + ", labelName=" + resultModel.getLabel()
                + ", confidence=" + resultModel.getConfidence());
    }

    // step 4: release resources promptly after prediction
    manager.destroy();
} catch (Exception e) {
    Log.e(TAG, e.getMessage());
}
```

### 2.1 Initialization

**Prepare the config**
Chip to config class mapping:

- generic ARM: InferConfig

```
// example
// the second argument is the model resource folder name for the chip
InferConfig config = new InferConfig(context.getAssets(), "infer");
```

**Prepare the prediction Manager**
Chip to Manager mapping:

- generic ARM: InferManager

```
// example
// the second argument is the config object
// keep the third argument as an empty string
InferManager manager = new InferManager(context, config, "");
```

> **Note**
>
> 1. Only one Manager may be valid at a time; before creating a new Manager, call destroy() on the previous one;
> 2. No Manager method may be called on the UI thread;
> 3. Because of thread-synchronization constraints, all Manager members and methods must be used from the same thread.

### 2.2 Predicting Images

This section describes the prediction functions and result parsing for each model type.

> **Note**
> Prediction functions may be called repeatedly, but always from the same thread; concurrency is not supported.
> The confidence argument is optional; the model's recommended value is used by default. Pass 0 to return all results.
> The input image must be a Bitmap in Bitmap.Config.ARGB_8888 format.

**Image classification**

```
// prediction functions
List<ClassificationResultModel> classify(Bitmap bitmap) throws BaseException;
List<ClassificationResultModel> classify(Bitmap bitmap, float confidence) throws BaseException;

// result
ClassificationResultModel
- label: class label, defined in label_list.txt
- labelIndex: index of the class label
- confidence: confidence, 0-1
```

**Object detection**

```
// prediction functions
List<DetectionResultModel> detect(Bitmap bitmap) throws BaseException;
List<DetectionResultModel> detect(Bitmap bitmap, float confidence) throws BaseException;

// result
DetectionResultModel
- label: label, defined in label_list.txt
- confidence: confidence, 0-1
- bounds: Rect with top-left and bottom-right coordinates, the object's position in the image
```

**Instance segmentation**

```
// prediction functions
List<SegmentationResultModel> segment(Bitmap bitmap) throws BaseException;
List<SegmentationResultModel> segment(Bitmap bitmap, float confidence) throws BaseException;

// result
SegmentationResultModel
- label: label, defined in label_list.txt
- confidence: confidence, 0-1
- lableIndex: index of the label
- box: Rect, the object's position in the image
- mask: byte[], a 0/1 mask at the original image size; drawing the pixels valued 1 gives the object region
- maskLEcode: run-length encoding of the mask
```

> For how to decode maskLEcode, see the [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)

**Pose estimation**

```
// prediction function
List<PoseResultModel> pose(Bitmap bitmap) throws BaseException;

// result
PoseResultModel
- label: label, defined in label_list.txt
- confidence: confidence, 0-1
- points: Pair<Point, Point>, two points forming a line
```

**Text recognition**

```
// prediction functions
List<OcrResultModel> ocr(Bitmap bitmap) throws BaseException;
List<OcrResultModel> ocr(Bitmap bitmap, float confidence) throws BaseException;

// result
OcrResultModel
- label: the recognized text
- confidence: confidence, 0-1
- points: List<Point>, the corner points of the text region
```

# Error Codes

| Code | Description | Details and resolution |
| ---- | ------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------- |
| 1001 | user-specified config file missing from assets | The SDK can use config.json under the assets directory as the config file; this error occurs when the passed config.json is not under assets |
| 1002 | config file fails to parse as JSON, e.g. missing fields | Normally, do not modify the config.json in the demo |
| 19xx | SDK internal error | Contact Baidu support |
| 2001 | XxxxMANAGER allows only one instance | If an XxxxMANAGER object already exists, call its destroy() method |
| 2002 | destroy() was already called on XxxxMANAGER | No methods may be called on a DETECT_MANAGER object after destroy() has been called |
| 2003 | model file path under assets is null | XxxxConfig.getModelFileAssetPath() returned null, caused by setModelFileAssetPath(null) |
| 2011 | libedge-xxxx.so failed to load | System.loadLibrary("edge-xxxx"); libedge-xxxx.so is missing from the apk. Only the armeabi-v7a and arm64-v8a CPU architectures are supported |
| 2012 | JNI memory error | insufficient heap memory |
| 2103 | license expired | the license is invalid or the system clock is wrong |
| 2601 | model file under assets failed to open | check the error message to verify the model files exist |
| 2611 | image binary passed to the engine does not match its width/height | see the error message |
| 27xx | SDK internal error | Contact Baidu support |
| 28xx | engine internal error | Contact Baidu support |
| 29xx | SDK internal error | Contact Baidu support |
| 3000 | so loading error | make sure all so files are present in the apk |
| 3001 | model loading error | make sure the model is placed at a loadable, valid path and config.json is correct |
| 3002 | model unloading error | Contact Baidu support |
| 3003 | model invocation error | a classification interface was called while the model or so library was not loaded correctly |
| 50xx | online-mode invocation error | Contact Baidu support |

# Introduction

This document describes the model SDK in FastDeploy under a **Jetson Linux C++** environment: (1) the image and video inference deployment steps, and (2) the end-to-end inference API, so that developers can understand the project before doing secondary development. If you are interested in serving-based deployment on Jetson, see the [Jetson CPP Serving](./Jetson-Linux-CPP-SDK-Serving.md) document.

**Note**: OCR currently supports **image** inference deployment only.

<!--ts-->

* [Introduction](#introduction)
* [Requirements](#requirements)
* [Quick Start](#quick-start)
  * [1. Project Structure](#1-project-structure)
  * [2. Test the Demo](#2-test-the-demo)
    * [2.1 Predict an Image](#21-predict-an-image)
    * [2.2 Predict a Video Stream](#22-predict-a-video-stream)
* [Prediction API Walkthrough](#prediction-api-walkthrough)
  * [1. SDK Runtime Configuration](#1-sdk-runtime-configuration)
  * [2. Initialize the Predictor](#2-initialize-the-predictor)
  * [3. Inference](#3-inference)
    * [3.1 Predict an Image](#31-predict-an-image)
    * [3.2 Predict a Video](#32-predict-a-video)
* [FAQ](#faq)

<!--te-->

# Requirements

* Jetpack: 4.6. Install Jetpack following the [NVIDIA Jetpack 4.6 installation guide](https://developer.nvidia.com/jetpack-sdk-46), or the instructions provided by your hardware vendor.

| No. | Hardware | Jetpack installation | Download |
| --- | ------------------------------ | ---------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------- |
| 1 | Jetson Xavier NX | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jetson_xavier_nx/jetson-nx-jp46-sd-card-image.zip) |
| 2 | Jetson Nano | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jeston_nano/jetson-nano-jp46-sd-card-image.zip) |
| 3 | Jetson Nano 2GB | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jeston_nano_2gb/jetson-nano-2gb-jp46-sd-card-image.zip) |
| 4 | AGX Xavier etc. | NVIDIA SDK Manager | [Download NVIDIA SDK](https://developer.nvidia.com/nvsdk-manager) |
| 5 | unofficial variants, e.g. eMMC | see the installation guide from your hardware vendor | |

Note: this SDK requires `CUDA=10.2`, `cuDNN=8.2`, `TensorRT=8.0`, `gcc>=7.5` and `cmake >= 3.0`. After installing the Jetpack 4.6 system image, the CUDA, cuDNN, TensorRT, gcc and cmake versions already satisfy these requirements; no further installation is needed.

# Quick Start

## 1. Project Structure

Depending on your model, target chip and operating system, download the matching SDK from the [PaddlePaddle open-source models page](https://ai.baidu.com/easyedge/app/openSource) or [GitHub](https://github.com/PaddlePaddle/FastDeploy). After extraction the SDK layout is:

```
.EasyEdge-Linux-[chip]
├── RES                  # model resource folder; one model set fits different hardware, OSes and deployment modes
│   ├── conf.json        # needed for Android/iOS APP names
│   ├── model            # model structure file
│   ├── params           # model parameter file
│   ├── label_list.txt   # model label file
│   ├── infer_cfg.json   # model pre/post-processing config
├── ReadMe.txt
├── cpp                  # C++ SDK
    └── baidu_easyedge_linux_cpp_x86_64_CPU.Generic_gcc5.4_v1.4.0_20220325.tar.gz
        ├── ReadMe.txt
        ├── bin          # ready-to-run binaries
        ├── include      # headers for secondary development
        ├── lib          # libraries required for secondary development
        ├── src          # sample projects for secondary development
        └── thirdparty   # third-party dependencies
```

## 2. Test the Demo

> The model resource files (the RES folder in the archive) are packaged in the downloaded SDK by default. Copy the whole tar archive to the target device first, then extract and use it there.

The SDK contains precompiled binaries that can be run directly. The following examples are all executed from the `cpp/bin` directory.

### 2.1 Predict an Image

```bash
./easyedge_image_inference {model RES folder} {test image path}
```

Example output:

<div align=center><img src="https://user-images.githubusercontent.com/54695910/175855351-68d1a4f0-6226-4484-b190-65f1ac2c7128.png" width="400"></div>

```bash
> ./easyedge_image_inference ../../../../RES 2.jpeg
2019-02-13 16:46:12,659 INFO [EasyEdge] [easyedge.cpp:34] 140606189016192 Baidu EasyEdge Linux Development Kit 0.2.1(20190213)
2019-02-13 16:46:14,083 INFO [EasyEdge] [paddlev2_edge_predictor.cpp:60] 140606189016192 Allocate graph success.
2019-02-13 16:46:14,326 DEBUG [EasyEdge] [paddlev2_edge_predictor.cpp:143] 140606189016192 Inference costs 168 ms
1, 1:txt_frame, p:0.994905 loc: 0.168161, 0.153654, 0.920856, 0.779621
Done
```

### 2.2 Predict a Video Stream

```
./easyedge_video_inference {model RES folder} {video_type} {video_src_path}
```

Three values of video_type are supported:

```
video_type : 1 // local video file
video_type : 2 // camera index
video_type : 3 // network video stream
```

video_src_path is the local video path, local camera id, or network stream address matching the video_type, e.g.:

```
local video file:     ./easyedge_video_inference {model RES folder} 1 ~/my_video_file.mp4
local camera:         ./easyedge_video_inference {model RES folder} 2 1 #/dev/video1
network video stream: ./easyedge_video_inference {model RES folder} 3 rtmp://192.168.x.x:8733/live/src
```

Note: the paths above are placeholders; prepare your own test images/videos and fill in the correct paths.

# Prediction API Walkthrough

This chapter introduces the inference API using the demo from [2. Test the Demo](#2-test-the-demo), so you can learn it and embed the runtime into your own program. For the full API, see the `include/easyedge/easyedge*.h` headers. Image and video inference involve the 3 API steps annotated in the code below.

> ❗Note:<br>
> (1) The `src` folder contains a complete, buildable cmake project; it is recommended to first get familiar with [cmake basics](https://cmake.org/cmake/help/latest/guide/tutorial/index.html). <br>
> (2) Follow the usage flow and notes of the demo project shipped with the SDK first. When errors occur, consult the comments, explanations and log messages in the files first.

```cpp
// step 1: configure the SDK runtime parameters
EdgePredictorConfig config;
config.model_dir = {model directory};

// step 2: create and initialize the Predictor; choose the appropriate engine here
auto predictor = global_controller()->CreateEdgePredictor(config);

// step 3-1: predict an image
auto img = cv::imread({image path});
std::vector<EdgeResultData> results;
predictor->infer(img, results);

// step 3-2: predict a video
std::vector<EdgeResultData> results;
FrameTensor frame_tensor;
VideoConfig video_config;
video_config.source_type = static_cast<SourceType>(video_type);  // see easyedge_video.h for source_type
video_config.source_value = video_src;
/*
... more video_configs: set VideoConfig options as needed
*/
auto video_decoding = CreateVideoDecoding(video_config);
while (video_decoding->next(frame_tensor) == EDGE_OK) {
    results.clear();
    if (frame_tensor.is_needed) {
        predictor->infer(frame_tensor.frame, results);
        render(frame_tensor.frame, results, predictor->model_info().kind);
    }
    //video_decoding->display(frame_tensor);  // show the current frame; enable it in video_config
    //video_decoding->save(frame_tensor);     // write the current frame to a video; enable it in video_config
}
```

To customize the library search path or the gcc path, edit CMakeList.txt in the corresponding demo project.

## 1. SDK Runtime Configuration

SDK parameters are configured through `EdgePredictorConfig::set_config` and `global_controller()->set_config`. This demo only sets the model path and keeps the other defaults. For the full set of runtime parameters, see the headers in the development kit (`include/easyedge/easyedge_xxxx_config.h`).

Usage:

```
EdgePredictorConfig config;
config.model_dir = {model directory};
```

## 2. Initialize the Predictor

* Interface

```cpp
auto predictor = global_controller()->CreateEdgePredictor(config);
predictor->init();
```

If the return value is non-zero, check the output log to diagnose the cause.

## 3. Inference

### 3.1 Predict an Image

> The demo shows the prediction interface infer() taking a cv::Mat& image and writing the results into std::vector<EdgeResultData>& result. For more about infer(), see the parameter descriptions in the `easyedge.h` header and pass in what you need.

* Interface input

```cpp
/**
 * @brief
 * generic interface
 * @param image: must be BGR , HWC format (opencv default)
 * @param result
 * @return
 */
virtual int infer(cv::Mat& image, std::vector<EdgeResultData>& result) = 0;
```

The image must be in OpenCV's default BGR, HWC format.

* Interface return

`EdgeResultData` holds the classification and location information:

```cpp
struct EdgeResultData {
    int index;          // class index
    std::string label;  // class label
    float prob;         // confidence

    // for object detection or image segmentation:
    float x1, y1, x2, y2;  // (x1, y1): top-left; (x2, y2): bottom-right; all are 0~1 ratios of width/height

    // for image segmentation:
    cv::Mat mask;          // 0/1 mask
    std::string mask_rle;  // Run Length Encoding of the mask
};
```

*** About rectangle coordinates ***

x1 * image width = x coordinate of the top-left corner of the detection box

y1 * image height = y coordinate of the top-left corner of the detection box

x2 * image width = x coordinate of the bottom-right corner of the detection box

y2 * image height = y coordinate of the bottom-right corner of the detection box

*** About the segmentation mask ***

```
cv::Mat mask is the 2-D array of the image mask
{
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
where 1 marks the target region and 0 the background
```

*** About the segmentation mask_rle ***

This field returns the run-length encoding of the mask; see the [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) for how to decode it.

The fields above can be parsed following the OpenCV drawing logic in the demo files.

### 3.2 Predict a Video

The SDK provides the parsing utility class `VideoDecoding`, which supports camera input, video files and network video streams and offers convenience functions to fetch video frames. The `VideoConfig` struct controls the parsing strategy, frame-skipping strategy, resolution adjustment and result-video storage. Extracted frames can be passed directly to the SDK infer interface for prediction.

* Interface input

class `VideoDecoding`:

```
/**
 * @brief fetch the next frame from the input source
 * @param frame_tensor
 * @return
 */
virtual int next(FrameTensor &frame_tensor) = 0;

/**
 * @brief display the video frame currently in frame_tensor
 * @param frame_tensor
 * @return
 */
virtual int display(const FrameTensor &frame_tensor) = 0;

/**
 * @brief write the video frame currently in frame_tensor to a local video file
 * @param frame_tensor
 * @return
 */
virtual int save(FrameTensor &frame_tensor) = 0;

/**
 * @brief get the fps of the video
 * @return
 */
virtual int get_fps() = 0;
/**
 * @brief get the width of the video
 * @return
 */
virtual int get_width() = 0;

/**
 * @brief get the height of the video
 * @return
 */
virtual int get_height() = 0;
```

struct `VideoConfig`

```
/**
 * @brief options for the video source, frame-skipping strategy and storage strategy
 */
struct VideoConfig {
    SourceType source_type;            // input source type
    std::string source_value;          // input source address, e.g. video file path, camera index, network stream address
    int skip_frames{0};                // extract one frame every skip_frames frames and set its is_needed to true
    int retrieve_all{false};           // whether to retrieve all frames for display/storage; frames not matching the skip_frames policy get is_needed set to false
    int input_fps{0};                  // set the video fps before frame extraction
    Resolution resolution{Resolution::kAuto};  // sampling resolution; effective for cameras only

    bool enable_display{false};        // not supported by default
    std::string window_name{"EasyEdge"};
    bool display_all{false};           // whether to display all frames; if false, only frames extracted per skip_frames are displayed

    bool enable_save{false};
    std::string save_path;             // path for storing frames as a video file
    bool save_all{false};              // whether to store all frames; if false, only frames extracted per skip_frames are stored

    std::map<std::string, std::string> conf;
};
```

| No. | Field | Meaning |
| --- | -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| 1 | `source_type` | input source type: video file, camera, or network video stream, with values 1, 2 and 3 respectively |
| 2 | `source_value` | if `source_type` is a video file, the full path of the video file; if a camera, the camera index (e.g. 0 for `/dev/video0`); if a network stream, the full stream address |
| 3 | `skip_frames` | extract one frame every skip_frames frames and set its is_needed to true; frames marked is_needed are the ones used for prediction, all others are skipped without prediction |
| 4 | `retrieve_all` | if true, all frames are retrieved and returned regardless of frame skipping, for display or storage |
| 5 | `input_fps` | sets the fps before frame extraction |
| 6 | `resolution` | sets the camera sampling resolution; see the definitions in `easyedge_video.h`; this adjustment is only effective when the input source is a camera |
| 7 | `conf` | advanced options; some settings are passed through this map |

*** Note: ***

1. `VideoConfig` does not support the `display` feature out of the box. To use it, build OpenCV with the GTK option yourself.

2. When extracting frames from a camera, if a resolution set via `resolution` does not take effect, add:

```
video_config.conf["backend"] = "2";
```

3. CSI cameras on some devices are not yet compatible; if you hit problems, report them via a ticket, the QQ group or the WeChat group.

For the concrete call flow, see `demo_video_inference` in the SDK.

# FAQ

1. How to handle undefined reference / error while loading shared libraries?

> e.g.: ./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory

When this happens, locate the library in question and set LD_LIBRARY_PATH, or install the missing library.

> Example 1: libverify.so.1: cannot open shared object file: No such file or directory
> The linker cannot find libverify.so; this is usually fixed with export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib (adjust the appended path to wherever libverify.so actually is)

> Example 2: libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory
> The linker cannot find libopencv_videoio.so; usually fixed with export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib (adjust the appended path to wherever libopencv_videoio.so actually is)

> Example 3: GLIBCXX_X.X.X not found
> The linker cannot find the glibc version; make sure the system gcc version >= the SDK's gcc version. Search online for how to upgrade gcc/glibc.

2. Running a binary reports: libverify.so cannot open shared object file

cmake may not have set the rpath correctly; set LD_LIBRARY_PATH to the SDK's lib folder and run again:

```bash
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo
```

3. Build error: file format not recognized

File metadata may have been lost while copying the SDK. Copy the whole archive to the target device, then extract and build there.

# Introduction

This document describes the model SDK in FastDeploy under a **Jetson Linux C++** environment: (1) the **serving-based** inference deployment steps, and (2) the end-to-end inference API, so that developers can understand the project before doing secondary development. If you are interested in image/video deployment on Jetson, see the [Jetson CPP Inference](./Jetson-Linux-CPP-SDK-Inference.md) document.

**Note**: OCR does not currently support serving-based inference deployment.

<!--ts-->

* [Introduction](#introduction)
* [Environment Setup](#environment-setup)
* [Quick Start](#quick-start)
  * [1. Project Structure](#1-project-structure)
  * [2. Test the HTTP Demo](#2-test-the-http-demo)
    * [2.1 Start the HTTP Prediction Service](#21-start-the-http-prediction-service)
* [HTTP API Overview](#http-api-overview)
  * [1. Start the HTTP Service](#1-start-the-http-service)
  * [2. Request the HTTP Service](#2-request-the-http-service)
    * [2.1 HTTP Request Method 1: Without base64 Images](#21-http-request-method-1-without-base64-images)
    * [2.2 HTTP Request Method 2: With base64 Images](#22-http-request-method-2-with-base64-images)
  * [3. HTTP Response Data](#3-http-response-data)
* [FAQ](#faq)

<!--te-->

# Environment Setup

* Jetpack: 4.6. Install Jetpack 4.6 following the [NVIDIA Jetpack 4.6 installation guide](https://developer.nvidia.com/jetpack-sdk-46), or the instructions provided by your hardware vendor.

| No. | Hardware | Jetpack installation | Download |
| --- | ------------------------------ | ---------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------- |
| 1 | Jetson Xavier NX | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jetson_xavier_nx/jetson-nx-jp46-sd-card-image.zip) |
| 2 | Jetson Nano | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jeston_nano/jetson-nano-jp46-sd-card-image.zip) |
| 3 | Jetson Nano 2GB | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jeston_nano_2gb/jetson-nano-2gb-jp46-sd-card-image.zip) |
| 4 | AGX Xavier etc. | NVIDIA SDK Manager | [Download NVIDIA SDK](https://developer.nvidia.com/nvsdk-manager) |
| 5 | unofficial variants, e.g. eMMC | see the installation guide from your hardware vendor | |

Note: this SDK requires `CUDA=10.2`, `cuDNN=8.2`, `TensorRT=8.0`, `gcc>=7.5` and `cmake >= 3.0`. After installing the Jetpack 4.6 system image, the CUDA, cuDNN, TensorRT, gcc and cmake versions already satisfy these requirements; no further installation is needed.

# Quick Start

## 1. Project Structure

Depending on your model, target chip and operating system, download the matching SDK from the [PaddlePaddle open-source models page](https://ai.baidu.com/easyedge/app/openSource) or [GitHub](https://github.com/PaddlePaddle/FastDeploy). After extraction the SDK layout is:

```
.EasyEdge-Linux-[chip]
├── RES                  # model resource folder; one model set fits different hardware, OSes and deployment modes
│   ├── conf.json        # needed for Android/iOS APP names
│   ├── model            # model structure file
│   ├── params           # model parameter file
│   ├── label_list.txt   # model label file
│   ├── infer_cfg.json   # model pre/post-processing config
├── ReadMe.txt
├── cpp                  # C++ SDK
    └── baidu_easyedge_linux_cpp_x86_64_CPU.Generic_gcc5.4_v1.4.0_20220325.tar.gz
        ├── ReadMe.txt
        ├── bin          # ready-to-run binaries
        ├── include      # headers for secondary development
        ├── lib          # libraries required for secondary development
        ├── src          # sample projects for secondary development
        └── thirdparty   # third-party dependencies
```

## 2. Test the HTTP Demo

> The model resource files (the RES folder in the archive) are packaged in the downloaded SDK by default. Copy the whole tar archive to the target device first, then extract and use it there.

The SDK contains precompiled binaries that can be run directly. The following examples are all executed from the `cpp/bin` directory.

### 2.1 Start the HTTP Prediction Service

```
./easyedge_serving {model RES folder}
```

Once started, the log shows the device IP and the 24401 port:

```
HTTP is now serving at 0.0.0.0:24401
```

Now open a browser at `http://0.0.0.0:24401` (replace the IP and port with what your machine actually shows) and pick an image to test.

<div align=center><img src="https://user-images.githubusercontent.com/54695910/175855495-cd8d46ec-2492-4297-b3e4-2bda4cd6727c.png" width="600"></div>

You can also call the HTTP interface directly; see the API description below.

# HTTP API Overview

This chapter describes the API behind the HTTP demo in 2.1, so you can learn it and embed the runtime into your own program; for the full API see the `include/easyedge/easyedge*.h` headers. The HTTP service has a server and a client side. The demo implements `method 1: browser request`, which does not use base64 images; the other methods can be implemented by developers as needed.

## 1. Start the HTTP Service

The HTTP service can be started directly with `bin/easyedge_serving`, or by adapting the logic in `src/demo_serving.cpp`:

```cpp
/**
 * @brief start a simple demo http server.
 * This method blocks until sigint/sigterm is received.
 * In the http server, image decoding runs on the CPU and may slow down inference.
 * @tparam ConfigT
 * @param config
 * @param host
 * @param port
 * @param service_id service_id user parameter, uri '/get/service_id' will respond this value with 'text/plain'
 * @param instance_num number of instances; tune according to memory / GPU memory / latency requirements
 * @return
 */
template<typename ConfigT>
int start_http_server(
        const ConfigT &config,
        const std::string &host,
        int port,
        const std::string &service_id,
        int instance_num = 1);
```

## 2. Request the HTTP Service

> You can open a browser at `http://{device ip}:24401` and pick an image to test.

### 2.1 HTTP Request Method 1: Without base64 Images

GET parameters in the URL:

| Parameter | Description | Default |
| --------- | ---------------------- | --------------------------------------------------- |
| threshold | confidence filter, 0~1 | if omitted, the model's recommended threshold is used |

The HTTP POST body is the raw binary content of the image (no base64, no JSON).

Python request example:

```Python
import requests

with open('./1.jpg', 'rb') as f:
    img = f.read()
result = requests.post(
    'http://127.0.0.1:24401/',
    params={'threshold': 0.1},
    data=img).json()
```

### 2.2 HTTP Request Method 2: With base64 Images

HTTP method: POST
Headers:

| Parameter | Value |
| ------------ | ---------------- |
| Content-Type | application/json |

**Request body**:

* Classification networks:
Example body:

```
{
    "image": "<base64 data>",
    "top_num": 5
}
```

Body parameters:

| Parameter | Required | Type | Range | Description |
| ------- | ---- | ------ | ----- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| image | yes | string | - | image data, base64-encoded; the encoded size must not exceed 4M, the shortest side must be at least 15px, the longest side at most 4096px; jpg/png/bmp supported. **Remember to strip the data-URI header** |
| top_num | no | number | - | number of classes to return; if omitted, all classification results are returned |

* Detection and segmentation networks:
Example body:

```
{
    "image": "<base64 data>"
}
```

Body parameters:

| Parameter | Required | Type | Range | Description |
| --------- | ---- | ------ | ----- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| image | yes | string | - | image data, base64-encoded; the encoded size must not exceed 4M, the shortest side must be at least 15px, the longest side at most 4096px; jpg/png/bmp supported. **Remember to strip the data-URI header** |
| threshold | no | number | - | defaults to the recommended threshold; can also be set as needed |

Python request example:

```Python
import base64
import requests

def main():
    with open("image path", 'rb') as f:
        result = requests.post("http://{server ip}:24401/", json={
            "image": base64.b64encode(f.read()).decode("utf8")
        })
    # print(result.request.body)
    # print(result.request.headers)
    print(result.content)

if __name__ == '__main__':
    main()
```

## 3. HTTP Response Data

| Field | Type | Notes |
| ---------- | ------ | ---------------------------------------------------------------------------------------------- |
| error_code | Number | 0 on success; if non-zero, see message for the specific error |
| results | Array | the recognition results; for field meanings see the `Predict an Image - return format` section |
| cost_ms | Number | prediction time in ms, excluding network round-trip time |

Response example:

```json
{
    "cost_ms": 52,
    "error_code": 0,
    "results": [
        {
            "confidence": 0.94482421875,
            "index": 1,
            "label": "IronMan",
            "x1": 0.059185408055782318,
            "x2": 0.18795496225357056,
            "y1": 0.14762254059314728,
            "y2": 0.52510076761245728,
            "mask": "...",   // image segmentation models only
            "trackId": 0     // object tracking models only
        }
    ]
}
```

*** About rectangle coordinates ***

x1 * image width = x coordinate of the top-left corner of the detection box

y1 * image height = y coordinate of the top-left corner of the detection box

x2 * image width = x coordinate of the bottom-right corner of the detection box

y2 * image height = y coordinate of the bottom-right corner of the detection box

*** About segmentation models ***

The mask field is the run-length encoding of the segmentation mask; see the [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) for how to decode it.

# FAQ

1. How to handle undefined reference / error while loading shared libraries?

> e.g.: ./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory

When this happens, locate the library in question and set LD_LIBRARY_PATH, or install the missing library.

> Example 1: libverify.so.1: cannot open shared object file: No such file or directory
> The linker cannot find libverify.so; this is usually fixed with export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib (adjust the appended path to wherever libverify.so actually is)

> Example 2: libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory
> The linker cannot find libopencv_videoio.so; usually fixed with export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib (adjust the appended path to wherever libopencv_videoio.so actually is)

> Example 3: GLIBCXX_X.X.X not found
> The linker cannot find the glibc version; make sure the system gcc version >= the SDK's gcc version. Search online for how to upgrade gcc/glibc.

2. Requests through libcurl to the HTTP service are noticeably slow

This is caused by libcurl sending Expect: 100-continue and the server waiting for data; add an empty header to fix it:

```bash
headers = curl_slist_append(headers, "Expect:");
```

3. Running a binary reports: libverify.so cannot open shared object file

cmake may not have set the rpath correctly; set LD_LIBRARY_PATH to the SDK's lib folder and run again:

```bash
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo
```

4. Build error: file format not recognized

File metadata may have been lost while copying the SDK. Copy the whole archive to the target device, then extract and build there.

# Introduction

This document describes the model SDK in FastDeploy for C++ under **Intel x86 CPU / NVIDIA GPU, Linux**: (1) the image and video inference deployment steps, and (2) the end-to-end inference API, so that you can understand the project before doing secondary development. If you are interested in Python deployment on Linux, see [Inference deployment under Linux Python](./Linux-Python-SDK-Inference.md).

<!--ts-->

* [Introduction](#introduction)
* [Environment Setup](#environment-setup)
  * [1. Hardware Support](#1-hardware-support)
  * [2. Software Environment](#2-software-environment)
* [Quick Start](#quick-start)
  * [1. Project Structure](#1-project-structure)
  * [2. Test the Demo](#2-test-the-demo)
    * [2.1 Predict an Image](#21-predict-an-image)
    * [2.2 Predict a Video Stream](#22-predict-a-video-stream)
  * [3. Build the Demo](#3-build-the-demo)
* [Prediction API Walkthrough](#prediction-api-walkthrough)
  * [1. SDK Runtime Configuration](#1-sdk-runtime-configuration)
  * [2. Initialize the Predictor](#2-initialize-the-predictor)
  * [3. Inference](#3-inference)
    * [3.1 Predict an Image](#31-predict-an-image)
    * [3.2 Predict a Video](#32-predict-a-video)
* [FAQ](#faq)

<!--te-->

# Environment Setup

## 1. Hardware Support

* NVIDIA GPU: x86_64
  * supported CUDA versions: CUDA 10.0/10.1/10.2 + cuDNN 7 (cuDNN version >= 7.6.5)
  * supported CUDA versions: CUDA 11.0 + cuDNN v8.0.4
* CPU: Intel x86_64

## 2. Software Environment

1. Running the binaries requires:

* gcc: 5.4 or newer (GLIBCXX_3.4.22)
  * check the gcc version on Linux (the command may differ across systems): `gcc --version`
  * check the C++ base library GLIBCXX on Linux (the path may differ across systems): `strings /usr/lib64/libstdc++.so.6 | grep GLIBCXX`
* glibc: 2.23 or newer
  * check on Linux: `ldd --version`

2. Building for secondary development additionally requires cmake, beyond the gcc, GLIBCXX and glibc requirements above.

* cmake: 3.0 or newer

  * check on Linux: `cmake --version`

# Quick Start

## 1. Project Structure

Depending on your model, target chip and operating system, download the matching SDK from the [PaddlePaddle open-source models page](https://ai.baidu.com/easyedge/app/openSource) or [GitHub](https://github.com/PaddlePaddle/FastDeploy). The SDK layout is:

```
.EasyEdge-Linux-[chip]
├── RES                  # model resource folder; one model set fits different hardware, OSes and deployment modes
│   ├── conf.json        # needed for Android/iOS APP names
│   ├── model            # model structure file
│   ├── params           # model parameter file
│   ├── label_list.txt   # model label file
│   ├── infer_cfg.json   # model pre/post-processing config
├── ReadMe.txt
├── cpp                  # C++ SDK
    └── baidu_easyedge_linux_cpp_x86_64_CPU.Generic_gcc5.4_v1.4.0_20220325.tar.gz
        ├── ReadMe.txt
        ├── bin          # ready-to-run binaries
        ├── include      # headers for secondary development
        ├── lib          # libraries required for secondary development
        ├── src          # sample projects for secondary development
        └── thirdparty   # third-party dependencies
└── python               # Python SDK files
```

## 2. Test the Demo

**Note**: an OCR demo is currently not provided.

> The model resource files (the RES folder in the archive) are packaged in the downloaded SDK by default. Copy the whole tar archive to the target device first, then extract and use it there.

The SDK contains precompiled binaries that can be run directly. The following examples are all executed from the `cpp/bin` directory.

### 2.1 Predict an Image

```bash
./easyedge_image_inference {model RES folder} {test image path}
```

Example output:

<div align=center><img src="https://user-images.githubusercontent.com/54695910/175855351-68d1a4f0-6226-4484-b190-65f1ac2c7128.png" width="400"></div>

```bash
> ./easyedge_image_inference ../../../../RES 2.jpeg
2019-02-13 16:46:12,659 INFO [EasyEdge] [easyedge.cpp:34] 140606189016192 Baidu EasyEdge Linux Development Kit 0.2.1(20190213)
2019-02-13 16:46:14,083 INFO [EasyEdge] [paddlev2_edge_predictor.cpp:60] 140606189016192 Allocate graph success.
2019-02-13 16:46:14,326 DEBUG [EasyEdge] [paddlev2_edge_predictor.cpp:143] 140606189016192 Inference costs 168 ms
1, 1:txt_frame, p:0.994905 loc: 0.168161, 0.153654, 0.920856, 0.779621
Done
```

### 2.2 Predict a Video Stream

```
./easyedge_video_inference {model RES folder} {video_type} {video_src_path}
```

Three values of video_type are supported:

```
video_type : 1 // local video file
video_type : 2 // camera index
video_type : 3 // network video stream
```

video_src_path is the local video path, local camera id, or network stream address matching the video_type, e.g.:

```
local video file:     ./easyedge_video_inference {model RES folder} 1 ~/my_video_file.mp4
local camera:         ./easyedge_video_inference {model RES folder} 2 1 #/dev/video1
network video stream: ./easyedge_video_inference {model RES folder} 3 rtmp://192.168.x.x:8733/live/src
```

Note: the paths above are placeholders; prepare your own test images/videos and fill in the correct paths.

## 3. Build the Demo

As the [project structure](#1-project-structure) shows, the executables under `bin` are built from the corresponding files under `src`. Build the sources under `src` with:

```
cd src
mkdir build && cd build
cmake .. && make
```

This produces the executables under the build folder, e.g. the image-inference binary `build/demo_image_inference/easyedge_image_inference`.

# Prediction API Walkthrough

This chapter introduces the inference API using the demo from [2. Test the Demo](#2-test-the-demo), so you can learn it and embed the runtime into your own program. For the full API, see the `include/easyedge/easyedge*.h` headers. Image and video inference involve the 3 API steps annotated in the cpp code below.

> ❗Note:<br>
> (1) The `src` folder contains a complete, buildable cmake project; it is recommended to first get familiar with [cmake basics](https://cmake.org/cmake/help/latest/guide/tutorial/index.html). <br>
> (2) Follow the usage flow and notes of the demo project shipped with the SDK first. When errors occur, consult the comments, explanations and log messages in the files first.

```cpp
// step 1: configure the SDK runtime parameters
EdgePredictorConfig config;
config.model_dir = {model directory};

// step 2: create and initialize the Predictor; choose the appropriate engine here
auto predictor = global_controller()->CreateEdgePredictor(config);

// step 3-1: predict an image
auto img = cv::imread({image path});
std::vector<EdgeResultData> results;
predictor->infer(img, results);

// step 3-2: predict a video
std::vector<EdgeResultData> results;
FrameTensor frame_tensor;
VideoConfig video_config;
video_config.source_type = static_cast<SourceType>(video_type);  // see easyedge_video.h for source_type
video_config.source_value = video_src;
/*
... more video_configs: set VideoConfig options as needed
*/
auto video_decoding = CreateVideoDecoding(video_config);
while (video_decoding->next(frame_tensor) == EDGE_OK) {
    results.clear();
    if (frame_tensor.is_needed) {
        predictor->infer(frame_tensor.frame, results);
        render(frame_tensor.frame, results, predictor->model_info().kind);
    }
    //video_decoding->display(frame_tensor);  // show the current frame; enable it in video_config
    //video_decoding->save(frame_tensor);     // write the current frame to a video; enable it in video_config
}
```

To customize the library search path or the gcc path, edit CMakeList.txt in the corresponding demo project.

## 1. SDK Runtime Configuration

SDK parameters are configured through `EdgePredictorConfig::set_config` and `global_controller()->set_config`. This demo only sets the model path and keeps the other defaults. For the full set of runtime parameters, see the headers in the development kit (`include/easyedge/easyedge_xxxx_config.h`).

Usage:

```
EdgePredictorConfig config;
config.model_dir = {model directory};
```

## 2. Initialize the Predictor

* Interface

```cpp
auto predictor = global_controller()->CreateEdgePredictor(config);
predictor->init();
```

If the return value is non-zero, check the output log to diagnose the cause.

## 3. Inference

### 3.1 Predict an Image

> The demo shows the prediction interface infer() taking a cv::Mat& image and writing the results into std::vector<EdgeResultData>& result. For more about infer(), see the parameter descriptions in the `easyedge.h` header and pass in what you need.

* Interface input

```cpp
/**
 * @brief
 * generic interface
 * @param image: must be BGR , HWC format (opencv default)
 * @param result
 * @return
 */
virtual int infer(cv::Mat& image, std::vector<EdgeResultData>& result) = 0;
```

The image must be in OpenCV's default BGR, HWC format.

* Interface return

`EdgeResultData` holds the classification and location information:

```cpp
struct EdgeResultData {
    int index;          // class index
    std::string label;  // class label
    float prob;         // confidence

    // for object detection or image segmentation:
    float x1, y1, x2, y2;  // (x1, y1): top-left; (x2, y2): bottom-right; all are 0~1 ratios of width/height

    // for image segmentation:
    cv::Mat mask;          // 0/1 mask
    std::string mask_rle;  // Run Length Encoding of the mask
};
```

*** About rectangle coordinates ***

x1 * image width = x coordinate of the top-left corner of the detection box

y1 * image height = y coordinate of the top-left corner of the detection box

x2 * image width = x coordinate of the bottom-right corner of the detection box

y2 * image height = y coordinate of the bottom-right corner of the detection box

*** About the segmentation mask ***

```
cv::Mat mask is the 2-D array of the image mask
{
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
where 1 marks the target region and 0 the background
```

*** About the segmentation mask_rle ***

This field returns the run-length encoding of the mask; see the [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) for how to decode it.

The fields above can be parsed following the OpenCV drawing logic in the demo files.

### 3.2 Predict a Video

The SDK provides the parsing utility class `VideoDecoding`, which supports camera input, video files and network video streams and offers convenience functions to fetch video frames. The `VideoConfig` struct controls the parsing strategy, frame-skipping strategy, resolution adjustment and result-video storage. Extracted frames can be passed directly to the SDK infer interface for prediction.

* Interface input

class `VideoDecoding`:

```
/**
 * @brief fetch the next frame from the input source
 * @param frame_tensor
 * @return
 */
virtual int next(FrameTensor &frame_tensor) = 0;

/**
 * @brief display the video frame currently in frame_tensor
 * @param frame_tensor
 * @return
 */
virtual int display(const FrameTensor &frame_tensor) = 0;

/**
 * @brief write the video frame currently in frame_tensor to a local video file
 * @param frame_tensor
 * @return
 */
virtual int save(FrameTensor &frame_tensor) = 0;

/**
 * @brief get the fps of the video
 * @return
 */
virtual int get_fps() = 0;
/**
 * @brief get the width of the video
 * @return
 */
virtual int get_width() = 0;

/**
 * @brief get the height of the video
 * @return
 */
virtual int get_height() = 0;
```

struct `VideoConfig`

```
/**
 * @brief options for the video source, frame-skipping strategy and storage strategy
 */
struct VideoConfig {
    SourceType source_type;            // input source type
    std::string source_value;          // input source address, e.g. video file path, camera index, network stream address
    int skip_frames{0};                // extract one frame every skip_frames frames and set its is_needed to true
    int retrieve_all{false};           // whether to retrieve all frames for display/storage; frames not matching the skip_frames policy get is_needed set to false
    int input_fps{0};                  // set the video fps before frame extraction
    Resolution resolution{Resolution::kAuto};  // sampling resolution; effective for cameras only

    bool enable_display{false};        // not supported by default
    std::string window_name{"EasyEdge"};
    bool display_all{false};           // whether to display all frames; if false, only frames extracted per skip_frames are displayed

    bool enable_save{false};
    std::string save_path;             // path for storing frames as a video file
    bool save_all{false};              // whether to store all frames; if false, only frames extracted per skip_frames are stored

    std::map<std::string, std::string> conf;
};
```

| No. | Field | Meaning |
| --- | -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| 1 | `source_type` | input source type: video file, camera, or network video stream, with values 1, 2 and 3 respectively |
| 2 | `source_value` | if `source_type` is a video file, the full path of the video file; if a camera, the camera index (e.g. 0 for `/dev/video0`); if a network stream, the full stream address |
| 3 | `skip_frames` | extract one frame every skip_frames frames and set its is_needed to true; frames marked is_needed are the ones used for prediction, all others are skipped without prediction |
| 4 | `retrieve_all` | if true, all frames are retrieved and returned regardless of frame skipping, for display or storage |
| 5 | `input_fps` | sets the fps before frame extraction |
| 6 | `resolution` | sets the camera sampling resolution; see the definitions in `easyedge_video.h`; this adjustment is only effective when the input source is a camera |
| 7 | `conf` | advanced options; some settings are passed through this map |

*** Note: ***

1. `VideoConfig` does not support the `display` feature out of the box. To use it, build OpenCV with the GTK option yourself.

2. When extracting frames from a camera, if a resolution set via `resolution` does not take effect, add:

```
video_config.conf["backend"] = "2";
```

3. CSI cameras on some devices are not yet compatible; if you hit problems, report them via a ticket, the QQ group or the WeChat group.

For the concrete call flow, see `demo_video_inference` in the SDK.

# FAQ
|
||||
|
||||
1. 如何处理一些 undefined reference / error while loading shared libraries?
|
||||
|
||||
> 如:./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory
|
||||
|
||||
遇到该问题时,请找到具体的库的位置,设置LD_LIBRARY_PATH;或者安装缺少的库。
|
||||
|
||||
> 示例一:libverify.so.1: cannot open shared object file: No such file or directory
|
||||
> 链接找不到libveirfy.so文件,一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib 解决(实际冒号后面添加的路径以libverify.so文件所在的路径为准)
|
||||
|
||||
> 示例二:libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory
|
||||
> 链接找不到libopencv_videoio.so文件,一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib 解决(实际冒号后面添加的路径以libopencv_videoio.so所在路径为准)
|
||||
|
||||
> 示例三:GLIBCXX_X.X.X not found
|
||||
> 链接无法找到glibc版本,请确保系统gcc版本>=SDK的gcc版本。升级gcc/glibc可以百度搜索相关文献。
|
||||
|
||||
2. 运行二进制时,提示 libverify.so cannot open shared object file
|
||||
|
||||
可能cmake没有正确设置rpath, 可以设置LD_LIBRARY_PATH为sdk的lib文件夹后,再运行:
|
||||
|
||||
```bash
|
||||
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo
|
||||
```
|
||||
|
||||
3. 编译时报错:file format not recognized
|
||||
|
||||
可能是因为在复制SDK时文件信息丢失。请将整个压缩包复制到目标设备中,再解压缩、编译。
|
||||
# Overview

This document describes the model SDK in FastDeploy for the C++ environment on **x86 CPU / NVIDIA GPU with Linux**: (1) the steps for HTTP serving-based inference deployment, and (2) the full inference API, so developers can build on the project after getting familiar with it.

For Python, refer to the [deployment guide for the Linux Python environment](./Linux-Python-SDK-Serving.md).

**[Note]**: The OCR demo does not support serving deployment yet.

<!--ts-->

* [Overview](#overview)

* [Installation Preparation](#installation-preparation)
  * [1. Supported Hardware](#1-supported-hardware)
  * [2. Software Environment](#2-software-environment)

* [Quick Start](#quick-start)
  * [1. Project Structure](#1-project-structure)
  * [2. Test the HTTP Demo](#2-test-the-http-demo)
    * [2.1 Start the HTTP Prediction Service](#21-start-the-http-prediction-service)
  * [3. Compile the Demo](#3-compile-the-demo)

* [HTTP API Walkthrough](#http-api-walkthrough)
  * [1. Start the HTTP Service](#1-start-the-http-service)
  * [2. Request the HTTP Service](#2-request-the-http-service)
    * [2.1 HTTP request method 1: without base64](#21-http-request-method-1-without-base64)
    * [2.2 HTTP request method 2: with base64](#22-http-request-method-2-with-base64)
  * [3. HTTP Response Data](#3-http-response-data)

* [FAQ](#faq)

<!--te-->

# Installation Preparation

## 1. Supported Hardware

- NVIDIA GPU: x86_64
  - Supported CUDA versions: CUDA 10.0/10.1/10.2 + cuDNN 7 (cuDNN version >= 7.6.5)
  - Supported CUDA versions: CUDA 11.0 + cuDNN v8.0.4
- CPU: Intel x86_64

## 2. Software Environment

1. Requirements for running the prebuilt binaries

- gcc: 5.4 or later (GLIBCXX_3.4.22)
  - Check the gcc version on Linux (the command may differ across systems): `gcc --version`
  - Check the GLIBCXX symbols of the C++ runtime on Linux (the path may differ across systems; adapt to your environment): `strings /usr/lib64/libstdc++.so.6 | grep GLIBCXX`
- glibc: 2.23 or later
  - Check on Linux: `ldd --version`

2. Requirements for building from source

Besides the gcc, GLIBCXX, and glibc requirements in `1. Requirements for running the prebuilt binaries`, compiling the source code also requires a suitable cmake.

- cmake: 3.0 or later
  - Check on Linux: `cmake --version`
# Quick Start

## 1. Project Structure

Depending on your model, target chip, and operating system, download the matching SDK from the [PaddlePaddle open-source models page](https://ai.baidu.com/easyedge/app/openSource) or [GitHub](https://github.com/PaddlePaddle/FastDeploy). The SDK directory structure is as follows:

```
.EasyEdge-Linux-[chip]
├── RES                  # model resource folder; one set of models fits different hardware, OSes, and deployment methods
│   ├── conf.json        # needed for the APP name on Android and iOS
│   ├── model            # model structure file
│   ├── params           # model parameter file
│   ├── label_list.txt   # model label file
│   ├── infer_cfg.json   # model pre/post-processing config file
├── ReadMe.txt
├── cpp                  # C++ SDK file structure
    └── baidu_easyedge_linux_cpp_x86_64_CPU.Generic_gcc5.4_v1.4.0_20220325.tar.gz
        ├── ReadMe.txt
        ├── bin          # ready-to-run binaries
        ├── include      # header files for secondary development
        ├── lib          # libraries that secondary development depends on
        ├── src          # sample projects for secondary development
        └── thirdparty   # third-party dependencies
└── python               # Python SDK files
```
## 2. Test the HTTP Demo

> The model resource files (the RES folder inside the archive) are already packaged in the SDK you downloaded. Copy the whole tar archive to the target device first, then extract it there.

The SDK ships with precompiled binaries that can be run directly. The sample runs below are all executed from the `cpp/bin` directory.

### 2.1 Start the HTTP Prediction Service

```
./easyedge_serving {path to the model RES folder}
```

After startup, the log prints the device IP and the 24401 port, like this:

```
HTTP is now serving at 0.0.0.0:24401
```

Developers can then open a browser at `http://0.0.0.0:24401` (replace the device IP and port 24401 with what your machine actually prints) and pick an image to test.

<div align=center><img src="https://user-images.githubusercontent.com/54695910/175855495-cd8d46ec-2492-4297-b3e4-2bda4cd6727c.png" width="600"></div>

You can also call the HTTP interface to access the service; see the [HTTP API Walkthrough](#http-api-walkthrough) below.

## 3. Compile the Demo

As described in [Project Structure](#1-project-structure), the executables under `bin` are compiled from the corresponding files under `src`. This section shows the C++ build commands.

```
cd src
mkdir build && cd build
cmake .. && make
```

This produces the compiled executables in the build folder, e.g. the image inference binary `build/demo_serving/easyedge_serving`.
# HTTP API Walkthrough

This chapter builds on the API behind [2.1 Start the HTTP Prediction Service](#21-start-the-http-prediction-service), so developers can learn it and embed the runtime into their own programs. For more detailed APIs, see the `include/easyedge/easyedge*.h` files. The HTTP service has a server side and a client side. Of the supported request styles, the demo implements `method 1: request from a browser`, which does not use base64 images; developers can implement the other styles as needed.

## 1. Start the HTTP Service

The HTTP service can be started directly with `bin/easyedge_serving`, or you can adapt the logic in `src/demo_serving.cpp`:

```cpp
/**
 * @brief Start a simple demo HTTP server.
 * This method blocks until it receives sigint/sigterm.
 * Inside the HTTP service, image decoding runs on the CPU, which may slow down inference.
 * @tparam ConfigT
 * @param config
 * @param host
 * @param port
 * @param service_id service_id user parameter, uri '/get/service_id' will respond this value with 'text/plain'
 * @param instance_num number of instances; tune it to your memory/VRAM/latency requirements
 * @return
 */
template<typename ConfigT>
int start_http_server(
    const ConfigT &config,
    const std::string &host,
    int port,
    const std::string &service_id,
    int instance_num = 1);
```
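
Once the service is up, the `/get/service_id` URI mentioned in the doc comment above responds with the configured service_id as text/plain, so it doubles as a liveness probe. A minimal sketch, assuming the service runs locally on the default demo port 24401 (host and port are placeholders):

```python
import requests

BASE_URL = "http://127.0.0.1:24401"  # placeholder; use the address your service prints

# '/get/service_id' returns the configured service_id as 'text/plain'
resp = requests.get(f"{BASE_URL}/get/service_id", timeout=3)
print("service is up, service_id =", resp.text)
```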

## 2. Request the HTTP Service

> Developers can open a browser at `http://{device IP}:24401` and pick an image to test.

### 2.1 HTTP request method 1: without base64

GET parameters in the URL:

| Parameter | Description | Default |
| --------- | ----------- | ------- |
| threshold | confidence threshold filter, 0~1 | if not provided, the model's recommended threshold is used |

The HTTP POST body is the raw binary content of the image (no base64, no JSON).

Python request example:

```Python
import requests

with open('./1.jpg', 'rb') as f:
    img = f.read()
result = requests.post(
    'http://127.0.0.1:24401/',
    params={'threshold': 0.1},
    data=img).json()
```

### 2.2 HTTP request method 2: with base64

HTTP method: POST
Headers:

| Parameter | Value |
| ------------ | ---------------- |
| Content-Type | application/json |

**Request body**:

* Classification networks:
  Example body:

```
{
    "image": "<base64 data>",
    "top_num": 5
}
```

Body parameters:

| Parameter | Required | Type | Range | Description |
| ------- | ---- | ------ | ----- | ----------- |
| image | yes | string | - | image data, base64-encoded; the encoded image must be at most 4 MB, the shortest side at least 15 px, the longest side at most 4096 px; jpg/png/bmp formats are supported. **Strip the data-URI header.** |
| top_num | no | number | - | number of classes to return; if omitted, all classification results are returned |

* Detection and segmentation networks:
  Example body:

```
{
    "image": "<base64 data>"
}
```

Body parameters:

| Parameter | Required | Type | Range | Description |
| --------- | ---- | ------ | ----- | ----------- |
| image | yes | string | - | image data, base64-encoded; the encoded image must be at most 4 MB, the shortest side at least 15 px, the longest side at most 4096 px; jpg/png/bmp formats are supported. **Strip the data-URI header.** |
| threshold | no | number | - | defaults to the recommended threshold; can be set as needed |

Python request example:

```python
import base64
import requests


def main():
    with open("path/to/image", 'rb') as f:
        result = requests.post("http://{service ip}:24401/", json={
            "image": base64.b64encode(f.read()).decode("utf8")
        })
        # print(result.request.body)
        # print(result.request.headers)
        print(result.content)


if __name__ == '__main__':
    main()
```

### 3. HTTP Response Data

| Field | Type | Notes |
| ---------- | ------ | ----- |
| error_code | Number | 0 on success; for non-zero values, check message for the concrete error |
| results | Array | the actual recognition results; for field meanings, see the `Predict Image - Return Format` section |
| cost_ms | Number | prediction time in ms, excluding network round-trip time |

Response example:

```json
{
    "cost_ms": 52,
    "error_code": 0,
    "results": [
        {
            "confidence": 0.94482421875,
            "index": 1,
            "label": "IronMan",
            "x1": 0.059185408055782318,
            "x2": 0.18795496225357056,
            "y1": 0.14762254059314728,
            "y2": 0.52510076761245728,
            "mask": "...",   // field for image segmentation models
            "trackId": 0     // field for object tracking models
        }
    ]
}
```

*** About rectangle coordinates ***

x1 * image width = x coordinate of the detection box's top-left corner

y1 * image height = y coordinate of the detection box's top-left corner

x2 * image width = x coordinate of the detection box's bottom-right corner

y2 * image height = y coordinate of the detection box's bottom-right corner

*** About segmentation models ***

The mask field holds the run-length encoding of the segmentation mask; see the [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) for how to decode it.
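
To make the coordinate convention above concrete, here is a minimal, hypothetical parsing sketch (not part of the SDK): it checks `error_code`, then scales the normalized box corners from the response back to pixels. The image size arguments are placeholders you supply.

```python
def to_pixel_boxes(response, img_w, img_h):
    """Convert the normalized box corners in the response above to pixel coordinates."""
    if response["error_code"] != 0:
        raise RuntimeError(f"prediction failed, error_code={response['error_code']}")
    boxes = []
    for r in response["results"]:
        boxes.append((
            r["label"],
            r["confidence"],
            int(r["x1"] * img_w), int(r["y1"] * img_h),  # top-left corner
            int(r["x2"] * img_w), int(r["y2"] * img_h),  # bottom-right corner
        ))
    return boxes
```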
# FAQ

1. How do I handle undefined reference / error while loading shared libraries?

> e.g.: ./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory

When this happens, locate the library in question and add its directory to LD_LIBRARY_PATH, or install the missing library.

> Example 1: libverify.so.1: cannot open shared object file: No such file or directory
> The linker cannot find libverify.so. This can usually be fixed with export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib (adjust the path after the colon to wherever libverify.so actually lives).

> Example 2: libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory
> The linker cannot find libopencv_videoio.so. This can usually be fixed with export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib (adjust the path after the colon to wherever libopencv_videoio.so actually lives).

> Example 3: GLIBCXX_X.X.X not found
> The linker cannot find the required glibc version. Make sure the system gcc version is >= the gcc version the SDK was built with. Search online for guides on upgrading gcc/glibc.

2. Requests through libcurl to the HTTP service are noticeably slow

This is caused by libcurl sending Expect: 100-continue, which makes the server wait for data. Add an empty header:

```bash
headers = curl_slist_append(headers, "Expect:");
```

3. Running the binary reports libverify.so cannot open shared object file

cmake may not have set the rpath correctly. Set LD_LIBRARY_PATH to the SDK's lib folder and run again:

```bash
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo
```

4. Compilation error: file format not recognized

File metadata was probably lost while copying the SDK. Copy the whole archive to the target device, then extract and compile there.
# Overview

This document describes the model SDK in FastDeploy for the **Intel x86_64 / NVIDIA GPU, Linux Python** environment: (1) the steps for image inference deployment, and (2) the full inference API, so developers can build on the project after getting familiar with it.

For Linux C++, refer to [Inference deployment in a Linux C++ environment](./Linux-CPP-SDK-Inference.md).

<!--ts-->

* [Overview](#overview)

* [Environment Preparation](#environment-preparation)
  * [1. Download the SDK](#1-download-the-sdk)
  * [2. Python Environment](#2-python-environment)
  * [3. Install Dependencies](#3-install-dependencies)
    * [3.1 Install paddlepaddle](#31-install-paddlepaddle)
    * [3.2 Install the EasyEdge Python Wheel](#32-install-the-easyedge-python-wheel)

* [Quick Start](#quick-start)
  * [1. File Structure](#1-file-structure)
  * [2. Test the Demo](#2-test-the-demo)
    * [2.1 Predict an Image](#21-predict-an-image)

* [Prediction API Walkthrough](#prediction-api-walkthrough)
  * [1. Basic Flow](#1-basic-flow)
  * [2. Initialization](#2-initialization)
  * [3. SDK Parameter Configuration](#3-sdk-parameter-configuration)
  * [4. Predict an Image](#4-predict-an-image)

* [FAQ](#faq)

<!--te-->
# Environment Preparation

## 1. Download the SDK

Depending on your model, target chip, and operating system, download the matching SDK from the [PaddlePaddle open-source models page](https://ai.baidu.com/easyedge/app/openSource) or [GitHub](https://github.com/PaddlePaddle/FastDeploy). After extraction, the SDK directory structure is:

```shell
EasyEdge-Linux-x86-[chip]
├── RES      # model resource folder; can be swapped for another model
├── README.md
├── cpp      # C++ SDK
└── python   # Python SDK
```

## 2. Python Environment

> The SDK currently supports Python 3.5, 3.6, and 3.7 only.

Use the following command to check the installed Python version. If your local version does not match, consider using a Python version manager such as [pyenv](https://github.com/pyenv/pyenv) or [anaconda](https://www.anaconda.com/) to configure the SDK directory.

```shell
$python3 --version
```

Then confirm that pip is version 20.2.2 or later. See the [official guide](https://pip.pypa.io/en/stable/installation/) for pip installation details.

```shell
$python3 -m pip --version
```

## 3. Install Dependencies

### 3.1 Install paddlepaddle

Install the PaddlePaddle wheel matching your deployment chip (CPU/GPU).

On the `x86_64 CPU` platform, install with:

```shell
python3 -m pip install paddlepaddle==2.2.2 -i https://mirror.baidu.com/pypi/simple
```

For NVIDIA GPU platforms, see the [official Paddle installation guide](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html).

> When predicting on an NVIDIA GPU, the following must hold:
>
> 1. cuda and cudnn are installed on the machine
> 2. the paddle build matching that cuda version is installed
> 3. a sensible initial memory usage ratio is set via the `FLAGS_fraction_of_gpu_memory_to_use` environment variable

### 3.2 Install the EasyEdge Python Wheel

In the `python` directory, install the EasyEdge wheel for your specific Python version. On `x86_64 CPU` or `x86_64 NVIDIA GPU` platforms, use the following command; take the exact name from the whl shipped in the Python SDK package.

```shell
python3 -m pip install -U BaiduAI_EasyEdge_SDK-{SDK version}-cp{Python version}-cp{Python version}m-linux_x86_64.whl
```

On the `armv8 CPU` platform, install with:

```shell
python3 -m pip install -U BaiduAI_EasyEdge_SDK-{version}-cp36-cp36m-linux_aarch64.whl
```
# Quick Start

## 1. File Structure

The Python SDK files are laid out as follows:

```shell
EasyEdge-Linux-x86--[chip]
├──...
├──python               # Linux Python SDK
    ├──                 # EasyEdge wheels for specific Python versions, usable for secondary development
    ├── BaiduAI_EasyEdge_SDK-1.2.8-cp35-cp35m-linux_x86_64.whl
    ├── BaiduAI_EasyEdge_SDK-1.2.8-cp36-cp36m-linux_x86_64.whl
    ├── BaiduAI_EasyEdge_SDK-1.2.8-cp37-cp37m-linux_x86_64.whl
    ├── infer_demo          # complete demo files
    │   ├── demo_xxx.py     # end-to-end inference demo with pre/post-processing
    │   └── demo_serving.py # demo exposing an HTTP service
    ├── tensor_demo         # tensor in/out demo files
    │   └── demo_xxx.py
```

## 2. Test the Demo

> The model resource files are already packaged in the SDK, in the `RES` directory by default.

### 2.1 Predict an Image

Use the demo files in the infer_demo folder.

```bash
python3 demo_x86_cpu.py {model RES folder} {test image path}
```

Sample output:

<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854068-28d27c0a-ef83-43ee-9e89-b65eed99b476.jpg" width="400"></div>

```shell
2022-06-14 14:40:16 INFO [EasyEdge] [demo_nvidia_gpu.py:38] 140518522509120: Init paddlefluid engine...
2022-06-14 14:40:20 INFO [EasyEdge] [demo_nvidia_gpu.py:38] 140518522509120: Paddle version: 2.2.2
{'confidence': 0.9012349843978882, 'index': 8, 'label': 'n01514859 hen'}
```

The result is `index:8, label:hen`. Looking this up in the imagenet [class mapping table](https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a) gives the class 'hen', which confirms that the prediction is correct.
# Prediction API Walkthrough

This chapter introduces the inference API through the demo above, so developers can learn it and embed the runtime into their own programs. For more detailed APIs, see the `infer_demo/demo_xx_xx.py` files and the step comments in the Python code below.

## 1. Basic Flow

> ❗Note: follow the usage flow and notes of the demos shipped with the SDK first. If you hit errors, consult the comments, explanations, and log messages in those files first.

`infer_demo/demo_xx_xx.py`

```python
# Import the EasyEdge runtime
import BaiduAI.EasyEdge as edge

# Create and initialize a prediction Program; pick a suitable engine
pred = edge.Program()
pred.init(model_dir={path to the RES folder}, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 CPU
# pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 Nvidia GPU
# pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE) # armv8 CPU

# Predict an image
res = pred.infer_image({image as numpy.ndarray})

# Close the prediction Program
pred.close()
```

`infer_demo/demo_serving.py`

```python
import BaiduAI.EasyEdge as edge
from BaiduAI.EasyEdge.serving import Serving

# Create and initialize the HTTP service
server = Serving(model_dir={path to the RES folder}, license=serial_key)

# Run the HTTP service
# Refer to demo_xx_xx.py in the same directory:
# pred.init(model_dir=xx, device=xx, engine=xx, device_id=xx)
# and adjust the device, device_id, and engine parameters below accordingly
server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 CPU
# server.run(host=host, port=port, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 Nvidia GPU
# server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE) # armv8 CPU
```

## 2. Initialization

- Interface

```python
def init(self,
         model_dir,
         device=Device.CPU,
         engine=Engine.PADDLE_FLUID,
         config_file='conf.json',
         preprocess_file='preprocess_args.json',
         model_file='model',
         params_file='params',
         label_file='label_list.txt',
         infer_cfg_file='infer_cfg.json',
         device_id=0,
         thread_num=1
         ):
    """
    Args:
        model_dir: str
        device: BaiduAI.EasyEdge.Device, e.g. Device.CPU
        engine: BaiduAI.EasyEdge.Engine, e.g. Engine.PADDLE_FLUID
        config_file: str
        preprocess_file: str
        model_file: str
        params_file: str
        label_file: str, label file
        infer_cfg_file: file containing pre- and post-processing information
        device_id: int, device ID
        thread_num: int, number of CPU threads

    Raises:
        RuntimeError, IOError
    Returns:
        bool: True if success
    """
```

If the return value is not True, check the output logs for the cause of the error.
## 3. SDK Parameter Configuration

When predicting on the CPU, you can set thread_num in init to use multi-threaded prediction, e.g.:

```python
pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID, thread_num=4)
```

When predicting on a GPU, you can set device_id in init to pick the GPU device id, e.g.:

```python
pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID, device_id=0)
```

## 4. Predict an Image

- Interface

```python
def infer_image(self, img,
                threshold=0.3,
                channel_order='HWC',
                color_format='BGR',
                data_type='numpy')
    """

    Args:
        img: np.ndarray or bytes
        threshold: float
            only return result with confidence larger than threshold
        channel_order: string
            channel order HWC or CHW
        color_format: string
            color format order RGB or BGR
        data_type: string
            only meaningful for image segmentation: 'numpy' or 'string'
            'numpy': returns the decoded mask
            'string': returns the undecoded run-length-encoded mask

    Returns:
        list

    """
```

| Field | Type | Range | Description |
| ---------- | -------------------- | --------- | ----------- |
| confidence | float | 0~1 | confidence of the classification or detection |
| label | string | | class label of the classification or detection |
| index | number | | class index of the classification or detection |
| x1, y1 | float | 0~1 | object detection: top-left corner of the rectangle (as a fraction of width/height) |
| x2, y2 | float | 0~1 | object detection: bottom-right corner of the rectangle (as a fraction of width/height) |
| mask | string/numpy.ndarray | | segmentation mask |

***About rectangle coordinates***

x1 * image width = x coordinate of the detection box's top-left corner

y1 * image height = y coordinate of the detection box's top-left corner

x2 * image width = x coordinate of the detection box's bottom-right corner

y2 * image height = y coordinate of the detection box's bottom-right corner

You can refer to the logic in the demo files that draws rectangles with opencv; a sketch follows below.
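
As a minimal sketch of that drawing logic (my own illustration, assuming a BGR `numpy` image and the result list returned by `infer_image` above):

```python
import cv2  # opencv-python

def draw_boxes(img, results):
    """Draw detection results (normalized corners) onto a BGR image in place."""
    h, w = img.shape[:2]
    for r in results:
        p1 = (int(r['x1'] * w), int(r['y1'] * h))  # top-left, scaled to pixels
        p2 = (int(r['x2'] * w), int(r['y2'] * h))  # bottom-right, scaled to pixels
        cv2.rectangle(img, p1, p2, (0, 255, 0), 2)
        cv2.putText(img, f"{r['label']} {r['confidence']:.2f}",
                    (p1[0], max(p1[1] - 5, 0)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    return img
```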

***Result examples***

i) Image classification

```json
{
    "index": 736,
    "label": "table",
    "confidence": 0.9
}
```

ii) Object detection

```json
{
    "index": 8,
    "label": "cat",
    "confidence": 1.0,
    "x1": 0.21289,
    "y1": 0.12671,
    "x2": 0.91504,
    "y2": 0.91211
}
```

iii) Image segmentation

```json
{
    "name": "cat",
    "score": 1.0,
    "location": {
        "left": ...,
        "top": ...,
        "width": ...,
        "height": ...
    },
    "mask": ...
}
```

When data_type is `numpy`, the mask field holds the 2-D array of the image mask:

```text
{
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
where 1 marks the target region and 0 the background
```

When data_type is `string`, mask holds the run-length encoding; see the [demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) for how to decode it. A small visualization sketch for the `numpy` case follows.
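
A minimal sketch (my own illustration, not part of the SDK) for visualizing such a 0/1 mask as a translucent overlay; the image, mask, and color arguments are placeholders:

```python
import numpy as np
import cv2  # opencv-python

def overlay_mask(img, mask, color=(0, 0, 255), alpha=0.5):
    """Blend a 0/1 mask over a BGR image; returns a new image."""
    mask = mask.astype(bool)
    out = img.copy()
    # Blend only the pixels where mask == 1
    out[mask] = (alpha * np.array(color) + (1 - alpha) * out[mask]).astype(np.uint8)
    return out
```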
# FAQ

1. Running the infer_demo files reports: your generated code is out of date and must be regenerated with protoc >= 3.19.0

Inside the project, first uninstall protobuf:

```shell
python3 -m pip uninstall protobuf
```

Then install the pinned protobuf version:

```shell
python3 -m pip install protobuf==3.19.0
```
# Overview

Using the [1000-class MobileNetV3 model](https://ai.baidu.com/easyedge/app/openSource) as an example, this document describes the model SDK in FastDeploy for the **Intel x86_64 / NVIDIA GPU, Linux Python** environment: (1) the steps for **serving-based** inference deployment, and (2) the full inference API, so developers can build on the project after getting familiar with it.

For Linux C++, refer to [Serving deployment in a Linux C++ environment](./Linux-CPP-SDK-Serving.md).

**[Note]**: The OCR demo does not support serving deployment yet.

<!--ts-->

* [Overview](#overview)

* [Environment Preparation](#environment-preparation)
  * [1. Download the SDK](#1-download-the-sdk)
  * [2. Python Environment](#2-python-environment)
  * [3. Install Dependencies](#3-install-dependencies)
    * [3.1 Install paddlepaddle](#31-install-paddlepaddle)
    * [3.2 Install the EasyEdge Python Wheel](#32-install-the-easyedge-python-wheel)

* [Quick Start](#quick-start)
  * [1. File Structure](#1-file-structure)
  * [2. Test the Serving Service](#2-test-the-serving-service)
    * [2.1 Start the HTTP Prediction Service](#21-start-the-http-prediction-service)

* [HTTP API Walkthrough](#http-api-walkthrough)
  * [1. Start the HTTP Service](#1-start-the-http-service)
  * [2. Request the HTTP Service](#2-request-the-http-service)
    * [2.1 HTTP request method: without base64](#21-http-request-method-without-base64)
  * [3. HTTP Response Data](#3-http-response-data)

* [FAQ](#faq)

<!--te-->
# Environment Preparation

## 1. Download the SDK

Depending on your model, target chip, and operating system, download the matching SDK from the [PaddlePaddle open-source models page](https://ai.baidu.com/easyedge/app/openSource) or [GitHub](https://github.com/PaddlePaddle/FastDeploy). After extraction, the SDK directory structure is:

```shell
EasyEdge-Linux-x86-[chip]
├── RES      # model resource folder; can be swapped for another model
├── README.md
├── cpp      # C++ SDK
└── python   # Python SDK
```

## 2. Python Environment

> The SDK currently supports Python 3.5, 3.6, and 3.7 only.

Use the following command to check the installed Python version. If your local version does not match, consider using a Python version manager such as [pyenv](https://github.com/pyenv/pyenv) or [anaconda](https://www.anaconda.com/) to configure the SDK directory.

```shell
$python3 --version
```

Then confirm that pip is version 20.2.2 or later. See the [official guide](https://pip.pypa.io/en/stable/installation/) for pip installation details.

```shell
$python3 -m pip --version
```

## 3. Install Dependencies

### 3.1 Install paddlepaddle

Install the PaddlePaddle wheel matching your deployment chip (CPU/GPU).

1. On the `x86_64 CPU` platform, install with:

```shell
python3 -m pip install paddlepaddle==2.2.2 -i https://mirror.baidu.com/pypi/simple
```

2. For `x86_64 NVIDIA GPU`, the supported CUDA and cuDNN versions follow the PaddlePaddle framework:

* CUDA toolkit 10.1/10.2 with cuDNN 7 (cuDNN >= 7.6.5; NCCL 2.7 or later for multi-GPU support)
* CUDA toolkit 11.0 with cuDNN v8.0.4 (NCCL 2.7 or later for multi-GPU support)
* CUDA toolkit 11.1 with cuDNN v8.1.1 (NCCL 2.7 or later for multi-GPU support)
* CUDA toolkit 11.2 with cuDNN v8.1.1 (NCCL 2.7 or later for multi-GPU support)

For the exact install commands, see the [official Paddle installation guide](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html).

> When predicting on an NVIDIA GPU, the following must hold:
>
> 1. cuda and cudnn are installed on the machine
> 2. the paddle build matching that cuda version is installed
> 3. a sensible initial memory usage ratio is set via the `FLAGS_fraction_of_gpu_memory_to_use` environment variable

### 3.2 Install the EasyEdge Python Wheel

In the `python` directory, install the EasyEdge wheel for your specific Python version. On `x86_64 CPU` or `x86_64 NVIDIA GPU` platforms, use the following command; take the exact name from the whl shipped in the Python SDK package.

```shell
python3 -m pip install -U BaiduAI_EasyEdge_SDK-{SDK version}-cp{Python version}-cp{Python version}m-linux_x86_64.whl
```

On the `armv8 CPU` platform, install with:

```shell
python3 -m pip install -U BaiduAI_EasyEdge_SDK-{SDK version}-cp36-cp36m-linux_aarch64.whl
```
# Quick Start

## 1. File Structure

The Python SDK files are laid out as follows:

```shell
EasyEdge-Linux-x86--[chip]
├──...
├──python               # Linux Python SDK
    ├──                 # EasyEdge wheels for specific Python versions, usable for secondary development
    ├── BaiduAI_EasyEdge_SDK-1.2.8-cp35-cp35m-linux_x86_64.whl
    ├── BaiduAI_EasyEdge_SDK-1.2.8-cp36-cp36m-linux_x86_64.whl
    ├── BaiduAI_EasyEdge_SDK-1.2.8-cp37-cp37m-linux_x86_64.whl
    ├── infer_demo          # complete demo files
    │   ├── demo_xxx.py     # end-to-end inference demo with pre/post-processing
    │   └── demo_serving.py # demo exposing an HTTP service
    ├── tensor_demo         # used to learn custom pre/post-processing
    │   └── demo_xxx.py
```

## 2. Test the Serving Service

> The model resource files are already packaged in the SDK, in the `RES` directory by default.

### 2.1 Start the HTTP Prediction Service

Run the following command, specifying the model folder (default `RES`), the device IP, and the port:

```shell
python3 demo_serving.py {model RES folder} {host, default 0.0.0.0} {port, default 24401}
```

On success, the terminal shows:

```shell
...
 * Running on {host ip}:24401
```

If deployed on a machine inside the LAN, you can now open a browser at `http://{host ip}:24401` and pick an image to test; it looks like this:

<img src="https://user-images.githubusercontent.com/54695910/175854073-fb8189e5-0ffb-472c-a17d-0f35aa6a8418.png" style="zoom:50%;" />

If deployed on a remote machine, you can follow the `http_client_test()` function in `demo_serving.py` to request the HTTP service and run inference.
# HTTP API Walkthrough

This chapter introduces the API through the demo above, so developers can learn it and embed the runtime into their own programs; for more detailed APIs, see the corresponding Python files. The HTTP service has a server side and a client side. The demo implements `method 1: request from a browser`, which does not use base64 images; developers can implement the other request styles as needed.

## 1. Start the HTTP Service

The HTTP service is started via the `demo_serving.py` file:

```python
class Serving(object):
    """ SDK local serving """

    def __init__(self, model_dir, license='', model_filename='model', params_filename='params'):

        self.program = None
        self.model_dir = model_dir
        self.model_filename = model_filename
        self.params_filename = params_filename
        self.program_lock = threading.Lock()
        self.license_key = license
        # Only ObjectTracking initializes video_processor
        self.video_processor = None

    def run(self, host, port, device, engine=Engine.PADDLE_FLUID, service_id=0, device_id=0, **kwargs):
        """
        Args:
            host : str
            port : str
            device : BaiduAI.EasyEdge.Device, e.g. Device.CPU
            engine : BaiduAI.EasyEdge.Engine, e.g. Engine.PADDLE_FLUID
        """
        self.run_serving_with_flask(host, port, device, engine, service_id, device_id, **kwargs)
```
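
Pulling the pieces together, a minimal launch script built on this class could look like the sketch below (mirroring the `demo_serving.py` usage shown earlier in this document; the model path and license are placeholders, and on GPU you would swap the device/engine as in the commented demo lines):

```python
import BaiduAI.EasyEdge as edge
from BaiduAI.EasyEdge.serving import Serving

server = Serving(model_dir='./RES', license='')  # placeholder model path and license
# Bind to all interfaces on the default demo port 24401
server.run(host='0.0.0.0', port=24401,
           device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID)
```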

## 2. Request the HTTP Service

> Developers can open a browser at `http://{device IP}:24401` and pick an image to test.

### 2.1 HTTP request method: without base64

GET parameters in the URL:

| Parameter | Description | Default |
| --------- | ----------- | ------- |
| threshold | confidence threshold filter, 0~1 | if not provided, the model's recommended threshold is used |

The HTTP POST body is the raw binary content of the image.

Python request example:

```python
import requests

with open('./1.jpg', 'rb') as f:
    img = f.read()
result = requests.post(
    'http://127.0.0.1:24401/',
    params={'threshold': 0.1},
    data=img).json()
```

## 3. HTTP Response Data

| Field | Type | Notes |
| ---------- | ------ | ----- |
| error_code | Number | 0 on success; for non-zero values, check message for the concrete error |
| results | Array | the actual recognition results; for field meanings, see the `Predict Image - Return Format` section |
| cost_ms | Number | prediction time in ms, excluding network round-trip time |

Response example:

```json
{
    "cost_ms": 52,
    "error_code": 0,
    "results": [
        {
            "confidence": 0.94482421875,
            "index": 1,
            "label": "IronMan",
            "x1": 0.059185408055782318,
            "x2": 0.18795496225357056,
            "y1": 0.14762254059314728,
            "y2": 0.52510076761245728,
            "mask": "...",   // field for image segmentation models
            "trackId": 0     // field for object tracking models
        }
    ]
}
```

***About rectangle coordinates***

x1 * image width = x coordinate of the detection box's top-left corner

y1 * image height = y coordinate of the detection box's top-left corner

x2 * image width = x coordinate of the detection box's bottom-right corner

y2 * image height = y coordinate of the detection box's bottom-right corner

***About segmentation models***

The mask field holds the run-length encoding of the segmentation mask; see the [demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) for how to decode it.
# FAQ

1. Running the infer_demo files reports: your generated code is out of date and must be regenerated with protoc >= 3.19.0

Inside the project, first uninstall protobuf:

```shell
python3 -m pip uninstall protobuf
```

Then install the pinned protobuf version:

```shell
python3 -m pip install protobuf==3.19.0
```
<a name="0"></a>

# Overview

This document explains how to replace the demo model in FastDeploy with an AI model you trained yourself. (**Note**: the SDK and demo downloaded from FastDeploy only support swapping in a model of the same algorithm.) It assumes you have already got the demo and SDK running; for the corresponding guides, see the [SDK usage docs](https://github.com/PaddlePaddle/FastDeploy/blob/develop/README.md#sdk使用).

* [Overview](#0)<br>
* [Model Replacement](#1)<br>
  * [1. Model Preparation](#2)<br>
    * [1.1 Paddle Models](#3)<br>
    * [1.2 Extra Conversion Step for Paddle OCR Models](#4)<br>
      * [1.2.1 Download the Model Conversion Tool](#5)<br>
      * [1.2.2 Model Conversion](#6)<br>
    * [1.3 Models from Other Frameworks](#7)<br>
  * [2. Model Renaming and Label File Preparation](#8)<br>
    * [2.1 Renaming Non-OCR Models](#9)<br>
    * [2.2 Renaming OCR Models](#10)<br>
    * [2.3 Model Label File](#11)<br>
  * [3. Modify the Config Files](#12)<br>
* [Test the Result](#13)<br>
* [Full Config File Reference](#14)<br>
  * [1. Config File Fields](#15)<br>
  * [2. Preprocessing Order](#16)<br>
* [FAQ](#17)<br>

**Notes:**

1. PP-PicoDet models: FastDeploy supports PP-PicoDet models exported with the post-processing inside the network (i.e. both post-processing and NMS are part of the network structure). Paddle Detection offers three ways to export a static model; choose the one that bakes post-processing and NMS into the network (see the [export section](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/picodet#%E5%AF%BC%E5%87%BA%E5%8F%8A%E8%BD%AC%E6%8D%A2%E6%A8%A1%E5%9E%8B)). You can compare the network variants in detail with the netron tool.

2. PP-PicoDet models: FastDeploy expects the pre-processing to stay outside the network. The TinyPose pipeline in Paddle Detection writes the PP-PicoDet pre-processing into the network; to replace models with the FastDeploy SDK, keep the pre-processing outside the network (see the [export command in Detection](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/keypoint/tiny_pose#%E5%B0%86%E8%AE%AD%E7%BB%83%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%AE%9E%E7%8E%B0%E7%AB%AF%E4%BE%A7%E9%83%A8%E7%BD%B2); set TestReader.fuse_normalize=False).
<a name="1"></a>

# Model Replacement

With a model exported from a PaddlePaddle development kit such as PaddleDetection, PaddleClas, PaddleOCR, or PaddleSeg, complete the three steps [1. Model Preparation](#2), [2. Model Renaming and Label File Preparation](#8), and [3. Modify the Config Files](#12) (the algorithm must be the same for replacement to work) to produce the model files for your custom model. Point the runtime at the new model files and you can run the corresponding inference tasks on your own trained model.

* Model resource folder path on Linux: `EasyEdge-Linux-**/RES/`
* Model resource folder path on Windows: `EasyEdge-Windows-**/data/model/`
* Model resource folder paths on Android: `EasyEdge-Android-**/app/src/assets/infer/` and `app/src/assets/demo/conf.json`
* Model resource folder path on iOS: `EasyEdge-iOS-**/RES/easyedge/`

This mainly involves the four model-related files below (model, params, label_list.txt, infer_cfg.json) plus one config file for the APP name (only relevant for Android, iOS, and HTTP; it holds the APP name and is optional).

* ```
  ├── RES, model, infer    # model resource folder; one set of models fits different hardware, OSes, and deployment methods
  │   ├── conf.json        # needed for the APP name on Android and iOS
  │   ├── model            # model structure file
  │   ├── params           # model parameter file
  │   ├── label_list.txt   # model label file
  │   ├── infer_cfg.json   # model pre/post-processing config file
  ```

> ❗Note: for OCR models on ARM CPU hardware (Android, Linux, and iOS), the steps [1. Model Preparation](#2) and [2. Model Renaming and Label File Preparation](#8) differ from other task models because of the specifics of the task; see the detailed steps below.

<a name="2"></a>

## 1. Model Preparation

<a name="3"></a>

### 1.1 Paddle Models

* A PaddlePaddle model exported from PaddleDetection, PaddleClas, PaddleOCR, PaddleSeg, etc. consists of the following files (the names may have been changed at export time; the `.pdmodel` suffix marks the network structure file and `.pdiparams` the weight file):

```
model.pdmodel    # network structure
model.pdiparams  # weights
model.yml        # model config file (preprocessing parameters, model definition, etc.)
```

<a name="4"></a>

### 1.2 Extra Conversion Step for OCR Models (ARM CPU only)

Because of inference engine version constraints, after exporting the `.pdmodel` and `.pdiparams` files as in [1.1 Paddle Models](#3), OCR models need one extra conversion, in the following two steps:

<a name="5"></a>

#### 1.2.1 Download the Model Conversion Tool

Linux conversion tool: [opt_linux](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.11/opt_linux)</br>
M1 conversion tool: [opt_m1](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.11/opt_m1)</br>
mac conversion tool: [opt_mac](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.11/opt_mac)</br>
<a name="6"></a>

#### 1.2.2 Model Conversion

The following commands, using mac as the example, perform the conversion.

```
# Convert the OCR detection model:
./opt_mac --model_dir=./ch_PP-OCRv3_det_infer/ --valid_targets=arm --optimize_out_type=naive_buffer --optimize_out=./ocr_det

# Convert the OCR recognition model:
./opt_mac --model_dir=./ch_PP-OCRv3_rec_infer/ --valid_targets=arm --optimize_out_type=naive_buffer --optimize_out=./ocr_rec
```

Output:

<div align=center><img src="https://user-images.githubusercontent.com/54695910/175856746-501b05ad-6fba-482e-8e72-fdd68fe52101.png" width="400"></div>

<a name="7"></a>

### 1.3 Models from Other Frameworks

* If your model comes from another framework such as PyTorch, TensorFlow, Caffe, or ONNX, you can convert it with [X2Paddle](https://github.com/PaddlePaddle/X2Paddle) to obtain the corresponding `model.pdmodel` and `model.pdiparams` files.
<a name="8"></a>

## 2. Model Renaming and Label File Preparation

<a name="9"></a>

### 2.1 Renaming Non-OCR Models

Rename the model files exported by the kit according to the rules below, and drop them into the model resource folder:

```
1. rename model.pdmodel to model
2. rename model.pdiparams to params
```

<a name="10"></a>

### 2.2 Renaming OCR Models

```
1. rename ocr_det.nb to model    # the detection model becomes model
2. rename ocr_rec.nb to params   # the recognition model becomes params
```

<a name="11"></a>

### 2.3 Model Label File

Also prepare the label file `label_list.txt` for the model. Follow the format of the `label_list.txt` that ships with the original demo.

<a name="12"></a>

## 3. Modify the Config Files

(1) infer_cfg.json

Every developer needs to look at this config file. When training a model on your own data/task you may have changed the input image size, thresholds, and so on, so update the matching fields in the `infer_cfg.json` under the RES folder accordingly. For CV tasks, the fields involved include:

```
1. "best_threshold": 0.3,    # output threshold of the network; adjust to your model
2. "resize": [512, 512],     # [w, h] network input image size; adjust to your setup
```

A small editing sketch follows.
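
As a minimal sketch of that edit (my own illustration, assuming the `RES` layout described above and that the shipped file is plain JSON; the field names are taken from the config reference later in this document):

```python
import json

cfg_path = "RES/infer_cfg.json"  # placeholder path to your model resource folder

with open(cfg_path, "r", encoding="utf-8") as f:
    cfg = json.load(f)

# Adjust the fields your retrained model actually changed
cfg["model_info"]["best_threshold"] = 0.3   # output threshold
cfg["pre_process"]["resize"] = [512, 512]   # [w, h] network input size

with open(cfg_path, "w", encoding="utf-8") as f:
    json.dump(cfg, f, ensure_ascii=False, indent=4)
```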

(2) conf.json

Only developers building Android, iOS, or HTTP service applications need this config file. Name your application as needed, following the existing `conf.json`.

Usually these are the only configuration changes involved when swapping the model in a FastDeploy project. For the full config details, see the [Full Config File Reference](#14).

<a name="13"></a>

# Test the Result

After finishing steps 2 and 3 on your custom `RES` folder, follow the [SDK usage docs](https://github.com/PaddlePaddle/FastDeploy/blob/develop/README.md#sdk%E4%BD%BF%E7%94%A8) to try the different prediction experiences with your own model.

<a name="14"></a>

# Full Config File Reference

<a name="15"></a>

## 1. Config File Fields

The model resource file `infer_cfg.json` carries a lot of pre/post-processing information for different algorithms. The annotated example below describes the fields; developers who did not change the algorithm's pre/post-processing usually do not need to touch them. Fields not marked [required] are optional.

```json
{
    "version": 1,
    "model_info": {
        "best_threshold": 0.3,   // default 0.3
        "model_kind": 1          // [required] 1: classification, 2: detection, 6: instance segmentation, 12: tracking, 14: semantic segmentation, 401: face, 402: pose, 10001: decision
    },
    "pre_process": {             // [required]
        // normalization; preprocessing computes (origin_img - mean) * scale
        "skip_norm": false,      // default false; if true, the mean/scale step is skipped
        "mean": [123, 123, 123], // [required, usually leave as-is] image mean, already converted from the Paddle kit means; if you did not change the kit parameters, you can ignore it. (X - mean) / scale
        "scale": [0.017, 0.017, 0.017],  // [required, usually leave as-is]
        "color_format": "RGB",   // or BGR [required, usually leave as-is]
        "channel_order": "CHW",  // or HWC
        // size-related
        "resize": [300, 300],    // w, h [required]
        "rescale_mode": "keep_size",     // default keep_size; also keep_ratio, keep_ratio2, keep_raw_size, warp_affine
        "max_size": 1366,        // for keep_ratio; falls back to resize[0] if absent
        "target_size": 800,      // for keep_ratio; falls back to resize[1] if absent
        "raw_size_range": [100, 10000],  // for keep_raw_size
        "warp_affine_keep_res": false,   // for warp_affine mode; default false
        "center_crop_size": [224, 224],  // w, h; provide only if center_crop is needed, otherwise omit the field
        "padding": false,
        "padding_mode": "padding_align32",      // [optional] default padding_align32; alternatively padding_fill_size
        "padding_fill_size": [416, 416],        // [optional] only for padding_fill_size mode, [fill_size_w, fill_size_h]; padding fill matches the paddle detection implementation, padding on the bottom and right
        "padding_fill_value": [114, 114, 114],  // [optional] only for padding_fill_size mode
        // other
        "letterbox": true
    },
    "post_process": {
        "box_normed": true  // default true; false means the model's box coordinate output is not normalized
    }
}
```

<a name="16"></a>

## 2. Preprocessing Order (steps not configured are skipped automatically)

1. grayscale -> rgb conversion
2. resize
3. center_crop
4. rgb/bgr conversion
5. padding_fill_size
6. letterbox (draw a thick border filled with black)
7. chw/hwc conversion
8. normalization: mean, scale
9. padding_align32

About rescale_mode:

* keep_size: scale the image to the size given by resize
* keep_ratio: scale the image proportionally so that the long side does not exceed max_size and the short side does not exceed target_size
* keep_raw_size: keep the original size, clamped to raw_size_range
* warp_affine: affine transform; warp_affine_keep_res controls whether the resolution is kept, and when keep_res is false, width/height come from the resize field

A sketch of the keep_ratio rule follows.
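
A minimal sketch of the keep_ratio rule described above (my own illustration, not SDK code; it mirrors the "long side ≤ max_size, short side ≤ target_size" constraint):

```python
import cv2  # opencv-python

def keep_ratio_resize(img, target_size=800, max_size=1366):
    """Scale proportionally, as large as possible, while keeping the
    short side <= target_size and the long side <= max_size."""
    h, w = img.shape[:2]
    short_side, long_side = min(h, w), max(h, w)
    scale = min(target_size / short_side, max_size / long_side)
    return cv2.resize(img, (int(round(w * scale)), int(round(h * scale))))
```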

<a name="17"></a>

# FAQ

### 1. How do I handle undefined reference / error while loading shared libraries?

> e.g.: ./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory

When this happens, locate the library in question and add its directory to LD_LIBRARY_PATH, or install the missing library.

> Example 1: libverify.so.1: cannot open shared object file: No such file or directory
> The linker cannot find libverify.so. This can usually be fixed with export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib (adjust the path after the colon to wherever libverify.so actually lives).

> Example 2: libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory
> The linker cannot find libopencv_videoio.so. This can usually be fixed with export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib (adjust the path after the colon to wherever libopencv_videoio.so actually lives).

> Example 3: GLIBCXX_X.X.X not found
> The linker cannot find the required glibc version. Make sure the system gcc version is >= the gcc version the SDK was built with. Search online for guides on upgrading gcc/glibc.

### 2. Requests through libcurl to the HTTP service are noticeably slow

This is caused by libcurl sending Expect: 100-continue, which makes the server wait for data. Add an empty header:

```bash
headers = curl_slist_append(headers, "Expect:");
```

### 3. Running the binary reports libverify.so cannot open shared object file

cmake may not have set the rpath correctly. Set LD_LIBRARY_PATH to the SDK's lib folder and run again:

```bash
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo
```

### 4. Compilation error: file format not recognized

File metadata was probably lost while copying the SDK. Copy the whole archive to the target device, then extract and compile there.
# Overview

Using the [1000-class MobileNetV3 model](https://ai.baidu.com/easyedge/app/openSource) as an example, this document describes the model SDK in FastDeploy for the **Intel x86_64 / NVIDIA GPU, Windows C++** environment: (1) the steps for image and video inference deployment, and (2) the full inference API, so developers can build on the project after getting familiar with it.

For Windows Python, refer to [Inference deployment in a Windows Python environment](./Windows-Python-SDK-Inference.md).

<!--ts-->

* [Overview](#overview)

* [Environment Preparation](#environment-preparation)
  * [1. Download the SDK](#1-download-the-sdk)
  * [2. C++ Environment](#2-c-environment)

* [Quick Start](#quick-start)
  * [1. Project Structure](#1-project-structure)
  * [2. Test the EasyEdge Service](#2-test-the-easyedge-service)
  * [3. Predict an Image](#3-predict-an-image)
  * [4. Predict a Video Stream](#4-predict-a-video-stream)
  * [5. Compile the Demo](#5-compile-the-demo)

* [Prediction API Walkthrough](#prediction-api-walkthrough)
  * [1. SDK Runtime Configuration](#1-sdk-runtime-configuration)
  * [2. Initialize the Predictor](#2-initialize-the-predictor)
  * [3. Run Inference](#3-run-inference)
    * [3.1 Predict an Image](#31-predict-an-image)
    * [3.2 Predict a Video](#32-predict-a-video)

* [FAQ](#faq)

<!--te-->
# Environment Preparation

## 1. Download the SDK

Depending on your model, target chip, and operating system, download the matching SDK from the [PaddlePaddle open-source models page](https://ai.baidu.com/easyedge/app/openSource) or [GitHub](https://github.com/PaddlePaddle/FastDeploy). The extracted file structure is described in [1. Project Structure](#1-project-structure) under `Quick Start`.

## 2. C++ Environment

> Microsoft Visual Studio 2015 or later is recommended for core C and C++ support; select the "Desktop development with C++" workload during installation.

# Quick Start

## 1. Project Structure

```shell
EasyEdge-win-xxx
├── data
│   ├── model    # model resource folder; can be swapped for another model
│   └── config   # config files
├── bin          # demo binaries
│   ├── xxx_image    # image prediction demo
│   ├── xxx_video    # video stream prediction demo
│   └── xxx_serving  # HTTP prediction service demo
├── dll          # dynamic libraries the demo binaries depend on
├── ...          # files needed for secondary development
├── python       # Python SDK files
├── EasyEdge.exe # EasyEdge service
└── README.md    # environment notes
```

## 2. Test the EasyEdge Service

> The model resource files are already packaged in the SDK. Copy the whole zip archive to the target device first, then extract it there.

After downloading the SDK, double-click EasyEdge.exe to start the inference service, enter the Host IP and Port to bind, and click to start the service.

<div align="center">
<img src="https://user-images.githubusercontent.com/54695910/175854086-d507c288-56c8-4fa9-a00c-9d3cfeaac1c8.png" alt="image" style="zoom: 67%;" />
</div>

Once the service is up, open a browser at `http://{Host ip}:{Port}` and add an image or video to test.

<div align="center">
<img src="https://user-images.githubusercontent.com/54695910/175854073-fb8189e5-0ffb-472c-a17d-0f35aa6a8418.png" style="zoom:67%;" />
</div>
## 3. Predict an Image

Besides the flow above, you can use the executables under the bin directory to try individual features. Right-click in the dll directory, choose "Open in Terminal", and run the following command.

> The executables under bin need to be moved into the dll directory to run, or add the dll directory to the system environment variables.

```bash
.\easyedge_image_inference {model folder} {test image path}
```

Sample output:

<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854068-28d27c0a-ef83-43ee-9e89-b65eed99b476.jpg" width="400"></div>

```shell
2022-06-20 10:36:57,602 INFO [EasyEdge] 9788 EasyEdge Windows Development Kit 1.5.2(Build CPU.Generic 20220607) Release
--- Fused 0 subgraphs into layer_norm op.
2022-06-20 10:36:58,008 INFO [EasyEdge] 9788 Allocate graph success.
Results of image ..\demo.jpg:
8, n01514859 hen, p:0.953429
save result image to ..\demo.jpg.result-cpp.jpg
Done
```

The result is `index:8, label:hen`. Looking this up in the imagenet [class mapping table](https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a) gives the class 'hen', which confirms that the prediction is correct.

## 4. Predict a Video Stream

```
.\easyedge_video_inference {model folder} {video_type} {video_src}
```

video_type supports three kinds of video streams: (1) a local video file, (2) a local camera id, and (3) a network video stream URL.

```
/**
 * @brief Type of the input source
 */
enum class SourceType {
    kVideoFile = 1,     // local video file
    kCameraId = 2,      // camera index
    kNetworkStream = 3, // network video stream
};
```

video_src is the corresponding file path, camera index, or stream URL.

## 5. Compile the Demo

As described in [Project Structure](#1-project-structure), the executables under `bin` are compiled from the corresponding files under `src`; the build commands are:

```
cd src
mkdir build && cd build
cmake .. && make
```

After compilation, the executables are generated in the build folder, e.g. the image inference binary `build/demo_serving/easyedge_serving`.
# Prediction API Walkthrough

This chapter introduces the inference API through the demo above, so developers can learn it and embed the runtime into their own programs. For more detailed APIs, see the `include/easyedge/easyedge*.h` files. Image and video inference involve the three APIs below; see the step comments in the cpp code.

> ❗Note:
> (1) The `src` folder contains a complete, compilable cmake project example; it helps to know the [cmake basics](https://cmake.org/cmake/help/latest/guide/tutorial/index.html) first.
> (2) Follow the usage flow and notes of the demo projects shipped with the SDK first. If you hit errors, consult the comments, explanations, and log messages in those files first.

```cpp
// step 1: configure the SDK runtime parameters
EdgePredictorConfig config;
config.model_dir = {model file directory};

// step 2: create and initialize the Predictor; pick a suitable engine here
auto predictor = global_controller()->CreateEdgePredictor(config);

// step 3-1: predict an image
auto img = cv::imread({image path});
std::vector<EdgeResultData> results;
predictor->infer(img, results);

// step 3-2: predict a video
std::vector<EdgeResultData> results;
FrameTensor frame_tensor;
VideoConfig video_config;
video_config.source_type = static_cast<SourceType>(video_type);  // see the header easyedge_video.h for the source_type definition
video_config.source_value = video_src;
/*
... more video_configs: set the VideoConfig options as needed
*/
auto video_decoding = CreateVideoDecoding(video_config);
while (video_decoding->next(frame_tensor) == EDGE_OK) {
    results.clear();
    if (frame_tensor.is_needed) {
        predictor->infer(frame_tensor.frame, results);
        render(frame_tensor.frame, results, predictor->model_info().kind);
    }
    //video_decoding->display(frame_tensor); // display the current frame; must be enabled in video_config
    //video_decoding->save(frame_tensor);    // store the current frame to the video file; must be enabled in video_config
}
```

To customize the library search path or the gcc path, edit the CMakeList.txt of the corresponding demo project.

## 1. SDK Runtime Configuration

The SDK parameters are configured through `EdgePredictorConfig::set_config` and `global_controller()->set_config`. This demo sets the model path and keeps the defaults for all other parameters. For the full set of supported runtime parameters, see the detailed notes in the development kit headers (`include/easyedge/easyedge_xxxx_config.h`).

Configuration usage:

```
EdgePredictorConfig config;
config.model_dir = {model file directory};
```
## 2. Initialize the Predictor

- Interface

```cpp
auto predictor = global_controller()->CreateEdgePredictor(config);
predictor->init();
```

If the return value is non-zero, check the output logs for the cause of the error.

## 3. Run Inference

### 3.1 Predict an Image

> The demo shows the infer() interface taking the image content as cv::Mat& image and writing the inference results into std::vector<EdgeResultData>& result. For more on infer(), see the `easyedge.h` header and pass in what you need according to the actual parameter descriptions.

- Interface input

```cpp
/**
 * @brief
 * Generic interface
 * @param image: must be BGR , HWC format (opencv default)
 * @param result
 * @return
 */
virtual int infer(cv::Mat& image, std::vector<EdgeResultData>& result) = 0;
```

The image must be in opencv's default BGR, HWC format.

- Interface output

The classification and location information can be read from `EdgeResultData`:

```cpp
struct EdgeResultData {
    int index;          // index of the classification result
    std::string label;  // label of the classification result
    float prob;         // confidence

    // for object detection or image segmentation:
    float x1, y1, x2, y2;  // (x1, y1): top-left corner; (x2, y2): bottom-right corner; all as 0~1 fractions of width/height

    // for image segmentation:
    cv::Mat mask;          // 0/1 mask
    std::string mask_rle;  // Run Length Encoding of the mask
};
```

*** About rectangle coordinates ***

x1 * image width = x coordinate of the detection box's top-left corner

y1 * image height = y coordinate of the detection box's top-left corner

x2 * image width = x coordinate of the detection box's bottom-right corner

y2 * image height = y coordinate of the detection box's bottom-right corner

*** About the segmentation mask ***

```
cv::Mat mask is the 2-D array of the image mask
{
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
where 1 marks the target region and 0 the background
```

*** About the segmentation mask_rle ***

This field holds the run-length encoding of the mask; see the [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) for how to decode it.

The fields above can be parsed following the opencv drawing logic in the demo files.
### 3.2 Predict a Video

The SDK provides the utility class `VideoDecoding` for camera capture and for parsing video files and network video streams, with convenience functions for fetching video frames. The `VideoConfig` struct controls the video/camera parsing policy, the frame-extraction policy, resolution adjustment, and storage of the result video. Extracted frames can be passed directly to the SDK's infer interface for prediction.

- Interface input

class `VideoDecoding`:

```
/**
 * @brief Get the next frame from the input source
 * @param frame_tensor
 * @return
 */
virtual int next(FrameTensor &frame_tensor) = 0;

/**
 * @brief Display the video frame currently held in frame_tensor
 * @param frame_tensor
 * @return
 */
virtual int display(const FrameTensor &frame_tensor) = 0;

/**
 * @brief Write the video frame currently held in frame_tensor to a local video file
 * @param frame_tensor
 * @return
 */
virtual int save(FrameTensor &frame_tensor) = 0;

/**
 * @brief Get the fps attribute of the video
 * @return
 */
virtual int get_fps() = 0;

/**
 * @brief Get the width attribute of the video
 * @return
 */
virtual int get_width() = 0;

/**
 * @brief Get the height attribute of the video
 * @return
 */
virtual int get_height() = 0;
```
struct `VideoConfig`

```
/**
 * @brief Options for the video source, frame-extraction policy, and storage policy
 */
struct VideoConfig {
    SourceType source_type;            // type of the input source
    std::string source_value;          // address of the input source, e.g. a video file path, a camera index, or a network stream URL
    int skip_frames{0};                // frame skipping: extract one frame every skip_frames frames and set its is_needed flag to true
    int retrieve_all{false};           // whether to retrieve every frame for display or storage; frames that do not match the skip_frames policy get is_needed set to false
    int input_fps{0};                  // set the fps of the video before frame extraction
    Resolution resolution{Resolution::kAuto}; // sampling resolution; only effective for cameras

    bool enable_display{false};        // not supported by default
    std::string window_name{"EasyEdge"};
    bool display_all{false};           // whether to display all frames; if false, only frames extracted per skip_frames are displayed

    bool enable_save{false};
    std::string save_path;             // path where frames are stored as a video file
    bool save_all{false};              // whether to store all frames; if false, only frames extracted per skip_frames are stored

    std::map<std::string, std::string> conf;
};
```

| No. | Field | Meaning |
| --- | -------------- | ------- |
| 1 | `source_type` | Type of the input source: video file, camera, or network video stream, with values 1, 2, and 3 respectively |
| 2 | `source_value` | If `source_type` is a video file, the full path to the file; if a camera, the camera index (e.g. 0 for `/dev/video0`); if a network video stream, the full stream URL. |
| 3 | `skip_frames` | Frame skipping: one frame is extracted every skip_frames frames and its is_needed flag is set to true. Frames marked is_needed are the ones used for prediction; all other frames are skipped without prediction. |
| 4 | `retrieve_all` | If true, every frame is retrieved and returned for display or storage, regardless of the frame-skipping setting. |
| 5 | `input_fps` | Sets the fps before frame extraction |
| 6 | `resolution` | Camera sampling resolution; see the definitions in `easyedge_video.h`. Note that this adjustment only takes effect when the input source is a camera. |
| 7 | `conf` | Advanced options. Some settings are passed through this map. |

*** Note: ***

1. `VideoConfig` does not support the `display` feature out of the box. To use `display` with `VideoConfig`, you need to build OpenCV with the GTK option yourself.

2. When extracting frames from a camera, if the resolution adjustment made via `resolution` does not take effect, add the following option:

```
video_config.conf["backend"] = "2";
```

3. CSI cameras on some devices are not yet supported. If you run into problems, you can report them via a ticket, the QQ group, or the WeChat group.

For the full interface call flow, see `demo_video_inference` in the SDK.
# FAQ

1. Running the infer_demo files reports: your generated code is out of date and must be regenerated with protoc >= 3.19.0

Inside the project, first uninstall protobuf:

```shell
python3 -m pip uninstall protobuf
```

Then install the pinned protobuf version:

```shell
python3 -m pip install protobuf==3.19.0
```
# Overview

Using the [1000-class MobileNetV3 model](https://ai.baidu.com/easyedge/app/openSource) as an example, this document describes the model SDK in FastDeploy for the C++ environment on **Intel x86_64 / NVIDIA GPU with Windows**: (1) the steps for HTTP serving-based inference deployment, and (2) the full inference API, so developers can build on the project after getting familiar with it.

For Python, refer to [Serving deployment in a Windows Python environment](./Windows-Python-SDK-Serving.md).

<!--ts-->

* [Overview](#overview)

* [Environment Preparation](#environment-preparation)
  * [1. Download the SDK](#1-download-the-sdk)
  * [2. C++ Environment](#2-c-environment)

* [Quick Start](#quick-start)
  * [1. Project Structure](#1-project-structure)
  * [2. Test the EasyEdge Service](#2-test-the-easyedge-service)
  * [3. Start the HTTP Prediction Service](#3-start-the-http-prediction-service)
  * [4. Compile the Demo](#4-compile-the-demo)

* [HTTP API Walkthrough](#http-api-walkthrough)
  * [1. Start the HTTP Service](#1-start-the-http-service)
  * [2. Request the HTTP Service](#2-request-the-http-service)
    * [2.1 HTTP request method 1: without base64](#21-http-request-method-1-without-base64)
    * [2.2 HTTP request method 2: with base64](#22-http-request-method-2-with-base64)
  * [3. HTTP Response Data](#3-http-response-data)

* [FAQ](#faq)

<!--te-->
# Environment Preparation

## 1. Download the SDK

Depending on your model, target chip, and operating system, download the matching SDK from the [PaddlePaddle open-source models page](https://ai.baidu.com/easyedge/app/openSource) or [GitHub](https://github.com/PaddlePaddle/FastDeploy). The extracted file structure is described in [1. Project Structure](#1-project-structure) under `Quick Start`.

## 2. C++ Environment

> Microsoft Visual Studio 2015 or later is recommended for core C and C++ support; select the "Desktop development with C++" workload during installation.

# Quick Start

## 1. Project Structure

```shell
EasyEdge-win-xxx
├── data
│   ├── model    # model resource folder; can be swapped for another model
│   └── config   # config files
├── bin          # demo binaries
│   ├── xxx_image    # image prediction demo
│   ├── xxx_video    # video stream prediction demo
│   └── xxx_serving  # HTTP prediction service demo
├── dll          # dynamic libraries the demo binaries depend on
├── ...          # files needed for secondary development
├── python       # Python SDK files
├── EasyEdge.exe # EasyEdge service
└── README.md    # environment notes
```

## 2. Test the EasyEdge Service

> The model resource files are already packaged in the SDK. Copy the whole zip archive to the target device first, then extract it there.

After downloading the SDK, double-click EasyEdge.exe to start the inference service, enter the Host IP and Port to bind, and click to start the service.

<div align="center">
<img src="https://user-images.githubusercontent.com/54695910/175854086-d507c288-56c8-4fa9-a00c-9d3cfeaac1c8.png" alt="image" style="zoom: 67%;" />
</div>

Once the service is up, open a browser at `http://{Host ip}:{Port}` and add an image or video to test.

<div align="center">
<img src="https://user-images.githubusercontent.com/54695910/175854073-fb8189e5-0ffb-472c-a17d-0f35aa6a8418.png" style="zoom:67%;" />
</div>
## 3. Start the HTTP Prediction Service

Besides the flow above, you can use the executables under the bin directory to try individual features. Right-click in the dll directory, choose "Open in Terminal", and run the following command.

> The executables under bin need to be moved into the dll directory to run, or add the dll directory to the system environment variables.

```
.\easyedge_serving {model folder path}
```

After startup, the log shows:

```
HTTP is now serving at 0.0.0.0:24401
```

Developers can then open a browser at `http://127.0.0.1:24401` and proceed exactly as before.

## 4. Compile the Demo

As described in [Project Structure](#1-project-structure), the executables under `bin` are compiled from the corresponding files under `src`; the build commands are:

```
cd src
mkdir build && cd build
cmake .. && make
```

After compilation, the executables are generated in the build folder, e.g. the image inference binary `build/demo_serving/easyedge_serving`.
# HTTP API Walkthrough

This chapter builds on the API behind [3. Start the HTTP Prediction Service](#3-start-the-http-prediction-service), so developers can learn it and embed the runtime into their own programs. For more detailed APIs, see the `include/easyedge/easyedge*.h` files. The HTTP service has a server side and a client side. Of the supported request styles, the demo implements `method 1: request from a browser`, which does not use base64 images; developers can implement the other styles as needed.

## 1. Start the HTTP Service

The HTTP service can be started directly with `bin/easyedge_serving`, or you can adapt the logic in `src/demo_serving.cpp`:

```cpp
/**
 * @brief Start a simple demo HTTP server.
 * This method blocks until it receives sigint/sigterm.
 * Inside the HTTP service, image decoding runs on the CPU, which may slow down inference.
 * @tparam ConfigT
 * @param config
 * @param host
 * @param port
 * @param service_id service_id user parameter, uri '/get/service_id' will respond this value with 'text/plain'
 * @param instance_num number of instances; tune it to your memory/VRAM/latency requirements
 * @return
 */
template<typename ConfigT>
int start_http_server(
    const ConfigT &config,
    const std::string &host,
    int port,
    const std::string &service_id,
    int instance_num = 1);
```

## 2. Request the HTTP Service

> Developers can open a browser at `http://{device IP}:24401` and pick an image to test.

### 2.1 HTTP request method 1: without base64

GET parameters in the URL:

| Parameter | Description | Default |
| --------- | ----------- | ------- |
| threshold | confidence threshold filter, 0~1 | if not provided, the model's recommended threshold is used |

The HTTP POST body is the raw binary content of the image (no base64, no JSON).

Python request example:

```Python
import requests

with open('./1.jpg', 'rb') as f:
    img = f.read()
result = requests.post(
    'http://127.0.0.1:24401/',
    params={'threshold': 0.1},
    data=img).json()
```
### 2.2 HTTP request method 2: with base64

HTTP method: POST
Headers:

| Parameter | Value |
| ------------ | ---------------- |
| Content-Type | application/json |

**Request body**:

- Classification networks:
  Example body:

```
{
    "image": "<base64 data>",
    "top_num": 5
}
```

Body parameters:

| Parameter | Required | Type | Range | Description |
| ------- | ---- | ------ | ----- | ----------- |
| image | yes | string | - | image data, base64-encoded; the encoded image must be at most 4 MB, the shortest side at least 15 px, the longest side at most 4096 px; jpg/png/bmp formats are supported. **Strip the data-URI header.** |
| top_num | no | number | - | number of classes to return; if omitted, all classification results are returned |

- Detection and segmentation networks:
  Example body:

```
{
    "image": "<base64 data>"
}
```

Body parameters:

| Parameter | Required | Type | Range | Description |
| --------- | ---- | ------ | ----- | ----------- |
| image | yes | string | - | image data, base64-encoded; the encoded image must be at most 4 MB, the shortest side at least 15 px, the longest side at most 4096 px; jpg/png/bmp formats are supported. **Strip the data-URI header.** |
| threshold | no | number | - | defaults to the recommended threshold; can be set as needed |
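
A minimal request sketch for this base64 method, analogous to the Python example in the Linux serving document of this SDK (assuming the service runs locally on port 24401; the image path is a placeholder):

```python
import base64
import requests

with open('./1.jpg', 'rb') as f:  # placeholder image path
    b64 = base64.b64encode(f.read()).decode('utf8')  # no data-URI header, as required above

# For detection/segmentation, send {"image": ...} plus an optional "threshold"
result = requests.post('http://127.0.0.1:24401/',
                       json={'image': b64, 'threshold': 0.3}).json()
print(result)
```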
## 3. http 返回数据
|
||||
|
||||
| 字段 | 类型说明 | 其他 |
|
||||
| ---------- | ------ | ------------------------------------ |
|
||||
| error_code | Number | 0为成功,非0参考message获得具体错误信息 |
|
||||
| results | Array | 内容为具体的识别结果。其中字段的具体含义请参考`预测图像-返回格式`一节 |
|
||||
| cost_ms | Number | 预测耗时ms,不含网络交互时间 |
|
||||
|
||||
返回示例
|
||||
|
||||
```json
|
||||
{
|
||||
"cost_ms": 52,
|
||||
"error_code": 0,
|
||||
"results": [
|
||||
{
|
||||
"confidence": 0.94482421875,
|
||||
"index": 1,
|
||||
"label": "IronMan",
|
||||
"x1": 0.059185408055782318,
|
||||
"x2": 0.18795496225357056,
|
||||
"y1": 0.14762254059314728,
|
||||
"y2": 0.52510076761245728,
|
||||
"mask": "...", // 图像分割模型字段
|
||||
"trackId": 0, // 目标追踪模型字段
|
||||
},
|
||||
|
||||
]
|
||||
}
|
||||
```
***About rectangle coordinates***

x1 * image width = x-coordinate of the detection box's top-left corner

y1 * image height = y-coordinate of the detection box's top-left corner

x2 * image width = x-coordinate of the detection box's bottom-right corner

y2 * image height = y-coordinate of the detection box's bottom-right corner

***About segmentation models***

The mask field is the run-length encoding produced by segmentation models; see the [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) for how to decode it.

# FAQ

1. Running the infer_demo files reports `your generated code is out of date and must be regenerated with protoc >= 3.19.0`

Inside the project, first uninstall protobuf:

```shell
python3 -m pip uninstall protobuf
```

Then install protobuf 3.19.0:

```shell
python3 -m pip install protobuf==3.19.0
```
@@ -1,381 +0,0 @@

# Introduction

Taking the [1000-class MobileNetV3 model](https://ai.baidu.com/easyedge/app/openSource) as an example, this document describes how to use the model SDK in FastDeploy in an **Intel x86_64 / NVIDIA GPU, Windows, Python** environment: (1) the steps to deploy image inference; (2) the full inference API, as a starting point for secondary development.
For C++ deployment on Windows, see [Windows-CPP-SDK-Inference.md](./Windows-CPP-SDK-Inference.md).
<!--ts-->

* [Introduction](#introduction)

* [Environment Setup](#environment-setup)

  * [1. Download the SDK](#1-download-the-sdk)
  * [2. Python environment](#2-python-environment)
  * [3. Install dependencies](#3-install-dependencies)
    * [3.1 Install paddlepaddle](#31-install-paddlepaddle)
    * [3.2 Install the EasyEdge Python wheel](#32-install-the-easyedge-python-wheel)

* [Quick Start](#quick-start)

  * [1. File layout](#1-file-layout)
  * [2. Run the Demo](#2-run-the-demo)
    * [2.1 Predict an image](#21-predict-an-image)

* [Prediction API Flow in Detail](#prediction-api-flow-in-detail)

  * [1. Basic flow](#1-basic-flow)
  * [2. Initialization](#2-initialization)
  * [3. SDK parameters](#3-sdk-parameters)
  * [4. Predict an image](#4-predict-an-image)

* [FAQ](#faq)

<!--te-->
# Environment Setup

## 1. Download the SDK

Choose and download the SDK matching your model, target chip, and operating system from [Paddle open-source models](https://ai.baidu.com/easyedge/app/openSource) or [GitHub](https://github.com/PaddlePaddle/FastDeploy). The extracted files are laid out as follows:

```shell
EasyEdge-win-[chip]
├── data          # model resource folder; can be swapped for another model
├── ...           # C++/C# files
├── python        # Python SDK files
├── EasyEdge.exe  # main program
└── README.md     # environment notes
```

<a name="3"></a>

## 2. Python environment

> The SDK currently supports Python 3.7 only

Open a terminal and run the command below to check the installed Python version. If Python is not installed yet, download the Python 3.7 installer from the [official site](https://www.python.org/); make sure to tick `Add Python 3.7 to PATH`, then click "Install Now".

```shell
python --version
```

If the local version does not match, configure the directory holding the Python SDK with a Python version manager such as [pyenv](https://github.com/pyenv/pyenv) or [anaconda](https://www.anaconda.com/).

Then confirm that pip is at version 20.2.2 or newer with the command below. See the [official guide](https://pip.pypa.io/en/stable/installation/) for pip installation details.

```shell
python -m pip --version
```
## 3. Install dependencies

### 3.1 Install paddlepaddle

Install the PaddlePaddle wheel matching your target chip (CPU/GPU). On an `x86_64 CPU` platform:

```shell
python -m pip install paddlepaddle==2.2.2 -i https://mirror.baidu.com/pypi/simple
```

For `NVIDIA GPU` platforms, see the [official Paddle installation guide](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html).

> When predicting on an NVIDIA GPU, the following must hold:
>
> 1. CUDA and cuDNN are installed on the machine
>
> 2. The paddle build matching your CUDA version is installed
> 3. The environment variable `FLAGS_fraction_of_gpu_memory_to_use` is set to a sensible initial GPU-memory fraction

<a name="6"></a>

### 3.2 Install the EasyEdge Python wheel

In the `python` directory, install the EasyEdge wheel built for Python 3.7. On `x86_64 CPU` or `x86_64 NVIDIA GPU` platforms, use the command below; the exact file name is the wheel shipped in the Python SDK package.

```shell
python -m pip install -U BaiduAI_EasyEdge_SDK-{SDK version}-cp37-cp37m-win_amd64.whl
```

<a name="7"></a>
# Quick Start

<a name="8"></a>

## 1. File layout

The Python SDK is laid out as follows:

```shell
EasyEdge-win-[chip]
├── data                  # model resource folder; can be swapped for another model
│   ├── model             # model files
│   └── config            # configuration files
├── ...                   # C++/C# files
├── python                # Python SDK files
│   ├── # EasyEdge wheel for Python 3.7, usable for secondary development
│   ├── BaiduAI_EasyEdge_SDK-${SDK version}-cp37-cp37m-win_amd64.whl
│   ├── requirements.txt
│   ├── infer_demo        # complete demo files
│   │   ├── demo_xxx.py       # end-to-end inference demo incl. pre/post-processing
│   │   └── demo_serving.py   # HTTP serving demo
│   └── tensor_demo       # tensor in/out demo files
```

<a name="9"></a>

## 2. Run the Demo

<a name="10"></a>

### 2.1 Predict an image

Run the demo for your platform from the infer_demo folder:

```shell
python demo_x86_cpu.py {model folder} {test image path}
```

Sample run:

<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854068-28d27c0a-ef83-43ee-9e89-b65eed99b476.jpg" width="400"></div>

```shell
2022-06-14 18:35:44 DEBUG [EasyEdge] [demo_x86_cpu.py:41] 19424: Config:: w: 256, h: 256; mean: [123.675, 116.28, 103.53]; scale: [0.01712475 0.017507 0.01742919]
2022-06-14 18:35:44 INFO [EasyEdge] [demo_x86_cpu.py:41] 19424: Init paddlefluid engine...
2022-06-14 18:35:45 INFO [EasyEdge] [demo_x86_cpu.py:41] 19424: Paddle version: 2.2.2
2022-06-14 18:35:45 DEBUG [EasyEdge] [demo_x86_cpu.py:41] 19424: CPU thread num set to 1
2022-06-14 18:35:45 DEBUG [EasyEdge] [demo_x86_cpu.py:55] 19424: resize to w257, h256
2022-06-14 18:35:45 DEBUG [EasyEdge] [demo_x86_cpu.py:55] 19424: Switch to CHW
2022-06-14 18:35:45 DEBUG [EasyEdge] [demo_x86_cpu.py:55] 19424: Infer cost: 70.1(66.1) ms
{'confidence': 0.9012351036071777, 'index': 8, 'label': 'n01514859 hen'}
```

The output is `index: 8, label: hen`; looking index 8 up in the ImageNet [class mapping](https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a) likewise gives 'hen', so the prediction is correct.
# Prediction API Flow in Detail

This chapter walks through the inference API using the Demo above, so developers can embed the runtime library into their own programs. For the full API, see `infer_demo/demo_xx_xx.py` and the step comments in the Python code below.

## 1. Basic flow

> ❗Note: prefer the usage flow and notes of the demos shipped with the SDK. When you hit an error, first check the comments, explanations, and logs in those files.

`infer_demo/demo_xx_xx.py`

```python
# Import the EasyEdge runtime
import BaiduAI.EasyEdge as edge

# Create and initialize a prediction Program; pick the appropriate engine
pred = edge.Program()
pred.init(model_dir={path to the RES folder}, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID)  # x86_64 CPU
# pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID)  # x86_64 Nvidia GPU
# pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE)  # armv8 CPU

# Predict an image
res = pred.infer_image({image as numpy.ndarray})

# Shut down the prediction Program
pred.close()
```

`infer_demo/demo_serving.py`

```python
import BaiduAI.EasyEdge as edge
from BaiduAI.EasyEdge.serving import Serving

# Create and initialize the HTTP service
server = Serving(model_dir={path to the RES folder}, license=serial_key)

# Run the HTTP service
# See demo_xx_xx.py in the same directory:
#   pred.init(model_dir=xx, device=xx, engine=xx, device_id=xx)
# and adjust the device/device_id/engine arguments below accordingly
server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID)  # x86_64 CPU
# server.run(host=host, port=port, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID)  # x86_64 Nvidia GPU
# server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE)  # armv8 CPU
```
## 2. Initialization

- Interface

```python
def init(self,
         model_dir,
         device=Device.CPU,
         engine=Engine.PADDLE_FLUID,
         config_file='conf.json',
         preprocess_file='preprocess_args.json',
         model_file='model',
         params_file='params',
         label_file='label_list.txt',
         infer_cfg_file='infer_cfg.json',
         device_id=0,
         thread_num=1
         ):
    """
    Args:
        model_dir: str
        device: BaiduAI.EasyEdge.Device, e.g. Device.CPU
        engine: BaiduAI.EasyEdge.Engine, e.g. Engine.PADDLE_FLUID
        config_file: str
        preprocess_file: str
        model_file: str
        params_file: str
        label_file: str, the label file
        infer_cfg_file: file holding the pre/post-processing configuration
        device_id: int, device id
        thread_num: int, number of CPU threads

    Raises:
        RuntimeError, IOError
    Returns:
        bool: True if success
    """
```

If the return value is not True, check the log output to diagnose the error.
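
As a small sketch of that contract (the model path here is an illustrative placeholder):

```python
# './RES' is a hypothetical model directory; use your own RES folder path.
ok = pred.init(model_dir='./RES', device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID)
if not ok:
    # init() returns True on success; on failure, the SDK log explains why.
    raise RuntimeError('EasyEdge init failed; see the log output above')
```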

## 3. SDK parameters

When predicting on the CPU, set thread_num in init to predict with multiple threads, e.g.:

```python
pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID, thread_num=4)
```

When predicting on the GPU, set device_id in init to pick the GPU device id, e.g.:

```python
pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID, device_id=0)
```
## 4. Predict an image

- Interface

```python
def infer_image(self, img,
                threshold=0.3,
                channel_order='HWC',
                color_format='BGR',
                data_type='numpy')
    """
    Args:
        img: np.ndarray or bytes
        threshold: float
            only return results with confidence larger than threshold
        channel_order: string
            channel order, HWC or CHW
        color_format: string
            color format order, RGB or BGR
        data_type: string
            only meaningful for image segmentation: 'numpy' or 'string'
            'numpy': returns the decoded mask
            'string': returns the undecoded run-length-encoded mask

    Returns:
        list
    """
```

| Field | Type | Range | Description |
| ---------- | -------------------- | --------- | ------------------------ |
| confidence | float | 0~1 | confidence of the classification or detection |
| label | string | | class label of the classification or detection |
| index | number | | class index of the classification or detection |
| x1, y1 | float | 0~1 | object detection: top-left corner of the box (relative to width/height) |
| x2, y2 | float | 0~1 | object detection: bottom-right corner of the box (relative to width/height) |
| mask | string/numpy.ndarray | | segmentation mask |

***About rectangle coordinates***

x1 * image width = x-coordinate of the detection box's top-left corner

y1 * image height = y-coordinate of the detection box's top-left corner

x2 * image width = x-coordinate of the detection box's bottom-right corner

y2 * image height = y-coordinate of the detection box's bottom-right corner

You can refer to the demo files for drawing these rectangles with OpenCV.
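
For instance, a minimal sketch of that drawing logic, assuming `im` is the image passed to `infer_image` and `res` the list it returned (the file name is illustrative):

```python
import cv2

im = cv2.imread('./1.jpg')  # illustrative test image
h, w = im.shape[:2]
for r in res:
    if 'x1' not in r:  # skip non-detection entries
        continue
    # Scale the relative coordinates by the image size to get pixel positions.
    pt1 = (int(r['x1'] * w), int(r['y1'] * h))
    pt2 = (int(r['x2'] * w), int(r['y2'] * h))
    cv2.rectangle(im, pt1, pt2, (0, 255, 0), 2)
cv2.imwrite('vis.jpg', im)
```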

***Result examples***

i) Image classification

```json
{
    "index": 736,
    "label": "table",
    "confidence": 0.9
}
```

ii) Object detection

```json
{
    "index": 8,
    "label": "cat",
    "confidence": 1.0,
    "x1": 0.21289,
    "y1": 0.12671,
    "x2": 0.91504,
    "y2": 0.91211
}
```

iii) Image segmentation

```json
{
    "name": "cat",
    "score": 1.0,
    "location": {
        "left": ...,
        "top": ...,
        "width": ...,
        "height": ...
    },
    "mask": ...
}
```

When `data_type` is `numpy`, the mask field holds the image mask as a 2-D array, where 1 marks the target region and 0 the background:

```text
{
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
```
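
A 0/1 mask in this form can be overlaid on the source image with plain numpy indexing; a sketch, assuming `im` is the original BGR image and `mask` a 2-D array of the same height and width (the arrays below are stand-ins):

```python
import numpy as np

im = np.zeros((6, 10, 3), dtype=np.uint8)  # stand-in for the original BGR image
mask = np.zeros((6, 10), dtype=np.uint8)
mask[1:5, 3:6] = 1                         # the target region from the example above

overlay = im.copy()
overlay[mask == 1] = (0, 255, 0)           # paint target pixels green
blended = (0.5 * im + 0.5 * overlay).astype(np.uint8)
```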

When `data_type` is `string`, the mask is run-length encoded; see the [demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) for how to decode it.

# FAQ

1. Running the infer_demo files reports `your generated code is out of date and must be regenerated with protoc >= 3.19.0`

Inside the project, first uninstall protobuf:

```shell
python3 -m pip uninstall protobuf
```

Then install protobuf 3.19.0:

```shell
python3 -m pip install protobuf==3.19.0
```
@@ -1,262 +0,0 @@

# Introduction

Taking the [1000-class MobileNetV3 model](https://ai.baidu.com/easyedge/app/openSource) as an example, this document describes how to use the model SDK in FastDeploy in a Python environment on **Intel x86_64 / NVIDIA GPU, Windows**: (1) the steps to deploy an HTTP inference service; (2) the full inference API, as a starting point for secondary development.
For the C++ equivalent, see [Windows-CPP-SDK-Serving.md](./Windows-CPP-SDK-Serving.md).
<!--ts-->

* [Introduction](#introduction)

* [Environment Setup](#environment-setup)

  * [1. Download the SDK](#1-download-the-sdk)
  * [2. Python environment](#2-python-environment)
  * [3. Install dependencies](#3-install-dependencies)
    * [3.1 Install paddlepaddle](#31-install-paddlepaddle)
    * [3.2 Install the EasyEdge Python wheel](#32-install-the-easyedge-python-wheel)

* [Quick Start](#quick-start)

  * [1. File layout](#1-file-layout)
  * [2. Run the Demo](#2-run-the-demo)
    * [2.1 Start the HTTP prediction service](#21-start-the-http-prediction-service)

* [HTTP API Flow in Detail](#http-api-flow-in-detail)

  * [1. Starting the HTTP service](#1-starting-the-http-service)
  * [2. Requesting the HTTP service](#2-requesting-the-http-service)
    * [2.1 HTTP request: without base64-encoded images](#21-http-request-without-base64-encoded-images)
  * [3. HTTP response data](#3-http-response-data)

* [FAQ](#faq)

<!--te-->
# Environment Setup

## 1. Download the SDK

Choose and download the SDK matching your model, target chip, and operating system from [Paddle open-source models](https://ai.baidu.com/easyedge/app/openSource) or [GitHub](https://github.com/PaddlePaddle/FastDeploy). The extracted files are laid out as follows:

```shell
EasyEdge-win-[chip]
├── data          # model resource folder; can be swapped for another model
├── ...           # C++/C# files
├── python        # Python SDK files
├── EasyEdge.exe  # main program
└── README.md     # environment notes
```

## 2. Python environment

> The SDK currently supports Python 3.7 only

Open a terminal and run the command below to check the installed Python version. If Python is not installed yet, download the Python 3.7 installer from the [official site](https://www.python.org/); make sure to tick `Add Python 3.7 to PATH`, then click "Install Now".

```shell
python --version
```

If the local version does not match, configure the directory holding the Python SDK with a Python version manager such as [pyenv](https://github.com/pyenv/pyenv) or [anaconda](https://www.anaconda.com/).

Then confirm that pip is at version 20.2.2 or newer with the command below. See the [official guide](https://pip.pypa.io/en/stable/installation/) for pip installation details.

```shell
python -m pip --version
```

## 3. Install dependencies

### 3.1 Install paddlepaddle

Install the PaddlePaddle wheel matching your target chip (CPU/GPU). On an `x86_64 CPU` platform:

```shell
python -m pip install paddlepaddle==2.2.2 -i https://mirror.baidu.com/pypi/simple
```

For `NVIDIA GPU` platforms, see the [official Paddle installation guide](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html).

> When predicting on an NVIDIA GPU, the following must hold:
>
> 1. CUDA and cuDNN are installed on the machine
>
> 2. The paddle build matching your CUDA version is installed
> 3. The environment variable `FLAGS_fraction_of_gpu_memory_to_use` is set to a sensible initial GPU-memory fraction

### 3.2 Install the EasyEdge Python wheel

In the `python` directory, install the EasyEdge wheel built for Python 3.7. On `x86_64 CPU` or `x86_64 NVIDIA GPU` platforms, use the command below; the exact file name is the wheel shipped in the Python SDK package.

```shell
python -m pip install -U BaiduAI_EasyEdge_SDK-{SDK version}-cp37-cp37m-win_amd64.whl
```

# Quick Start

## 1. File layout

The Python SDK is laid out as follows:

```shell
EasyEdge-win-[chip]
├── data                  # model resource folder; can be swapped for another model
│   ├── model             # model files
│   └── config            # configuration files
├── ...                   # C++/C# files
├── python                # Python SDK files
│   ├── # EasyEdge wheel for Python 3.7, usable for secondary development
│   ├── BaiduAI_EasyEdge_SDK-${SDK version}-cp37-cp37m-win_amd64.whl
│   ├── requirements.txt
│   ├── infer_demo        # complete demo files
│   │   ├── demo_xxx.py       # end-to-end inference demo incl. pre/post-processing
│   │   └── demo_serving.py   # HTTP serving demo
│   └── tensor_demo       # tensor in/out demo files
```

## 2. Run the Demo

### 2.1 Start the HTTP prediction service

```shell
python demo_serving.py {model folder} {host, default 0.0.0.0} {port, default 24401}
```

After a successful start, the terminal prints the following:

```shell
2022-06-14 18:45:15 INFO [EasyEdge] [demo_serving.py:50] 21212: Init paddlefluid engine...
2022-06-14 18:45:16 INFO [EasyEdge] [demo_serving.py:50] 21212: Paddle version: 2.2.2
 * Serving Flask app 'Serving' (lazy loading)
 * Environment: production
   WARNING: This is a development server. Do not use it in a production deployment.
   Use a production WSGI server instead.
 * Debug mode: off
 * Running on all addresses (0.0.0.0)
   WARNING: This is a development server. Do not use it in a production deployment.
 * Running on http://127.0.0.1:24401
 * Running on http://192.168.3.17:24401 (Press CTRL+C to quit)
```

You can now open `http://{host ip}:24401` in a browser and pick an image or a video to test; it looks like this:

<div align="center">
<img src="https://user-images.githubusercontent.com/54695910/175854073-fb8189e5-0ffb-472c-a17d-0f35aa6a8418.png" style="zoom:50%;" />
</div>
# HTTP API Flow in Detail

This chapter walks through the API using the Demo above, so developers can embed the runtime library into their own programs; for the full API, see the corresponding Python files. The HTTP service consists of a server and a client. The Demo implements `Method 1: browser-style request`, which does not use base64-encoded images; the other request styles can be implemented by developers as needed.

## 1. Starting the HTTP service

The HTTP service is started from the `demo_serving.py` file:

```python
class Serving(object):
    """ SDK local serving """

    def __init__(self, model_dir, license='', model_filename='model', params_filename='params'):

        self.program = None
        self.model_dir = model_dir
        self.model_filename = model_filename
        self.params_filename = params_filename
        self.program_lock = threading.Lock()
        self.license_key = license
        # Only ObjectTracking initializes video_processor
        self.video_processor = None

    def run(self, host, port, device, engine=Engine.PADDLE_FLUID, service_id=0, device_id=0, **kwargs):
        """
        Args:
            host : str
            port : str
            device : BaiduAI.EasyEdge.Device, e.g. Device.CPU
            engine : BaiduAI.EasyEdge.Engine, e.g. Engine.PADDLE_FLUID
        """
        self.run_serving_with_flask(host, port, device, engine, service_id, device_id, **kwargs)
```
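
Putting `__init__` and `run` together, a minimal launch script might look like the sketch below; the model directory and serial key are illustrative placeholders:

```python
import BaiduAI.EasyEdge as edge
from BaiduAI.EasyEdge.serving import Serving

server = Serving(model_dir='./RES', license='YOUR-SERIAL-KEY')  # hypothetical values
# Serve on all interfaces on the demo's default port, using the CPU engine.
server.run(host='0.0.0.0', port=24401, device=edge.Device.CPU,
           engine=edge.Engine.PADDLE_FLUID)
```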

## 2. Requesting the HTTP service

> Developers can open `http://{device ip}:24401` in a browser and pick an image to test.

### 2.1 HTTP request: without base64-encoded images

GET parameters in the URL:

| Parameter | Description | Default |
| --------- | ----------- | ------- |
| threshold | Confidence threshold, 0~1 | The model's recommended threshold is used if omitted |

The HTTP POST body is the raw binary content of the image.

Python request example:

```python
import requests

with open('./1.jpg', 'rb') as f:
    img = f.read()
result = requests.post(
    'http://127.0.0.1:24401/',
    params={'threshold': 0.1},
    data=img).json()
```

## 3. HTTP response data

| Field | Type | Notes |
| ---------- | ------ | ------------------------------------ |
| error_code | Number | 0 on success; for non-zero codes, see message for details |
| results | Array | The recognition results; for the meaning of each field, see the section `Predict an image - response format` |
| cost_ms | Number | Inference time in ms, excluding network round-trip time |

Example response:

```json
{
    "cost_ms": 52,
    "error_code": 0,
    "results": [
        {
            "confidence": 0.94482421875,
            "index": 1,
            "label": "IronMan",
            "x1": 0.059185408055782318,
            "x2": 0.18795496225357056,
            "y1": 0.14762254059314728,
            "y2": 0.52510076761245728,
            "mask": "...",   // field returned by segmentation models
            "trackId": 0     // field returned by object-tracking models
        }
    ]
}
```

***About rectangle coordinates***

x1 * image width = x-coordinate of the detection box's top-left corner

y1 * image height = y-coordinate of the detection box's top-left corner

x2 * image width = x-coordinate of the detection box's bottom-right corner

y2 * image height = y-coordinate of the detection box's bottom-right corner

***About segmentation models***

The mask field is the run-length encoding produced by segmentation models; see the [demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) for how to decode it.

# FAQ

1. Running the infer_demo files reports `your generated code is out of date and must be regenerated with protoc >= 3.19.0`

Inside the project, first uninstall protobuf:

```shell
python3 -m pip uninstall protobuf
```

Then install protobuf 3.19.0:

```shell
python3 -m pip install protobuf==3.19.0
```
17
docs/compile/README.md
Normal file
@@ -0,0 +1,17 @@

# Building FastDeploy

This document covers building both the C++ inference library and the Python library; follow the guide for your platform:

- [Linux & Mac build](linux_and_mac.md)
- [Windows build](windows.md)

The build options available on each platform are listed in the table below:

| Option | Effect | Notes |
|:---- | :--- | :--- |
| ENABLE_ORT_BACKEND | Enables the ONNXRuntime inference backend; default ON | - |
| WITH_GPU | Enables GPU support; default OFF | When set to TRUE, CUDA_DIRECTORY must point at the CUDA directory, e.g. /usr/local/cuda; cannot be ON on Mac |
| ENABLE_TRT_BACKEND | Enables the TensorRT inference backend; default OFF | When set to TRUE, TRT_DIRECTORY must point at the TensorRT directory, e.g. /usr/downloads/TensorRT-8.4.0.1; cannot be ON on Mac |
| ENABLE_VISION | Builds the vision-model module, including the bundled OpenCV; default OFF | - |
| ENABLE_PADDLE_FRONTEND | Builds with Paddle2ONNX integrated; default ON | - |
| ENABLE_DEBUG | When ON, DEBUG output is available, possibly at some performance cost; default OFF | - |
32
docs/compile/linux_and_mac.md
Normal file
@@ -0,0 +1,32 @@
# Linux & Mac编译
|
||||
|
||||
## 编译C++
|
||||
```
|
||||
git clone https://gitee.com/jiangjiajun/FastDeploy.git
|
||||
cd FastDeploy
|
||||
git submodule init
|
||||
git submodule update
|
||||
mkdir build & cd build
|
||||
cmake .. -DENABLE_ORT_BACKEND=ON \
|
||||
-DENABLE_VISION=ON \
|
||||
-DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3
|
||||
make -j8
|
||||
make install
|
||||
```
|
||||
编译后的预测库即在当前目录下的`fastdeploy-0.0.3`
|
||||
|
||||
## 编译Python安装包
|
||||
```
|
||||
git clone https://gitee.com/jiangjiajun/FastDeploy.git
|
||||
cd FastDeploy
|
||||
git submodule init
|
||||
git submodule update
|
||||
# Python通过export环境变量设置编译选项
|
||||
export ENABLE_ORT_BACKEND=ON
|
||||
export ENABLE_VISION=ON
|
||||
python setup.py build
|
||||
python setup.py bdist_wheel
|
||||
```
|
||||
编译后的wheel包即在当前目录下的`dist`目录中
|
||||
|
||||
编译选项说明参考[编译指南](./README.md)
|
||||
3
docs/compile/windows.md
Normal file
@@ -0,0 +1,3 @@
# Windows Build

Not written yet
110
docs/cpp/README.md
Normal file
@@ -0,0 +1,110 @@
# C++ Deployment

## Getting the inference library

Build it yourself following [Building FastDeploy](../compile/README.md), or use one of the prebuilt libraries below

| Library | Platform | Devices | Notes |
|:------ | :---- | :------- | :----- |
|[fastdeploy-linux-x64-0.0.3.tgz](https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz) | Linux | CPU | Bundles ONNXRuntime |
|[fastdeploy-linux-x64-gpu-0.0.3.tgz](https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-gpu-0.0.3.tgz) | Linux | CPU/GPU | Bundles ONNXRuntime and TensorRT |
|[fastdeploy-osx-x86_64-0.0.3.tgz](https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-osx-x86_64-0.0.3.tgz) | Mac OSX Intel CPU | CPU | Bundles ONNXRuntime |
|[fastdeploy-osx-arm64-0.0.3.tgz](https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-osx-arm64-0.0.3.tgz) | Mac OSX M1 CPU | CPU | Bundles ONNXRuntime |

## Usage

FastDeploy ships ready-to-deploy models from several domains. This document deploys YOLOv5 on Linux as an example.

```
# Download and extract the inference library
wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
tar xvf fastdeploy-linux-x64-0.0.3.tgz

# Download the model and a test image
wget https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5s.onnx
wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg
```

### YOLOv5 inference code

Create the following `yolov5.cc`
```
#include "fastdeploy/vision.h"

int main() {
  namespace vis = fastdeploy::vision;

  auto model = vis::ultralytics::YOLOv5("yolov5s.onnx");  // load the model

  if (!model.Initialized()) {  // check that the model initialized successfully
    std::cerr << "Initialize failed." << std::endl;
    return -1;
  }

  cv::Mat im = cv::imread("bus.jpg");  // read the image

  vis::DetectionResult res;
  if (!model.Predict(&im, &res)) {  // predict the image
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }

  std::cout << res.Str() << std::endl;  // print the detection result
  return 0;
}
```

### Building the example

Before building, create a `CMakeLists.txt` next to `yolov5.cc` with the following content
```
PROJECT(yolov5_demo C CXX)
CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
# On toolchains built against the old C++ ABI, uncomment the next line for compatibility
# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)

# Point this at the downloaded and extracted fastdeploy library
set(FASTDEPLOY_INSTALL_DIR /ssd1/download/fastdeploy-linux-x64-0.0.3/)

include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)

# Add the FastDeploy header directories
include_directories(${FASTDEPLOY_INCS})

add_executable(yolov5_demo ${PROJECT_SOURCE_DIR}/yolov5.cc)
message(${FASTDEPLOY_LIBS})
# Link against the FastDeploy libraries
target_link_libraries(yolov5_demo ${FASTDEPLOY_LIBS})
```

The directory now looks like this
```
- demo_directory
  |___fastdeploy-linux-x64-0.0.3/  # extracted inference library
  |___yolov5.cc       # example code
  |___CMakeLists.txt  # cmake file
  |___yolov5s.onnx    # model file
  |___bus.jpg         # test image
```

Build with
```
cmake .
make -j
```
The built executable is `yolov5_demo` in the current directory; run it with
```
./yolov5_demo
```
It loads the model, runs inference, and prints
```
DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
223.395126,403.948669, 345.337189, 867.339050, 0.856906, 0
668.301758,400.781372, 808.441772, 882.534973, 0.829716, 0
50.210758,398.571289, 243.123383, 905.016846, 0.805375, 0
23.768217,214.979355, 802.627869, 778.840820, 0.756311, 5
0.737200,552.281006, 78.617218, 890.945007, 0.363471, 0
```
212
docs/iOS-SDK.md
@@ -1,212 +0,0 @@
# Introduction

This document describes the FastDeploy model SDK on iOS: (1) the deployment steps; (2) how to use the SDK, as a starting point for secondary development.

<!--ts-->

* [Introduction](#introduction)

* [Supported Systems](#supported-systems)

  * [1. System support](#1-system-support)
  * [2. SDK size](#2-sdk-size)

* [Quick Start](#quick-start)

  * [1. Project layout](#1-project-layout)
  * [2. Run the Demo](#2-run-the-demo)

* [Using the SDK](#using-the-sdk)

  * [1. Integration guide](#1-integration-guide)
    * [1.1 Integrating the dependencies](#11-integrating-the-dependencies)
  * [2. Call-flow example](#2-call-flow-example)
    * [2.1 Initialization](#21-initialization)
    * [2.2 Predict an image](#22-predict-an-image)

* [FAQ](#faq)

<!--te-->

# Supported Systems

## 1. System support

1. System: iOS 9.0 and above.

2. Hardware: arm64 (standard architectures) is supported; simulators are not supported yet.

   * Officially verified devices: most ARM phones, tablets, and development boards.

3. Other notes

   * 3.1 [Segmentation models] (1) The segmentation demo does not yet capture from the live camera; developers can add that themselves as needed. (2) The PP-Humanseg-Lite model was designed for landscape scenarios such as video conferencing; this demo only covers portrait use, and developers can add landscape support as needed.<br>
   * 3.2 [OCR models] On the first OCR run, the first inference takes noticeably long; this is expected, since model loading, preprocessing, and similar work happen then.<br>

## 2. SDK size

1. The size of the model resource files affects the SDK size.
2. The SDK package and the IPA installer are fairly large, but the installed footprint on the device is much smaller, thanks to multiple-architecture slicing, bitcode, and App Store optimizations.

# Quick Start

## 1. Project layout

Choose and download the SDK matching your model, target chip, and operating system from [Paddle open-source models](https://ai.baidu.com/easyedge/app/openSource) or [GitHub](https://github.com/PaddlePaddle/FastDeploy). The SDK is laid out as follows:

```
.EasyEdge-iOS-SDK
├── EasyDLDemo   # Demo project files
├── LIB          # dependencies
├── RES
│   ├── easyedge       # model resource folder; one model set fits different hardware, OSes and deployments
│   ├── conf.json      # APP name needed by the Android/iOS systems
│   ├── model          # model structure file
│   ├── params         # model parameter file
│   ├── label_list.txt # model label file
│   ├── infer_cfg.json # model pre/post-processing configuration
└── DOC          # documentation
```

## 2. Run the Demo

Run the SDK demo directly as follows:
Step 1: open `EasyDLDemo/EasyDLDemo.xcodeproj` in Xcode
Step 2: configure your own code signing (if signing is new to you, see the FAQ entry [iOS signing](#100))<br>
Step 3: run on a connected phone; simulators are not supported

Example run of a detection model:

<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854078-4f1f761d-0629-411a-92cc-6f4180164ca5.png" width="400"></div>

# Using the SDK

This chapter describes how to integrate the SDK into your own project.

## 1. Integration guide

Step 1: integrate the dependencies
Step 2: `import <EasyDL/EasyDL.h>`

### 1.1 Integrating the dependencies

1. Copy the LIB directory to a suitable place in your project
2. Configure the search paths in Build Settings (using the SDK's LIB directory path as an example):
   - Framework Search Paths: `${PROJECT_DIR}/../LIB/lib`
   - Header Search Paths: `${PROJECT_DIR}/../LIB/include`
   - Library Search Paths: `${PROJECT_DIR}/../LIB/lib`

> If errors occur during integration, check how the Demo project references the dependencies

## 2. Call-flow example

Image classification on generic ARM, as an example; the following sections describe the details:

```
NSError *err;

// step 1: initialize the model
EasyDLModel *model = [[EasyDLModel alloc] initModelFromResourceDirectory:@"easyedge" withError:&err];

// step 2: prepare the image to predict
UIImage *image = ...;

// step 3: predict the image
NSArray *results = [model detectUIImage:image withFilterScore:0 andError:&err];

// step 4: parse the results
for (id res in results) {
    EasyDLClassfiData *clsData = (EasyDLClassfiData *) res;
    NSLog(@"labelIndex=%d, labelName=%@, confidence=%f", clsData.category, clsData.label, clsData.accuracy);
}
```

### 2.1 Initialization

```
// example
// the first argument is the name of the model resource folder
EasyDLModel *model = [[EasyDLModel alloc] initModelFromResourceDirectory:@"easyedge" withError:&err];
```

> The model resource folder must be added to the Xcode project as a folder reference; e.g. the `RES/easyedge` folder shows up blue in the Demo project

### 2.2 Predict an image

All model types obtain predictions through the same interface:

```
// the element type of the returned array varies by model
NSArray *results = [model detectUIImage:image withFilterScore:0 andError:&err];
```

The returned element types are listed below; see the definitions in `EasyDLResultData.h`:

| Model type | Type |
| --- | ---- |
| image classification | EasyDLClassfiData |
| object detection / face detection | EasyDLObjectDetectionData |
| instance segmentation | EasyDLObjSegmentationData |
| pose estimation | EasyDLPoseData |
| text recognition | EasyDLOcrData |

# FAQ

1. How do I run predictions concurrently on multiple threads?

   The SDK already makes full use of all cores internally, so concurrent prediction is not recommended.

   If you do need concurrency, note that none of the `EasyDLModel` methods are thread-safe; initialize one instance per thread, e.g.:

```c
- (void)testMultiThread {
    UIImage *img = [UIImage imageNamed:@"1.jpeg"];
    NSError *err;
    EasyDLModel * model1 = [[EasyDLModel alloc] initModelFromResourceDirectory:@"easyedge" withError:&err];
    EasyDLModel * model2 = [[EasyDLModel alloc] initModelFromResourceDirectory:@"easyedge" withError:&err];

    dispatch_queue_t queue1 = dispatch_queue_create("testQueue", DISPATCH_QUEUE_CONCURRENT);
    dispatch_queue_t queue2 = dispatch_queue_create("testQueue2", DISPATCH_QUEUE_CONCURRENT);

    dispatch_async(queue1, ^{
        NSError *detectErr;
        for(int i = 0; i < 1000; ++i) {
            NSArray * res = [model1 detectUIImage:img withFilterScore:0 andError:&detectErr];
            NSLog(@"1: %@", res[0]);
        }
    });

    dispatch_async(queue2, ^{
        NSError *detectErr;
        for(int i = 0; i < 1000; ++i) {
            NSArray * res = [model2 detectUIImage:img withFilterScore:0 andError:&detectErr];
            NSLog(@"2: %@", res[0]);
        }
    });
}
```

2. Build error: Undefined symbols for architecture arm64: ...
   * Mentions `cxx11, vtable`: link `libc++.tbd`
   * Mentions `cv::Mat`: link `opencv2.framework`
   * Mentions `CoreML`, `VNRequest`: link `CoreML.framework` and make sure to `#import <CoreML/CoreML.h>`
3. Runtime error: Image not found: xxx ...

   Embed the library the error names.

4. Build error: Invalid bitcode version

   This usually means the Xcode in use is older than 12; upgrade to Xcode 12.

5. Error handling

   SDK methods return an NSError whose error codes are defined in `EasyDLDefine.h - EEasyDLErrorCode`. The NSError carries a message (and sometimes an NSUnderlyingError); use the code and message to detect and handle failures.

6. iOS signing

   Code signing is Apple's gate for app developers: free for individual developers, paid for corporate developers (e.g. when the app ships to the App Store). This note only covers the signing an ordinary developer needs the first time they build with Xcode.<br>
   (1) Add your personal Apple ID under Xcode/Preferences/Accounts;<br>
   (2) configure EasyDLDemo as shown below:<br>

<div align=center><img src="https://user-images.githubusercontent.com/54695910/175854089-aa1d1af8-7daa-43ae-868d-32041c27ad86.jpg" width="600"></div>

   (3) After (2), the APP installs on the phone; you still need to trust the developer on the phone under `Settings/General/Device Management/Developer App/Trust` before the APP will run.
24
docs/tech/design.md
Normal file
@@ -0,0 +1,24 @@
# FastDeploy

FastDeploy is split into the `Runtime` module and the application modules.

## Runtime
A `Runtime` corresponds to a backend on a given piece of hardware. In most cases one hardware target maps to one backend, but CPU and GPU have several backends, and users can choose according to their needs.

| Runtime | Backend |
| :------ | :---- |
| CPU(x86_64) | `fastdeploy::Backend::ORT` |
| GPU(Nvidia) | `fastdeploy::Backend::ORT` / `fastdeploy::Backend::TRT` |

See the [Runtime documentation](runtime.md) for details.

## Applications

Applications are the model-level inference layers built on top of `Runtime`, bundling end-to-end model inference:

- Vision
- Text
- Audio

See the [Vision documentation](vision.md) for details.
63
docs/tech/models.md
Normal file
@@ -0,0 +1,63 @@
# Model Development

Taking `ultralytics/yolov5` as an example: add an `ultralytics` directory under `fastdeploy/vision` and create the code file `yolov5.h` in it.

Define the `YOLOv5` class:

```
class YOLOv5 : public FastDeployModel {
 public:
  // The constructor takes the model path; ONNX is assumed by default
  YOLOv5(const std::string& model_file)
    : FastDeployModel(model_file, "", Frontend::ONNX) {
    size = {640, 640};  // resize target during image preprocessing
    // padding fill value
    padding_value = {114.0, 114.0, 114.0};
    // whether to pad only up to the smallest box satisfying the stride
    is_mini_pad = false;
    // whether resizing beyond the original image size is allowed
    is_scale_up = true;
    // stride; width/height are padded up to a multiple of it
    stride = 32;

    // The two lists below declare which backends the model supports on CPU/GPU
    // Once a Device is chosen, the first backend in its list is preferred
    valid_cpu_backends = {Backend::ORT};
    valid_gpu_backends = {Backend::ORT, Backend::TRT};
  }

  std::string ModelName() const;  // returns the model name

  // Model initialization; this function must call the base class's
  // `InitBackend()` to initialize the runtime
  // Pre/post-processing initialization can also live here; e.g. ppdet/ppcls
  // build a data preprocessing pipeline
  bool Init();

  // Preprocessing: the input is a vision::Mat and the output an FDTensor
  // handed to the runtime for inference
  bool Preprocess(Mat* mat, FDTensor* output);

  // Postprocessing: the input is the FDTensor coming out of the runtime,
  // plus some model-specific postprocessing parameters
  bool Postprocess(FDTensor& tensor, DetectionResult* res, float conf_thresh, float nms_iou_thresh);

  // End-to-end prediction, including pre/post-processing; the
  // postprocessing parameters are therefore usually exposed here too
  bool Predict(cv::Mat* im, DetectionResult* result, float conf_thresh = 0.25, float nms_iou_thresh = 0.5);
};
```

There are no hard conventions for how a model is implemented, but:
- 1. It must inherit `FastDeployModel`
- 2. It must define its usable `valid_cpu_backends` and `valid_gpu_backends`
- 3. It must implement the `Init()`/`ModelName()`/`Predict()` interfaces
- 4. Pre/post-processing should preferably live in the two interfaces `Preprocess` and `Postprocess`

## Miscellaneous

`vision` provides a few basic result structures, including `vision::ClassifyResult`, `vision::DetectionResult` and `vision::SegmentationResult`, covering the common model outputs. Inevitably some new output will not fit these; for structures that truly need to be custom, the default policy is:

- 1. If the structure is shared by many models, implement it in `vision/common.h` as a common output structure
- 2. If only one model needs it, implement it next to that model, e.g. in `vision/ultralytics/yolov5.h`, and write the pybind wrapper for the structure yourself
135
docs/tech/runtime.md
Normal file
@@ -0,0 +1,135 @@
# fastdeploy::Runtime

## FDTensor: the Runtime input/output structure

```
struct FDTensor {
  std::vector<int64_t> shape;   // shape
  std::string name;             // name
  FDDataType dtype;             // data type
  Device device = Device::CPU;  // device holding the data

  void* MutableData();  // returns a pointer to the tensor's memory buffer

  // Returns the tensor data; if the data lives on another device,
  // it is first copied to the CPU and a pointer to the CPU buffer
  // is returned
  void* Data();

  // Initializes the tensor, reusing an external data pointer;
  // the memory buffer is created and freed by the external caller
  void SetExternalData(const std::vector<int>& new_shape,
                       const FDDataType& data_type,
                       void* data_buffer,
                       const Device& dev);

  int Nbytes() const;  // size of the tensor data in bytes

  int Numel() const;  // number of elements in the tensor

  // Debug helper; prints tensor info including mean, max, min, ...
  void PrintInfo(const std::string& prefix = "TensorInfo");
};
```

FDTensor is the structure connecting pre/post-processing with the `Runtime`. In most cases, prefer `SetExternalData` to share the user's buffers and avoid the overhead of extra memory copies.

## Runtime: the multi-backend inference engine

### RuntimeOption: engine configuration
```
struct RuntimeOption {
  // model and weight files
  std::string model_file;
  std::string params_file;
  // model format; currently Frontend::PADDLE / Frontend::ONNX
  Frontend model_format = Frontend::PADDLE;
  Backend backend = Backend::ORT;

  // number of threads when running on the CPU
  int cpu_thread_num = 8;

  // inference device; currently Device::CPU / Device::GPU
  // must be paired with a backend valid on that device
  Device device;

  // Backend::ORT parameters
  int ort_graph_opt_level;
  int ort_inter_op_num_threads;
  int ort_execution_mode;

  // Backend::TRT parameters
  std::map<std::string, std::vector<int32_t>> trt_fixed_shape;
  std::map<std::string, std::vector<int32_t>> trt_max_shape;
  std::map<std::string, std::vector<int32_t>> trt_min_shape;
  std::map<std::string, std::vector<int32_t>> trt_opt_shape;
  std::string trt_serialize_file = "";
  bool trt_enable_fp16 = false;
  bool trt_enable_int8 = false;
  size_t trt_max_batch_size = 32;
};
```

### The Runtime engine

```
struct Runtime {
  // loads the model and initializes the engine
  bool Init(const RuntimeOption& _option);

  // runs inference
  // the input tensors must have their names configured correctly
  bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);

  int NumInputs();   // number of inputs
  int NumOutputs();  // number of outputs

  TensorInfo GetInputInfo(int index);   // input info: shape, dtype, name
  TensorInfo GetOutputInfo(int index);  // output info: shape, dtype, name

  RuntimeOption option;  // the engine's configuration
};
```

## Runtime usage examples

### C++

```
#include "fastdeploy/fastdeploy_runtime.h"

int main() {
  auto option = fastdeploy::RuntimeOption();
  option.model_file = "resnet50/inference.pdmodel";
  option.params_file = "resnet50/inference.pdiparams";

  auto runtime = fastdeploy::Runtime();
  assert(runtime.Init(option));

  // the input tensors must be prepared beforehand
  std::vector<FDTensor> inputs;

  std::vector<FDTensor> outputs;
  assert(runtime.Infer(inputs, &outputs));

  // inspect the debug info of the first output tensor
  outputs[0].PrintInfo();
}
```

### Python

```
import fastdeploy as fd
import numpy as np

option = fd.RuntimeOption()
option.model_file = "resnet50/inference.pdmodel"
option.params_file = "resnet50/inference.pdiparams"

runtime = fd.Runtime(option)

result = runtime.infer({"image": np.random.rand(1, 3, 224, 224)})
```
74
docs/tech/vision.md
Normal file
@@ -0,0 +1,74 @@
# Vision

Vision is the vision-model module of FastDeploy. It contains two shared modules, `processors` and `utils`, plus the model modules themselves.

## processors: image processing

`processors` provides the common image-processing operations, each implemented on multiple backends; CPU and GPU processing are currently supported. By calling the `processors` API in model preprocessing, developers can switch between processing backends quickly.

Processing on the CPU (the default):
```
namespace vis = fastdeploy::vision;

cv::Mat im = cv::imread("test.jpg");

vis::Mat mat(im);
assert(vis::Resize::Run(&mat, 224, 224));
assert(vis::Normalize::Run(&mat, {0.5, 0.5, 0.5}, {0.5, 0.5, 0.5}));
assert(vis::HWC2CHW::Run(&mat));
```

Switching to CUDA GPU processing:
```
namespace vis = fastdeploy::vision;
vis::Processor::default_lib = vis::ProcessorLib::OPENCV_CUDA;

cv::Mat im = cv::imread("test.jpg");

vis::Mat mat(im);
assert(vis::Resize::Run(&mat, 224, 224));
assert(vis::Normalize::Run(&mat, {0.5, 0.5, 0.5}, {0.5, 0.5, 0.5}));
assert(vis::HWC2CHW::Run(&mat));
```

`fastdeploy::vision::Mat` is the structure passed between processing steps:
```
struct Mat {
  Mat(cv::Mat);       // constructed from a `cv::Mat`
  FDDataType Type();  // element type
  int Channels();     // number of channels
  int Width();        // width
  int Height();       // height

  // returns the image; if the Mat lives on the GPU, it is copied to the CPU first
  cv::Mat GetCpuMat();

  // returns the image; if the Mat lives on the CPU, it is copied to the GPU first
  cv::cuda::GpuMat GetGpuMat();

  void ShareWithTensor(FDTensor* tensor);  // builds an FDTensor sharing this memory
  bool CopyToTensor(FDTensor* tensor);     // builds a CPU FDTensor and copies the data into it

  Layout layout;  // data layout; Layout::HWC / Layout::CHW
  Device device;  // device holding the data; Device::CPU / Device::GPU
};
```

## utilities: helpers

Provides common helper functions, such as the `TopK` selection used by classification models and the `NMS` operation used by detection models. Multiple backends for these post-processing implementations may be considered later as well.

## visualize: visualization

Provides visualization helpers; detection, segmentation, OCR and similar tasks need them to inspect results visually.

## Model modules

This is the most important part of `Vision`. All models are organized by `domain` + `model name`, e.g.

- vision::ppdet::YOLOv3 // the YOLOv3 model from PaddleDetection
- vision::ppdet::RCNN // the RCNN family from PaddleDetection
- vision::ultralytics::YOLOv5 // the YOLOv5 model from https://github.com/ultralytics/yolov5

See [Model Development](models.md) for adding models.
57
docs/usage/model.md
Normal file
@@ -0,0 +1,57 @@
# FastDeploy Models

The currently supported models are
- [fastdeploy.vision.ppcls.Model](vision/ppcls.md): all classification models in PaddleClas
- [fastdeploy.vision.ultralytics.YOLOv5](vision/ultralytics.md): the [ultralytics/yolov5](https://github.com/ultralytics/yolov5) model

See each model's API documentation and examples for usage. Every model runs with a default Runtime configuration; this document shows how to change a model's backend configuration. The Python code below runs the YOLOv5 model:
```
import fastdeploy as fd
model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx")

import cv2
im = cv2.imread('bus.jpg')

result = model.predict(im)

print(model.runtime_option)
```
`print(model.runtime_option)` prints the following
```
RuntimeOption(
  backend : Backend.ORT            # current inference backend: ONNXRuntime
  cpu_thread_num : 8               # CPU threads for inference (only relevant on CPU)
  device : Device.GPU              # current inference device: GPU
  device_id : 0                    # current inference device id: 0
  model_file : yolov5s.onnx        # model file path
  model_format : Frontend.ONNX     # model format; ONNX here
  ort_execution_mode : -1          # ONNXRuntime option; -1 means default
  ort_graph_opt_level : -1         # ONNXRuntime option; -1 means default
  ort_inter_op_num_threads : -1    # ONNXRuntime option; -1 means default
  params_file :                    # parameter file (ONNX models have none)
  trt_enable_fp16 : False          # TensorRT option
  trt_enable_int8 : False          # TensorRT option
  trt_fixed_shape : {}             # TensorRT option
  trt_max_batch_size : 32          # TensorRT option
  trt_max_shape : {}               # TensorRT option
  trt_max_workspace_size : 1073741824  # TensorRT option
  trt_min_shape : {}               # TensorRT option
  trt_opt_shape : {}               # TensorRT option
  trt_serialize_file :             # TensorRT option
)
```

Note that options prefixed with `ort` are specific to the ONNXRuntime backend, and options prefixed with `trt` to the TensorRT backend. See [RuntimeOption](runtime_option.md) for configuring the backends and their options.

## Switching how a model runs

In general, users only need to care about which Device inference runs on. For finer control, you can also pick a Backend per Device, but keep the pairing valid: Backend::TRT only supports Device GPU, while Backend::ORT supports both CPU and GPU.

```
import fastdeploy as fd
option = fd.RuntimeOption()
option.device = fd.Device.CPU
option.cpu_thread_num = 12
model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx", option)
print(model.runtime_option)
```
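
Symmetrically, a GPU-plus-TensorRT configuration is a short sketch away, under the pairing rule above; this assumes the Python bindings expose the same `backend` and `trt_enable_fp16` fields shown in the RuntimeOption dump earlier:

```python
import fastdeploy as fd

option = fd.RuntimeOption()
option.device = fd.Device.GPU    # Backend.TRT only supports GPU
option.backend = fd.Backend.TRT  # pick the TensorRT backend explicitly
option.trt_enable_fp16 = True    # optional FP16, per the trt_* options above
model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx", option)
```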
104
docs/usage/vision/ppcls.md
Normal file
@@ -0,0 +1,104 @@
# PaddleClas Classification Model Inference

For exporting PaddleClas models, see [PaddleClas](https://github.com/PaddlePaddle/PaddleClas.git).

## Python API

### Model class
```
fastdeploy.vision.ppcls.Model(model_file, params_file, config_file, runtime_option=None, model_format=fastdeploy.Frontend.PADDLE)
```

**Parameters**

> * **model_file**(str): model file, e.g. resnet50/inference.pdmodel
> * **params_file**(str): parameter file, e.g. resnet50/inference.pdiparams
> * **config_file**(str): configuration file; the inference configuration shipped with PaddleClas, e.g. [inference_cls.yaml](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/deploy/configs/inference_cls.yaml)
> * **runtime_option**(fd.RuntimeOption): backend configuration; the default None means the default configuration
> * **model_format**(fd.Frontend): model format; PaddleClas models are always Frontend.PADDLE

#### The predict interface
```
Model.predict(image_data, topk=1)
```

> **Parameters**
>
> > * **image_data**(np.ndarray): input data; note it must be HWC, RGB
> > * **topk**(int): return the top-k classes

> **Returns**
>
> > * **result**(ClassifyResult): a structure with two list members, `label_ids` and `scores`, holding the classes and their corresponding confidences

### Example

> ```
> import fastdeploy.vision as vis
> import cv2
> model = vis.ppcls.Model("resnet50/inference.pdmodel", "resnet50/inference.pdiparams", "resnet50/inference_cls.yaml")
> im = cv2.imread("test.jpeg")
> result = model.predict(im, topk=5)
> print(result.label_ids[0], result.scores[0])
> ```

## C++ API

Include the header `#include "fastdeploy/vision.h"`.

### Model class

```
fastdeploy::vision::ppcls::Model(
  const std::string& model_file,
  const std::string& params_file,
  const std::string& config_file,
  const RuntimeOption& custom_option = RuntimeOption(),
  const Frontend& model_format = Frontend::PADDLE)
```

**Parameters**
> * **model_file**: model file, e.g. resnet50/inference.pdmodel
> * **params_file**: parameter file, e.g. resnet50/inference.pdiparams
> * **config_file**: configuration file; the inference configuration shipped with PaddleClas, e.g. [inference_cls.yaml](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/deploy/configs/inference_cls.yaml)
> * **custom_option**: backend configuration; the default configuration is used when unset
> * **model_format**: model format; PaddleClas models are always Frontend::PADDLE

#### The Predict interface
```
bool Model::Predict(cv::Mat* im, ClassifyResult* result, int topk = 1)
```

> **Parameters**
> > * **im**: input image; note it must be HWC, RGB (and is modified in place during preprocessing)
> > * **result**: the classification result
> > * **topk**: return the top-k classes

> **Returns**
> > true or false, i.e. whether prediction succeeded

### Example
> ```
> #include "fastdeploy/vision.h"
>
> int main() {
>   namespace vis = fastdeploy::vision;
>   auto model = vis::ppcls::Model("resnet50/inference.pdmodel", "resnet50/inference.pdiparams", "resnet50/inference_cls.yaml");
>
>   if (!model.Initialized()) {
>     std::cerr << "Initialize failed." << std::endl;
>     return -1;
>   }
>
>   cv::Mat im = cv::imread("test.jpeg");
>
>   vis::ClassifyResult res;
>   if (!model.Predict(&im, &res, 5)) {
>     std::cerr << "Prediction failed." << std::endl;
>     return -1;
>   }
>
>   std::cout << res.label_ids[0] << " " << res.scores[0] << std::endl;
>   return 0;
> }
> ```
90
external/onnxruntime.cmake
vendored
Normal file
@@ -0,0 +1,90 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

include(ExternalProject)

set(ONNXRUNTIME_PROJECT "extern_onnxruntime")
set(ONNXRUNTIME_PREFIX_DIR ${THIRD_PARTY_PATH}/onnxruntime)
set(ONNXRUNTIME_SOURCE_DIR
    ${THIRD_PARTY_PATH}/onnxruntime/src/${ONNXRUNTIME_PROJECT})
set(ONNXRUNTIME_INSTALL_DIR ${THIRD_PARTY_PATH}/install/onnxruntime)
set(ONNXRUNTIME_INC_DIR
    "${ONNXRUNTIME_INSTALL_DIR}/include"
    CACHE PATH "onnxruntime include directory." FORCE)
set(ONNXRUNTIME_LIB_DIR
    "${ONNXRUNTIME_INSTALL_DIR}/lib"
    CACHE PATH "onnxruntime lib directory." FORCE)
set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${ONNXRUNTIME_LIB_DIR}")

set(ONNXRUNTIME_VERSION "1.11.1")
set(ONNXRUNTIME_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs/")

if(WIN32)
  if(WITH_GPU)
    set(ONNXRUNTIME_FILENAME "onnxruntime-win-x64-gpu-${ONNXRUNTIME_VERSION}.zip")
  else()
    set(ONNXRUNTIME_FILENAME "onnxruntime-win-x64-${ONNXRUNTIME_VERSION}.zip")
  endif()
elseif(APPLE)
  if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
    set(ONNXRUNTIME_FILENAME "onnxruntime-osx-arm64-${ONNXRUNTIME_VERSION}.tgz")
  else()
    set(ONNXRUNTIME_FILENAME "onnxruntime-osx-x86_64-${ONNXRUNTIME_VERSION}.tgz")
  endif()
else()
  if(WITH_GPU)
    set(ONNXRUNTIME_FILENAME "onnxruntime-linux-x64-gpu-${ONNXRUNTIME_VERSION}.tgz")
  else()
    set(ONNXRUNTIME_FILENAME "onnxruntime-linux-x64-${ONNXRUNTIME_VERSION}.tgz")
  endif()
endif()
set(ONNXRUNTIME_URL "${ONNXRUNTIME_URL_PREFIX}${ONNXRUNTIME_FILENAME}")

include_directories(${ONNXRUNTIME_INC_DIR})  # For ONNXRUNTIME code to include internal headers.

if(WIN32)
  set(ONNXRUNTIME_LIB
      "${ONNXRUNTIME_INSTALL_DIR}/lib/onnxruntime.lib"
      CACHE FILEPATH "ONNXRUNTIME static library." FORCE)
elseif(APPLE)
  set(ONNXRUNTIME_LIB
      "${ONNXRUNTIME_INSTALL_DIR}/lib/libonnxruntime.dylib"
      CACHE FILEPATH "ONNXRUNTIME static library." FORCE)
else()
  set(ONNXRUNTIME_LIB
      "${ONNXRUNTIME_INSTALL_DIR}/lib/libonnxruntime.so"
      CACHE FILEPATH "ONNXRUNTIME static library." FORCE)
endif()

ExternalProject_Add(
  ${ONNXRUNTIME_PROJECT}
  ${EXTERNAL_PROJECT_LOG_ARGS}
  URL ${ONNXRUNTIME_URL}
  PREFIX ${ONNXRUNTIME_PREFIX_DIR}
  DOWNLOAD_NO_PROGRESS 1
  CONFIGURE_COMMAND ""
  BUILD_COMMAND ""
  UPDATE_COMMAND ""
  INSTALL_COMMAND
    ${CMAKE_COMMAND} -E remove_directory ${ONNXRUNTIME_INSTALL_DIR} &&
    ${CMAKE_COMMAND} -E make_directory ${ONNXRUNTIME_INSTALL_DIR} &&
    ${CMAKE_COMMAND} -E rename ${ONNXRUNTIME_SOURCE_DIR}/lib/ ${ONNXRUNTIME_INSTALL_DIR}/lib &&
    ${CMAKE_COMMAND} -E copy_directory ${ONNXRUNTIME_SOURCE_DIR}/include
    ${ONNXRUNTIME_INC_DIR}
  BUILD_BYPRODUCTS ${ONNXRUNTIME_LIB})

add_library(external_onnxruntime STATIC IMPORTED GLOBAL)
set_property(TARGET external_onnxruntime PROPERTY IMPORTED_LOCATION ${ONNXRUNTIME_LIB})
add_dependencies(external_onnxruntime ${ONNXRUNTIME_PROJECT})
121
external/opencv.cmake
vendored
Normal file
@@ -0,0 +1,121 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
if(WIN32)
|
||||
find_package(OpenCV REQUIRED PATHS ${OpenCV_DIR})
|
||||
list(APPEND DEPEND_LIBS ${OpenCV_LIBS})
|
||||
else()
|
||||
|
||||
include(ExternalProject)
|
||||
|
||||
set(OPENCV_PROJECT "extern_opencv")
|
||||
set(OPENCV_PREFIX_DIR ${THIRD_PARTY_PATH}/opencv)
|
||||
set(OPENCV_SOURCE_DIR
|
||||
${THIRD_PARTY_PATH}/opencv/src/${OPENCV_PROJECT})
|
||||
set(OPENCV_INSTALL_DIR ${THIRD_PARTY_PATH}/install/opencv)
|
||||
set(OPENCV_INC_DIR
|
||||
"${OPENCV_INSTALL_DIR}/include/"
|
||||
CACHE PATH "opencv include directory." FORCE)
|
||||
set(OPENCV_LIB_DIR
|
||||
"${OPENCV_INSTALL_DIR}/lib"
|
||||
CACHE PATH "opencv lib directory." FORCE)
|
||||
set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${OPENCV_LIB_DIR}")
|
||||
|
||||
if(WIN32)
|
||||
message(FATAL_ERROR "NOT SUPPORT WINDOWS NOW, OPENCV")
|
||||
elseif(APPLE)
|
||||
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
|
||||
set(OPENCV_URL "https://bj.bcebos.com/paddle2onnx/libs/opencv-osx-arm64-3.4.16.tgz")
|
||||
else()
|
||||
set(OPENCV_URL "https://bj.bcebos.com/paddle2onnx/libs/opencv-osx-x86_64-3.4.16.tgz")
|
||||
endif()
|
||||
else()
|
||||
set(OPENCV_URL "https://bj.bcebos.com/paddle2onnx/libs/opencv-linux-x64-3.4.16.tgz")
|
||||
  if(ENABLE_OPENCV_CUDA)
    set(OPENCV_URL "https://bj.bcebos.com/paddle2onnx/libs/opencv-linux-x64-gpu-3.4.16.tgz")
  endif()
endif()

include_directories(${OPENCV_INC_DIR})  # For OpenCV code to include internal headers.

set(OPENCV_SOURCE_LIB ${OPENCV_SOURCE_DIR}/lib/)
if(WIN32)
  message(FATAL_ERROR "OpenCV is not supported on Windows.")
elseif(APPLE)
  set(OPENCV_CORE_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_core.dylib)
  set(OPENCV_HIGHGUI_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_highgui.dylib)
  set(OPENCV_IMGPROC_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_imgproc.dylib)
  set(OPENCV_IMGCODESC_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_imgcodecs.dylib)
else()
  set(OPENCV_SOURCE_LIB ${OPENCV_SOURCE_DIR}/lib64)
  set(OPENCV_CORE_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_core.so)
  set(OPENCV_HIGHGUI_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_highgui.so)
  set(OPENCV_IMGPROC_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_imgproc.so)
  set(OPENCV_IMGCODESC_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_imgcodecs.so)
  set(OPENCV_CUDAARITHM_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_cudaarithm.so)
  set(OPENCV_CUDAIMGPROC_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_cudaimgproc.so)
  set(OPENCV_CUDAWARPING_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_cudawarping.so)
endif()

if(WIN32)
  message(FATAL_ERROR "OpenCV is not supported on Windows.")
else()
  ExternalProject_Add(
    ${OPENCV_PROJECT}
    ${EXTERNAL_PROJECT_LOG_ARGS}
    URL ${OPENCV_URL}
    PREFIX ${OPENCV_PREFIX_DIR}
    DOWNLOAD_NO_PROGRESS 1
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
    UPDATE_COMMAND ""
    INSTALL_COMMAND
      ${CMAKE_COMMAND} -E remove_directory ${OPENCV_INSTALL_DIR} &&
      ${CMAKE_COMMAND} -E make_directory ${OPENCV_INSTALL_DIR} &&
      ${CMAKE_COMMAND} -E rename ${OPENCV_SOURCE_LIB} ${OPENCV_INSTALL_DIR}/lib &&
      ${CMAKE_COMMAND} -E copy_directory ${OPENCV_SOURCE_DIR}/include/ ${OPENCV_INC_DIR}
    BUILD_BYPRODUCTS ${OPENCV_LIB})
endif()

add_library(external_opencv_core STATIC IMPORTED GLOBAL)
set_property(TARGET external_opencv_core PROPERTY IMPORTED_LOCATION ${OPENCV_CORE_LIB})
add_library(external_opencv_highgui STATIC IMPORTED GLOBAL)
set_property(TARGET external_opencv_highgui PROPERTY IMPORTED_LOCATION ${OPENCV_HIGHGUI_LIB})
add_library(external_opencv_imgproc STATIC IMPORTED GLOBAL)
set_property(TARGET external_opencv_imgproc PROPERTY IMPORTED_LOCATION ${OPENCV_IMGPROC_LIB})
add_library(external_opencv_imgcodesc STATIC IMPORTED GLOBAL)
set_property(TARGET external_opencv_imgcodesc PROPERTY IMPORTED_LOCATION ${OPENCV_IMGCODESC_LIB})

add_dependencies(external_opencv_core ${OPENCV_PROJECT})
add_dependencies(external_opencv_highgui ${OPENCV_PROJECT})
add_dependencies(external_opencv_imgproc ${OPENCV_PROJECT})
add_dependencies(external_opencv_imgcodesc ${OPENCV_PROJECT})

list(APPEND DEPEND_LIBS external_opencv_core external_opencv_highgui external_opencv_imgproc external_opencv_imgcodesc)

if(ENABLE_OPENCV_CUDA)
  add_library(extern_opencv_cudawarping STATIC IMPORTED GLOBAL)
  set_property(TARGET extern_opencv_cudawarping PROPERTY IMPORTED_LOCATION ${OPENCV_CUDAWARPING_LIB})
  add_dependencies(extern_opencv_cudawarping ${OPENCV_PROJECT})
  add_library(extern_opencv_cudaarithm STATIC IMPORTED GLOBAL)
  set_property(TARGET extern_opencv_cudaarithm PROPERTY IMPORTED_LOCATION ${OPENCV_CUDAARITHM_LIB})
  add_dependencies(extern_opencv_cudaarithm ${OPENCV_PROJECT})
  add_library(extern_opencv_cudaimgproc STATIC IMPORTED GLOBAL)
  set_property(TARGET extern_opencv_cudaimgproc PROPERTY IMPORTED_LOCATION ${OPENCV_CUDAIMGPROC_LIB})
  add_dependencies(extern_opencv_cudaimgproc ${OPENCV_PROJECT})
  list(APPEND DEPEND_LIBS extern_opencv_cudawarping extern_opencv_cudaarithm extern_opencv_cudaimgproc)
endif()
endif(WIN32)
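The imported targets above can be linked like ordinary CMake libraries. A minimal sketch, assuming a hypothetical consumer target named vision_demo (the target and source names are illustrative, not part of this commit):

    # Hypothetical consumer of the imported OpenCV targets defined above.
    add_executable(vision_demo demo.cc)
    target_include_directories(vision_demo PRIVATE ${OPENCV_INC_DIR})
    target_link_libraries(vision_demo PRIVATE
        external_opencv_core external_opencv_imgproc
        external_opencv_imgcodesc external_opencv_highgui)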
80
external/paddle2onnx.cmake
vendored
Normal file
@@ -0,0 +1,80 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
include(ExternalProject)

set(PADDLE2ONNX_PROJECT "extern_paddle2onnx")
set(PADDLE2ONNX_PREFIX_DIR ${THIRD_PARTY_PATH}/paddle2onnx)
set(PADDLE2ONNX_SOURCE_DIR
    ${THIRD_PARTY_PATH}/paddle2onnx/src/${PADDLE2ONNX_PROJECT})
set(PADDLE2ONNX_INSTALL_DIR ${THIRD_PARTY_PATH}/install/paddle2onnx)
set(PADDLE2ONNX_INC_DIR
    "${PADDLE2ONNX_INSTALL_DIR}/include"
    CACHE PATH "paddle2onnx include directory." FORCE)
set(PADDLE2ONNX_LIB_DIR
    "${PADDLE2ONNX_INSTALL_DIR}/lib/"
    CACHE PATH "paddle2onnx lib directory." FORCE)
set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}"
    "${PADDLE2ONNX_LIB_DIR}")

include_directories(${PADDLE2ONNX_INC_DIR})
if(WIN32)
  set(PADDLE2ONNX_COMPILE_LIB
      "${PADDLE2ONNX_INSTALL_DIR}/lib/paddle2onnx.lib"
      CACHE FILEPATH "paddle2onnx compile library." FORCE)
elseif(APPLE)
  set(PADDLE2ONNX_COMPILE_LIB
      "${PADDLE2ONNX_INSTALL_DIR}/lib/libpaddle2onnx.dylib"
      CACHE FILEPATH "paddle2onnx compile library." FORCE)
else()
  set(PADDLE2ONNX_COMPILE_LIB
      "${PADDLE2ONNX_INSTALL_DIR}/lib/libpaddle2onnx.so"
      CACHE FILEPATH "paddle2onnx compile library." FORCE)
endif(WIN32)

set(PADDLE2ONNX_URL_BASE "https://bj.bcebos.com/paddle2onnx/libs/")
set(PADDLE2ONNX_VERSION "0.9.9")
if(WIN32)
  set(PADDLE2ONNX_FILE "paddle2onnx-win-x64-${PADDLE2ONNX_VERSION}.zip")
elseif(APPLE)
  if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
    set(PADDLE2ONNX_FILE "paddle2onnx-osx-arm64-${PADDLE2ONNX_VERSION}.tgz")
  else()
    set(PADDLE2ONNX_FILE "paddle2onnx-osx-x86_64-${PADDLE2ONNX_VERSION}.tgz")
  endif()
else()
  set(PADDLE2ONNX_FILE "paddle2onnx-linux-x64-${PADDLE2ONNX_VERSION}.tgz")
endif()
set(PADDLE2ONNX_URL "${PADDLE2ONNX_URL_BASE}${PADDLE2ONNX_FILE}")

ExternalProject_Add(
  ${PADDLE2ONNX_PROJECT}
  ${EXTERNAL_PROJECT_LOG_ARGS}
  URL ${PADDLE2ONNX_URL}
  PREFIX ${PADDLE2ONNX_PREFIX_DIR}
  DOWNLOAD_NO_PROGRESS 1
  CONFIGURE_COMMAND ""
  BUILD_COMMAND ""
  UPDATE_COMMAND ""
  INSTALL_COMMAND
    ${CMAKE_COMMAND} -E remove_directory ${PADDLE2ONNX_INSTALL_DIR} &&
    ${CMAKE_COMMAND} -E make_directory ${PADDLE2ONNX_INSTALL_DIR} &&
    ${CMAKE_COMMAND} -E rename ${PADDLE2ONNX_SOURCE_DIR}/lib/ ${PADDLE2ONNX_LIB_DIR} &&
    ${CMAKE_COMMAND} -E copy_directory ${PADDLE2ONNX_SOURCE_DIR}/include ${PADDLE2ONNX_INC_DIR}
  BUILD_BYPRODUCTS ${PADDLE2ONNX_COMPILE_LIB})

add_library(external_paddle2onnx STATIC IMPORTED GLOBAL)
set_property(TARGET external_paddle2onnx PROPERTY IMPORTED_LOCATION
             ${PADDLE2ONNX_COMPILE_LIB})
add_dependencies(external_paddle2onnx ${PADDLE2ONNX_PROJECT})
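Once defined, external_paddle2onnx links like any other target. A minimal sketch, assuming a hypothetical executable converter_tool (the name and source file are illustrative, not from this commit):

    # Hypothetical consumer; headers are already visible via include_directories above.
    add_executable(converter_tool main.cc)
    target_link_libraries(converter_tool PRIVATE external_paddle2onnx)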
44
external/summary.cmake
vendored
Normal file
@@ -0,0 +1,44 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

function(fastdeploy_summary)
  message(STATUS "")
  message(STATUS "*************FastDeploy Building Summary**********")
  message(STATUS "  CMake version            : ${CMAKE_VERSION}")
  message(STATUS "  CMake command            : ${CMAKE_COMMAND}")
  message(STATUS "  System                   : ${CMAKE_SYSTEM_NAME}")
  message(STATUS "  C++ compiler             : ${CMAKE_CXX_COMPILER}")
  message(STATUS "  C++ compiler version     : ${CMAKE_CXX_COMPILER_VERSION}")
  message(STATUS "  CXX flags                : ${CMAKE_CXX_FLAGS}")
  message(STATUS "  Build type               : ${CMAKE_BUILD_TYPE}")
  get_directory_property(tmp DIRECTORY ${PROJECT_SOURCE_DIR} COMPILE_DEFINITIONS)
  message(STATUS "  Compile definitions      : ${tmp}")
  message(STATUS "  CMAKE_PREFIX_PATH        : ${CMAKE_PREFIX_PATH}")
  message(STATUS "  CMAKE_INSTALL_PREFIX     : ${CMAKE_INSTALL_PREFIX}")
  message(STATUS "  CMAKE_MODULE_PATH        : ${CMAKE_MODULE_PATH}")
  message(STATUS "")
  message(STATUS "  FastDeploy version       : ${FASTDEPLOY_VERSION}")
  message(STATUS "  Paddle2ONNX version      : ${PADDLE2ONNX_VERSION}")
  message(STATUS "  ONNXRuntime version      : ${ONNXRUNTIME_VERSION}")
  message(STATUS "  ENABLE_ORT_BACKEND       : ${ENABLE_ORT_BACKEND}")
  if(WITH_GPU)
    message(STATUS "  WITH_GPU                 : ${WITH_GPU}")
    message(STATUS "  ENABLE_TRT_BACKEND       : ${ENABLE_TRT_BACKEND}")
    message(STATUS "  CUDA_DIRECTORY           : ${CUDA_DIRECTORY}")
    message(STATUS "  TRT_DIRECTORY            : ${TRT_DIRECTORY}")
  endif()
  message(STATUS "  ENABLE_VISION            : ${ENABLE_VISION}")
  message(STATUS "  ENABLE_DEBUG             : ${ENABLE_DEBUG}")
  message(STATUS "  ENABLE_VISION_VISUALIZE  : ${ENABLE_VISION_VISUALIZE}")
endfunction()
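The summary is only printed when the function is invoked. A minimal sketch of a call site, assuming the top-level CMakeLists.txt includes this module after all options are resolved (the include path is an assumption):

    # Hypothetical call site in the top-level CMakeLists.txt.
    include(${PROJECT_SOURCE_DIR}/external/summary.cmake)
    fastdeploy_summary()  # prints the configuration report defined above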
15
external/utils.cmake
vendored
Normal file
@@ -0,0 +1,15 @@
# This function comes from https://blog.csdn.net/yindongjie1221/article/details/90614261
function(redefine_file_macro targetname)
  get_target_property(source_files "${targetname}" SOURCES)
  foreach(sourcefile ${source_files})
    get_property(defs SOURCE "${sourcefile}"
        PROPERTY COMPILE_DEFINITIONS)
    get_filename_component(filepath "${sourcefile}" ABSOLUTE)
    string(REPLACE ${PROJECT_SOURCE_DIR}/ "" relpath ${filepath})
    list(APPEND defs "__REL_FILE__=\"${relpath}\"")
    set_property(
        SOURCE "${sourcefile}"
        PROPERTY COMPILE_DEFINITIONS ${defs}
        )
  endforeach()
endfunction()
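As a usage sketch: applying the helper to a target defines __REL_FILE__ for each of its sources, so logging code can print repository-relative paths instead of the absolute paths baked into __FILE__. The target and file names below are hypothetical:

    # Hypothetical target; fd_logger and logger.cc are illustrative names.
    add_library(fd_logger STATIC logger.cc)
    redefine_file_macro(fd_logger)
    # logger.cc may now reference __REL_FILE__ in place of __FILE__ for log prefixes.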
0
fastdeploy/CMakeLists.txt
Normal file
201
fastdeploy/LICENSE
Normal file
@@ -0,0 +1,201 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
734
fastdeploy/ThirdPartyNotices.txt
Normal file
@@ -0,0 +1,734 @@
This project depends on some open source projects, listed below.

--------
1. https://github.com/protocolbuffers/protobuf

Copyright 2008 Google Inc. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
    * Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Code generated by the Protocol Buffer compiler is owned by the owner
of the input file used when generating it. This code is not
standalone and requires a support library to be linked with it. This
support library is itself covered by the above license.
--------
2. https://github.com/onnx/onnx

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
--------
3. https://github.com/microsoft/onnxruntime

MIT License

Copyright (c) Microsoft Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------
4. https://github.com/pybind/pybind11

Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors
   may be used to endorse or promote products derived from this software
   without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing of
external contributions to this project including patches, pull requests, etc.
--------
5. https://github.com/onnx/onnx-tensorrt

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright 2021 NVIDIA Corporation

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
--------
6. https://github.com/opencv/opencv

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
--------
|
||||
6. https://github.com/jbeder/yaml-cpp
|
||||
|
||||
Copyright (c) 2008-2015 Jesse Beder.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
@@ -11,189 +11,31 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from six import text_type as _text_type
from .download import download, download_and_decompress

import argparse

# Since the source code is not fully open sourced,
# currently we will provide the prebuilt library
# and demo codes
import os

__version__ = "0.1.0"
import logging
from .fastdeploy_main import Frontend, Backend, FDDataType, TensorInfo, RuntimeOption, Device
from .fastdeploy_runtime import *
from . import fastdeploy_main as C
from . import vision


def parse_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model',
        type=_text_type,
        default=None,
        help='Name of model, which can be listed by --list_models')
    parser.add_argument(
        '--platform',
        type=_text_type,
        default=None,
        help='Define platform, supports Windows/Linux/Android/iOS.')
    parser.add_argument(
        '--soc',
        type=_text_type,
        default=None,
        help='Define soc for the platform, supports x86/x86-NVIDIA_GPU/ARM/jetson.'
    )
    parser.add_argument(
        '--save_dir',
        type=_text_type,
        default=".",
        help='Path to download and extract deployment SDK.')
    parser.add_argument(
        '--list_models',
        required=False,
        action="store_true",
        default=False,
        help='List all the supported models.')
    parser.add_argument(
        '--download_sdk',
        required=False,
        action="store_true",
        default=False,
        help='Download the deployment SDK for the chosen model/platform/soc.')

    return parser.parse_args()
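

# A hypothetical invocation sketch for the flags parsed above. The module
# entry point name and the model name are assumptions, not taken from this
# commit; adjust them to however the package is exposed on your system:
#
#   python -m fastdeploy --list_models
#   python -m fastdeploy --download_sdk --model some_model \
#       --platform Linux --soc x86 --save_dir .
#
# --list_models prints the supported model names; --download_sdk then pulls
# the prebuilt SDK matching the model/platform/soc triple into save_dir.
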

def TensorInfoStr(tensor_info):
    message = "TensorInfo(name : '{}', dtype : '{}', shape : '{}')".format(
        tensor_info.name, tensor_info.dtype, tensor_info.shape)
    return message


def read_sources():
    user_dir = os.path.expanduser('~')
    print("Updating the newest sdk information...")
    source_cfgs = "https://bj.bcebos.com/paddlehub/fastdeploy/fastdeploy_newest_sources.cfg.1"
    if os.path.exists(os.path.join(user_dir, "fastdeploy_newest_sources.cfg.1")):
        os.remove(os.path.join(user_dir, "fastdeploy_newest_sources.cfg.1"))
    download(source_cfgs, user_dir)
    categories = dict()
    res = dict()
    with open(os.path.join(user_dir, "fastdeploy_newest_sources.cfg.1")) as f:
        for line in f:
            if line.strip().startswith("#"):
                continue
            if line.strip() == "":
                continue
            category, model, plat, soc, url = line.strip().split('\t')
            if category not in categories:
                categories[category] = set()
            categories[category].add(model)
            if model not in res:
                res[model] = dict()
            if plat not in res[model]:
                res[model][plat] = dict()
            res[model][plat][soc] = url
    return categories, res
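

# Each non-comment, non-empty line of fastdeploy_newest_sources.cfg.1 is
# expected to carry five tab-separated fields, matching the split('\t')
# above:
#
#   <category>\t<model>\t<platform>\t<soc>\t<download_url>
#
# read_sources() folds these into a {category: {models}} index plus a nested
# {model: {platform: {soc: url}}} lookup that main() walks below.
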

def RuntimeOptionStr(runtime_option):
    attrs = dir(runtime_option)
    message = "RuntimeOption(\n"
    for attr in attrs:
        if attr.startswith("__"):
            continue
        message += " {} : {}\t\n".format(attr, getattr(runtime_option, attr))
    message = message.strip("\n")
    message += ")"
    return message


def main():
    args = parse_arguments()

    if not args.list_models and not args.download_sdk:
        print(
            "Please use flag --list_models to show all the supported models, or use flag --download_sdk to download the specified SDK to deploy your model."
        )
        return

    categories, all_sources = read_sources()
    all_models = list(all_sources.keys())
    all_models.sort()

    if args.list_models:
        print("Currently, FastDeploy supports {} models, listed as below,\n".format(
            len(all_models)))

        for k, v in categories.items():
            print("\nModel Category: {}".format(k))
            print("_" * 100)
            models = list(categories[k])
            models.sort()
            i = 0
            while i < len(models):
                if i == len(models) - 1:
                    print(models[i].center(30))
                    i += 1
                elif i == len(models) - 2:
                    print(models[i].center(30), models[i + 1].center(30))
                    i += 2
                else:
                    print(models[i].center(30), models[i + 1].center(30),
                          models[i + 2].center(30))
                    i += 3
        return

    if not os.path.exists(args.save_dir):
        print("The specified save_dir: {} does not exist.".format(args.save_dir))
        return

    if args.model is None or args.model == "":
        print(
            "Please define --model to choose which kind of model to deploy, use --list_models to show all the supported models."
        )
        return

    if args.model not in all_sources:
        print(
            "{} is not supported, use --list_models to list all the models FastDeploy supported.".
            format(args.model))
        return

    if args.platform is None or args.platform == "":
        print(
            "Please define --platform to choose which platform to deploy, supports windows/linux/android/ios."
        )
        return

    if args.platform not in all_sources[args.model]:
        print(
            "The model:{} only supports platform of {}, {} is not supported now.".
            format(args.model,
                   list(all_sources[args.model].keys()), args.platform))
        return

    if args.soc is None or args.soc == "":
        print(
            "Please define --soc to choose which hardware to deploy, for model:{} and platform:{}, the available socs are {}.".
            format(args.model, args.platform,
                   list(all_sources[args.model][args.platform].keys())))
        return

    if args.soc not in all_sources[args.model][args.platform]:
        print(
            "The model:{} in platform:{} only supports soc of {}, {} is not supported now.".
            format(args.model, args.platform,
                   list(all_sources[args.model][args.platform].keys()),
                   args.soc))
        return

    print("\nDownloading SDK:",
          all_sources[args.model][args.platform][args.soc])

    save_dir = args.save_dir
    sdk_name = os.path.split(all_sources[args.model][args.platform][
        args.soc])[-1].strip()
    if all_sources[args.model][args.platform][args.soc].count(".zip") > 0:
        sdk_name = os.path.split(all_sources[args.model][args.platform][
            args.soc])[-1].strip().split(".zip")[0]
    new_save_dir = os.path.join(args.save_dir, sdk_name)
    if not os.path.exists(new_save_dir):
        os.mkdir(new_save_dir)
    save_dir = new_save_dir
    download_and_decompress(
        all_sources[args.model][args.platform][args.soc],
        new_save_dir,
        rename=sdk_name + ".zip")
    os.remove(os.path.join(new_save_dir, sdk_name + ".zip"))
    print("Done. All the files of SDK have been extracted in {}.".format(
        new_save_dir))


C.TensorInfo.__repr__ = TensorInfoStr
C.RuntimeOption.__repr__ = RuntimeOptionStr

if __name__ == "__main__":
    main()

48
fastdeploy/backends/backend.h
Normal file
@@ -0,0 +1,48 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include "fastdeploy/core/fd_tensor.h"

namespace fastdeploy {

struct TensorInfo {
  std::string name;
  std::vector<int> shape;
  FDDataType dtype;
};

class BaseBackend {
 public:
  bool initialized_ = false;

  BaseBackend() {}
  // Virtual destructor so a derived backend is destroyed correctly
  // through a BaseBackend pointer.
  virtual ~BaseBackend() = default;

  virtual bool Initialized() const { return initialized_; }

  virtual int NumInputs() const = 0;
  virtual int NumOutputs() const = 0;
  virtual TensorInfo GetInputInfo(int index) = 0;
  virtual TensorInfo GetOutputInfo(int index) = 0;
  virtual bool Infer(std::vector<FDTensor>& inputs,
                     std::vector<FDTensor>* outputs) = 0;
};

}  // namespace fastdeploy
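BaseBackend is the contract every runtime wrapper implements; OrtBackend in the
files below is the in-tree example. A minimal sketch of a custom backend, with
hypothetical names (EchoBackend, the "x"/"y" tensors) invented purely to show
which members must be overridden:

class EchoBackend : public fastdeploy::BaseBackend {
 public:
  bool Init() {
    initialized_ = true;  // flag consumed by Initialized()
    return true;
  }
  int NumInputs() const override { return 1; }
  int NumOutputs() const override { return 1; }
  fastdeploy::TensorInfo GetInputInfo(int index) override {
    return {"x", {1, 3, 224, 224}, fastdeploy::FDDataType::FP32};
  }
  fastdeploy::TensorInfo GetOutputInfo(int index) override {
    return {"y", {1, 1000}, fastdeploy::FDDataType::FP32};
  }
  bool Infer(std::vector<fastdeploy::FDTensor>& inputs,
             std::vector<fastdeploy::FDTensor>* outputs) override {
    return false;  // a real backend would run the model here
  }
};
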
278
fastdeploy/backends/ort/ort_backend.cc
Normal file
@@ -0,0 +1,278 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/backends/ort/ort_backend.h"
#include "fastdeploy/backends/ort/utils.h"
#include "fastdeploy/utils/utils.h"
#include <memory>
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"
#endif

namespace fastdeploy {

// GetOrtDtype()/GetFdDtype() are declared in fastdeploy/backends/ort/utils.h
// and defined once in utils.cc; keeping a second copy here would make the
// overloads ambiguous.

void OrtBackend::BuildOption(const OrtBackendOption& option) {
  option_ = option;
  if (option.graph_optimization_level >= 0) {
    session_options_.SetGraphOptimizationLevel(
        GraphOptimizationLevel(option.graph_optimization_level));
  }
  if (option.intra_op_num_threads >= 0) {
    session_options_.SetIntraOpNumThreads(option.intra_op_num_threads);
  }
  if (option.inter_op_num_threads >= 0) {
    session_options_.SetInterOpNumThreads(option.inter_op_num_threads);
  }
  if (option.execution_mode >= 0) {
    session_options_.SetExecutionMode(ExecutionMode(option.execution_mode));
  }
  if (option.use_gpu) {
    auto all_providers = Ort::GetAvailableProviders();
    bool support_cuda = false;
    std::string providers_msg = "";
    for (size_t i = 0; i < all_providers.size(); ++i) {
      providers_msg = providers_msg + all_providers[i] + ", ";
      if (all_providers[i] == "CUDAExecutionProvider") {
        support_cuda = true;
      }
    }
    if (!support_cuda) {
      FDLogger() << "[WARN] Compiled fastdeploy with onnxruntime doesn't "
                    "support GPU, the available providers are "
                 << providers_msg << "; will fall back to CPUExecutionProvider."
                 << std::endl;
      option_.use_gpu = false;
    } else {
      FDASSERT(option.gpu_id == 0, "Requires gpu_id == 0, but now gpu_id = " +
                                       std::to_string(option.gpu_id) + ".");
      OrtCUDAProviderOptions cuda_options;
      cuda_options.device_id = option.gpu_id;
      session_options_.AppendExecutionProvider_CUDA(cuda_options);
    }
  }
}

bool OrtBackend::InitFromPaddle(const std::string& model_file,
                                const std::string& params_file,
                                const OrtBackendOption& option, bool verbose) {
  if (initialized_) {
    FDERROR << "OrtBackend is already initialized, cannot initialize again."
            << std::endl;
    return false;
  }
#ifdef ENABLE_PADDLE_FRONTEND
  char* model_content_ptr;
  int model_content_size = 0;
  if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
                           &model_content_ptr, &model_content_size, 11, true,
                           verbose, true, true, true)) {
    FDERROR << "Error occurred while exporting the PaddlePaddle model to "
               "ONNX format."
            << std::endl;
    return false;
  }
  std::string onnx_model_proto(model_content_ptr,
                               model_content_ptr + model_content_size);
  delete[] model_content_ptr;  // buffer ownership is handed over by Export()
  model_content_ptr = nullptr;
  return InitFromOnnx(onnx_model_proto, option, true);
#else
  FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
             "call `InitFromOnnx` instead."
          << std::endl;
#endif
  return false;
}

bool OrtBackend::InitFromOnnx(const std::string& model_file,
                              const OrtBackendOption& option,
                              bool from_memory_buffer) {
  if (initialized_) {
    FDERROR << "OrtBackend is already initialized, cannot initialize again."
            << std::endl;
    return false;
  }
  BuildOption(option);
  if (from_memory_buffer) {
    session_ = {env_, model_file.data(), model_file.size(), session_options_};
  } else {
#ifdef _WIN32
    session_ = {env_,
                std::wstring(model_file.begin(), model_file.end()).c_str(),
                session_options_};
#else
    session_ = {env_, model_file.c_str(), session_options_};
#endif
  }
  binding_ = std::make_shared<Ort::IoBinding>(session_);

  Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
  Ort::Allocator allocator(session_, memory_info);
  size_t n_inputs = session_.GetInputCount();
  for (size_t i = 0; i < n_inputs; ++i) {
    auto input_name = session_.GetInputName(i, allocator);
    auto type_info = session_.GetInputTypeInfo(i);
    std::vector<int64_t> shape =
        type_info.GetTensorTypeAndShapeInfo().GetShape();
    ONNXTensorElementDataType data_type =
        type_info.GetTensorTypeAndShapeInfo().GetElementType();
    inputs_desc_.emplace_back(OrtValueInfo{input_name, shape, data_type});
    allocator.Free(input_name);
  }

  size_t n_outputs = session_.GetOutputCount();
  for (size_t i = 0; i < n_outputs; ++i) {
    auto output_name = session_.GetOutputName(i, allocator);
    auto type_info = session_.GetOutputTypeInfo(i);
    std::vector<int64_t> shape =
        type_info.GetTensorTypeAndShapeInfo().GetShape();
    ONNXTensorElementDataType data_type =
        type_info.GetTensorTypeAndShapeInfo().GetElementType();
    outputs_desc_.emplace_back(OrtValueInfo{output_name, shape, data_type});

    Ort::MemoryInfo out_memory_info("Cpu", OrtDeviceAllocator, 0,
                                    OrtMemTypeDefault);
    binding_->BindOutput(output_name, out_memory_info);

    allocator.Free(output_name);
  }
  initialized_ = true;
  return true;
}

void OrtBackend::CopyToCpu(const Ort::Value& value, FDTensor* tensor) {
  const auto info = value.GetTensorTypeAndShapeInfo();
  const auto data_type = info.GetElementType();
  size_t numel = info.GetElementCount();
  tensor->shape = info.GetShape();

  if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
    tensor->data.resize(numel * sizeof(float));
    memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
           numel * sizeof(float));
    tensor->dtype = FDDataType::FP32;
  } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
    tensor->data.resize(numel * sizeof(int32_t));
    memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
           numel * sizeof(int32_t));
    tensor->dtype = FDDataType::INT32;
  } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
    tensor->data.resize(numel * sizeof(int64_t));
    memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
           numel * sizeof(int64_t));
    tensor->dtype = FDDataType::INT64;
  } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
    tensor->data.resize(numel * sizeof(double));
    memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<void*>(),
           numel * sizeof(double));
    tensor->dtype = FDDataType::FP64;
  } else {
    FDASSERT(false, "Unrecognized data type of " + std::to_string(data_type) +
                        " while calling OrtBackend::CopyToCpu().");
  }
}

bool OrtBackend::Infer(std::vector<FDTensor>& inputs,
                       std::vector<FDTensor>* outputs) {
  if (inputs.size() != inputs_desc_.size()) {
    FDERROR << "[OrtBackend] Size of the inputs(" << inputs.size()
            << ") should keep same with the inputs of this model("
            << inputs_desc_.size() << ")." << std::endl;
    return false;
  }

  // Convert each FDTensor to an Ort::Value and bind it as an input
  for (size_t i = 0; i < inputs.size(); ++i) {
    auto ort_value = CreateOrtValue(inputs[i], option_.use_gpu);
    binding_->BindInput(inputs[i].name.c_str(), ort_value);
  }

  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
    Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0,
                                OrtMemTypeDefault);
    binding_->BindOutput(outputs_desc_[i].name.c_str(), memory_info);
  }

  // Inference with inputs
  try {
    session_.Run({}, *(binding_.get()));
  } catch (const std::exception& e) {
    FDERROR << "Failed to Infer: " << e.what() << std::endl;
    return false;
  }

  // Copy results back after inference
  std::vector<Ort::Value> ort_outputs = binding_->GetOutputValues();
  outputs->resize(ort_outputs.size());
  for (size_t i = 0; i < ort_outputs.size(); ++i) {
    (*outputs)[i].name = outputs_desc_[i].name;
    CopyToCpu(ort_outputs[i], &((*outputs)[i]));
  }
  return true;
}

TensorInfo OrtBackend::GetInputInfo(int index) {
  FDASSERT(index < NumInputs(),
           "The index:" + std::to_string(index) +
               " should be less than the number of inputs:" +
               std::to_string(NumInputs()) + ".");
  TensorInfo info;
  info.name = inputs_desc_[index].name;
  info.shape.assign(inputs_desc_[index].shape.begin(),
                    inputs_desc_[index].shape.end());
  info.dtype = GetFdDtype(inputs_desc_[index].dtype);
  return info;
}

TensorInfo OrtBackend::GetOutputInfo(int index) {
  FDASSERT(index < NumOutputs(),
           "The index:" + std::to_string(index) +
               " should be less than the number of outputs:" +
               std::to_string(NumOutputs()) + ".");
  TensorInfo info;
  info.name = outputs_desc_[index].name;
  info.shape.assign(outputs_desc_[index].shape.begin(),
                    outputs_desc_[index].shape.end());
  info.dtype = GetFdDtype(outputs_desc_[index].dtype);
  return info;
}

}  // namespace fastdeploy
84
fastdeploy/backends/ort/ort_backend.h
Normal file
@@ -0,0 +1,84 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include "fastdeploy/backends/backend.h"
#include "onnxruntime_cxx_api.h"  // NOLINT

namespace fastdeploy {

struct OrtValueInfo {
  std::string name;
  std::vector<int64_t> shape;
  ONNXTensorElementDataType dtype;
};

struct OrtBackendOption {
  // Graph optimization level; -1 means use the onnxruntime default.
  // 0: ORT_DISABLE_ALL
  // 1: ORT_ENABLE_BASIC
  // 2: ORT_ENABLE_EXTENDED
  // 99: ORT_ENABLE_ALL (enables some custom optimizations, e.g. BERT)
  int graph_optimization_level = -1;
  int intra_op_num_threads = -1;
  int inter_op_num_threads = -1;
  // Execution mode; -1 means use the onnxruntime default.
  // 0: ORT_SEQUENTIAL
  // 1: ORT_PARALLEL
  int execution_mode = -1;
  bool use_gpu = false;
  int gpu_id = 0;
};

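// A minimal configuration sketch with illustrative values: extended graph
// optimizations, four intra-op threads, GPU 0. Every field left untouched
// keeps the -1 "use onnxruntime's default" convention documented above.
//
//   OrtBackendOption option;
//   option.graph_optimization_level = 2;  // ORT_ENABLE_EXTENDED
//   option.intra_op_num_threads = 4;
//   option.use_gpu = true;  // falls back to CPU without CUDAExecutionProvider
//   option.gpu_id = 0;      // BuildOption() currently asserts gpu_id == 0
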
class OrtBackend : public BaseBackend {
 public:
  OrtBackend() {}
  void BuildOption(const OrtBackendOption& option);

  bool InitFromPaddle(const std::string& model_file,
                      const std::string& params_file,
                      const OrtBackendOption& option = OrtBackendOption(),
                      bool verbose = false);

  bool InitFromOnnx(const std::string& model_file,
                    const OrtBackendOption& option = OrtBackendOption(),
                    bool from_memory_buffer = false);

  bool Infer(std::vector<FDTensor>& inputs,
             std::vector<FDTensor>* outputs) override;

  int NumInputs() const override { return inputs_desc_.size(); }

  int NumOutputs() const override { return outputs_desc_.size(); }

  TensorInfo GetInputInfo(int index) override;
  TensorInfo GetOutputInfo(int index) override;

 private:
  Ort::Env env_;
  Ort::Session session_{nullptr};
  Ort::SessionOptions session_options_;
  std::shared_ptr<Ort::IoBinding> binding_;
  std::vector<OrtValueInfo> inputs_desc_;
  std::vector<OrtValueInfo> outputs_desc_;

  OrtBackendOption option_;

  void CopyToCpu(const Ort::Value& value, FDTensor* tensor);
};
}  // namespace fastdeploy
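A minimal end-to-end sketch of the API above, assuming an ONNX model at a
hypothetical path "model.onnx" and inputs pre-filled by the caller; error
handling and tensor population are elided:

#include "fastdeploy/backends/ort/ort_backend.h"

int main() {
  fastdeploy::OrtBackend backend;
  fastdeploy::OrtBackendOption option;  // defaults: CPU, onnxruntime defaults
  if (!backend.InitFromOnnx("model.onnx", option)) {
    return -1;
  }
  std::vector<fastdeploy::FDTensor> inputs(backend.NumInputs());
  // ... fill inputs[i].name / shape / dtype / data to match GetInputInfo(i)
  std::vector<fastdeploy::FDTensor> outputs;
  bool ok = backend.Infer(inputs, &outputs);  // results are copied to CPU
  return ok ? 0 : -1;
}
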
67
fastdeploy/backends/ort/utils.cc
Normal file
@@ -0,0 +1,67 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/backends/ort/utils.h"
#include "fastdeploy/utils/utils.h"

namespace fastdeploy {

ONNXTensorElementDataType GetOrtDtype(const FDDataType& fd_dtype) {
  if (fd_dtype == FDDataType::FP32) {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  } else if (fd_dtype == FDDataType::FP64) {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE;
  } else if (fd_dtype == FDDataType::INT32) {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32;
  } else if (fd_dtype == FDDataType::INT64) {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
  }
  FDERROR << "Unrecognized fastdeploy data type: " << FDDataTypeStr(fd_dtype)
          << "." << std::endl;
  return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
}

FDDataType GetFdDtype(const ONNXTensorElementDataType& ort_dtype) {
  if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
    return FDDataType::FP32;
  } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
    return FDDataType::FP64;
  } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
    return FDDataType::INT32;
  } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
    return FDDataType::INT64;
  }
  FDERROR << "Unrecognized ort data type: " << ort_dtype << "." << std::endl;
  return FDDataType::FP32;
}

Ort::Value CreateOrtValue(FDTensor& tensor, bool is_backend_cuda) {
  FDASSERT(tensor.device == Device::GPU || tensor.device == Device::CPU,
           "Only support tensor which device is CPU or GPU for OrtBackend.");
  if (tensor.device == Device::GPU && is_backend_cuda) {
    Ort::MemoryInfo memory_info("Cuda", OrtDeviceAllocator, 0,
                                OrtMemTypeDefault);
    auto ort_value = Ort::Value::CreateTensor(
        memory_info, tensor.MutableData(), tensor.Nbytes(),
        tensor.shape.data(), tensor.shape.size(), GetOrtDtype(tensor.dtype));
    return ort_value;
  }
  Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
  auto ort_value = Ort::Value::CreateTensor(
      memory_info, tensor.Data(), tensor.Nbytes(), tensor.shape.data(),
      tensor.shape.size(), GetOrtDtype(tensor.dtype));
  return ort_value;
}

}  // namespace fastdeploy
39
fastdeploy/backends/ort/utils.h
Normal file
@@ -0,0 +1,39 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include "fastdeploy/backends/backend.h"
#include "onnxruntime_cxx_api.h"  // NOLINT

namespace fastdeploy {

// Convert FDDataType to the onnxruntime tensor element type.
ONNXTensorElementDataType GetOrtDtype(const FDDataType& fd_dtype);

// Convert an onnxruntime tensor element type back to FDDataType.
FDDataType GetFdDtype(const ONNXTensorElementDataType& ort_dtype);

// Create an Ort::Value from an FDTensor.
// is_backend_cuda specifies whether onnxruntime uses the
// CUDAExecutionProvider. When is_backend_cuda is true and
// tensor.device == Device::GPU, the CUDA data held by the tensor is shared
// directly with the OrtValue instead of being copied.
Ort::Value CreateOrtValue(FDTensor& tensor, bool is_backend_cuda = false);

}  // namespace fastdeploy
342
fastdeploy/backends/tensorrt/common/BatchStream.h
Normal file
@@ -0,0 +1,342 @@
/*
 * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef BATCH_STREAM_H
#define BATCH_STREAM_H

#include "NvInfer.h"
#include "common.h"
#include <algorithm>
#include <stdio.h>
#include <vector>

class IBatchStream {
 public:
  virtual void reset(int firstBatch) = 0;
  virtual bool next() = 0;
  virtual void skip(int skipCount) = 0;
  virtual float* getBatch() = 0;
  virtual float* getLabels() = 0;
  virtual int getBatchesRead() const = 0;
  virtual int getBatchSize() const = 0;
  virtual nvinfer1::Dims getDims() const = 0;
};

class MNISTBatchStream : public IBatchStream {
 public:
  MNISTBatchStream(int batchSize, int maxBatches, const std::string& dataFile,
                   const std::string& labelsFile,
                   const std::vector<std::string>& directories)
      : mBatchSize{batchSize}, mMaxBatches{maxBatches}, mDims{3, {1, 28, 28}}
  //!< We already know the dimensions of MNIST images.
  {
    readDataFile(locateFile(dataFile, directories));
    readLabelsFile(locateFile(labelsFile, directories));
  }

  void reset(int firstBatch) override { mBatchCount = firstBatch; }

  bool next() override {
    if (mBatchCount >= mMaxBatches) {
      return false;
    }
    ++mBatchCount;
    return true;
  }

  void skip(int skipCount) override { mBatchCount += skipCount; }

  float* getBatch() override {
    return mData.data() +
           (mBatchCount * mBatchSize * samplesCommon::volume(mDims));
  }

  float* getLabels() override {
    return mLabels.data() + (mBatchCount * mBatchSize);
  }

  int getBatchesRead() const override { return mBatchCount; }

  int getBatchSize() const override { return mBatchSize; }

  nvinfer1::Dims getDims() const override {
    return nvinfer1::Dims{4, {mBatchSize, mDims.d[0], mDims.d[1], mDims.d[2]}};
  }

 private:
  void readDataFile(const std::string& dataFilePath) {
    std::ifstream file{dataFilePath.c_str(), std::ios::binary};

    int magicNumber, numImages, imageH, imageW;
    file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
    // All values in the MNIST files are big endian.
    magicNumber = samplesCommon::swapEndianness(magicNumber);
    ASSERT(magicNumber == 2051 &&
           "Magic Number does not match the expected value for an MNIST image "
           "set");

    // Read number of images and dimensions
    file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
    file.read(reinterpret_cast<char*>(&imageH), sizeof(imageH));
    file.read(reinterpret_cast<char*>(&imageW), sizeof(imageW));

    numImages = samplesCommon::swapEndianness(numImages);
    imageH = samplesCommon::swapEndianness(imageH);
    imageW = samplesCommon::swapEndianness(imageW);

    // The MNIST data is made up of unsigned bytes, so we need to cast to float
    // and normalize.
    int numElements = numImages * imageH * imageW;
    std::vector<uint8_t> rawData(numElements);
    file.read(reinterpret_cast<char*>(rawData.data()),
              numElements * sizeof(uint8_t));
    mData.resize(numElements);
    std::transform(rawData.begin(), rawData.end(), mData.begin(),
                   [](uint8_t val) { return static_cast<float>(val) / 255.f; });
  }

  void readLabelsFile(const std::string& labelsFilePath) {
    std::ifstream file{labelsFilePath.c_str(), std::ios::binary};
    int magicNumber, numImages;
    file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
    // All values in the MNIST files are big endian.
    magicNumber = samplesCommon::swapEndianness(magicNumber);
    ASSERT(magicNumber == 2049 &&
           "Magic Number does not match the expected value for an MNIST labels "
           "file");

    file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
    numImages = samplesCommon::swapEndianness(numImages);

    std::vector<uint8_t> rawLabels(numImages);
    file.read(reinterpret_cast<char*>(rawLabels.data()),
              numImages * sizeof(uint8_t));
    mLabels.resize(numImages);
    std::transform(rawLabels.begin(), rawLabels.end(), mLabels.begin(),
                   [](uint8_t val) { return static_cast<float>(val); });
  }

  int mBatchSize{0};
  int mBatchCount{
      0};  //!< The batch that will be read on the next invocation of next()
  int mMaxBatches{0};
  nvinfer1::Dims mDims{};
  std::vector<float> mData{};
  std::vector<float> mLabels{};
};

class BatchStream : public IBatchStream {
 public:
  BatchStream(int batchSize, int maxBatches, std::string prefix,
              std::string suffix, std::vector<std::string> directories)
      : mBatchSize(batchSize), mMaxBatches(maxBatches), mPrefix(prefix),
        mSuffix(suffix), mDataDir(directories) {
    FILE* file = fopen(
        locateFile(mPrefix + std::string("0") + mSuffix, mDataDir).c_str(),
        "rb");
    ASSERT(file != nullptr);
    int d[4];
    size_t readSize = fread(d, sizeof(int), 4, file);
    ASSERT(readSize == 4);
    mDims.nbDims = 4;   // The number of dimensions.
    mDims.d[0] = d[0];  // Batch Size
    mDims.d[1] = d[1];  // Channels
    mDims.d[2] = d[2];  // Height
    mDims.d[3] = d[3];  // Width
    ASSERT(mDims.d[0] > 0 && mDims.d[1] > 0 && mDims.d[2] > 0 &&
           mDims.d[3] > 0);
    fclose(file);

    mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
    mBatch.resize(mBatchSize * mImageSize, 0);
    mLabels.resize(mBatchSize, 0);
    mFileBatch.resize(mDims.d[0] * mImageSize, 0);
    mFileLabels.resize(mDims.d[0], 0);
    reset(0);
  }

  BatchStream(int batchSize, int maxBatches, std::string prefix,
              std::vector<std::string> directories)
      : BatchStream(batchSize, maxBatches, prefix, ".batch", directories) {}

  BatchStream(int batchSize, int maxBatches, nvinfer1::Dims dims,
              std::string listFile, std::vector<std::string> directories)
      : mBatchSize(batchSize), mMaxBatches(maxBatches), mDims(dims),
        mListFile(listFile), mDataDir(directories) {
    mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
    mBatch.resize(mBatchSize * mImageSize, 0);
    mLabels.resize(mBatchSize, 0);
    mFileBatch.resize(mDims.d[0] * mImageSize, 0);
    mFileLabels.resize(mDims.d[0], 0);
    reset(0);
  }

  // Resets data members
  void reset(int firstBatch) override {
    mBatchCount = 0;
    mFileCount = 0;
    mFileBatchPos = mDims.d[0];
    skip(firstBatch);
  }

  // Advance to next batch and return true, or return false if there is no
  // batch left.
  bool next() override {
    if (mBatchCount == mMaxBatches) {
      return false;
    }

    for (int csize = 1, batchPos = 0; batchPos < mBatchSize;
         batchPos += csize, mFileBatchPos += csize) {
      ASSERT(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]);
      if (mFileBatchPos == mDims.d[0] && !update()) {
        return false;
      }

      // copy the smaller of: elements left to fulfill the request, or elements
      // left in the file buffer.
      csize = std::min(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos);
      std::copy_n(getFileBatch() + mFileBatchPos * mImageSize,
                  csize * mImageSize, getBatch() + batchPos * mImageSize);
      std::copy_n(getFileLabels() + mFileBatchPos, csize,
                  getLabels() + batchPos);
    }
    mBatchCount++;
    return true;
  }

  // Skips the batches
  void skip(int skipCount) override {
    if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 &&
        mFileBatchPos == mDims.d[0]) {
      mFileCount += skipCount * mBatchSize / mDims.d[0];
      return;
    }

    int x = mBatchCount;
    for (int i = 0; i < skipCount; i++) {
      next();
    }
    mBatchCount = x;
  }

  float* getBatch() override { return mBatch.data(); }

  float* getLabels() override { return mLabels.data(); }

  int getBatchesRead() const override { return mBatchCount; }

  int getBatchSize() const override { return mBatchSize; }

  nvinfer1::Dims getDims() const override { return mDims; }

 private:
  float* getFileBatch() { return mFileBatch.data(); }

  float* getFileLabels() { return mFileLabels.data(); }

  bool update() {
    if (mListFile.empty()) {
      std::string inputFileName = locateFile(
          mPrefix + std::to_string(mFileCount++) + mSuffix, mDataDir);
      FILE* file = fopen(inputFileName.c_str(), "rb");
      if (!file) {
        return false;
      }

      int d[4];
      size_t readSize = fread(d, sizeof(int), 4, file);
      ASSERT(readSize == 4);
      ASSERT(mDims.d[0] == d[0] && mDims.d[1] == d[1] && mDims.d[2] == d[2] &&
             mDims.d[3] == d[3]);
      size_t readInputCount =
          fread(getFileBatch(), sizeof(float), mDims.d[0] * mImageSize, file);
      ASSERT(readInputCount == size_t(mDims.d[0] * mImageSize));
      size_t readLabelCount =
          fread(getFileLabels(), sizeof(float), mDims.d[0], file);
      ASSERT(readLabelCount == 0 || readLabelCount == size_t(mDims.d[0]));

      fclose(file);
    } else {
      std::vector<std::string> fNames;
      std::ifstream file(locateFile(mListFile, mDataDir), std::ios::binary);
      if (!file) {
        return false;
      }

      sample::gLogInfo << "Batch #" << mFileCount << std::endl;
      file.seekg(((mBatchCount * mBatchSize)) * 7);

      for (int i = 1; i <= mBatchSize; i++) {
        std::string sName;
        std::getline(file, sName);
        sName = sName + ".ppm";
        sample::gLogInfo << "Calibrating with file " << sName << std::endl;
        fNames.emplace_back(sName);
      }

      mFileCount++;

      const int imageC = 3;
      const int imageH = 300;
      const int imageW = 300;
      std::vector<samplesCommon::PPM<imageC, imageH, imageW>> ppms(
          fNames.size());
      for (uint32_t i = 0; i < fNames.size(); ++i) {
        readPPMFile(locateFile(fNames[i], mDataDir), ppms[i]);
      }

      std::vector<float> data(samplesCommon::volume(mDims));
      const float scale = 2.0 / 255.0;
      const float bias = 1.0;
      long int volChl = mDims.d[2] * mDims.d[3];

      // Normalize input data
      for (int i = 0, volImg = mDims.d[1] * mDims.d[2] * mDims.d[3];
           i < mBatchSize; ++i) {
        for (int c = 0; c < mDims.d[1]; ++c) {
          for (int j = 0; j < volChl; ++j) {
            data[i * volImg + c * volChl + j] =
                scale * float(ppms[i].buffer[j * mDims.d[1] + c]) - bias;
          }
        }
      }

      std::copy_n(data.data(), mDims.d[0] * mImageSize, getFileBatch());
    }

    mFileBatchPos = 0;
    return true;
  }

  int mBatchSize{0};
  int mMaxBatches{0};
  int mBatchCount{0};
  int mFileCount{0};
  int mFileBatchPos{0};
  int mImageSize{0};
  std::vector<float> mBatch;       //!< Data for the batch
  std::vector<float> mLabels;      //!< Labels for the batch
  std::vector<float> mFileBatch;   //!< List of image files
  std::vector<float> mFileLabels;  //!< List of label files
  std::string mPrefix;             //!< Batch file name prefix
  std::string mSuffix;             //!< Batch file name suffix
  nvinfer1::Dims mDims;            //!< Input dimensions
  std::string mListFile;  //!< File name of the list of image names
  std::vector<std::string>
      mDataDir;  //!< Directories where the files can be found
};

#endif
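A construction sketch for the MNIST stream above, assuming the standard MNIST
idx file names and a hypothetical "data/mnist/" directory; locateFile()
searches the directory list for each file:

MNISTBatchStream stream(/*batchSize=*/32, /*maxBatches=*/100,
                        "train-images-idx3-ubyte", "train-labels-idx1-ubyte",
                        {"data/mnist/"});
while (stream.next()) {
  float* images = stream.getBatch();   // batchSize * 1 * 28 * 28 floats
  float* labels = stream.getLabels();  // batchSize floats
  // feed images/labels to a calibrator or test harness here
}
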
1
fastdeploy/backends/tensorrt/common/CPPLINT.cfg
Normal file
@@ -0,0 +1 @@
exclude_files=.*
118
fastdeploy/backends/tensorrt/common/EntropyCalibrator.h
Normal file
@@ -0,0 +1,118 @@
/*
 * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ENTROPY_CALIBRATOR_H
#define ENTROPY_CALIBRATOR_H

#include "BatchStream.h"
#include "NvInfer.h"

//! \class EntropyCalibratorImpl
//!
//! \brief Implements common functionality for Entropy calibrators.
//!
template <typename TBatchStream> class EntropyCalibratorImpl {
 public:
  EntropyCalibratorImpl(TBatchStream stream, int firstBatch,
                        std::string networkName, const char* inputBlobName,
                        bool readCache = true)
      : mStream{stream},
        mCalibrationTableName("CalibrationTable" + networkName),
        mInputBlobName(inputBlobName), mReadCache(readCache) {
    nvinfer1::Dims dims = mStream.getDims();
    mInputCount = samplesCommon::volume(dims);
    CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float)));
    mStream.reset(firstBatch);
  }

  virtual ~EntropyCalibratorImpl() { CHECK(cudaFree(mDeviceInput)); }

  int getBatchSize() const noexcept { return mStream.getBatchSize(); }

  bool getBatch(void* bindings[], const char* names[],
                int nbBindings) noexcept {
    if (!mStream.next()) {
      return false;
    }
    CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(),
                     mInputCount * sizeof(float), cudaMemcpyHostToDevice));
    ASSERT(!strcmp(names[0], mInputBlobName));
    bindings[0] = mDeviceInput;
    return true;
  }

  const void* readCalibrationCache(size_t& length) noexcept {
    mCalibrationCache.clear();
    std::ifstream input(mCalibrationTableName, std::ios::binary);
    input >> std::noskipws;
    if (mReadCache && input.good()) {
      std::copy(std::istream_iterator<char>(input),
                std::istream_iterator<char>(),
                std::back_inserter(mCalibrationCache));
    }
    length = mCalibrationCache.size();
    return length ? mCalibrationCache.data() : nullptr;
  }

  void writeCalibrationCache(const void* cache, size_t length) noexcept {
    std::ofstream output(mCalibrationTableName, std::ios::binary);
    output.write(reinterpret_cast<const char*>(cache), length);
  }

 private:
  TBatchStream mStream;
  size_t mInputCount;
  std::string mCalibrationTableName;
  const char* mInputBlobName;
  bool mReadCache{true};
  void* mDeviceInput{nullptr};
  std::vector<char> mCalibrationCache;
};

//! \class Int8EntropyCalibrator2
//!
//! \brief Implements Entropy calibrator 2.
//!        CalibrationAlgoType is kENTROPY_CALIBRATION_2.
//!
template <typename TBatchStream>
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
 public:
  Int8EntropyCalibrator2(TBatchStream stream, int firstBatch,
                         const char* networkName, const char* inputBlobName,
                         bool readCache = true)
      : mImpl(stream, firstBatch, networkName, inputBlobName, readCache) {}

  int getBatchSize() const noexcept override { return mImpl.getBatchSize(); }

  bool getBatch(void* bindings[], const char* names[],
                int nbBindings) noexcept override {
    return mImpl.getBatch(bindings, names, nbBindings);
  }

  const void* readCalibrationCache(size_t& length) noexcept override {
    return mImpl.readCalibrationCache(length);
  }

  void writeCalibrationCache(const void* cache,
                             size_t length) noexcept override {
    mImpl.writeCalibrationCache(cache, length);
  }

 private:
  EntropyCalibratorImpl<TBatchStream> mImpl;
};

#endif  // ENTROPY_CALIBRATOR_H
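A wiring sketch combining the two headers above; the surrounding builder
objects are hypothetical, and the network/input names are placeholders. The
calibrator consumes the MNISTBatchStream from BatchStream.h and is handed to
the TensorRT builder configuration before engine construction:

MNISTBatchStream stream(32, 100, "train-images-idx3-ubyte",
                        "train-labels-idx1-ubyte", {"data/mnist/"});
Int8EntropyCalibrator2<MNISTBatchStream> calibrator(stream, /*firstBatch=*/0,
                                                    "mnist", "Input");
config->setFlag(nvinfer1::BuilderFlag::kINT8);  // config: nvinfer1::IBuilderConfig*
config->setInt8Calibrator(&calibrator);
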
115
fastdeploy/backends/tensorrt/common/ErrorRecorder.h
Normal file
@@ -0,0 +1,115 @@
/*
 * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ERROR_RECORDER_H
#define ERROR_RECORDER_H
#include "NvInferRuntimeCommon.h"
#include "logger.h"
#include <atomic>
#include <cstdint>
#include <exception>
#include <mutex>
#include <vector>

using nvinfer1::ErrorCode;
using nvinfer1::IErrorRecorder;

//!
//! A simple implementation of the IErrorRecorder interface for
//! use by samples. This interface also can be used as a reference
//! implementation.
//! The sample Error recorder is based on a vector that pairs the error
//! code and the error string into a single element. It also uses
//! standard mutexes and atomics in order to make sure that the code
//! works in a multi-threaded environment.
//!
class SampleErrorRecorder : public IErrorRecorder {
  using errorPair = std::pair<ErrorCode, std::string>;
  using errorStack = std::vector<errorPair>;

 public:
  SampleErrorRecorder() = default;

  virtual ~SampleErrorRecorder() noexcept {}
  int32_t getNbErrors() const noexcept final { return mErrorStack.size(); }
  ErrorCode getErrorCode(int32_t errorIdx) const noexcept final {
    return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT
                                       : (*this)[errorIdx].first;
  };
  IErrorRecorder::ErrorDesc
  getErrorDesc(int32_t errorIdx) const noexcept final {
    return invalidIndexCheck(errorIdx) ? "errorIdx out of range."
                                       : (*this)[errorIdx].second.c_str();
  }
  // This class can never overflow since we have dynamic resize via std::vector
  // usage.
  bool hasOverflowed() const noexcept final { return false; }

  // Empty the errorStack.
  void clear() noexcept final {
    try {
      // grab a lock so that there is no addition while clearing.
      std::lock_guard<std::mutex> guard(mStackLock);
      mErrorStack.clear();
    } catch (const std::exception& e) {
      sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
    }
  };

  //! Simple helper function that tells whether the error stack is empty.
  bool empty() const noexcept { return mErrorStack.empty(); }

  bool reportError(ErrorCode val,
                   IErrorRecorder::ErrorDesc desc) noexcept final {
    try {
      std::lock_guard<std::mutex> guard(mStackLock);
      sample::gLogError << "Error[" << static_cast<int32_t>(val)
                        << "]: " << desc << std::endl;
      mErrorStack.push_back(errorPair(val, desc));
    } catch (const std::exception& e) {
      sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
    }
    // All errors are considered fatal.
    return true;
  }

  // Atomically increment or decrement the ref counter.
  IErrorRecorder::RefCount incRefCount() noexcept final { return ++mRefCount; }
  IErrorRecorder::RefCount decRefCount() noexcept final { return --mRefCount; }

 private:
  // Simple helper functions.
  const errorPair& operator[](size_t index) const noexcept {
    return mErrorStack[index];
  }

  bool invalidIndexCheck(int32_t index) const noexcept {
    // By converting signed to unsigned, we only need a single check since
    // negative numbers turn into large positive greater than the size.
    size_t sIndex = index;
    return sIndex >= mErrorStack.size();
  }
  // Mutex to hold when locking mErrorStack.
  std::mutex mStackLock;

  // Reference count of the class. Destruction of the class when mRefCount
  // is not zero causes undefined behavior.
  std::atomic<int32_t> mRefCount{0};

  // The error stack that holds the errors recorded by TensorRT.
  errorStack mErrorStack;
};  // class SampleErrorRecorder
#endif  // ERROR_RECORDER_H
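A usage sketch, assuming a hypothetical builder pointer already exists;
TensorRT interfaces accept a recorder via setErrorRecorder(), after which
failed calls can be inspected through the stack above:

SampleErrorRecorder recorder;
builder->setErrorRecorder(&recorder);  // builder: nvinfer1::IBuilder*
// ... after a failing TensorRT call:
for (int32_t i = 0; i < recorder.getNbErrors(); ++i) {
  std::cerr << recorder.getErrorDesc(i) << std::endl;
}
recorder.clear();
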
1
fastdeploy/backends/tensorrt/common/README.md
Normal file
@@ -0,0 +1 @@
The code in this directory comes from https://github.com/NVIDIA/TensorRT.
169
fastdeploy/backends/tensorrt/common/argsParser.h
Normal file
@@ -0,0 +1,169 @@
/*
 * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef TENSORRT_ARGS_PARSER_H
#define TENSORRT_ARGS_PARSER_H

#include <string>
#include <vector>
#ifdef _MSC_VER
#include ".\windows\getopt.h"
#else
#include <getopt.h>
#endif
#include <iostream>

namespace samplesCommon {

//!
//! \brief The SampleParams structure groups the basic parameters required by
//!        all sample networks.
//!
struct SampleParams {
  int32_t batchSize{1};  //!< Number of inputs in a batch
  int32_t dlaCore{-1};   //!< Specify the DLA core to run network on.
  bool int8{false};      //!< Allow running the network in Int8 mode.
  bool fp16{false};      //!< Allow running the network in FP16 mode.
  std::vector<std::string>
      dataDirs;  //!< Directory paths where sample data files are stored
  std::vector<std::string> inputTensorNames;
  std::vector<std::string> outputTensorNames;
};

//!
//! \brief The CaffeSampleParams structure groups the additional parameters
//!        required by networks that use caffe
//!
struct CaffeSampleParams : public SampleParams {
  std::string
      prototxtFileName;  //!< Filename of prototxt design file of a network
  std::string
      weightsFileName;  //!< Filename of trained weights file of a network
  std::string meanFileName;  //!< Filename of mean file of a network
};

//!
//! \brief The OnnxSampleParams structure groups the additional parameters
//!        required by networks that use ONNX
//!
struct OnnxSampleParams : public SampleParams {
  std::string onnxFileName;  //!< Filename of ONNX file of a network
};

//!
//! \brief The UffSampleParams structure groups the additional parameters
//!        required by networks that use Uff
//!
struct UffSampleParams : public SampleParams {
  std::string uffFileName;  //!< Filename of uff file of a network
};

//!
//! \brief Struct to maintain command-line arguments.
//!
struct Args {
  bool runInInt8{false};
  bool runInFp16{false};
  bool help{false};
  int32_t useDLACore{-1};
  int32_t batch{1};
  std::vector<std::string> dataDirs;
  std::string saveEngine;
  std::string loadEngine;
  bool useILoop{false};
};

//!
//! \brief Populates the Args struct with the provided command-line parameters.
//!
//! \throw invalid_argument if any of the arguments are not valid
//!
//! \return boolean If return value is true, execution can continue, otherwise
//!         program should exit
//!
inline bool parseArgs(Args& args, int32_t argc, char* argv[]) {
  while (1) {
    int32_t arg;
    static struct option long_options[] = {
        {"help", no_argument, 0, 'h'},
        {"datadir", required_argument, 0, 'd'},
        {"int8", no_argument, 0, 'i'},
        {"fp16", no_argument, 0, 'f'},
        {"useILoop", no_argument, 0, 'l'},
        {"saveEngine", required_argument, 0, 's'},
        {"loadEngine", no_argument, 0, 'o'},
        {"useDLACore", required_argument, 0, 'u'},
        {"batch", required_argument, 0, 'b'},
        {nullptr, 0, nullptr, 0}};
    int32_t option_index = 0;
    arg = getopt_long(argc, argv, "hd:iu", long_options, &option_index);
    if (arg == -1) {
      break;
    }

    switch (arg) {
    case 'h':
      args.help = true;
      return true;
    case 'd':
      if (optarg) {
        args.dataDirs.push_back(optarg);
      } else {
        std::cerr << "ERROR: --datadir requires option argument" << std::endl;
        return false;
      }
      break;
    case 's':
      if (optarg) {
        args.saveEngine = optarg;
      }
      break;
    case 'o':
      if (optarg) {
        args.loadEngine = optarg;
      }
      break;
||||
case 'i':
|
||||
args.runInInt8 = true;
|
||||
break;
|
||||
case 'f':
|
||||
args.runInFp16 = true;
|
||||
break;
|
||||
case 'l':
|
||||
args.useILoop = true;
|
||||
break;
|
||||
case 'u':
|
||||
if (optarg) {
|
||||
args.useDLACore = std::stoi(optarg);
|
||||
}
|
||||
break;
|
||||
case 'b':
|
||||
if (optarg) {
|
||||
args.batch = std::stoi(optarg);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
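
// Usage sketch (illustrative only): a typical call pattern from a sample's
// main(), assuming a printHelp() helper defined by the caller.
//
//   samplesCommon::Args args;
//   if (!samplesCommon::parseArgs(args, argc, argv) || args.help) {
//     printHelp();
//     return args.help ? EXIT_SUCCESS : EXIT_FAILURE;
//   }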

} // namespace samplesCommon

#endif // TENSORRT_ARGS_PARSER_H
426
fastdeploy/backends/tensorrt/common/buffers.h
Normal file
@@ -0,0 +1,426 @@
/*
 * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef TENSORRT_BUFFERS_H
#define TENSORRT_BUFFERS_H

#include "NvInfer.h"
#include "common.h"
#include "half.h"
#include <cassert>
#include <cuda_runtime_api.h>
#include <iostream>
#include <iterator>
#include <memory>
#include <new>
#include <numeric>
#include <string>
#include <vector>

namespace samplesCommon {

//!
//! \brief The GenericBuffer class is a templated class for buffers.
//!
//! \details This templated RAII (Resource Acquisition Is Initialization) class
//!          handles the allocation, deallocation, and querying of buffers on
//!          both the device and the host. It can handle data of arbitrary
//!          types because it stores byte buffers.
//!          The template parameters AllocFunc and FreeFunc are used for the
//!          allocation and deallocation of the buffer.
//!          AllocFunc must be a functor that takes in (void** ptr, size_t
//!          size) and returns bool. ptr is a pointer to where the allocated
//!          buffer address should be stored. size is the amount of memory in
//!          bytes to allocate. The boolean indicates whether or not the
//!          memory allocation was successful.
//!          FreeFunc must be a functor that takes in (void* ptr) and returns
//!          void. ptr is the allocated buffer address. It must work with
//!          nullptr input.
//!
template <typename AllocFunc, typename FreeFunc> class GenericBuffer {
 public:
  //!
  //! \brief Construct an empty buffer.
  //!
  GenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT)
      : mSize(0), mCapacity(0), mType(type), mBuffer(nullptr) {}

  //!
  //! \brief Construct a buffer with the specified allocation size in bytes.
  //!
  GenericBuffer(size_t size, nvinfer1::DataType type)
      : mSize(size), mCapacity(size), mType(type) {
    if (!allocFn(&mBuffer, this->nbBytes())) {
      throw std::bad_alloc();
    }
  }

  GenericBuffer(GenericBuffer&& buf)
      : mSize(buf.mSize), mCapacity(buf.mCapacity), mType(buf.mType),
        mBuffer(buf.mBuffer) {
    buf.mSize = 0;
    buf.mCapacity = 0;
    buf.mType = nvinfer1::DataType::kFLOAT;
    buf.mBuffer = nullptr;
  }

  GenericBuffer& operator=(GenericBuffer&& buf) {
    if (this != &buf) {
      freeFn(mBuffer);
      mSize = buf.mSize;
      mCapacity = buf.mCapacity;
      mType = buf.mType;
      mBuffer = buf.mBuffer;
      // Reset buf.
      buf.mSize = 0;
      buf.mCapacity = 0;
      buf.mBuffer = nullptr;
    }
    return *this;
  }

  //!
  //! \brief Returns pointer to underlying array.
  //!
  void* data() { return mBuffer; }

  //!
  //! \brief Returns pointer to underlying array.
  //!
  const void* data() const { return mBuffer; }

  //!
  //! \brief Returns the size (in number of elements) of the buffer.
  //!
  size_t size() const { return mSize; }

  //!
  //! \brief Returns the size (in bytes) of the buffer.
  //!
  size_t nbBytes() const {
    return this->size() * samplesCommon::getElementSize(mType);
  }

  //!
  //! \brief Resizes the buffer. This is a no-op if the new size is smaller
  //!        than or equal to the current capacity.
  //!
  void resize(size_t newSize) {
    mSize = newSize;
    if (mCapacity < newSize) {
      freeFn(mBuffer);
      if (!allocFn(&mBuffer, this->nbBytes())) {
        throw std::bad_alloc{};
      }
      mCapacity = newSize;
    }
  }

  //!
  //! \brief Overload of resize that accepts Dims
  //!
  void resize(const nvinfer1::Dims& dims) {
    return this->resize(samplesCommon::volume(dims));
  }

  ~GenericBuffer() { freeFn(mBuffer); }

 private:
  size_t mSize{0}, mCapacity{0};
  nvinfer1::DataType mType;
  void* mBuffer;
  AllocFunc allocFn;
  FreeFunc freeFn;
};

class DeviceAllocator {
 public:
  bool operator()(void** ptr, size_t size) const {
    return cudaMalloc(ptr, size) == cudaSuccess;
  }
};

class DeviceFree {
 public:
  void operator()(void* ptr) const { cudaFree(ptr); }
};

class HostAllocator {
 public:
  bool operator()(void** ptr, size_t size) const {
    *ptr = malloc(size);
    return *ptr != nullptr;
  }
};

class HostFree {
 public:
  void operator()(void* ptr) const { free(ptr); }
};

using DeviceBuffer = GenericBuffer<DeviceAllocator, DeviceFree>;
using HostBuffer = GenericBuffer<HostAllocator, HostFree>;
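
// Minimal usage sketch (illustrative): allocate a device buffer for 1024
// floats, grow it, then move it; resize() only reallocates when the new size
// exceeds the current capacity.
//
//   DeviceBuffer dbuf(1024, nvinfer1::DataType::kFLOAT);
//   dbuf.resize(2048);                    // reallocates (capacity was 1024)
//   DeviceBuffer other = std::move(dbuf); // dbuf is left empty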

//!
//! \brief The ManagedBuffer class groups together a pair of corresponding
//!        device and host buffers.
//!
class ManagedBuffer {
 public:
  DeviceBuffer deviceBuffer;
  HostBuffer hostBuffer;
};

//!
//! \brief The BufferManager class handles host and device buffer allocation
//!        and deallocation.
//!
//! \details This RAII class handles host and device buffer allocation and
//!          deallocation, memcpy between host and device buffers to aid with
//!          inference, and debugging dumps to validate inference. The
//!          BufferManager class is meant to be used to simplify buffer
//!          management and any interactions between buffers and the engine.
//!
class BufferManager {
 public:
  static const size_t kINVALID_SIZE_VALUE = ~size_t(0);

  //!
  //! \brief Create a BufferManager for handling buffer interactions with
  //!        engine.
  //!
  BufferManager(std::shared_ptr<nvinfer1::ICudaEngine> engine,
                const int batchSize = 0,
                const nvinfer1::IExecutionContext* context = nullptr)
      : mEngine(engine), mBatchSize(batchSize) {
    // Full Dims implies no batch size.
    assert(engine->hasImplicitBatchDimension() || mBatchSize == 0);
    // Create host and device buffers
    for (int i = 0; i < mEngine->getNbBindings(); i++) {
      auto dims = context ? context->getBindingDimensions(i)
                          : mEngine->getBindingDimensions(i);
      size_t vol = context || !mBatchSize ? 1 : static_cast<size_t>(mBatchSize);
      nvinfer1::DataType type = mEngine->getBindingDataType(i);
      int vecDim = mEngine->getBindingVectorizedDim(i);
      if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector
      {
        int scalarsPerVec = mEngine->getBindingComponentsPerElement(i);
        dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec);
        vol *= scalarsPerVec;
      }
      vol *= samplesCommon::volume(dims);
      std::unique_ptr<ManagedBuffer> manBuf{new ManagedBuffer()};
      manBuf->deviceBuffer = DeviceBuffer(vol, type);
      manBuf->hostBuffer = HostBuffer(vol, type);
      mDeviceBindings.emplace_back(manBuf->deviceBuffer.data());
      mManagedBuffers.emplace_back(std::move(manBuf));
    }
  }

  //!
  //! \brief Returns a vector of device buffers that you can use directly as
  //!        bindings for the execute and enqueue methods of
  //!        IExecutionContext.
  //!
  std::vector<void*>& getDeviceBindings() { return mDeviceBindings; }

  //!
  //! \brief Returns a vector of device buffers.
  //!
  const std::vector<void*>& getDeviceBindings() const {
    return mDeviceBindings;
  }

  //!
  //! \brief Returns the device buffer corresponding to tensorName.
  //!        Returns nullptr if no such tensor can be found.
  //!
  void* getDeviceBuffer(const std::string& tensorName) const {
    return getBuffer(false, tensorName);
  }

  //!
  //! \brief Returns the host buffer corresponding to tensorName.
  //!        Returns nullptr if no such tensor can be found.
  //!
  void* getHostBuffer(const std::string& tensorName) const {
    return getBuffer(true, tensorName);
  }

  //!
  //! \brief Returns the size of the host and device buffers that correspond
  //!        to tensorName.
  //!        Returns kINVALID_SIZE_VALUE if no such tensor can be found.
  //!
  size_t size(const std::string& tensorName) const {
    int index = mEngine->getBindingIndex(tensorName.c_str());
    if (index == -1)
      return kINVALID_SIZE_VALUE;
    return mManagedBuffers[index]->hostBuffer.nbBytes();
  }

  //!
  //! \brief Dump host buffer with specified tensorName to ostream.
  //!        Prints error message to std::ostream if no such tensor can be
  //!        found.
  //!
  void dumpBuffer(std::ostream& os, const std::string& tensorName) {
    int index = mEngine->getBindingIndex(tensorName.c_str());
    if (index == -1) {
      os << "Invalid tensor name" << std::endl;
      return;
    }
    void* buf = mManagedBuffers[index]->hostBuffer.data();
    size_t bufSize = mManagedBuffers[index]->hostBuffer.nbBytes();
    nvinfer1::Dims bufDims = mEngine->getBindingDimensions(index);
    size_t rowCount = static_cast<size_t>(
        bufDims.nbDims > 0 ? bufDims.d[bufDims.nbDims - 1] : mBatchSize);
    int leadDim = mBatchSize;
    int* trailDims = bufDims.d;
    int nbDims = bufDims.nbDims;

    // Fix explicit Dimension networks
    if (!leadDim && nbDims > 0) {
      leadDim = bufDims.d[0];
      ++trailDims;
      --nbDims;
    }

    os << "[" << leadDim;
    for (int i = 0; i < nbDims; i++)
      os << ", " << trailDims[i];
    os << "]" << std::endl;
    switch (mEngine->getBindingDataType(index)) {
    case nvinfer1::DataType::kINT32:
      print<int32_t>(os, buf, bufSize, rowCount);
      break;
    case nvinfer1::DataType::kFLOAT:
      print<float>(os, buf, bufSize, rowCount);
      break;
    case nvinfer1::DataType::kHALF:
      print<half_float::half>(os, buf, bufSize, rowCount);
      break;
    case nvinfer1::DataType::kINT8:
      assert(0 && "Int8 network-level input and output is not supported");
      break;
    case nvinfer1::DataType::kBOOL:
      assert(0 && "Bool network-level input and output are not supported");
      break;
    }
  }

  //!
  //! \brief Templated print function that dumps buffers of arbitrary type to
  //!        std::ostream.
  //!        rowCount parameter controls how many elements are on each line.
  //!        A rowCount of 1 means that there is only 1 element on each line.
  //!
  template <typename T>
  void print(std::ostream& os, void* buf, size_t bufSize, size_t rowCount) {
    assert(rowCount != 0);
    assert(bufSize % sizeof(T) == 0);
    T* typedBuf = static_cast<T*>(buf);
    size_t numItems = bufSize / sizeof(T);
    for (int i = 0; i < static_cast<int>(numItems); i++) {
      // Handle rowCount == 1 case
      if (rowCount == 1 && i != static_cast<int>(numItems) - 1)
        os << typedBuf[i] << std::endl;
      else if (rowCount == 1)
        os << typedBuf[i];
      // Handle rowCount > 1 case
      else if (i % rowCount == 0)
        os << typedBuf[i];
      else if (i % rowCount == rowCount - 1)
        os << " " << typedBuf[i] << std::endl;
      else
        os << " " << typedBuf[i];
    }
  }

  //!
  //! \brief Copy the contents of input host buffers to input device buffers
  //!        synchronously.
  //!
  void copyInputToDevice() { memcpyBuffers(true, false, false); }

  //!
  //! \brief Copy the contents of output device buffers to output host buffers
  //!        synchronously.
  //!
  void copyOutputToHost() { memcpyBuffers(false, true, false); }

  //!
  //! \brief Copy the contents of input host buffers to input device buffers
  //!        asynchronously.
  //!
  void copyInputToDeviceAsync(const cudaStream_t& stream = 0) {
    memcpyBuffers(true, false, true, stream);
  }

  //!
  //! \brief Copy the contents of output device buffers to output host buffers
  //!        asynchronously.
  //!
  void copyOutputToHostAsync(const cudaStream_t& stream = 0) {
    memcpyBuffers(false, true, true, stream);
  }

  ~BufferManager() = default;

 private:
  void* getBuffer(const bool isHost, const std::string& tensorName) const {
    int index = mEngine->getBindingIndex(tensorName.c_str());
    if (index == -1)
      return nullptr;
    return (isHost ? mManagedBuffers[index]->hostBuffer.data()
                   : mManagedBuffers[index]->deviceBuffer.data());
  }

  void memcpyBuffers(const bool copyInput, const bool deviceToHost,
                     const bool async, const cudaStream_t& stream = 0) {
    for (int i = 0; i < mEngine->getNbBindings(); i++) {
      void* dstPtr = deviceToHost ? mManagedBuffers[i]->hostBuffer.data()
                                  : mManagedBuffers[i]->deviceBuffer.data();
      const void* srcPtr = deviceToHost
                               ? mManagedBuffers[i]->deviceBuffer.data()
                               : mManagedBuffers[i]->hostBuffer.data();
      const size_t byteSize = mManagedBuffers[i]->hostBuffer.nbBytes();
      const cudaMemcpyKind memcpyType =
          deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice;
      if ((copyInput && mEngine->bindingIsInput(i)) ||
          (!copyInput && !mEngine->bindingIsInput(i))) {
        if (async)
          CHECK(cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream));
        else
          CHECK(cudaMemcpy(dstPtr, srcPtr, byteSize, memcpyType));
      }
    }
  }

  std::shared_ptr<nvinfer1::ICudaEngine> mEngine; //!< The pointer to the engine
  int mBatchSize; //!< The batch size for legacy networks, 0 otherwise.
  std::vector<std::unique_ptr<ManagedBuffer>>
      mManagedBuffers; //!< The vector of pointers to managed buffers
  std::vector<void*> mDeviceBindings; //!< The vector of device buffers needed
                                      //!< for engine execution
};
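
// Typical inference flow with BufferManager (sketch; engine and context
// creation are omitted, and executeV2 is the TensorRT explicit-batch entry
// point):
//
//   samplesCommon::BufferManager buffers(engine);
//   // fill buffers.getHostBuffer("input") with input data ...
//   buffers.copyInputToDevice();
//   context->executeV2(buffers.getDeviceBindings().data());
//   buffers.copyOutputToHost();
//   const float* out =
//       static_cast<const float*>(buffers.getHostBuffer("output"));
//
// "input"/"output" are placeholders for the network's actual binding names.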

} // namespace samplesCommon

#endif // TENSORRT_BUFFERS_H
844
fastdeploy/backends/tensorrt/common/common.h
Normal file
@@ -0,0 +1,844 @@
/*
 * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TENSORRT_COMMON_H
#define TENSORRT_COMMON_H

// For loadLibrary
#ifdef _MSC_VER
// Needed so that the max/min definitions in windows.h do not conflict with
// std::max/min.
#define NOMINMAX
#include <windows.h>
#undef NOMINMAX
#else
#include <dlfcn.h>
#endif

#include "NvInfer.h"
#include "NvInferPlugin.h"
#include "logger.h"
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstring>
#include <cuda_runtime_api.h>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <new>
#include <numeric>
#include <ratio>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "safeCommon.h"

using namespace nvinfer1;
using namespace plugin;

#ifdef _MSC_VER
#define FN_NAME __FUNCTION__
#else
#define FN_NAME __func__
#endif

#if defined(__aarch64__) || defined(__QNX__)
#define ENABLE_DLA_API 1
#endif

#define CHECK_RETURN_W_MSG(status, val, errMsg)                                \
  do {                                                                         \
    if (!(status)) {                                                           \
      sample::gLogError << errMsg << " Error in " << __FILE__ << ", function " \
                        << FN_NAME << "(), line " << __LINE__ << std::endl;    \
      return val;                                                              \
    }                                                                          \
  } while (0)

#undef ASSERT
#define ASSERT(condition)                                                    \
  do {                                                                       \
    if (!(condition)) {                                                      \
      sample::gLogError << "Assertion failure: " << #condition << std::endl; \
      abort();                                                               \
    }                                                                        \
  } while (0)

#define CHECK_RETURN(status, val) CHECK_RETURN_W_MSG(status, val, "")

#define OBJ_GUARD(A) std::unique_ptr<A, void (*)(A * t)>

template <typename T, typename T_> OBJ_GUARD(T) makeObjGuard(T_* t) {
  CHECK(!(std::is_base_of<T, T_>::value || std::is_same<T, T_>::value));
  auto deleter = [](T* t) { t->destroy(); };
  return std::unique_ptr<T, decltype(deleter)>{static_cast<T*>(t), deleter};
}

constexpr long double operator"" _GiB(long double val) {
  return val * (1 << 30);
}
constexpr long double operator"" _MiB(long double val) {
  return val * (1 << 20);
}
constexpr long double operator"" _KiB(long double val) {
  return val * (1 << 10);
}

// These are necessary if we want to be able to write 1_GiB instead of 1.0_GiB.
// Since the return type is signed, -1_GiB will work as expected.
constexpr long long int operator"" _GiB(unsigned long long val) {
  return val * (1 << 30);
}
constexpr long long int operator"" _MiB(unsigned long long val) {
  return val * (1 << 20);
}
constexpr long long int operator"" _KiB(unsigned long long val) {
  return val * (1 << 10);
}
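
// Example (illustrative): both overloads yield a byte count, so
//   1_GiB   == 1073741824     (integer overload)
//   1.5_GiB == 1610612736.0   (long double overload)
// which is convenient for calls such as config->setMaxWorkspaceSize(1_GiB).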

struct SimpleProfiler : public nvinfer1::IProfiler {
  struct Record {
    float time{0};
    int count{0};
  };

  virtual void reportLayerTime(const char* layerName, float ms) noexcept {
    mProfile[layerName].count++;
    mProfile[layerName].time += ms;
    if (std::find(mLayerNames.begin(), mLayerNames.end(), layerName) ==
        mLayerNames.end()) {
      mLayerNames.push_back(layerName);
    }
  }

  SimpleProfiler(const char* name,
                 const std::vector<SimpleProfiler>& srcProfilers =
                     std::vector<SimpleProfiler>())
      : mName(name) {
    for (const auto& srcProfiler : srcProfilers) {
      for (const auto& rec : srcProfiler.mProfile) {
        auto it = mProfile.find(rec.first);
        if (it == mProfile.end()) {
          mProfile.insert(rec);
        } else {
          it->second.time += rec.second.time;
          it->second.count += rec.second.count;
        }
      }
    }
  }

  friend std::ostream& operator<<(std::ostream& out,
                                  const SimpleProfiler& value) {
    out << "========== " << value.mName << " profile ==========" << std::endl;
    float totalTime = 0;
    std::string layerNameStr = "TensorRT layer name";
    int maxLayerNameLength =
        std::max(static_cast<int>(layerNameStr.size()), 70);
    for (const auto& elem : value.mProfile) {
      totalTime += elem.second.time;
      maxLayerNameLength =
          std::max(maxLayerNameLength, static_cast<int>(elem.first.size()));
    }

    auto old_settings = out.flags();
    auto old_precision = out.precision();
    // Output header
    {
      out << std::setw(maxLayerNameLength) << layerNameStr << " ";
      out << std::setw(12) << "Runtime, "
          << "%"
          << " ";
      out << std::setw(12) << "Invocations"
          << " ";
      out << std::setw(12) << "Runtime, ms" << std::endl;
    }
    for (size_t i = 0; i < value.mLayerNames.size(); i++) {
      const std::string layerName = value.mLayerNames[i];
      auto elem = value.mProfile.at(layerName);
      out << std::setw(maxLayerNameLength) << layerName << " ";
      out << std::setw(12) << std::fixed << std::setprecision(1)
          << (elem.time * 100.0F / totalTime) << "%"
          << " ";
      out << std::setw(12) << elem.count << " ";
      out << std::setw(12) << std::fixed << std::setprecision(2) << elem.time
          << std::endl;
    }
    out.flags(old_settings);
    out.precision(old_precision);
    out << "========== " << value.mName << " total runtime = " << totalTime
        << " ms ==========" << std::endl;

    return out;
  }

 private:
  std::string mName;
  std::vector<std::string> mLayerNames;
  std::map<std::string, Record> mProfile;
};
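
// Usage sketch (illustrative): attach the profiler to an execution context,
// run inference, then stream the accumulated per-layer table to stdout.
//
//   SimpleProfiler profiler("myNetwork");
//   context->setProfiler(&profiler);
//   context->executeV2(bindings);  // reportLayerTime() is invoked per layer
//   std::cout << profiler;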

//! Locate path to file, given its filename or filepath suffix and possible
//! dirs it might lie in.
//! Function will also walk back MAX_DEPTH dirs from CWD to check for such a
//! file path.
inline std::string locateFile(const std::string& filepathSuffix,
                              const std::vector<std::string>& directories,
                              bool reportError = true) {
  const int MAX_DEPTH{10};
  bool found{false};
  std::string filepath;

  for (auto& dir : directories) {
    if (!dir.empty() && dir.back() != '/') {
#ifdef _MSC_VER
      filepath = dir + "\\" + filepathSuffix;
#else
      filepath = dir + "/" + filepathSuffix;
#endif
    } else {
      filepath = dir + filepathSuffix;
    }

    for (int i = 0; i < MAX_DEPTH && !found; i++) {
      const std::ifstream checkFile(filepath);
      found = checkFile.is_open();
      if (found) {
        break;
      }

      filepath = "../" + filepath; // Try again in parent dir
    }

    if (found) {
      break;
    }

    filepath.clear();
  }

  // Could not find the file
  if (filepath.empty()) {
    const std::string dirList = std::accumulate(
        directories.begin() + 1, directories.end(), directories.front(),
        [](const std::string& a, const std::string& b) {
          return a + "\n\t" + b;
        });
    std::cout << "Could not find " << filepathSuffix
              << " in data directories:\n\t" << dirList << std::endl;

    if (reportError) {
      std::cout << "&&&& FAILED" << std::endl;
      exit(EXIT_FAILURE);
    }
  }

  return filepath;
}

inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int inH,
                        int inW) {
  std::ifstream infile(fileName, std::ifstream::binary);
  assert(infile.is_open() &&
         "Attempting to read from a file that is not open.");
  std::string magic, h, w, max;
  infile >> magic >> h >> w >> max;
  infile.seekg(1, infile.cur);
  infile.read(reinterpret_cast<char*>(buffer), inH * inW);
}

namespace samplesCommon {

// Swaps endianness of an integral type.
template <typename T,
          typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
inline T swapEndianness(const T& value) {
  uint8_t bytes[sizeof(T)];
  for (int i = 0; i < static_cast<int>(sizeof(T)); ++i) {
    bytes[sizeof(T) - 1 - i] = *(reinterpret_cast<const uint8_t*>(&value) + i);
  }
  return *reinterpret_cast<T*>(bytes);
}
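
// Example (illustrative): swapEndianness(uint32_t{0x12345678}) == 0x78563412,
// i.e. the byte order of the integral value is reversed.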

class HostMemory {
 public:
  HostMemory() = delete;
  virtual void* data() const noexcept { return mData; }
  virtual std::size_t size() const noexcept { return mSize; }
  virtual DataType type() const noexcept { return mType; }
  virtual ~HostMemory() {}

 protected:
  HostMemory(std::size_t size, DataType type)
      : mData{nullptr}, mSize(size), mType(type) {}
  void* mData;
  std::size_t mSize;
  DataType mType;
};

template <typename ElemType, DataType dataType>
class TypedHostMemory : public HostMemory {
 public:
  explicit TypedHostMemory(std::size_t size) : HostMemory(size, dataType) {
    mData = new ElemType[size];
  }
  ~TypedHostMemory() noexcept { delete[] static_cast<ElemType*>(mData); }
  ElemType* raw() noexcept { return static_cast<ElemType*>(data()); }
};

using FloatMemory = TypedHostMemory<float, DataType::kFLOAT>;
using HalfMemory = TypedHostMemory<uint16_t, DataType::kHALF>;
using ByteMemory = TypedHostMemory<uint8_t, DataType::kINT8>;

inline void* safeCudaMalloc(size_t memSize) {
  void* deviceMem;
  CHECK(cudaMalloc(&deviceMem, memSize));
  if (deviceMem == nullptr) {
    std::cerr << "Out of memory" << std::endl;
    exit(1);
  }
  return deviceMem;
}

inline bool isDebug() { return (std::getenv("TENSORRT_DEBUG") ? true : false); }

struct InferDeleter {
  template <typename T> void operator()(T* obj) const { delete obj; }
};

template <typename T> using SampleUniquePtr = std::unique_ptr<T, InferDeleter>;

static auto StreamDeleter = [](cudaStream_t* pStream) {
  if (pStream) {
    cudaStreamDestroy(*pStream);
    delete pStream;
  }
};

inline std::unique_ptr<cudaStream_t, decltype(StreamDeleter)> makeCudaStream() {
  std::unique_ptr<cudaStream_t, decltype(StreamDeleter)> pStream(
      new cudaStream_t, StreamDeleter);
  if (cudaStreamCreateWithFlags(pStream.get(), cudaStreamNonBlocking) !=
      cudaSuccess) {
    pStream.reset(nullptr);
  }

  return pStream;
}
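
// Usage sketch (illustrative): makeCudaStream() returns an owning pointer
// whose deleter destroys the stream; a null result signals creation failure.
// `buffers` below stands for a BufferManager from buffers.h.
//
//   auto stream = samplesCommon::makeCudaStream();
//   if (!stream) { /* handle stream-creation failure */ }
//   buffers.copyInputToDeviceAsync(*stream);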

//! Return vector of indices that puts magnitudes of sequence in descending
//! order.
template <class Iter>
std::vector<size_t> argMagnitudeSort(Iter begin, Iter end) {
  std::vector<size_t> indices(end - begin);
  std::iota(indices.begin(), indices.end(), 0);
  std::sort(indices.begin(), indices.end(), [&begin](size_t i, size_t j) {
    return std::abs(begin[j]) < std::abs(begin[i]);
  });
  return indices;
}
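
// Example (illustrative): for v = {0.1f, -3.0f, 2.0f},
// argMagnitudeSort(v.cbegin(), v.cend()) yields {1, 2, 0},
// since |-3.0| > |2.0| > |0.1|.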

inline bool readReferenceFile(const std::string& fileName,
                              std::vector<std::string>& refVector) {
  std::ifstream infile(fileName);
  if (!infile.is_open()) {
    std::cout << "ERROR: readReferenceFile: Attempting to read from a file "
                 "that is not open."
              << std::endl;
    return false;
  }
  std::string line;
  while (std::getline(infile, line)) {
    if (line.empty())
      continue;
    refVector.push_back(line);
  }
  infile.close();
  return true;
}

template <typename T>
std::vector<std::string> classify(const std::vector<std::string>& refVector,
                                  const std::vector<T>& output,
                                  const size_t topK) {
  const auto inds =
      samplesCommon::argMagnitudeSort(output.cbegin(), output.cend());
  std::vector<std::string> result;
  result.reserve(topK);
  for (size_t k = 0; k < topK; ++k) {
    result.push_back(refVector[inds[k]]);
  }
  return result;
}

// Returns indices of highest K magnitudes in v.
template <typename T>
std::vector<size_t> topKMagnitudes(const std::vector<T>& v, const size_t k) {
  std::vector<size_t> indices =
      samplesCommon::argMagnitudeSort(v.cbegin(), v.cend());
  indices.resize(k);
  return indices;
}

template <typename T>
bool readASCIIFile(const std::string& fileName, const size_t size,
                   std::vector<T>& out) {
  std::ifstream infile(fileName);
  if (!infile.is_open()) {
    std::cout << "ERROR readASCIIFile: Attempting to read from a file that is "
                 "not open."
              << std::endl;
    return false;
  }
  out.clear();
  out.reserve(size);
  out.assign(std::istream_iterator<T>(infile), std::istream_iterator<T>());
  infile.close();
  return true;
}

template <typename T>
bool writeASCIIFile(const std::string& fileName, const std::vector<T>& in) {
  std::ofstream outfile(fileName);
  if (!outfile.is_open()) {
    std::cout << "ERROR: writeASCIIFile: Attempting to write to a file that is "
                 "not open."
              << std::endl;
    return false;
  }
  for (auto fn : in) {
    outfile << fn << "\n";
  }
  outfile.close();
  return true;
}

inline void print_version() {
  std::cout << " TensorRT version: " << NV_TENSORRT_MAJOR << "."
            << NV_TENSORRT_MINOR << "." << NV_TENSORRT_PATCH << "."
            << NV_TENSORRT_BUILD << std::endl;
}

inline std::string getFileType(const std::string& filepath) {
  return filepath.substr(filepath.find_last_of(".") + 1);
}

inline std::string toLower(const std::string& inp) {
  std::string out = inp;
  std::transform(out.begin(), out.end(), out.begin(), ::tolower);
  return out;
}

inline float getMaxValue(const float* buffer, int64_t size) {
  assert(buffer != nullptr);
  assert(size > 0);
  return *std::max_element(buffer, buffer + size);
}

// Ensures that every tensor used by a network has a dynamic range set.
//
// All tensors in a network must have a dynamic range specified if a calibrator
// is not used.
// This function is just a utility to globally fill in missing scales and
// zero-points for the entire network.
//
// If a tensor does not have a dynamic range set, it is assigned inRange or
// outRange as follows:
//
// * If the tensor is the input to a layer or output of a pooling node, its
//   dynamic range is derived from inRange.
// * Otherwise its dynamic range is derived from outRange.
//
// The default parameter values are intended to demonstrate, for final layers
// in the network, cases where dynamic ranges are asymmetric.
//
// The default parameter values were chosen arbitrarily. Range values should be
// chosen such that we avoid underflow or overflow. Also, range values should
// be nonzero to avoid a uniform zero scale tensor.
inline void setAllDynamicRanges(INetworkDefinition* network,
                                float inRange = 2.0f, float outRange = 4.0f) {
  // Ensure that all layer inputs have a scale.
  for (int i = 0; i < network->getNbLayers(); i++) {
    auto layer = network->getLayer(i);
    for (int j = 0; j < layer->getNbInputs(); j++) {
      ITensor* input{layer->getInput(j)};
      // Optional inputs are nullptr here and are from RNN layers.
      if (input != nullptr && !input->dynamicRangeIsSet()) {
        ASSERT(input->setDynamicRange(-inRange, inRange));
      }
    }
  }

  // Ensure that all layer outputs have a scale.
  // Tensors that are also inputs to layers are ignored here
  // since the previous loop nest assigned scales to them.
  for (int i = 0; i < network->getNbLayers(); i++) {
    auto layer = network->getLayer(i);
    for (int j = 0; j < layer->getNbOutputs(); j++) {
      ITensor* output{layer->getOutput(j)};
      // Optional outputs are nullptr here and are from RNN layers.
      if (output != nullptr && !output->dynamicRangeIsSet()) {
        // Pooling must have the same input and output scales.
        if (layer->getType() == LayerType::kPOOLING) {
          ASSERT(output->setDynamicRange(-inRange, inRange));
        } else {
          ASSERT(output->setDynamicRange(-outRange, outRange));
        }
      }
    }
  }
}

inline void setDummyInt8DynamicRanges(const IBuilderConfig* c,
                                      INetworkDefinition* n) {
  // Set dummy per-tensor dynamic range if Int8 mode is requested.
  if (c->getFlag(BuilderFlag::kINT8)) {
    sample::gLogWarning << "Int8 calibrator not provided. Generating dummy "
                           "per-tensor dynamic range. Int8 accuracy is not "
                           "guaranteed."
                        << std::endl;
    setAllDynamicRanges(n);
  }
}

inline void enableDLA(IBuilder* builder, IBuilderConfig* config, int useDLACore,
                      bool allowGPUFallback = true) {
  if (useDLACore >= 0) {
    if (builder->getNbDLACores() == 0) {
      std::cerr << "Trying to use DLA core " << useDLACore
                << " on a platform that doesn't have any DLA cores"
                << std::endl;
      assert(
          "Error: use DLA core on a platform that doesn't have any DLA cores" &&
          false);
    }
    if (allowGPUFallback) {
      config->setFlag(BuilderFlag::kGPU_FALLBACK);
    }
    if (!config->getFlag(BuilderFlag::kINT8)) {
      // User has not requested INT8 Mode.
      // By default run in FP16 mode. FP32 mode is not permitted.
      config->setFlag(BuilderFlag::kFP16);
    }
    config->setDefaultDeviceType(DeviceType::kDLA);
    config->setDLACore(useDLACore);
  }
}

inline int32_t parseDLA(int32_t argc, char** argv) {
  for (int32_t i = 1; i < argc; i++) {
    if (strncmp(argv[i], "--useDLACore=", 13) == 0) {
      return std::stoi(argv[i] + 13);
    }
  }
  return -1;
}

inline uint32_t getElementSize(nvinfer1::DataType t) noexcept {
  switch (t) {
  case nvinfer1::DataType::kINT32:
    return 4;
  case nvinfer1::DataType::kFLOAT:
    return 4;
  case nvinfer1::DataType::kHALF:
    return 2;
  case nvinfer1::DataType::kBOOL:
  case nvinfer1::DataType::kINT8:
    return 1;
  }
  return 0;
}

inline int64_t volume(const nvinfer1::Dims& d) {
  return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int64_t>());
}
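
// Example (illustrative): for dims (3, 224, 224),
// volume(dims) == 3 * 224 * 224 == 150528 elements.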

template <int C, int H, int W> struct PPM {
  std::string magic, fileName;
  int h, w, max;
  uint8_t buffer[C * H * W];
};

// New vPPM (variable-sized PPM) class with variable dimensions.
struct vPPM {
  std::string magic, fileName;
  int h, w, max;
  std::vector<uint8_t> buffer;
};

struct BBox {
  float x1, y1, x2, y2;
};

template <int C, int H, int W>
void readPPMFile(const std::string& filename,
                 samplesCommon::PPM<C, H, W>& ppm) {
  ppm.fileName = filename;
  std::ifstream infile(filename, std::ifstream::binary);
  assert(infile.is_open() &&
         "Attempting to read from a file that is not open.");
  infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
  infile.seekg(1, infile.cur);
  infile.read(reinterpret_cast<char*>(ppm.buffer), ppm.w * ppm.h * 3);
}

inline void readPPMFile(const std::string& filename, vPPM& ppm,
                        std::vector<std::string>& input_dir) {
  ppm.fileName = filename;
  std::ifstream infile(locateFile(filename, input_dir), std::ifstream::binary);
  infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
  infile.seekg(1, infile.cur);

  for (int i = 0; i < ppm.w * ppm.h * 3; ++i) {
    ppm.buffer.push_back(0);
  }

  infile.read(reinterpret_cast<char*>(&ppm.buffer[0]), ppm.w * ppm.h * 3);
}

template <int C, int H, int W>
void writePPMFileWithBBox(const std::string& filename, PPM<C, H, W>& ppm,
                          const BBox& bbox) {
  std::ofstream outfile("./" + filename, std::ofstream::binary);
  assert(!outfile.fail());
  outfile << "P6"
          << "\n"
          << ppm.w << " " << ppm.h << "\n"
          << ppm.max << "\n";

  auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); };
  const int x1 = std::min(std::max(0, round(int(bbox.x1))), W - 1);
  const int x2 = std::min(std::max(0, round(int(bbox.x2))), W - 1);
  const int y1 = std::min(std::max(0, round(int(bbox.y1))), H - 1);
  const int y2 = std::min(std::max(0, round(int(bbox.y2))), H - 1);

  for (int x = x1; x <= x2; ++x) {
    // bbox top border
    ppm.buffer[(y1 * ppm.w + x) * 3] = 255;
    ppm.buffer[(y1 * ppm.w + x) * 3 + 1] = 0;
    ppm.buffer[(y1 * ppm.w + x) * 3 + 2] = 0;
    // bbox bottom border
    ppm.buffer[(y2 * ppm.w + x) * 3] = 255;
    ppm.buffer[(y2 * ppm.w + x) * 3 + 1] = 0;
    ppm.buffer[(y2 * ppm.w + x) * 3 + 2] = 0;
  }

  for (int y = y1; y <= y2; ++y) {
    // bbox left border
    ppm.buffer[(y * ppm.w + x1) * 3] = 255;
    ppm.buffer[(y * ppm.w + x1) * 3 + 1] = 0;
    ppm.buffer[(y * ppm.w + x1) * 3 + 2] = 0;
    // bbox right border
    ppm.buffer[(y * ppm.w + x2) * 3] = 255;
    ppm.buffer[(y * ppm.w + x2) * 3 + 1] = 0;
    ppm.buffer[(y * ppm.w + x2) * 3 + 2] = 0;
  }

  outfile.write(reinterpret_cast<char*>(ppm.buffer), ppm.w * ppm.h * 3);
}

inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm,
                                 std::vector<BBox>& dets) {
  std::ofstream outfile("./" + filename, std::ofstream::binary);
  assert(!outfile.fail());
  outfile << "P6"
          << "\n"
          << ppm.w << " " << ppm.h << "\n"
          << ppm.max << "\n";
  auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); };

  for (auto bbox : dets) {
    for (int x = int(bbox.x1); x < int(bbox.x2); ++x) {
      // bbox top border
      ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3] = 255;
      ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 1] = 0;
      ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 2] = 0;
      // bbox bottom border
      ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3] = 255;
      ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 1] = 0;
      ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 2] = 0;
    }

    for (int y = int(bbox.y1); y < int(bbox.y2); ++y) {
      // bbox left border
      ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3] = 255;
      ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 1] = 0;
      ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 2] = 0;
      // bbox right border
      ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3] = 255;
      ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 1] = 0;
      ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 2] = 0;
    }
  }

  outfile.write(reinterpret_cast<char*>(&ppm.buffer[0]), ppm.w * ppm.h * 3);
}

class TimerBase {
 public:
  virtual void start() {}
  virtual void stop() {}
  float microseconds() const noexcept { return mMs * 1000.f; }
  float milliseconds() const noexcept { return mMs; }
  float seconds() const noexcept { return mMs / 1000.f; }
  void reset() noexcept { mMs = 0.f; }

 protected:
  float mMs{0.0f};
};

class GpuTimer : public TimerBase {
 public:
  explicit GpuTimer(cudaStream_t stream) : mStream(stream) {
    CHECK(cudaEventCreate(&mStart));
    CHECK(cudaEventCreate(&mStop));
  }
  ~GpuTimer() {
    CHECK(cudaEventDestroy(mStart));
    CHECK(cudaEventDestroy(mStop));
  }
  void start() { CHECK(cudaEventRecord(mStart, mStream)); }
  void stop() {
    CHECK(cudaEventRecord(mStop, mStream));
    float ms{0.0f};
    CHECK(cudaEventSynchronize(mStop));
    CHECK(cudaEventElapsedTime(&ms, mStart, mStop));
    mMs += ms;
  }

 private:
  cudaEvent_t mStart, mStop;
  cudaStream_t mStream;
}; // class GpuTimer

template <typename Clock> class CpuTimer : public TimerBase {
 public:
  using clock_type = Clock;

  void start() { mStart = Clock::now(); }
  void stop() {
    mStop = Clock::now();
    mMs += std::chrono::duration<float, std::milli>{mStop - mStart}.count();
  }

 private:
  std::chrono::time_point<Clock> mStart, mStop;
}; // class CpuTimer

using PreciseCpuTimer = CpuTimer<std::chrono::high_resolution_clock>;

inline std::vector<std::string> splitString(std::string str,
                                            char delimiter = ',') {
  std::vector<std::string> splitVect;
  std::stringstream ss(str);
  std::string substr;

  while (ss.good()) {
    getline(ss, substr, delimiter);
    splitVect.emplace_back(std::move(substr));
  }
  return splitVect;
}

// Return m rounded up to nearest multiple of n
inline int roundUp(int m, int n) { return ((m + n - 1) / n) * n; }
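
// Example (illustrative): roundUp(10, 8) == 16 and roundUp(16, 8) == 16,
// i.e. m is rounded up to the nearest multiple of n.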

inline int getC(const Dims& d) { return d.nbDims >= 3 ? d.d[d.nbDims - 3] : 1; }

inline int getH(const Dims& d) { return d.nbDims >= 2 ? d.d[d.nbDims - 2] : 1; }

inline int getW(const Dims& d) { return d.nbDims >= 1 ? d.d[d.nbDims - 1] : 1; }

inline void loadLibrary(const std::string& path) {
#ifdef _MSC_VER
  void* handle = LoadLibrary(path.c_str());
#else
  int32_t flags{RTLD_LAZY};
#if ENABLE_ASAN
  // https://github.com/google/sanitizers/issues/89
  // asan doesn't handle module unloading correctly and there are no plans on
  // doing so. In order to get proper stack traces, don't delete the shared
  // library on close so that asan can resolve the symbols correctly.
  flags |= RTLD_NODELETE;
#endif // ENABLE_ASAN

  void* handle = dlopen(path.c_str(), flags);
#endif
  if (handle == nullptr) {
#ifdef _MSC_VER
    sample::gLogError << "Could not load plugin library: " << path << std::endl;
#else
    sample::gLogError << "Could not load plugin library: " << path
                      << ", due to: " << dlerror() << std::endl;
#endif
  }
}

inline int32_t getSMVersion() {
  int32_t deviceIndex = 0;
  CHECK(cudaGetDevice(&deviceIndex));

  int32_t major, minor;
  CHECK(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor,
                               deviceIndex));
  CHECK(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor,
                               deviceIndex));

  return ((major << 8) | minor);
}

inline bool isSMSafe() {
  const int32_t smVersion = getSMVersion();
  return smVersion == 0x0700 || smVersion == 0x0702 || smVersion == 0x0705 ||
         smVersion == 0x0800 || smVersion == 0x0806 || smVersion == 0x0807;
}

inline bool isDataTypeSupported(DataType dataType) {
  auto builder = SampleUniquePtr<nvinfer1::IBuilder>(
      nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
  if (!builder) {
    return false;
  }

  if ((dataType == DataType::kINT8 && !builder->platformHasFastInt8()) ||
      (dataType == DataType::kHALF && !builder->platformHasFastFp16())) {
    return false;
  }

  return true;
}

} // namespace samplesCommon

inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) {
  os << "(";
  for (int i = 0; i < dims.nbDims; ++i) {
    os << (i ? ", " : "") << dims.d[i];
  }
  return os << ")";
}

#endif // TENSORRT_COMMON_H
223
fastdeploy/backends/tensorrt/common/getOptions.cpp
Normal file
@@ -0,0 +1,223 @@
/*
 * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "getOptions.h"
#include "logger.h"

#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstring>
#include <set>

namespace nvinfer1 {
namespace utility {

//! Matching for TRTOptions is defined as follows:
//!
//! If A and B both have longName set, A matches B if and only if A.longName ==
//! B.longName and (A.shortName == B.shortName if both have short name set).
//!
//! If A only has shortName set and B only has longName set, then A does not
//! match B. It is assumed that when 2 TRTOptions are compared, one of them is
//! the definition of a TRTOption in the input to getOptions. As such, if the
//! definition only has shortName set, it will never be equal to a TRTOption
//! that does not have shortName set (and same for longName).
//!
//! If A and B both have shortName set but B does not have longName set, A
//! matches B if and only if A.shortName == B.shortName.
//!
//! If A has neither long nor short name set, A matches B if and only if B has
//! neither long nor short name set.
bool matches(const TRTOption& a, const TRTOption& b) {
  if (!a.longName.empty() && !b.longName.empty()) {
    if (a.shortName && b.shortName) {
      return (a.longName == b.longName) && (a.shortName == b.shortName);
    }
    return a.longName == b.longName;
  }

  // If only one of them is not set, this will return false anyway.
  return a.shortName == b.shortName;
}

//! getTRTOptionIndex returns the index of a TRTOption in a vector of
//! TRTOptions, -1 if not found.
int getTRTOptionIndex(const std::vector<TRTOption>& options,
                      const TRTOption& opt) {
  for (size_t i = 0; i < options.size(); ++i) {
    if (matches(opt, options[i])) {
      return static_cast<int>(i);
    }
  }
  return -1;
}

//! validateTRTOption will return a string containing an error message if the
//! option contains invalid characters (short names must be alphanumeric; long
//! names may contain only alphanumerics, '-', and '_'), or if there are
//! duplicate option names found. Otherwise, returns the empty string.
std::string validateTRTOption(const std::set<char>& seenShortNames,
                              const std::set<std::string>& seenLongNames,
                              const TRTOption& opt) {
  if (opt.shortName != 0) {
    if (!std::isalnum(opt.shortName)) {
      return "Short name '" + std::to_string(opt.shortName) +
             "' is non-alphanumeric";
    }

    if (seenShortNames.find(opt.shortName) != seenShortNames.end()) {
      return "Short name '" + std::to_string(opt.shortName) +
             "' is a duplicate";
    }
  }

  if (!opt.longName.empty()) {
    for (const char& c : opt.longName) {
      if (!std::isalnum(c) && c != '-' && c != '_') {
        return "Long name '" + opt.longName +
               "' contains characters that are not '-', '_', or alphanumeric";
      }
    }

    if (seenLongNames.find(opt.longName) != seenLongNames.end()) {
      return "Long name '" + opt.longName + "' is a duplicate";
    }
  }
  return "";
}

//! validateTRTOptions will return a string containing an error message if any
//! option contains invalid characters, or if there are duplicate option names
//! found. Otherwise, returns the empty string.
std::string validateTRTOptions(const std::vector<TRTOption>& options) {
  std::set<char> seenShortNames;
  std::set<std::string> seenLongNames;
  for (size_t i = 0; i < options.size(); ++i) {
    const std::string errMsg =
        validateTRTOption(seenShortNames, seenLongNames, options[i]);
    if (!errMsg.empty()) {
      return "Error '" + errMsg + "' at TRTOption " + std::to_string(i);
    }

    seenShortNames.insert(options[i].shortName);
    seenLongNames.insert(options[i].longName);
  }
  return "";
}

//! parseArgs parses an argument list and returns a TRTParsedArgs with the
//! fields set accordingly. Assumes that options is validated.
//! ErrMsg will be set if:
//! - an argument is null
//! - an argument is empty
//! - an argument does not specify an option (i.e. "-" and "--")
//! - a short argument has more than 1 character
//! - the last argument in the list requires a value
TRTParsedArgs parseArgs(int argc, const char* const* argv,
                        const std::vector<TRTOption>& options) {
  TRTParsedArgs parsedArgs;
  parsedArgs.values.resize(options.size());

  for (int i = 1; i < argc; ++i) // index of current command-line argument
  {
    if (argv[i] == nullptr) {
      return TRTParsedArgs{"Null argument at index " + std::to_string(i)};
    }

    const std::string argStr(argv[i]);
    if (argStr.empty()) {
      return TRTParsedArgs{"Empty argument at index " + std::to_string(i)};
    }

    // No starting hyphen means it is a positional argument
    if (argStr[0] != '-') {
      parsedArgs.positionalArgs.push_back(argStr);
      continue;
    }

    if (argStr == "-" || argStr == "--") {
      return TRTParsedArgs{"Argument does not specify an option at index " +
                           std::to_string(i)};
    }

    // If only 1 hyphen, char after is the flag.
    TRTOption opt{' ', "", false, ""};
    std::string value;
    if (argStr[1] != '-') {
      // Must only have 1 char after the hyphen
      if (argStr.size() > 2) {
        return TRTParsedArgs{
            "Short arg contains more than 1 character at index " +
            std::to_string(i)};
      }
      opt.shortName = argStr[1];
    } else {
      opt.longName = argStr.substr(2);

      // We need to support --foo=bar syntax, so look for '='
      const size_t eqIndex = opt.longName.find('=');
      if (eqIndex < opt.longName.size()) {
        value = opt.longName.substr(eqIndex + 1);
        opt.longName = opt.longName.substr(0, eqIndex);
      }
    }

    const int idx = getTRTOptionIndex(options, opt);
    if (idx < 0) {
      continue;
    }

    if (options[idx].valueRequired) {
      if (!value.empty()) {
        parsedArgs.values[idx].second.push_back(value);
        parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
        continue;
      }

      if (i + 1 >= argc) {
        return TRTParsedArgs{"Last argument requires value, but none given"};
      }

      const std::string nextArg(argv[i + 1]);
      if (nextArg.size() >= 1 && nextArg[0] == '-') {
        sample::gLogWarning << "Warning: Using '" << nextArg
                            << "' as a value for '" << argStr
                            << "'. Should this be its own flag?" << std::endl;
      }

      parsedArgs.values[idx].second.push_back(nextArg);
      i += 1; // Next argument already consumed

      parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
    } else {
      parsedArgs.values[idx].first += 1;
    }
  }
  return parsedArgs;
}

TRTParsedArgs getOptions(int argc, const char* const* argv,
                         const std::vector<TRTOption>& options) {
  const std::string errMsg = validateTRTOptions(options);
  if (!errMsg.empty()) {
    return TRTParsedArgs{errMsg};
  }
  return parseArgs(argc, argv, options);
}
|
||||
} // namespace utility
|
||||
} // namespace nvinfer1
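
// Example usage (a sketch, not part of the original sources; the option names
// are hypothetical). values[i].first is the occurrence count for options[i];
// values[i].second holds any values collected for it:
//
//   std::vector<nvinfer1::utility::TRTOption> options{
//       {'v', "verbose", false, "enable verbose output"},
//       {'m', "model", true, "path to the model file"}};
//   auto parsed = nvinfer1::utility::getOptions(argc, argv, options);
//   if (!parsed.errMsg.empty()) { /* report parsed.errMsg and exit */ }
//   const bool verbose = parsed.values[0].first > 0;
//   if (parsed.values[1].first > 0) {
//     const std::string model = parsed.values[1].second.back();
//   }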
|
||||
128
fastdeploy/backends/tensorrt/common/getOptions.h
Normal file
128
fastdeploy/backends/tensorrt/common/getOptions.h
Normal file
@@ -0,0 +1,128 @@
|
||||
/*
|
||||
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef TRT_GET_OPTIONS_H
|
||||
#define TRT_GET_OPTIONS_H
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace nvinfer1 {
|
||||
namespace utility {
|
||||
|
||||
//! TRTOption defines a command line option. At least 1 of shortName and
//! longName must be defined.
//! valueRequired should also be explicitly initialized, since reading an
//! uninitialized bool is undefined behavior.
|
||||
//! helpText is optional.
|
||||
struct TRTOption {
|
||||
  char shortName;       //!< Option name in short (single hyphen) form
                        //!< (e.g. -a, -b)
  std::string longName; //!< Option name in long (double hyphen) form
                        //!< (e.g. --foo, --bar)
  bool valueRequired;   //!< True if a value is needed for an option
                        //!< (e.g. -N 4, --foo bar)
  std::string helpText; //!< Text to show when printing out the command usage
|
||||
};
|
||||
|
||||
//! TRTParsedArgs is returned by getOptions after it has parsed a command line
|
||||
//! argument list (argv).
|
||||
//!
|
||||
//! errMsg is a string containing an error message if any errors occurred. If it
|
||||
//! is empty, no errors occurred.
|
||||
//!
|
||||
//! values stores one pair per option (in the order the options were given in
//! the input). Each pair contains an int (the number of occurrences) and a
//! vector of strings (the list of values). The user should know which of these
//! to use, and which options require values. For non-value options, only the
//! occurrence count is populated. For value-required options, occurrences ==
//! # of values. Values do not need to be unique.
|
||||
//!
|
||||
//! positionalArgs stores additional arguments that are passed in without an
|
||||
//! option (these must not start with a hyphen).
|
||||
struct TRTParsedArgs {
|
||||
std::string errMsg;
|
||||
std::vector<std::pair<int, std::vector<std::string>>> values;
|
||||
std::vector<std::string> positionalArgs;
|
||||
};
|
||||
|
||||
//! Parse the input arguments passed to main() and extract options as well as
|
||||
//! positional arguments.
|
||||
//!
|
||||
//! Options are passed to main() with a preceding hyphen '-'.
|
||||
//!
|
||||
//! If there is a single preceding hyphen, there should be exactly 1 character
|
||||
//! after the hyphen, which is interpreted as the option.
|
||||
//!
|
||||
//! If there are 2 preceding hyphens, the entire argument (without the hyphens)
|
||||
//! is interpreted as the option.
|
||||
//!
|
||||
//! If the option requires a value, the next argument is used as the value.
|
||||
//!
|
||||
//! Positional arguments must not start with a hyphen.
|
||||
//!
|
||||
//! If an argument requires a value, the next argument is interpreted as the
|
||||
//! value, even if it is the form of a valid option (i.e. --foo --bar will store
|
||||
//! "--bar" as a value for option "foo" if "foo" requires a value).
|
||||
//! We also support --name=value syntax. In this case, 'value' would be used as
|
||||
//! the value, NOT the next argument.
|
||||
//!
|
||||
//! For options:
|
||||
//! { { 'a', "", false },
|
||||
//! { 'b', "", false },
|
||||
//! { 0, "cee", false },
|
||||
//! { 'd', "", true },
|
||||
//! { 'e', "", true },
|
||||
//! { 'f', "foo", true } }
|
||||
//!
|
||||
//! ./main hello world -a -a --cee -d 12 -f 34
|
||||
//! and
|
||||
//! ./main hello world -a -a --cee -d 12 --foo 34
|
||||
//!
|
||||
//! will result in:
|
||||
//!
|
||||
//! TRTParsedArgs {
|
||||
//! errMsg: "",
|
||||
//! values: { { 2, {} },
|
||||
//! { 0, {} },
|
||||
//! { 1, {} },
|
||||
//! { 1, {"12"} },
|
||||
//! { 0, {} },
|
||||
//! { 1, {"34"} } }
|
||||
//! positionalArgs: {"hello", "world"},
|
||||
//! }
|
||||
//!
|
||||
//! Non-POSIX behavior:
|
||||
//! - Does not support "-abcde" as a shorthand for "-a -b -c -d -e". Each
|
||||
//! option must have its own hyphen prefix.
|
||||
//! - Does not support -e12 as a shorthand for "-e 12". Values MUST be
|
||||
//!   whitespace-separated from the option they are for.
|
||||
//!
|
||||
//! @param[in] argc The number of arguments passed to main (including the
|
||||
//! file name, which is disregarded)
|
||||
//! @param[in] argv The arguments passed to main (including the file name,
|
||||
//! which is disregarded)
|
||||
//! @param[in] options List of TRTOptions to parse
|
||||
//! @return TRTParsedArgs. See TRTParsedArgs documentation for descriptions of
|
||||
//! the fields.
|
||||
TRTParsedArgs getOptions(int argc, const char* const* argv,
|
||||
const std::vector<TRTOption>& options);
|
||||
} // namespace utility
|
||||
} // namespace nvinfer1
|
||||
|
||||
#endif // TRT_GET_OPTIONS_H
|
||||
3787
fastdeploy/backends/tensorrt/common/half.h
Normal file
3787
fastdeploy/backends/tensorrt/common/half.h
Normal file
File diff suppressed because it is too large
Load Diff
38
fastdeploy/backends/tensorrt/common/logger.cpp
Normal file
38
fastdeploy/backends/tensorrt/common/logger.cpp
Normal file
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "logger.h"
|
||||
#include "ErrorRecorder.h"
|
||||
#include "logging.h"
|
||||
|
||||
SampleErrorRecorder gRecorder;
|
||||
namespace sample {
|
||||
Logger gLogger{Logger::Severity::kINFO};
|
||||
LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)};
|
||||
LogStreamConsumer gLogInfo{LOG_INFO(gLogger)};
|
||||
LogStreamConsumer gLogWarning{LOG_WARN(gLogger)};
|
||||
LogStreamConsumer gLogError{LOG_ERROR(gLogger)};
|
||||
LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)};
|
||||
|
||||
void setReportableSeverity(Logger::Severity severity) {
|
||||
gLogger.setReportableSeverity(severity);
|
||||
gLogVerbose.setReportableSeverity(severity);
|
||||
gLogInfo.setReportableSeverity(severity);
|
||||
gLogWarning.setReportableSeverity(severity);
|
||||
gLogError.setReportableSeverity(severity);
|
||||
gLogFatal.setReportableSeverity(severity);
|
||||
}
|
||||
} // namespace sample
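
// Example usage (a sketch): the globals above can be used anywhere logger.h
// is included. Raising the reportable severity to kVERBOSE makes gLogVerbose
// messages visible as well.
//
//   sample::setReportableSeverity(sample::Severity::kVERBOSE);
//   sample::gLogInfo << "building engine" << std::endl;
//   sample::gLogWarning << "falling back to FP32" << std::endl;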
|
||||
35
fastdeploy/backends/tensorrt/common/logger.h
Normal file
35
fastdeploy/backends/tensorrt/common/logger.h
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LOGGER_H
|
||||
#define LOGGER_H
|
||||
|
||||
#include "logging.h"
|
||||
|
||||
class SampleErrorRecorder;
|
||||
extern SampleErrorRecorder gRecorder;
|
||||
namespace sample {
|
||||
extern Logger gLogger;
|
||||
extern LogStreamConsumer gLogVerbose;
|
||||
extern LogStreamConsumer gLogInfo;
|
||||
extern LogStreamConsumer gLogWarning;
|
||||
extern LogStreamConsumer gLogError;
|
||||
extern LogStreamConsumer gLogFatal;
|
||||
|
||||
void setReportableSeverity(Logger::Severity severity);
|
||||
} // namespace sample
|
||||
|
||||
#endif // LOGGER_H
|
||||
573
fastdeploy/backends/tensorrt/common/logging.h
Normal file
573
fastdeploy/backends/tensorrt/common/logging.h
Normal file
@@ -0,0 +1,573 @@
|
||||
/*
|
||||
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef TENSORRT_LOGGING_H
|
||||
#define TENSORRT_LOGGING_H
|
||||
|
||||
#include "NvInferRuntimeCommon.h"
|
||||
#include "sampleOptions.h"
|
||||
#include <cassert>
|
||||
#include <ctime>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
namespace sample {
|
||||
|
||||
using Severity = nvinfer1::ILogger::Severity;
|
||||
|
||||
class LogStreamConsumerBuffer : public std::stringbuf {
|
||||
public:
|
||||
LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix,
|
||||
bool shouldLog)
|
||||
: mOutput(stream), mPrefix(prefix), mShouldLog(shouldLog) {}
|
||||
|
||||
LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) noexcept
|
||||
: mOutput(other.mOutput), mPrefix(other.mPrefix),
|
||||
mShouldLog(other.mShouldLog) {}
|
||||
LogStreamConsumerBuffer(const LogStreamConsumerBuffer& other) = delete;
|
||||
LogStreamConsumerBuffer() = delete;
|
||||
LogStreamConsumerBuffer& operator=(const LogStreamConsumerBuffer&) = delete;
|
||||
LogStreamConsumerBuffer& operator=(LogStreamConsumerBuffer&&) = delete;
|
||||
|
||||
~LogStreamConsumerBuffer() override {
|
||||
// std::streambuf::pbase() gives a pointer to the beginning of the buffered
|
||||
// part of the output sequence
|
||||
// std::streambuf::pptr() gives a pointer to the current position of the
|
||||
// output sequence
|
||||
// if the pointer to the beginning is not equal to the pointer to the
|
||||
// current position,
|
||||
// call putOutput() to log the output to the stream
|
||||
if (pbase() != pptr()) {
|
||||
putOutput();
|
||||
}
|
||||
}
|
||||
|
||||
//!
|
||||
  //! Synchronizes the stream buffer and returns 0 on success.
  //! Synchronizing the stream buffer consists of inserting the buffer
  //! contents into the stream, resetting the buffer, and flushing the stream.
|
||||
//!
|
||||
int32_t sync() override {
|
||||
putOutput();
|
||||
return 0;
|
||||
}
|
||||
|
||||
void putOutput() {
|
||||
if (mShouldLog) {
|
||||
// prepend timestamp
|
||||
std::time_t timestamp = std::time(nullptr);
|
||||
      tm* tm_local = std::localtime(&timestamp);
|
||||
mOutput << "[";
|
||||
mOutput << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon
|
||||
<< "/";
|
||||
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
|
||||
mOutput << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year
|
||||
<< "-";
|
||||
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
|
||||
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
|
||||
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
|
||||
// std::stringbuf::str() gets the string contents of the buffer
|
||||
      // insert the buffer contents, prepended with the appropriate prefix,
      // into the stream
|
||||
mOutput << mPrefix << str();
|
||||
}
|
||||
// set the buffer to empty
|
||||
str("");
|
||||
// flush the stream
|
||||
mOutput.flush();
|
||||
}
|
||||
|
||||
void setShouldLog(bool shouldLog) { mShouldLog = shouldLog; }
|
||||
|
||||
private:
|
||||
std::ostream& mOutput;
|
||||
std::string mPrefix;
|
||||
bool mShouldLog{};
|
||||
}; // class LogStreamConsumerBuffer
|
||||
|
||||
//!
|
||||
//! \class LogStreamConsumerBase
|
||||
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before
|
||||
//! std::ostream in LogStreamConsumer
|
||||
//!
|
||||
class LogStreamConsumerBase {
|
||||
public:
|
||||
LogStreamConsumerBase(std::ostream& stream, const std::string& prefix,
|
||||
bool shouldLog)
|
||||
: mBuffer(stream, prefix, shouldLog) {}
|
||||
|
||||
protected:
|
||||
std::mutex mLogMutex;
|
||||
LogStreamConsumerBuffer mBuffer;
|
||||
}; // class LogStreamConsumerBase
|
||||
|
||||
//!
|
||||
//! \class LogStreamConsumer
|
||||
//! \brief Convenience object used to facilitate use of C++ stream syntax when
|
||||
//! logging messages.
|
||||
//! Order of base classes is LogStreamConsumerBase and then std::ostream.
|
||||
//! This is because the LogStreamConsumerBase class is used to initialize the
|
||||
//! LogStreamConsumerBuffer member field
|
||||
//! in LogStreamConsumer and then the address of the buffer is passed to
|
||||
//! std::ostream.
|
||||
//! This is necessary to prevent the address of an uninitialized buffer from
|
||||
//! being passed to std::ostream.
|
||||
//! Please do not change the order of the parent classes.
|
||||
//!
|
||||
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream {
|
||||
public:
|
||||
//!
|
||||
//! \brief Creates a LogStreamConsumer which logs messages with level
|
||||
//! severity.
|
||||
//! Reportable severity determines if the messages are severe enough to be
|
||||
//! logged.
|
||||
//!
|
||||
LogStreamConsumer(nvinfer1::ILogger::Severity reportableSeverity,
|
||||
nvinfer1::ILogger::Severity severity)
|
||||
: LogStreamConsumerBase(severityOstream(severity),
|
||||
severityPrefix(severity),
|
||||
severity <= reportableSeverity),
|
||||
std::ostream(&mBuffer) // links the stream buffer with the stream
|
||||
,
|
||||
mShouldLog(severity <= reportableSeverity), mSeverity(severity) {}
|
||||
|
||||
LogStreamConsumer(LogStreamConsumer&& other) noexcept
|
||||
: LogStreamConsumerBase(severityOstream(other.mSeverity),
|
||||
severityPrefix(other.mSeverity),
|
||||
other.mShouldLog),
|
||||
std::ostream(&mBuffer) // links the stream buffer with the stream
|
||||
,
|
||||
mShouldLog(other.mShouldLog), mSeverity(other.mSeverity) {}
|
||||
LogStreamConsumer(const LogStreamConsumer& other) = delete;
|
||||
LogStreamConsumer() = delete;
|
||||
~LogStreamConsumer() = default;
|
||||
LogStreamConsumer& operator=(const LogStreamConsumer&) = delete;
|
||||
LogStreamConsumer& operator=(LogStreamConsumer&&) = delete;
|
||||
|
||||
void setReportableSeverity(Severity reportableSeverity) {
|
||||
mShouldLog = mSeverity <= reportableSeverity;
|
||||
mBuffer.setShouldLog(mShouldLog);
|
||||
}
|
||||
|
||||
std::mutex& getMutex() { return mLogMutex; }
|
||||
|
||||
bool getShouldLog() const { return mShouldLog; }
|
||||
|
||||
private:
|
||||
static std::ostream& severityOstream(Severity severity) {
|
||||
return severity >= Severity::kINFO ? std::cout : std::cerr;
|
||||
}
|
||||
|
||||
static std::string severityPrefix(Severity severity) {
|
||||
switch (severity) {
|
||||
case Severity::kINTERNAL_ERROR:
|
||||
return "[F] ";
|
||||
case Severity::kERROR:
|
||||
return "[E] ";
|
||||
case Severity::kWARNING:
|
||||
return "[W] ";
|
||||
case Severity::kINFO:
|
||||
return "[I] ";
|
||||
case Severity::kVERBOSE:
|
||||
return "[V] ";
|
||||
default:
|
||||
assert(0);
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
bool mShouldLog;
|
||||
Severity mSeverity;
|
||||
}; // class LogStreamConsumer
|
||||
|
||||
template <typename T>
|
||||
LogStreamConsumer& operator<<(LogStreamConsumer& logger, const T& obj) {
|
||||
if (logger.getShouldLog()) {
|
||||
std::lock_guard<std::mutex> guard(logger.getMutex());
|
||||
auto& os = static_cast<std::ostream&>(logger);
|
||||
os << obj;
|
||||
}
|
||||
return logger;
|
||||
}
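
// Note: the mutex is acquired per operator<< call, so the pieces of a single
// chained statement may interleave when multiple threads share one consumer.
// A sketch of one way to keep a message atomic (iter and ms are assumed
// locals) is to compose it first:
//
//   std::ostringstream msg;
//   msg << "iteration " << iter << " took " << ms << " ms";
//   sample::gLogInfo << msg.str() << std::endl;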
|
||||
|
||||
//!
|
||||
//! Special handling for std::endl
|
||||
//!
|
||||
inline LogStreamConsumer& operator<<(LogStreamConsumer& logger,
|
||||
std::ostream& (*f)(std::ostream&)) {
|
||||
if (logger.getShouldLog()) {
|
||||
std::lock_guard<std::mutex> guard(logger.getMutex());
|
||||
auto& os = static_cast<std::ostream&>(logger);
|
||||
os << f;
|
||||
}
|
||||
return logger;
|
||||
}
|
||||
|
||||
inline LogStreamConsumer& operator<<(LogStreamConsumer& logger,
|
||||
const nvinfer1::Dims& dims) {
|
||||
if (logger.getShouldLog()) {
|
||||
std::lock_guard<std::mutex> guard(logger.getMutex());
|
||||
auto& os = static_cast<std::ostream&>(logger);
|
||||
for (int32_t i = 0; i < dims.nbDims; ++i) {
|
||||
os << (i ? "x" : "") << dims.d[i];
|
||||
}
|
||||
}
|
||||
return logger;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \class Logger
|
||||
//!
|
||||
//! \brief Class which manages logging of TensorRT tools and samples
|
||||
//!
|
||||
//! \details This class provides a common interface for TensorRT tools and
|
||||
//! samples to log information to the console,
|
||||
//! and supports logging two types of messages:
|
||||
//!
|
||||
//! - Debugging messages with an associated severity (info, warning, error, or
|
||||
//! internal error/fatal)
|
||||
//! - Test pass/fail messages
|
||||
//!
|
||||
//! The advantage of having all samples use this class for logging as opposed to
|
||||
//! emitting directly to stdout/stderr is
|
||||
//! that the logic for controlling the verbosity and formatting of sample output
|
||||
//! is centralized in one location.
|
||||
//!
|
||||
//! In the future, this class could be extended to support dumping test results
|
||||
//! to a file in some standard format
|
||||
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the
|
||||
//! duration of a test run).
|
||||
//!
|
||||
//! TODO: For backwards compatibility with existing samples, this class inherits
|
||||
//! directly from the nvinfer1::ILogger
|
||||
//! interface, which is problematic since there isn't a clean separation between
|
||||
//! messages coming from the TensorRT
|
||||
//! library and messages coming from the sample.
|
||||
//!
|
||||
//! In the future (once all samples are updated to use Logger::getTRTLogger() to
|
||||
//! access the ILogger) we can refactor the
|
||||
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger
|
||||
//! implementation a member of the Logger
|
||||
//! object.
|
||||
//!
|
||||
class Logger : public nvinfer1::ILogger {
|
||||
public:
|
||||
explicit Logger(Severity severity = Severity::kWARNING)
|
||||
: mReportableSeverity(severity) {}
|
||||
|
||||
//!
|
||||
//! \enum TestResult
|
||||
//! \brief Represents the state of a given test
|
||||
//!
|
||||
enum class TestResult {
|
||||
kRUNNING, //!< The test is running
|
||||
kPASSED, //!< The test passed
|
||||
kFAILED, //!< The test failed
|
||||
kWAIVED //!< The test was waived
|
||||
};
|
||||
|
||||
//!
|
||||
//! \brief Forward-compatible method for retrieving the nvinfer::ILogger
|
||||
//! associated with this Logger
|
||||
//! \return The nvinfer1::ILogger associated with this Logger
|
||||
//!
|
||||
//! TODO Once all samples are updated to use this method to register the
|
||||
//! logger with TensorRT,
|
||||
//! we can eliminate the inheritance of Logger from ILogger
|
||||
//!
|
||||
nvinfer1::ILogger& getTRTLogger() noexcept { return *this; }
|
||||
|
||||
//!
|
||||
//! \brief Implementation of the nvinfer1::ILogger::log() virtual method
|
||||
//!
|
||||
//! Note samples should not be calling this function directly; it will
|
||||
//! eventually go away once we eliminate the
|
||||
//! inheritance from nvinfer1::ILogger
|
||||
//!
|
||||
void log(Severity severity, const char* msg) noexcept override {
|
||||
LogStreamConsumer(mReportableSeverity, severity)
|
||||
<< "[TRT] " << std::string(msg) << std::endl;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Method for controlling the verbosity of logging output
|
||||
//!
|
||||
//! \param severity The logger will only emit messages that have severity of
|
||||
//! this level or higher.
|
||||
//!
|
||||
void setReportableSeverity(Severity severity) noexcept {
|
||||
mReportableSeverity = severity;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Opaque handle that holds logging information for a particular test
|
||||
//!
|
||||
//! This object is an opaque handle to information used by the Logger to print
|
||||
//! test results.
|
||||
//! The sample must call Logger::defineTest() in order to obtain a TestAtom
|
||||
//! that can be used
|
||||
//! with Logger::reportTest{Start,End}().
|
||||
//!
|
||||
class TestAtom {
|
||||
public:
|
||||
TestAtom(TestAtom&&) = default;
|
||||
|
||||
private:
|
||||
friend class Logger;
|
||||
|
||||
TestAtom(bool started, const std::string& name, const std::string& cmdline)
|
||||
: mStarted(started), mName(name), mCmdline(cmdline) {}
|
||||
|
||||
bool mStarted;
|
||||
std::string mName;
|
||||
std::string mCmdline;
|
||||
};
|
||||
|
||||
//!
|
||||
//! \brief Define a test for logging
|
||||
//!
|
||||
  //! \param[in] name The name of the test. This should be a string starting
  //!                 with "TensorRT" and containing dot-separated strings
  //!                 containing the characters [A-Za-z0-9_].
  //!                 For example, "TensorRT.sample_googlenet"
  //! \param[in] cmdline The command line used to reproduce the test
  //!
|
||||
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
|
||||
//!
|
||||
static TestAtom defineTest(const std::string& name,
|
||||
const std::string& cmdline) {
|
||||
return TestAtom(false, name, cmdline);
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief A convenience overloaded version of defineTest() that accepts an
|
||||
//! array of command-line arguments
|
||||
//! as input
|
||||
//!
|
||||
//! \param[in] name The name of the test
|
||||
//! \param[in] argc The number of command-line arguments
|
||||
//! \param[in] argv The array of command-line arguments (given as C strings)
|
||||
//!
|
||||
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
|
||||
//!
|
||||
static TestAtom defineTest(const std::string& name, int32_t argc,
|
||||
char const* const* argv) {
|
||||
// Append TensorRT version as info
|
||||
const std::string vname =
|
||||
name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "]";
|
||||
auto cmdline = genCmdlineString(argc, argv);
|
||||
return defineTest(vname, cmdline);
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Report that a test has started.
|
||||
//!
|
||||
//! \pre reportTestStart() has not been called yet for the given testAtom
|
||||
//!
|
||||
//! \param[in] testAtom The handle to the test that has started
|
||||
//!
|
||||
static void reportTestStart(TestAtom& testAtom) {
|
||||
reportTestResult(testAtom, TestResult::kRUNNING);
|
||||
assert(!testAtom.mStarted);
|
||||
testAtom.mStarted = true;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Report that a test has ended.
|
||||
//!
|
||||
//! \pre reportTestStart() has been called for the given testAtom
|
||||
//!
|
||||
//! \param[in] testAtom The handle to the test that has ended
|
||||
//! \param[in] result The result of the test. Should be one of
|
||||
//! TestResult::kPASSED,
|
||||
//! TestResult::kFAILED, TestResult::kWAIVED
|
||||
//!
|
||||
static void reportTestEnd(TestAtom const& testAtom, TestResult result) {
|
||||
assert(result != TestResult::kRUNNING);
|
||||
assert(testAtom.mStarted);
|
||||
reportTestResult(testAtom, result);
|
||||
}
|
||||
|
||||
static int32_t reportPass(TestAtom const& testAtom) {
|
||||
reportTestEnd(testAtom, TestResult::kPASSED);
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
static int32_t reportFail(TestAtom const& testAtom) {
|
||||
reportTestEnd(testAtom, TestResult::kFAILED);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
static int32_t reportWaive(TestAtom const& testAtom) {
|
||||
reportTestEnd(testAtom, TestResult::kWAIVED);
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
static int32_t reportTest(TestAtom const& testAtom, bool pass) {
|
||||
return pass ? reportPass(testAtom) : reportFail(testAtom);
|
||||
}
|
||||
|
||||
Severity getReportableSeverity() const { return mReportableSeverity; }
|
||||
|
||||
private:
|
||||
//!
|
||||
//! \brief returns an appropriate string for prefixing a log message with the
|
||||
//! given severity
|
||||
//!
|
||||
static const char* severityPrefix(Severity severity) {
|
||||
switch (severity) {
|
||||
case Severity::kINTERNAL_ERROR:
|
||||
return "[F] ";
|
||||
case Severity::kERROR:
|
||||
return "[E] ";
|
||||
case Severity::kWARNING:
|
||||
return "[W] ";
|
||||
case Severity::kINFO:
|
||||
return "[I] ";
|
||||
case Severity::kVERBOSE:
|
||||
return "[V] ";
|
||||
default:
|
||||
assert(0);
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief returns an appropriate string for prefixing a test result message
|
||||
//! with the given result
|
||||
//!
|
||||
static const char* testResultString(TestResult result) {
|
||||
switch (result) {
|
||||
case TestResult::kRUNNING:
|
||||
return "RUNNING";
|
||||
case TestResult::kPASSED:
|
||||
return "PASSED";
|
||||
case TestResult::kFAILED:
|
||||
return "FAILED";
|
||||
case TestResult::kWAIVED:
|
||||
return "WAIVED";
|
||||
default:
|
||||
assert(0);
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief returns an appropriate output stream (cout or cerr) to use with the
|
||||
//! given severity
|
||||
//!
|
||||
static std::ostream& severityOstream(Severity severity) {
|
||||
return severity >= Severity::kINFO ? std::cout : std::cerr;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief method that implements logging test results
|
||||
//!
|
||||
static void reportTestResult(TestAtom const& testAtom, TestResult result) {
|
||||
severityOstream(Severity::kINFO)
|
||||
<< "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
|
||||
<< testAtom.mCmdline << std::endl;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief generate a command line string from the given (argc, argv) values
|
||||
//!
|
||||
static std::string genCmdlineString(int32_t argc, char const* const* argv) {
|
||||
std::stringstream ss;
|
||||
for (int32_t i = 0; i < argc; i++) {
|
||||
if (i > 0) {
|
||||
ss << " ";
|
||||
}
|
||||
ss << argv[i];
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
Severity mReportableSeverity;
|
||||
}; // class Logger
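
//! Typical test-reporting flow (a sketch; runSample() is a hypothetical
//! entry point):
//!
//!   auto atom = sample::Logger::defineTest("TensorRT.sample_demo", argc, argv);
//!   sample::Logger::reportTestStart(atom);
//!   const bool pass = runSample();
//!   return sample::Logger::reportTest(atom, pass); // prints PASSED or FAILED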
|
||||
|
||||
namespace {
|
||||
//!
|
||||
//! \brief produces a LogStreamConsumer object that can be used to log messages
|
||||
//! of severity kVERBOSE
|
||||
//!
|
||||
//! Example usage:
|
||||
//!
|
||||
//! LOG_VERBOSE(logger) << "hello world" << std::endl;
|
||||
//!
|
||||
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) {
|
||||
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief produces a LogStreamConsumer object that can be used to log messages
|
||||
//! of severity kINFO
|
||||
//!
|
||||
//! Example usage:
|
||||
//!
|
||||
//! LOG_INFO(logger) << "hello world" << std::endl;
|
||||
//!
|
||||
inline LogStreamConsumer LOG_INFO(const Logger& logger) {
|
||||
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief produces a LogStreamConsumer object that can be used to log messages
|
||||
//! of severity kWARNING
|
||||
//!
|
||||
//! Example usage:
|
||||
//!
|
||||
//! LOG_WARN(logger) << "hello world" << std::endl;
|
||||
//!
|
||||
inline LogStreamConsumer LOG_WARN(const Logger& logger) {
|
||||
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief produces a LogStreamConsumer object that can be used to log messages
|
||||
//! of severity kERROR
|
||||
//!
|
||||
//! Example usage:
|
||||
//!
|
||||
//! LOG_ERROR(logger) << "hello world" << std::endl;
|
||||
//!
|
||||
inline LogStreamConsumer LOG_ERROR(const Logger& logger) {
|
||||
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief produces a LogStreamConsumer object that can be used to log messages
|
||||
//! of severity kINTERNAL_ERROR
|
||||
//! ("fatal" severity)
|
||||
//!
|
||||
//! Example usage:
|
||||
//!
|
||||
//! LOG_FATAL(logger) << "hello world" << std::endl;
|
||||
//!
|
||||
inline LogStreamConsumer LOG_FATAL(const Logger& logger) {
|
||||
return LogStreamConsumer(logger.getReportableSeverity(),
|
||||
Severity::kINTERNAL_ERROR);
|
||||
}
|
||||
} // anonymous namespace
|
||||
} // namespace sample
|
||||
#endif // TENSORRT_LOGGING_H
|
||||
126
fastdeploy/backends/tensorrt/common/parserOnnxConfig.h
Normal file
126
fastdeploy/backends/tensorrt/common/parserOnnxConfig.h
Normal file
@@ -0,0 +1,126 @@
|
||||
/*
|
||||
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PARSER_ONNX_CONFIG_H
|
||||
#define PARSER_ONNX_CONFIG_H
|
||||
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "NvInfer.h"
|
||||
#include "NvOnnxConfig.h"
|
||||
#include "NvOnnxParser.h"
|
||||
|
||||
#define ONNX_DEBUG 1
|
||||
|
||||
/**
|
||||
* \class ParserOnnxConfig
|
||||
* \brief Configuration Manager Class Concrete Implementation
|
||||
*
|
||||
* \note:
|
||||
*
|
||||
*/
|
||||
|
||||
using namespace std;
|
||||
|
||||
class ParserOnnxConfig : public nvonnxparser::IOnnxConfig {
|
||||
protected:
|
||||
string mModelFilename{};
|
||||
string mTextFilename{};
|
||||
string mFullTextFilename{};
|
||||
nvinfer1::DataType mModelDtype;
|
||||
nvonnxparser::IOnnxConfig::Verbosity mVerbosity;
|
||||
bool mPrintLayercInfo;
|
||||
|
||||
public:
|
||||
ParserOnnxConfig()
|
||||
: mModelDtype(nvinfer1::DataType::kFLOAT),
|
||||
mVerbosity(static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)),
|
||||
mPrintLayercInfo(false) {
|
||||
#ifdef ONNX_DEBUG
|
||||
if (isDebug()) {
|
||||
std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
protected:
|
||||
~ParserOnnxConfig() {
|
||||
#ifdef ONNX_DEBUG
|
||||
if (isDebug()) {
|
||||
std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
public:
|
||||
virtual void setModelDtype(const nvinfer1::DataType modelDtype) noexcept {
|
||||
mModelDtype = modelDtype;
|
||||
}
|
||||
|
||||
virtual nvinfer1::DataType getModelDtype() const noexcept {
|
||||
return mModelDtype;
|
||||
}
|
||||
|
||||
virtual const char* getModelFileName() const noexcept {
|
||||
return mModelFilename.c_str();
|
||||
}
|
||||
virtual void setModelFileName(const char* onnxFilename) noexcept {
|
||||
mModelFilename = string(onnxFilename);
|
||||
}
|
||||
virtual nvonnxparser::IOnnxConfig::Verbosity
|
||||
getVerbosityLevel() const noexcept {
|
||||
return mVerbosity;
|
||||
}
|
||||
virtual void addVerbosity() noexcept { ++mVerbosity; }
|
||||
virtual void reduceVerbosity() noexcept { --mVerbosity; }
|
||||
virtual void
|
||||
setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept {
|
||||
mVerbosity = verbosity;
|
||||
}
|
||||
|
||||
virtual const char* getTextFileName() const noexcept {
|
||||
return mTextFilename.c_str();
|
||||
}
|
||||
virtual void setTextFileName(const char* textFilename) noexcept {
|
||||
mTextFilename = string(textFilename);
|
||||
}
|
||||
virtual const char* getFullTextFileName() const noexcept {
|
||||
return mFullTextFilename.c_str();
|
||||
}
|
||||
virtual void setFullTextFileName(const char* fullTextFilename) noexcept {
|
||||
mFullTextFilename = string(fullTextFilename);
|
||||
}
|
||||
virtual bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; }
|
||||
virtual void setPrintLayerInfo(bool src) noexcept {
|
||||
mPrintLayercInfo = src;
|
||||
  } //!< set the boolean variable corresponding to the Layer Info, see
    //! getPrintLayerInfo()
|
||||
|
||||
virtual bool isDebug() const noexcept {
|
||||
#if ONNX_DEBUG
|
||||
return (std::getenv("ONNX_DEBUG") ? true : false);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual void destroy() noexcept { delete this; }
|
||||
|
||||
}; // class ParserOnnxConfig
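
// Example usage (a sketch; the file name is hypothetical). The destructor is
// protected, so instances are released through destroy() rather than delete:
//
//   auto* config = new ParserOnnxConfig();
//   config->setModelFileName("model.onnx");
//   config->setVerbosityLevel(static_cast<nvonnxparser::IOnnxConfig::Verbosity>(
//       nvinfer1::ILogger::Severity::kINFO));
//   // ... hand the config to the parser ...
//   config->destroy();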
|
||||
|
||||
#endif
|
||||
65
fastdeploy/backends/tensorrt/common/safeCommon.h
Normal file
65
fastdeploy/backends/tensorrt/common/safeCommon.h
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef TENSORRT_SAFE_COMMON_H
|
||||
#define TENSORRT_SAFE_COMMON_H
|
||||
|
||||
#include "NvInferRuntimeCommon.h"
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#define CHECK(status) \
|
||||
do { \
|
||||
auto ret = (status); \
|
||||
if (ret != 0) { \
|
||||
std::cerr << "Cuda failure: " << ret << std::endl; \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
namespace samplesCommon {
|
||||
template <typename T> inline std::shared_ptr<T> infer_object(T* obj) {
|
||||
if (!obj) {
|
||||
throw std::runtime_error("Failed to create object");
|
||||
}
|
||||
return std::shared_ptr<T>(obj);
|
||||
}
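
// Example (a sketch): wrapping a raw TensorRT object so it is released
// automatically. The default shared_ptr deleter calls `delete`, which assumes
// a TensorRT version where the object's destructor is public (older releases
// required calling destroy() instead):
//
//   auto builder = samplesCommon::infer_object(
//       nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));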
|
||||
|
||||
inline uint32_t elementSize(nvinfer1::DataType t) {
|
||||
switch (t) {
|
||||
case nvinfer1::DataType::kINT32:
|
||||
case nvinfer1::DataType::kFLOAT:
|
||||
return 4;
|
||||
case nvinfer1::DataType::kHALF:
|
||||
return 2;
|
||||
case nvinfer1::DataType::kINT8:
|
||||
return 1;
|
||||
case nvinfer1::DataType::kBOOL:
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename A, typename B> inline A divUp(A x, B n) {
|
||||
return (x + n - 1) / n;
|
||||
}
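
// Example (a sketch): computing an allocation size with elementSize() and a
// CUDA launch grid with divUp(). The tensor shape is assumed.
//
//   const int64_t volume = 1 * 3 * 224 * 224;
//   const size_t bytes =
//       volume * samplesCommon::elementSize(nvinfer1::DataType::kHALF);
//   const int64_t blocks = samplesCommon::divUp(volume, 256); // 256 thr/block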
|
||||
|
||||
} // namespace samplesCommon
|
||||
|
||||
#endif // TENSORRT_SAFE_COMMON_H
|
||||
251
fastdeploy/backends/tensorrt/common/sampleConfig.h
Normal file
251
fastdeploy/backends/tensorrt/common/sampleConfig.h
Normal file
@@ -0,0 +1,251 @@
|
||||
/*
|
||||
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef SampleConfig_H
|
||||
#define SampleConfig_H
|
||||
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "NvInfer.h"
|
||||
#include "NvOnnxConfig.h"
|
||||
class SampleConfig : public nvonnxparser::IOnnxConfig {
|
||||
public:
|
||||
enum class InputDataFormat : int { kASCII = 0, kPPM = 1 };
|
||||
|
||||
private:
|
||||
std::string mModelFilename;
|
||||
std::string mEngineFilename;
|
||||
std::string mTextFilename;
|
||||
std::string mFullTextFilename;
|
||||
std::string mImageFilename;
|
||||
std::string mReferenceFilename;
|
||||
std::string mOutputFilename;
|
||||
std::string mCalibrationFilename;
|
||||
std::string mTimingCacheFilename;
|
||||
int64_t mLabel{-1};
|
||||
int64_t mMaxBatchSize{32};
|
||||
int64_t mCalibBatchSize{0};
|
||||
int64_t mMaxNCalibBatch{0};
|
||||
int64_t mFirstCalibBatch{0};
|
||||
int64_t mUseDLACore{-1};
|
||||
nvinfer1::DataType mModelDtype{nvinfer1::DataType::kFLOAT};
|
||||
bool mTF32{true};
|
||||
Verbosity mVerbosity{static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)};
|
||||
bool mPrintLayercInfo{false};
|
||||
bool mDebugBuilder{false};
|
||||
InputDataFormat mInputDataFormat{InputDataFormat::kASCII};
|
||||
uint64_t mTopK{0};
|
||||
float mFailurePercentage{-1.0f};
|
||||
float mTolerance{0.0f};
|
||||
float mAbsTolerance{1e-5f};
|
||||
|
||||
public:
|
||||
SampleConfig() {
|
||||
#ifdef ONNX_DEBUG
|
||||
if (isDebug()) {
|
||||
std::cout << " SampleConfig::ctor(): " << this << "\t" << std::endl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
protected:
|
||||
~SampleConfig() {
|
||||
#ifdef ONNX_DEBUG
|
||||
if (isDebug()) {
|
||||
std::cout << "SampleConfig::dtor(): " << this << std::endl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
public:
|
||||
void setModelDtype(const nvinfer1::DataType mdt) noexcept {
|
||||
mModelDtype = mdt;
|
||||
}
|
||||
|
||||
nvinfer1::DataType getModelDtype() const noexcept { return mModelDtype; }
|
||||
|
||||
bool getTF32() const noexcept { return mTF32; }
|
||||
|
||||
void setTF32(bool enabled) noexcept { mTF32 = enabled; }
|
||||
|
||||
const char* getModelFileName() const noexcept {
|
||||
return mModelFilename.c_str();
|
||||
}
|
||||
|
||||
void setModelFileName(const char* onnxFilename) noexcept {
|
||||
mModelFilename = std::string(onnxFilename);
|
||||
}
|
||||
Verbosity getVerbosityLevel() const noexcept { return mVerbosity; }
|
||||
void addVerbosity() noexcept { ++mVerbosity; }
|
||||
void reduceVerbosity() noexcept { --mVerbosity; }
|
||||
virtual void setVerbosityLevel(Verbosity v) noexcept { mVerbosity = v; }
|
||||
const char* getEngineFileName() const noexcept {
|
||||
return mEngineFilename.c_str();
|
||||
}
|
||||
void setEngineFileName(const char* engineFilename) noexcept {
|
||||
mEngineFilename = std::string(engineFilename);
|
||||
}
|
||||
const char* getTextFileName() const noexcept { return mTextFilename.c_str(); }
|
||||
void setTextFileName(const char* textFilename) noexcept {
|
||||
mTextFilename = std::string(textFilename);
|
||||
}
|
||||
const char* getFullTextFileName() const noexcept {
|
||||
return mFullTextFilename.c_str();
|
||||
}
|
||||
void setFullTextFileName(const char* fullTextFilename) noexcept {
|
||||
mFullTextFilename = std::string(fullTextFilename);
|
||||
}
|
||||
void setLabel(int64_t label) noexcept { mLabel = label; } //!< set the Label
|
||||
|
||||
int64_t getLabel() const noexcept { return mLabel; } //!< get the Label
|
||||
|
||||
bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; }
|
||||
|
||||
void setPrintLayerInfo(bool b) noexcept {
|
||||
mPrintLayercInfo = b;
|
||||
  } //!< set the boolean variable corresponding to the Layer Info, see
    //! getPrintLayerInfo()
|
||||
|
||||
void setMaxBatchSize(int64_t maxBatchSize) noexcept {
|
||||
mMaxBatchSize = maxBatchSize;
|
||||
} //!< set the Max Batch Size
|
||||
int64_t getMaxBatchSize() const noexcept {
|
||||
return mMaxBatchSize;
|
||||
} //!< get the Max Batch Size
|
||||
|
||||
void setCalibBatchSize(int64_t CalibBatchSize) noexcept {
|
||||
mCalibBatchSize = CalibBatchSize;
|
||||
} //!< set the calibration batch size
|
||||
int64_t getCalibBatchSize() const noexcept {
|
||||
return mCalibBatchSize;
|
||||
} //!< get calibration batch size
|
||||
|
||||
void setMaxNCalibBatch(int64_t MaxNCalibBatch) noexcept {
|
||||
mMaxNCalibBatch = MaxNCalibBatch;
|
||||
} //!< set Max Number of Calibration Batches
|
||||
int64_t getMaxNCalibBatch() const noexcept {
|
||||
return mMaxNCalibBatch;
|
||||
} //!< get the Max Number of Calibration Batches
|
||||
|
||||
void setFirstCalibBatch(int64_t FirstCalibBatch) noexcept {
|
||||
mFirstCalibBatch = FirstCalibBatch;
|
||||
} //!< set the first calibration batch
|
||||
int64_t getFirstCalibBatch() const noexcept {
|
||||
return mFirstCalibBatch;
|
||||
} //!< get the first calibration batch
|
||||
|
||||
void setUseDLACore(int64_t UseDLACore) noexcept {
|
||||
mUseDLACore = UseDLACore;
|
||||
} //!< set the DLA core to use
|
||||
int64_t getUseDLACore() const noexcept {
|
||||
return mUseDLACore;
|
||||
} //!< get the DLA core to use
|
||||
|
||||
void setDebugBuilder() noexcept {
|
||||
mDebugBuilder = true;
|
||||
} //!< enable the Debug info, while building the engine.
|
||||
bool getDebugBuilder() const noexcept {
|
||||
return mDebugBuilder;
|
||||
} //!< get the boolean variable, corresponding to the debug builder
|
||||
|
||||
const char*
|
||||
  getImageFileName() const noexcept //!< get the Image file name (PPM or ASCII)
|
||||
{
|
||||
return mImageFilename.c_str();
|
||||
}
|
||||
void setImageFileName(
|
||||
      const char* imageFilename) noexcept //!< set the Image file name
|
||||
{
|
||||
mImageFilename = std::string(imageFilename);
|
||||
}
|
||||
const char* getReferenceFileName() const noexcept {
|
||||
return mReferenceFilename.c_str();
|
||||
}
|
||||
void setReferenceFileName(
|
||||
const char* referenceFilename) noexcept //!< set reference file name
|
||||
{
|
||||
mReferenceFilename = std::string(referenceFilename);
|
||||
}
|
||||
|
||||
void setInputDataFormat(InputDataFormat idt) noexcept {
|
||||
mInputDataFormat = idt;
|
||||
} //!< specifies expected data format of the image file (PPM or ASCII)
|
||||
InputDataFormat getInputDataFormat() const noexcept {
|
||||
return mInputDataFormat;
|
||||
} //!< returns the expected data format of the image file.
|
||||
|
||||
const char* getOutputFileName()
|
||||
      const noexcept //!< get the file name used to save the results
|
||||
{
|
||||
return mOutputFilename.c_str();
|
||||
}
|
||||
void setOutputFileName(
|
||||
      const char* outputFilename) noexcept //!< set the output file name
|
||||
{
|
||||
mOutputFilename = std::string(outputFilename);
|
||||
}
|
||||
|
||||
const char* getCalibrationFileName() const noexcept {
|
||||
return mCalibrationFilename.c_str();
|
||||
} //!< specifies the file containing the list of image files for int8
|
||||
//! calibration
|
||||
void setCalibrationFileName(
|
||||
      const char* calibrationFilename) noexcept //!< set the int8 calibration
|
||||
//! list file name
|
||||
{
|
||||
mCalibrationFilename = std::string(calibrationFilename);
|
||||
}
|
||||
|
||||
uint64_t getTopK() const noexcept { return mTopK; }
|
||||
void setTopK(uint64_t topK) noexcept {
|
||||
mTopK = topK;
|
||||
} //!< If this options is specified, return the K top probabilities.
|
||||
|
||||
float getFailurePercentage() const noexcept { return mFailurePercentage; }
|
||||
|
||||
void setFailurePercentage(float f) noexcept { mFailurePercentage = f; }
|
||||
|
||||
float getAbsoluteTolerance() const noexcept { return mAbsTolerance; }
|
||||
|
||||
void setAbsoluteTolerance(float a) noexcept { mAbsTolerance = a; }
|
||||
|
||||
float getTolerance() const noexcept { return mTolerance; }
|
||||
|
||||
void setTolerance(float t) noexcept { mTolerance = t; }
|
||||
|
||||
const char* getTimingCacheFilename() const noexcept {
|
||||
return mTimingCacheFilename.c_str();
|
||||
}
|
||||
|
||||
void setTimingCacheFileName(const char* timingCacheFilename) noexcept {
|
||||
mTimingCacheFilename = std::string(timingCacheFilename);
|
||||
}
|
||||
|
||||
bool isDebug() const noexcept {
|
||||
#if ONNX_DEBUG
|
||||
return (std::getenv("ONNX_DEBUG") ? true : false);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
void destroy() noexcept { delete this; }
|
||||
|
||||
}; // class SampleConfig
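
// Example usage (a sketch; file names are hypothetical): configuring int8
// calibration inputs. As with the other IOnnxConfig implementations, the
// destructor is protected, so release the object with destroy():
//
//   auto* cfg = new SampleConfig();
//   cfg->setModelFileName("model.onnx");
//   cfg->setCalibrationFileName("calibration_list.txt");
//   cfg->setCalibBatchSize(8);
//   cfg->setMaxNCalibBatch(100);
//   cfg->destroy();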
|
||||
|
||||
#endif
|
||||
397
fastdeploy/backends/tensorrt/common/sampleDevice.h
Normal file
397
fastdeploy/backends/tensorrt/common/sampleDevice.h
Normal file
@@ -0,0 +1,397 @@
|
||||
/*
|
||||
* Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef TRT_SAMPLE_DEVICE_H
|
||||
#define TRT_SAMPLE_DEVICE_H
|
||||
|
||||
#include <cassert>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
namespace sample {
|
||||
|
||||
inline void cudaCheck(cudaError_t ret, std::ostream& err = std::cerr) {
|
||||
if (ret != cudaSuccess) {
|
||||
err << "Cuda failure: " << cudaGetErrorString(ret) << std::endl;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
class TrtCudaEvent;
|
||||
|
||||
namespace {
|
||||
|
||||
void cudaSleep(void* sleep) {
|
||||
std::this_thread::sleep_for(
|
||||
std::chrono::duration<float, std::milli>(*static_cast<float*>(sleep)));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
//!
|
||||
//! \class TrtCudaStream
|
||||
//! \brief Managed CUDA stream
|
||||
//!
|
||||
class TrtCudaStream {
|
||||
public:
|
||||
TrtCudaStream() { cudaCheck(cudaStreamCreate(&mStream)); }
|
||||
|
||||
TrtCudaStream(const TrtCudaStream&) = delete;
|
||||
|
||||
TrtCudaStream& operator=(const TrtCudaStream&) = delete;
|
||||
|
||||
TrtCudaStream(TrtCudaStream&&) = delete;
|
||||
|
||||
TrtCudaStream& operator=(TrtCudaStream&&) = delete;
|
||||
|
||||
~TrtCudaStream() { cudaCheck(cudaStreamDestroy(mStream)); }
|
||||
|
||||
cudaStream_t get() const { return mStream; }
|
||||
|
||||
void synchronize() { cudaCheck(cudaStreamSynchronize(mStream)); }
|
||||
|
||||
void wait(TrtCudaEvent& event);
|
||||
|
||||
void sleep(float* ms) {
|
||||
cudaCheck(cudaLaunchHostFunc(mStream, cudaSleep, ms));
|
||||
}
|
||||
|
||||
private:
|
||||
cudaStream_t mStream{};
|
||||
};
|
||||
|
||||
//!
|
||||
//! \class TrtCudaEvent
|
||||
//! \brief Managed CUDA event
|
||||
//!
|
||||
class TrtCudaEvent {
|
||||
public:
|
||||
explicit TrtCudaEvent(bool blocking = true) {
|
||||
const uint32_t flags = blocking ? cudaEventBlockingSync : cudaEventDefault;
|
||||
cudaCheck(cudaEventCreateWithFlags(&mEvent, flags));
|
||||
}
|
||||
|
||||
TrtCudaEvent(const TrtCudaEvent&) = delete;
|
||||
|
||||
TrtCudaEvent& operator=(const TrtCudaEvent&) = delete;
|
||||
|
||||
TrtCudaEvent(TrtCudaEvent&&) = delete;
|
||||
|
||||
TrtCudaEvent& operator=(TrtCudaEvent&&) = delete;
|
||||
|
||||
~TrtCudaEvent() { cudaCheck(cudaEventDestroy(mEvent)); }
|
||||
|
||||
cudaEvent_t get() const { return mEvent; }
|
||||
|
||||
void record(const TrtCudaStream& stream) {
|
||||
cudaCheck(cudaEventRecord(mEvent, stream.get()));
|
||||
}
|
||||
|
||||
void synchronize() { cudaCheck(cudaEventSynchronize(mEvent)); }
|
||||
|
||||
  // Returns the elapsed time in milliseconds
|
||||
float operator-(const TrtCudaEvent& e) const {
|
||||
float time{0};
|
||||
cudaCheck(cudaEventElapsedTime(&time, e.get(), get()));
|
||||
return time;
|
||||
}
|
||||
|
||||
private:
|
||||
cudaEvent_t mEvent{};
|
||||
};
|
||||
|
||||
inline void TrtCudaStream::wait(TrtCudaEvent& event) {
|
||||
cudaCheck(cudaStreamWaitEvent(mStream, event.get(), 0));
|
||||
}
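
// Example (a sketch): timing asynchronous GPU work with the wrappers above.
// operator- gives the elapsed time in milliseconds between two recorded
// events; enqueueWork() is a hypothetical function that launches work on the
// stream.
//
//   sample::TrtCudaStream stream;
//   sample::TrtCudaEvent start, stop;
//   start.record(stream);
//   enqueueWork(stream.get());
//   stop.record(stream);
//   stop.synchronize();
//   const float ms = stop - start;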
|
||||
|
||||
//!
|
||||
//! \class TrtCudaGraph
|
||||
//! \brief Managed CUDA graph
|
||||
//!
|
||||
class TrtCudaGraph {
|
||||
public:
|
||||
explicit TrtCudaGraph() = default;
|
||||
|
||||
TrtCudaGraph(const TrtCudaGraph&) = delete;
|
||||
|
||||
TrtCudaGraph& operator=(const TrtCudaGraph&) = delete;
|
||||
|
||||
TrtCudaGraph(TrtCudaGraph&&) = delete;
|
||||
|
||||
TrtCudaGraph& operator=(TrtCudaGraph&&) = delete;
|
||||
|
||||
~TrtCudaGraph() {
|
||||
if (mGraphExec) {
|
||||
cudaGraphExecDestroy(mGraphExec);
|
||||
}
|
||||
}
|
||||
|
||||
void beginCapture(TrtCudaStream& stream) {
|
||||
cudaCheck(
|
||||
cudaStreamBeginCapture(stream.get(), cudaStreamCaptureModeThreadLocal));
|
||||
}
|
||||
|
||||
bool launch(TrtCudaStream& stream) {
|
||||
return cudaGraphLaunch(mGraphExec, stream.get()) == cudaSuccess;
|
||||
}
|
||||
|
||||
void endCapture(TrtCudaStream& stream) {
|
||||
cudaCheck(cudaStreamEndCapture(stream.get(), &mGraph));
|
||||
cudaCheck(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0));
|
||||
cudaCheck(cudaGraphDestroy(mGraph));
|
||||
}
|
||||
|
||||
void endCaptureOnError(TrtCudaStream& stream) {
|
||||
// There are two possibilities why stream capture would fail:
|
||||
// (1) stream is in cudaErrorStreamCaptureInvalidated state.
|
||||
// (2) TRT reports a failure.
|
||||
    // In case (1), the returned mGraph should be nullptr.
    // In case (2), the returned mGraph is not nullptr, but it should not be
    // used.
|
||||
const auto ret = cudaStreamEndCapture(stream.get(), &mGraph);
|
||||
if (ret == cudaErrorStreamCaptureInvalidated) {
|
||||
assert(mGraph == nullptr);
|
||||
} else {
|
||||
assert(ret == cudaSuccess);
|
||||
assert(mGraph != nullptr);
|
||||
cudaCheck(cudaGraphDestroy(mGraph));
|
||||
mGraph = nullptr;
|
||||
}
|
||||
// Clean up any CUDA error.
|
||||
cudaGetLastError();
|
||||
sample::gLogWarning << "The CUDA graph capture on the stream has failed."
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
private:
|
||||
cudaGraph_t mGraph{};
|
||||
cudaGraphExec_t mGraphExec{};
|
||||
};
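
// Example (a sketch): capturing enqueued work into a CUDA graph and replaying
// it; enqueueWork() is a hypothetical function that launches work on the
// stream.
//
//   sample::TrtCudaGraph graph;
//   graph.beginCapture(stream);
//   enqueueWork(stream.get());
//   graph.endCapture(stream); // instantiates the executable graph
//   graph.launch(stream);     // replays the captured work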
|
||||
|
||||
//!
|
||||
//! \class TrtCudaBuffer
|
||||
//! \brief Managed buffer for host and device
|
||||
//!
|
||||
template <typename A, typename D> class TrtCudaBuffer {
|
||||
public:
|
||||
TrtCudaBuffer() = default;
|
||||
|
||||
TrtCudaBuffer(const TrtCudaBuffer&) = delete;
|
||||
|
||||
TrtCudaBuffer& operator=(const TrtCudaBuffer&) = delete;
|
||||
|
||||
TrtCudaBuffer(TrtCudaBuffer&& rhs) {
|
||||
reset(rhs.mPtr);
|
||||
rhs.mPtr = nullptr;
|
||||
}
|
||||
|
||||
TrtCudaBuffer& operator=(TrtCudaBuffer&& rhs) {
|
||||
if (this != &rhs) {
|
||||
reset(rhs.mPtr);
|
||||
rhs.mPtr = nullptr;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
~TrtCudaBuffer() { reset(); }
|
||||
|
||||
TrtCudaBuffer(size_t size) { A()(&mPtr, size); }
|
||||
|
||||
void allocate(size_t size) {
|
||||
reset();
|
||||
A()(&mPtr, size);
|
||||
}
|
||||
|
||||
void reset(void* ptr = nullptr) {
|
||||
if (mPtr) {
|
||||
D()(mPtr);
|
||||
}
|
||||
mPtr = ptr;
|
||||
}
|
||||
|
||||
void* get() const { return mPtr; }
|
||||
|
||||
private:
|
||||
void* mPtr{nullptr};
|
||||
};
|
||||
|
||||
struct DeviceAllocator {
|
||||
void operator()(void** ptr, size_t size) { cudaCheck(cudaMalloc(ptr, size)); }
|
||||
};
|
||||
|
||||
struct DeviceDeallocator {
|
||||
void operator()(void* ptr) { cudaCheck(cudaFree(ptr)); }
|
||||
};
|
||||
|
||||
struct ManagedAllocator {
|
||||
void operator()(void** ptr, size_t size) {
|
||||
cudaCheck(cudaMallocManaged(ptr, size));
|
||||
}
|
||||
};
|
||||
|
||||
struct HostAllocator {
|
||||
void operator()(void** ptr, size_t size) {
|
||||
cudaCheck(cudaMallocHost(ptr, size));
|
||||
}
|
||||
};
|
||||
|
||||
struct HostDeallocator {
|
||||
void operator()(void* ptr) { cudaCheck(cudaFreeHost(ptr)); }
|
||||
};
|
||||
|
||||
using TrtDeviceBuffer = TrtCudaBuffer<DeviceAllocator, DeviceDeallocator>;
|
||||
using TrtManagedBuffer = TrtCudaBuffer<ManagedAllocator, DeviceDeallocator>;
|
||||
|
||||
using TrtHostBuffer = TrtCudaBuffer<HostAllocator, HostDeallocator>;
|
||||
|
||||
//!
|
||||
//! \class IMirroredBuffer
|
||||
//! \brief Coupled host and device buffers
|
||||
//!
|
||||
class IMirroredBuffer {
|
||||
public:
|
||||
//!
|
||||
  //! Allocate memory for the mirrored buffer given the size
|
||||
//! of the allocation.
|
||||
//!
|
||||
virtual void allocate(size_t size) = 0;
|
||||
|
||||
//!
|
||||
//! Get the pointer to the device side buffer.
|
||||
//!
|
||||
//! \return pointer to device memory or nullptr if uninitialized.
|
||||
//!
|
||||
virtual void* getDeviceBuffer() const = 0;
|
||||
|
||||
//!
|
||||
//! Get the pointer to the host side buffer.
|
||||
//!
|
||||
//! \return pointer to host memory or nullptr if uninitialized.
|
||||
//!
|
||||
virtual void* getHostBuffer() const = 0;
|
||||
|
||||
//!
|
||||
//! Copy the memory from host to device.
|
||||
//!
|
||||
virtual void hostToDevice(TrtCudaStream& stream) = 0;
|
||||
|
||||
//!
|
||||
//! Copy the memory from device to host.
|
||||
//!
|
||||
virtual void deviceToHost(TrtCudaStream& stream) = 0;
|
||||
|
||||
//!
|
||||
//! Interface to get the size of the memory
|
||||
//!
|
||||
//! \return the size of memory allocated.
|
||||
//!
|
||||
virtual size_t getSize() const = 0;
|
||||
|
||||
//!
|
||||
  //! Virtual destructor declaration
|
||||
//!
|
||||
virtual ~IMirroredBuffer() = default;
|
||||
|
||||
}; // class IMirroredBuffer
|
||||
|
||||
//!
|
||||
//! Class to have a separate memory buffer for discrete device and host
|
||||
//! allocations.
|
||||
//!
|
||||
class DiscreteMirroredBuffer : public IMirroredBuffer {
|
||||
public:
|
||||
void allocate(size_t size) {
|
||||
mSize = size;
|
||||
mHostBuffer.allocate(size);
|
||||
mDeviceBuffer.allocate(size);
|
||||
}
|
||||
|
||||
void* getDeviceBuffer() const { return mDeviceBuffer.get(); }
|
||||
|
||||
void* getHostBuffer() const { return mHostBuffer.get(); }
|
||||
|
||||
void hostToDevice(TrtCudaStream& stream) {
|
||||
cudaCheck(cudaMemcpyAsync(mDeviceBuffer.get(), mHostBuffer.get(), mSize,
|
||||
cudaMemcpyHostToDevice, stream.get()));
|
||||
}
|
||||
|
||||
void deviceToHost(TrtCudaStream& stream) {
|
||||
cudaCheck(cudaMemcpyAsync(mHostBuffer.get(), mDeviceBuffer.get(), mSize,
|
||||
cudaMemcpyDeviceToHost, stream.get()));
|
||||
}
|
||||
|
||||
size_t getSize() const { return mSize; }
|
||||
|
||||
private:
|
||||
size_t mSize{0};
|
||||
TrtHostBuffer mHostBuffer;
|
||||
TrtDeviceBuffer mDeviceBuffer;
|
||||
}; // class DiscreteMirroredBuffer

//!
//! Class to have a unified memory buffer for embedded devices.
//!
class UnifiedMirroredBuffer : public IMirroredBuffer {
public:
  void allocate(size_t size) {
    mSize = size;
    mBuffer.allocate(size);
  }

  void* getDeviceBuffer() const { return mBuffer.get(); }

  void* getHostBuffer() const { return mBuffer.get(); }

  void hostToDevice(TrtCudaStream& stream) {
    // Does nothing since we are using unified memory.
  }

  void deviceToHost(TrtCudaStream& stream) {
    // Does nothing since we are using unified memory.
  }

  size_t getSize() const { return mSize; }

private:
  size_t mSize{0};
  TrtManagedBuffer mBuffer;
}; // class UnifiedMirroredBuffer
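
// Illustrative usage sketch (not part of the original sample), assuming a
// valid TrtCudaStream `stream` and a host blob `src` of `bytes` bytes:
//
//   DiscreteMirroredBuffer buffer; // or UnifiedMirroredBuffer on an iGPU
//   buffer.allocate(bytes);
//   std::memcpy(buffer.getHostBuffer(), src, bytes);
//   buffer.hostToDevice(stream);   // no-op for the unified variant
//   // ... enqueue work that reads buffer.getDeviceBuffer() ...
//   buffer.deviceToHost(stream);
//   stream.synchronize();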

inline void setCudaDevice(int device, std::ostream& os) {
  cudaCheck(cudaSetDevice(device));

  cudaDeviceProp properties;
  cudaCheck(cudaGetDeviceProperties(&properties, device));

  // clang-format off
  os << "=== Device Information ===" << std::endl;
  os << "Selected Device: " << properties.name << std::endl;
  os << "Compute Capability: " << properties.major << "." << properties.minor << std::endl;
  os << "SMs: " << properties.multiProcessorCount << std::endl;
  os << "Compute Clock Rate: " << properties.clockRate / 1000000.0F << " GHz" << std::endl;
  os << "Device Global Memory: " << (properties.totalGlobalMem >> 20) << " MiB" << std::endl;
  os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB" << std::endl;
  os << "Memory Bus Width: " << properties.memoryBusWidth << " bits"
     << " (ECC " << (properties.ECCEnabled != 0 ? "enabled" : "disabled") << ")" << std::endl;
  os << "Memory Clock Rate: " << properties.memoryClockRate / 1000000.0F << " GHz" << std::endl;
  // clang-format on
}

} // namespace sample

#endif // TRT_SAMPLE_DEVICE_H
1708
fastdeploy/backends/tensorrt/common/sampleEngines.cpp
Normal file
File diff suppressed because it is too large

195
fastdeploy/backends/tensorrt/common/sampleEngines.h
Normal file
@@ -0,0 +1,195 @@
/*
 * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TRT_SAMPLE_ENGINES_H
#define TRT_SAMPLE_ENGINES_H

#include <iostream>
#include <vector>

#include "NvCaffeParser.h"
#include "NvInfer.h"
#include "NvInferConsistency.h"
#include "NvInferSafeRuntime.h"
#include "NvOnnxParser.h"
#include "sampleOptions.h"
#include "sampleUtils.h"

namespace sample {

struct Parser {
  TrtUniquePtr<nvcaffeparser1::ICaffeParser> caffeParser;
  TrtUniquePtr<nvonnxparser::IParser> onnxParser;

  operator bool() const { return caffeParser || onnxParser; }
};

struct BuildEnvironment {
  TrtUniquePtr<INetworkDefinition> network;
  //! Parser that creates the network. Must be declared *after* network, so
  //! that when ~BuildEnvironment() executes, the parser is destroyed before
  //! the network is destroyed.
  Parser parser;
  TrtUniquePtr<nvinfer1::ICudaEngine> engine;
  std::unique_ptr<nvinfer1::safe::ICudaEngine> safeEngine;
  std::vector<uint8_t> engineBlob;
};

//!
//! \brief Generate a network definition for a given model
//!
//! \return Parser The parser used to initialize the network and that holds the
//! weights for the network, or an invalid parser (the returned parser converts
//! to false if tested)
//!
//! Constant input dimensions in the model must not be changed in the
//! corresponding network definition, because its correctness may rely on the
//! constants.
//!
//! \see Parser::operator bool()
//!
Parser modelToNetwork(const ModelOptions& model,
                      nvinfer1::INetworkDefinition& network, std::ostream& err);

//!
//! \brief Set up network and config
//!
//! \return boolean Return true if network and config were successfully set
//!
bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys,
                           IBuilder& builder, INetworkDefinition& network,
                           IBuilderConfig& config, std::ostream& err,
                           std::vector<std::vector<char>>& sparseWeights);

//!
//! \brief Log refittable layers and weights of a refittable engine
//!
void dumpRefittable(nvinfer1::ICudaEngine& engine);

//!
//! \brief Load a serialized engine
//!
//! \return Pointer to the engine loaded or nullptr if the operation failed
//!
nvinfer1::ICudaEngine* loadEngine(const std::string& engine, int DLACore,
                                  std::ostream& err);

//!
//! \brief Save an engine into a file
//!
//! \return boolean Return true if the engine was successfully saved
//!
bool saveEngine(const nvinfer1::ICudaEngine& engine,
                const std::string& fileName, std::ostream& err);

//!
//! \brief Create an engine from model or serialized file, and optionally save
//! engine
//!
//! \return boolean Return true if the engine build environment was
//! successfully created
//!
bool getEngineBuildEnv(const ModelOptions& model, const BuildOptions& build,
                       const SystemOptions& sys, BuildEnvironment& env,
                       std::ostream& err);

//!
//! \brief Create an engine from model or serialized file, and optionally save
//! engine
//!
//! \return Pointer to the engine created or nullptr if the creation failed
//!
inline TrtUniquePtr<nvinfer1::ICudaEngine> getEngine(const ModelOptions& model,
                                                     const BuildOptions& build,
                                                     const SystemOptions& sys,
                                                     std::ostream& err) {
  BuildEnvironment env;
  TrtUniquePtr<nvinfer1::ICudaEngine> engine;
  if (getEngineBuildEnv(model, build, sys, env, err)) {
    engine.swap(env.engine);
  }
  return engine;
}
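
// Illustrative usage sketch (not part of the original sample), assuming
// previously parsed ModelOptions/BuildOptions/SystemOptions objects:
//
//   auto engine = sample::getEngine(modelOpts, buildOpts, sysOpts, std::cerr);
//   if (!engine) {
//     // Creation failed; diagnostics were written to std::cerr.
//   }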

//!
//! \brief Create a serialized network
//!
//! \return Pointer to a host memory for a serialized network
//!
IHostMemory* networkToSerialized(const BuildOptions& build,
                                 const SystemOptions& sys, IBuilder& builder,
                                 INetworkDefinition& network,
                                 std::ostream& err);

//!
//! \brief Transfer model to a serialized network
//!
//! \return Pointer to a host memory for a serialized network
//!
IHostMemory* modelToSerialized(const ModelOptions& model,
                               const BuildOptions& build,
                               const SystemOptions& sys, std::ostream& err);

//!
//! \brief Serialize network and save it into a file
//!
//! \return boolean Return true if the network was successfully serialized and
//! saved
//!
bool serializeAndSave(const ModelOptions& model, const BuildOptions& build,
                      const SystemOptions& sys, std::ostream& err);

bool timeRefit(const INetworkDefinition& network, nvinfer1::ICudaEngine& engine,
               bool multiThreading);

//!
//! \brief Set tensor scales from a calibration table
//!
void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network,
                                    const std::vector<IOFormat>& inputFormats,
                                    const std::vector<IOFormat>& outputFormats,
                                    const std::string& calibrationFile);

//!
//! \brief Check if safe runtime is loaded.
//!
bool hasSafeRuntime();

//!
//! \brief Create a safe runtime object if the dynamic library is loaded.
//!
nvinfer1::safe::IRuntime*
createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept;

//!
//! \brief Check if consistency checker is loaded.
//!
bool hasConsistencyChecker();

//!
//! \brief Create a consistency checker object if the dynamic library is loaded.
//!
nvinfer1::consistency::IConsistencyChecker*
createConsistencyChecker(nvinfer1::ILogger& logger,
                         IHostMemory const* engine) noexcept;

//!
//! \brief Run consistency check on serialized engine.
//!
bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize);
} // namespace sample

#endif // TRT_SAMPLE_ENGINES_H
943
fastdeploy/backends/tensorrt/common/sampleInference.cpp
Normal file
@@ -0,0 +1,943 @@
/*
 * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <algorithm>
#include <array>
#include <chrono>
#include <cuda_profiler_api.h>
#include <functional>
#include <limits>
#include <memory>
#include <mutex>
#include <numeric>
#include <thread>
#include <utility>
#include <vector>

#if defined(__QNX__)
#include <sys/neutrino.h>
#include <sys/syspage.h>
#endif

#include "NvInfer.h"

#include "ErrorRecorder.h"
#include "logger.h"
#include "sampleDevice.h"
#include "sampleEngines.h"
#include "sampleInference.h"
#include "sampleOptions.h"
#include "sampleReporting.h"
#include "sampleUtils.h"

namespace sample {

template <class MapType, class EngineType>
bool validateTensorNames(const MapType& map, const EngineType* engine,
                         const int32_t endBindingIndex) {
  // Check if the provided input tensor names match the input tensors of the
  // engine. Throw an error if a provided input tensor name cannot be found,
  // because it implies a potential typo.
  for (const auto& item : map) {
    bool tensorNameFound{false};
    for (int32_t b = 0; b < endBindingIndex; ++b) {
      if (engine->bindingIsInput(b) &&
          engine->getBindingName(b) == item.first) {
        tensorNameFound = true;
        break;
      }
    }
    if (!tensorNameFound) {
      sample::gLogError
          << "Cannot find input tensor with name \"" << item.first
          << "\" in the engine bindings! "
          << "Please make sure the input tensor names are correct."
          << std::endl;
      return false;
    }
  }
  return true;
}

template <class EngineType, class ContextType> class FillBindingClosure {
private:
  using InputsMap = std::unordered_map<std::string, std::string>;
  using BindingsVector = std::vector<std::unique_ptr<Bindings>>;

  EngineType const* engine;
  ContextType const* context;
  InputsMap const& inputs;
  BindingsVector& bindings;
  int32_t batch;
  int32_t endBindingIndex;

  void fillOneBinding(int32_t bindingIndex, int64_t vol) {
    auto const dims = getDims(bindingIndex);
    auto const name = engine->getBindingName(bindingIndex);
    auto const isInput = engine->bindingIsInput(bindingIndex);
    auto const dataType = engine->getBindingDataType(bindingIndex);
    auto const* bindingInOutStr = isInput ? "input" : "output";
    for (auto& binding : bindings) {
      const auto input = inputs.find(name);
      if (isInput && input != inputs.end()) {
        sample::gLogInfo << "Using values loaded from " << input->second
                         << " for input " << name << std::endl;
        binding->addBinding(bindingIndex, name, isInput, vol, dataType,
                            input->second);
      } else {
        sample::gLogInfo << "Using random values for " << bindingInOutStr << " "
                         << name << std::endl;
        binding->addBinding(bindingIndex, name, isInput, vol, dataType);
      }
      sample::gLogInfo << "Created " << bindingInOutStr << " binding for "
                       << name << " with dimensions " << dims << std::endl;
    }
  }

  bool fillAllBindings(int32_t batch, int32_t endBindingIndex) {
    if (!validateTensorNames(inputs, engine, endBindingIndex)) {
      sample::gLogError << "Invalid tensor names found in --loadInputs flag."
                        << std::endl;
      return false;
    }

    for (int32_t b = 0; b < endBindingIndex; b++) {
      auto const dims = getDims(b);
      auto const comps = engine->getBindingComponentsPerElement(b);
      auto const strides = context->getStrides(b);
      int32_t const vectorDimIndex = engine->getBindingVectorizedDim(b);
      auto const vol = volume(dims, strides, vectorDimIndex, comps, batch);
      fillOneBinding(b, vol);
    }
    return true;
  }

  Dims getDims(int32_t bindingIndex);

public:
  FillBindingClosure(EngineType const* _engine, ContextType const* _context,
                     InputsMap const& _inputs, BindingsVector& _bindings,
                     int32_t _batch, int32_t _endBindingIndex)
      : engine(_engine), context(_context), inputs(_inputs),
        bindings(_bindings), batch(_batch), endBindingIndex(_endBindingIndex) {}

  bool operator()() { return fillAllBindings(batch, endBindingIndex); }
};

template <>
Dims FillBindingClosure<nvinfer1::ICudaEngine, nvinfer1::IExecutionContext>::
    getDims(int32_t bindingIndex) {
  return context->getBindingDimensions(bindingIndex);
}

template <>
Dims FillBindingClosure<
    nvinfer1::safe::ICudaEngine,
    nvinfer1::safe::IExecutionContext>::getDims(int32_t bindingIndex) {
  return engine->getBindingDimensions(bindingIndex);
}
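
// Note (not part of the original sample): the standard-runtime specialization
// above asks the execution context for dimensions so that dynamic shapes
// resolved via setBindingDimensions() are reflected, while the safe runtime
// has no dynamic shapes and reads static dimensions directly from the engine.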

bool setUpInference(InferenceEnvironment& iEnv,
                    const InferenceOptions& inference) {
  int32_t device{};
  cudaCheck(cudaGetDevice(&device));

  cudaDeviceProp properties;
  cudaCheck(cudaGetDeviceProperties(&properties, device));
  // Use managed memory on integrated devices when transfers are skipped
  // and when it is explicitly requested on the commandline.
  bool useManagedMemory{(inference.skipTransfers && properties.integrated) ||
                        inference.useManaged};
  using FillSafeBindings =
      FillBindingClosure<nvinfer1::safe::ICudaEngine,
                         nvinfer1::safe::IExecutionContext>;
  if (iEnv.safe) {
    ASSERT(sample::hasSafeRuntime());
    auto* safeEngine = iEnv.safeEngine.get();
    for (int32_t s = 0; s < inference.streams; ++s) {
      iEnv.safeContext.emplace_back(safeEngine->createExecutionContext());
      iEnv.bindings.emplace_back(new Bindings(useManagedMemory));
    }
    const int32_t nBindings = safeEngine->getNbBindings();
    auto const* safeContext = iEnv.safeContext.front().get();
    // batch is set to 1 because the safe runtime only supports explicit batch.
    return FillSafeBindings(iEnv.safeEngine.get(), safeContext,
                            inference.inputs, iEnv.bindings, 1, nBindings)();
  }

  using FillStdBindings =
      FillBindingClosure<nvinfer1::ICudaEngine, nvinfer1::IExecutionContext>;

  for (int32_t s = 0; s < inference.streams; ++s) {
    auto ec = iEnv.engine->createExecutionContext();
    if (ec == nullptr) {
      sample::gLogError << "Unable to create execution context for stream " << s
                        << "." << std::endl;
      return false;
    }
    iEnv.context.emplace_back(ec);
    iEnv.bindings.emplace_back(new Bindings(useManagedMemory));
  }
  if (iEnv.profiler) {
    iEnv.context.front()->setProfiler(iEnv.profiler.get());
    // Always run reportToProfiler() after enqueue launch
    iEnv.context.front()->setEnqueueEmitsProfile(false);
  }

  const int32_t nOptProfiles = iEnv.engine->getNbOptimizationProfiles();
  const int32_t nBindings = iEnv.engine->getNbBindings();
  const int32_t bindingsInProfile =
      nOptProfiles > 0 ? nBindings / nOptProfiles : 0;
  const int32_t endBindingIndex =
      bindingsInProfile ? bindingsInProfile : iEnv.engine->getNbBindings();

  if (nOptProfiles > 1) {
    sample::gLogWarning << "Multiple profiles are currently not supported. "
                           "Running with one profile."
                        << std::endl;
  }

  // Make sure that the tensor names provided in command-line args actually
  // exist in any of the engine bindings to avoid silent typos.
  if (!validateTensorNames(inference.shapes, iEnv.engine.get(),
                           endBindingIndex)) {
    sample::gLogError << "Invalid tensor names found in --shapes flag."
                      << std::endl;
    return false;
  }

  // Set all input dimensions before all bindings can be allocated
  for (int32_t b = 0; b < endBindingIndex; ++b) {
    if (iEnv.engine->bindingIsInput(b)) {
      auto dims = iEnv.context.front()->getBindingDimensions(b);
      const bool isScalar = dims.nbDims == 0;
      const bool isDynamicInput =
          std::any_of(dims.d, dims.d + dims.nbDims,
                      [](int32_t dim) { return dim == -1; }) ||
          iEnv.engine->isShapeBinding(b);
      if (isDynamicInput) {
        auto shape = inference.shapes.find(iEnv.engine->getBindingName(b));

        std::vector<int32_t> staticDims;
        if (shape == inference.shapes.end()) {
          // If no shape is provided, set dynamic dimensions to 1.
          constexpr int32_t DEFAULT_DIMENSION = 1;
          if (iEnv.engine->isShapeBinding(b)) {
            if (isScalar) {
              staticDims.push_back(1);
            } else {
              staticDims.resize(dims.d[0]);
              std::fill(staticDims.begin(), staticDims.end(),
                        DEFAULT_DIMENSION);
            }
          } else {
            staticDims.resize(dims.nbDims);
            std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(),
                           [&](int32_t dimension) {
                             return dimension >= 0 ? dimension
                                                   : DEFAULT_DIMENSION;
                           });
          }
          sample::gLogWarning << "Dynamic dimensions required for input: "
                              << iEnv.engine->getBindingName(b)
                              << ", but no shapes were provided. Automatically "
                                 "overriding shape to: "
                              << staticDims << std::endl;
        } else if (inference.inputs.count(shape->first) &&
                   iEnv.engine->isShapeBinding(b)) {
          if (isScalar || dims.nbDims == 1) {
            // Load shape tensor from file.
            size_t const size = isScalar ? 1 : dims.d[0];
            staticDims.resize(size);
            auto const& filename = inference.inputs.at(shape->first);
            auto dst = reinterpret_cast<char*>(staticDims.data());
            loadFromFile(filename, dst,
                         size * sizeof(decltype(staticDims)::value_type));
          } else {
            sample::gLogWarning << "Cannot load shape tensor " << shape->first
                                << " from file; "
                                << "ND shapes aren't supported yet."
                                << std::endl;
            // Fall back to the shape provided on the command line.
            staticDims = shape->second;
          }
        } else {
          staticDims = shape->second;
        }

        for (auto& c : iEnv.context) {
          if (iEnv.engine->isShapeBinding(b)) {
            if (!c->setInputShapeBinding(b, staticDims.data())) {
              return false;
            }
          } else {
            if (!c->setBindingDimensions(b, toDims(staticDims))) {
              return false;
            }
          }
        }
      }
    }
  }

  auto* engine = iEnv.engine.get();
  auto const* context = iEnv.context.front().get();
  int32_t const batch =
      engine->hasImplicitBatchDimension() ? inference.batch : 1;
  return FillStdBindings(engine, context, inference.inputs, iEnv.bindings,
                         batch, endBindingIndex)();
}

namespace {

#if defined(__QNX__)
using TimePoint = double;
#else
using TimePoint = std::chrono::time_point<std::chrono::high_resolution_clock>;
#endif

TimePoint getCurrentTime() {
#if defined(__QNX__)
  uint64_t const currentCycles = ClockCycles();
  uint64_t const cyclesPerSecond = SYSPAGE_ENTRY(qtime)->cycles_per_sec;
  // Return current timestamp in ms.
  return static_cast<TimePoint>(currentCycles) * 1000. / cyclesPerSecond;
#else
  return std::chrono::high_resolution_clock::now();
#endif
}

//!
//! \struct SyncStruct
//! \brief Threads synchronization structure
//!
struct SyncStruct {
  std::mutex mutex;
  TrtCudaStream mainStream;
  TrtCudaEvent gpuStart{cudaEventBlockingSync};
  TimePoint cpuStart{};
  float sleep{};
};

struct Enqueue {
  explicit Enqueue(nvinfer1::IExecutionContext& context, void** buffers)
      : mContext(context), mBuffers(buffers) {}

  nvinfer1::IExecutionContext& mContext;
  void** mBuffers{};
};

//!
//! \class EnqueueImplicit
//! \brief Functor to enqueue inference with implicit batch
//!
class EnqueueImplicit : private Enqueue {
public:
  explicit EnqueueImplicit(nvinfer1::IExecutionContext& context, void** buffers,
                           int32_t batch)
      : Enqueue(context, buffers), mBatch(batch) {}

  bool operator()(TrtCudaStream& stream) const {
    if (mContext.enqueue(mBatch, mBuffers, stream.get(), nullptr)) {
      // Collect layer timing info from the current profile index of the
      // execution context.
      if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() &&
          !mContext.reportToProfiler()) {
        gLogWarning
            << "Failed to collect layer timing info from previous enqueue()"
            << std::endl;
      }
      return true;
    }
    return false;
  }

private:
  int32_t mBatch;
};

//!
//! \class EnqueueExplicit
//! \brief Functor to enqueue inference with explicit batch
//!
class EnqueueExplicit : private Enqueue {
public:
  explicit EnqueueExplicit(nvinfer1::IExecutionContext& context, void** buffers)
      : Enqueue(context, buffers) {}

  bool operator()(TrtCudaStream& stream) const {
    if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) {
      // Collect layer timing info from the current profile index of the
      // execution context.
      if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() &&
          !mContext.reportToProfiler()) {
        gLogWarning
            << "Failed to collect layer timing info from previous enqueueV2()"
            << std::endl;
      }
      return true;
    }
    return false;
  }
};
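
// Note (not part of the original sample): enqueue() takes an explicit batch
// count because implicit-batch engines size their bindings per sample, while
// enqueueV2() takes none because explicit-batch engines carry the batch
// dimension inside the binding dimensions themselves.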

//!
//! \class EnqueueGraph
//! \brief Functor to enqueue inference from CUDA Graph
//!
class EnqueueGraph {
public:
  explicit EnqueueGraph(nvinfer1::IExecutionContext& context,
                        TrtCudaGraph& graph)
      : mGraph(graph), mContext(context) {}

  bool operator()(TrtCudaStream& stream) const {
    if (mGraph.launch(stream)) {
      // Collect layer timing info from the current profile index of the
      // execution context.
      if (mContext.getProfiler() && !mContext.reportToProfiler()) {
        gLogWarning << "Failed to collect layer timing info from previous CUDA "
                       "graph launch"
                    << std::endl;
      }
      return true;
    }
    return false;
  }

  TrtCudaGraph& mGraph;
  nvinfer1::IExecutionContext& mContext;
};

//!
//! \class EnqueueSafe
//! \brief Functor to enqueue safe execution context
//!
class EnqueueSafe {
public:
  explicit EnqueueSafe(nvinfer1::safe::IExecutionContext& context,
                       void** buffers)
      : mContext(context), mBuffers(buffers) {}

  bool operator()(TrtCudaStream& stream) const {
    if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) {
      return true;
    }
    return false;
  }

  nvinfer1::safe::IExecutionContext& mContext;
  void** mBuffers{};
};

using EnqueueFunction = std::function<bool(TrtCudaStream&)>;

enum class StreamType : int32_t {
  kINPUT = 0,
  kCOMPUTE = 1,
  kOUTPUT = 2,
  kNUM = 3
};

enum class EventType : int32_t {
  kINPUT_S = 0,
  kINPUT_E = 1,
  kCOMPUTE_S = 2,
  kCOMPUTE_E = 3,
  kOUTPUT_S = 4,
  kOUTPUT_E = 5,
  kNUM = 6
};

using MultiStream =
    std::array<TrtCudaStream, static_cast<int32_t>(StreamType::kNUM)>;

using MultiEvent = std::array<std::unique_ptr<TrtCudaEvent>,
                              static_cast<int32_t>(EventType::kNUM)>;

using EnqueueTimes = std::array<TimePoint, 2>;

//!
//! \class Iteration
//! \brief Inference iteration and streams management
//!
template <class ContextType> class Iteration {
public:
  Iteration(int32_t id, const InferenceOptions& inference, ContextType& context,
            Bindings& bindings)
      : mBindings(bindings), mStreamId(id), mDepth(1 + inference.overlap),
        mActive(mDepth), mEvents(mDepth), mEnqueueTimes(mDepth),
        mContext(&context) {
    for (int32_t d = 0; d < mDepth; ++d) {
      for (int32_t e = 0; e < static_cast<int32_t>(EventType::kNUM); ++e) {
        mEvents[d][e].reset(new TrtCudaEvent(!inference.spin));
      }
    }
    createEnqueueFunction(inference, context, bindings);
  }

  bool query(bool skipTransfers) {
    if (mActive[mNext]) {
      return true;
    }

    if (!skipTransfers) {
      record(EventType::kINPUT_S, StreamType::kINPUT);
      mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
      record(EventType::kINPUT_E, StreamType::kINPUT);
      wait(EventType::kINPUT_E,
           StreamType::kCOMPUTE); // Wait for input DMA before compute
    }

    record(EventType::kCOMPUTE_S, StreamType::kCOMPUTE);
    recordEnqueueTime();
    if (!mEnqueue(getStream(StreamType::kCOMPUTE))) {
      return false;
    }
    recordEnqueueTime();
    record(EventType::kCOMPUTE_E, StreamType::kCOMPUTE);

    if (!skipTransfers) {
      wait(EventType::kCOMPUTE_E,
           StreamType::kOUTPUT); // Wait for compute before output DMA
      record(EventType::kOUTPUT_S, StreamType::kOUTPUT);
      mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
      record(EventType::kOUTPUT_E, StreamType::kOUTPUT);
    }

    mActive[mNext] = true;
    moveNext();
    return true;
  }

  float sync(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart,
             std::vector<InferenceTrace>& trace, bool skipTransfers) {
    if (mActive[mNext]) {
      if (skipTransfers) {
        getEvent(EventType::kCOMPUTE_E).synchronize();
      } else {
        getEvent(EventType::kOUTPUT_E).synchronize();
      }
      trace.emplace_back(getTrace(cpuStart, gpuStart, skipTransfers));
      mActive[mNext] = false;
      return getEvent(EventType::kCOMPUTE_S) - gpuStart;
    }
    return 0;
  }

  void syncAll(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart,
               std::vector<InferenceTrace>& trace, bool skipTransfers) {
    for (int32_t d = 0; d < mDepth; ++d) {
      sync(cpuStart, gpuStart, trace, skipTransfers);
      moveNext();
    }
  }

  void wait(TrtCudaEvent& gpuStart) {
    getStream(StreamType::kINPUT).wait(gpuStart);
  }

  void setInputData() {
    mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
  }

  void fetchOutputData() {
    mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
  }

private:
  void moveNext() { mNext = mDepth - 1 - mNext; }

  TrtCudaStream& getStream(StreamType t) {
    return mStream[static_cast<int32_t>(t)];
  }

  TrtCudaEvent& getEvent(EventType t) {
    return *mEvents[mNext][static_cast<int32_t>(t)];
  }

  void record(EventType e, StreamType s) { getEvent(e).record(getStream(s)); }

  void recordEnqueueTime() {
    mEnqueueTimes[mNext][enqueueStart] = getCurrentTime();
    enqueueStart = 1 - enqueueStart;
  }

  TimePoint getEnqueueTime(bool start) {
    return mEnqueueTimes[mNext][start ? 0 : 1];
  }

  void wait(EventType e, StreamType s) { getStream(s).wait(getEvent(e)); }

  InferenceTrace getTrace(const TimePoint& cpuStart,
                          const TrtCudaEvent& gpuStart, bool skipTransfers) {
    float is = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart
                             : getEvent(EventType::kINPUT_S) - gpuStart;
    float ie = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart
                             : getEvent(EventType::kINPUT_E) - gpuStart;
    float os = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart
                             : getEvent(EventType::kOUTPUT_S) - gpuStart;
    float oe = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart
                             : getEvent(EventType::kOUTPUT_E) - gpuStart;

    return InferenceTrace(mStreamId,
                          std::chrono::duration<float, std::milli>(
                              getEnqueueTime(true) - cpuStart)
                              .count(),
                          std::chrono::duration<float, std::milli>(
                              getEnqueueTime(false) - cpuStart)
                              .count(),
                          is, ie, getEvent(EventType::kCOMPUTE_S) - gpuStart,
                          getEvent(EventType::kCOMPUTE_E) - gpuStart, os, oe);
  }

  void createEnqueueFunction(const InferenceOptions& inference,
                             nvinfer1::IExecutionContext& context,
                             Bindings& bindings) {
    if (inference.batch) {
      mEnqueue = EnqueueFunction(EnqueueImplicit(
          context, mBindings.getDeviceBuffers(), inference.batch));
    } else {
      mEnqueue = EnqueueFunction(
          EnqueueExplicit(context, mBindings.getDeviceBuffers()));
    }
    if (inference.graph) {
      TrtCudaStream& stream = getStream(StreamType::kCOMPUTE);
      // Avoid capturing initialization calls by executing the enqueue
      // function at least once before starting CUDA graph capture.
      const auto ret = mEnqueue(stream);
      assert(ret);
      stream.synchronize();

      mGraph.beginCapture(stream);
      // The built TRT engine may contain operations that are not permitted
      // under CUDA graph capture mode. When the stream is capturing, the
      // enqueue call may return false if the current CUDA graph capture fails.
      if (mEnqueue(stream)) {
        mGraph.endCapture(stream);
        mEnqueue = EnqueueFunction(EnqueueGraph(context, mGraph));
      } else {
        mGraph.endCaptureOnError(stream);
        // Ensure any CUDA error has been cleaned up.
        cudaCheck(cudaGetLastError());
        sample::gLogWarning << "The built TensorRT engine contains operations "
                               "that are not permitted under "
                               "CUDA graph capture mode."
                            << std::endl;
        sample::gLogWarning << "The specified --useCudaGraph flag has been "
                               "ignored. The inference will be "
                               "launched without using CUDA graph launch."
                            << std::endl;
      }
    }
  }

  void createEnqueueFunction(const InferenceOptions&,
                             nvinfer1::safe::IExecutionContext& context,
                             Bindings&) {
    mEnqueue =
        EnqueueFunction(EnqueueSafe(context, mBindings.getDeviceBuffers()));
  }

  Bindings& mBindings;

  TrtCudaGraph mGraph;
  EnqueueFunction mEnqueue;

  int32_t mStreamId{0};
  int32_t mNext{0};
  int32_t mDepth{2}; // default to double buffer to hide DMA transfers

  std::vector<bool> mActive;
  MultiStream mStream;
  std::vector<MultiEvent> mEvents;

  int32_t enqueueStart{0};
  std::vector<EnqueueTimes> mEnqueueTimes;
  ContextType* mContext{nullptr};
};
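
// Illustrative sketch (not part of the original sample) of the
// capture-or-fallback pattern used in createEnqueueFunction() above, written
// against the raw CUDA graph API that TrtCudaGraph is assumed to wrap:
//
//   cudaGraph_t graph{};
//   cudaGraphExec_t graphExec{};
//   cudaStreamBeginCapture(stream, cudaStreamCaptureModeThreadLocal);
//   bool ok = enqueue(stream);                // work is recorded, not run
//   if (ok && cudaStreamEndCapture(stream, &graph) == cudaSuccess) {
//     cudaGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0);
//     // Later launches replay the capture: cudaGraphLaunch(graphExec, stream);
//   } else {
//     cudaStreamEndCapture(stream, &graph);   // terminate the failed capture
//     cudaGetLastError();                     // clear the sticky error
//     // Fall back to plain enqueue() launches.
//   }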

template <class ContextType>
bool inferenceLoop(
    std::vector<std::unique_ptr<Iteration<ContextType>>>& iStreams,
    const TimePoint& cpuStart, const TrtCudaEvent& gpuStart, int iterations,
    float maxDurationMs, float warmupMs, std::vector<InferenceTrace>& trace,
    bool skipTransfers, float idleMs) {
  float durationMs = 0;
  int32_t skip = 0;

  for (int32_t i = 0; i < iterations + skip || durationMs < maxDurationMs;
       ++i) {
    for (auto& s : iStreams) {
      if (!s->query(skipTransfers)) {
        return false;
      }
    }
    for (auto& s : iStreams) {
      durationMs = std::max(durationMs,
                            s->sync(cpuStart, gpuStart, trace, skipTransfers));
    }
    if (durationMs < warmupMs) { // Warming up
      if (durationMs) {          // Skip complete iterations
        ++skip;
      }
      continue;
    }
    if (idleMs != 0.F) {
      std::this_thread::sleep_for(
          std::chrono::duration<float, std::milli>(idleMs));
    }
  }
  for (auto& s : iStreams) {
    s->syncAll(cpuStart, gpuStart, trace, skipTransfers);
  }
  return true;
}

template <class ContextType>
void inferenceExecution(const InferenceOptions& inference,
                        InferenceEnvironment& iEnv, SyncStruct& sync,
                        const int32_t threadIdx, const int32_t streamsPerThread,
                        int32_t device, std::vector<InferenceTrace>& trace) {
  float warmupMs = inference.warmup;
  float durationMs = inference.duration * 1000.F + warmupMs;

  cudaCheck(cudaSetDevice(device));

  std::vector<std::unique_ptr<Iteration<ContextType>>> iStreams;

  for (int32_t s = 0; s < streamsPerThread; ++s) {
    const int32_t streamId{threadIdx * streamsPerThread + s};
    auto* iteration = new Iteration<ContextType>(
        streamId, inference, *iEnv.template getContext<ContextType>(streamId),
        *iEnv.bindings[streamId]);
    if (inference.skipTransfers) {
      iteration->setInputData();
    }
    iStreams.emplace_back(iteration);
  }

  for (auto& s : iStreams) {
    s->wait(sync.gpuStart);
  }

  std::vector<InferenceTrace> localTrace;
  if (!inferenceLoop(iStreams, sync.cpuStart, sync.gpuStart,
                     inference.iterations, durationMs, warmupMs, localTrace,
                     inference.skipTransfers, inference.idle)) {
    iEnv.error = true;
  }

  if (inference.skipTransfers) {
    for (auto& s : iStreams) {
      s->fetchOutputData();
    }
  }

  sync.mutex.lock();
  trace.insert(trace.end(), localTrace.begin(), localTrace.end());
  sync.mutex.unlock();
}

inline std::thread makeThread(const InferenceOptions& inference,
                              InferenceEnvironment& iEnv, SyncStruct& sync,
                              int32_t threadIdx, int32_t streamsPerThread,
                              int32_t device,
                              std::vector<InferenceTrace>& trace) {
  if (iEnv.safe) {
    ASSERT(sample::hasSafeRuntime());
    return std::thread(inferenceExecution<nvinfer1::safe::IExecutionContext>,
                       std::cref(inference), std::ref(iEnv), std::ref(sync),
                       threadIdx, streamsPerThread, device, std::ref(trace));
  }

  return std::thread(inferenceExecution<nvinfer1::IExecutionContext>,
                     std::cref(inference), std::ref(iEnv), std::ref(sync),
                     threadIdx, streamsPerThread, device, std::ref(trace));
}

} // namespace

bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv,
                  int32_t device, std::vector<InferenceTrace>& trace) {
  cudaCheck(cudaProfilerStart());

  trace.resize(0);

  SyncStruct sync;
  sync.sleep = inference.sleep;
  sync.mainStream.sleep(&sync.sleep);
  sync.cpuStart = getCurrentTime();
  sync.gpuStart.record(sync.mainStream);

  // When multiple streams are used, trtexec can run inference in two modes:
  // (1) if inference.threads is true, then run each stream on each thread.
  // (2) if inference.threads is false, then run all streams on the same thread.
  const int32_t numThreads = inference.threads ? inference.streams : 1;
  const int32_t streamsPerThread = inference.threads ? 1 : inference.streams;

  std::vector<std::thread> threads;
  for (int32_t threadIdx = 0; threadIdx < numThreads; ++threadIdx) {
    threads.emplace_back(makeThread(inference, iEnv, sync, threadIdx,
                                    streamsPerThread, device, trace));
  }
  for (auto& th : threads) {
    th.join();
  }

  cudaCheck(cudaProfilerStop());

  auto cmpTrace = [](const InferenceTrace& a, const InferenceTrace& b) {
    return a.h2dStart < b.h2dStart;
  };
  std::sort(trace.begin(), trace.end(), cmpTrace);

  return !iEnv.error;
}

namespace {
size_t reportGpuMemory() {
  static size_t prevFree{0};
  size_t free{0};
  size_t total{0};
  size_t newlyAllocated{0};
  cudaCheck(cudaMemGetInfo(&free, &total));
  sample::gLogInfo << "Free GPU memory = " << free / 1024.0_MiB << " GiB";
  if (prevFree != 0) {
    newlyAllocated = (prevFree - free);
    sample::gLogInfo << ", newly allocated GPU memory = "
                     << newlyAllocated / 1024.0_MiB << " GiB";
  }
  sample::gLogInfo << ", total GPU memory = " << total / 1024.0_MiB << " GiB"
                   << std::endl;
  prevFree = free;
  return newlyAllocated;
}
} // namespace
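
// Note (not part of the original sample): `_MiB` is assumed to be a
// user-defined literal from sampleUtils that scales its operand to bytes, so
// `1024.0_MiB` equals 2^30 bytes and `free / 1024.0_MiB` reports GiB, e.g.:
//
//   constexpr long double operator"" _MiB(long double val) {
//     return val * (1 << 20);
//   }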

//! Returns true if deserialization is slower than expected or fails.
bool timeDeserialize(InferenceEnvironment& iEnv) {
  constexpr int32_t kNB_ITERS{20};
  std::unique_ptr<IRuntime> rt{
      createInferRuntime(sample::gLogger.getTRTLogger())};
  std::unique_ptr<ICudaEngine> engine;

  std::unique_ptr<safe::IRuntime> safeRT{
      sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())};
  std::unique_ptr<safe::ICudaEngine> safeEngine;

  if (iEnv.safe) {
    ASSERT(sample::hasSafeRuntime() && safeRT != nullptr);
    safeRT->setErrorRecorder(&gRecorder);
  }

  auto timeDeserializeFn = [&]() -> float {
    bool deserializeOK{false};
    engine.reset(nullptr);
    safeEngine.reset(nullptr);
    auto startClock = std::chrono::high_resolution_clock::now();
    if (iEnv.safe) {
      safeEngine.reset(safeRT->deserializeCudaEngine(iEnv.engineBlob.data(),
                                                     iEnv.engineBlob.size()));
      deserializeOK = (safeEngine != nullptr);
    } else {
      engine.reset(rt->deserializeCudaEngine(iEnv.engineBlob.data(),
                                             iEnv.engineBlob.size(), nullptr));
      deserializeOK = (engine != nullptr);
    }
    auto endClock = std::chrono::high_resolution_clock::now();
    // Return NAN if deserialization failed.
    return deserializeOK
               ? std::chrono::duration<float, std::milli>(endClock - startClock)
                     .count()
               : NAN;
  };

  // Warm up the caches to make sure that cache thrashing isn't throwing off
  // the results.
  {
    sample::gLogInfo << "Begin deserialization warmup..." << std::endl;
    for (int32_t i = 0, e = 2; i < e; ++i) {
      timeDeserializeFn();
    }
  }
  sample::gLogInfo << "Begin deserialization engine timing..." << std::endl;
  float const first = timeDeserializeFn();

  // Check if the first deserialization succeeded.
  if (std::isnan(first)) {
    sample::gLogError << "Engine deserialization failed." << std::endl;
    return true;
  }

  sample::gLogInfo << "First deserialization time = " << first
                   << " milliseconds" << std::endl;

  // Record initial GPU memory state.
  reportGpuMemory();

  float totalTime{0.F};
  for (int32_t i = 0; i < kNB_ITERS; ++i) {
    totalTime += timeDeserializeFn();
  }
  const auto averageTime = totalTime / kNB_ITERS;
  // reportGpuMemory sometimes reports zero after a single deserialization of
  // a small engine, so use the size of memory for all the iterations.
  const auto totalEngineSizeGpu = reportGpuMemory();
  sample::gLogInfo << "Total deserialization time = " << totalTime
                   << " milliseconds in " << kNB_ITERS
                   << " iterations, average time = " << averageTime
                   << " milliseconds, first time = " << first
                   << " milliseconds." << std::endl;
  sample::gLogInfo << "Deserialization Bandwidth = "
                   << 1E-6 * totalEngineSizeGpu / totalTime << " GB/s"
                   << std::endl;

  // If the first deserialization is more than `tolerance` times slower than
  // the average deserialization, return true, which means an error occurred.
  // The tolerance is set to 2x since the deserialization time is quick and
  // susceptible to caching issues causing problems in the first timing.
  const auto tolerance = 2.0F;
  const bool isSlowerThanExpected = first > averageTime * tolerance;
  if (isSlowerThanExpected) {
    sample::gLogInfo << "First deserialization time divided by average time is "
                     << (first / averageTime) << ". Exceeds tolerance of "
                     << tolerance << "x." << std::endl;
  }
  return isSlowerThanExpected;
}
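
// Worked example (not part of the original sample): with first = 50 ms and
// averageTime = 20 ms, first / averageTime = 2.5 exceeds the 2x tolerance, so
// timeDeserialize() returns true and the run is flagged.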

std::string getLayerInformation(const InferenceEnvironment& iEnv,
                                nvinfer1::LayerInformationFormat format) {
  auto runtime = std::unique_ptr<IRuntime>(
      createInferRuntime(sample::gLogger.getTRTLogger()));
  auto inspector =
      std::unique_ptr<IEngineInspector>(iEnv.engine->createEngineInspector());
  if (!iEnv.context.empty()) {
    inspector->setExecutionContext(iEnv.context.front().get());
  }
  std::string result = inspector->getEngineInformation(format);
  return result;
}

} // namespace sample
88
fastdeploy/backends/tensorrt/common/sampleInference.h
Normal file
@@ -0,0 +1,88 @@
/*
 * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TRT_SAMPLE_INFERENCE_H
#define TRT_SAMPLE_INFERENCE_H

#include "sampleReporting.h"
#include "sampleUtils.h"

#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include "NvInfer.h"
#include "NvInferSafeRuntime.h"

namespace sample {

struct InferenceEnvironment {
  TrtUniquePtr<nvinfer1::ICudaEngine> engine;
  std::unique_ptr<Profiler> profiler;
  std::vector<TrtUniquePtr<nvinfer1::IExecutionContext>> context;
  std::vector<std::unique_ptr<Bindings>> bindings;
  bool error{false};

  std::vector<uint8_t> engineBlob;

  bool safe{false};
  std::unique_ptr<nvinfer1::safe::ICudaEngine> safeEngine;
  std::vector<std::unique_ptr<nvinfer1::safe::IExecutionContext>> safeContext;

  template <class ContextType>
  inline ContextType* getContext(int32_t streamIdx);
};

template <>
inline nvinfer1::IExecutionContext*
InferenceEnvironment::getContext(int32_t streamIdx) {
  return context[streamIdx].get();
}

template <>
inline nvinfer1::safe::IExecutionContext*
InferenceEnvironment::getContext(int32_t streamIdx) {
  return safeContext[streamIdx].get();
}

//!
//! \brief Set up contexts and bindings for inference
//!
bool setUpInference(InferenceEnvironment& iEnv,
                    const InferenceOptions& inference);

//!
//! \brief Deserialize the engine and time how long it takes.
//!
bool timeDeserialize(InferenceEnvironment& iEnv);

//!
//! \brief Run inference and collect timing, return false if any error hit
//! during inference
//!
bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv,
                  int32_t device, std::vector<InferenceTrace>& trace);

//!
//! \brief Get layer information of the engine.
//!
std::string getLayerInformation(const InferenceEnvironment& iEnv,
                                nvinfer1::LayerInformationFormat format);

} // namespace sample

#endif // TRT_SAMPLE_INFERENCE_H
1634
fastdeploy/backends/tensorrt/common/sampleOptions.cpp
Normal file
File diff suppressed because it is too large

311
fastdeploy/backends/tensorrt/common/sampleOptions.h
Normal file
@@ -0,0 +1,311 @@
/*
 * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TRT_SAMPLE_OPTIONS_H
#define TRT_SAMPLE_OPTIONS_H

#include <algorithm>
#include <array>
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "NvInfer.h"

namespace sample {

// Build default params
constexpr int32_t maxBatchNotProvided{0};
constexpr int32_t defaultMinTiming{1};
constexpr int32_t defaultAvgTiming{8};

// System default params
constexpr int32_t defaultDevice{0};

// Inference default params
constexpr int32_t defaultBatch{1};
constexpr int32_t batchNotProvided{0};
constexpr int32_t defaultStreams{1};
constexpr int32_t defaultIterations{10};
constexpr float defaultWarmUp{200.F};
constexpr float defaultDuration{3.F};
constexpr float defaultSleep{};
constexpr float defaultIdle{};

// Reporting default params
constexpr int32_t defaultAvgRuns{10};
constexpr float defaultPercentile{99};

enum class PrecisionConstraints { kNONE, kOBEY, kPREFER };

enum class ModelFormat { kANY, kCAFFE, kONNX, kUFF };

enum class SparsityFlag { kDISABLE, kENABLE, kFORCE };

enum class TimingCacheMode { kDISABLE, kLOCAL, kGLOBAL };

using Arguments = std::unordered_multimap<std::string, std::string>;

using IOFormat = std::pair<nvinfer1::DataType, nvinfer1::TensorFormats>;

using ShapeRange =
    std::array<std::vector<int32_t>,
               nvinfer1::EnumMax<nvinfer1::OptProfileSelector>()>;

using LayerPrecisions = std::unordered_map<std::string, nvinfer1::DataType>;
using LayerOutputTypes =
    std::unordered_map<std::string, std::vector<nvinfer1::DataType>>;

struct Options {
  virtual void parse(Arguments& arguments) = 0;
};

struct BaseModelOptions : public Options {
  ModelFormat format{ModelFormat::kANY};
  std::string model;

  void parse(Arguments& arguments) override;

  static void help(std::ostream& out);
};

struct UffInput : public Options {
  std::vector<std::pair<std::string, nvinfer1::Dims>> inputs;
  bool NHWC{false};

  void parse(Arguments& arguments) override;

  static void help(std::ostream& out);
};

struct ModelOptions : public Options {
  BaseModelOptions baseModel;
  std::string prototxt;
  std::vector<std::string> outputs;
  UffInput uffInputs;

  void parse(Arguments& arguments) override;

  static void help(std::ostream& out);
};

struct BuildOptions : public Options {
  int32_t maxBatch{maxBatchNotProvided};
  double workspace{-1.0};
  double dlaSRAM{-1.0};
  double dlaLocalDRAM{-1.0};
  double dlaGlobalDRAM{-1.0};
  int32_t minTiming{defaultMinTiming};
  int32_t avgTiming{defaultAvgTiming};
  bool tf32{true};
  bool fp16{false};
  bool int8{false};
  bool directIO{false};
  PrecisionConstraints precisionConstraints{PrecisionConstraints::kNONE};
  LayerPrecisions layerPrecisions;
  LayerOutputTypes layerOutputTypes;
  bool safe{false};
  bool consistency{false};
  bool restricted{false};
  bool save{false};
  bool load{false};
  bool refittable{false};
  SparsityFlag sparsity{SparsityFlag::kDISABLE};
  nvinfer1::ProfilingVerbosity profilingVerbosity{
      nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY};
  std::string engine;
  std::string calibration;
  std::unordered_map<std::string, ShapeRange> shapes;
  std::unordered_map<std::string, ShapeRange> shapesCalib;
  std::vector<IOFormat> inputFormats;
  std::vector<IOFormat> outputFormats;
  nvinfer1::TacticSources enabledTactics{0};
  nvinfer1::TacticSources disabledTactics{0};
  TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL};
  std::string timingCacheFile{};
  void parse(Arguments& arguments) override;

  static void help(std::ostream& out);
};

struct SystemOptions : public Options {
  int32_t device{defaultDevice};
  int32_t DLACore{-1};
  bool fallback{false};
  std::vector<std::string> plugins;

  void parse(Arguments& arguments) override;

  static void help(std::ostream& out);
};

struct InferenceOptions : public Options {
  int32_t batch{batchNotProvided};
  int32_t iterations{defaultIterations};
  int32_t streams{defaultStreams};
  float warmup{defaultWarmUp};
  float duration{defaultDuration};
  float sleep{defaultSleep};
  float idle{defaultIdle};
  bool overlap{true};
  bool skipTransfers{false};
  bool useManaged{false};
  bool spin{false};
  bool threads{false};
  bool graph{false};
  bool skip{false};
  bool rerun{false};
  bool timeDeserialize{false};
  bool timeRefit{false};
  std::unordered_map<std::string, std::string> inputs;
  std::unordered_map<std::string, std::vector<int32_t>> shapes;

  void parse(Arguments& arguments) override;

  static void help(std::ostream& out);
};

struct ReportingOptions : public Options {
  bool verbose{false};
  int32_t avgs{defaultAvgRuns};
  float percentile{defaultPercentile};
  bool refit{false};
  bool output{false};
  bool profile{false};
  bool layerInfo{false};
  std::string exportTimes;
  std::string exportOutput;
  std::string exportProfile;
  std::string exportLayerInfo;

  void parse(Arguments& arguments) override;

  static void help(std::ostream& out);
};

struct SafeBuilderOptions : public Options {
  std::string serialized{};
  std::string onnxModelFile{};
  bool help{false};
  bool verbose{false};
  std::vector<IOFormat> inputFormats;
  std::vector<IOFormat> outputFormats;
  bool int8{false};
  std::string calibFile{};
  std::vector<std::string> plugins;
  bool consistency{false};
  bool standard{false};

  void parse(Arguments& arguments) override;

  static void printHelp(std::ostream& out);
};

struct AllOptions : public Options {
  ModelOptions model;
  BuildOptions build;
  SystemOptions system;
  InferenceOptions inference;
  ReportingOptions reporting;
  bool helps{false};

  void parse(Arguments& arguments) override;

  static void help(std::ostream& out);
};

Arguments argsToArgumentsMap(int32_t argc, char* argv[]);

bool parseHelp(Arguments& arguments);

void helpHelp(std::ostream& out);

// Functions to print options

std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options);

std::ostream& operator<<(std::ostream& os, const UffInput& input);

std::ostream& operator<<(std::ostream& os, const IOFormat& format);

std::ostream& operator<<(std::ostream& os, const ShapeRange& dims);

std::ostream& operator<<(std::ostream& os, const ModelOptions& options);

std::ostream& operator<<(std::ostream& os, const BuildOptions& options);

std::ostream& operator<<(std::ostream& os, const SystemOptions& options);

std::ostream& operator<<(std::ostream& os, const InferenceOptions& options);

std::ostream& operator<<(std::ostream& os, const ReportingOptions& options);

std::ostream& operator<<(std::ostream& os, const AllOptions& options);

std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options);

inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) {
  for (int32_t i = 0; i < dims.nbDims; ++i) {
    os << (i ? "x" : "") << dims.d[i];
  }
  return os;
}
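
// Usage note (not part of the original sample): this prints dimensions in the
// trtexec "AxBxC" style, e.g. streaming nvinfer1::Dims3{1, 3, 224} yields
// "1x3x224".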

inline std::ostream& operator<<(std::ostream& os,
                                const nvinfer1::WeightsRole role) {
  switch (role) {
  case nvinfer1::WeightsRole::kKERNEL: {
    os << "Kernel";
    break;
  }
  case nvinfer1::WeightsRole::kBIAS: {
    os << "Bias";
    break;
  }
  case nvinfer1::WeightsRole::kSHIFT: {
    os << "Shift";
    break;
  }
  case nvinfer1::WeightsRole::kSCALE: {
    os << "Scale";
    break;
  }
  case nvinfer1::WeightsRole::kCONSTANT: {
    os << "Constant";
    break;
  }
  case nvinfer1::WeightsRole::kANY: {
    os << "Any";
    break;
  }
  }

  return os;
}

inline std::ostream& operator<<(std::ostream& os,
                                const std::vector<int32_t>& vec) {
  for (int32_t i = 0, e = static_cast<int32_t>(vec.size()); i < e; ++i) {
    os << (i ? "x" : "") << vec[i];
  }
  return os;
}

} // namespace sample

#endif // TRT_SAMPLE_OPTIONS_H
||||
480
fastdeploy/backends/tensorrt/common/sampleReporting.cpp
Normal file
@@ -0,0 +1,480 @@
/*
 * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <algorithm>
#include <cmath>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <limits>
#include <numeric>
#include <sstream>
#include <utility>

#include "sampleInference.h"
#include "sampleOptions.h"
#include "sampleReporting.h"

using namespace nvinfer1;

namespace sample {

namespace {

//!
//! \brief Find percentile in an ascending sequence of timings
//! \note percentile must be in [0, 100]. Otherwise, an exception is thrown.
//!
template <typename T>
float findPercentile(float percentile,
                     std::vector<InferenceTime> const& timings,
                     T const& toFloat) {
  int32_t const all = static_cast<int32_t>(timings.size());
  int32_t const exclude = static_cast<int32_t>((1 - percentile / 100) * all);
  if (timings.empty()) {
    return std::numeric_limits<float>::infinity();
  }
  if (percentile < 0.0f || percentile > 100.0f) {
    throw std::runtime_error("percentile is not in [0, 100]!");
  }
  return toFloat(timings[std::max(all - 1 - exclude, 0)]);
}
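
// Worked example (editor's sketch): with 200 ascending timings and
// percentile == 99, exclude == static_cast<int32_t>(0.01F * 200) == 2, so the
// function returns toFloat(timings[197]) -- the value below which roughly 99%
// of the samples fall.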

//!
//! \brief Find median in a sorted sequence of timings
//!
template <typename T>
float findMedian(std::vector<InferenceTime> const& timings, T const& toFloat) {
  if (timings.empty()) {
    return std::numeric_limits<float>::infinity();
  }

  int32_t const m = timings.size() / 2;
  if (timings.size() % 2) {
    return toFloat(timings[m]);
  }

  return (toFloat(timings[m - 1]) + toFloat(timings[m])) / 2;
}

//!
//! \brief Find coefficient of variance (which is std / mean) in a sorted
//! sequence of timings given the mean
//!
template <typename T>
float findCoeffOfVariance(std::vector<InferenceTime> const& timings,
                          T const& toFloat, float mean) {
  if (timings.empty()) {
    return 0;
  }

  if (mean == 0.F) {
    return std::numeric_limits<float>::infinity();
  }

  auto const metricAccumulator = [toFloat, mean](float acc,
                                                 InferenceTime const& a) {
    float const diff = toFloat(a) - mean;
    return acc + diff * diff;
  };
  float const variance =
      std::accumulate(timings.begin(), timings.end(), 0.F, metricAccumulator) /
      timings.size();

  return std::sqrt(variance) / mean * 100.F;
}

inline InferenceTime traceToTiming(const InferenceTrace& a) {
  return InferenceTime((a.enqEnd - a.enqStart), (a.h2dEnd - a.h2dStart),
                       (a.computeEnd - a.computeStart), (a.d2hEnd - a.d2hStart),
                       (a.d2hEnd - a.h2dStart));
}

} // namespace

void printProlog(int32_t warmups, int32_t timings, float warmupMs,
                 float benchTimeMs, std::ostream& os) {
  os << "Warmup completed " << warmups << " queries over " << warmupMs << " ms"
     << std::endl;
  os << "Timing trace has " << timings << " queries over " << benchTimeMs / 1000
     << " s" << std::endl;
}

void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg,
                 std::ostream& os) {
  int32_t count = 0;
  InferenceTime sum;

  os << std::endl;
  os << "=== Trace details ===" << std::endl;
  os << "Trace averages of " << runsPerAvg << " runs:" << std::endl;
  for (auto const& t : timings) {
    sum += t;

    if (++count == runsPerAvg) {
      // clang-format off
      os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / runsPerAvg
         << " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (end to end " << sum.e2e / runsPerAvg
         << " ms, enqueue " << sum.enq / runsPerAvg << " ms)" << std::endl;
      // clang-format on
      count = 0;
      sum.enq = 0;
      sum.h2d = 0;
      sum.compute = 0;
      sum.d2h = 0;
      sum.e2e = 0;
    }
  }
}

void printMetricExplanations(std::ostream& os) {
  os << std::endl;
  os << "=== Explanations of the performance metrics ===" << std::endl;
  os << "Total Host Walltime: the host walltime from when the first query "
        "(after warmups) is enqueued to when the "
        "last query is completed."
     << std::endl;
  os << "GPU Compute Time: the GPU latency to execute the kernels for a query."
     << std::endl;
  os << "Total GPU Compute Time: the summation of the GPU Compute Time of all "
        "the queries. If this is significantly "
        "shorter than Total Host Walltime, the GPU may be under-utilized "
        "because of host-side overheads or data "
        "transfers."
     << std::endl;
  os << "Throughput: the observed throughput computed by dividing the number "
        "of queries by the Total Host Walltime. "
        "If this is significantly lower than the reciprocal of GPU Compute "
        "Time, the GPU may be under-utilized "
        "because of host-side overheads or data transfers."
     << std::endl;
  os << "Enqueue Time: the host latency to enqueue a query. If this is longer "
        "than GPU Compute Time, the GPU may be "
        "under-utilized."
     << std::endl;
  os << "H2D Latency: the latency for host-to-device data transfers for input "
        "tensors of a single query."
     << std::endl;
  os << "D2H Latency: the latency for device-to-host data transfers for output "
        "tensors of a single query."
     << std::endl;
  os << "Latency: the summation of H2D Latency, GPU Compute Time, and D2H "
        "Latency. This is the latency to infer a "
        "single query."
     << std::endl;
  os << "End-to-End Host Latency: the duration from when the H2D of a query is "
        "called to when the D2H of the same "
        "query is completed, which includes the latency to wait for the "
        "completion of the previous query. This is "
        "the latency of a query if multiple queries are enqueued consecutively."
     << std::endl;
}

PerformanceResult
getPerformanceResult(std::vector<InferenceTime> const& timings,
                     std::function<float(InferenceTime const&)> metricGetter,
                     float percentile) {
  auto const metricComparator = [metricGetter](InferenceTime const& a,
                                               InferenceTime const& b) {
    return metricGetter(a) < metricGetter(b);
  };
  auto const metricAccumulator = [metricGetter](float acc,
                                                InferenceTime const& a) {
    return acc + metricGetter(a);
  };
  std::vector<InferenceTime> newTimings = timings;
  std::sort(newTimings.begin(), newTimings.end(), metricComparator);
  PerformanceResult result;
  result.min = metricGetter(newTimings.front());
  result.max = metricGetter(newTimings.back());
  result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0f,
                                metricAccumulator) /
                newTimings.size();
  result.median = findMedian(newTimings, metricGetter);
  result.percentile = findPercentile(percentile, newTimings, metricGetter);
  result.coeffVar = findCoeffOfVariance(newTimings, metricGetter, result.mean);
  return result;
}
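
// Illustrative usage (editor's sketch, assuming a populated `timings` vector):
//   auto const getLatency = [](InferenceTime const& t) { return t.latency(); };
//   PerformanceResult r = getPerformanceResult(timings, getLatency, 99.0F);
//   // r.min / r.mean / r.median / r.percentile now describe the latency metric.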

void printEpilog(std::vector<InferenceTime> const& timings, float walltimeMs,
                 float percentile, int32_t batchSize, std::ostream& osInfo,
                 std::ostream& osWarning, std::ostream& osVerbose) {
  float const throughput = batchSize * timings.size() / walltimeMs * 1000;

  auto const getLatency = [](InferenceTime const& t) { return t.latency(); };
  auto const latencyResult =
      getPerformanceResult(timings, getLatency, percentile);

  auto const getEndToEnd = [](InferenceTime const& t) { return t.e2e; };
  auto const e2eLatencyResult =
      getPerformanceResult(timings, getEndToEnd, percentile);

  auto const getEnqueue = [](InferenceTime const& t) { return t.enq; };
  auto const enqueueResult =
      getPerformanceResult(timings, getEnqueue, percentile);

  auto const getH2d = [](InferenceTime const& t) { return t.h2d; };
  auto const h2dResult = getPerformanceResult(timings, getH2d, percentile);

  auto const getCompute = [](InferenceTime const& t) { return t.compute; };
  auto const gpuComputeResult =
      getPerformanceResult(timings, getCompute, percentile);

  auto const getD2h = [](InferenceTime const& t) { return t.d2h; };
  auto const d2hResult = getPerformanceResult(timings, getD2h, percentile);

  auto const toPerfString = [percentile](const PerformanceResult& r) {
    std::stringstream s;
    s << "min = " << r.min << " ms, max = " << r.max << " ms, mean = " << r.mean
      << " ms, "
      << "median = " << r.median << " ms, percentile(" << percentile
      << "%) = " << r.percentile << " ms";
    return s.str();
  };

  osInfo << std::endl;
  osInfo << "=== Performance summary ===" << std::endl;
  osInfo << "Throughput: " << throughput << " qps" << std::endl;
  osInfo << "Latency: " << toPerfString(latencyResult) << std::endl;
  osInfo << "End-to-End Host Latency: " << toPerfString(e2eLatencyResult)
         << std::endl;
  osInfo << "Enqueue Time: " << toPerfString(enqueueResult) << std::endl;
  osInfo << "H2D Latency: " << toPerfString(h2dResult) << std::endl;
  osInfo << "GPU Compute Time: " << toPerfString(gpuComputeResult) << std::endl;
  osInfo << "D2H Latency: " << toPerfString(d2hResult) << std::endl;
  osInfo << "Total Host Walltime: " << walltimeMs / 1000 << " s" << std::endl;
  osInfo << "Total GPU Compute Time: "
         << gpuComputeResult.mean * timings.size() / 1000 << " s" << std::endl;

  // Report warnings if the throughput is bound by other factors than GPU
  // Compute Time.
  constexpr float kENQUEUE_BOUND_REPORTING_THRESHOLD{0.8F};
  if (enqueueResult.median >
      kENQUEUE_BOUND_REPORTING_THRESHOLD * gpuComputeResult.median) {
    osWarning << "* Throughput may be bound by Enqueue Time rather than GPU "
                 "Compute and the GPU may be under-utilized."
              << std::endl;
    osWarning << " If not already in use, --useCudaGraph (utilize CUDA graphs "
                 "where possible) may increase the "
                 "throughput."
              << std::endl;
  }
  if (h2dResult.median >= gpuComputeResult.median) {
    osWarning << "* Throughput may be bound by host-to-device transfers for "
                 "the inputs rather than GPU Compute and "
                 "the GPU may be under-utilized."
              << std::endl;
    osWarning << " Add --noDataTransfers flag to disable data transfers."
              << std::endl;
  }
  if (d2hResult.median >= gpuComputeResult.median) {
    osWarning << "* Throughput may be bound by device-to-host transfers for "
                 "the outputs rather than GPU Compute "
                 "and the GPU may be under-utilized."
              << std::endl;
    osWarning << " Add --noDataTransfers flag to disable data transfers."
              << std::endl;
  }

  // Report warnings if the GPU Compute Time is unstable.
  constexpr float kUNSTABLE_PERF_REPORTING_THRESHOLD{1.0F};
  if (gpuComputeResult.coeffVar > kUNSTABLE_PERF_REPORTING_THRESHOLD) {
    osWarning
        << "* GPU compute time is unstable, with coefficient of variance = "
        << gpuComputeResult.coeffVar << "%." << std::endl;
    osWarning << " If not already in use, locking GPU clock frequency or "
                 "adding --useSpinWait may improve the "
              << "stability." << std::endl;
  }

  // Explain what the metrics mean.
  osInfo << "Explanations of the performance metrics are printed in the "
            "verbose logs."
         << std::endl;
  printMetricExplanations(osVerbose);

  osInfo << std::endl;
}

void printPerformanceReport(std::vector<InferenceTrace> const& trace,
                            const ReportingOptions& reporting, float warmupMs,
                            int32_t batchSize, std::ostream& osInfo,
                            std::ostream& osWarning, std::ostream& osVerbose) {
  auto const isNotWarmup = [&warmupMs](const InferenceTrace& a) {
    return a.computeStart >= warmupMs;
  };
  auto const noWarmup = std::find_if(trace.begin(), trace.end(), isNotWarmup);
  int32_t const warmups = noWarmup - trace.begin();
  float const benchTime = trace.back().d2hEnd - noWarmup->h2dStart;
  // when implicit batch used, batchSize = options.inference.batch, which is
  // parsed through --batch
  // when explicit batch used, batchSize = options.inference.batch = 0
  // treat inference with explicit batch as a single query and report the
  // throughput
  batchSize = batchSize ? batchSize : 1;
  printProlog(warmups * batchSize, (trace.size() - warmups) * batchSize,
              warmupMs, benchTime, osInfo);

  std::vector<InferenceTime> timings(trace.size() - warmups);
  std::transform(noWarmup, trace.end(), timings.begin(), traceToTiming);
  printTiming(timings, reporting.avgs, osInfo);
  printEpilog(timings, benchTime, reporting.percentile, batchSize, osInfo,
              osWarning, osVerbose);

  if (!reporting.exportTimes.empty()) {
    exportJSONTrace(trace, reporting.exportTimes);
  }
}

//! Printed format:
//! [ value, ...]
//! value ::= { "startEnqMs" : time, "endEnqMs" : time, "startH2dMs" : time,
//!             "endH2dMs" : time, "startComputeMs" : time,
//!             "endComputeMs" : time, "startD2hMs" : time, "endD2hMs" : time,
//!             "h2dMs" : time, "computeMs" : time, "d2hMs" : time,
//!             "latencyMs" : time, "endToEndMs" : time }
//!
void exportJSONTrace(std::vector<InferenceTrace> const& trace,
                     std::string const& fileName) {
  std::ofstream os(fileName, std::ofstream::trunc);
  os << "[" << std::endl;
  char const* sep = " ";
  for (auto const& t : trace) {
    InferenceTime const it(traceToTiming(t));
    os << sep << "{ ";
    sep = ", ";
    // clang-format off
    os << "\"startEnqMs\" : " << t.enqStart << sep << "\"endEnqMs\" : " << t.enqEnd << sep
       << "\"startH2dMs\" : " << t.h2dStart << sep << "\"endH2dMs\" : " << t.h2dEnd << sep
       << "\"startComputeMs\" : " << t.computeStart << sep << "\"endComputeMs\" : " << t.computeEnd << sep
       << "\"startD2hMs\" : " << t.d2hStart << sep << "\"endD2hMs\" : " << t.d2hEnd << sep
       << "\"h2dMs\" : " << it.h2d << sep << "\"computeMs\" : " << it.compute << sep
       << "\"d2hMs\" : " << it.d2h << sep << "\"latencyMs\" : " << it.latency() << sep
       << "\"endToEndMs\" : " << it.e2e << " }" << std::endl;
    // clang-format on
  }
  os << "]" << std::endl;
}
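
// Example of a single exported element (editor's note, illustrative values):
//   { "startEnqMs" : 0.012, "endEnqMs" : 0.034, "startH2dMs" : 0.040, ...,
//     "latencyMs" : 1.875, "endToEndMs" : 2.011 }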

void Profiler::reportLayerTime(char const* layerName, float timeMs) noexcept {
  if (mIterator == mLayers.end()) {
    bool const first = !mLayers.empty() && mLayers.begin()->name == layerName;
    mUpdatesCount += mLayers.empty() || first;
    if (first) {
      mIterator = mLayers.begin();
    } else {
      mLayers.emplace_back();
      mLayers.back().name = layerName;
      mIterator = mLayers.end() - 1;
    }
  }

  mIterator->timeMs += timeMs;
  ++mIterator;
}

void Profiler::print(std::ostream& os) const noexcept {
  std::string const nameHdr("Layer");
  std::string const timeHdr(" Time (ms)");
  std::string const avgHdr(" Avg. Time (ms)");
  std::string const percentageHdr(" Time %");

  float const totalTimeMs = getTotalTime();

  auto const cmpLayer = [](LayerProfile const& a, LayerProfile const& b) {
    return a.name.size() < b.name.size();
  };
  auto const longestName =
      std::max_element(mLayers.begin(), mLayers.end(), cmpLayer);
  auto const nameLength =
      std::max(longestName->name.size() + 1, nameHdr.size());
  auto const timeLength = timeHdr.size();
  auto const avgLength = avgHdr.size();
  auto const percentageLength = percentageHdr.size();

  os << std::endl
     << "=== Profile (" << mUpdatesCount << " iterations ) ===" << std::endl
     << std::setw(nameLength) << nameHdr << timeHdr << avgHdr << percentageHdr
     << std::endl;

  for (auto const& p : mLayers) {
    // clang-format off
    os << std::setw(nameLength) << p.name << std::setw(timeLength) << std::fixed << std::setprecision(2) << p.timeMs
       << std::setw(avgLength) << std::fixed << std::setprecision(4) << p.timeMs / mUpdatesCount
       << std::setw(percentageLength) << std::fixed << std::setprecision(1) << p.timeMs / totalTimeMs * 100
       << std::endl;
  }
  {
    os << std::setw(nameLength) << "Total" << std::setw(timeLength) << std::fixed << std::setprecision(2)
       << totalTimeMs << std::setw(avgLength) << std::fixed << std::setprecision(4) << totalTimeMs / mUpdatesCount
       << std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0 << std::endl;
    // clang-format on
  }
  os << std::endl;
}

void Profiler::exportJSONProfile(std::string const& fileName) const noexcept {
  std::ofstream os(fileName, std::ofstream::trunc);
  os << "[" << std::endl
     << " { \"count\" : " << mUpdatesCount << " }" << std::endl;

  auto const totalTimeMs = getTotalTime();

  for (auto const& l : mLayers) {
    // clang-format off
    os << ", {" << " \"name\" : \"" << l.name << "\""
          ", \"timeMs\" : " << l.timeMs
       << ", \"averageMs\" : " << l.timeMs / mUpdatesCount
       << ", \"percentage\" : " << l.timeMs / totalTimeMs * 100
       << " }" << std::endl;
    // clang-format on
  }
  os << "]" << std::endl;
}
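
// Illustrative usage (editor's sketch): the profiler is attached through the
// standard nvinfer1::IExecutionContext::setProfiler() API; the surrounding
// engine/context setup is assumed.
//   sample::Profiler profiler;
//   context->setProfiler(&profiler);
//   // ... run the timed inference iterations ...
//   profiler.print(std::cout);
//   profiler.exportJSONProfile("profile.json");  // hypothetical output path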

void dumpInputs(nvinfer1::IExecutionContext const& context,
                Bindings const& bindings, std::ostream& os) {
  os << "Input Tensors:" << std::endl;
  bindings.dumpInputs(context, os);
}

void dumpOutputs(nvinfer1::IExecutionContext const& context,
                 Bindings const& bindings, std::ostream& os) {
  os << "Output Tensors:" << std::endl;
  bindings.dumpOutputs(context, os);
}

void exportJSONOutput(nvinfer1::IExecutionContext const& context,
                      Bindings const& bindings, std::string const& fileName,
                      int32_t batch) {
  std::ofstream os(fileName, std::ofstream::trunc);
  std::string sep = " ";
  auto const output = bindings.getOutputBindings();
  os << "[" << std::endl;
  for (auto const& binding : output) {
    // clang-format off
    os << sep << "{ \"name\" : \"" << binding.first << "\"" << std::endl;
    sep = ", ";
    os << " " << sep << "\"dimensions\" : \"";
    bindings.dumpBindingDimensions(binding.second, context, os);
    os << "\"" << std::endl;
    os << " " << sep << "\"values\" : [ ";
    bindings.dumpBindingValues(context, binding.second, os, sep, batch);
    os << " ]" << std::endl << " }" << std::endl;
    // clang-format on
  }
  os << "]" << std::endl;
}

} // namespace sample
211
fastdeploy/backends/tensorrt/common/sampleReporting.h
Normal file
@@ -0,0 +1,211 @@
/*
 * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TRT_SAMPLE_REPORTING_H
#define TRT_SAMPLE_REPORTING_H

#include <functional>
#include <iostream>

#include "NvInfer.h"

#include "sampleOptions.h"
#include "sampleUtils.h"

namespace sample {

//!
//! \struct InferenceTime
//! \brief Measurement times in milliseconds
//!
struct InferenceTime {
  InferenceTime(float q, float i, float c, float o, float e)
      : enq(q), h2d(i), compute(c), d2h(o), e2e(e) {}

  InferenceTime() = default;
  InferenceTime(InferenceTime const&) = default;
  InferenceTime(InferenceTime&&) = default;
  InferenceTime& operator=(InferenceTime const&) = default;
  InferenceTime& operator=(InferenceTime&&) = default;
  ~InferenceTime() = default;

  float enq{0};     // Enqueue
  float h2d{0};     // Host to Device
  float compute{0}; // Compute
  float d2h{0};     // Device to Host
  float e2e{0};     // end to end

  // ideal latency
  float latency() const { return h2d + compute + d2h; }
};

//!
//! \struct InferenceTrace
//! \brief Measurement points in milliseconds
//!
struct InferenceTrace {
  InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs,
                 float ce, float os, float oe)
      : stream(s), enqStart(es), enqEnd(ee), h2dStart(is), h2dEnd(ie),
        computeStart(cs), computeEnd(ce), d2hStart(os), d2hEnd(oe) {}

  InferenceTrace() = default;
  InferenceTrace(InferenceTrace const&) = default;
  InferenceTrace(InferenceTrace&&) = default;
  InferenceTrace& operator=(InferenceTrace const&) = default;
  InferenceTrace& operator=(InferenceTrace&&) = default;
  ~InferenceTrace() = default;

  int32_t stream{0};
  float enqStart{0};
  float enqEnd{0};
  float h2dStart{0};
  float h2dEnd{0};
  float computeStart{0};
  float computeEnd{0};
  float d2hStart{0};
  float d2hEnd{0};
};

inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b) {
  return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute,
                       a.d2h + b.d2h, a.e2e + b.e2e);
}

inline InferenceTime operator+=(InferenceTime& a, InferenceTime const& b) {
  return a = a + b;
}

//!
//! \struct PerformanceResult
//! \brief Performance result of a performance metric
//!
struct PerformanceResult {
  float min{0};
  float max{0};
  float mean{0};
  float median{0};
  float percentile{0};
  float coeffVar{0}; // coefficient of variation
};

//!
//! \brief Print benchmarking time and number of traces collected
//!
void printProlog(int32_t warmups, int32_t timings, float warmupMs,
                 float walltime, std::ostream& os);

//!
//! \brief Print a timing trace
//!
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg,
                 std::ostream& os);

//!
//! \brief Print the performance summary of a trace
//!
void printEpilog(std::vector<InferenceTime> const& timings, float walltimeMs,
                 float percentile, int32_t batchSize, std::ostream& osInfo,
                 std::ostream& osWarning, std::ostream& osVerbose);

//!
//! \brief Get the result of a specific performance metric from a trace
//!
PerformanceResult
getPerformanceResult(std::vector<InferenceTime> const& timings,
                     std::function<float(InferenceTime const&)> metricGetter,
                     float percentile);

//!
//! \brief Print the explanations of the performance metrics printed in
//! printEpilog() function.
//!
void printMetricExplanations(std::ostream& os);

//!
//! \brief Print and summarize a timing trace
//!
void printPerformanceReport(std::vector<InferenceTrace> const& trace,
                            ReportingOptions const& reporting, float warmupMs,
                            int32_t batchSize, std::ostream& osInfo,
                            std::ostream& osWarning, std::ostream& osVerbose);

//!
//! \brief Export a timing trace to JSON file
//!
void exportJSONTrace(std::vector<InferenceTrace> const& trace,
                     std::string const& fileName);

//!
//! \brief Print input tensors to stream
//!
void dumpInputs(nvinfer1::IExecutionContext const& context,
                Bindings const& bindings, std::ostream& os);

//!
//! \brief Print output tensors to stream
//!
void dumpOutputs(nvinfer1::IExecutionContext const& context,
                 Bindings const& bindings, std::ostream& os);

//!
//! \brief Export output tensors to JSON file
//!
void exportJSONOutput(nvinfer1::IExecutionContext const& context,
                      Bindings const& bindings, std::string const& fileName,
                      int32_t batch);

//!
//! \struct LayerProfile
//! \brief Layer profile information
//!
struct LayerProfile {
  std::string name;
  float timeMs{0};
};

//!
//! \class Profiler
//! \brief Collect per-layer profile information, assuming times are reported in
//! the same order
//!
class Profiler : public nvinfer1::IProfiler {
 public:
  void reportLayerTime(char const* layerName, float timeMs) noexcept override;

  void print(std::ostream& os) const noexcept;

  //!
  //! \brief Export a profile to JSON file
  //!
  void exportJSONProfile(std::string const& fileName) const noexcept;

 private:
  float getTotalTime() const noexcept {
    auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) {
      return accumulator + lp.timeMs;
    };
    return std::accumulate(mLayers.begin(), mLayers.end(), 0.0, plusLayerTime);
  }

  std::vector<LayerProfile> mLayers;
  std::vector<LayerProfile>::iterator mIterator{mLayers.begin()};
  int32_t mUpdatesCount{0};
};

} // namespace sample

#endif // TRT_SAMPLE_REPORTING_H
494
fastdeploy/backends/tensorrt/common/sampleUtils.h
Normal file
@@ -0,0 +1,494 @@
/*
 * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TRT_SAMPLE_UTILS_H
#define TRT_SAMPLE_UTILS_H

#include <algorithm>
#include <fstream>
#include <iostream>
#include <memory>
#include <numeric>
#include <random>
#include <sstream>
#include <stdexcept>
#include <type_traits>
#include <unordered_map>
#include <vector>

#include <cuda.h>
#include <cuda_fp16.h>

#include "NvInfer.h"

#include "common.h"
#include "logger.h"
#include "sampleDevice.h"
#include "sampleOptions.h"

namespace sample {

inline int dataTypeSize(nvinfer1::DataType dataType) {
  switch (dataType) {
  case nvinfer1::DataType::kINT32:
  case nvinfer1::DataType::kFLOAT:
    return 4;
  case nvinfer1::DataType::kHALF:
    return 2;
  case nvinfer1::DataType::kBOOL:
  case nvinfer1::DataType::kINT8:
    return 1;
  }
  return 0;
}

template <typename T> inline T roundUp(T m, T n) {
  return ((m + n - 1) / n) * n;
}

inline int volume(const nvinfer1::Dims& d) {
  return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int>());
}
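
// Worked example (editor's note): for dims {1, 3, 224, 224},
// volume(d) == 1 * 3 * 224 * 224 == 150528 elements.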

//! comps is the number of components in a vector. Ignored if vecDim < 0.
inline int64_t volume(const nvinfer1::Dims& dims, const nvinfer1::Dims& strides,
                      int vecDim, int comps, int batch) {
  int maxNbElems = 1;
  for (int i = 0; i < dims.nbDims; ++i) {
    // Get effective length of axis.
    int d = dims.d[i];
    // If any dimension is 0, it is an empty tensor.
    if (d == 0) {
      return 0;
    }
    if (i == vecDim) {
      d = samplesCommon::divUp(d, comps);
    }
    maxNbElems = std::max(maxNbElems, d * strides.d[i]);
  }
  return static_cast<int64_t>(maxNbElems) * batch * (vecDim < 0 ? 1 : comps);
}
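
// Worked example (editor's note): dims = {3, 4}, strides = {4, 1}, vecDim = -1
// and batch = 2 give maxNbElems = max(3 * 4, 4 * 1) = 12, so the function
// returns 12 * 2 * 1 = 24 elements.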

inline int64_t volume(nvinfer1::Dims dims, int vecDim, int comps, int batch) {
  if (vecDim != -1) {
    dims.d[vecDim] = roundUp(dims.d[vecDim], comps);
  }
  return volume(dims) * std::max(batch, 1);
}

inline nvinfer1::Dims toDims(const std::vector<int>& vec) {
  int limit = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
  if (static_cast<int>(vec.size()) > limit) {
    sample::gLogWarning
        << "Vector too long, only first 8 elements are used in dimension."
        << std::endl;
  }
  // Pick first nvinfer1::Dims::MAX_DIMS elements
  nvinfer1::Dims dims{std::min(static_cast<int>(vec.size()), limit), {}};
  std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d));
  return dims;
}
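
// Illustrative usage (editor's sketch):
//   nvinfer1::Dims d = toDims({1, 3, 224, 224});  // d.nbDims == 4
// Vectors longer than nvinfer1::Dims::MAX_DIMS are truncated with a warning.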

template <typename T>
inline void fillBuffer(void* buffer, int64_t volume, T min, T max) {
  T* typedBuffer = static_cast<T*>(buffer);
  std::default_random_engine engine;
  if (std::is_integral<T>::value) {
    std::uniform_int_distribution<int> distribution(min, max);
    auto generator = [&engine, &distribution]() {
      return static_cast<T>(distribution(engine));
    };
    std::generate(typedBuffer, typedBuffer + volume, generator);
  } else {
    std::uniform_real_distribution<float> distribution(min, max);
    auto generator = [&engine, &distribution]() {
      return static_cast<T>(distribution(engine));
    };
    std::generate(typedBuffer, typedBuffer + volume, generator);
  }
}

// Specialization needed for custom type __half
template <typename H>
inline void fillBufferHalf(void* buffer, int64_t volume, H min, H max) {
  H* typedBuffer = static_cast<H*>(buffer);
  std::default_random_engine engine;
  std::uniform_real_distribution<float> distribution(min, max);
  auto generator = [&engine, &distribution]() {
    return static_cast<H>(distribution(engine));
  };
  std::generate(typedBuffer, typedBuffer + volume, generator);
}
template <>
inline void fillBuffer<__half>(void* buffer, int64_t volume, __half min,
                               __half max) {
  fillBufferHalf(buffer, volume, min, max);
}

template <typename T>
inline void dumpBuffer(const void* buffer, const std::string& separator,
                       std::ostream& os, const Dims& dims, const Dims& strides,
                       int32_t vectorDim, int32_t spv) {
  const int64_t volume = std::accumulate(dims.d, dims.d + dims.nbDims, 1,
                                         std::multiplies<int64_t>());
  const T* typedBuffer = static_cast<const T*>(buffer);
  std::string sep;
  for (int64_t v = 0; v < volume; ++v) {
    int64_t curV = v;
    int32_t dataOffset = 0;
    for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex) {
      int32_t dimVal = curV % dims.d[dimIndex];
      if (dimIndex == vectorDim) {
        dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv;
      } else {
        dataOffset +=
            dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 1 : spv);
      }
      curV /= dims.d[dimIndex];
      ASSERT(curV >= 0);
    }

    os << sep << typedBuffer[dataOffset];
    sep = separator;
  }
}

inline void loadFromFile(std::string const& fileName, char* dst, size_t size) {
  ASSERT(dst);

  std::ifstream file(fileName, std::ios::in | std::ios::binary);
  if (file.is_open()) {
    file.read(dst, size);
    file.close();
  } else {
    std::stringstream msg;
    msg << "Cannot open file " << fileName << "!";
    throw std::invalid_argument(msg.str());
  }
}

struct Binding {
  bool isInput{false};
  std::unique_ptr<IMirroredBuffer> buffer;
  int64_t volume{0};
  nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT};

  void fill(const std::string& fileName) {
    loadFromFile(fileName, static_cast<char*>(buffer->getHostBuffer()),
                 buffer->getSize());
  }

  void fill() {
    switch (dataType) {
    case nvinfer1::DataType::kBOOL: {
      fillBuffer<bool>(buffer->getHostBuffer(), volume, 0, 1);
      break;
    }
    case nvinfer1::DataType::kINT32: {
      fillBuffer<int32_t>(buffer->getHostBuffer(), volume, -128, 127);
      break;
    }
    case nvinfer1::DataType::kINT8: {
      fillBuffer<int8_t>(buffer->getHostBuffer(), volume, -128, 127);
      break;
    }
    case nvinfer1::DataType::kFLOAT: {
      fillBuffer<float>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
      break;
    }
    case nvinfer1::DataType::kHALF: {
      fillBuffer<__half>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
      break;
    }
    }
  }

  void dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim,
            int32_t spv, const std::string separator = " ") const {
    switch (dataType) {
    case nvinfer1::DataType::kBOOL: {
      dumpBuffer<bool>(buffer->getHostBuffer(), separator, os, dims, strides,
                       vectorDim, spv);
      break;
    }
    case nvinfer1::DataType::kINT32: {
      dumpBuffer<int32_t>(buffer->getHostBuffer(), separator, os, dims, strides,
                          vectorDim, spv);
      break;
    }
    case nvinfer1::DataType::kINT8: {
      dumpBuffer<int8_t>(buffer->getHostBuffer(), separator, os, dims, strides,
                         vectorDim, spv);
      break;
    }
    case nvinfer1::DataType::kFLOAT: {
      dumpBuffer<float>(buffer->getHostBuffer(), separator, os, dims, strides,
                        vectorDim, spv);
      break;
    }
    case nvinfer1::DataType::kHALF: {
      dumpBuffer<__half>(buffer->getHostBuffer(), separator, os, dims, strides,
                         vectorDim, spv);
      break;
    }
    }
  }
};

class Bindings {
 public:
  Bindings() = delete;
  explicit Bindings(bool useManaged) : mUseManaged(useManaged) {}

  void addBinding(int b, const std::string& name, bool isInput, int64_t volume,
                  nvinfer1::DataType dataType,
                  const std::string& fileName = "") {
    while (mBindings.size() <= static_cast<size_t>(b)) {
      mBindings.emplace_back();
      mDevicePointers.emplace_back();
    }
    mNames[name] = b;
    if (mBindings[b].buffer == nullptr) {
      if (mUseManaged) {
        mBindings[b].buffer.reset(new UnifiedMirroredBuffer);
      } else {
        mBindings[b].buffer.reset(new DiscreteMirroredBuffer);
      }
    }
    mBindings[b].isInput = isInput;
    // Some memory allocators return nullptr when allocating zero bytes, but
    // TensorRT requires a non-null ptr
    // even for empty tensors, so allocate a dummy byte.
    if (volume == 0) {
      mBindings[b].buffer->allocate(1);
    } else {
      mBindings[b].buffer->allocate(
          static_cast<size_t>(volume) *
          static_cast<size_t>(dataTypeSize(dataType)));
    }
    mBindings[b].volume = volume;
    mBindings[b].dataType = dataType;
    mDevicePointers[b] = mBindings[b].buffer->getDeviceBuffer();
    if (isInput) {
      if (fileName.empty()) {
        fill(b);
      } else {
        fill(b, fileName);
      }
    }
  }

  void** getDeviceBuffers() { return mDevicePointers.data(); }

  void transferInputToDevice(TrtCudaStream& stream) {
    for (auto& b : mNames) {
      if (mBindings[b.second].isInput) {
        mBindings[b.second].buffer->hostToDevice(stream);
      }
    }
  }

  void transferOutputToHost(TrtCudaStream& stream) {
    for (auto& b : mNames) {
      if (!mBindings[b.second].isInput) {
        mBindings[b.second].buffer->deviceToHost(stream);
      }
    }
  }

  void fill(int binding, const std::string& fileName) {
    mBindings[binding].fill(fileName);
  }

  void fill(int binding) { mBindings[binding].fill(); }

  void dumpBindingDimensions(int binding,
                             const nvinfer1::IExecutionContext& context,
                             std::ostream& os) const {
    const auto dims = context.getBindingDimensions(binding);
    // Do not add a newline terminator, because the caller may be outputting a
    // JSON string.
    os << dims;
  }

  void dumpBindingValues(const nvinfer1::IExecutionContext& context,
                         int binding, std::ostream& os,
                         const std::string& separator = " ",
                         int32_t batch = 1) const {
    Dims dims = context.getBindingDimensions(binding);
    Dims strides = context.getStrides(binding);
    int32_t vectorDim = context.getEngine().getBindingVectorizedDim(binding);
    const int32_t spv =
        context.getEngine().getBindingComponentsPerElement(binding);

    if (context.getEngine().hasImplicitBatchDimension()) {
      auto insertN = [](Dims& d, int32_t bs) {
        const int32_t nbDims = d.nbDims;
        ASSERT(nbDims < Dims::MAX_DIMS);
        std::copy_backward(&d.d[0], &d.d[nbDims], &d.d[nbDims + 1]);
        d.d[0] = bs;
        d.nbDims = nbDims + 1;
      };
      int32_t batchStride = 0;
      for (int32_t i = 0; i < strides.nbDims; ++i) {
        if (strides.d[i] * dims.d[i] > batchStride) {
          batchStride = strides.d[i] * dims.d[i];
        }
      }
      insertN(dims, batch);
      insertN(strides, batchStride);
      vectorDim = (vectorDim == -1) ? -1 : vectorDim + 1;
    }

    mBindings[binding].dump(os, dims, strides, vectorDim, spv, separator);
  }

  void dumpInputs(const nvinfer1::IExecutionContext& context,
                  std::ostream& os) const {
    auto isInput = [](const Binding& b) { return b.isInput; };
    dumpBindings(context, isInput, os);
  }

  void dumpOutputs(const nvinfer1::IExecutionContext& context,
                   std::ostream& os) const {
    auto isOutput = [](const Binding& b) { return !b.isInput; };
    dumpBindings(context, isOutput, os);
  }

  void dumpBindings(const nvinfer1::IExecutionContext& context,
                    std::ostream& os) const {
    auto all = [](const Binding& b) { return true; };
    dumpBindings(context, all, os);
  }

  void dumpBindings(const nvinfer1::IExecutionContext& context,
                    bool (*predicate)(const Binding& b),
                    std::ostream& os) const {
    for (const auto& n : mNames) {
      const auto binding = n.second;
      if (predicate(mBindings[binding])) {
        os << n.first << ": (";
        dumpBindingDimensions(binding, context, os);
        os << ")" << std::endl;

        dumpBindingValues(context, binding, os);
        os << std::endl;
      }
    }
  }

  std::unordered_map<std::string, int> getInputBindings() const {
    auto isInput = [](const Binding& b) { return b.isInput; };
    return getBindings(isInput);
  }

  std::unordered_map<std::string, int> getOutputBindings() const {
    auto isOutput = [](const Binding& b) { return !b.isInput; };
    return getBindings(isOutput);
  }

  std::unordered_map<std::string, int> getBindings() const {
    auto all = [](const Binding& b) { return true; };
    return getBindings(all);
  }

  std::unordered_map<std::string, int>
  getBindings(bool (*predicate)(const Binding& b)) const {
    std::unordered_map<std::string, int> bindings;
    for (const auto& n : mNames) {
      const auto binding = n.second;
      if (predicate(mBindings[binding])) {
        bindings.insert(n);
      }
    }
    return bindings;
  }

 private:
  std::unordered_map<std::string, int32_t> mNames;
  std::vector<Binding> mBindings;
  std::vector<void*> mDevicePointers;
  bool mUseManaged{false};
};

template <typename T> struct TrtDestroyer {
  void operator()(T* t) { t->destroy(); }
};

template <typename T> using TrtUniquePtr = std::unique_ptr<T, TrtDestroyer<T>>;

inline bool broadcastIOFormats(const std::vector<IOFormat>& formats,
                               size_t nbBindings, bool isInput = true) {
  bool broadcast = formats.size() == 1;
  bool validFormatsCount = broadcast || (formats.size() == nbBindings);
  if (!formats.empty() && !validFormatsCount) {
    if (isInput) {
      throw std::invalid_argument(
          "The number of inputIOFormats must match network's inputs or be one "
          "for broadcasting.");
    } else {
      throw std::invalid_argument(
          "The number of outputIOFormats must match network's outputs or be "
          "one for broadcasting.");
    }
  }
  return broadcast;
}
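
// Illustrative usage (editor's sketch; `inputFormats` and `nbInputs` are
// assumed names): a single user-specified IOFormat is broadcast to every
// input binding.
//   bool broadcast = broadcastIOFormats(inputFormats, nbInputs, true);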

inline std::vector<char> loadTimingCacheFile(const std::string inFileName) {
  std::ifstream iFile(inFileName, std::ios::in | std::ios::binary);
  if (!iFile) {
    sample::gLogWarning << "Could not read timing cache from: " << inFileName
                        << ". A new timing cache will be generated and written."
                        << std::endl;
    return std::vector<char>();
  }
  iFile.seekg(0, std::ifstream::end);
  size_t fsize = iFile.tellg();
  iFile.seekg(0, std::ifstream::beg);
  std::vector<char> content(fsize);
  iFile.read(content.data(), fsize);
  iFile.close();
  sample::gLogInfo << "Loaded " << fsize << " bytes of timing cache from "
                   << inFileName << std::endl;
  return content;
}

inline void saveTimingCacheFile(const std::string outFileName,
                                const IHostMemory* blob) {
  std::ofstream oFile(outFileName, std::ios::out | std::ios::binary);
  if (!oFile) {
    sample::gLogWarning << "Could not write timing cache to: " << outFileName
                        << std::endl;
    return;
  }
  oFile.write((char*)blob->data(), blob->size());
  oFile.close();
  sample::gLogInfo << "Saved " << blob->size() << " bytes of timing cache to "
                   << outFileName << std::endl;
}

inline int32_t getCudaDriverVersion() {
  int32_t version{-1};
  cudaCheck(cudaDriverGetVersion(&version));
  return version;
}

inline int32_t getCudaRuntimeVersion() {
  int32_t version{-1};
  cudaCheck(cudaRuntimeGetVersion(&version));
  return version;
}

} // namespace sample

#endif // TRT_SAMPLE_UTILS_H
453
fastdeploy/backends/tensorrt/trt_backend.cc
Normal file
@@ -0,0 +1,453 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/backends/tensorrt/trt_backend.h"
#include "fastdeploy/utils/utils.h"
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"
#endif

namespace fastdeploy {
size_t TrtDataTypeSize(const nvinfer1::DataType& dtype) {
  if (dtype == nvinfer1::DataType::kFLOAT) {
    return sizeof(float);
  } else if (dtype == nvinfer1::DataType::kHALF) {
    return sizeof(float) / 2;
  } else if (dtype == nvinfer1::DataType::kINT8) {
    return sizeof(int8_t);
  } else if (dtype == nvinfer1::DataType::kINT32) {
    return sizeof(int32_t);
  }
  // kBOOL
  return sizeof(bool);
}

FDDataType GetFDDataType(const nvinfer1::DataType& dtype) {
  if (dtype == nvinfer1::DataType::kFLOAT) {
    return FDDataType::FP32;
  } else if (dtype == nvinfer1::DataType::kHALF) {
    return FDDataType::FP16;
  } else if (dtype == nvinfer1::DataType::kINT8) {
    return FDDataType::INT8;
  } else if (dtype == nvinfer1::DataType::kINT32) {
    return FDDataType::INT32;
  }
  // kBOOL
  return FDDataType::BOOL;
}

std::vector<int> toVec(const nvinfer1::Dims& dim) {
  std::vector<int> out(dim.d, dim.d + dim.nbDims);
  return out;
}

bool TrtBackend::InitFromTrt(const std::string& trt_engine_file) {
  if (initialized_) {
    FDERROR << "TrtBackend is already initialized, cannot initialize again."
            << std::endl;
    return false;
  }
  std::ifstream fin(trt_engine_file, std::ios::binary | std::ios::in);
  if (!fin) {
    FDERROR << "Failed to open TensorRT Engine file " << trt_engine_file
            << std::endl;
    return false;
  }
  fin.seekg(0, std::ios::end);
  std::string engine_buffer;
  engine_buffer.resize(fin.tellg());
  fin.seekg(0, std::ios::beg);
  fin.read(&(engine_buffer.at(0)), engine_buffer.size());
  fin.close();
  SampleUniquePtr<IRuntime> runtime{
      createInferRuntime(sample::gLogger.getTRTLogger())};
  if (!runtime) {
    FDERROR << "Failed to call createInferRuntime()." << std::endl;
    return false;
  }
  engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
      runtime->deserializeCudaEngine(engine_buffer.data(),
                                     engine_buffer.size()),
      samplesCommon::InferDeleter());
  if (!engine_) {
    FDERROR << "Failed to call deserializeCudaEngine()." << std::endl;
    return false;
  }

  context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
      engine_->createExecutionContext());
  FDASSERT(cudaStreamCreate(&stream_) == 0,
           "[ERROR] Error occurs while calling cudaStreamCreate().");
  GetInputOutputInfo();
  initialized_ = true;
  return true;
}

bool TrtBackend::InitFromPaddle(const std::string& model_file,
                                const std::string& params_file,
                                const TrtBackendOption& option, bool verbose) {
  if (initialized_) {
    FDERROR << "TrtBackend is already initialized, cannot initialize again."
            << std::endl;
    return false;
  }

#ifdef ENABLE_PADDLE_FRONTEND
  char* model_content_ptr;
  int model_content_size = 0;
  if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
                           &model_content_ptr, &model_content_size, 11, true,
                           verbose, true, true, true)) {
    FDERROR << "Error occurred while exporting the PaddlePaddle model to ONNX "
               "format."
            << std::endl;
    return false;
  }
  std::string onnx_model_proto(model_content_ptr,
                               model_content_ptr + model_content_size);
  delete model_content_ptr;
  model_content_ptr = nullptr;
  return InitFromOnnx(onnx_model_proto, option, true);
#else
  FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
             "call `InitFromOnnx` instead."
          << std::endl;
  return false;
#endif
}

bool TrtBackend::InitFromOnnx(const std::string& model_file,
                              const TrtBackendOption& option,
                              bool from_memory_buffer) {
  if (initialized_) {
    FDERROR << "TrtBackend is already initialized, cannot initialize again."
            << std::endl;
    return false;
  }
  cudaSetDevice(option.gpu_id);

  if (option.serialize_file != "") {
    std::ifstream fin(option.serialize_file, std::ios::binary | std::ios::in);
    if (fin) {
      FDLogger() << "Detect serialized TensorRT Engine file in "
                 << option.serialize_file << ", will load it directly."
                 << std::endl;
      fin.close();
      return InitFromTrt(option.serialize_file);
    }
  }

  std::string onnx_content = "";
  if (!from_memory_buffer) {
    std::ifstream fin(model_file.c_str(), std::ios::binary | std::ios::in);
    if (!fin) {
      FDERROR << "[ERROR] Failed to open ONNX model file: " << model_file
              << std::endl;
      return false;
    }
    fin.seekg(0, std::ios::end);
    onnx_content.resize(fin.tellg());
    fin.seekg(0, std::ios::beg);
    fin.read(&(onnx_content.at(0)), onnx_content.size());
    fin.close();
  } else {
    onnx_content = model_file;
  }

  if (!CreateTrtEngine(onnx_content, option)) {
    return false;
  }

  context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
      engine_->createExecutionContext());
  FDASSERT(cudaStreamCreate(&stream_) == 0,
           "[ERROR] Error occurs while calling cudaStreamCreate().");
  GetInputOutputInfo();
  initialized_ = true;
  return true;
}
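
// Illustrative call sequence (editor's sketch; a hypothetical caller, not
// part of this file):
//   TrtBackendOption option;
//   option.gpu_id = 0;
//   TrtBackend backend;
//   if (backend.InitFromOnnx("model.onnx", option)) {  // path is assumed
//     // backend.Infer(inputs, &outputs);
//   }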

bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
                       std::vector<FDTensor>* outputs) {
  AllocateBufferInDynamicShape(inputs, outputs);
  std::vector<void*> input_binds(inputs.size());
  for (size_t i = 0; i < inputs.size(); ++i) {
    if (inputs[i].dtype == FDDataType::INT64) {
      int64_t* data = static_cast<int64_t*>(inputs[i].Data());
      std::vector<int32_t> casted_data(data, data + inputs[i].Numel());
      FDASSERT(cudaMemcpyAsync(inputs_buffer_[inputs[i].name].data(),
                               static_cast<void*>(casted_data.data()),
                               inputs[i].Nbytes() / 2, cudaMemcpyHostToDevice,
                               stream_) == 0,
               "[ERROR] Error occurs while copy memory from CPU to GPU.");
    } else {
      FDASSERT(cudaMemcpyAsync(inputs_buffer_[inputs[i].name].data(),
                               inputs[i].Data(), inputs[i].Nbytes(),
                               cudaMemcpyHostToDevice, stream_) == 0,
               "[ERROR] Error occurs while copy memory from CPU to GPU.");
    }
    // FDASSERT(cudaMemcpy(inputs_buffer_[inputs[i].name].data(),
    //                     inputs[i].GetData(), inputs[i].Nbytes(),
    //                     cudaMemcpyHostToDevice) == 0,
    //          "[ERROR] Error occurs while copy memory from CPU to GPU.");
  }
  if (!context_->enqueueV2(bindings_.data(), stream_, nullptr)) {
    FDERROR << "Failed to Infer with TensorRT." << std::endl;
    return false;
  }
  for (size_t i = 0; i < outputs->size(); ++i) {
    FDASSERT(cudaMemcpyAsync((*outputs)[i].Data(),
                             outputs_buffer_[(*outputs)[i].name].data(),
                             (*outputs)[i].Nbytes(), cudaMemcpyDeviceToHost,
                             stream_) == 0,
             "[ERROR] Error occurs while copy memory from GPU to CPU.");
    // FDASSERT(cudaMemcpy((*outputs)[i].data.data(),
    //                     outputs_buffer_[(*outputs)[i].name].data(),
    //                     (*outputs)[i].Nbytes(),
    //                     cudaMemcpyDeviceToHost) == 0,
    //          "[ERROR] Error occurs while copy memory from GPU to CPU.");
  }
  // FDASSERT(cudaStreamSynchronize(stream_) == 0,
  //          "[ERROR] Error occurs while calling cudaStreamSynchronize().");
  return true;
}

void TrtBackend::GetInputOutputInfo() {
  inputs_desc_.clear();
  outputs_desc_.clear();
  auto num_binds = engine_->getNbBindings();
  for (auto i = 0; i < num_binds; ++i) {
    std::string name = std::string(engine_->getBindingName(i));
    auto shape = toVec(engine_->getBindingDimensions(i));
    auto dtype = engine_->getBindingDataType(i);
    if (engine_->bindingIsInput(i)) {
      inputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
      inputs_buffer_[name] = DeviceBuffer(dtype);
    } else {
      outputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
      outputs_buffer_[name] = DeviceBuffer(dtype);
    }
  }
  bindings_.resize(num_binds);
}

void TrtBackend::AllocateBufferInDynamicShape(
    const std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs) {
  for (const auto& item : inputs) {
    auto idx = engine_->getBindingIndex(item.name.c_str());
    std::vector<int> shape(item.shape.begin(), item.shape.end());
    auto dims = sample::toDims(shape);
    context_->setBindingDimensions(idx, dims);
    if (item.Nbytes() > inputs_buffer_[item.name].nbBytes()) {
      inputs_buffer_[item.name].resize(dims);
      bindings_[idx] = inputs_buffer_[item.name].data();
    }
  }
  if (outputs->size() != outputs_desc_.size()) {
    outputs->resize(outputs_desc_.size());
  }
  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
    auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
    auto output_dims = context_->getBindingDimensions(idx);
    (*outputs)[i].dtype = GetFDDataType(outputs_desc_[i].dtype);
    (*outputs)[i].shape.assign(output_dims.d,
                               output_dims.d + output_dims.nbDims);
    (*outputs)[i].name = outputs_desc_[i].name;
    (*outputs)[i].data.resize(volume(output_dims) *
                              TrtDataTypeSize(outputs_desc_[i].dtype));
    if ((*outputs)[i].Nbytes() >
        outputs_buffer_[outputs_desc_[i].name].nbBytes()) {
      outputs_buffer_[outputs_desc_[i].name].resize(output_dims);
      bindings_[idx] = outputs_buffer_[outputs_desc_[i].name].data();
    }
  }
}

bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
                                 const TrtBackendOption& option) {
  const auto explicitBatch =
      1U << static_cast<uint32_t>(
          nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);

  auto builder = SampleUniquePtr<nvinfer1::IBuilder>(
      nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
  if (!builder) {
    FDERROR << "Failed to call createInferBuilder()." << std::endl;
    return false;
  }
  auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(
      builder->createNetworkV2(explicitBatch));
  if (!network) {
    FDERROR << "Failed to call createNetworkV2()." << std::endl;
    return false;
  }
  auto config =
      SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
  if (!config) {
    FDERROR << "Failed to call createBuilderConfig()." << std::endl;
    return false;
  }

  if (option.enable_fp16) {
    if (!builder->platformHasFastFp16()) {
      FDLogger() << "[WARN] Detected FP16 is not supported in the current GPU, "
                    "will use FP32 instead."
                 << std::endl;
    } else {
      config->setFlag(nvinfer1::BuilderFlag::kFP16);
    }
  }

  auto parser = SampleUniquePtr<nvonnxparser::IParser>(
      nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
  if (!parser) {
    FDERROR << "Failed to call createParser()." << std::endl;
    return false;
  }
  if (!parser->parse(onnx_model.data(), onnx_model.size())) {
    FDERROR << "Failed to parse ONNX model by TensorRT." << std::endl;
    return false;
  }

  FDLogger() << "Start building TensorRT Engine..." << std::endl;
  bool fp16 = builder->platformHasFastFp16();
  builder->setMaxBatchSize(option.max_batch_size);

  config->setMaxWorkspaceSize(option.max_workspace_size);

  if (option.fixed_shape.size() > 0) {
    auto profile = builder->createOptimizationProfile();
    for (auto& item : option.fixed_shape) {
      FDASSERT(profile->setDimensions(item.first.c_str(),
                                      nvinfer1::OptProfileSelector::kMIN,
                                      sample::toDims(item.second)),
               "[TrtBackend] Failed to set min_shape for input: " + item.first +
                   " in TrtBackend.");
      FDASSERT(profile->setDimensions(item.first.c_str(),
                                      nvinfer1::OptProfileSelector::kOPT,
                                      sample::toDims(item.second)),
               "[TrtBackend] Failed to set opt_shape for input: " + item.first +
                   " in TrtBackend.");
      FDASSERT(profile->setDimensions(item.first.c_str(),
                                      nvinfer1::OptProfileSelector::kMAX,
                                      sample::toDims(item.second)),
               "[TrtBackend] Failed to set max_shape for input: " + item.first +
                   " in TrtBackend.");
    }
    config->addOptimizationProfile(profile);
  } else if (option.max_shape.size() > 0) {
    auto profile = builder->createOptimizationProfile();
    FDASSERT(option.max_shape.size() == option.min_shape.size() &&
                 option.min_shape.size() == option.opt_shape.size(),
             "[TrtBackend] Size of max_shape/opt_shape/min_shape in "
             "TrtBackendOption should keep same.");
    for (const auto& item : option.min_shape) {
      // set min shape
      FDASSERT(profile->setDimensions(item.first.c_str(),
                                      nvinfer1::OptProfileSelector::kMIN,
                                      sample::toDims(item.second)),
               "[TrtBackend] Failed to set min_shape for input: " + item.first +
                   " in TrtBackend.");

      // set optimization shape
      auto iter = option.opt_shape.find(item.first);
      FDASSERT(iter != option.opt_shape.end(),
               "[TrtBackend] Cannot find input name: " + item.first +
                   " in TrtBackendOption::opt_shape.");
      FDASSERT(profile->setDimensions(item.first.c_str(),
                                      nvinfer1::OptProfileSelector::kOPT,
                                      sample::toDims(iter->second)),
               "[TrtBackend] Failed to set opt_shape for input: " + item.first +
                   " in TrtBackend.");
      // set max shape
      iter = option.max_shape.find(item.first);
      FDASSERT(iter != option.max_shape.end(),
               "[TrtBackend] Cannot find input name: " + item.first +
                   " in TrtBackendOption::max_shape.");
      FDASSERT(profile->setDimensions(item.first.c_str(),
                                      nvinfer1::OptProfileSelector::kMAX,
                                      sample::toDims(iter->second)),
               "[TrtBackend] Failed to set max_shape for input: " + item.first +
||||
" in TrtBackend.");
|
||||
}
|
||||
config->addOptimizationProfile(profile);
|
||||
}
|
||||
|
||||
SampleUniquePtr<IHostMemory> plan{
|
||||
builder->buildSerializedNetwork(*network, *config)};
|
||||
if (!plan) {
|
||||
FDERROR << "Failed to call buildSerializedNetwork()." << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
SampleUniquePtr<IRuntime> runtime{
|
||||
createInferRuntime(sample::gLogger.getTRTLogger())};
|
||||
if (!runtime) {
|
||||
FDERROR << "Failed to call createInferRuntime()." << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
|
||||
runtime->deserializeCudaEngine(plan->data(), plan->size()),
|
||||
samplesCommon::InferDeleter());
|
||||
if (!engine_) {
|
||||
FDERROR << "Failed to call deserializeCudaEngine()." << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
FDLogger() << "TensorRT Engine is built succussfully." << std::endl;
|
||||
if (option.serialize_file != "") {
|
||||
FDLogger() << "Serialize TensorRTEngine to local file "
|
||||
<< option.serialize_file << "." << std::endl;
|
||||
std::ofstream engine_file(option.serialize_file.c_str());
|
||||
if (!engine_file) {
|
||||
FDERROR << "Failed to open " << option.serialize_file << " to write."
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
engine_file.write(static_cast<char*>(plan->data()), plan->size());
|
||||
engine_file.close();
|
||||
FDLogger() << "TensorRTEngine is serialized to local file "
|
||||
<< option.serialize_file
|
||||
<< ", we can load this model from the seralized engine "
|
||||
"directly next time."
|
||||
<< std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
TensorInfo TrtBackend::GetInputInfo(int index) {
|
||||
FDASSERT(index < NumInputs(), "The index:" + std::to_string(index) +
|
||||
" should less than the number of inputs:" +
|
||||
std::to_string(NumInputs()) + ".");
|
||||
TensorInfo info;
|
||||
info.name = inputs_desc_[index].name;
|
||||
info.shape.assign(inputs_desc_[index].shape.begin(),
|
||||
inputs_desc_[index].shape.end());
|
||||
info.dtype = GetFDDataType(inputs_desc_[index].dtype);
|
||||
return info;
|
||||
}
|
||||
|
||||
TensorInfo TrtBackend::GetOutputInfo(int index) {
|
||||
FDASSERT(index < NumOutputs(),
|
||||
"The index:" + std::to_string(index) +
|
||||
" should less than the number of outputs:" +
|
||||
std::to_string(NumOutputs()) + ".");
|
||||
TensorInfo info;
|
||||
info.name = outputs_desc_[index].name;
|
||||
info.shape.assign(outputs_desc_[index].shape.begin(),
|
||||
outputs_desc_[index].shape.end());
|
||||
info.dtype = GetFDDataType(outputs_desc_[index].dtype);
|
||||
return info;
|
||||
}
|
||||
} // namespace fastdeploy
|
||||
98
fastdeploy/backends/tensorrt/trt_backend.h
Normal file
@@ -0,0 +1,98 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <iostream>
#include <map>
#include <string>
#include <vector>

#include "fastdeploy/backends/backend.h"

#include "fastdeploy/backends/tensorrt/common/argsParser.h"
#include "fastdeploy/backends/tensorrt/common/buffers.h"
#include "fastdeploy/backends/tensorrt/common/common.h"
#include "fastdeploy/backends/tensorrt/common/logger.h"
#include "fastdeploy/backends/tensorrt/common/parserOnnxConfig.h"
#include "fastdeploy/backends/tensorrt/common/sampleUtils.h"

#include "NvInfer.h"
#include <cuda_runtime_api.h>

namespace fastdeploy {
using namespace samplesCommon;

struct TrtValueInfo {
  std::string name;
  std::vector<int> shape;
  nvinfer1::DataType dtype;
};

struct TrtBackendOption {
  int gpu_id = 0;
  bool enable_fp16 = false;
  bool enable_int8 = false;
  size_t max_batch_size = 32;
  size_t max_workspace_size = 1 << 30;
  std::map<std::string, std::vector<int32_t>> fixed_shape;
  std::map<std::string, std::vector<int32_t>> max_shape;
  std::map<std::string, std::vector<int32_t>> min_shape;
  std::map<std::string, std::vector<int32_t>> opt_shape;
  std::string serialize_file = "";
};

std::vector<int> toVec(const nvinfer1::Dims& dim);
size_t TrtDataTypeSize(const nvinfer1::DataType& dtype);
FDDataType GetFDDataType(const nvinfer1::DataType& dtype);

class TrtBackend : public BaseBackend {
 public:
  TrtBackend() : engine_(nullptr), context_(nullptr) {}
  void BuildOption(const TrtBackendOption& option);

  bool InitFromPaddle(const std::string& model_file,
                      const std::string& params_file,
                      const TrtBackendOption& option = TrtBackendOption(),
                      bool verbose = false);
  bool InitFromOnnx(const std::string& model_file,
                    const TrtBackendOption& option = TrtBackendOption(),
                    bool from_memory_buffer = false);
  bool InitFromTrt(const std::string& trt_engine_file);

  bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);

  int NumInputs() const { return inputs_desc_.size(); }
  int NumOutputs() const { return outputs_desc_.size(); }
  TensorInfo GetInputInfo(int index);
  TensorInfo GetOutputInfo(int index);

 private:
  std::shared_ptr<nvinfer1::ICudaEngine> engine_;
  std::shared_ptr<nvinfer1::IExecutionContext> context_;
  cudaStream_t stream_{};
  std::vector<void*> bindings_;
  std::vector<TrtValueInfo> inputs_desc_;
  std::vector<TrtValueInfo> outputs_desc_;
  std::map<std::string, DeviceBuffer> inputs_buffer_;
  std::map<std::string, DeviceBuffer> outputs_buffer_;

  void GetInputOutputInfo();
  void AllocateBufferInDynamicShape(const std::vector<FDTensor>& inputs,
                                    std::vector<FDTensor>* outputs);
  bool CreateTrtEngine(const std::string& onnx_model,
                       const TrtBackendOption& option);
};

}  // namespace fastdeploy
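For orientation, here is a minimal usage sketch of the backend declared above, assuming an ONNX model on disk with one dynamic input named "x"; the file name, tensor name and shapes are illustrative, not part of the library:

#include "fastdeploy/backends/tensorrt/trt_backend.h"

int main() {
  fastdeploy::TrtBackendOption option;
  // Declare the dynamic-shape range of input "x" so that CreateTrtEngine
  // can register a TensorRT optimization profile for it.
  option.min_shape["x"] = {1, 3, 224, 224};
  option.opt_shape["x"] = {4, 3, 224, 224};
  option.max_shape["x"] = {8, 3, 224, 224};
  option.serialize_file = "model.trt";  // cache the built engine on disk

  fastdeploy::TrtBackend backend;
  if (!backend.InitFromOnnx("model.onnx", option)) {
    return -1;
  }

  // One input tensor at the opt shape; Infer resizes and fills `outputs`.
  std::vector<fastdeploy::FDTensor> inputs(1), outputs;
  inputs[0].Allocate({4, 3, 224, 224}, fastdeploy::FDDataType::FP32, "x");
  backend.Infer(inputs, &outputs);
  return 0;
}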
50
fastdeploy/core/config.h.in
Normal file
@@ -0,0 +1,50 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#ifndef FASTDEPLOY_DEBUG
#cmakedefine FASTDEPLOY_DEBUG
#endif

#ifndef FASTDEPLOY_LIB
#cmakedefine FASTDEPLOY_LIB
#endif

#ifndef ENABLE_PADDLE_FRONTEND
#cmakedefine ENABLE_PADDLE_FRONTEND
#endif

#ifndef ENABLE_ORT_BACKEND
#cmakedefine ENABLE_ORT_BACKEND
#endif

#ifndef WITH_GPU
#cmakedefine WITH_GPU
#endif

#ifndef ENABLE_TRT_BACKEND
#cmakedefine ENABLE_TRT_BACKEND
#endif

#ifndef ENABLE_VISION
#cmakedefine ENABLE_VISION
#endif

#ifndef ENABLE_OPENCV_CUDA
#cmakedefine ENABLE_OPENCV_CUDA
#endif

#ifndef ENABLE_VISION_VISUALIZE
#cmakedefine ENABLE_VISION_VISUALIZE
#endif
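After CMake's configure_file turns each #cmakedefine into a #define (or a comment), these macros act as plain feature guards. A small sketch of how they are typically consumed; the PrintCompiledFeatures helper below is illustrative, not part of the library:

#include <iostream>

#include "fastdeploy/core/config.h"

// Hypothetical helper: report which optional features were compiled in.
void PrintCompiledFeatures() {
#ifdef ENABLE_ORT_BACKEND
  std::cout << "ONNX Runtime backend: ON" << std::endl;
#endif
#ifdef ENABLE_TRT_BACKEND
  std::cout << "TensorRT backend: ON" << std::endl;
#endif
#ifdef WITH_GPU
  std::cout << "GPU support: ON" << std::endl;
#endif
}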
127
fastdeploy/core/fd_tensor.cc
Normal file
@@ -0,0 +1,127 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/utils/utils.h"

#ifdef WITH_GPU
#include <cuda_runtime_api.h>
#endif

namespace fastdeploy {

void* FDTensor::MutableData() {
  if (external_data_ptr != nullptr) {
    return external_data_ptr;
  }
  return data.data();
}

void* FDTensor::Data() {
  if (external_data_ptr != nullptr) {
    if (device == Device::GPU) {
#ifdef WITH_GPU
      // The external data lives on the GPU, so copy it to the CPU first.
      temporary_cpu_buffer.resize(Nbytes());
      FDASSERT(cudaMemcpy(temporary_cpu_buffer.data(), external_data_ptr,
                          Nbytes(), cudaMemcpyDeviceToHost) == 0,
               "[ERROR] Error occurs while copying memory from GPU to CPU");
      return temporary_cpu_buffer.data();
#else
      FDASSERT(false,
               "FastDeploy was not compiled with -DWITH_GPU=ON, so GPU data "
               "cannot be accessed here.");
#endif
    } else {
      return external_data_ptr;
    }
  }
  return data.data();
}

void FDTensor::SetExternalData(const std::vector<int>& new_shape,
                               const FDDataType& data_type, void* data_buffer) {
  dtype = data_type;
  shape.assign(new_shape.begin(), new_shape.end());
  external_data_ptr = data_buffer;
}

void FDTensor::Allocate(const std::vector<int>& new_shape,
                        const FDDataType& data_type,
                        const std::string& tensor_name) {
  dtype = data_type;
  name = tensor_name;
  shape.assign(new_shape.begin(), new_shape.end());
  int unit = FDDataTypeSize(data_type);
  int total_size =
      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
  data.resize(total_size * unit);
}

int FDTensor::Nbytes() const { return Numel() * FDDataTypeSize(dtype); }

int FDTensor::Numel() const {
  return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
}

template <typename T>
void CalculateStatisInfo(void* src_ptr, int size, double* mean, double* max,
                         double* min) {
  T* ptr = static_cast<T*>(src_ptr);
  *mean = 0;
  *max = -99999999;
  *min = 99999999;
  for (int i = 0; i < size; ++i) {
    if (*(ptr + i) > *max) {
      *max = *(ptr + i);
    }
    if (*(ptr + i) < *min) {
      *min = *(ptr + i);
    }
    *mean += *(ptr + i);
  }
  *mean = *mean / size;
}

void FDTensor::PrintInfo(const std::string& prefix) {
  double mean = 0;
  double max = -99999999;
  double min = 99999999;
  if (dtype == FDDataType::FP32) {
    CalculateStatisInfo<float>(Data(), Numel(), &mean, &max, &min);
  } else if (dtype == FDDataType::FP64) {
    CalculateStatisInfo<double>(Data(), Numel(), &mean, &max, &min);
  } else if (dtype == FDDataType::INT8) {
    CalculateStatisInfo<int8_t>(Data(), Numel(), &mean, &max, &min);
  } else if (dtype == FDDataType::UINT8) {
    CalculateStatisInfo<uint8_t>(Data(), Numel(), &mean, &max, &min);
  } else if (dtype == FDDataType::INT32) {
    CalculateStatisInfo<int32_t>(Data(), Numel(), &mean, &max, &min);
  } else if (dtype == FDDataType::INT64) {
    CalculateStatisInfo<int64_t>(Data(), Numel(), &mean, &max, &min);
  } else {
    FDASSERT(false,
             "PrintInfo doesn't support the current data type; this function "
             "may need to be extended.");
  }
  std::cout << prefix << ": shape=";
  for (size_t i = 0; i < shape.size(); ++i) {
    std::cout << shape[i] << " ";
  }
  std::cout << ", dtype=" << FDDataTypeStr(dtype) << ", mean=" << mean
            << ", max=" << max << ", min=" << min << std::endl;
}

FDTensor::FDTensor(const std::string& tensor_name) { name = tensor_name; }
}  // namespace fastdeploy
84
fastdeploy/core/fd_tensor.h
Normal file
@@ -0,0 +1,84 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <iostream>
#include <numeric>
#include <string>
#include <vector>

#include "fastdeploy/core/fd_type.h"

namespace fastdeploy {

struct FASTDEPLOY_DECL FDTensor {
  std::vector<int8_t> data;
  std::vector<int64_t> shape;
  std::string name = "";
  FDDataType dtype;

  // This is used to skip the memory copy step:
  // external_data_ptr points to user-allocated memory,
  // and the user has to maintain that memory (allocate and release it).
  void* external_data_ptr = nullptr;
  // The internal data is always on the CPU.
  // Sometimes the external data is on the GPU and the model is also
  // run on the GPU, so the data transfer can be skipped, which may
  // improve efficiency.
  Device device = Device::CPU;

  // If the external data is not on the CPU, this temporary buffer
  // is used to transfer the data to the CPU whenever another
  // device's data needs to be visited.
  std::vector<int8_t> temporary_cpu_buffer;

  // Get the data buffer pointer.
  void* MutableData();

  // Use this function to get the tensor data for processing.
  // Since the most common scenario is processing data on the CPU,
  // this function returns a pointer to a CPU memory buffer.
  // If the original data is on another device, it is copied to the
  // CPU and stored in `temporary_cpu_buffer`.
  void* Data();

  // Set a user memory buffer for the tensor. The memory is managed by
  // the user itself, and the tensor shares it with the user,
  // so take care to keep the user buffer alive.
  void SetExternalData(const std::vector<int>& new_shape,
                       const FDDataType& data_type, void* data_buffer);

  // Initialize the tensor:
  // set its attributes and allocate the CPU memory buffer.
  void Allocate(const std::vector<int>& new_shape, const FDDataType& data_type,
                const std::string& tensor_name = "");

  // Total size of the tensor memory buffer, in bytes.
  int Nbytes() const;

  // Total number of elements in this tensor.
  int Numel() const;

  // Debug function.
  // Use this function to print shape, dtype, mean, max and min;
  // the prefix will also be printed as a tag.
  void PrintInfo(const std::string& prefix = "TensorInfo: ");

  FDTensor() {}
  explicit FDTensor(const std::string& tensor_name);
};

}  // namespace fastdeploy
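A short sketch of the two ways to hand data to an FDTensor described in the comments above; the buffer and tensor names are illustrative:

#include "fastdeploy/core/fd_tensor.h"

void Example() {
  // Owned memory: Allocate sets dtype/shape/name and resizes the internal
  // CPU buffer, which the tensor manages itself.
  fastdeploy::FDTensor owned;
  owned.Allocate({2, 3}, fastdeploy::FDDataType::FP32, "owned_tensor");
  float* dst = static_cast<float*>(owned.MutableData());
  for (int i = 0; i < owned.Numel(); ++i) {
    dst[i] = 0.5f;
  }

  // External memory: the tensor only borrows the pointer and skips the
  // copy; the caller keeps ownership and must keep the buffer alive.
  static float user_buffer[6] = {1, 2, 3, 4, 5, 6};
  fastdeploy::FDTensor borrowed;
  borrowed.SetExternalData({2, 3}, fastdeploy::FDDataType::FP32, user_buffer);
}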
122
fastdeploy/core/fd_type.cc
Normal file
@@ -0,0 +1,122 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/core/fd_type.h"
#include "fastdeploy/utils/utils.h"

namespace fastdeploy {

int FDDataTypeSize(FDDataType data_type) {
  FDASSERT(data_type != FDDataType::FP16, "Float16 is not supported.");
  if (data_type == FDDataType::BOOL) {
    return sizeof(bool);
  } else if (data_type == FDDataType::INT16) {
    return sizeof(int16_t);
  } else if (data_type == FDDataType::INT32) {
    return sizeof(int32_t);
  } else if (data_type == FDDataType::INT64) {
    return sizeof(int64_t);
  } else if (data_type == FDDataType::FP32) {
    return sizeof(float);
  } else if (data_type == FDDataType::FP64) {
    return sizeof(double);
  } else if (data_type == FDDataType::UINT8) {
    return sizeof(uint8_t);
  } else if (data_type == FDDataType::INT8) {
    return sizeof(int8_t);
  } else {
    // Use Str() here; calling FDDataTypeStr() on an unknown type would
    // recurse without end.
    FDASSERT(false, "Unexpected data type: " + Str(data_type));
  }
  return -1;
}

std::string FDDataTypeStr(FDDataType data_type) {
  FDASSERT(data_type != FDDataType::FP16, "Float16 is not supported.");
  if (data_type == FDDataType::BOOL) {
    return "bool";
  } else if (data_type == FDDataType::INT16) {
    return "int16";
  } else if (data_type == FDDataType::INT32) {
    return "int32";
  } else if (data_type == FDDataType::INT64) {
    return "int64";
  } else if (data_type == FDDataType::FP16) {
    return "float16";
  } else if (data_type == FDDataType::FP32) {
    return "float32";
  } else if (data_type == FDDataType::FP64) {
    return "float64";
  } else if (data_type == FDDataType::UINT8) {
    return "uint8";
  } else if (data_type == FDDataType::INT8) {
    return "int8";
  } else {
    // Use Str() here; calling FDDataTypeStr() on an unknown type would
    // recurse without end.
    FDASSERT(false, "Unexpected data type: " + Str(data_type));
  }
  return "UNKNOWN!";
}

std::string Str(Device& d) {
  std::string out;
  switch (d) {
    case Device::DEFAULT:
      out = "Device::DEFAULT";
      break;
    case Device::CPU:
      out = "Device::CPU";
      break;
    case Device::GPU:
      out = "Device::GPU";
      break;
    default:
      out = "Device::UNKNOWN";
  }
  return out;
}

std::string Str(FDDataType& fdt) {
  std::string out;
  switch (fdt) {
    case FDDataType::BOOL:
      out = "FDDataType::BOOL";
      break;
    case FDDataType::INT16:
      out = "FDDataType::INT16";
      break;
    case FDDataType::INT32:
      out = "FDDataType::INT32";
      break;
    case FDDataType::INT64:
      out = "FDDataType::INT64";
      break;
    case FDDataType::FP32:
      out = "FDDataType::FP32";
      break;
    case FDDataType::FP64:
      out = "FDDataType::FP64";
      break;
    case FDDataType::FP16:
      out = "FDDataType::FP16";
      break;
    case FDDataType::UINT8:
      out = "FDDataType::UINT8";
      break;
    case FDDataType::INT8:
      out = "FDDataType::INT8";
      break;
    default:
      out = "FDDataType::UNKNOWN";
  }
  return out;
}

}  // namespace fastdeploy
59
fastdeploy/core/fd_type.h
Normal file
@@ -0,0 +1,59 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <ostream>
#include <sstream>
#include <string>

#include "fastdeploy/core/config.h"
#include "fastdeploy/utils/utils.h"

namespace fastdeploy {

enum class Device { DEFAULT, CPU, GPU };

FASTDEPLOY_DECL std::string Str(Device& d);

enum class FDDataType {
  BOOL,
  INT16,
  INT32,
  INT64,
  FP16,
  FP32,
  FP64,
  UNKNOWN1,
  UNKNOWN2,
  UNKNOWN3,
  UNKNOWN4,
  UNKNOWN5,
  UNKNOWN6,
  UNKNOWN7,
  UNKNOWN8,
  UNKNOWN9,
  UNKNOWN10,
  UNKNOWN11,
  UNKNOWN12,
  UNKNOWN13,
  UINT8,
  INT8
};

FASTDEPLOY_DECL std::string Str(FDDataType& fdt);

FASTDEPLOY_DECL int32_t FDDataTypeSize(FDDataType data_dtype);

FASTDEPLOY_DECL std::string FDDataTypeStr(FDDataType data_dtype);
}  // namespace fastdeploy
@@ -1,186 +0,0 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import os.path as osp
import shutil
import requests
import time
import tarfile  # needed by decompress(); was missing from the imports
import zipfile
import hashlib
import tqdm
import logging

DOWNLOAD_RETRY_LIMIT = 3


def md5check(fullname, md5sum=None):
    if md5sum is None:
        return True

    logging.info("File {} md5 checking...".format(fullname))
    md5 = hashlib.md5()
    with open(fullname, 'rb') as f:
        for chunk in iter(lambda: f.read(4096), b""):
            md5.update(chunk)
    calc_md5sum = md5.hexdigest()

    if calc_md5sum != md5sum:
        logging.info("File {} md5 check failed, {}(calc) != "
                     "{}(base)".format(fullname, calc_md5sum, md5sum))
        return False
    return True


def move_and_merge_tree(src, dst):
    """
    Move src directory to dst; if dst already exists,
    merge src into dst.
    """
    if not osp.exists(dst):
        shutil.move(src, dst)
    else:
        if not osp.isdir(src):
            shutil.move(src, dst)
            return
        for fp in os.listdir(src):
            src_fp = osp.join(src, fp)
            dst_fp = osp.join(dst, fp)
            if osp.isdir(src_fp):
                if osp.isdir(dst_fp):
                    move_and_merge_tree(src_fp, dst_fp)
                else:
                    shutil.move(src_fp, dst_fp)
            elif osp.isfile(src_fp) and \
                    not osp.isfile(dst_fp):
                shutil.move(src_fp, dst_fp)


def download(url, path, rename=None, md5sum=None, show_progress=False):
    """
    Download from url, save to path.
    url (str): download url
    path (str): download to given path
    """
    if not osp.exists(path):
        os.makedirs(path)

    fname = osp.split(url)[-1]
    fullname = osp.join(path, fname)
    if rename is not None:
        fullname = osp.join(path, rename)
    retry_cnt = 0
    while not (osp.exists(fullname) and md5check(fullname, md5sum)):
        if retry_cnt < DOWNLOAD_RETRY_LIMIT:
            retry_cnt += 1
        else:
            logging.debug("{} download failed.".format(fname))
            raise RuntimeError("Download from {} failed. "
                               "Retry limit reached".format(url))

        logging.info("Downloading {} from {}".format(fname, url))

        req = requests.get(url, stream=True)
        if req.status_code != 200:
            raise RuntimeError("Downloading from {} failed with code "
                               "{}!".format(url, req.status_code))

        # To guard against an interrupted download, write to
        # tmp_fullname first and move tmp_fullname to fullname
        # after the download finishes.
        tmp_fullname = fullname + "_tmp"
        total_size = req.headers.get('content-length')
        with open(tmp_fullname, 'wb') as f:
            if total_size and show_progress:
                for chunk in tqdm.tqdm(
                        req.iter_content(chunk_size=1024),
                        total=(int(total_size) + 1023) // 1024,
                        unit='KB'):
                    f.write(chunk)
            else:
                for chunk in req.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
        shutil.move(tmp_fullname, fullname)
        logging.debug("{} download completed.".format(fname))

    return fullname


def decompress(fname):
    """
    Decompress zip and tar files.
    """
    logging.info("Decompressing {}...".format(fname))

    # To guard against interrupted decompression,
    # decompress to the fpath_tmp directory first; if decompression
    # succeeds, move the decompressed files to fpath, then delete
    # fpath_tmp and remove the downloaded compressed file.
    fpath = osp.split(fname)[0]
    fpath_tmp = osp.join(fpath, 'tmp')
    if osp.isdir(fpath_tmp):
        shutil.rmtree(fpath_tmp)
    os.makedirs(fpath_tmp)

    if fname.find('.tar') >= 0 or fname.find('.tgz') >= 0:
        with tarfile.open(fname) as tf:
            tf.extractall(path=fpath_tmp)
    elif fname.find('.zip') >= 0:
        with zipfile.ZipFile(fname) as zf:
            zf.extractall(path=fpath_tmp)
    else:
        raise TypeError("Unsupported compressed file type {}".format(fname))

    for f in os.listdir(fpath_tmp):
        src_dir = osp.join(fpath_tmp, f)
        dst_dir = osp.join(fpath, f)
        move_and_merge_tree(src_dir, dst_dir)

    shutil.rmtree(fpath_tmp)
    logging.debug("{} decompressed.".format(fname))
    return dst_dir


def url2dir(url, path, rename=None):
    full_name = download(url, path, rename, show_progress=True)
    print("SDK is downloaded, now extracting...")
    if url.count(".tgz") > 0 or url.count(".tar") > 0 or url.count("zip") > 0:
        return decompress(full_name)


def download_and_decompress(url, path='.', rename=None):
    fname = osp.split(url)[-1]
    fullname = osp.join(path, fname)
    # if url.endswith(('tgz', 'tar.gz', 'tar', 'zip')):
    #     fullname = osp.join(path, fname.split('.')[0])
    nranks = 0
    if nranks <= 1:
        dst_dir = url2dir(url, path, rename)
        if dst_dir is not None:
            fullname = dst_dir
    else:
        # NOTE: this branch is unreachable since nranks is hard-coded to 0,
        # and local_rank is undefined here.
        lock_path = fullname + '.lock'
        if not os.path.exists(fullname):
            with open(lock_path, 'w'):
                os.utime(lock_path, None)
            if local_rank == 0:
                dst_dir = url2dir(url, path, rename)
                if dst_dir is not None:
                    fullname = dst_dir
                os.remove(lock_path)
            else:
                while os.path.exists(lock_path):
                    time.sleep(1)
    return
167
fastdeploy/fastdeploy_model.cc
Normal file
@@ -0,0 +1,167 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/utils/utils.h"

namespace fastdeploy {

bool FastDeployModel::InitRuntime() {
  FDASSERT(
      ModelFormatCheck(runtime_option.model_file, runtime_option.model_format),
      "ModelFormatCheck Failed.");
  if (runtime_initialized_) {
    FDERROR << "The model is already initialized, cannot be initialized again."
            << std::endl;
    return false;
  }
  if (runtime_option.backend != Backend::UNKNOWN) {
    if (runtime_option.backend == Backend::ORT) {
      if (!IsBackendAvailable(Backend::ORT)) {
        FDERROR
            << "Backend::ORT is not compiled into current FastDeploy library."
            << std::endl;
        return false;
      }
    } else if (runtime_option.backend == Backend::TRT) {
      if (!IsBackendAvailable(Backend::TRT)) {
        FDERROR
            << "Backend::TRT is not compiled into current FastDeploy library."
            << std::endl;
        return false;
      }
    } else {
      FDERROR << "Only support Backend::ORT / Backend::TRT now." << std::endl;
      return false;
    }
    runtime_ = new Runtime();
    if (!runtime_->Init(runtime_option)) {
      return false;
    }
    runtime_initialized_ = true;
    return true;
  }

  if (runtime_option.device == Device::CPU) {
    return CreateCpuBackend();
  } else if (runtime_option.device == Device::GPU) {
#ifdef WITH_GPU
    return CreateGpuBackend();
#else
    FDERROR << "The compiled FastDeploy library doesn't support GPU now."
            << std::endl;
    return false;
#endif
  }
  FDERROR << "Only support CPU/GPU now." << std::endl;
  return false;
}

bool FastDeployModel::CreateCpuBackend() {
  if (valid_cpu_backends.size() == 0) {
    FDERROR << "There's no valid cpu backend for model: " << ModelName()
            << std::endl;
    return false;
  }

  for (auto& b : valid_cpu_backends) {
    if (b == Backend::ORT) {
      if (!IsBackendAvailable(Backend::ORT)) {
        FDERROR << "OrtBackend is not compiled into current FastDeploy library."
                << std::endl;
        continue;
      }
      runtime_option.backend = b;
      runtime_ = new Runtime();
      if (!runtime_->Init(runtime_option)) {
        return false;
      }
      runtime_initialized_ = true;
      return true;
    } else {
      FDERROR << "Only Backend::ORT as cpu backend is supported now."
              << std::endl;
      return false;
    }
  }

  FDERROR << "Cannot find an available cpu backend to load this model."
          << std::endl;
  return false;
}

bool FastDeployModel::CreateGpuBackend() {
  if (valid_gpu_backends.size() == 0) {
    FDERROR << "There's no valid gpu backend for model: " << ModelName()
            << std::endl;
    return false;
  }

  for (auto& b : valid_gpu_backends) {
    if (b == Backend::ORT) {
      if (!IsBackendAvailable(Backend::ORT)) {
        FDERROR << "OrtBackend is not compiled into current FastDeploy library."
                << std::endl;
        continue;
      }
      runtime_option.backend = b;
      runtime_ = new Runtime();
      if (!runtime_->Init(runtime_option)) {
        return false;
      }
      runtime_initialized_ = true;
      return true;
    } else if (b == Backend::TRT) {
      if (!IsBackendAvailable(Backend::TRT)) {
        FDERROR << "TrtBackend is not compiled into current FastDeploy library."
                << std::endl;
        continue;
      }
      runtime_option.backend = b;
      runtime_ = new Runtime();
      if (!runtime_->Init(runtime_option)) {
        return false;
      }
      // Mark the runtime initialized, consistent with the ORT branch.
      runtime_initialized_ = true;
      return true;
    } else {
      FDERROR << "Only Backend::ORT / Backend::TRT as gpu backends are "
                 "supported now."
              << std::endl;
      return false;
    }
  }

  FDERROR << "Cannot find an available gpu backend to load this model."
          << std::endl;
  return false;
}

bool FastDeployModel::Infer(std::vector<FDTensor>& input_tensors,
                            std::vector<FDTensor>* output_tensors) {
  return runtime_->Infer(input_tensors, output_tensors);
}

void FastDeployModel::EnableDebug() {
#ifdef FASTDEPLOY_DEBUG
  debug_ = true;
#else
  FDLogger() << "FastDeploy was not compiled with -DENABLE_DEBUG=ON, so "
                "debug mode cannot be enabled."
             << std::endl;
  debug_ = false;
#endif
}

bool FastDeployModel::DebugEnabled() { return debug_; }

}  // namespace fastdeploy
67
fastdeploy/fastdeploy_model.h
Normal file
@@ -0,0 +1,67 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/fastdeploy_runtime.h"

namespace fastdeploy {

class FASTDEPLOY_DECL FastDeployModel {
 public:
  virtual std::string ModelName() const { return "NameUndefined"; }

  virtual bool InitRuntime();
  virtual bool CreateCpuBackend();
  virtual bool CreateGpuBackend();
  virtual bool Infer(std::vector<FDTensor>& input_tensors,
                     std::vector<FDTensor>* output_tensors);

  RuntimeOption runtime_option;
  std::vector<Backend> valid_cpu_backends = {Backend::ORT};
  std::vector<Backend> valid_gpu_backends = {Backend::ORT};
  std::vector<Backend> valid_external_backends;
  bool initialized = false;
  virtual int NumInputsOfRuntime() { return runtime_->NumInputs(); }
  virtual int NumOutputsOfRuntime() { return runtime_->NumOutputs(); }
  virtual TensorInfo InputInfoOfRuntime(int index) {
    return runtime_->GetInputInfo(index);
  }
  virtual TensorInfo OutputInfoOfRuntime(int index) {
    return runtime_->GetOutputInfo(index);
  }
  virtual bool Initialized() const {
    return runtime_initialized_ && initialized;
  }

  virtual void EnableDebug();
  virtual bool DebugEnabled();

 private:
  Runtime* runtime_ = nullptr;
  bool runtime_initialized_ = false;
  bool debug_ = false;
};

#define TIMERECORD_START(id) \
  TimeCounter tc_##id;       \
  tc_##id.Start();

#define TIMERECORD_END(id, prefix)                                         \
  if (DebugEnabled()) {                                                    \
    tc_##id.End();                                                         \
    FDLogger() << __FILE__ << "(" << __LINE__ << "):" << __FUNCTION__      \
               << " " << prefix << " duration = " << tc_##id.Duration()    \
               << "s." << std::endl;                                       \
  }

}  // namespace fastdeploy
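A sketch of how the TIMERECORD macros above are meant to pair up inside a model subclass method (MyModel and the "inference" tag are illustrative; TimeCounter is assumed to provide Start/End/Duration via fastdeploy/utils/perf.h, as the macro bodies imply):

bool MyModel::Predict(std::vector<FDTensor>& inputs,
                      std::vector<FDTensor>* outputs) {
  TIMERECORD_START(infer)
  bool ret = Infer(inputs, outputs);
  // Logs "... inference duration = <seconds>s.", but only when debug mode
  // is active, since TIMERECORD_END checks DebugEnabled().
  TIMERECORD_END(infer, "inference")
  return ret;
}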
163
fastdeploy/fastdeploy_runtime.cc
Normal file
@@ -0,0 +1,163 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/fastdeploy_runtime.h"
#include "fastdeploy/utils/utils.h"
#ifdef ENABLE_ORT_BACKEND
#include "fastdeploy/backends/ort/ort_backend.h"
#endif

#ifdef ENABLE_TRT_BACKEND
#include "fastdeploy/backends/tensorrt/trt_backend.h"
#endif

namespace fastdeploy {

std::vector<Backend> GetAvailableBackends() {
  std::vector<Backend> backends;
#ifdef ENABLE_ORT_BACKEND
  backends.push_back(Backend::ORT);
#endif
#ifdef ENABLE_TRT_BACKEND
  backends.push_back(Backend::TRT);
#endif
  return backends;
}

bool IsBackendAvailable(const Backend& backend) {
  std::vector<Backend> backends = GetAvailableBackends();
  for (size_t i = 0; i < backends.size(); ++i) {
    if (backend == backends[i]) {
      return true;
    }
  }
  return false;
}

bool ModelFormatCheck(const std::string& model_file,
                      const Frontend& model_format) {
  if (model_format == Frontend::PADDLE) {
    if (model_file.size() < 8 ||
        model_file.substr(model_file.size() - 8, 8) != ".pdmodel") {
      FDLogger() << "With model format of Frontend::PADDLE, the model file "
                    "should end with `.pdmodel`, but now it's "
                 << model_file << std::endl;
      return false;
    }
  } else if (model_format == Frontend::ONNX) {
    if (model_file.size() < 5 ||
        model_file.substr(model_file.size() - 5, 5) != ".onnx") {
      FDLogger() << "With model format of Frontend::ONNX, the model file "
                    "should end with `.onnx`, but now it's "
                 << model_file << std::endl;
      return false;
    }
  } else {
    FDLogger() << "Only support model format with frontend Frontend::PADDLE / "
                  "Frontend::ONNX."
               << std::endl;
    return false;
  }
  return true;
}

bool Runtime::Init(const RuntimeOption& _option) {
  option = _option;
  if (option.backend == Backend::ORT) {
    CreateOrtBackend();
  } else if (option.backend == Backend::TRT) {
    CreateTrtBackend();
  } else {
    FDERROR << "Runtime only support Backend::ORT/Backend::TRT as backend now."
            << std::endl;
    return false;
  }
  return true;
}

TensorInfo Runtime::GetInputInfo(int index) {
  return backend_->GetInputInfo(index);
}

TensorInfo Runtime::GetOutputInfo(int index) {
  return backend_->GetOutputInfo(index);
}

bool Runtime::Infer(std::vector<FDTensor>& input_tensors,
                    std::vector<FDTensor>* output_tensors) {
  return backend_->Infer(input_tensors, output_tensors);
}

void Runtime::CreateOrtBackend() {
#ifdef ENABLE_ORT_BACKEND
  auto ort_option = OrtBackendOption();
  ort_option.graph_optimization_level = option.ort_graph_opt_level;
  ort_option.intra_op_num_threads = option.cpu_thread_num;
  ort_option.inter_op_num_threads = option.ort_inter_op_num_threads;
  ort_option.execution_mode = option.ort_execution_mode;
  ort_option.use_gpu = (option.device == Device::GPU) ? true : false;
  ort_option.gpu_id = option.device_id;
  FDASSERT(option.model_format == Frontend::PADDLE ||
               option.model_format == Frontend::ONNX,
           "OrtBackend only support model format of Frontend::PADDLE / "
           "Frontend::ONNX.");
  backend_ = new OrtBackend();
  auto casted_backend = dynamic_cast<OrtBackend*>(backend_);
  if (option.model_format == Frontend::ONNX) {
    FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option),
             "Load model from ONNX failed while initializing OrtBackend.");
  } else {
    FDASSERT(casted_backend->InitFromPaddle(option.model_file,
                                            option.params_file, ort_option),
             "Load model from Paddle failed while initializing OrtBackend.");
  }
#else
  FDASSERT(false, "OrtBackend is not available, please compile with "
                  "ENABLE_ORT_BACKEND=ON.");
#endif
}

void Runtime::CreateTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
  auto trt_option = TrtBackendOption();
  trt_option.gpu_id = option.device_id;
  trt_option.enable_fp16 = option.trt_enable_fp16;
  trt_option.enable_int8 = option.trt_enable_int8;
  trt_option.max_batch_size = option.trt_max_batch_size;
  trt_option.max_workspace_size = option.trt_max_workspace_size;
  trt_option.fixed_shape = option.trt_fixed_shape;
  trt_option.max_shape = option.trt_max_shape;
  trt_option.min_shape = option.trt_min_shape;
  trt_option.opt_shape = option.trt_opt_shape;
  trt_option.serialize_file = option.trt_serialize_file;
  FDASSERT(option.model_format == Frontend::PADDLE ||
               option.model_format == Frontend::ONNX,
           "TrtBackend only support model format of Frontend::PADDLE / "
           "Frontend::ONNX.");
  backend_ = new TrtBackend();
  auto casted_backend = dynamic_cast<TrtBackend*>(backend_);
  if (option.model_format == Frontend::ONNX) {
    FDASSERT(casted_backend->InitFromOnnx(option.model_file, trt_option),
             "Load model from ONNX failed while initializing TrtBackend.");
  } else {
    FDASSERT(casted_backend->InitFromPaddle(option.model_file,
                                            option.params_file, trt_option),
             "Load model from Paddle failed while initializing TrtBackend.");
  }
#else
  FDASSERT(false, "TrtBackend is not available, please compile with "
                  "ENABLE_TRT_BACKEND=ON.");
#endif
}
}  // namespace fastdeploy
94
fastdeploy/fastdeploy_runtime.h
Normal file
@@ -0,0 +1,94 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "fastdeploy/backends/backend.h"
#include "fastdeploy/utils/perf.h"
#include <map>
#include <vector>

namespace fastdeploy {

enum class Backend { UNKNOWN, ORT, TRT, PDRT };
enum class Frontend { PADDLE, ONNX };

FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();

FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);

bool ModelFormatCheck(const std::string& model_file,
                      const Frontend& model_format);

struct FASTDEPLOY_DECL RuntimeOption {
  Backend backend = Backend::ORT;

  // For CPU inference and preprocessing
  int cpu_thread_num = 8;
  int device_id = 0;

#ifdef WITH_GPU
  Device device = Device::GPU;
#else
  Device device = Device::CPU;
#endif

  // ======Only for ORT Backend========
  // -1 means use the default value of onnxruntime
  // 0: ORT_DISABLE_ALL  1: ORT_ENABLE_BASIC  2: ORT_ENABLE_EXTENDED
  // 3: ORT_ENABLE_ALL
  int ort_graph_opt_level = -1;
  int ort_inter_op_num_threads = -1;
  // 0: ORT_SEQUENTIAL  1: ORT_PARALLEL
  int ort_execution_mode = -1;

  // ======Only for Trt Backend=======
  std::map<std::string, std::vector<int32_t>> trt_fixed_shape;
  std::map<std::string, std::vector<int32_t>> trt_max_shape;
  std::map<std::string, std::vector<int32_t>> trt_min_shape;
  std::map<std::string, std::vector<int32_t>> trt_opt_shape;
  std::string trt_serialize_file = "";
  bool trt_enable_fp16 = false;
  bool trt_enable_int8 = false;
  size_t trt_max_batch_size = 32;
  size_t trt_max_workspace_size = 1 << 30;

  std::string model_file = "";   // Path of the model file
  std::string params_file = "";  // Path of the parameters file, can be empty
  Frontend model_format = Frontend::PADDLE;  // Format of the input model
};

struct FASTDEPLOY_DECL Runtime {
 public:
  // explicit Runtime(const RuntimeOption& _option = RuntimeOption());

  bool Init(const RuntimeOption& _option);

  bool Infer(std::vector<FDTensor>& input_tensors,
             std::vector<FDTensor>* output_tensors);

  void CreateOrtBackend();

  void CreateTrtBackend();

  int NumInputs() { return backend_->NumInputs(); }
  int NumOutputs() { return backend_->NumOutputs(); }
  TensorInfo GetInputInfo(int index);
  TensorInfo GetOutputInfo(int index);

  RuntimeOption option;

 private:
  BaseBackend* backend_;
};
}  // namespace fastdeploy
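A minimal sketch of driving Runtime directly, assuming the library was built with the ORT backend and the model has static input shapes; the model path is illustrative:

#include "fastdeploy/fastdeploy_runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.model_file = "model.onnx";  // illustrative path
  option.model_format = fastdeploy::Frontend::ONNX;
  option.backend = fastdeploy::Backend::ORT;

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    return -1;
  }

  // Allocate one input tensor described by the runtime itself.
  std::vector<fastdeploy::FDTensor> inputs(runtime.NumInputs()), outputs;
  auto info = runtime.GetInputInfo(0);
  std::vector<int> shape(info.shape.begin(), info.shape.end());
  inputs[0].Allocate(shape, info.dtype, info.name);
  runtime.Infer(inputs, &outputs);
  return 0;
}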
55
fastdeploy/fastdeploy_runtime.py
Normal file
@@ -0,0 +1,55 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import logging
from . import fastdeploy_main as C


class FastDeployModel:
    def __init__(self, option):
        self._model = None
        self._runtime_option = option
        if self._runtime_option is None:
            self._runtime_option = C.RuntimeOption()

    def model_name(self):
        return self._model.model_name()

    def num_inputs(self):
        return self._model.num_inputs()

    def num_outputs(self):
        return self._model.num_outputs()

    def get_input_info(self, index):
        assert index < self.num_inputs(
        ), "The index:{} must be less than number of inputs:{}.".format(
            index, self.num_inputs())
        return self._model.get_input_info(index)

    def get_output_info(self, index):
        assert index < self.num_outputs(
        ), "The index:{} must be less than number of outputs:{}.".format(
            index, self.num_outputs())
        return self._model.get_output_info(index)

    @property
    def runtime_option(self):
        return self._model.runtime_option if self._model is not None else None

    @property
    def initialized(self):
        if self._model is None:
            return False
        return self._model.initialized()
0
fastdeploy/libs/__init__.py
Normal file
34
fastdeploy/pybind/fastdeploy_model.cc
Normal file
@@ -0,0 +1,34 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/pybind/main.h"

namespace fastdeploy {

void BindFDModel(pybind11::module& m) {
  pybind11::class_<FastDeployModel>(m, "FastDeployModel")
      .def(pybind11::init<>(), "Default Constructor")
      .def("model_name", &FastDeployModel::ModelName)
      .def("num_inputs_of_runtime", &FastDeployModel::NumInputsOfRuntime)
      .def("num_outputs_of_runtime", &FastDeployModel::NumOutputsOfRuntime)
      .def("input_info_of_runtime", &FastDeployModel::InputInfoOfRuntime)
      .def("output_info_of_runtime", &FastDeployModel::OutputInfoOfRuntime)
      .def("initialized", &FastDeployModel::Initialized)
      .def_readwrite("runtime_option", &FastDeployModel::runtime_option)
      .def_readwrite("valid_cpu_backends", &FastDeployModel::valid_cpu_backends)
      .def_readwrite("valid_gpu_backends",
                     &FastDeployModel::valid_gpu_backends);
}

}  // namespace fastdeploy
114
fastdeploy/pybind/fastdeploy_runtime.cc
Normal file
@@ -0,0 +1,114 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "fastdeploy/pybind/main.h"
|
||||
|
||||
namespace fastdeploy {
|
||||
|
||||
void BindRuntime(pybind11::module& m) {
|
||||
pybind11::class_<RuntimeOption>(m, "RuntimeOption")
|
||||
.def(pybind11::init())
|
||||
.def_readwrite("model_file", &RuntimeOption::model_file)
|
||||
.def_readwrite("params_file", &RuntimeOption::params_file)
|
||||
.def_readwrite("model_format", &RuntimeOption::model_format)
|
||||
.def_readwrite("backend", &RuntimeOption::backend)
|
||||
.def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
|
||||
.def_readwrite("device_id", &RuntimeOption::device_id)
|
||||
.def_readwrite("device", &RuntimeOption::device)
|
||||
.def_readwrite("ort_graph_opt_level", &RuntimeOption::ort_graph_opt_level)
|
||||
.def_readwrite("ort_inter_op_num_threads",
|
||||
&RuntimeOption::ort_inter_op_num_threads)
|
||||
.def_readwrite("ort_execution_mode", &RuntimeOption::ort_execution_mode)
|
||||
.def_readwrite("trt_fixed_shape", &RuntimeOption::trt_fixed_shape)
|
||||
.def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape)
|
||||
.def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape)
|
||||
.def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape)
|
||||
.def_readwrite("trt_serialize_file", &RuntimeOption::trt_serialize_file)
|
||||
.def_readwrite("trt_enable_fp16", &RuntimeOption::trt_enable_fp16)
|
||||
.def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8)
|
||||
.def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size)
|
||||
.def_readwrite("trt_max_workspace_size",
|
||||
&RuntimeOption::trt_max_workspace_size);
|
  pybind11::class_<Runtime>(m, "Runtime")
      .def(pybind11::init([](RuntimeOption& option) {
        Runtime* runtime = new Runtime();
        // Note: Init()'s result is not checked; a runtime that failed to
        // initialize is still handed back to Python here.
        runtime->Init(option);
        return runtime;
      }))
      .def("infer",
           [](Runtime& self, std::map<std::string, pybind11::array>& data) {
             std::vector<FDTensor> inputs(data.size());
             int index = 0;
             for (auto iter = data.begin(); iter != data.end(); ++iter) {
               inputs[index].dtype =
                   NumpyDataTypeToFDDataType(iter->second.dtype());
               inputs[index].shape.insert(
                   inputs[index].shape.begin(), iter->second.shape(),
                   iter->second.shape() + iter->second.ndim());
               // TODO(jiangjiajun): skipping this memory copy via
               // SetExternalData may be a better choice.
               inputs[index].data.resize(iter->second.nbytes());
               memcpy(inputs[index].data.data(), iter->second.mutable_data(),
                      iter->second.nbytes());
               inputs[index].name = iter->first;
               index += 1;  // advance to the next input tensor
             }

             std::vector<FDTensor> outputs(self.NumOutputs());
             self.Infer(inputs, &outputs);

             std::vector<pybind11::array> results;
             results.reserve(outputs.size());
             for (size_t i = 0; i < outputs.size(); ++i) {
               auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
               results.emplace_back(
                   pybind11::array(numpy_dtype, outputs[i].shape));
               memcpy(results[i].mutable_data(), outputs[i].data.data(),
                      outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
             }
             return results;
           });

  pybind11::enum_<Backend>(m, "Backend", pybind11::arithmetic(),
                           "Backend for inference.")
      .value("UNKNOWN", Backend::UNKNOWN)
      .value("ORT", Backend::ORT)
      .value("TRT", Backend::TRT)
      .value("PDRT", Backend::PDRT);
  pybind11::enum_<Frontend>(m, "Frontend", pybind11::arithmetic(),
                            "Frontend for inference.")
      .value("PADDLE", Frontend::PADDLE)
      .value("ONNX", Frontend::ONNX);
  pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(),
                          "Device for inference.")
      .value("CPU", Device::CPU)
      .value("GPU", Device::GPU);

  pybind11::enum_<FDDataType>(m, "FDDataType", pybind11::arithmetic(),
                              "Data type of FastDeploy.")
      .value("BOOL", FDDataType::BOOL)
      .value("INT8", FDDataType::INT8)
      .value("INT16", FDDataType::INT16)
      .value("INT32", FDDataType::INT32)
      .value("INT64", FDDataType::INT64)
      .value("FP32", FDDataType::FP32)
      .value("FP64", FDDataType::FP64)
      .value("UINT8", FDDataType::UINT8);

  pybind11::class_<TensorInfo>(m, "TensorInfo")
      .def_readwrite("name", &TensorInfo::name)
      .def_readwrite("shape", &TensorInfo::shape)
      .def_readwrite("dtype", &TensorInfo::dtype);

  m.def("get_available_backends", []() { return GetAvailableBackends(); });
}

}  // namespace fastdeploy
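A minimal C++ sketch, not part of this commit, of driving the same Runtime API the "infer" lambda above wraps; the header path, the FDTensor field types, and Init() returning bool are assumptions.

// Sketch only: header path, FDTensor field types, and Init()'s bool return
// are assumptions; the flow mirrors the "infer" lambda above.
#include <cstring>
#include <vector>
#include "fastdeploy/fastdeploy_runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.model_file = "model.onnx";  // hypothetical model path
  option.backend = fastdeploy::Backend::ORT;
  option.device = fastdeploy::Device::CPU;

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) return 1;

  std::vector<float> buffer(1 * 3 * 224 * 224, 0.0f);
  std::vector<fastdeploy::FDTensor> inputs(1);
  inputs[0].name = "x";  // hypothetical input name
  inputs[0].dtype = fastdeploy::FDDataType::FP32;
  inputs[0].shape = {1, 3, 224, 224};
  inputs[0].data.resize(buffer.size() * sizeof(float));
  std::memcpy(inputs[0].data.data(), buffer.data(), inputs[0].data.size());

  std::vector<fastdeploy::FDTensor> outputs(runtime.NumOutputs());
  runtime.Infer(inputs, &outputs);
  return 0;
}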

115
fastdeploy/pybind/main.cc
Normal file
@@ -0,0 +1,115 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/pybind/main.h"

namespace fastdeploy {

void BindRuntime(pybind11::module&);
void BindFDModel(pybind11::module&);
void BindVision(pybind11::module&);

pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype) {
  pybind11::dtype dt;
  if (fd_dtype == FDDataType::INT32) {
    dt = pybind11::dtype::of<int32_t>();
  } else if (fd_dtype == FDDataType::INT64) {
    dt = pybind11::dtype::of<int64_t>();
  } else if (fd_dtype == FDDataType::FP32) {
    dt = pybind11::dtype::of<float>();
  } else if (fd_dtype == FDDataType::FP64) {
    dt = pybind11::dtype::of<double>();
  } else {
    FDASSERT(false, "The function doesn't support data type of " +
                        FDDataTypeStr(fd_dtype) + ".");
  }
  return dt;
}

FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype& np_dtype) {
  if (np_dtype.is(pybind11::dtype::of<int32_t>())) {
    return FDDataType::INT32;
  } else if (np_dtype.is(pybind11::dtype::of<int64_t>())) {
    return FDDataType::INT64;
  } else if (np_dtype.is(pybind11::dtype::of<float>())) {
    return FDDataType::FP32;
  } else if (np_dtype.is(pybind11::dtype::of<double>())) {
    return FDDataType::FP64;
  }
  FDASSERT(false, "NumpyDataTypeToFDDataType() only supports "
                  "int32/int64/float32/float64 now.");
  return FDDataType::FP32;
}

void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor,
                     bool share_buffer) {
  tensor->dtype = NumpyDataTypeToFDDataType(pyarray.dtype());
  tensor->shape.insert(tensor->shape.begin(), pyarray.shape(),
                       pyarray.shape() + pyarray.ndim());
  if (share_buffer) {
    // Zero copy: the tensor aliases the numpy buffer, which must stay alive.
    tensor->external_data_ptr = pyarray.mutable_data();
  } else {
    tensor->data.resize(pyarray.nbytes());
    memcpy(tensor->data.data(), pyarray.mutable_data(), pyarray.nbytes());
  }
}

#ifdef ENABLE_VISION
int NumpyDataTypeToOpenCvType(const pybind11::dtype& np_dtype) {
  if (np_dtype.is(pybind11::dtype::of<int32_t>())) {
    return CV_32S;
  } else if (np_dtype.is(pybind11::dtype::of<int8_t>())) {
    // int8 arrays are handled as 8-bit unsigned bytes (same element size).
    return CV_8U;
  } else if (np_dtype.is(pybind11::dtype::of<uint8_t>())) {
    return CV_8U;
  } else if (np_dtype.is(pybind11::dtype::of<float>())) {
    return CV_32F;
  } else {
    FDASSERT(false,
             "NumpyDataTypeToOpenCvType() only supports "
             "int32/int8/uint8/float32 now.");
  }
  return CV_8U;
}

cv::Mat PyArrayToCvMat(pybind11::array& pyarray) {
  auto cv_type = NumpyDataTypeToOpenCvType(pyarray.dtype());
  FDASSERT(pyarray.ndim() == 3,
           "Require the rank of the array to be 3 (HWC layout) while "
           "converting it to cv::Mat.");
  int channel = *(pyarray.shape() + 2);
  int height = *(pyarray.shape());
  int width = *(pyarray.shape() + 1);
  // The cv::Mat wraps the numpy buffer directly; no copy is made.
  return cv::Mat(height, width, CV_MAKETYPE(cv_type, channel),
                 pyarray.mutable_data());
}
#endif

PYBIND11_MODULE(fastdeploy_main, m) {
  m.doc() = "Make it easier for programmers to deploy deep learning models, "
            "saving time to save the world!";

  BindRuntime(m);
  BindFDModel(m);
#ifdef ENABLE_VISION
  auto vision_module =
      m.def_submodule("vision", "Vision module of FastDeploy.");
  BindVision(vision_module);
#endif
}

}  // namespace fastdeploy
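A sketch, not part of this commit, of exercising PyArrayToTensor from C++ through pybind11's embedded interpreter; the point is the buffer-sharing contract of the share_buffer flag.

// Sketch only: uses pybind11's embedded interpreter, which this commit does
// not set up; the array shape and dtype are arbitrary.
#include <pybind11/embed.h>
#include "fastdeploy/pybind/main.h"

int main() {
  pybind11::scoped_interpreter guard;  // numpy arrays need a live interpreter
  pybind11::array_t<float> arr({1, 3, 224, 224});
  fastdeploy::FDTensor tensor;
  // share_buffer = true stores the numpy buffer in external_data_ptr, so the
  // array must outlive the tensor; the default (false) copies into data.
  fastdeploy::PyArrayToTensor(arr, &tensor, /*share_buffer=*/true);
  return 0;
}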

88
fastdeploy/pybind/main.h
Normal file
@@ -0,0 +1,88 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

#include <type_traits>

#include "fastdeploy/fastdeploy_runtime.h"

#ifdef ENABLE_VISION
#include "fastdeploy/vision.h"
#endif

namespace fastdeploy {

void BindRuntime(pybind11::module&);
void BindFDModel(pybind11::module&);
void BindVision(pybind11::module&);

pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype);

FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype& np_dtype);

void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor,
                     bool share_buffer = false);

#ifdef ENABLE_VISION
cv::Mat PyArrayToCvMat(pybind11::array& pyarray);
#endif

template <typename T> FDDataType CTypeToFDDataType() {
  if (std::is_same<T, int32_t>::value) {
    return FDDataType::INT32;
  } else if (std::is_same<T, int64_t>::value) {
    return FDDataType::INT64;
  } else if (std::is_same<T, float>::value) {
    return FDDataType::FP32;
  } else if (std::is_same<T, double>::value) {
    return FDDataType::FP64;
  }
  FDASSERT(false,
           "CTypeToFDDataType only supports int32/int64/float32/float64 now.");
  return FDDataType::FP32;
}

template <typename T>
std::vector<pybind11::array>
PyBackendInfer(T& self, const std::vector<std::string>& names,
               std::vector<pybind11::array>& data) {
  std::vector<FDTensor> inputs(data.size());
  for (size_t i = 0; i < data.size(); ++i) {
    // TODO(jiangjiajun): consider using the user's memory directly instead
    // of copying it into the tensor.
    inputs[i].dtype = NumpyDataTypeToFDDataType(data[i].dtype());
    inputs[i].shape.insert(inputs[i].shape.begin(), data[i].shape(),
                           data[i].shape() + data[i].ndim());
    inputs[i].data.resize(data[i].nbytes());
    memcpy(inputs[i].data.data(), data[i].mutable_data(), data[i].nbytes());
    inputs[i].name = names[i];
  }

  std::vector<FDTensor> outputs(self.NumOutputs());
  self.Infer(inputs, &outputs);

  std::vector<pybind11::array> results;
  results.reserve(outputs.size());
  for (size_t i = 0; i < outputs.size(); ++i) {
    auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
    results.emplace_back(pybind11::array(numpy_dtype, outputs[i].shape));
    memcpy(results[i].mutable_data(), outputs[i].data.data(),
           outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
  }
  return results;
}

}  // namespace fastdeploy
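Two details of the header above are worth a sketch, not part of this commit: CTypeToFDDataType resolves its branches per template instantiation, and PyBackendInfer is duck-typed, requiring only NumOutputs() and Infer(inputs, &outputs) from T, which is how multiple backends can share one binding helper.

// Sketch only: a trivial check of the compile-time dtype mapping above.
#include "fastdeploy/pybind/main.h"

int main() {
  auto dt = fastdeploy::CTypeToFDDataType<float>();
  // Types outside int32/int64/float/double trip FDASSERT at runtime instead.
  return dt == fastdeploy::FDDataType::FP32 ? 0 : 1;
}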

49
fastdeploy/utils/perf.h
Normal file
@@ -0,0 +1,49 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "fastdeploy/utils/utils.h"
#include <chrono>  // NOLINT

namespace fastdeploy {

class FASTDEPLOY_DECL TimeCounter {
 public:
  void Start() { begin_ = std::chrono::system_clock::now(); }

  void End() { end_ = std::chrono::system_clock::now(); }

  // Elapsed time between Start() and End(), in seconds.
  double Duration() {
    auto duration =
        std::chrono::duration_cast<std::chrono::microseconds>(end_ - begin_);
    return static_cast<double>(duration.count()) *
           std::chrono::microseconds::period::num /
           std::chrono::microseconds::period::den;
  }

  void PrintInfo(const std::string& prefix = "TimeCounter: ",
                 bool print_out = true) {
    if (!print_out) {
      return;
    }
    FDLogger() << prefix << " duration = " << Duration() << "s." << std::endl;
  }

 private:
  std::chrono::time_point<std::chrono::system_clock> begin_;
  std::chrono::time_point<std::chrono::system_clock> end_;
};

}  // namespace fastdeploy
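A minimal usage sketch of TimeCounter, not part of this commit; everything used here is declared in perf.h above.

// Sketch only: wall-clock timing of a code block with TimeCounter.
#include "fastdeploy/utils/perf.h"

void TimedWork() {
  fastdeploy::TimeCounter tc;
  tc.Start();
  // ... the work being measured ...
  tc.End();
  tc.PrintInfo("infer stage:");    // logs "<prefix> duration = <n>s."
  double seconds = tc.Duration();  // the same elapsed time as a raw value
  (void)seconds;
}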

Some files were not shown because too many files have changed in this diff.