[Other] Separate option from backends (#1048)

* Separate option from backends

* Separate option from backends

* Separate option from backends

* Separate option from backends
Jason
2023-01-04 15:06:55 +08:00
committed by GitHub
parent 97a0a9e33f
commit 0c292c0766
15 changed files with 531 additions and 401 deletions

View File

@@ -1,180 +1,180 @@
---
Language: Cpp
# BasedOnStyle: LLVM
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignArrayOfStructures: None
AlignConsecutiveMacros: None
AlignConsecutiveAssignments: None
AlignConsecutiveBitFields: None
AlignConsecutiveDeclarations: None
AlignEscapedNewlines: Right
AlignOperands: Align
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortEnumsOnASingleLine: true
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Never
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: MultiLine
AttributeMacros:
  - __capability
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
  AfterCaseLabel: false
  AfterClass: false
  AfterControlStatement: Never
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  BeforeLambdaBody: false
  BeforeWhile: false
  IndentBraces: false
  SplitEmptyFunction: true
  SplitEmptyRecord: true
  SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeConceptDeclarations: true
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
# CommentPragmas: '^ IWYU pragma:'
# CommentPragmas: '^[^ ]'
CommentPragmas: '^\\.+'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: false
DisableFormat: false
EmptyLineAfterAccessModifier: Never
EmptyLineBeforeAccessModifier: LogicalBlock
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
  - foreach
  - Q_FOREACH
  - BOOST_FOREACH
IfMacros:
  - KJ_IF_MAYBE
IncludeBlocks: Preserve
IncludeCategories:
  - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
    Priority: 2
    SortPriority: 0
    CaseSensitive: false
  - Regex: '^(<|"(gtest|gmock|isl|json)/)'
    Priority: 3
    SortPriority: 0
    CaseSensitive: false
  - Regex: '.*'
    Priority: 1
    SortPriority: 0
    CaseSensitive: false
IncludeIsMainRegex: '(Test)?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseLabels: false
IndentCaseBlocks: false
IndentGotoLabels: true
IndentPPDirectives: None
IndentExternBlock: AfterExternBlock
IndentRequires: false
IndentWidth: 2
IndentWrappedFunctionNames: false
InsertTrailingCommas: None
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: true
LambdaBodyIndentation: Signature
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Auto
ObjCBlockIndentWidth: 2
ObjCBreakBeforeNestedBlockParam: true
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
PenaltyIndentedWhitespace: 0
PointerAlignment: Left
PPIndentWidth: -1
ReferenceAlignment: Pointer
ReflowComments: false
ShortNamespaceLines: 1
SortIncludes: CaseSensitive
SortJavaStaticImport: Before
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceAroundPointerQualifiers: Default
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: Never
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInLineCommentPrefix:
  Minimum: 1
  Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
BitFieldColonSpacing: Both
Standard: Latest
StatementAttributeLikeMacros:
  - Q_EMIT
StatementMacros:
  - Q_UNUSED
  - QT_REQUIRE_VERSION
TabWidth: 8
UseCRLF: false
UseTab: Never
WhitespaceSensitiveMacros:
  - STRINGIZE
  - PP_STRINGIZE
  - BOOST_PP_STRINGIZE
  - NS_SWIFT_NAME
  - CF_SWIFT_NAME
...
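
As a quick illustration (not part of the commit), a snippet formatted under this configuration (IndentWidth: 2, PointerAlignment: Left, BreakBeforeBraces: Attach, FixNamespaceComments: true) comes out like this:

namespace fastdeploy {
struct Example {
  int* data;  // PointerAlignment: Left keeps '*' with the type
  void Run(int value) {
    if (value > 0) {  // attached braces, two-space indent
      data = &value;
    }
  }
};
}  // namespace fastdeploy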

View File

@@ -24,14 +24,14 @@ repos:
    files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
    exclude: (?!.*third_party)^.*$
# - repo: local
#   hooks:
#   - id: clang-format-with-version-check
#     name: clang-format
#     description: Format files with ClangFormat.
#     entry: bash .clang_format.hook -i
#     language: system
#     files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$
- repo: local
  hooks:
  - id: clang-format-with-version-check
    name: clang-format
    description: Format files with ClangFormat.
    entry: bash .clang_format.hook -i
    language: system
    files: \.(c|cc|cxx|cpp|cu|hxx|proto)$
- repo: local
  hooks:

View File

@@ -20,50 +20,10 @@
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/backends/lite/option.h"
#include "paddle_api.h"  // NOLINT
namespace fastdeploy {
struct LiteBackendOption {
// cpu num threads
int threads = 1;
// lite power mode
// 0: LITE_POWER_HIGH
// 1: LITE_POWER_LOW
// 2: LITE_POWER_FULL
// 3: LITE_POWER_NO_BIND
// 4: LITE_POWER_RAND_HIGH
// 5: LITE_POWER_RAND_LOW
int power_mode = 3;
// enable fp16
bool enable_fp16 = false;
// enable int8
bool enable_int8 = false;
// optimized model dir for CxxConfig
std::string optimized_model_dir = "";
// TODO(qiuyanjun): support more options for lite backend.
// Such as fp16, different device target (kARM/kXPU/kNPU/...)
std::string nnadapter_subgraph_partition_config_path = "";
std::string nnadapter_subgraph_partition_config_buffer = "";
std::string nnadapter_context_properties = "";
std::string nnadapter_model_cache_dir = "";
std::string nnadapter_mixed_precision_quantization_config_path = "";
std::map<std::string, std::vector<std::vector<int64_t>>>
nnadapter_dynamic_shape_info = {{"", {{0}}}};
std::vector<std::string> nnadapter_device_names = {};
bool enable_timvx = false;
bool enable_ascend = false;
bool enable_kunlunxin = false;
int device_id = 0;
int kunlunxin_l3_workspace_size = 0xfffc00;
bool kunlunxin_locked = false;
bool kunlunxin_autotune = true;
std::string kunlunxin_autotune_file = "";
std::string kunlunxin_precision = "int16";
bool kunlunxin_adaptive_seqlen = false;
bool kunlunxin_enable_multi_stream = false;
};
// Convert data type from paddle lite to fastdeploy
FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype);

View File

@@ -0,0 +1,63 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <map>
namespace fastdeploy {
struct LiteBackendOption {
// cpu num threads
int threads = 1;
// lite power mode
// 0: LITE_POWER_HIGH
// 1: LITE_POWER_LOW
// 2: LITE_POWER_FULL
// 3: LITE_POWER_NO_BIND
// 4: LITE_POWER_RAND_HIGH
// 5: LITE_POWER_RAND_LOW
int power_mode = 3;
// enable fp16
bool enable_fp16 = false;
// enable int8
bool enable_int8 = false;
// optimized model dir for CxxConfig
std::string optimized_model_dir = "";
// TODO(qiuyanjun): support more options for lite backend.
// Such as fp16, different device target (kARM/kXPU/kNPU/...)
std::string nnadapter_subgraph_partition_config_path = "";
std::string nnadapter_subgraph_partition_config_buffer = "";
std::string nnadapter_context_properties = "";
std::string nnadapter_model_cache_dir = "";
std::string nnadapter_mixed_precision_quantization_config_path = "";
std::map<std::string, std::vector<std::vector<int64_t>>>
nnadapter_dynamic_shape_info = {{"", {{0}}}};
std::vector<std::string> nnadapter_device_names = {};
bool enable_timvx = false;
bool enable_ascend = false;
bool enable_kunlunxin = false;
int device_id = 0;
int kunlunxin_l3_workspace_size = 0xfffc00;
bool kunlunxin_locked = false;
bool kunlunxin_autotune = true;
std::string kunlunxin_autotune_file = "";
std::string kunlunxin_precision = "int16";
bool kunlunxin_adaptive_seqlen = false;
bool kunlunxin_enable_multi_stream = false;
};
} // namespace fastdeploy
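
Quick usage sketch (illustrative, not part of the diff): with LiteBackendOption in its own header, callers can configure the Lite backend without including paddle_api.h; the plumbing that hands the struct to the backend is assumed.

#include "fastdeploy/backends/lite/option.h"

int main() {
  fastdeploy::LiteBackendOption opt;
  opt.threads = 4;         // CPU thread count
  opt.power_mode = 0;      // 0 maps to LITE_POWER_HIGH (see comments above)
  opt.enable_fp16 = true;  // use fp16 kernels where the target supports them
  // `opt` would then be passed to the Lite backend during initialization.
  return 0;
}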

View File

@@ -0,0 +1,32 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <map>
#include <set>
namespace fastdeploy {
struct OpenVINOBackendOption {
std::string device = "CPU";
int cpu_thread_num = -1;
int num_streams = 0;
std::map<std::string, std::vector<int64_t>> shape_infos;
std::set<std::string> cpu_operators{"MulticlassNms"};
};
} // namespace fastdeploy
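
A minimal sketch of filling OpenVINOBackendOption (illustrative, not part of the diff); the input tensor name "x" is hypothetical:

#include "fastdeploy/backends/openvino/option.h"

int main() {
  fastdeploy::OpenVINOBackendOption opt;
  opt.device = "CPU";
  opt.cpu_thread_num = 8;                   // -1 would keep OpenVINO's default
  opt.num_streams = 2;                      // inference streams for throughput
  opt.shape_infos["x"] = {1, 3, 224, 224};  // hypothetical input "x"
  // "MulticlassNms" already falls back to CPU via the cpu_operators default.
  return 0;
}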

View File

@@ -21,18 +21,11 @@
#include "fastdeploy/backends/backend.h" #include "fastdeploy/backends/backend.h"
#include "fastdeploy/utils/unique_ptr.h" #include "fastdeploy/utils/unique_ptr.h"
#include "fastdeploy/backends/openvino/option.h"
#include "openvino/openvino.hpp" #include "openvino/openvino.hpp"
namespace fastdeploy { namespace fastdeploy {
struct OpenVINOBackendOption {
std::string device = "CPU";
int cpu_thread_num = -1;
int num_streams = 0;
std::map<std::string, std::vector<int64_t>> shape_infos;
std::set<std::string> cpu_operators{"MulticlassNms"};
};
class OpenVINOBackend : public BaseBackend {
public:
static ov::Core core_;

View File

@@ -0,0 +1,44 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <map>
namespace fastdeploy {
struct OrtBackendOption {
// -1 means default
// 0: ORT_DISABLE_ALL
// 1: ORT_ENABLE_BASIC
// 2: ORT_ENABLE_EXTENDED
// 99: ORT_ENABLE_ALL (enable some custom optimizations e.g bert)
int graph_optimization_level = -1;
int intra_op_num_threads = -1;
int inter_op_num_threads = -1;
// 0: ORT_SEQUENTIAL
// 1: ORT_PARALLEL
int execution_mode = -1;
bool use_gpu = false;
int gpu_id = 0;
void* external_stream_ = nullptr;
// inside parameter, maybe remove next version
bool remove_multiclass_nms_ = false;
std::map<std::string, std::string> custom_op_info_;
};
} // namespace fastdeploy
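
A minimal sketch of filling OrtBackendOption (illustrative values; the enum comments above give the numeric encodings):

#include "fastdeploy/backends/ort/option.h"

int main() {
  fastdeploy::OrtBackendOption opt;
  opt.graph_optimization_level = 99;  // ORT_ENABLE_ALL
  opt.intra_op_num_threads = 4;       // -1 keeps onnxruntime's default
  opt.execution_mode = 0;             // ORT_SEQUENTIAL
  opt.use_gpu = true;
  opt.gpu_id = 0;
  return 0;
}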

View File

@@ -21,6 +21,7 @@
#include <map>
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/backends/ort/option.h"
#include "onnxruntime_cxx_api.h"  // NOLINT
namespace fastdeploy {
@@ -31,27 +32,6 @@ struct OrtValueInfo {
ONNXTensorElementDataType dtype;
};
struct OrtBackendOption {
// -1 means default
// 0: ORT_DISABLE_ALL
// 1: ORT_ENABLE_BASIC
// 2: ORT_ENABLE_EXTENDED
// 99: ORT_ENABLE_ALL (enable some custom optimizations e.g bert)
int graph_optimization_level = -1;
int intra_op_num_threads = -1;
int inter_op_num_threads = -1;
// 0: ORT_SEQUENTIAL
// 1: ORT_PARALLEL
int execution_mode = -1;
bool use_gpu = false;
int gpu_id = 0;
void* external_stream_ = nullptr;
// inside parameter, maybe remove next version
bool remove_multiclass_nms_ = false;
std::map<std::string, std::string> custom_op_info_;
};
class OrtBackend : public BaseBackend {
public:
OrtBackend() {}

View File

@@ -0,0 +1,79 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/backends/tensorrt/option.h"
namespace fastdeploy {
struct IpuOption {
int ipu_device_num;
int ipu_micro_batch_size;
bool ipu_enable_pipelining;
int ipu_batches_per_step;
bool ipu_enable_fp16;
int ipu_replica_num;
float ipu_available_memory_proportion;
bool ipu_enable_half_partial;
};
struct PaddleBackendOption {
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
std::string model_buffer_ = "";
std::string params_buffer_ = "";
size_t model_buffer_size_ = 0;
size_t params_buffer_size_ = 0;
bool model_from_memory_ = false;
#ifdef WITH_GPU
bool use_gpu = true;
#else
bool use_gpu = false;
#endif
bool enable_mkldnn = true;
bool enable_log_info = false;
bool enable_trt = false;
TrtBackendOption trt_option;
bool collect_shape = false;
std::vector<std::string> trt_disabled_ops_{};
#ifdef WITH_IPU
bool use_ipu = true;
IpuOption ipu_option;
#else
bool use_ipu = false;
#endif
int mkldnn_cache_size = 1;
int cpu_thread_num = 8;
// initialize memory size(MB) for GPU
int gpu_mem_init_size = 100;
// gpu device id
int gpu_id = 0;
bool enable_pinned_memory = false;
void* external_stream_ = nullptr;
std::vector<std::string> delete_pass_names = {};
};
} // namespace fastdeploy
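
One consequence of the split is visible here: trt_option, collect_shape, and trt_disabled_ops_ are now plain members instead of being guarded by ENABLE_TRT_BACKEND, so option-filling code like the following sketch (hypothetical values) compiles in any build configuration:

#include "fastdeploy/backends/paddle/option.h"

int main() {
  fastdeploy::PaddleBackendOption opt;
  opt.enable_mkldnn = true;
  opt.cpu_thread_num = 8;
  opt.enable_trt = true;  // only takes effect in TensorRT-enabled builds
  opt.trt_option.enable_fp16 = true;
  opt.trt_option.max_batch_size = 8;
  return 0;
}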

View File

@@ -13,9 +13,11 @@
// limitations under the License.
#include "fastdeploy/backends/paddle/paddle_backend.h"
#include "fastdeploy/utils/path.h"
#include <sstream>
#include "fastdeploy/utils/path.h"
namespace fastdeploy {
void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
@@ -26,7 +28,6 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
config_.SetExecStream(option_.external_stream_);
}
if (option.enable_trt) {
#ifdef ENABLE_TRT_BACKEND
config_.Exp_DisableTensorRtOPs(option.trt_disabled_ops_);
auto precision = paddle_infer::PrecisionType::kFloat32;
if (option.trt_option.enable_fp16) {
@@ -46,11 +47,6 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
option.trt_option.max_batch_size, 3,
precision, use_static);
SetTRTDynamicShapeToConfig(option);
#else
FDWARNING << "The FastDeploy is not compiled with TensorRT backend, so "
"will fallback to GPU with Paddle Inference Backend."
<< std::endl;
#endif
}
} else if (option.use_ipu) {
#ifdef WITH_IPU
@@ -100,7 +96,8 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
return false;
}
// The input/output information get from predictor is not right, use
// PaddleReader instead now
std::string contents;
if (option.model_from_memory_) {
@@ -116,14 +113,14 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
config_.EnableMemoryOptim();
BuildOption(option);
auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size());
// If it's a quantized model, and use cpu with mkldnn, automaticaly switch to
// int8 mode
if (reader.is_quantize_model) {
if (option.use_gpu) {
FDWARNING << "The loaded model is a quantized model, while inference on "
             "GPU, please use TensorRT backend to get better performance."
          << std::endl;
if (option.enable_trt) {
#ifdef ENABLE_TRT_BACKEND
bool use_static = false;
if (option.trt_option.serialize_file != "") {
FDWARNING
@@ -139,7 +136,6 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
paddle_infer::PrecisionType::kInt8,
use_static, false);
SetTRTDynamicShapeToConfig(option);
#endif
}
}
if (option.enable_mkldnn) {
@@ -163,14 +159,13 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
outputs_desc_.resize(reader.num_outputs);
for (int i = 0; i < reader.num_outputs; ++i) {
std::string name(reader.outputs[i].name);
std::vector<int64_t> shape(
    reader.outputs[i].shape,
    reader.outputs[i].shape + reader.outputs[i].rank);
outputs_desc_[i].name = name;
outputs_desc_[i].shape.assign(shape.begin(), shape.end());
outputs_desc_[i].dtype = ReaderDataTypeToFD(reader.outputs[i].dtype);
}
#ifdef ENABLE_TRT_BACKEND
if (option.collect_shape) {
// Set the shape info file.
std::string curr_model_dir = "./";
@@ -205,7 +200,6 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
<< " to set TensorRT dynamic shape." << std::endl; << " to set TensorRT dynamic shape." << std::endl;
config_.EnableTunedTensorRtDynamicShape(shape_range_info, false); config_.EnableTunedTensorRtDynamicShape(shape_range_info, false);
} }
#endif
predictor_ = paddle_infer::CreatePredictor(config_);
initialized_ = true;
return true;
@@ -284,7 +278,6 @@ std::unique_ptr<BaseBackend> PaddleBackend::Clone(void* stream, int device_id) {
return new_backend;
}
#ifdef ENABLE_TRT_BACKEND
void PaddleBackend::SetTRTDynamicShapeToConfig(
    const PaddleBackendOption& option) {
std::map<std::string, std::vector<int>> max_shape;
@@ -370,7 +363,8 @@ void PaddleBackend::CollectShapeRun(
break;
}
default: {
FDASSERT(false,
         "Input data Paddle backend only supports "
         "FP32/INT32/INT64 currently.");
break;
}
@@ -378,6 +372,5 @@ void PaddleBackend::CollectShapeRun(
}
predictor->Run();
}
#endif
} // namespace fastdeploy

View File

@@ -20,74 +20,15 @@
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/backends/paddle/option.h"
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"
#endif
#include "fastdeploy/utils/unique_ptr.h"
#include "paddle_inference_api.h"  // NOLINT
#ifdef ENABLE_TRT_BACKEND
#include "fastdeploy/backends/tensorrt/trt_backend.h"
#endif
namespace fastdeploy {
struct IpuOption {
int ipu_device_num;
int ipu_micro_batch_size;
bool ipu_enable_pipelining;
int ipu_batches_per_step;
bool ipu_enable_fp16;
int ipu_replica_num;
float ipu_available_memory_proportion;
bool ipu_enable_half_partial;
};
struct PaddleBackendOption {
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
std::string model_buffer_ = "";
std::string params_buffer_ = "";
size_t model_buffer_size_ = 0;
size_t params_buffer_size_ = 0;
bool model_from_memory_ = false;
#ifdef WITH_GPU
bool use_gpu = true;
#else
bool use_gpu = false;
#endif
bool enable_mkldnn = true;
bool enable_log_info = false;
bool enable_trt = false;
#ifdef ENABLE_TRT_BACKEND
TrtBackendOption trt_option;
bool collect_shape = false;
std::vector<std::string> trt_disabled_ops_{};
#endif
#ifdef WITH_IPU
bool use_ipu = true;
IpuOption ipu_option;
#else
bool use_ipu = false;
#endif
int mkldnn_cache_size = 1;
int cpu_thread_num = 8;
// initialize memory size(MB) for GPU
int gpu_mem_init_size = 100;
// gpu device id
int gpu_id = 0;
bool enable_pinned_memory = false;
void* external_stream_ = nullptr;
std::vector<std::string> delete_pass_names = {};
};
// convert FD device to paddle place type
paddle_infer::PlaceType ConvertFDDeviceToPlace(Device device);
@@ -132,7 +73,6 @@ class PaddleBackend : public BaseBackend {
std::vector<TensorInfo> GetOutputInfos() override;
private:
#ifdef ENABLE_TRT_BACKEND
void
CollectShapeRun(paddle_infer::Predictor* predictor,
                const std::map<std::string, std::vector<int>>& shape) const;
@@ -142,7 +82,6 @@ class PaddleBackend : public BaseBackend {
std::map<std::string, std::vector<int>>* min_shape,
std::map<std::string, std::vector<int>>* opt_shape) const;
void SetTRTDynamicShapeToConfig(const PaddleBackendOption& option);
#endif
PaddleBackendOption option_;
paddle_infer::Config config_;
std::shared_ptr<paddle_infer::Predictor> predictor_;

View File

@@ -0,0 +1,47 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
namespace fastdeploy {
struct PorosBackendOption {
#ifdef WITH_GPU
bool use_gpu = true;
#else
bool use_gpu = false;
#endif
int gpu_id = 0;
bool long_to_int = true;
// There is calculation precision in tf32 mode on A10, it can bring some
// performance improvement, but there may be diff
bool use_nvidia_tf32 = false;
// Threshold for the number of non-const ops
int32_t unconst_ops_thres = -1;
std::string poros_file = "";
std::vector<FDDataType> prewarm_datatypes = {FDDataType::FP32};
// TRT options
bool enable_fp16 = false;
bool enable_int8 = false;
bool is_dynamic = false;
size_t max_batch_size = 32;
size_t max_workspace_size = 1 << 30;
};
} // namespace fastdeploy
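
An illustrative sketch of a PorosBackendOption setup (values are hypothetical):

#include "fastdeploy/backends/poros/option.h"

int main() {
  fastdeploy::PorosBackendOption opt;
  opt.long_to_int = true;       // cast int64 inputs down to int32
  opt.use_nvidia_tf32 = false;  // avoid tf32 precision differences
  opt.enable_fp16 = true;
  opt.max_batch_size = 16;
  // prewarm_datatypes already defaults to {FDDataType::FP32}.
  return 0;
}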

View File

@@ -20,35 +20,12 @@
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/backends/poros/option.h"
#include "fastdeploy/backends/poros/common/compile.h"
#include "fastdeploy/backends/poros/common/poros_module.h"
namespace fastdeploy {
struct PorosBackendOption {
#ifdef WITH_GPU
bool use_gpu = true;
#else
bool use_gpu = false;
#endif
int gpu_id = 0;
bool long_to_int = true;
// There is calculation precision in tf32 mode on A10, it can bring some
// performance improvement, but there may be diff
bool use_nvidia_tf32 = false;
// Threshold for the number of non-const ops
int32_t unconst_ops_thres = -1;
std::string poros_file = "";
std::vector<FDDataType> prewarm_datatypes = {FDDataType::FP32};
// TRT options
bool enable_fp16 = false;
bool enable_int8 = false;
bool is_dynamic = false;
size_t max_batch_size = 32;
size_t max_workspace_size = 1 << 30;
};
// Convert data type from fastdeploy to poros
at::ScalarType GetPorosDtype(const FDDataType& fd_dtype);

View File

@@ -0,0 +1,41 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <string>
#include <vector>
namespace fastdeploy {
struct TrtBackendOption {
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
// format of input model
ModelFormat model_format = ModelFormat::AUTOREC;
int gpu_id = 0;
bool enable_fp16 = false;
bool enable_int8 = false;
size_t max_batch_size = 32;
size_t max_workspace_size = 1 << 30;
std::map<std::string, std::vector<int32_t>> max_shape;
std::map<std::string, std::vector<int32_t>> min_shape;
std::map<std::string, std::vector<int32_t>> opt_shape;
std::string serialize_file = "";
bool enable_pinned_memory = false;
void* external_stream_ = nullptr;
};
} // namespace fastdeploy
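
An illustrative sketch of a dynamic-shape TensorRT setup; the tensor name "x" and the shapes are hypothetical:

#include "fastdeploy/backends/tensorrt/option.h"

int main() {
  fastdeploy::TrtBackendOption opt;
  opt.enable_fp16 = true;
  opt.max_workspace_size = 1 << 30;       // 1 GiB engine-build workspace
  opt.min_shape["x"] = {1, 3, 224, 224};  // smallest shape in the profile
  opt.opt_shape["x"] = {4, 3, 224, 224};  // shape TensorRT optimizes for
  opt.max_shape["x"] = {8, 3, 224, 224};  // largest shape in the profile
  opt.serialize_file = "trt_engine.cache";  // cache the built engine
  return 0;
}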

View File

@@ -25,6 +25,7 @@
#include "NvOnnxParser.h" #include "NvOnnxParser.h"
#include "fastdeploy/backends/backend.h" #include "fastdeploy/backends/backend.h"
#include "fastdeploy/backends/tensorrt/utils.h" #include "fastdeploy/backends/tensorrt/utils.h"
#include "fastdeploy/backends/tensorrt/option.h"
#include "fastdeploy/utils/unique_ptr.h" #include "fastdeploy/utils/unique_ptr.h"
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 { class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
@@ -62,25 +63,6 @@ struct TrtValueInfo {
FDDataType original_dtype;  // dtype of original ONNX/Paddle model
};
struct TrtBackendOption {
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
// format of input model
ModelFormat model_format = ModelFormat::AUTOREC;
int gpu_id = 0;
bool enable_fp16 = false;
bool enable_int8 = false;
size_t max_batch_size = 32;
size_t max_workspace_size = 1 << 30;
std::map<std::string, std::vector<int32_t>> max_shape;
std::map<std::string, std::vector<int32_t>> min_shape;
std::map<std::string, std::vector<int32_t>> opt_shape;
std::string serialize_file = "";
bool enable_pinned_memory = false;
void* external_stream_ = nullptr;
};
std::vector<int> toVec(const nvinfer1::Dims& dim);
size_t TrtDataTypeSize(const nvinfer1::DataType& dtype);
FDDataType GetFDDataType(const nvinfer1::DataType& dtype);