Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-20 15:25:22 +08:00)

[Other] Seperate option from backends (#1048)

This commit moves each inference backend's option struct out of the backend headers into a standalone option.h per backend (Paddle Lite, OpenVINO, ONNX Runtime, Paddle Inference, Poros, TensorRT), and it enables the previously commented-out .clang-format configuration and pre-commit clang-format hook.
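The pattern applied throughout the commit is simple: each backend's option struct becomes a small header of plain data, and the backend header merely includes it. The sketch below illustrates that layout with invented names (DemoBackendOption, DemoBackend); it is not FastDeploy code, just the shape of the refactor, written as a single translation unit so it compiles on its own.

// --- what would live in demo/option.h: a plain-data option struct ---
#include <iostream>
#include <string>

struct DemoBackendOption {
  int cpu_thread_num = 8;   // defaults live in the option header itself
  bool enable_fp16 = false;
  std::string cache_dir = "";
};

// --- what would live in demo/demo_backend.h: the backend consumes the struct ---
class DemoBackend {
 public:
  void BuildOption(const DemoBackendOption& option) { option_ = option; }
  void Describe() const {
    std::cout << "threads=" << option_.cpu_thread_num
              << " fp16=" << option_.enable_fp16 << std::endl;
  }

 private:
  DemoBackendOption option_;
};

int main() {
  DemoBackendOption option;
  option.cpu_thread_num = 4;
  option.enable_fp16 = true;
  DemoBackend backend;
  backend.BuildOption(option);
  backend.Describe();
  return 0;
}

Keeping the option structs free of backend SDK headers also lets other components reference them without compile-time guards; the paddle option header below, for example, embeds TrtBackendOption directly instead of wrapping it in #ifdef ENABLE_TRT_BACKEND.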
.clang-format (360 changed lines)

@@ -1,180 +1,180 @@
Every line of .clang-format was previously commented out with a leading "# "; this change removes that prefix so the configuration takes effect (lines that remain comments in the new file, such as "# BasedOnStyle: LLVM", keep a single "#"). The resulting configuration:

---
Language: Cpp
# BasedOnStyle: LLVM
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignArrayOfStructures: None
AlignConsecutiveMacros: None
AlignConsecutiveAssignments: None
AlignConsecutiveBitFields: None
AlignConsecutiveDeclarations: None
AlignEscapedNewlines: Right
AlignOperands: Align
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortEnumsOnASingleLine: true
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Never
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: MultiLine
AttributeMacros:
  - __capability
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
  AfterCaseLabel: false
  AfterClass: false
  AfterControlStatement: Never
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  BeforeLambdaBody: false
  BeforeWhile: false
  IndentBraces: false
  SplitEmptyFunction: true
  SplitEmptyRecord: true
  SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeConceptDeclarations: true
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
# CommentPragmas: '^ IWYU pragma:'
# CommentPragmas: '^[^ ]'
CommentPragmas: '^\\.+'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: false
DisableFormat: false
EmptyLineAfterAccessModifier: Never
EmptyLineBeforeAccessModifier: LogicalBlock
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
  - foreach
  - Q_FOREACH
  - BOOST_FOREACH
IfMacros:
  - KJ_IF_MAYBE
IncludeBlocks: Preserve
IncludeCategories:
  - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
    Priority: 2
    SortPriority: 0
    CaseSensitive: false
  - Regex: '^(<|"(gtest|gmock|isl|json)/)'
    Priority: 3
    SortPriority: 0
    CaseSensitive: false
  - Regex: '.*'
    Priority: 1
    SortPriority: 0
    CaseSensitive: false
IncludeIsMainRegex: '(Test)?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseLabels: false
IndentCaseBlocks: false
IndentGotoLabels: true
IndentPPDirectives: None
IndentExternBlock: AfterExternBlock
IndentRequires: false
IndentWidth: 2
IndentWrappedFunctionNames: false
InsertTrailingCommas: None
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: true
LambdaBodyIndentation: Signature
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Auto
ObjCBlockIndentWidth: 2
ObjCBreakBeforeNestedBlockParam: true
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
PenaltyIndentedWhitespace: 0
PointerAlignment: Left
PPIndentWidth: -1
ReferenceAlignment: Pointer
ReflowComments: false
ShortNamespaceLines: 1
SortIncludes: CaseSensitive
SortJavaStaticImport: Before
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceAroundPointerQualifiers: Default
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: Never
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInLineCommentPrefix:
  Minimum: 1
  Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
BitFieldColonSpacing: Both
Standard: Latest
StatementAttributeLikeMacros:
  - Q_EMIT
StatementMacros:
  - Q_UNUSED
  - QT_REQUIRE_VERSION
TabWidth: 8
UseCRLF: false
UseTab: Never
WhitespaceSensitiveMacros:
  - STRINGIZE
  - PP_STRINGIZE
  - BOOST_PP_STRINGIZE
  - NS_SWIFT_NAME
  - CF_SWIFT_NAME
...
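To make the newly active settings concrete, here is a small illustrative C++ fragment laid out the way this configuration formats code (2-space indentation, attached braces, ColumnLimit 80, PointerAlignment: Left, two spaces before trailing comments). It is an example, not code from the repository.

#include <string>

namespace demo {

struct Example {
  int* data = nullptr;  // PointerAlignment: Left keeps '*' next to the type
  std::string name;
};

inline int Sum(const Example& ex, int n) {
  int total = 0;
  for (int i = 0; i < n; ++i) {  // attached braces, 2-space indent
    total += ex.data[i];
  }
  return total;
}

}  // namespace demo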
Pre-commit hook configuration (file path not captured in this view): the local clang-format hook, previously commented out, is enabled.

@@ -24,14 +24,14 @@ repos:
       files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
       exclude: (?!.*third_party)^.*$

-# - repo: local
-#   hooks:
-#   - id: clang-format-with-version-check
-#     name: clang-format
-#     description: Format files with ClangFormat.
-#     entry: bash .clang_format.hook -i
-#     language: system
-#     files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$
+- repo: local
+  hooks:
+  - id: clang-format-with-version-check
+    name: clang-format
+    description: Format files with ClangFormat.
+    entry: bash .clang_format.hook -i
+    language: system
+    files: \.(c|cc|cxx|cpp|cu|hxx|proto)$

 - repo: local
   hooks:
Paddle Lite backend header: include the new option header and remove the inline LiteBackendOption definition.

@@ -20,50 +20,10 @@
 #include <vector>

 #include "fastdeploy/backends/backend.h"
+#include "fastdeploy/backends/lite/option.h"
 #include "paddle_api.h"  // NOLINT

 namespace fastdeploy {

-struct LiteBackendOption {
-  // cpu num threads
-  int threads = 1;
-  // lite power mode
-  // 0: LITE_POWER_HIGH
-  // 1: LITE_POWER_LOW
-  // 2: LITE_POWER_FULL
-  // 3: LITE_POWER_NO_BIND
-  // 4: LITE_POWER_RAND_HIGH
-  // 5: LITE_POWER_RAND_LOW
-  int power_mode = 3;
-  // enable fp16
-  bool enable_fp16 = false;
-  // enable int8
-  bool enable_int8 = false;
-  // optimized model dir for CxxConfig
-  std::string optimized_model_dir = "";
-  // TODO(qiuyanjun): support more options for lite backend.
-  // Such as fp16, different device target (kARM/kXPU/kNPU/...)
-  std::string nnadapter_subgraph_partition_config_path = "";
-  std::string nnadapter_subgraph_partition_config_buffer = "";
-  std::string nnadapter_context_properties = "";
-  std::string nnadapter_model_cache_dir = "";
-  std::string nnadapter_mixed_precision_quantization_config_path = "";
-  std::map<std::string, std::vector<std::vector<int64_t>>>
-      nnadapter_dynamic_shape_info = {{"", {{0}}}};
-  std::vector<std::string> nnadapter_device_names = {};
-  bool enable_timvx = false;
-  bool enable_ascend = false;
-  bool enable_kunlunxin = false;
-  int device_id = 0;
-  int kunlunxin_l3_workspace_size = 0xfffc00;
-  bool kunlunxin_locked = false;
-  bool kunlunxin_autotune = true;
-  std::string kunlunxin_autotune_file = "";
-  std::string kunlunxin_precision = "int16";
-  bool kunlunxin_adaptive_seqlen = false;
-  bool kunlunxin_enable_multi_stream = false;
-};
-
 // Convert data type from paddle lite to fastdeploy
 FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype);
fastdeploy/backends/lite/option.h — new executable file, 63 lines:

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <map>
namespace fastdeploy {

struct LiteBackendOption {
  // cpu num threads
  int threads = 1;
  // lite power mode
  // 0: LITE_POWER_HIGH
  // 1: LITE_POWER_LOW
  // 2: LITE_POWER_FULL
  // 3: LITE_POWER_NO_BIND
  // 4: LITE_POWER_RAND_HIGH
  // 5: LITE_POWER_RAND_LOW
  int power_mode = 3;
  // enable fp16
  bool enable_fp16 = false;
  // enable int8
  bool enable_int8 = false;
  // optimized model dir for CxxConfig
  std::string optimized_model_dir = "";
  // TODO(qiuyanjun): support more options for lite backend.
  // Such as fp16, different device target (kARM/kXPU/kNPU/...)
  std::string nnadapter_subgraph_partition_config_path = "";
  std::string nnadapter_subgraph_partition_config_buffer = "";
  std::string nnadapter_context_properties = "";
  std::string nnadapter_model_cache_dir = "";
  std::string nnadapter_mixed_precision_quantization_config_path = "";
  std::map<std::string, std::vector<std::vector<int64_t>>>
      nnadapter_dynamic_shape_info = {{"", {{0}}}};
  std::vector<std::string> nnadapter_device_names = {};
  bool enable_timvx = false;
  bool enable_ascend = false;
  bool enable_kunlunxin = false;
  int device_id = 0;
  int kunlunxin_l3_workspace_size = 0xfffc00;
  bool kunlunxin_locked = false;
  bool kunlunxin_autotune = true;
  std::string kunlunxin_autotune_file = "";
  std::string kunlunxin_precision = "int16";
  bool kunlunxin_adaptive_seqlen = false;
  bool kunlunxin_enable_multi_stream = false;
};
}  // namespace fastdeploy
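A hedged usage sketch of the new Lite option header: the fields set below are exactly those declared in the struct above, but the include path assumes a FastDeploy source tree and the hand-off of the struct to the Lite backend is not shown.

#include "fastdeploy/backends/lite/option.h"

int main() {
  fastdeploy::LiteBackendOption option;
  option.threads = 4;                    // CPU thread count
  option.power_mode = 0;                 // 0 == LITE_POWER_HIGH
  option.enable_fp16 = true;             // use FP16 kernels where available
  option.optimized_model_dir = "./opt";  // dir for the optimized CxxConfig model
  // The populated struct would then be passed to the Lite backend; that wiring
  // (for example through FastDeploy's RuntimeOption) is outside this sketch.
  return 0;
}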
fastdeploy/backends/openvino/option.h — new file, 32 lines:

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// ... (same Apache-2.0 license header as in lite/option.h above)

#pragma once

#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <map>
#include <set>
namespace fastdeploy {

struct OpenVINOBackendOption {
  std::string device = "CPU";
  int cpu_thread_num = -1;
  int num_streams = 0;
  std::map<std::string, std::vector<int64_t>> shape_infos;
  std::set<std::string> cpu_operators{"MulticlassNms"};
};
}  // namespace fastdeploy
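A minimal sketch of filling in the OpenVINO option struct defined above; the field names come from the new header, while the input name "image" and the shape are illustrative.

#include "fastdeploy/backends/openvino/option.h"

int main() {
  fastdeploy::OpenVINOBackendOption option;
  option.device = "CPU";      // target OpenVINO device string
  option.cpu_thread_num = 8;  // -1 keeps the OpenVINO default
  option.num_streams = 2;     // inference streams for throughput mode
  option.shape_infos["image"] = {1, 3, 224, 224};  // hypothetical input tensor
  return 0;
}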
OpenVINO backend header: include the new option header and drop the inline struct definition.

@@ -21,18 +21,11 @@
 #include "fastdeploy/backends/backend.h"
 #include "fastdeploy/utils/unique_ptr.h"
+#include "fastdeploy/backends/openvino/option.h"
 #include "openvino/openvino.hpp"

 namespace fastdeploy {

-struct OpenVINOBackendOption {
-  std::string device = "CPU";
-  int cpu_thread_num = -1;
-  int num_streams = 0;
-  std::map<std::string, std::vector<int64_t>> shape_infos;
-  std::set<std::string> cpu_operators{"MulticlassNms"};
-};
-
 class OpenVINOBackend : public BaseBackend {
  public:
  static ov::Core core_;
fastdeploy/backends/ort/option.h — new file, 44 lines:

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// ... (same Apache-2.0 license header as in lite/option.h above)

#pragma once

#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <map>
namespace fastdeploy {

struct OrtBackendOption {
  // -1 means default
  // 0: ORT_DISABLE_ALL
  // 1: ORT_ENABLE_BASIC
  // 2: ORT_ENABLE_EXTENDED
  // 99: ORT_ENABLE_ALL (enable some custom optimizations e.g bert)
  int graph_optimization_level = -1;
  int intra_op_num_threads = -1;
  int inter_op_num_threads = -1;
  // 0: ORT_SEQUENTIAL
  // 1: ORT_PARALLEL
  int execution_mode = -1;
  bool use_gpu = false;
  int gpu_id = 0;
  void* external_stream_ = nullptr;

  // inside parameter, maybe remove next version
  bool remove_multiclass_nms_ = false;
  std::map<std::string, std::string> custom_op_info_;
};
}  // namespace fastdeploy
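A hedged sketch for the ONNX Runtime option struct above, using the enumerated values documented in its comments; the include path assumes the FastDeploy tree.

#include "fastdeploy/backends/ort/option.h"

int main() {
  fastdeploy::OrtBackendOption option;
  option.graph_optimization_level = 99;  // 99 == ORT_ENABLE_ALL per the header comment
  option.intra_op_num_threads = 4;       // -1 would keep onnxruntime's default
  option.execution_mode = 0;             // 0 == ORT_SEQUENTIAL
  option.use_gpu = true;
  option.gpu_id = 0;
  return 0;
}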
ONNX Runtime backend header: include the new option header and remove the inline OrtBackendOption definition.

@@ -21,6 +21,7 @@
 #include <map>

 #include "fastdeploy/backends/backend.h"
+#include "fastdeploy/backends/ort/option.h"
 #include "onnxruntime_cxx_api.h"  // NOLINT

 namespace fastdeploy {
@@ -31,27 +32,6 @@ struct OrtValueInfo {
   ONNXTensorElementDataType dtype;
 };

-struct OrtBackendOption {
-  // -1 means default
-  // 0: ORT_DISABLE_ALL
-  // 1: ORT_ENABLE_BASIC
-  // 2: ORT_ENABLE_EXTENDED
-  // 99: ORT_ENABLE_ALL (enable some custom optimizations e.g bert)
-  int graph_optimization_level = -1;
-  int intra_op_num_threads = -1;
-  int inter_op_num_threads = -1;
-  // 0: ORT_SEQUENTIAL
-  // 1: ORT_PARALLEL
-  int execution_mode = -1;
-  bool use_gpu = false;
-  int gpu_id = 0;
-  void* external_stream_ = nullptr;
-
-  // inside parameter, maybe remove next version
-  bool remove_multiclass_nms_ = false;
-  std::map<std::string, std::string> custom_op_info_;
-};
-
 class OrtBackend : public BaseBackend {
  public:
  OrtBackend() {}
fastdeploy/backends/paddle/option.h — new file, 79 lines:

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// ... (same Apache-2.0 license header as in lite/option.h above)

#pragma once

#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/backends/tensorrt/option.h"


namespace fastdeploy {

struct IpuOption {
  int ipu_device_num;
  int ipu_micro_batch_size;
  bool ipu_enable_pipelining;
  int ipu_batches_per_step;
  bool ipu_enable_fp16;
  int ipu_replica_num;
  float ipu_available_memory_proportion;
  bool ipu_enable_half_partial;
};

struct PaddleBackendOption {
  std::string model_file = "";   // Path of model file
  std::string params_file = "";  // Path of parameters file, can be empty

  std::string model_buffer_ = "";
  std::string params_buffer_ = "";
  size_t model_buffer_size_ = 0;
  size_t params_buffer_size_ = 0;
  bool model_from_memory_ = false;

#ifdef WITH_GPU
  bool use_gpu = true;
#else
  bool use_gpu = false;
#endif
  bool enable_mkldnn = true;

  bool enable_log_info = false;

  bool enable_trt = false;
  TrtBackendOption trt_option;
  bool collect_shape = false;
  std::vector<std::string> trt_disabled_ops_{};

#ifdef WITH_IPU
  bool use_ipu = true;
  IpuOption ipu_option;
#else
  bool use_ipu = false;
#endif

  int mkldnn_cache_size = 1;
  int cpu_thread_num = 8;
  // initialize memory size(MB) for GPU
  int gpu_mem_init_size = 100;
  // gpu device id
  int gpu_id = 0;
  bool enable_pinned_memory = false;
  void* external_stream_ = nullptr;

  std::vector<std::string> delete_pass_names = {};
};
}  // namespace fastdeploy
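Because the new Paddle option header includes tensorrt/option.h unconditionally, a TrtBackendOption can be embedded and configured without any ENABLE_TRT_BACKEND guard. A hedged sketch (include path assumes the FastDeploy tree; the values are illustrative):

#include "fastdeploy/backends/paddle/option.h"

int main() {
  fastdeploy::PaddleBackendOption option;
  option.cpu_thread_num = 8;
  option.enable_mkldnn = true;
  option.enable_trt = true;              // ask Paddle Inference to use TensorRT
  option.trt_option.enable_fp16 = true;  // nested TrtBackendOption, no #ifdef needed
  option.trt_option.max_batch_size = 8;
  option.collect_shape = true;           // gather dynamic-shape info at load time
  return 0;
}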
Paddle Inference backend implementation (the source file including paddle_backend.h): the utils/path.h include moves below <sstream>, the ENABLE_TRT_BACKEND guards around option handling are removed (the TRT option type is now always available), and long comments and calls are re-wrapped to the 80-column limit.

@@ -13,9 +13,11 @@
 // limitations under the License.

 #include "fastdeploy/backends/paddle/paddle_backend.h"
-#include "fastdeploy/utils/path.h"
+
 #include <sstream>

+#include "fastdeploy/utils/path.h"
+
 namespace fastdeploy {

 void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
@@ -26,7 +28,6 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
     config_.SetExecStream(option_.external_stream_);
   }
   if (option.enable_trt) {
-#ifdef ENABLE_TRT_BACKEND
     config_.Exp_DisableTensorRtOPs(option.trt_disabled_ops_);
     auto precision = paddle_infer::PrecisionType::kFloat32;
     if (option.trt_option.enable_fp16) {
@@ -46,11 +47,6 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
                                 option.trt_option.max_batch_size, 3,
                                 precision, use_static);
     SetTRTDynamicShapeToConfig(option);
-#else
-    FDWARNING << "The FastDeploy is not compiled with TensorRT backend, so "
-                 "will fallback to GPU with Paddle Inference Backend."
-              << std::endl;
-#endif
    }
  } else if (option.use_ipu) {
#ifdef WITH_IPU
@@ -100,7 +96,8 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
     return false;
   }

-  // The input/output information get from predictor is not right, use PaddleReader instead now
+  // The input/output information get from predictor is not right, use
+  // PaddleReader instead now
   std::string contents;

   if (option.model_from_memory_) {
@@ -116,14 +113,14 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
   config_.EnableMemoryOptim();
   BuildOption(option);
   auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size());
-  // If it's a quantized model, and use cpu with mkldnn, automaticaly switch to int8 mode
+  // If it's a quantized model, and use cpu with mkldnn, automaticaly switch to
+  // int8 mode
   if (reader.is_quantize_model) {
     if (option.use_gpu) {
       FDWARNING << "The loaded model is a quantized model, while inference on "
                    "GPU, please use TensorRT backend to get better performance."
                 << std::endl;
       if (option.enable_trt) {
-#ifdef ENABLE_TRT_BACKEND
         bool use_static = false;
         if (option.trt_option.serialize_file != "") {
           FDWARNING
@@ -139,7 +136,6 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
                                    paddle_infer::PrecisionType::kInt8,
                                    use_static, false);
         SetTRTDynamicShapeToConfig(option);
-#endif
       }
     }
     if (option.enable_mkldnn) {
@@ -163,14 +159,13 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
   outputs_desc_.resize(reader.num_outputs);
   for (int i = 0; i < reader.num_outputs; ++i) {
     std::string name(reader.outputs[i].name);
-    std::vector<int64_t> shape(reader.outputs[i].shape,
-                               reader.outputs[i].shape +
-                               reader.outputs[i].rank);
+    std::vector<int64_t> shape(
+        reader.outputs[i].shape,
+        reader.outputs[i].shape + reader.outputs[i].rank);
     outputs_desc_[i].name = name;
     outputs_desc_[i].shape.assign(shape.begin(), shape.end());
     outputs_desc_[i].dtype = ReaderDataTypeToFD(reader.outputs[i].dtype);
   }
-#ifdef ENABLE_TRT_BACKEND
   if (option.collect_shape) {
     // Set the shape info file.
     std::string curr_model_dir = "./";
@@ -205,7 +200,6 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
               << " to set TensorRT dynamic shape." << std::endl;
     config_.EnableTunedTensorRtDynamicShape(shape_range_info, false);
   }
-#endif
   predictor_ = paddle_infer::CreatePredictor(config_);
   initialized_ = true;
   return true;
@@ -284,7 +278,6 @@ std::unique_ptr<BaseBackend> PaddleBackend::Clone(void* stream, int device_id) {
   return new_backend;
 }

-#ifdef ENABLE_TRT_BACKEND
 void PaddleBackend::SetTRTDynamicShapeToConfig(
     const PaddleBackendOption& option) {
   std::map<std::string, std::vector<int>> max_shape;
@@ -370,7 +363,8 @@ void PaddleBackend::CollectShapeRun(
         break;
       }
       default: {
-        FDASSERT(false, "Input data Paddle backend only supports "
-                        "FP32/INT32/INT64 currently.");
+        FDASSERT(false,
+                 "Input data Paddle backend only supports "
+                 "FP32/INT32/INT64 currently.");
         break;
       }
@@ -378,6 +372,5 @@ void PaddleBackend::CollectShapeRun(
   }
   predictor->Run();
 }
-#endif

 }  // namespace fastdeploy
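The guard removals above are a direct consequence of the header split: the old PaddleBackendOption declared its TensorRT-related fields only under ENABLE_TRT_BACKEND, so .cc code touching them had to repeat the guard; the new option headers declare the fields unconditionally, so guards around plain option handling can go. A stripped-down illustration with invented names (not FastDeploy code):

#include <iostream>
#include <string>
#include <vector>

// After the refactor the option type exists whether or not TensorRT support
// was compiled in, so option-handling code compiles unconditionally.
struct TrtOption {       // stand-in for TrtBackendOption
  bool enable_fp16 = false;
  std::vector<std::string> disabled_ops;
};

struct Option {          // stand-in for PaddleBackendOption
  bool enable_trt = false;
  TrtOption trt_option;  // previously wrapped in #ifdef ENABLE_TRT_BACKEND
};

void BuildOption(const Option& option) {
  if (option.enable_trt) {
    // No preprocessor guard needed just to read the option fields.
    std::cout << "fp16=" << option.trt_option.enable_fp16
              << ", disabled ops=" << option.trt_option.disabled_ops.size()
              << std::endl;
  }
}

int main() {
  Option option;
  option.enable_trt = true;
  option.trt_option.enable_fp16 = true;
  BuildOption(option);
  return 0;
}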
Paddle Inference backend header: include the new paddle/option.h, drop the conditional TensorRT backend include, remove the inline IpuOption and PaddleBackendOption definitions (now in option.h), and drop the ENABLE_TRT_BACKEND guards around the private TRT helper declarations.

@@ -20,74 +20,15 @@
 #include <vector>

 #include "fastdeploy/backends/backend.h"
+#include "fastdeploy/backends/paddle/option.h"
 #ifdef ENABLE_PADDLE_FRONTEND
 #include "paddle2onnx/converter.h"
 #endif
 #include "fastdeploy/utils/unique_ptr.h"
 #include "paddle_inference_api.h"  // NOLINT

-#ifdef ENABLE_TRT_BACKEND
-#include "fastdeploy/backends/tensorrt/trt_backend.h"
-#endif
-
 namespace fastdeploy {

-struct IpuOption {
-  int ipu_device_num;
-  int ipu_micro_batch_size;
-  bool ipu_enable_pipelining;
-  int ipu_batches_per_step;
-  bool ipu_enable_fp16;
-  int ipu_replica_num;
-  float ipu_available_memory_proportion;
-  bool ipu_enable_half_partial;
-};
-
-struct PaddleBackendOption {
-  std::string model_file = "";   // Path of model file
-  std::string params_file = "";  // Path of parameters file, can be empty
-
-  std::string model_buffer_ = "";
-  std::string params_buffer_ = "";
-  size_t model_buffer_size_ = 0;
-  size_t params_buffer_size_ = 0;
-  bool model_from_memory_ = false;
-
-#ifdef WITH_GPU
-  bool use_gpu = true;
-#else
-  bool use_gpu = false;
-#endif
-  bool enable_mkldnn = true;
-
-  bool enable_log_info = false;
-
-  bool enable_trt = false;
-#ifdef ENABLE_TRT_BACKEND
-  TrtBackendOption trt_option;
-  bool collect_shape = false;
-  std::vector<std::string> trt_disabled_ops_{};
-#endif
-
-#ifdef WITH_IPU
-  bool use_ipu = true;
-  IpuOption ipu_option;
-#else
-  bool use_ipu = false;
-#endif
-
-  int mkldnn_cache_size = 1;
-  int cpu_thread_num = 8;
-  // initialize memory size(MB) for GPU
-  int gpu_mem_init_size = 100;
-  // gpu device id
-  int gpu_id = 0;
-  bool enable_pinned_memory = false;
-  void* external_stream_ = nullptr;
-
-  std::vector<std::string> delete_pass_names = {};
-};
-
 // convert FD device to paddle place type
 paddle_infer::PlaceType ConvertFDDeviceToPlace(Device device);

@@ -132,7 +73,6 @@ class PaddleBackend : public BaseBackend {
   std::vector<TensorInfo> GetOutputInfos() override;

  private:
-#ifdef ENABLE_TRT_BACKEND
   void
   CollectShapeRun(paddle_infer::Predictor* predictor,
                   const std::map<std::string, std::vector<int>>& shape) const;
@@ -142,7 +82,6 @@ class PaddleBackend : public BaseBackend {
                        std::map<std::string, std::vector<int>>* min_shape,
                        std::map<std::string, std::vector<int>>* opt_shape) const;
   void SetTRTDynamicShapeToConfig(const PaddleBackendOption& option);
-#endif
   PaddleBackendOption option_;
   paddle_infer::Config config_;
   std::shared_ptr<paddle_infer::Predictor> predictor_;
fastdeploy/backends/poros/option.h — new executable file, 47 lines:

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// ... (same Apache-2.0 license header as in lite/option.h above)

#pragma once

#include <iostream>
#include <memory>
#include <string>
#include <vector>

namespace fastdeploy {

struct PorosBackendOption {
#ifdef WITH_GPU
  bool use_gpu = true;
#else
  bool use_gpu = false;
#endif
  int gpu_id = 0;
  bool long_to_int = true;
  // There is calculation precision in tf32 mode on A10, it can bring some
  // performance improvement, but there may be diff
  bool use_nvidia_tf32 = false;
  // Threshold for the number of non-const ops
  int32_t unconst_ops_thres = -1;
  std::string poros_file = "";
  std::vector<FDDataType> prewarm_datatypes = {FDDataType::FP32};
  // TRT options
  bool enable_fp16 = false;
  bool enable_int8 = false;
  bool is_dynamic = false;
  size_t max_batch_size = 32;
  size_t max_workspace_size = 1 << 30;
};

}  // namespace fastdeploy
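A hedged sketch of configuring the Poros option struct above; field names come from the new header, the values are illustrative, and the include path assumes the FastDeploy tree.

#include "fastdeploy/backends/poros/option.h"

int main() {
  fastdeploy::PorosBackendOption option;
  option.long_to_int = true;       // default shown in the header
  option.use_nvidia_tf32 = false;  // keep full FP32 precision
  option.enable_fp16 = true;
  option.max_batch_size = 16;
  option.prewarm_datatypes = {fastdeploy::FDDataType::FP32};
  return 0;
}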
Poros backend header: include the new option header and remove the inline PorosBackendOption definition.

@@ -20,35 +20,12 @@
 #include <vector>

 #include "fastdeploy/backends/backend.h"
+#include "fastdeploy/backends/poros/option.h"
 #include "fastdeploy/backends/poros/common/compile.h"
 #include "fastdeploy/backends/poros/common/poros_module.h"

 namespace fastdeploy {

-struct PorosBackendOption {
-#ifdef WITH_GPU
-  bool use_gpu = true;
-#else
-  bool use_gpu = false;
-#endif
-  int gpu_id = 0;
-  bool long_to_int = true;
-  // There is calculation precision in tf32 mode on A10, it can bring some
-  // performance improvement, but there may be diff
-  bool use_nvidia_tf32 = false;
-  // Threshold for the number of non-const ops
-  int32_t unconst_ops_thres = -1;
-  std::string poros_file = "";
-  std::vector<FDDataType> prewarm_datatypes = {FDDataType::FP32};
-  // TRT options
-  bool enable_fp16 = false;
-  bool enable_int8 = false;
-  bool is_dynamic = false;
-  size_t max_batch_size = 32;
-  size_t max_workspace_size = 1 << 30;
-};
-
 // Convert data type from fastdeploy to poros
 at::ScalarType GetPorosDtype(const FDDataType& fd_dtype);
fastdeploy/backends/tensorrt/option.h — new executable file, 41 lines:

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// ... (same Apache-2.0 license header as in lite/option.h above)

#pragma once
#include <iostream>
#include <map>
#include <string>
#include <vector>

namespace fastdeploy {

struct TrtBackendOption {
  std::string model_file = "";   // Path of model file
  std::string params_file = "";  // Path of parameters file, can be empty
  // format of input model
  ModelFormat model_format = ModelFormat::AUTOREC;

  int gpu_id = 0;
  bool enable_fp16 = false;
  bool enable_int8 = false;
  size_t max_batch_size = 32;
  size_t max_workspace_size = 1 << 30;
  std::map<std::string, std::vector<int32_t>> max_shape;
  std::map<std::string, std::vector<int32_t>> min_shape;
  std::map<std::string, std::vector<int32_t>> opt_shape;
  std::string serialize_file = "";
  bool enable_pinned_memory = false;
  void* external_stream_ = nullptr;
};
}  // namespace fastdeploy
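Finally, a hedged sketch for the TensorRT option struct above, showing the min/opt/max dynamic-shape maps; the tensor name "x", the shapes, and the cache path are illustrative.

#include "fastdeploy/backends/tensorrt/option.h"

int main() {
  fastdeploy::TrtBackendOption option;
  option.enable_fp16 = true;
  option.max_workspace_size = 1 << 30;           // 1 GiB build workspace
  option.min_shape["x"] = {1, 3, 224, 224};      // smallest expected input
  option.opt_shape["x"] = {4, 3, 224, 224};      // shape TensorRT optimizes for
  option.max_shape["x"] = {8, 3, 224, 224};      // largest expected input
  option.serialize_file = "./trt_engine.cache";  // cache the built engine
  return 0;
}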
TensorRT backend header: include the new option header and remove the inline TrtBackendOption definition.

@@ -25,6 +25,7 @@
 #include "NvOnnxParser.h"
 #include "fastdeploy/backends/backend.h"
 #include "fastdeploy/backends/tensorrt/utils.h"
+#include "fastdeploy/backends/tensorrt/option.h"
 #include "fastdeploy/utils/unique_ptr.h"

 class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
@@ -62,25 +63,6 @@ struct TrtValueInfo {
   FDDataType original_dtype;  // dtype of original ONNX/Paddle model
 };

-struct TrtBackendOption {
-  std::string model_file = "";   // Path of model file
-  std::string params_file = "";  // Path of parameters file, can be empty
-  // format of input model
-  ModelFormat model_format = ModelFormat::AUTOREC;
-
-  int gpu_id = 0;
-  bool enable_fp16 = false;
-  bool enable_int8 = false;
-  size_t max_batch_size = 32;
-  size_t max_workspace_size = 1 << 30;
-  std::map<std::string, std::vector<int32_t>> max_shape;
-  std::map<std::string, std::vector<int32_t>> min_shape;
-  std::map<std::string, std::vector<int32_t>> opt_shape;
-  std::string serialize_file = "";
-  bool enable_pinned_memory = false;
-  void* external_stream_ = nullptr;
-};
-
 std::vector<int> toVec(const nvinfer1::Dims& dim);
 size_t TrtDataTypeSize(const nvinfer1::DataType& dtype);
 FDDataType GetFDDataType(const nvinfer1::DataType& dtype);