[Other] [Part2] Upgrade runtime module (#1080)

Author: Jason
Date: 2023-01-09 13:22:51 +08:00 (committed via GitHub)
Parent commit: cbf88a46fa
Commit: 4aa4ebd7c3
53 changed files with 312 additions and 374 deletions
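The bulk of the diff below is a mechanical relocation of the backend sources and headers from fastdeploy/backends/ to fastdeploy/runtime/backends/, plus clang-format cleanups. A minimal sketch of what the move means for C++ code that includes these headers (the consuming file is hypothetical; both paths are taken from the diff):

    // Before this commit (old include path):
    // #include "fastdeploy/backends/ort/ort_backend.h"
    // After the runtime module upgrade (new include path):
    #include "fastdeploy/runtime/backends/ort/ort_backend.h"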


@@ -185,15 +185,15 @@ configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h.
configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc)
file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*.cc)
file(GLOB_RECURSE FDTENSOR_FUNC_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cu)
-file(GLOB_RECURSE DEPLOY_OP_CUDA_KERNEL_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/op_cuda_kernels/*.cu)
-file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/ort/*.cc)
-file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/paddle/*.cc)
-file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/*.cc)
-file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp)
-file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/openvino/*.cc)
-file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/rknpu2/*.cc)
-file(GLOB_RECURSE DEPLOY_SOPHGO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/sophgo/*.cc)
-file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/lite/*.cc)
+file(GLOB_RECURSE DEPLOY_OP_CUDA_KERNEL_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/op_cuda_kernels/*.cu)
+file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/ort/*.cc)
+file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/paddle/*.cc)
+file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/poros/*.cc)
+file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/tensorrt/*.cpp)
+file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/openvino/*.cc)
+file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/rknpu2/*.cc)
+file(GLOB_RECURSE DEPLOY_SOPHGO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/sophgo/*.cc)
+file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/lite/*.cc)
file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc)
file(GLOB_RECURSE DEPLOY_ENCRYPTION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/encryption/*.cc)
file(GLOB_RECURSE DEPLOY_PIPELINE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pipeline/*.cc)
@@ -289,7 +289,7 @@ if(ENABLE_POROS_BACKEND)
else ()
  message(STATUS "site-packages: ${Python3_SITELIB}")
endif ()
-include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/common)
+include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/poros/common)
# find trt
if(NOT WITH_GPU)
  message(FATAL_ERROR "While -DENABLE_POROS_BACKEND=ON, must set -DWITH_GPU=ON, but now it's OFF")
@@ -377,7 +377,7 @@ if(ENABLE_TRT_BACKEND)
add_definitions(-DENABLE_TRT_BACKEND)
include_directories(${TRT_INC_DIR})
-include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/common)
+include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/tensorrt/common)
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TRT_SRCS})
find_library(TRT_INFER_LIB nvinfer ${TRT_LIB_DIR} NO_DEFAULT_PATH)
find_library(TRT_ONNX_LIB nvonnxparser ${TRT_LIB_DIR} NO_DEFAULT_PATH)
@@ -574,7 +574,7 @@ install(
  DESTINATION ${CMAKE_INSTALL_PREFIX}/include
  FILES_MATCHING
  PATTERN "*.h"
-  PATTERN "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/*/*.h"
+  PATTERN "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/*/*.h"
)
install(
  DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install

fastdeploy/fastdeploy_model.cc (48) Executable file → Normal file

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
@@ -42,8 +43,7 @@ bool IsSupported(const std::vector<Backend>& backends, Backend backend) {
bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
  if (!IsBackendAvailable(runtime_option.backend)) {
-    FDERROR << runtime_option.backend
-            << " is not compiled with current FastDeploy library."
-            << std::endl;
+    FDERROR << runtime_option.backend
+            << " is not compiled with current FastDeploy library." << std::endl;
    return false;
  }
@@ -57,42 +57,58 @@ bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
  if (use_gpu) {
    if (!IsSupported(valid_gpu_backends, runtime_option.backend)) {
-      FDERROR << "The valid gpu backends of model " << ModelName() << " are " << Str(valid_gpu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid gpu backends of model " << ModelName() << " are "
+              << Str(valid_gpu_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
      return false;
    }
  } else if (use_rknpu) {
    if (!IsSupported(valid_rknpu_backends, runtime_option.backend)) {
-      FDERROR << "The valid rknpu backends of model " << ModelName() << " are " << Str(valid_rknpu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid rknpu backends of model " << ModelName() << " are "
+              << Str(valid_rknpu_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
      return false;
    }
  } else if (use_sophgotpu) {
    if (!IsSupported(valid_sophgonpu_backends, runtime_option.backend)) {
-      FDERROR << "The valid rknpu backends of model " << ModelName() << " are " << Str(valid_rknpu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid rknpu backends of model " << ModelName() << " are "
+              << Str(valid_rknpu_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
      return false;
    }
  } else if (use_timvx) {
    if (!IsSupported(valid_timvx_backends, runtime_option.backend)) {
-      FDERROR << "The valid timvx backends of model " << ModelName() << " are " << Str(valid_timvx_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid timvx backends of model " << ModelName() << " are "
+              << Str(valid_timvx_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
      return false;
    }
  } else if (use_ascend) {
    if (!IsSupported(valid_ascend_backends, runtime_option.backend)) {
-      FDERROR << "The valid ascend backends of model " << ModelName() << " are " << Str(valid_ascend_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid ascend backends of model " << ModelName() << " are "
+              << Str(valid_ascend_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
      return false;
    }
  } else if (use_kunlunxin) {
    if (!IsSupported(valid_kunlunxin_backends, runtime_option.backend)) {
-      FDERROR << "The valid kunlunxin backends of model " << ModelName() << " are " << Str(valid_kunlunxin_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid kunlunxin backends of model " << ModelName()
+              << " are " << Str(valid_kunlunxin_backends) << ", "
+              << runtime_option.backend << " is not supported." << std::endl;
      return false;
    }
-  } else if(use_ipu) {
+  } else if (use_ipu) {
    if (!IsSupported(valid_ipu_backends, runtime_option.backend)) {
-      FDERROR << "The valid ipu backends of model " << ModelName() << " are " << Str(valid_ipu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid ipu backends of model " << ModelName() << " are "
+              << Str(valid_ipu_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
      return false;
    }
  } else {
    if (!IsSupported(valid_cpu_backends, runtime_option.backend)) {
-      FDERROR << "The valid cpu backends of model " << ModelName() << " are " << Str(valid_cpu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid cpu backends of model " << ModelName() << " are "
+              << Str(valid_cpu_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
      return false;
    }
  }
@@ -135,16 +151,12 @@ bool FastDeployModel::InitRuntimeWithSpecifiedDevice() {
    return false;
#endif
  }
-  FDERROR << "Only support CPU/GPU/IPU/RKNPU/TIMVX/KunlunXin/ASCEND now." << std::endl;
+  FDERROR << "Only support CPU/GPU/IPU/RKNPU/TIMVX/KunlunXin/ASCEND now."
+          << std::endl;
  return false;
}
bool FastDeployModel::InitRuntime() {
-  if (!runtime_option.model_from_memory_) {
-    FDASSERT(
-        CheckModelFormat(runtime_option.model_file, runtime_option.model_format),
-        "ModelFormatCheck Failed.");
-  }
  if (runtime_initialized_) {
    FDERROR << "The model is already initialized, cannot be initliazed again."
            << std::endl;
@@ -298,7 +310,6 @@ bool FastDeployModel::CreateKunlunXinBackend() {
  return false;
}
bool FastDeployModel::CreateASCENDBackend() {
  if (valid_ascend_backends.size() == 0) {
    FDERROR << "There's no valid ascend backends for model: " << ModelName()
@@ -322,7 +333,6 @@ bool FastDeployModel::CreateASCENDBackend() {
  return false;
}
bool FastDeployModel::CreateIpuBackend() {
  if (valid_ipu_backends.size() == 0) {
    FDERROR << "There's no valid ipu backends for model: " << ModelName()


@@ -11,8 +11,8 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/rknpu2/option.h"
#include "fastdeploy/pybind/main.h"
+#include "fastdeploy/runtime/backends/rknpu2/option.h"
namespace fastdeploy {
void BindRKNPU2Config(pybind11::module& m) {
  pybind11::enum_<fastdeploy::rknpu2::CpuName>(


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/lite/lite_backend.h"
+#include "fastdeploy/runtime/backends/lite/lite_backend.h"
#include <cstring>
@@ -43,7 +43,7 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
  option_ = option;
  std::vector<paddle::lite_api::Place> valid_places;
  if (option_.enable_int8) {
-    if(option_.enable_kunlunxin) {
+    if (option_.enable_kunlunxin) {
      valid_places.push_back(
          paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)});
    } else {
@@ -54,7 +54,7 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
              << "inference with int8 precision!" << std::endl;
  }
  if (option_.enable_fp16) {
-    if(option_.enable_kunlunxin){
+    if (option_.enable_kunlunxin) {
      valid_places.push_back(
          paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)});
    } else {
@@ -66,7 +66,9 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
      if (supported_fp16_) {
        valid_places.push_back(
            paddle::lite_api::Place{TARGET(kARM), PRECISION(kFP16)});
-        FDINFO << "The device supports FP16, Lite::Backend will inference with FP16 precision." << std::endl;
+        FDINFO << "The device supports FP16, Lite::Backend will inference with "
+                  "FP16 precision."
+               << std::endl;
      } else {
        FDWARNING << "The device doesn't support FP16, will fallback to FP32.";
      }
@@ -74,14 +76,18 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
  }
  if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
    std::vector<char> nnadapter_subgraph_partition_config_buffer;
-    if (ReadFile(option_.nnadapter_subgraph_partition_config_path, &nnadapter_subgraph_partition_config_buffer, false)) {
+    if (ReadFile(option_.nnadapter_subgraph_partition_config_path,
+                 &nnadapter_subgraph_partition_config_buffer, false)) {
      if (!nnadapter_subgraph_partition_config_buffer.empty()) {
-        std::string nnadapter_subgraph_partition_config_string(nnadapter_subgraph_partition_config_buffer.data(), nnadapter_subgraph_partition_config_buffer.size());
-        config_.set_nnadapter_subgraph_partition_config_buffer(nnadapter_subgraph_partition_config_string);
+        std::string nnadapter_subgraph_partition_config_string(
+            nnadapter_subgraph_partition_config_buffer.data(),
+            nnadapter_subgraph_partition_config_buffer.size());
+        config_.set_nnadapter_subgraph_partition_config_buffer(
+            nnadapter_subgraph_partition_config_string);
      }
    }
  }
-  if(option_.enable_timvx) {
+  if (option_.enable_timvx) {
    config_.set_nnadapter_device_names({"verisilicon_timvx"});
    valid_places.push_back(
        paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
@@ -91,32 +97,30 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
        paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
  }
-  if(option_.enable_ascend){
-    if(option_.nnadapter_device_names.empty()){
+  if (option_.enable_ascend) {
+    if (option_.nnadapter_device_names.empty()) {
      config_.set_nnadapter_device_names({"huawei_ascend_npu"});
    } else {
      config_.set_nnadapter_device_names(option_.nnadapter_device_names);
    }
-    if(!option_.nnadapter_context_properties.empty()){
-      config_.set_nnadapter_context_properties(option_.nnadapter_context_properties);
+    if (!option_.nnadapter_context_properties.empty()) {
+      config_.set_nnadapter_context_properties(
+          option_.nnadapter_context_properties);
    }
-    if(!option_.nnadapter_model_cache_dir.empty()){
+    if (!option_.nnadapter_model_cache_dir.empty()) {
      config_.set_nnadapter_model_cache_dir(option_.nnadapter_model_cache_dir);
    }
-    if(!option_.nnadapter_mixed_precision_quantization_config_path.empty()){
-      config_.set_nnadapter_mixed_precision_quantization_config_path(
-        option_.nnadapter_mixed_precision_quantization_config_path
-      );
+    if (!option_.nnadapter_mixed_precision_quantization_config_path.empty()) {
+      config_.set_nnadapter_mixed_precision_quantization_config_path(
+          option_.nnadapter_mixed_precision_quantization_config_path);
    }
-    if(!option_.nnadapter_subgraph_partition_config_path.empty()){
-      config_.set_nnadapter_subgraph_partition_config_path(
-        option_.nnadapter_subgraph_partition_config_path
-      );
+    if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
+      config_.set_nnadapter_subgraph_partition_config_path(
+          option_.nnadapter_subgraph_partition_config_path);
    }
    valid_places.push_back(
@@ -127,16 +131,20 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
        paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
  }
-  if(option_.enable_kunlunxin){
+  if (option_.enable_kunlunxin) {
    valid_places.push_back(
        paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)});
    valid_places.push_back(
        paddle::lite_api::Place{TARGET(kX86), PRECISION(kFloat)});
    config_.set_xpu_dev_per_thread(option_.device_id);
-    config_.set_xpu_workspace_l3_size_per_thread(option_.kunlunxin_l3_workspace_size);
-    config_.set_xpu_l3_cache_method(option_.kunlunxin_l3_workspace_size, option_.kunlunxin_locked);
-    config_.set_xpu_conv_autotune(option_.kunlunxin_autotune, option_.kunlunxin_autotune_file);
-    config_.set_xpu_multi_encoder_method(option_.kunlunxin_precision, option_.kunlunxin_adaptive_seqlen);
+    config_.set_xpu_workspace_l3_size_per_thread(
+        option_.kunlunxin_l3_workspace_size);
+    config_.set_xpu_l3_cache_method(option_.kunlunxin_l3_workspace_size,
+                                    option_.kunlunxin_locked);
+    config_.set_xpu_conv_autotune(option_.kunlunxin_autotune,
+                                  option_.kunlunxin_autotune_file);
+    config_.set_xpu_multi_encoder_method(option_.kunlunxin_precision,
+                                         option_.kunlunxin_adaptive_seqlen);
    if (option_.kunlunxin_enable_multi_stream) {
      config_.enable_xpu_multi_stream();
    }
@@ -155,10 +163,9 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
}
bool LiteBackend::ReadFile(const std::string& filename,
-                           std::vector<char>* contents,
-                           const bool binary) {
-  FILE *fp = fopen(filename.c_str(), binary ? "rb" : "r");
-  if (!fp){
+                           std::vector<char>* contents, const bool binary) {
+  FILE* fp = fopen(filename.c_str(), binary ? "rb" : "r");
+  if (!fp) {
    FDERROR << "Cannot open file " << filename << "." << std::endl;
    return false;
  }
@@ -168,7 +175,7 @@ bool LiteBackend::ReadFile(const std::string& filename,
  contents->clear();
  contents->resize(size);
  size_t offset = 0;
-  char *ptr = reinterpret_cast<char *>(&(contents->at(0)));
+  char* ptr = reinterpret_cast<char*>(&(contents->at(0)));
  while (offset < size) {
    size_t already_read = fread(ptr, 1, size - offset, fp);
    offset += already_read;
@@ -196,7 +203,8 @@ bool LiteBackend::InitFromPaddle(const std::string& model_file,
  if (option_.optimized_model_dir != "") {
    FDINFO << "Optimzed model dir is not empty, will save optimized model to: "
           << option_.optimized_model_dir << std::endl;
-    predictor_->SaveOptimizedModel(option_.optimized_model_dir,
-                                   paddle::lite_api::LiteModelType::kNaiveBuffer);
+    predictor_->SaveOptimizedModel(
+        option_.optimized_model_dir,
+        paddle::lite_api::LiteModelType::kNaiveBuffer);
  }
@@ -221,7 +229,7 @@ bool LiteBackend::InitFromPaddle(const std::string& model_file,
    auto shape = tensor->shape();
    info.shape.assign(shape.begin(), shape.end());
    info.name = output_names[i];
-    if(!option_.enable_kunlunxin){
+    if (!option_.enable_kunlunxin) {
      info.dtype = LiteDataTypeToFD(tensor->precision());
    }
    outputs_desc_.emplace_back(info);
@@ -250,8 +258,7 @@ TensorInfo LiteBackend::GetOutputInfo(int index) {
std::vector<TensorInfo> LiteBackend::GetOutputInfos() { return outputs_desc_; }
bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
-                        std::vector<FDTensor>* outputs,
-                        bool copy_to_fd) {
+                        std::vector<FDTensor>* outputs, bool copy_to_fd) {
  if (inputs.size() != inputs_desc_.size()) {
    FDERROR << "[LiteBackend] Size of inputs(" << inputs.size()
            << ") should keep same with the inputs of this model("
@@ -270,25 +277,25 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
      tensor->Resize(inputs[i].shape);
      if (inputs[i].dtype == FDDataType::FP32) {
        tensor->CopyFromCpu<float, paddle::lite_api::TargetType::kHost>(
-            reinterpret_cast<const float*>(const_cast<void*>(
-                inputs[i].CpuData())));
+            reinterpret_cast<const float*>(
+                const_cast<void*>(inputs[i].CpuData())));
      } else if (inputs[i].dtype == FDDataType::INT32) {
        tensor->CopyFromCpu<int, paddle::lite_api::TargetType::kHost>(
-            reinterpret_cast<const int*>(const_cast<void*>(
-                inputs[i].CpuData())));
+            reinterpret_cast<const int*>(const_cast<void*>(inputs[i].CpuData())));
      } else if (inputs[i].dtype == FDDataType::INT8) {
        tensor->CopyFromCpu<int8_t, paddle::lite_api::TargetType::kHost>(
-            reinterpret_cast<const int8_t*>(const_cast<void*>(
-                inputs[i].CpuData())));
+            reinterpret_cast<const int8_t*>(
+                const_cast<void*>(inputs[i].CpuData())));
      } else if (inputs[i].dtype == FDDataType::UINT8) {
        tensor->CopyFromCpu<uint8_t, paddle::lite_api::TargetType::kHost>(
-            reinterpret_cast<const uint8_t*>(const_cast<void*>(
-                inputs[i].CpuData())));
+            reinterpret_cast<const uint8_t*>(
+                const_cast<void*>(inputs[i].CpuData())));
      } else if (inputs[i].dtype == FDDataType::INT64) {
-#if (defined(__aarch64__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_ARM64))
+#if (defined(__aarch64__) || defined(__x86_64__) || defined(_M_X64) || \
+     defined(_M_ARM64))
        tensor->CopyFromCpu<int64_t, paddle::lite_api::TargetType::kHost>(
-            reinterpret_cast<const int64_t*>(const_cast<void*>(
-                inputs[i].CpuData())));
+            reinterpret_cast<const int64_t*>(
+                const_cast<void*>(inputs[i].CpuData())));
#else
        FDASSERT(false, "FDDataType::INT64 is not support for x86/armv7 now!");
#endif
@@ -302,7 +309,7 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
  outputs->resize(outputs_desc_.size());
  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
    auto tensor = predictor_->GetOutput(i);
-    if(outputs_desc_[i].dtype != LiteDataTypeToFD(tensor->precision())){
+    if (outputs_desc_[i].dtype != LiteDataTypeToFD(tensor->precision())) {
      outputs_desc_[i].dtype = LiteDataTypeToFD(tensor->precision());
    }
    (*outputs)[i].Resize(tensor->shape(), outputs_desc_[i].dtype,


@@ -19,8 +19,8 @@
#include <string>
#include <vector>
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/lite/option.h"
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/lite/option.h"
#include "paddle_api.h"  // NOLINT
namespace fastdeploy {


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/openvino/ov_backend.h"
+#include "fastdeploy/runtime/backends/openvino/ov_backend.h"
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"
#endif


@@ -19,9 +19,9 @@
#include <string>
#include <vector>
-#include "fastdeploy/backends/backend.h"
+#include "fastdeploy/runtime/backends/backend.h"
#include "fastdeploy/utils/unique_ptr.h"
-#include "fastdeploy/backends/openvino/option.h"
+#include "fastdeploy/runtime/backends/openvino/option.h"
#include "openvino/openvino.hpp"
namespace fastdeploy {


@@ -104,8 +104,8 @@ void AdaptivePool2dKernel::GetAttribute(const OrtKernelInfo* info) {
      ort_.KernelInfoGetAttribute<std::string>(info, "pooling_type");
  output_size_ =
      ort_.KernelInfoGetAttribute<std::vector<int64_t>>(info, "output_size");
-  FDASSERT(output_size_.size() == 4 && output_size_[2] > 0 &&
-               output_size_[3] > 0,
-           "The output size of adaptive pool must be positive.");
+  FDASSERT(
+      output_size_.size() == 4 && output_size_[2] > 0 && output_size_[3] > 0,
+      "The output size of adaptive pool must be positive.");
}
}  // namespace fastdeploy


@@ -25,7 +25,7 @@
#include "onnxruntime_cxx_api.h" // NOLINT #include "onnxruntime_cxx_api.h" // NOLINT
#ifdef WITH_GPU #ifdef WITH_GPU
#include "fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.h" #include "fastdeploy/runtime/backends/op_cuda_kernels/adaptive_pool2d_kernel.h"
#endif #endif
namespace fastdeploy { namespace fastdeploy {


@@ -14,10 +14,12 @@
#ifndef NON_64_PLATFORM
-#include "fastdeploy/backends/ort/ops/multiclass_nms.h"
+#include "fastdeploy/runtime/backends/ort/ops/multiclass_nms.h"
+#include <algorithm>
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/utils/utils.h"
-#include <algorithm>
namespace fastdeploy {


@@ -12,14 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/ort/ort_backend.h"
+#include "fastdeploy/runtime/backends/ort/ort_backend.h"
#include <memory>
-#include "fastdeploy/backends/ort/ops/adaptive_pool2d.h"
-#include "fastdeploy/backends/ort/ops/multiclass_nms.h"
-#include "fastdeploy/backends/ort/utils.h"
#include "fastdeploy/core/float16.h"
+#include "fastdeploy/runtime/backends/ort/ops/adaptive_pool2d.h"
+#include "fastdeploy/runtime/backends/ort/ops/multiclass_nms.h"
+#include "fastdeploy/runtime/backends/ort/utils.h"
#include "fastdeploy/utils/utils.h"
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"


@@ -20,8 +20,8 @@
#include <vector>
#include <map>
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/ort/option.h"
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/ort/option.h"
#include "onnxruntime_cxx_api.h"  // NOLINT
namespace fastdeploy {


@@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/ort/utils.h"
+#include "fastdeploy/runtime/backends/ort/utils.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {


@@ -19,7 +19,7 @@
#include <string>
#include <vector>
-#include "fastdeploy/backends/backend.h"
+#include "fastdeploy/runtime/backends/backend.h"
#include "onnxruntime_cxx_api.h"  // NOLINT
namespace fastdeploy {


@@ -19,7 +19,7 @@
#include <memory>
#include <string>
#include <vector>
-#include "fastdeploy/backends/tensorrt/option.h"
+#include "fastdeploy/runtime/backends/tensorrt/option.h"
namespace fastdeploy {


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/paddle/paddle_backend.h"
+#include "fastdeploy/runtime/backends/paddle/paddle_backend.h"
#include <sstream>


@@ -19,8 +19,8 @@
#include <string>
#include <vector>
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/paddle/option.h"
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/paddle/option.h"
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"
#endif


@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/paddle/paddle_backend.h"
#include "fastdeploy/core/float16.h"
+#include "fastdeploy/runtime/backends/paddle/paddle_backend.h"
namespace fastdeploy {
paddle_infer::PlaceType ConvertFDDeviceToPlace(Device device) {


@@ -19,9 +19,9 @@
#include <string>
#include <unordered_map>
-#include "iengine.h"
-#include "poros_module.h"
-#include "torch/script.h"
+#include "iengine.h"       // NOLINT
+#include "poros_module.h"  // NOLINT
+#include "torch/script.h"  // NOLINT
namespace baidu {
namespace mirana {


@@ -16,12 +16,12 @@
#include <string>
-//from pytorch
-#include "ATen/core/interned_strings.h"
-#include "torch/csrc/jit/ir/ir.h"
-#include "torch/script.h"
-#include "plugin_create.h"
+// from pytorch
+#include "ATen/core/interned_strings.h"  // NOLINT
+#include "torch/csrc/jit/ir/ir.h"        // NOLINT
+#include "torch/script.h"                // NOLINT
+#include "plugin_create.h"               // NOLINT
namespace baidu {
namespace mirana {


@@ -36,7 +36,7 @@ IPlugin* create_plugin(const std::string& plugin_name,
void create_all_plugins(const plugin_creator_map_t& plugin_creator_map,
                        std::unordered_map<std::string, IPlugin*>& plugin_m);
-//void create_all_plugins(std::unordered_map<std::string, IPlugin*>& plugin_m);
+// void create_all_plugins(std::unordered_map<std::string, IPlugin*>& plugin_m);
template <typename PluginType> IPlugin* default_plugin_creator() {
  return new (std::nothrow) PluginType;


@@ -14,8 +14,8 @@
#pragma once
-#include "torch/csrc/jit/jit_log.h"
-#include "torch/script.h"
+#include "torch/csrc/jit/jit_log.h"  // NOLINT
+#include "torch/script.h"            // NOLINT
#include <string>
// #include "ATen/Context.h"
@@ -37,20 +37,21 @@ struct PorosOptions {
  bool use_nvidia_tf32 = false;
};
class PorosModule : public torch::jit::Module {
 public:
-  PorosModule(torch::jit::Module module) : torch::jit::Module(module) {}
+  PorosModule(torch::jit::Module module) : torch::jit::Module(module) {}  // NOLINT
  ~PorosModule() = default;
  void to_device(Device device) { _options.device = device; }
-  //c10::IValue forward(std::vector<c10::IValue> inputs);
-  //void save(const std::string& filename);
+  // c10::IValue forward(std::vector<c10::IValue> inputs);
+  // void save(const std::string& filename);
 public:
  PorosOptions _options;
};
-//via porosmodule.save
+// via porosmodule.save
std::unique_ptr<PorosModule> Load(const std::string& filename,
                                  const PorosOptions& options);


@@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/poros/poros_backend.h"
+#include "fastdeploy/runtime/backends/poros/poros_backend.h"
#include <sys/time.h>
namespace fastdeploy {


@@ -19,10 +19,10 @@
#include <string>
#include <vector>
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/poros/option.h"
-#include "fastdeploy/backends/poros/common/compile.h"
-#include "fastdeploy/backends/poros/common/poros_module.h"
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/poros/option.h"
+#include "fastdeploy/runtime/backends/poros/common/compile.h"
+#include "fastdeploy/runtime/backends/poros/common/poros_module.h"
namespace fastdeploy {


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/poros/poros_backend.h"
+#include "fastdeploy/runtime/backends/poros/poros_backend.h"
#ifdef WITH_GPU
#include <cuda_runtime_api.h>
@@ -129,7 +129,8 @@ at::Tensor CreatePorosValue(FDTensor& tensor, bool is_backend_cuda) {
                  numel * sizeof(double));
    }
  } else {
-    FDASSERT(false, "Unrecognized data type while calling "
-                    "PorosBackend::CreatePorosValue().");
+    FDASSERT(false,
+             "Unrecognized data type while calling "
+             "PorosBackend::CreatePorosValue().");
  }
  return poros_value;


@@ -11,7 +11,8 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/rknpu2/rknpu2_backend.h"
+#include "fastdeploy/runtime/backends/rknpu2/rknpu2_backend.h"
#include "fastdeploy/utils/perf.h"
namespace fastdeploy {
RKNPU2Backend::~RKNPU2Backend() {
@@ -76,9 +77,8 @@ void RKNPU2Backend::BuildOption(const RKNPU2BackendOption& option) {
/***************************************************************
 * @name InitFromRKNN
 * @brief Initialize RKNN model
- * @param model_file: Binary data for the RKNN model or the path of RKNN model.
- *        params_file: None
- *        option: config
+ * @param model_file: Binary data for the RKNN model or the path of RKNN
+ *model. params_file: None option: config
 * @return bool
 * @note None
 ***************************************************************/
@@ -232,8 +232,8 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
      return false;
    }
-    // If the output dimension is 3, the runtime will automatically change it to 4.
-    // Obviously, this is wrong, and manual correction is required here.
+    // If the output dimension is 3, the runtime will automatically change it
+    // to 4. Obviously, this is wrong, and manual correction is required here.
    int n_dims = output_attrs_[i].n_dims;
    if ((n_dims == 4) && (output_attrs_[i].dims[3] == 1)) {
      n_dims--;
@@ -263,7 +263,8 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
 * @note None
 ***************************************************************/
void RKNPU2Backend::DumpTensorAttr(rknn_tensor_attr& attr) {
-  printf("index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], "
+  printf(
+      "index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], "
      "n_elems=%d, size=%d, fmt=%s, type=%s, "
      "qnt_type=%s, zp=%d, scale=%f, pass_through=%d\n",
      attr.index, attr.name, attr.n_dims, attr.dims[0], attr.dims[1],
@@ -357,7 +358,8 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
    // The data type of output data is changed to FP32
    output_attrs_[i].type = RKNN_TENSOR_FLOAT32;
-    // default output type is depend on model, this requires float32 to compute top5
+    // default output type is depend on model, this requires float32 to
+    // compute top5
    ret = rknn_set_io_mem(ctx, output_mems_[i], &output_attrs_[i]);
    // set output memory and attribute
@@ -452,8 +454,8 @@ FDDataType RKNPU2Backend::RknnTensorTypeToFDDataType(rknn_tensor_type type) {
 * @return None
 * @note None
 ***************************************************************/
-rknn_tensor_type
-RKNPU2Backend::FDDataTypeToRknnTensorType(fastdeploy::FDDataType type) {
+rknn_tensor_type RKNPU2Backend::FDDataTypeToRknnTensorType(
+    fastdeploy::FDDataType type) {
  if (type == FDDataType::FP16) {
    return rknn_tensor_type::RKNN_TENSOR_FLOAT16;
  }


@@ -13,8 +13,8 @@
// limitations under the License.
#pragma once
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/rknpu2/option.h"
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/rknpu2/option.h"
#include "fastdeploy/core/fd_tensor.h"
#include "rknn_api.h"  // NOLINT
#include <cstring>


@@ -11,7 +11,7 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/sophgo/sophgo_backend.h"
+#include "fastdeploy/runtime/backends/sophgo/sophgo_backend.h"
#include <assert.h>


@@ -13,11 +13,11 @@
// limitations under the License.
#pragma once
-#include "fastdeploy/backends/backend.h"
+#include "fastdeploy/runtime/backends/backend.h"
#include "fastdeploy/core/fd_tensor.h"
#include "bmruntime_interface.h"  // NOLINT
#include "bmlib_runtime.h"  // NOLINT
-#include "fastdeploy/backends/sophgo/option.h"
+#include "fastdeploy/runtime/backends/sophgo/option.h"
#include <cstring>
#include <iostream>
#include <memory>


@@ -97,9 +97,8 @@ void AdaptivePool2d::serialize(void* buffer) const noexcept {
  FDASSERT(d == a + getSerializationSize(), "d == a + getSerializationSize()");
}
-nvinfer1::DataType
-AdaptivePool2d::getOutputDataType(int index,
-                                  const nvinfer1::DataType* inputType,
-                                  int nbInputs) const noexcept {
+nvinfer1::DataType AdaptivePool2d::getOutputDataType(
+    int index, const nvinfer1::DataType* inputType,
+    int nbInputs) const noexcept {
  return inputType[0];
}


@@ -14,7 +14,7 @@
#pragma once
#include "common.h"  // NOLINT
-#include "fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.h"
+#include "fastdeploy/runtime/backends/op_cuda_kernels/adaptive_pool2d_kernel.h"
namespace fastdeploy {


@@ -12,13 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/tensorrt/trt_backend.h"
-#include "fastdeploy/function/cuda_cast.h"
+#include "fastdeploy/runtime/backends/tensorrt/trt_backend.h"
#include <cstring>
#include <unordered_map>
#include "NvInferRuntime.h"
+#include "fastdeploy/function/cuda_cast.h"
#include "fastdeploy/utils/utils.h"
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"
@@ -215,9 +215,9 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
  outputs_desc_.resize(onnx_reader.num_outputs);
  for (int i = 0; i < onnx_reader.num_inputs; ++i) {
    std::string name(onnx_reader.inputs[i].name);
-    std::vector<int64_t> shape(onnx_reader.inputs[i].shape,
-                               onnx_reader.inputs[i].shape +
-                                   onnx_reader.inputs[i].rank);
+    std::vector<int64_t> shape(
+        onnx_reader.inputs[i].shape,
+        onnx_reader.inputs[i].shape + onnx_reader.inputs[i].rank);
    inputs_desc_[i].name = name;
    inputs_desc_[i].shape.assign(shape.begin(), shape.end());
    inputs_desc_[i].dtype = ReaderDtypeToTrtDtype(onnx_reader.inputs[i].dtype);
@@ -238,9 +238,9 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
  for (int i = 0; i < onnx_reader.num_outputs; ++i) {
    std::string name(onnx_reader.outputs[i].name);
-    std::vector<int64_t> shape(onnx_reader.outputs[i].shape,
-                               onnx_reader.outputs[i].shape +
-                                   onnx_reader.outputs[i].rank);
+    std::vector<int64_t> shape(
+        onnx_reader.outputs[i].shape,
+        onnx_reader.outputs[i].shape + onnx_reader.outputs[i].rank);
    outputs_desc_[i].name = name;
    outputs_desc_[i].shape.assign(shape.begin(), shape.end());
    outputs_desc_[i].dtype =
@@ -313,8 +313,8 @@ bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
    return false;
  }
  for (size_t i = 0; i < outputs->size(); ++i) {
-    // if the final output tensor's dtype is different from the model output tensor's dtype,
-    // then we need cast the data to the final output's dtype
+    // if the final output tensor's dtype is different from the model output
+    // tensor's dtype, then we need cast the data to the final output's dtype
    auto model_output_dtype =
        GetFDDataType(outputs_device_buffer_[(*outputs)[i].name].dtype());
    if ((*outputs)[i].dtype != model_output_dtype) {
@@ -369,7 +369,8 @@ void TrtBackend::GetInputOutputInfo() {
        outputs_desc_[i].original_dtype;
  }
-  // Re-read the tensor infos from TRT model and write into inputs_desc_ and outputs_desc_
+  // Re-read the tensor infos from TRT model and write into inputs_desc_ and
+  // outputs_desc_
  std::vector<TrtValueInfo>().swap(inputs_desc_);
  std::vector<TrtValueInfo>().swap(outputs_desc_);
  inputs_desc_.clear();


@@ -23,9 +23,9 @@
#include "NvInfer.h" #include "NvInfer.h"
#include "NvOnnxParser.h" #include "NvOnnxParser.h"
#include "fastdeploy/backends/backend.h" #include "fastdeploy/runtime/backends/backend.h"
#include "fastdeploy/backends/tensorrt/utils.h" #include "fastdeploy/runtime/backends/tensorrt/utils.h"
#include "fastdeploy/backends/tensorrt/option.h" #include "fastdeploy/runtime/backends/tensorrt/option.h"
#include "fastdeploy/utils/unique_ptr.h" #include "fastdeploy/utils/unique_ptr.h"
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 { class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/tensorrt/utils.h"
+#include "fastdeploy/runtime/backends/tensorrt/utils.h"
namespace fastdeploy {


@@ -82,4 +82,43 @@ std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
out << "UNKNOWN-ModelFormat"; out << "UNKNOWN-ModelFormat";
return out; return out;
} }
std::vector<Backend> GetAvailableBackends() {
std::vector<Backend> backends;
#ifdef ENABLE_ORT_BACKEND
backends.push_back(Backend::ORT);
#endif
#ifdef ENABLE_TRT_BACKEND
backends.push_back(Backend::TRT);
#endif
#ifdef ENABLE_PADDLE_BACKEND
backends.push_back(Backend::PDINFER);
#endif
#ifdef ENABLE_POROS_BACKEND
backends.push_back(Backend::POROS);
#endif
#ifdef ENABLE_OPENVINO_BACKEND
backends.push_back(Backend::OPENVINO);
#endif
#ifdef ENABLE_LITE_BACKEND
backends.push_back(Backend::LITE);
#endif
#ifdef ENABLE_RKNPU2_BACKEND
backends.push_back(Backend::RKNPU2);
#endif
#ifdef ENABLE_SOPHGO_BACKEND
backends.push_back(Backend::SOPHGOTPU);
#endif
return backends;
}
bool IsBackendAvailable(const Backend& backend) {
std::vector<Backend> backends = GetAvailableBackends();
for (size_t i = 0; i < backends.size(); ++i) {
if (backend == backends[i]) {
return true;
}
}
return false;
}
}  // namespace fastdeploy


@@ -25,6 +25,7 @@
namespace fastdeploy {
/*! Inference backend supported in FastDeploy */
enum Backend {
  UNKNOWN,  ///< Unknown inference backend
@@ -38,6 +39,17 @@ enum Backend {
  SOPHGOTPU,  ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only
};
/**
* @brief Get all the available inference backend in FastDeploy
*/
FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
/**
* @brief Check if the inference backend available
*/
FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
enum FASTDEPLOY_DECL Device {
  CPU,
  GPU,
@@ -69,11 +81,8 @@ static std::map<ModelFormat, std::vector<Backend>> s_default_backends_cfg = {
    {ModelFormat::SOPHGO, {Backend::SOPHGOTPU}}
};
-FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Backend& b);
-FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Device& d);
-FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
-                                         const ModelFormat& f);
+FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Backend& b);
+FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Device& d);
+FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const ModelFormat& f);
}  // namespace fastdeploy
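Since GetAvailableBackends() and IsBackendAvailable() now ship alongside the Backend enum in fastdeploy/runtime/enum_variables.h, a caller can probe the compiled-in backends directly. A minimal usage sketch (illustrative only, not part of this commit):

    #include <iostream>
    #include "fastdeploy/runtime/enum_variables.h"

    int main() {
      // Enumerate every backend this FastDeploy build was compiled with.
      for (const auto& backend : fastdeploy::GetAvailableBackends()) {
        std::cout << "available backend: " << backend << std::endl;
      }
      // Guard optional code paths on a single backend.
      if (fastdeploy::IsBackendAvailable(fastdeploy::Backend::TRT)) {
        std::cout << "TensorRT backend is compiled in." << std::endl;
      }
      return 0;
    }

Printing a Backend value relies on the operator<< overload declared in the same header.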


@@ -18,35 +18,35 @@
#include "fastdeploy/utils/utils.h" #include "fastdeploy/utils/utils.h"
#ifdef ENABLE_ORT_BACKEND #ifdef ENABLE_ORT_BACKEND
#include "fastdeploy/backends/ort/ort_backend.h" #include "fastdeploy/runtime/backends/ort/ort_backend.h"
#endif #endif
#ifdef ENABLE_TRT_BACKEND #ifdef ENABLE_TRT_BACKEND
#include "fastdeploy/backends/tensorrt/trt_backend.h" #include "fastdeploy/runtime/backends/tensorrt/trt_backend.h"
#endif #endif
#ifdef ENABLE_PADDLE_BACKEND #ifdef ENABLE_PADDLE_BACKEND
#include "fastdeploy/backends/paddle/paddle_backend.h" #include "fastdeploy/runtime/backends/paddle/paddle_backend.h"
#endif #endif
#ifdef ENABLE_POROS_BACKEND #ifdef ENABLE_POROS_BACKEND
#include "fastdeploy/backends/poros/poros_backend.h" #include "fastdeploy/runtime/backends/poros/poros_backend.h"
#endif #endif
#ifdef ENABLE_OPENVINO_BACKEND #ifdef ENABLE_OPENVINO_BACKEND
#include "fastdeploy/backends/openvino/ov_backend.h" #include "fastdeploy/runtime/backends/openvino/ov_backend.h"
#endif #endif
#ifdef ENABLE_LITE_BACKEND #ifdef ENABLE_LITE_BACKEND
#include "fastdeploy/backends/lite/lite_backend.h" #include "fastdeploy/runtime/backends/lite/lite_backend.h"
#endif #endif
#ifdef ENABLE_RKNPU2_BACKEND #ifdef ENABLE_RKNPU2_BACKEND
#include "fastdeploy/backends/rknpu2/rknpu2_backend.h" #include "fastdeploy/runtime/backends/rknpu2/rknpu2_backend.h"
#endif #endif
#ifdef ENABLE_SOPHGO_BACKEND #ifdef ENABLE_SOPHGO_BACKEND
#include "fastdeploy/backends/sophgo/sophgo_backend.h" #include "fastdeploy/runtime/backends/sophgo/sophgo_backend.h"
#endif #endif
namespace fastdeploy { namespace fastdeploy {


@@ -19,7 +19,7 @@
 */
#pragma once
-#include "fastdeploy/backends/backend.h"
+#include "fastdeploy/runtime/backends/backend.h"
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/runtime/runtime_option.h"
#include "fastdeploy/utils/perf.h"


@@ -18,127 +18,6 @@
namespace fastdeploy {
std::vector<Backend> GetAvailableBackends() {
std::vector<Backend> backends;
#ifdef ENABLE_ORT_BACKEND
backends.push_back(Backend::ORT);
#endif
#ifdef ENABLE_TRT_BACKEND
backends.push_back(Backend::TRT);
#endif
#ifdef ENABLE_PADDLE_BACKEND
backends.push_back(Backend::PDINFER);
#endif
#ifdef ENABLE_POROS_BACKEND
backends.push_back(Backend::POROS);
#endif
#ifdef ENABLE_OPENVINO_BACKEND
backends.push_back(Backend::OPENVINO);
#endif
#ifdef ENABLE_LITE_BACKEND
backends.push_back(Backend::LITE);
#endif
#ifdef ENABLE_RKNPU2_BACKEND
backends.push_back(Backend::RKNPU2);
#endif
#ifdef ENABLE_SOPHGO_BACKEND
backends.push_back(Backend::SOPHGOTPU);
#endif
return backends;
}
bool IsBackendAvailable(const Backend& backend) {
std::vector<Backend> backends = GetAvailableBackends();
for (size_t i = 0; i < backends.size(); ++i) {
if (backend == backends[i]) {
return true;
}
}
return false;
}
bool CheckModelFormat(const std::string& model_file,
const ModelFormat& model_format) {
if (model_format == ModelFormat::PADDLE) {
if (model_file.size() < 8 ||
model_file.substr(model_file.size() - 8, 8) != ".pdmodel") {
FDERROR << "With model format of ModelFormat::PADDLE, the model file "
"should ends with `.pdmodel`, but now it's "
<< model_file << std::endl;
return false;
}
} else if (model_format == ModelFormat::ONNX) {
if (model_file.size() < 5 ||
model_file.substr(model_file.size() - 5, 5) != ".onnx") {
FDERROR << "With model format of ModelFormat::ONNX, the model file "
"should ends with `.onnx`, but now it's "
<< model_file << std::endl;
return false;
}
} else if (model_format == ModelFormat::RKNN) {
if (model_file.size() < 5 ||
model_file.substr(model_file.size() - 5, 5) != ".rknn") {
FDERROR << "With model format of ModelFormat::RKNN, the model file "
"should ends with `.rknn`, but now it's "
<< model_file << std::endl;
return false;
}
} else if (model_format == ModelFormat::TORCHSCRIPT) {
if (model_file.size() < 3 ||
model_file.substr(model_file.size() - 3, 3) != ".pt") {
FDERROR
<< "With model format of ModelFormat::TORCHSCRIPT, the model file "
"should ends with `.pt`, but now it's "
<< model_file << std::endl;
return false;
}
} else if (model_format == ModelFormat::SOPHGO) {
if (model_file.size() < 7 ||
model_file.substr(model_file.size() - 7, 7) != ".bmodel") {
FDERROR << "With model format of ModelFormat::SOPHGO, the model file "
"should ends with `.bmodel`, but now it's "
<< model_file << std::endl;
return false;
}
} else {
FDERROR
<< "Only support model format with frontend ModelFormat::PADDLE / "
"ModelFormat::ONNX / ModelFormat::RKNN / ModelFormat::TORCHSCRIPT."
<< std::endl;
return false;
}
return true;
}
ModelFormat GuessModelFormat(const std::string& model_file) {
if (model_file.size() > 8 &&
model_file.substr(model_file.size() - 8, 8) == ".pdmodel") {
FDINFO << "Model Format: PaddlePaddle." << std::endl;
return ModelFormat::PADDLE;
} else if (model_file.size() > 5 &&
model_file.substr(model_file.size() - 5, 5) == ".onnx") {
FDINFO << "Model Format: ONNX." << std::endl;
return ModelFormat::ONNX;
} else if (model_file.size() > 3 &&
model_file.substr(model_file.size() - 3, 3) == ".pt") {
FDINFO << "Model Format: Torchscript." << std::endl;
return ModelFormat::TORCHSCRIPT;
} else if (model_file.size() > 5 &&
model_file.substr(model_file.size() - 5, 5) == ".rknn") {
FDINFO << "Model Format: RKNN." << std::endl;
return ModelFormat::RKNN;
} else if (model_file.size() > 7 &&
model_file.substr(model_file.size() - 7, 7) == ".bmodel") {
FDINFO << "Model Format: SOPHGO." << std::endl;
return ModelFormat::SOPHGO;
}
FDERROR << "Cannot guess which model format you are using, please set "
"RuntimeOption::model_format manually."
<< std::endl;
return ModelFormat::PADDLE;
}
void RuntimeOption::SetModelPath(const std::string& model_path,
                                 const std::string& params_path,
                                 const ModelFormat& format) {


@@ -24,31 +24,17 @@
#include <map>
#include <vector>
#include "fastdeploy/runtime/enum_variables.h"
-#include "fastdeploy/backends/lite/option.h"
-#include "fastdeploy/backends/openvino/option.h"
-#include "fastdeploy/backends/ort/option.h"
-#include "fastdeploy/backends/paddle/option.h"
-#include "fastdeploy/backends/poros/option.h"
-#include "fastdeploy/backends/rknpu2/option.h"
-#include "fastdeploy/backends/sophgo/option.h"
-#include "fastdeploy/backends/tensorrt/option.h"
+#include "fastdeploy/runtime/backends/lite/option.h"
+#include "fastdeploy/runtime/backends/openvino/option.h"
+#include "fastdeploy/runtime/backends/ort/option.h"
+#include "fastdeploy/runtime/backends/paddle/option.h"
+#include "fastdeploy/runtime/backends/poros/option.h"
+#include "fastdeploy/runtime/backends/rknpu2/option.h"
+#include "fastdeploy/runtime/backends/sophgo/option.h"
+#include "fastdeploy/runtime/backends/tensorrt/option.h"
namespace fastdeploy {
/**
* @brief Get all the available inference backend in FastDeploy
*/
FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
/**
* @brief Check if the inference backend available
*/
FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
bool CheckModelFormat(const std::string& model_file,
const ModelFormat& model_format);
ModelFormat GuessModelFormat(const std::string& model_file);
/*! @brief Option object used when create a new Runtime object
 */
struct FASTDEPLOY_DECL RuntimeOption {
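For reference, a hedged sketch of how the RuntimeOption members touched above are typically used together (file names are hypothetical; SetModelPath and the backend field both appear in this diff):

    fastdeploy::RuntimeOption option;
    // Point the option at a Paddle format model (paths are placeholders).
    option.SetModelPath("model.pdmodel", "model.pdiparams",
                        fastdeploy::ModelFormat::PADDLE);
    // Pin the inference backend; FastDeployModel later validates this choice
    // against its valid_*_backends lists in InitRuntimeWithSpecifiedBackend().
    option.backend = fastdeploy::Backend::ORT;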