[Other] Refine Paddle Lite backend (#1202)

* Refine Paddle Lite backend

* fix error

* Fix compile error

* Fix build error

* modify cpu configure

* fix error

* Fix comment

* Fix error
This commit is contained in:
Jason
2023-02-02 17:40:03 +08:00
committed by GitHub
parent 56e4e56abc
commit 3c5824c862
5 changed files with 258 additions and 198 deletions

View File

@@ -0,0 +1,159 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/runtime/backends/lite/lite_backend.h"
// https://github.com/PaddlePaddle/Paddle-Lite/issues/8290
// When compiling the FastDeploy dynamic library, namely,
// WITH_STATIC_LIB=OFF, and depending on the Paddle Lite
// static library, you need to include the fake registration
// codes of Paddle Lite. When you compile the FastDeploy static
// library and depends on the Paddle Lite static library,
// WITH_STATIC_LIB=ON, you do not need to include the fake
// registration codes for Paddle Lite, but wait until you
// use the FastDeploy static library.
#if (defined(WITH_LITE_STATIC) && (!defined(WITH_STATIC_LIB)))
#warning You are compiling the FastDeploy dynamic library with \
Paddle Lite static lib We will automatically add some registration \
codes for ops, kernels and passes for Paddle Lite.
#include "paddle_use_kernels.h" // NOLINT
#include "paddle_use_ops.h" // NOLINT
#include "paddle_use_passes.h" // NOLINT
#endif
#include <cstring>
namespace fastdeploy {
#if defined(__arm__) || defined(__aarch64__)
#define FD_LITE_HOST TARGET(kARM)
#elif defined(__x86_64__)
#define FD_LITE_HOST TARGET(kX86)
#endif
std::vector<paddle::lite_api::Place> GetPlacesForCpu(
const LiteBackendOption& option) {
std::vector<paddle::lite_api::Place> valid_places;
valid_places.push_back(
paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kInt8)});
if (option.enable_fp16) {
paddle::lite_api::MobileConfig check_fp16_config;
if (check_fp16_config.check_fp16_valid()) {
valid_places.push_back(
paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFP16)});
} else {
FDWARNING << "Current CPU doesn't support float16 precision, will "
"fallback to float32."
<< std::endl;
}
}
valid_places.push_back(
paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFloat)});
return valid_places;
}
// Register the CPU-only set of valid places on the Lite config.
void LiteBackend::ConfigureCpu(const LiteBackendOption& option) {
  const auto places = GetPlacesForCpu(option);
  config_.set_valid_places(places);
}
// Configure Paddle Lite for a KunlunXin XPU device: build the valid-place
// list (int8 -> optional fp16 -> float on kXPU, with the CPU places appended
// as fallback) and apply the XPU tuning knobs carried by the option.
void LiteBackend::ConfigureKunlunXin(const LiteBackendOption& option) {
  std::vector<paddle::lite_api::Place> valid_places;
  // Precision priority on the XPU target: int8 first, fp16 if enabled,
  // float32 last.
  valid_places.push_back(
      paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)});
  if (option.enable_fp16) {
    valid_places.push_back(
        paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)});
  }
  valid_places.push_back(
      paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)});
  // Bind this thread to the selected XPU device.
  config_.set_xpu_dev_per_thread(option.device_id);
  config_.set_xpu_workspace_l3_size_per_thread(
      option.kunlunxin_l3_workspace_size);
  // NOTE(review): the same l3_workspace_size is passed to both the
  // per-thread workspace and the cache-method call — confirm this is the
  // intended Lite API usage.
  config_.set_xpu_l3_cache_method(option.kunlunxin_l3_workspace_size,
                                  option.kunlunxin_locked);
  // Convolution autotune settings (optionally persisted to a file).
  config_.set_xpu_conv_autotune(option.kunlunxin_autotune,
                                option.kunlunxin_autotune_file);
  // Multi-encoder precision / adaptive sequence length for transformer ops.
  config_.set_xpu_multi_encoder_method(option.kunlunxin_precision,
                                       option.kunlunxin_adaptive_seqlen);
  if (option.kunlunxin_enable_multi_stream) {
    config_.enable_xpu_multi_stream();
  }
  // Append CPU places so ops unsupported on XPU can fall back to the host.
  auto cpu_places = GetPlacesForCpu(option);
  valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end());
  config_.set_valid_places(valid_places);
}
void LiteBackend::ConfigureTimvx(const LiteBackendOption& option) {
config_.set_nnadapter_device_names({"verisilicon_timvx"});
std::vector<paddle::lite_api::Place> valid_places;
valid_places.push_back(
paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
valid_places.push_back(
paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
auto cpu_places = GetPlacesForCpu(option);
valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end());
config_.set_valid_places(valid_places);
ConfigureNNAdapter(option);
}
void LiteBackend::ConfigureAscend(const LiteBackendOption& option) {
config_.set_nnadapter_device_names({"huawei_ascend_npu"});
std::vector<paddle::lite_api::Place> valid_places;
valid_places.push_back(
paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
valid_places.push_back(
paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
auto cpu_places = GetPlacesForCpu(option);
valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end());
config_.set_valid_places(valid_places);
ConfigureNNAdapter(option);
}
// Forward the generic NNAdapter settings from the option onto the Lite
// config. Called by the device-specific Configure* methods after they have
// set the device names and valid places.
void LiteBackend::ConfigureNNAdapter(const LiteBackendOption& option) {
  if (!option.nnadapter_subgraph_partition_config_path.empty()) {
    // Load the subgraph partition config file and hand its contents to Lite
    // as an in-memory buffer. Read failures are silently ignored here.
    std::vector<char> nnadapter_subgraph_partition_config_buffer;
    if (ReadFile(option.nnadapter_subgraph_partition_config_path,
                 &nnadapter_subgraph_partition_config_buffer, false)) {
      if (!nnadapter_subgraph_partition_config_buffer.empty()) {
        std::string nnadapter_subgraph_partition_config_string(
            nnadapter_subgraph_partition_config_buffer.data(),
            nnadapter_subgraph_partition_config_buffer.size());
        config_.set_nnadapter_subgraph_partition_config_buffer(
            nnadapter_subgraph_partition_config_string);
      }
    }
  }
  if (!option.nnadapter_context_properties.empty()) {
    config_.set_nnadapter_context_properties(
        option.nnadapter_context_properties);
  }
  if (!option.nnadapter_model_cache_dir.empty()) {
    config_.set_nnadapter_model_cache_dir(option.nnadapter_model_cache_dir);
  }
  if (!option.nnadapter_mixed_precision_quantization_config_path.empty()) {
    config_.set_nnadapter_mixed_precision_quantization_config_path(
        option.nnadapter_mixed_precision_quantization_config_path);
  }
  // NOTE(review): both the buffer (above) and the path (here) variants of
  // the subgraph partition config are set from the same option — presumably
  // Lite prefers one of them; confirm setting both is intended.
  if (!option.nnadapter_subgraph_partition_config_path.empty()) {
    config_.set_nnadapter_subgraph_partition_config_path(
        option.nnadapter_subgraph_partition_config_path);
  }
  config_.set_nnadapter_dynamic_shape_info(option.nnadapter_dynamic_shape_info);
}
} // namespace fastdeploy

View File

@@ -14,164 +14,41 @@
#include "fastdeploy/runtime/backends/lite/lite_backend.h"
// https://github.com/PaddlePaddle/Paddle-Lite/issues/8290
// When compiling the FastDeploy dynamic library, namely,
// WITH_STATIC_LIB=OFF, and depending on the Paddle Lite
// When compiling the FastDeploy dynamic library, namely,
// WITH_STATIC_LIB=OFF, and depending on the Paddle Lite
// static library, you need to include the fake registration
// codes of Paddle Lite. When you compile the FastDeploy static
// library and depends on the Paddle Lite static library,
// WITH_STATIC_LIB=ON, you do not need to include the fake
// registration codes for Paddle Lite, but wait until you
// codes of Paddle Lite. When you compile the FastDeploy static
// library and depends on the Paddle Lite static library,
// WITH_STATIC_LIB=ON, you do not need to include the fake
// registration codes for Paddle Lite, but wait until you
// use the FastDeploy static library.
#if (defined(WITH_LITE_STATIC) && (!defined(WITH_STATIC_LIB)))
#warning You are compiling the FastDeploy dynamic library with \
Paddle Lite static lib We will automatically add some registration \
codes for ops, kernels and passes for Paddle Lite.
#include "paddle_use_ops.h" // NOLINT
#include "paddle_use_kernels.h" // NOLINT
#include "paddle_use_passes.h" // NOLINT
#include "paddle_use_kernels.h" // NOLINT
#include "paddle_use_ops.h" // NOLINT
#include "paddle_use_passes.h" // NOLINT
#endif
#include <cstring>
namespace fastdeploy {
// Convert data type from paddle lite to fastdeploy.
// Returns the matching FDDataType; asserts on any unmapped precision.
FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype) {
  if (dtype == paddle::lite_api::PrecisionType::kFloat) {
    return FDDataType::FP32;
  } else if (dtype == paddle::lite_api::PrecisionType::kInt8) {
    return FDDataType::INT8;
  } else if (dtype == paddle::lite_api::PrecisionType::kInt32) {
    return FDDataType::INT32;
  } else if (dtype == paddle::lite_api::PrecisionType::kInt64) {
    return FDDataType::INT64;
  } else if (dtype == paddle::lite_api::PrecisionType::kInt16) {
    return FDDataType::INT16;
  } else if (dtype == paddle::lite_api::PrecisionType::kUInt8) {
    return FDDataType::UINT8;
  } else if (dtype == paddle::lite_api::PrecisionType::kFP64) {
    return FDDataType::FP64;
  }
  // Bug fix: passing an enum to a %d printf-style specifier is a format
  // mismatch; print the human-readable precision name instead (this also
  // matches the other copy of this function in the file).
  FDASSERT(false, "Unexpected data type of %s.",
           paddle::lite_api::PrecisionToStr(dtype).c_str());
  return FDDataType::FP32;
}
void LiteBackend::BuildOption(const LiteBackendOption& option) {
option_ = option;
std::vector<paddle::lite_api::Place> valid_places;
if (option_.enable_int8) {
if (option_.enable_kunlunxin) {
valid_places.push_back(
paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)});
} else {
valid_places.push_back(
paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
}
FDINFO << "Lite::Backend enable_int8 option is ON ! Lite::Backend will "
<< "inference with int8 precision!" << std::endl;
if (option_.device == Device::CPU) {
ConfigureCpu(option_);
} else if (option_.device == Device::TIMVX) {
ConfigureTimvx(option_);
} else if (option_.device == Device::KUNLUNXIN) {
ConfigureKunlunXin(option_);
} else if (option_.device == Device::ASCEND) {
ConfigureAscend(option_);
}
if (option_.enable_fp16) {
if (option_.enable_kunlunxin) {
valid_places.push_back(
paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)});
} else {
paddle::lite_api::MobileConfig check_fp16_config;
// Determine whether the device supports the FP16
// instruction set (or whether it is an arm device
// of the armv8.2 architecture)
supported_fp16_ = check_fp16_config.check_fp16_valid();
if (supported_fp16_) {
valid_places.push_back(
paddle::lite_api::Place{TARGET(kARM), PRECISION(kFP16)});
FDINFO << "The device supports FP16, Lite::Backend will inference with "
"FP16 precision."
<< std::endl;
} else {
FDWARNING << "The device doesn't support FP16, will fallback to FP32.";
}
}
}
if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
std::vector<char> nnadapter_subgraph_partition_config_buffer;
if (ReadFile(option_.nnadapter_subgraph_partition_config_path,
&nnadapter_subgraph_partition_config_buffer, false)) {
if (!nnadapter_subgraph_partition_config_buffer.empty()) {
std::string nnadapter_subgraph_partition_config_string(
nnadapter_subgraph_partition_config_buffer.data(),
nnadapter_subgraph_partition_config_buffer.size());
config_.set_nnadapter_subgraph_partition_config_buffer(
nnadapter_subgraph_partition_config_string);
}
}
}
if (option_.enable_timvx) {
config_.set_nnadapter_device_names({"verisilicon_timvx"});
valid_places.push_back(
paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
valid_places.push_back(
paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
valid_places.push_back(
paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
}
if (option_.enable_ascend) {
if (option_.nnadapter_device_names.empty()) {
config_.set_nnadapter_device_names({"huawei_ascend_npu"});
} else {
config_.set_nnadapter_device_names(option_.nnadapter_device_names);
}
if (!option_.nnadapter_context_properties.empty()) {
config_.set_nnadapter_context_properties(
option_.nnadapter_context_properties);
}
if (!option_.nnadapter_model_cache_dir.empty()) {
config_.set_nnadapter_model_cache_dir(option_.nnadapter_model_cache_dir);
}
if (!option_.nnadapter_mixed_precision_quantization_config_path.empty()) {
config_.set_nnadapter_mixed_precision_quantization_config_path(
option_.nnadapter_mixed_precision_quantization_config_path);
}
if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
config_.set_nnadapter_subgraph_partition_config_path(
option_.nnadapter_subgraph_partition_config_path);
}
valid_places.push_back(
paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
valid_places.push_back(
paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
valid_places.push_back(
paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
}
if (option_.enable_kunlunxin) {
valid_places.push_back(
paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)});
valid_places.push_back(
paddle::lite_api::Place{TARGET(kX86), PRECISION(kFloat)});
config_.set_xpu_dev_per_thread(option_.device_id);
config_.set_xpu_workspace_l3_size_per_thread(
option_.kunlunxin_l3_workspace_size);
config_.set_xpu_l3_cache_method(option_.kunlunxin_l3_workspace_size,
option_.kunlunxin_locked);
config_.set_xpu_conv_autotune(option_.kunlunxin_autotune,
option_.kunlunxin_autotune_file);
config_.set_xpu_multi_encoder_method(option_.kunlunxin_precision,
option_.kunlunxin_adaptive_seqlen);
if (option_.kunlunxin_enable_multi_stream) {
config_.enable_xpu_multi_stream();
}
} else {
valid_places.push_back(
paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)});
}
config_.set_valid_places(valid_places);
if (option_.threads > 0) {
config_.set_threads(option_.threads);
if (option_.cpu_threads > 0) {
config_.set_threads(option_.cpu_threads);
}
if (option_.power_mode > 0) {
config_.set_power_mode(
@@ -179,29 +56,6 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
}
}
// Read the whole of `filename` into `contents`.
// `binary` selects "rb" vs "r" fopen mode. Returns false if the file cannot
// be opened or its size cannot be determined.
bool LiteBackend::ReadFile(const std::string& filename,
                           std::vector<char>* contents, const bool binary) {
  FILE* fp = fopen(filename.c_str(), binary ? "rb" : "r");
  if (!fp) {
    FDERROR << "Cannot open file " << filename << "." << std::endl;
    return false;
  }
  fseek(fp, 0, SEEK_END);
  long size = ftell(fp);  // keep signed so a -1 failure is detectable
  fseek(fp, 0, SEEK_SET);
  if (size < 0) {
    // Bug fix: ftell() returning -1 previously became a huge size_t.
    FDERROR << "Cannot get size of file " << filename << "." << std::endl;
    fclose(fp);
    return false;
  }
  contents->clear();
  contents->resize(static_cast<size_t>(size));
  size_t offset = 0;
  // Bug fix: the original took &contents->at(0), which throws for an empty
  // file; data() is valid (possibly null) for any size.
  char* ptr = contents->data();
  while (offset < static_cast<size_t>(size)) {
    size_t already_read = fread(ptr, 1, static_cast<size_t>(size) - offset, fp);
    if (already_read == 0) {
      // Bug fix: a read error or early EOF previously looped forever.
      break;
    }
    offset += already_read;
    ptr += already_read;
  }
  fclose(fp);
  return true;
}
bool LiteBackend::InitFromPaddle(const std::string& model_file,
const std::string& params_file,
const LiteBackendOption& option) {
@@ -246,7 +100,7 @@ bool LiteBackend::InitFromPaddle(const std::string& model_file,
auto shape = tensor->shape();
info.shape.assign(shape.begin(), shape.end());
info.name = output_names[i];
if (!option_.enable_kunlunxin) {
if (!option_.device == Device::KUNLUNXIN) {
info.dtype = LiteDataTypeToFD(tensor->precision());
}
outputs_desc_.emplace_back(info);
@@ -337,4 +191,49 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
return true;
}
// Read the whole of `filename` into `contents`.
// `binary` selects "rb" vs "r" fopen mode. Returns false if the file cannot
// be opened or its size cannot be determined.
bool ReadFile(const std::string& filename, std::vector<char>* contents,
              bool binary) {
  FILE* fp = fopen(filename.c_str(), binary ? "rb" : "r");
  if (!fp) {
    FDERROR << "Cannot open file " << filename << "." << std::endl;
    return false;
  }
  fseek(fp, 0, SEEK_END);
  long size = ftell(fp);  // keep signed so a -1 failure is detectable
  fseek(fp, 0, SEEK_SET);
  if (size < 0) {
    // Bug fix: ftell() returning -1 previously became a huge size_t.
    FDERROR << "Cannot get size of file " << filename << "." << std::endl;
    fclose(fp);
    return false;
  }
  contents->clear();
  contents->resize(static_cast<size_t>(size));
  size_t offset = 0;
  // Bug fix: the original took &contents->at(0), which throws for an empty
  // file; data() is valid (possibly null) for any size.
  char* ptr = contents->data();
  while (offset < static_cast<size_t>(size)) {
    size_t already_read = fread(ptr, 1, static_cast<size_t>(size) - offset, fp);
    if (already_read == 0) {
      // Bug fix: a read error or early EOF previously looped forever.
      break;
    }
    offset += already_read;
    ptr += already_read;
  }
  fclose(fp);
  return true;
}
// Convert data type from paddle lite to fastdeploy.
// Asserts (with the precision's printable name) on any unmapped value.
FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype) {
  switch (dtype) {
    case paddle::lite_api::PrecisionType::kFloat:
      return FDDataType::FP32;
    case paddle::lite_api::PrecisionType::kInt8:
      return FDDataType::INT8;
    case paddle::lite_api::PrecisionType::kInt32:
      return FDDataType::INT32;
    case paddle::lite_api::PrecisionType::kInt64:
      return FDDataType::INT64;
    case paddle::lite_api::PrecisionType::kInt16:
      return FDDataType::INT16;
    case paddle::lite_api::PrecisionType::kUInt8:
      return FDDataType::UINT8;
    case paddle::lite_api::PrecisionType::kFP64:
      return FDDataType::FP64;
    default:
      break;
  }
  FDASSERT(false, "Unexpected data type of %s.",
           paddle::lite_api::PrecisionToStr(dtype).c_str());
  return FDDataType::FP32;
}
} // namespace fastdeploy

View File

@@ -19,13 +19,12 @@
#include <string>
#include <vector>
#include "fastdeploy/runtime/backends/backend.h"
#include "fastdeploy/runtime/backends/lite/option.h"
#include "paddle_api.h" // NOLINT
#include "fastdeploy/runtime/backends/backend.h"
#include "fastdeploy/runtime/backends/lite/option.h"
namespace fastdeploy {
// Convert data type from paddle lite to fastdeploy
FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype);
class LiteBackend : public BaseBackend {
public:
@@ -51,15 +50,26 @@ class LiteBackend : public BaseBackend {
std::vector<TensorInfo> GetOutputInfos() override;
private:
void ConfigureCpu(const LiteBackendOption& option);
void ConfigureTimvx(const LiteBackendOption& option);
void ConfigureAscend(const LiteBackendOption& option);
void ConfigureKunlunXin(const LiteBackendOption& option);
void ConfigureNNAdapter(const LiteBackendOption& option);
paddle::lite_api::CxxConfig config_;
std::shared_ptr<paddle::lite_api::PaddlePredictor> predictor_;
std::vector<TensorInfo> inputs_desc_;
std::vector<TensorInfo> outputs_desc_;
std::map<std::string, int> inputs_order_;
LiteBackendOption option_;
bool supported_fp16_ = false;
bool ReadFile(const std::string& filename,
std::vector<char>* contents,
const bool binary = true);
};
// Convert data type from paddle lite to fastdeploy
FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype);
// Helper function to read file
bool ReadFile(const std::string& filename,
std::vector<char>* contents,
bool binary = true);
} // namespace fastdeploy

View File

@@ -17,7 +17,7 @@
#include "fastdeploy/core/fd_type.h"
// https://github.com/PaddlePaddle/Paddle-Lite/issues/8290
#if (defined(WITH_LITE_STATIC) && defined(WITH_STATIC_LIB))
// Whether to output some warning messages when using the
// Whether to output some warning messages when using the
// FastDepoy static library, default OFF. These messages
// are only reserve for debugging.
#if defined(WITH_STATIC_WARNING)
@@ -36,7 +36,7 @@ ops, kernels and passes for Paddle Lite.
#include <memory>
#include <string>
#include <vector>
#include <map>
#include <map>
namespace fastdeploy {
@@ -51,24 +51,19 @@ enum LitePowerMode {
};
struct LiteBackendOption {
// cpu num threads
int threads = 1;
// lite power mode
// 0: LITE_POWER_HIGH
// 1: LITE_POWER_LOW
// 2: LITE_POWER_FULL
// 3: LITE_POWER_NO_BIND
// 4: LITE_POWER_RAND_HIGH
// 5: LITE_POWER_RAND_LOW
int power_mode = 3;
// enable fp16
/// Paddle Lite power mode for mobile device.
LitePowerMode power_mode = LITE_POWER_NO_BIND;
/// Number of threads while use CPU
int cpu_threads = 1;
/// Enable use half precision
bool enable_fp16 = false;
// enable int8
/// Enable use int8 precision for quantized model
bool enable_int8 = false;
Device device = Device::CPU;
// optimized model dir for CxxConfig
std::string optimized_model_dir = "";
// TODO(qiuyanjun): support more options for lite backend.
// Such as fp16, different device target (kARM/kXPU/kNPU/...)
std::string nnadapter_subgraph_partition_config_path = "";
std::string nnadapter_subgraph_partition_config_buffer = "";
std::string nnadapter_context_properties = "";
@@ -77,9 +72,6 @@ struct LiteBackendOption {
std::map<std::string, std::vector<std::vector<int64_t>>>
nnadapter_dynamic_shape_info = {{"", {{0}}}};
std::vector<std::string> nnadapter_device_names = {};
bool enable_timvx = false;
bool enable_ascend = false;
bool enable_kunlunxin = false;
int device_id = 0;
int kunlunxin_l3_workspace_size = 0xfffc00;
bool kunlunxin_locked = false;

View File

@@ -58,7 +58,7 @@ void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
// Select the Verisilicon TIM-VX device for inference.
void RuntimeOption::UseTimVX() {
  device = Device::TIMVX;
  // Fix: drop the stale `paddle_lite_option.enable_timvx = true;` — the
  // per-device boolean flags were removed from LiteBackendOption in favor
  // of the single `device` field.
  paddle_lite_option.device = device;
}
void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
@@ -68,7 +68,7 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
bool adaptive_seqlen,
bool enable_multi_stream) {
device = Device::KUNLUNXIN;
paddle_lite_option.enable_kunlunxin = true;
paddle_lite_option.device = device;
paddle_lite_option.device_id = kunlunxin_id;
paddle_lite_option.kunlunxin_l3_workspace_size = l3_workspace_size;
paddle_lite_option.kunlunxin_locked = locked;
@@ -81,7 +81,7 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
// Select the Huawei Ascend NPU device for inference.
void RuntimeOption::UseAscend() {
  device = Device::ASCEND;
  // Fix: drop the stale `paddle_lite_option.enable_ascend = true;` — the
  // per-device boolean flags were removed from LiteBackendOption in favor
  // of the single `device` field.
  paddle_lite_option.device = device;
}
void RuntimeOption::UseSophgo() {
@@ -96,7 +96,7 @@ void RuntimeOption::SetExternalStream(void* external_stream) {
// Set the number of CPU threads for all backends that honor it.
// Asserts that thread_num is positive.
void RuntimeOption::SetCpuThreadNum(int thread_num) {
  FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
  cpu_thread_num = thread_num;
  // Fix: drop the stale `paddle_lite_option.threads` assignment — the field
  // was renamed to `cpu_threads` in LiteBackendOption.
  paddle_lite_option.cpu_threads = thread_num;
  ort_option.intra_op_num_threads = thread_num;
}