[Backend] Add KunlunXin XPU deploy support (#747)

* add xpu support

* fix docs

* update code

* update doc

* update code

* update yolov5

* update cmake

* add int64_t data support

* fix

* update download links

* add en doc

* update code

* update xpu options

* update doc

* update doc

* update doc

* update lib links

* update doc

* update code

* update lite xpu link

* update xpu lib

* update doc

* update en doc
Author: yeliang2258
Date: 2022-12-15 21:17:14 +08:00 (committed via GitHub)
Parent: 6e79df40d9
Commit: 5be839b322
39 changed files with 870 additions and 58 deletions


@@ -43,24 +43,33 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
   option_ = option;
   std::vector<paddle::lite_api::Place> valid_places;
   if (option_.enable_int8) {
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
+    if(option_.enable_xpu) {
+      valid_places.push_back(
+          paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)});
+    } else {
+      valid_places.push_back(
+          paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
+    }
     FDINFO << "Lite::Backend enable_int8 option is ON ! Lite::Backend will "
            << "inference with int8 precision!" << std::endl;
   }
   if (option_.enable_fp16) {
-    paddle::lite_api::MobileConfig check_fp16_config;
-    // Determine whether the device supports the FP16
-    // instruction set (or whether it is an arm device
-    // of the armv8.2 architecture)
-    supported_fp16_ = check_fp16_config.check_fp16_valid();
-    if (supported_fp16_) {
-      valid_places.push_back(
-          paddle::lite_api::Place{TARGET(kARM), PRECISION(kFP16)});
-      FDINFO << "Your device is supported fp16 ! Lite::Backend will "
-             << "inference with fp16 precision!" << std::endl;
-    } else {
-      FDWARNING << "This device is not supported fp16, will skip fp16 option.";
-    }
+    if(option_.enable_xpu){
+      valid_places.push_back(
+          paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)});
+    } else {
+      paddle::lite_api::MobileConfig check_fp16_config;
+      // Determine whether the device supports the FP16
+      // instruction set (or whether it is an arm device
+      // of the armv8.2 architecture)
+      supported_fp16_ = check_fp16_config.check_fp16_valid();
+      if (supported_fp16_) {
+        valid_places.push_back(
+            paddle::lite_api::Place{TARGET(kARM), PRECISION(kFP16)});
+        FDINFO << "The device supports FP16, Lite::Backend will inference with FP16 precision." << std::endl;
+      } else {
+        FDWARNING << "The device doesn't support FP16, will fallback to FP32.";
+      }
+    }
   }
   if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
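
Note on the hunk above: when XPU is enabled, the ARM FP16 probe is bypassed entirely, since check_fp16_valid() only answers whether the host ARM CPU implements the armv8.2 FP16 instruction set and says nothing about the KunlunXin accelerator. A minimal usage sketch of the flags involved follows, assuming LiteBackendOption exposes them as plain public members; the struct itself is not part of this hunk, so the include path and everything outside the diff are assumptions:

    // Sketch only: the option field names come from the diff above; the
    // include path and surrounding code are assumptions, not from this commit.
    #include "fastdeploy/backends/lite/lite_backend.h"

    fastdeploy::LiteBackendOption MakeXpuFp16Option() {
      fastdeploy::LiteBackendOption opt;
      opt.enable_xpu = true;    // select TARGET(kXPU) places in BuildOption
      opt.enable_fp16 = true;   // with XPU on, adds Place{kXPU, kFP16} directly
      opt.enable_int8 = false;  // true would add Place{kXPU, kInt8} instead
      return opt;               // the runtime passes opt to LiteBackend::BuildOption
    }
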
@@ -81,8 +90,24 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
     valid_places.push_back(
         paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
   }
-  valid_places.push_back(
-      paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)});
+  if(option_.enable_xpu){
+    valid_places.push_back(
+        paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)});
+    valid_places.push_back(
+        paddle::lite_api::Place{TARGET(kX86), PRECISION(kFloat)});
+    config_.set_xpu_dev_per_thread(option_.device_id);
+    config_.set_xpu_workspace_l3_size_per_thread(option_.xpu_l3_workspace_size);
+    config_.set_xpu_l3_cache_method(option_.xpu_l3_workspace_size, option_.xpu_locked);
+    config_.set_xpu_conv_autotune(option_.xpu_autotune, option_.xpu_autotune_file);
+    config_.set_xpu_multi_encoder_method(option_.xpu_precision, option_.xpu_adaptive_seqlen);
+    if (option_.xpu_enable_multi_stream) {
+      config_.enable_xpu_multi_stream();
+    }
+  } else {
+    valid_places.push_back(
+        paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)});
+  }
   config_.set_valid_places(valid_places);
   if (option_.threads > 0) {
     config_.set_threads(option_.threads);
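
The XPU branch above also forwards a set of KunlunXin-specific knobs from LiteBackendOption into the Paddle Lite config: the L3 workspace size and caching method, conv autotune, the multi-encoder precision/adaptive-seqlen method, and optional multi-stream execution. A minimal sketch of populating those fields, under the same assumptions as the previous sketch (field names from the diff, everything else illustrative; the values shown are examples, not defaults taken from this commit):

    // Sketch only; field names from the diff above, values illustrative.
    fastdeploy::LiteBackendOption MakeKunlunXinOption() {
      fastdeploy::LiteBackendOption opt;
      opt.enable_xpu = true;
      opt.device_id = 0;                             // set_xpu_dev_per_thread()
      opt.xpu_l3_workspace_size = 16 * 1024 * 1024;  // bytes of XPU L3 cache to use
      opt.xpu_locked = false;                        // whether the L3 cache is locked to this thread
      opt.xpu_autotune = true;                       // autotune conv kernels
      opt.xpu_autotune_file = "";                    // optional file to cache autotune results
      opt.xpu_precision = "int16";                   // multi-encoder compute precision
      opt.xpu_adaptive_seqlen = false;               // adaptive sequence length for encoders
      opt.xpu_enable_multi_stream = false;           // calls enable_xpu_multi_stream() when true
      return opt;
    }
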
@@ -160,7 +185,9 @@ bool LiteBackend::InitFromPaddle(const std::string& model_file,
     auto shape = tensor->shape();
     info.shape.assign(shape.begin(), shape.end());
     info.name = output_names[i];
-    info.dtype = LiteDataTypeToFD(tensor->precision());
+    if(!option_.enable_xpu){
+      info.dtype = LiteDataTypeToFD(tensor->precision());
+    }
     outputs_desc_.emplace_back(info);
   }
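
The guard in the hunk above leaves info.dtype untouched when loading a model for XPU, presumably because the output precision the predictor reports before the first run is not reliable on that target; the next hunk then corrects the recorded dtype inside Infer, once the real output tensor is available. LiteDataTypeToFD is the existing helper that converts a Paddle Lite precision into a FastDeploy dtype; a rough, illustrative sketch of that kind of mapping (not the exact body from the repository; the helper name, include paths, and fallback behavior here are assumptions) is:

    // Illustrative sketch only; the real LiteDataTypeToFD in the repo may differ.
    #include "fastdeploy/core/fd_type.h"  // FDDataType (assumed path)
    #include "paddle_api.h"               // paddle::lite_api::PrecisionType (assumed path)

    fastdeploy::FDDataType LiteDataTypeToFDSketch(paddle::lite_api::PrecisionType dtype) {
      switch (dtype) {
        case paddle::lite_api::PrecisionType::kFloat: return fastdeploy::FDDataType::FP32;
        case paddle::lite_api::PrecisionType::kFP16:  return fastdeploy::FDDataType::FP16;
        case paddle::lite_api::PrecisionType::kInt8:  return fastdeploy::FDDataType::INT8;
        case paddle::lite_api::PrecisionType::kInt32: return fastdeploy::FDDataType::INT32;
        // int64 handling is likely what the "add int64_t data support" bullet refers to.
        case paddle::lite_api::PrecisionType::kInt64: return fastdeploy::FDDataType::INT64;
        default: return fastdeploy::FDDataType::FP32;  // the real helper should report an error here
      }
    }
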
@@ -239,6 +266,9 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
   outputs->resize(outputs_desc_.size());
   for (size_t i = 0; i < outputs_desc_.size(); ++i) {
     auto tensor = predictor_->GetOutput(i);
+    if(outputs_desc_[i].dtype != LiteDataTypeToFD(tensor->precision())){
+      outputs_desc_[i].dtype = LiteDataTypeToFD(tensor->precision());
+    }
     (*outputs)[i].Resize(tensor->shape(), outputs_desc_[i].dtype,
                          outputs_desc_[i].name);
     memcpy((*outputs)[i].MutableData(), tensor->data<void>(),