Mirror of https://github.com/PaddlePaddle/FastDeploy.git
fix conflicts for ascend
@@ -43,24 +43,33 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
   option_ = option;
   std::vector<paddle::lite_api::Place> valid_places;
   if (option_.enable_int8) {
-    valid_places.push_back(
+    if(option_.enable_xpu) {
+      valid_places.push_back(
+          paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)});
+    } else {
+      valid_places.push_back(
         paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
+    }
     FDINFO << "Lite::Backend enable_int8 option is ON ! Lite::Backend will "
            << "inference with int8 precision!" << std::endl;
   }
   if (option_.enable_fp16) {
-    paddle::lite_api::MobileConfig check_fp16_config;
-    // Determine whether the device supports the FP16
-    // instruction set (or whether it is an arm device
-    // of the armv8.2 architecture)
-    supported_fp16_ = check_fp16_config.check_fp16_valid();
-    if (supported_fp16_) {
+    if(option_.enable_xpu){
       valid_places.push_back(
-          paddle::lite_api::Place{TARGET(kARM), PRECISION(kFP16)});
-      FDINFO << "Your device is supported fp16 ! Lite::Backend will "
-             << "inference with fp16 precision!" << std::endl;
+          paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)});
     } else {
-      FDWARNING << "This device is not supported fp16, will skip fp16 option.";
+      paddle::lite_api::MobileConfig check_fp16_config;
+      // Determine whether the device supports the FP16
+      // instruction set (or whether it is an arm device
+      // of the armv8.2 architecture)
+      supported_fp16_ = check_fp16_config.check_fp16_valid();
+      if (supported_fp16_) {
+        valid_places.push_back(
+            paddle::lite_api::Place{TARGET(kARM), PRECISION(kFP16)});
+        FDINFO << "The device supports FP16, Lite::Backend will inference with FP16 precision." << std::endl;
+      } else {
+        FDWARNING << "The device doesn't support FP16, will fallback to FP32.";
+      }
     }
   }
   if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
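Note on the hunk above: the condensed sketch below is illustrative only — the SelectValidPlaces helper and its bool parameters are not FastDeploy symbols — but it restates the precision-place selection this change introduces: with enable_xpu set, int8/fp16 map to kXPU places; otherwise ARM is used, and ARM fp16 is only added after MobileConfig::check_fp16_valid() confirms the CPU implements the armv8.2 fp16 instructions.

#include <vector>
#include "paddle_api.h"  // Paddle Lite C++ API: Place, MobileConfig, TARGET(), PRECISION()

// Illustrative helper, not FastDeploy code: mirrors the branching in the
// hunk above for the int8/fp16 precision places.
std::vector<paddle::lite_api::Place> SelectValidPlaces(bool enable_int8,
                                                       bool enable_fp16,
                                                       bool enable_xpu) {
  std::vector<paddle::lite_api::Place> places;
  if (enable_int8) {
    if (enable_xpu) {
      places.push_back(paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)});
    } else {
      places.push_back(paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
    }
  }
  if (enable_fp16) {
    if (enable_xpu) {
      places.push_back(paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)});
    } else {
      // On ARM, fp16 is only usable if the CPU implements the armv8.2 fp16
      // instruction set; otherwise the fp16 option is skipped.
      paddle::lite_api::MobileConfig check_fp16_config;
      if (check_fp16_config.check_fp16_valid()) {
        places.push_back(
            paddle::lite_api::Place{TARGET(kARM), PRECISION(kFP16)});
      }
    }
  }
  return places;
}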
@@ -90,9 +99,24 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
     valid_places.push_back(
         paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
   }

-  valid_places.push_back(
-      paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)});
+  if(option_.enable_xpu){
+    valid_places.push_back(
+        paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)});
+    valid_places.push_back(
+        paddle::lite_api::Place{TARGET(kX86), PRECISION(kFloat)});
+    config_.set_xpu_dev_per_thread(option_.device_id);
+    config_.set_xpu_workspace_l3_size_per_thread(option_.xpu_l3_workspace_size);
+    config_.set_xpu_l3_cache_method(option_.xpu_l3_workspace_size, option_.xpu_locked);
+    config_.set_xpu_conv_autotune(option_.xpu_autotune, option_.xpu_autotune_file);
+    config_.set_xpu_multi_encoder_method(option_.xpu_precision, option_.xpu_adaptive_seqlen);
+    if (option_.xpu_enable_multi_stream) {
+      config_.enable_xpu_multi_stream();
+    }
+  } else {
+    valid_places.push_back(
+        paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)});
+  }
   config_.set_valid_places(valid_places);
   if (option_.threads > 0) {
     config_.set_threads(option_.threads);
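This hunk routes the float precision place through the same enable_xpu switch and wires the XPU runtime knobs onto the CxxConfig. The sketch below is an illustration under stated assumptions: the XpuOptions struct and the ApplyXpuOptions helper are not FastDeploy code, and only the setter names and argument pairings are taken from the hunk.

#include <string>
#include "paddle_api.h"  // Paddle Lite C++ API: CxxConfig

// Assumed aggregate of the XPU-related fields the hunk reads from
// LiteBackendOption; the real option type in FastDeploy may differ.
struct XpuOptions {
  int device_id = 0;
  int xpu_l3_workspace_size = 0;
  bool xpu_locked = false;
  bool xpu_autotune = true;
  std::string xpu_autotune_file;
  std::string xpu_precision;
  bool xpu_adaptive_seqlen = false;
  bool xpu_enable_multi_stream = false;
};

// Applies the same CxxConfig setters as the hunk above; helper name is illustrative.
void ApplyXpuOptions(paddle::lite_api::CxxConfig* config, const XpuOptions& o) {
  config->set_xpu_dev_per_thread(o.device_id);
  config->set_xpu_workspace_l3_size_per_thread(o.xpu_l3_workspace_size);
  config->set_xpu_l3_cache_method(o.xpu_l3_workspace_size, o.xpu_locked);
  config->set_xpu_conv_autotune(o.xpu_autotune, o.xpu_autotune_file);
  config->set_xpu_multi_encoder_method(o.xpu_precision, o.xpu_adaptive_seqlen);
  if (o.xpu_enable_multi_stream) {
    config->enable_xpu_multi_stream();
  }
}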
@@ -170,7 +194,9 @@ bool LiteBackend::InitFromPaddle(const std::string& model_file,
     auto shape = tensor->shape();
     info.shape.assign(shape.begin(), shape.end());
     info.name = output_names[i];
-    info.dtype = LiteDataTypeToFD(tensor->precision());
+    if(!option_.enable_xpu){
+      info.dtype = LiteDataTypeToFD(tensor->precision());
+    }
     outputs_desc_.emplace_back(info);
   }

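The InitFromPaddle hunk stops caching the output dtype at load time when enable_xpu is set; the Infer() change further down re-derives it from the runtime tensor instead. LiteDataTypeToFD is FastDeploy's converter from Paddle Lite's PrecisionType to FDDataType; a plausible shape of such a mapping is sketched below purely for orientation — the switch arms and the function are assumptions, not the actual implementation.

#include "paddle_api.h"  // Paddle Lite C++ API: PrecisionType

// Illustrative mapping only; assumes FastDeploy's FDDataType enum and
// FDASSERT macro are in scope. The real LiteDataTypeToFD may cover more
// cases and report errors differently.
FDDataType LiteDataTypeToFDSketch(paddle::lite_api::PrecisionType dtype) {
  switch (dtype) {
    case paddle::lite_api::PrecisionType::kFloat: return FDDataType::FP32;
    case paddle::lite_api::PrecisionType::kInt8:  return FDDataType::INT8;
    case paddle::lite_api::PrecisionType::kInt32: return FDDataType::INT32;
    case paddle::lite_api::PrecisionType::kInt64: return FDDataType::INT64;
    case paddle::lite_api::PrecisionType::kUInt8: return FDDataType::UINT8;
    default:
      FDASSERT(false, "Unexpected Paddle Lite precision type.");
  }
  return FDDataType::FP32;  // not reached; silences missing-return warnings
}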
@@ -216,28 +242,28 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
     // Adjust dims only, allocate lazy.
     tensor->Resize(inputs[i].shape);
     if (inputs[i].dtype == FDDataType::FP32) {
-      tensor->CopyFromCpu<float, paddle::lite_api::TargetType::kARM>(
+      tensor->CopyFromCpu<float, paddle::lite_api::TargetType::kHost>(
           reinterpret_cast<const float*>(const_cast<void*>(
               inputs[i].CpuData())));
     } else if (inputs[i].dtype == FDDataType::INT32) {
-      tensor->CopyFromCpu<int, paddle::lite_api::TargetType::kARM>(
+      tensor->CopyFromCpu<int, paddle::lite_api::TargetType::kHost>(
           reinterpret_cast<const int*>(const_cast<void*>(
               inputs[i].CpuData())));
     } else if (inputs[i].dtype == FDDataType::INT8) {
-      tensor->CopyFromCpu<int8_t, paddle::lite_api::TargetType::kARM>(
+      tensor->CopyFromCpu<int8_t, paddle::lite_api::TargetType::kHost>(
          reinterpret_cast<const int8_t*>(const_cast<void*>(
              inputs[i].CpuData())));
     } else if (inputs[i].dtype == FDDataType::UINT8) {
-      tensor->CopyFromCpu<uint8_t, paddle::lite_api::TargetType::kARM>(
+      tensor->CopyFromCpu<uint8_t, paddle::lite_api::TargetType::kHost>(
          reinterpret_cast<const uint8_t*>(const_cast<void*>(
              inputs[i].CpuData())));
     } else if (inputs[i].dtype == FDDataType::INT64) {
-#ifdef __aarch64__
-      tensor->CopyFromCpu<int64_t, paddle::lite_api::TargetType::kARM>(
+#if (defined(__aarch64__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_ARM64))
+      tensor->CopyFromCpu<int64_t, paddle::lite_api::TargetType::kHost>(
          reinterpret_cast<const int64_t*>(const_cast<void*>(
              inputs[i].CpuData())));
 #else
-      FDASSERT(false, "FDDataType::INT64 is not support for Arm v7 now!");
+      FDASSERT(false, "FDDataType::INT64 is not support for x86/armv7 now!");
 #endif
     } else {
       FDASSERT(false, "Unexpected data type of %d.", inputs[i].dtype);
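In the Infer() input path every CopyFromCpu now targets TargetType::kHost instead of kARM, and the INT64 guard admits 64-bit x86 builds as well as aarch64. Since the branches now differ only in element type, they can be read as one template; the helper below is an illustrative condensation, not FastDeploy code.

#include "paddle_api.h"  // Paddle Lite C++ API: Tensor, TargetType

// Illustrative only: one template instead of the per-dtype branches, always
// copying host memory into the Lite input tensor.
template <typename T>
void CopyInputFromCpu(paddle::lite_api::Tensor* tensor, const void* src) {
  tensor->CopyFromCpu<T, paddle::lite_api::TargetType::kHost>(
      reinterpret_cast<const T*>(src));
}

// Usage mirroring the FP32 branch above (tensor comes from predictor_->GetInput(i)):
//   CopyInputFromCpu<float>(tensor.get(), inputs[i].CpuData());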
@@ -249,6 +275,9 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
   outputs->resize(outputs_desc_.size());
   for (size_t i = 0; i < outputs_desc_.size(); ++i) {
     auto tensor = predictor_->GetOutput(i);
+    if(outputs_desc_[i].dtype != LiteDataTypeToFD(tensor->precision())){
+      outputs_desc_[i].dtype = LiteDataTypeToFD(tensor->precision());
+    }
     (*outputs)[i].Resize(tensor->shape(), outputs_desc_[i].dtype,
                          outputs_desc_[i].name);
     memcpy((*outputs)[i].MutableData(), tensor->data<void>(),
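Finally, the output loop re-checks each tensor's runtime precision before allocating the FDTensor, which is what makes the deferred dtype in the InitFromPaddle hunk safe. Below is a commented restatement of the loop body; std::memcpy assumes <cstring>, and the Nbytes()-style size helper is an assumption about FDTensor's interface, since the real size argument lies outside this hunk.

// In-place sketch of the guarded output copy (illustrative, not a verbatim excerpt):
auto tensor = predictor_->GetOutput(i);
auto runtime_dtype = LiteDataTypeToFD(tensor->precision());
if (outputs_desc_[i].dtype != runtime_dtype) {
  // The precision observed after the model has actually run (e.g. on XPU)
  // wins over whatever was cached at load time.
  outputs_desc_[i].dtype = runtime_dtype;
}
(*outputs)[i].Resize(tensor->shape(), outputs_desc_[i].dtype,
                     outputs_desc_[i].name);
std::memcpy((*outputs)[i].MutableData(), tensor->data<void>(),
            (*outputs)[i].Nbytes());  // Nbytes() is an assumed helper name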