[Backend] Support onnxruntime DirectML inference. (#1304)

* Fix links in readme

* Update PPOCRv2/v3 examples

* Update auto compression configs

* Add new quantization support for PaddleClas model

* Update quantized YOLOv6s model download link

* Improve PPOCR comments

* Add English doc for quantization

* Fix PPOCR rec model bug

* Add new PaddleSeg quantization support

* Add Ascend model list

* Support OnnxRuntime DirectML

* Remove DirectML vision model example

* Improve OnnxRuntime DirectML

* Fix OpenCV CMake on Windows

* Recheck code style

Author: yunyaoXYY
Date: 2023-02-17 10:53:51 +08:00 (committed by GitHub)
Parent: efa46563f3
Commit: c38b7d4377
22 changed files with 393 additions and 60 deletions
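
For orientation, the diff below routes a new Device::DIRECTML value through FastDeployModel's backend selection so that inference can run on ONNX Runtime's DirectML execution provider. A minimal usage sketch follows; it relies only on the RuntimeOption device/backend fields visible in this diff, and the model class and file names are illustrative (the PR may also add a dedicated RuntimeOption helper, which is not part of this file's changes):

    #include <iostream>
    #include "fastdeploy/vision.h"

    int main() {
      fastdeploy::RuntimeOption option;
      // Request the DirectML device; Device::DIRECTML and Backend::ORT are the
      // values checked by InitRuntimeWithSpecifiedBackend() in the diff below.
      option.device = fastdeploy::Device::DIRECTML;
      option.backend = fastdeploy::Backend::ORT;

      // Illustrative model and file names, not taken from this commit.
      auto model = fastdeploy::vision::detection::PPYOLOE(
          "model.pdmodel", "model.pdiparams", "infer_cfg.yml", option);
      if (!model.Initialized()) {
        std::cerr << "Failed to initialize model on DirectML." << std::endl;
        return -1;
      }
      return 0;
    }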


@@ -41,20 +41,19 @@ bool CheckBackendSupported(const std::vector<Backend>& backends,
return false;
}
bool FastDeployModel::IsSupported(const std::vector<Backend>& backends,
Backend backend) {
#ifdef ENABLE_BENCHMARK
if (runtime_option.benchmark_option.enable_profile) {
FDWARNING << "In benchmark mode, we don't check to see if "
<< "the backend [" << backend
<< "] is supported for current model!"
<< std::endl;
FDWARNING << "In benchmark mode, we don't check to see if "
<< "the backend [" << backend
<< "] is supported for current model!" << std::endl;
return true;
}
return CheckBackendSupported(backends, backend);
#else
return CheckBackendSupported(backends, backend);
#endif
}
bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
@@ -70,6 +69,7 @@ bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
bool use_sophgotpu = (runtime_option.device == Device::SOPHGOTPUD);
bool use_timvx = (runtime_option.device == Device::TIMVX);
bool use_ascend = (runtime_option.device == Device::ASCEND);
bool use_directml = (runtime_option.device == Device::DIRECTML);
bool use_kunlunxin = (runtime_option.device == Device::KUNLUNXIN);
if (use_gpu) {
@@ -107,6 +107,13 @@ bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
<< " is not supported." << std::endl;
return false;
}
} else if (use_directml) {
if (!IsSupported(valid_directml_backends, runtime_option.backend)) {
FDERROR << "The valid directml backends of model " << ModelName()
<< " are " << Str(valid_directml_backends) << ", "
<< runtime_option.backend << " is not supported." << std::endl;
return false;
}
} else if (use_kunlunxin) {
if (!IsSupported(valid_kunlunxin_backends, runtime_option.backend)) {
FDERROR << "The valid kunlunxin backends of model " << ModelName()
@@ -155,6 +162,8 @@ bool FastDeployModel::InitRuntimeWithSpecifiedDevice() {
return CreateTimVXBackend();
} else if (runtime_option.device == Device::ASCEND) {
return CreateASCENDBackend();
} else if (runtime_option.device == Device::DIRECTML) {
return CreateDirectMLBackend();
} else if (runtime_option.device == Device::KUNLUNXIN) {
return CreateKunlunXinBackend();
} else if (runtime_option.device == Device::SOPHGOTPUD) {
@@ -168,8 +177,9 @@ bool FastDeployModel::InitRuntimeWithSpecifiedDevice() {
return false;
#endif
}
FDERROR << "Only support CPU/GPU/IPU/RKNPU/TIMVX/KunlunXin/ASCEND now."
<< std::endl;
FDERROR
<< "Only support CPU/GPU/IPU/RKNPU/TIMVX/KunlunXin/ASCEND/DirectML now."
<< std::endl;
return false;
}
@@ -350,6 +360,30 @@ bool FastDeployModel::CreateASCENDBackend() {
return false;
}
bool FastDeployModel::CreateDirectMLBackend() {
if (valid_directml_backends.size() == 0) {
FDERROR << "There's no valid directml backends for model: " << ModelName()
<< std::endl;
return false;
}
for (size_t i = 0; i < valid_directml_backends.size(); ++i) {
if (!IsBackendAvailable(valid_directml_backends[i])) {
continue;
}
runtime_option.backend = valid_directml_backends[i];
runtime_ = std::unique_ptr<Runtime>(new Runtime());
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
}
FDERROR << "Found no valid directml backend for model: " << ModelName()
<< std::endl;
return false;
}
bool FastDeployModel::CreateIpuBackend() {
if (valid_ipu_backends.size() == 0) {
FDERROR << "There's no valid ipu backends for model: " << ModelName()
@@ -384,13 +418,13 @@ bool FastDeployModel::Infer(std::vector<FDTensor>& input_tensors,
tc.End();
if (time_of_runtime_.size() > 50000) {
FDWARNING << "There are already 50000 records of runtime, will force to "
"disable record time of runtime now."
"disable record time of runtime now."
<< std::endl;
enable_record_time_of_runtime_ = false;
}
time_of_runtime_.push_back(tc.Duration());
}
return ret;
}
@@ -434,7 +468,7 @@ std::map<std::string, float> FastDeployModel::PrintStatisInfoOfRuntime() {
statis_info_of_runtime_dict["warmup_iter"] = warmup_iter;
statis_info_of_runtime_dict["avg_time"] = avg_time;
statis_info_of_runtime_dict["iterations"] = time_of_runtime_.size();
return statis_info_of_runtime_dict;
}
} // namespace fastdeploy
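
A note on how CreateDirectMLBackend() gets anything to iterate over: following FastDeploy's existing valid_*_backends convention, each model class lists the backends it supports per device in its constructor. The sketch below shows how a model might opt in to DirectML; only the valid_directml_backends member comes from this commit's diff, while the class, file names, and everything else are hypothetical.

    #include <string>
    #include "fastdeploy/fastdeploy_model.h"

    // Hypothetical model class, not part of this commit.
    class MyModel : public fastdeploy::FastDeployModel {
     public:
      MyModel(const std::string& model_file, const std::string& params_file,
              const fastdeploy::RuntimeOption& custom_option =
                  fastdeploy::RuntimeOption()) {
        valid_cpu_backends = {fastdeploy::Backend::ORT};
        valid_gpu_backends = {fastdeploy::Backend::ORT, fastdeploy::Backend::TRT};
        // DirectML runs through ONNX Runtime's DirectML execution provider,
        // so ORT is the only candidate backend for Device::DIRECTML.
        valid_directml_backends = {fastdeploy::Backend::ORT};

        runtime_option = custom_option;
        runtime_option.model_file = model_file;
        runtime_option.params_file = params_file;
        // InitRuntime() dispatches to InitRuntimeWithSpecifiedDevice()/Backend()
        // shown in the diff above; real models call it from their Initialize().
        initialized = InitRuntime();
      }
      std::string ModelName() const override { return "MyModel"; }
    };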