[Backend] Support onnxruntime DirectML inference. (#1304)

* Fix links in readme

* Update PPOCRv2/v3 examples

* Update auto compression configs

* Add new quantization support for PaddleClas model

* Update quantized YOLOv6s model download link

* Improve PPOCR comments

* Add English doc for quantization

* Fix PPOCR rec model bug

* Add new PaddleSeg quantization support

* Add Ascend model list

* Support OnnxRuntime DirectML

* Remove DirectML vision model example

* Improve OnnxRuntime DirectML

* Fix OpenCV CMake on Windows

* Recheck code style

Author: yunyaoXYY
Date: 2023-02-17 10:53:51 +08:00 (committed by GitHub)
Parent: efa46563f3
Commit: c38b7d4377
22 changed files with 393 additions and 60 deletions
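
For orientation, the diff below routes a new Device::DIRECTML value through FastDeployModel's backend selection so that inference can run on ONNX Runtime's DirectML execution provider. A minimal usage sketch follows; it relies only on the RuntimeOption device/backend fields visible in this diff, and the model class and file names are illustrative (the PR may also add a dedicated RuntimeOption helper, which is not part of this file's changes):

    #include <iostream>
    #include "fastdeploy/vision.h"

    int main() {
      fastdeploy::RuntimeOption option;
      // Request the DirectML device; Device::DIRECTML and Backend::ORT are the
      // values checked by InitRuntimeWithSpecifiedBackend() in the diff below.
      option.device = fastdeploy::Device::DIRECTML;
      option.backend = fastdeploy::Backend::ORT;

      // Illustrative model and file names, not taken from this commit.
      auto model = fastdeploy::vision::detection::PPYOLOE(
          "model.pdmodel", "model.pdiparams", "infer_cfg.yml", option);
      if (!model.Initialized()) {
        std::cerr << "Failed to initialize model on DirectML." << std::endl;
        return -1;
      }
      return 0;
    }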


@@ -41,20 +41,19 @@ bool CheckBackendSupported(const std::vector<Backend>& backends,
return false;
}
bool FastDeployModel::IsSupported(const std::vector<Backend>& backends,
Backend backend) {
#ifdef ENABLE_BENCHMARK
if (runtime_option.benchmark_option.enable_profile) {
FDWARNING << "In benchmark mode, we don't check to see if "
<< "the backend [" << backend
<< "] is supported for current model!"
<< std::endl;
FDWARNING << "In benchmark mode, we don't check to see if "
<< "the backend [" << backend
<< "] is supported for current model!" << std::endl;
return true;
}
return CheckBackendSupported(backends, backend);
#else
return CheckBackendSupported(backends, backend);
#endif
}
bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
@@ -70,6 +69,7 @@ bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
bool use_sophgotpu = (runtime_option.device == Device::SOPHGOTPUD);
bool use_timvx = (runtime_option.device == Device::TIMVX);
bool use_ascend = (runtime_option.device == Device::ASCEND);
bool use_directml = (runtime_option.device == Device::DIRECTML);
bool use_kunlunxin = (runtime_option.device == Device::KUNLUNXIN);
if (use_gpu) {
@@ -107,6 +107,13 @@ bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
<< " is not supported." << std::endl;
return false;
}
} else if (use_directml) {
if (!IsSupported(valid_directml_backends, runtime_option.backend)) {
FDERROR << "The valid directml backends of model " << ModelName()
<< " are " << Str(valid_directml_backends) << ", "
<< runtime_option.backend << " is not supported." << std::endl;
return false;
}
} else if (use_kunlunxin) {
if (!IsSupported(valid_kunlunxin_backends, runtime_option.backend)) {
FDERROR << "The valid kunlunxin backends of model " << ModelName()
@@ -155,6 +162,8 @@ bool FastDeployModel::InitRuntimeWithSpecifiedDevice() {
return CreateTimVXBackend();
} else if (runtime_option.device == Device::ASCEND) {
return CreateASCENDBackend();
} else if (runtime_option.device == Device::DIRECTML) {
return CreateDirectMLBackend();
} else if (runtime_option.device == Device::KUNLUNXIN) {
return CreateKunlunXinBackend();
} else if (runtime_option.device == Device::SOPHGOTPUD) {
@@ -168,8 +177,9 @@ bool FastDeployModel::InitRuntimeWithSpecifiedDevice() {
return false;
#endif
}
FDERROR << "Only support CPU/GPU/IPU/RKNPU/TIMVX/KunlunXin/ASCEND now."
<< std::endl;
FDERROR
<< "Only support CPU/GPU/IPU/RKNPU/TIMVX/KunlunXin/ASCEND/DirectML now."
<< std::endl;
return false;
}
@@ -350,6 +360,30 @@ bool FastDeployModel::CreateASCENDBackend() {
return false;
}
bool FastDeployModel::CreateDirectMLBackend() {
if (valid_directml_backends.size() == 0) {
FDERROR << "There's no valid directml backends for model: " << ModelName()
<< std::endl;
return false;
}
for (size_t i = 0; i < valid_directml_backends.size(); ++i) {
if (!IsBackendAvailable(valid_directml_backends[i])) {
continue;
}
runtime_option.backend = valid_directml_backends[i];
runtime_ = std::unique_ptr<Runtime>(new Runtime());
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
}
FDERROR << "Found no valid directml backend for model: " << ModelName()
<< std::endl;
return false;
}
bool FastDeployModel::CreateIpuBackend() {
if (valid_ipu_backends.size() == 0) {
FDERROR << "There's no valid ipu backends for model: " << ModelName()
@@ -384,13 +418,13 @@ bool FastDeployModel::Infer(std::vector<FDTensor>& input_tensors,
tc.End();
if (time_of_runtime_.size() > 50000) {
FDWARNING << "There are already 50000 records of runtime, will force to "
"disable record time of runtime now."
"disable record time of runtime now."
<< std::endl;
enable_record_time_of_runtime_ = false;
}
time_of_runtime_.push_back(tc.Duration());
}
return ret;
}
@@ -434,7 +468,7 @@ std::map<std::string, float> FastDeployModel::PrintStatisInfoOfRuntime() {
statis_info_of_runtime_dict["warmup_iter"] = warmup_iter;
statis_info_of_runtime_dict["avg_time"] = avg_time;
statis_info_of_runtime_dict["iterations"] = time_of_runtime_.size();
return statis_info_of_runtime_dict;
}
} // namespace fastdeploy
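
A note on how CreateDirectMLBackend() gets anything to iterate over: following FastDeploy's existing valid_*_backends convention, each model class lists the backends it supports per device in its constructor. The sketch below shows how a model might opt in to DirectML; only the valid_directml_backends member comes from this commit's diff, while the class, file names, and everything else are hypothetical.

    #include <string>
    #include "fastdeploy/fastdeploy_model.h"

    // Hypothetical model class, not part of this commit.
    class MyModel : public fastdeploy::FastDeployModel {
     public:
      MyModel(const std::string& model_file, const std::string& params_file,
              const fastdeploy::RuntimeOption& custom_option =
                  fastdeploy::RuntimeOption()) {
        valid_cpu_backends = {fastdeploy::Backend::ORT};
        valid_gpu_backends = {fastdeploy::Backend::ORT, fastdeploy::Backend::TRT};
        // DirectML runs through ONNX Runtime's DirectML execution provider,
        // so ORT is the only candidate backend for Device::DIRECTML.
        valid_directml_backends = {fastdeploy::Backend::ORT};

        runtime_option = custom_option;
        runtime_option.model_file = model_file;
        runtime_option.params_file = params_file;
        // InitRuntime() dispatches to InitRuntimeWithSpecifiedDevice()/Backend()
        // shown in the diff above; real models call it from their Initialize().
        initialized = InitRuntime();
      }
      std::string ModelName() const override { return "MyModel"; }
    };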