[Backend] Support onnxruntime DirectML inference. (#1304)

* Fix links in readme

* Update PPOCRv2/v3 examples

* Update auto compression configs

* Add new quantization support for PaddleClas model

* Update quantized Yolov6s model download link

* Improve PPOCR comments

* Add English doc for quantization

* Fix PPOCR rec model bug

* Add new PaddleSeg quantization support

* Add Ascend model list

* Support DirectML in onnxruntime

* Remove DirectML vision model example

* Improve OnnxRuntime DirectML

* Fix OpenCV CMake on Windows

* Recheck code style

Author: yunyaoXYY
Date: 2023-02-17 10:53:51 +08:00 (committed by GitHub)
Parent: efa46563f3
Commit: c38b7d4377
22 changed files with 393 additions and 60 deletions
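For context, the sketch below shows how an application could opt into the new DirectML path through FastDeploy's ONNX Runtime backend. This is a minimal, unverified sketch: `UseDirectML()` is assumed to be the RuntimeOption switch that selects `Device::DIRECTML` (the option-side change is not part of this file's diff), the `fastdeploy/runtime.h` header path is assumed, and `model.onnx` is a placeholder.

```cpp
#include <iostream>

#include "fastdeploy/runtime.h"  // assumed umbrella header for the Runtime API

int main() {
  fastdeploy::RuntimeOption option;
  option.UseOrtBackend();  // the ONNX Runtime backend patched in this commit
  option.UseDirectML();    // assumed helper selecting Device::DIRECTML;
                           // requires a FastDeploy build with WITH_DIRECTML=ON
  option.SetModelPath("model.onnx", "", fastdeploy::ModelFormat::ONNX);

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    std::cerr << "Failed to initialize the runtime." << std::endl;
    return -1;
  }
  std::cout << "Runtime initialized on the ORT DirectML path." << std::endl;
  return 0;
}
```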

File: fastdeploy/runtime/backends/ort/ort_backend.cc

@@ -1,3 +1,4 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -13,6 +14,7 @@
// limitations under the License.
#include "fastdeploy/runtime/backends/ort/ort_backend.h"
#include "fastdeploy/core/float16.h"
#include "fastdeploy/runtime/backends/ort/ops/adaptive_pool2d.h"
#include "fastdeploy/runtime/backends/ort/ops/multiclass_nms.h"
@@ -24,13 +26,12 @@
#include <memory>
namespace fastdeploy {
std::vector<OrtCustomOp*> OrtBackend::custom_operators_ =
std::vector<OrtCustomOp*>();
-void OrtBackend::BuildOption(const OrtBackendOption& option) {
+bool OrtBackend::BuildOption(const OrtBackendOption& option) {
option_ = option;
if (option.graph_optimization_level >= 0) {
session_options_.SetGraphOptimizationLevel(
@@ -45,6 +46,53 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
if (option.execution_mode >= 0) {
session_options_.SetExecutionMode(ExecutionMode(option.execution_mode));
}
#ifdef WITH_DIRECTML
// If DirectML is requested
if (option.device == Device::DIRECTML) {
auto all_providers = Ort::GetAvailableProviders();
bool support_dml = false;
std::string providers_msg = "";
for (size_t i = 0; i < all_providers.size(); ++i) {
providers_msg = providers_msg + all_providers[i] + ", ";
if (all_providers[i] == "DmlExecutionProvider") {
support_dml = true;
}
}
if (!support_dml) {
FDWARNING << "Compiled fastdeploy with onnxruntime doesn't "
"support DirectML, the available providers are "
<< providers_msg << "will fallback to CPUExecutionProvider."
<< "Please check if DirectML is installed successfully."
<< std::endl;
option_.device = Device::CPU;
} else {
// These options must be set as below when using DML.
session_options_.DisableMemPattern();
session_options_.SetExecutionMode(ExecutionMode(0));
// DML session_option
OrtApi const& ortApi = Ort::GetApi();
const OrtDmlApi* ortDmlApi;
ortApi.GetExecutionProviderApi(
"DML", ORT_API_VERSION, reinterpret_cast<const void**>(&ortDmlApi));
OrtStatus* onnx_dml_status =
ortDmlApi->SessionOptionsAppendExecutionProvider_DML(session_options_,
0);
if (onnx_dml_status != nullptr) {
FDERROR
<< "DirectML is not support in your machine, the program will exit."
<< std::endl;
ortApi.ReleaseStatus(onnx_dml_status);
return false;
}
}
return true;
}
#endif
// CUDA
if (option.device == Device::GPU) {
auto all_providers = Ort::GetAvailableProviders();
bool support_cuda = false;
@@ -70,11 +118,14 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
}
session_options_.AppendExecutionProvider_CUDA(cuda_options);
}
return true;
}
return true;
}
bool OrtBackend::Init(const RuntimeOption& option) {
-if (option.device != Device::CPU && option.device != Device::GPU) {
+if (option.device != Device::CPU && option.device != Device::GPU &&
+    option.device != Device::DIRECTML) {
FDERROR
<< "Backend::ORT only supports Device::CPU/Device::GPU, but now its "
<< option.device << "." << std::endl;
@@ -169,7 +220,11 @@ bool OrtBackend::InitFromOnnx(const std::string& model_file,
return false;
}
-BuildOption(option);
+if (!BuildOption(option)) {
+  FDERROR << "Failed to create the Ort backend option." << std::endl;
+  return false;
+}
InitCustomOperators();
session_ = {env_, model_file.data(), model_file.size(), session_options_};
binding_ = std::make_shared<Ort::IoBinding>(session_);
@@ -355,4 +410,4 @@ void OrtBackend::InitCustomOperators() {
#endif
}
} // namespace fastdeploy
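
To make the new code path easier to follow, here is a minimal standalone sketch of the same DirectML session-options setup the patch performs, written directly against the ONNX Runtime C/C++ API. It assumes a Windows build of onnxruntime with the DirectML execution provider (the package that ships dml_provider_factory.h); `model.onnx` is a placeholder path and error handling is reduced to the essentials.

```cpp
// Standalone sketch mirroring OrtBackend::BuildOption's DirectML branch.
#include <onnxruntime_cxx_api.h>
#include <dml_provider_factory.h>  // OrtDmlApi; ships with onnxruntime-directml

#include <iostream>
#include <string>
#include <vector>

int main() {
  // 1. Check that this onnxruntime build actually ships the DML provider,
  //    mirroring the FDWARNING fallback in the patch.
  bool has_dml = false;
  for (const auto& provider : Ort::GetAvailableProviders()) {
    if (provider == "DmlExecutionProvider") has_dml = true;
  }

  Ort::SessionOptions session_options;
  if (has_dml) {
    // DirectML requires memory pattern to be disabled and sequential execution.
    session_options.DisableMemPattern();
    session_options.SetExecutionMode(ORT_SEQUENTIAL);

    // Fetch the DML-specific API table and register the provider on adapter 0.
    const OrtApi& ort_api = Ort::GetApi();
    const OrtDmlApi* dml_api = nullptr;
    Ort::ThrowOnError(ort_api.GetExecutionProviderApi(
        "DML", ORT_API_VERSION, reinterpret_cast<const void**>(&dml_api)));
    OrtStatus* status = dml_api->SessionOptionsAppendExecutionProvider_DML(
        session_options, /*device_id=*/0);
    if (status != nullptr) {
      std::cerr << "Failed to enable the DirectML execution provider."
                << std::endl;
      ort_api.ReleaseStatus(status);
      return -1;
    }
  } else {
    std::cerr << "DmlExecutionProvider not available; falling back to CPU."
              << std::endl;
  }

  // 2. Create the session as usual (wide-string path because this is Windows).
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "dml_demo");
  Ort::Session session(env, L"model.onnx", session_options);
  return 0;
}
```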