diff --git a/benchmark/benchmark_ppcls.py b/benchmark/benchmark_ppcls.py
index 039a07cc9..8eeeb8cfc 100755
--- a/benchmark/benchmark_ppcls.py
+++ b/benchmark/benchmark_ppcls.py
@@ -16,9 +16,6 @@ import fastdeploy as fd
 import cv2
 import os
 import numpy as np
-import pynvml
-import psutil
-import GPUtil
 import time
 
 
@@ -112,6 +109,8 @@ def build_option(args):
 
 
 def get_current_memory_mb(gpu_id=None):
+    import pynvml
+    import psutil
     pid = os.getpid()
     p = psutil.Process(pid)
     info = p.memory_full_info()
@@ -126,6 +125,7 @@ def get_current_memory_mb(gpu_id=None):
 
 
 def get_current_gputil(gpu_id):
+    import GPUtil
     GPUs = GPUtil.getGPUs()
     gpu_load = GPUs[gpu_id].load
     return gpu_load
diff --git a/benchmark/benchmark_ppdet.py b/benchmark/benchmark_ppdet.py
index 6cabc4d4e..6d08aafb8 100755
--- a/benchmark/benchmark_ppdet.py
+++ b/benchmark/benchmark_ppdet.py
@@ -16,11 +16,6 @@ import fastdeploy as fd
 import cv2
 import os
 import numpy as np
-import datetime
-import json
-import pynvml
-import psutil
-import GPUtil
 import time
 
 
@@ -114,6 +109,8 @@ def build_option(args):
 
 
 def get_current_memory_mb(gpu_id=None):
+    import pynvml
+    import psutil
     pid = os.getpid()
     p = psutil.Process(pid)
     info = p.memory_full_info()
@@ -128,6 +125,7 @@ def get_current_memory_mb(gpu_id=None):
 
 
 def get_current_gputil(gpu_id):
+    import GPUtil
     GPUs = GPUtil.getGPUs()
     gpu_load = GPUs[gpu_id].load
     return gpu_load
diff --git a/benchmark/benchmark_ppseg.py b/benchmark/benchmark_ppseg.py
index ef57e3715..7d9df9f07 100755
--- a/benchmark/benchmark_ppseg.py
+++ b/benchmark/benchmark_ppseg.py
@@ -16,9 +16,6 @@ import fastdeploy as fd
 import cv2
 import os
 import numpy as np
-import pynvml
-import psutil
-import GPUtil
 import time
 
 
@@ -112,6 +109,8 @@ def build_option(args):
 
 
 def get_current_memory_mb(gpu_id=None):
+    import pynvml
+    import psutil
     pid = os.getpid()
     p = psutil.Process(pid)
     info = p.memory_full_info()
@@ -126,6 +125,7 @@ def get_current_memory_mb(gpu_id=None):
 
 
 def get_current_gputil(gpu_id):
+    import GPUtil
     GPUs = GPUtil.getGPUs()
     gpu_load = GPUs[gpu_id].load
     return gpu_load
diff --git a/benchmark/benchmark_yolo.py b/benchmark/benchmark_yolo.py
index aa6927c83..dd63cefb6 100755
--- a/benchmark/benchmark_yolo.py
+++ b/benchmark/benchmark_yolo.py
@@ -16,11 +16,6 @@ import fastdeploy as fd
 import cv2
 import os
 import numpy as np
-import datetime
-import json
-import pynvml
-import psutil
-import GPUtil
 import time
 
 
@@ -114,6 +109,8 @@ def build_option(args):
 
 
 def get_current_memory_mb(gpu_id=None):
+    import pynvml
+    import psutil
     pid = os.getpid()
     p = psutil.Process(pid)
     info = p.memory_full_info()
@@ -128,6 +125,7 @@ def get_current_memory_mb(gpu_id=None):
 
 
 def get_current_gputil(gpu_id):
+    import GPUtil
     GPUs = GPUtil.getGPUs()
     gpu_load = GPUs[gpu_id].load
     return gpu_load
diff --git a/benchmark/requirements.txt b/benchmark/requirements.txt
index 9d78d39fe..24ce15ab7 100644
--- a/benchmark/requirements.txt
+++ b/benchmark/requirements.txt
@@ -1,4 +1 @@
 numpy
-pynvml
-psutil
-GPUtil
diff --git a/examples/runtime/README.md b/examples/runtime/README.md
index b434bc99e..2f739b860 100755
--- a/examples/runtime/README.md
+++ b/examples/runtime/README.md
@@ -1,6 +1,6 @@
 # FastDeploy Runtime examples
 
-FastDeploy Runtime C++ 推理示例如下
+FastDeploy Runtime 推理示例如下
 
 ## Python 示例
 
@@ -12,6 +12,7 @@
 | python/infer_paddle_onnxruntime.py | Python | Deploy Paddle model with ONNX Runtime(CPU/GPU) |
 | python/infer_onnx_openvino.py | Python | Deploy ONNX model with OpenVINO(CPU) |
 | python/infer_onnx_tensorrt.py | Python | Deploy ONNX model with TensorRT(GPU) |
+| python/infer_onnx_onnxruntime.py | Python | Deploy ONNX model with ONNX Runtime(CPU/GPU) |
 
 ## C++ 示例
 
@@ -23,6 +24,7 @@
 | cpp/infer_paddle_onnxruntime.cc | C++ | Deploy Paddle model with ONNX Runtime(CPU/GPU) |
 | cpp/infer_onnx_openvino.cc | C++ | Deploy ONNX model with OpenVINO(CPU) |
 | cpp/infer_onnx_tensorrt.cc | C++ | Deploy ONNX model with TensorRT(GPU) |
+| cpp/infer_onnx_onnxruntime.cc | C++ | Deploy ONNX model with ONNX Runtime(CPU/GPU) |
 
 ## 详细部署文档
 
diff --git a/examples/runtime/cpp/infer_onnx_onnxruntime.cc b/examples/runtime/cpp/infer_onnx_onnxruntime.cc
new file mode 100644
index 000000000..4c27c1f65
--- /dev/null
+++ b/examples/runtime/cpp/infer_onnx_onnxruntime.cc
@@ -0,0 +1,64 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/runtime.h"
+
+namespace fd = fastdeploy;
+
+int main(int argc, char* argv[]) {
+  std::string model_file = "mobilenetv2.onnx";
+
+  // setup option
+  fd::RuntimeOption runtime_option;
+  runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX);
+  runtime_option.UseOrtBackend();
+  runtime_option.SetCpuThreadNum(12);
+
+  // **** GPU ****
+  // To use GPU, uncomment the following line
+  // runtime_option.UseGpu(0);
+
+  // init runtime
+  std::unique_ptr<fd::Runtime> runtime =
+      std::unique_ptr<fd::Runtime>(new fd::Runtime());
+  if (!runtime->Init(runtime_option)) {
+    std::cerr << "--- Init FastDeploy Runtime Failed! "
+              << "\n--- Model: " << model_file << std::endl;
+    return -1;
+  } else {
+    std::cout << "--- Init FastDeploy Runtime Done! "
+              << "\n--- Model: " << model_file << std::endl;
+  }
+  // init input tensor shape
+  fd::TensorInfo info = runtime->GetInputInfo(0);
+  info.shape = {1, 3, 224, 224};
+
+  std::vector<fd::FDTensor> input_tensors(1);
+  std::vector<fd::FDTensor> output_tensors(1);
+
+  std::vector<float> inputs_data;
+  inputs_data.resize(1 * 3 * 224 * 224);
+  for (size_t i = 0; i < inputs_data.size(); ++i) {
+    inputs_data[i] = std::rand() % 1000 / 1000.0f;
+  }
+  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
+
+  // set the input name
+  input_tensors[0].name = info.name;
+
+  runtime->Infer(input_tensors, &output_tensors);
+
+  output_tensors[0].PrintInfo();
+  return 0;
+}
\ No newline at end of file
diff --git a/examples/runtime/cpp/infer_paddle_onnxruntime.cc b/examples/runtime/cpp/infer_paddle_onnxruntime.cc
index d8d036a03..612966d73 100644
--- a/examples/runtime/cpp/infer_paddle_onnxruntime.cc
+++ b/examples/runtime/cpp/infer_paddle_onnxruntime.cc
@@ -25,6 +25,11 @@ int main(int argc, char* argv[]) {
   runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE);
   runtime_option.UseOrtBackend();
   runtime_option.SetCpuThreadNum(12);
+
+  // **** GPU ****
+  // To use GPU, uncomment the following line
+  // runtime_option.UseGpu(0);
+
   // init runtime
   std::unique_ptr<fd::Runtime> runtime =
       std::unique_ptr<fd::Runtime>(new fd::Runtime());
diff --git a/examples/runtime/python/infer_onnx_onnxruntime.py b/examples/runtime/python/infer_onnx_onnxruntime.py
new file mode 100644
index 000000000..ccb3ce5ec
--- /dev/null
+++ b/examples/runtime/python/infer_onnx_onnxruntime.py
@@ -0,0 +1,47 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import fastdeploy as fd
+from fastdeploy import ModelFormat
+import numpy as np
+
+# Download the model
+model_url = "https://bj.bcebos.com/fastdeploy/models/mobilenetv2.onnx"
+fd.download(model_url, path=".")
+
+option = fd.RuntimeOption()
+
+option.set_model_path("mobilenetv2.onnx", model_format=ModelFormat.ONNX)
+
+# **** CPU config ****
+option.use_cpu()
+option.use_ort_backend()
+option.set_cpu_thread_num(12)
+
+# **** GPU config ****
+# To use GPU, uncomment the following line
+# option.use_gpu(0)
+
+# Initialize the runtime
+runtime = fd.Runtime(option)
+
+# Get the name of the model input
+input_name = runtime.get_input_info(0).name
+
+# Run inference on random input data
+results = runtime.infer({
+    input_name: np.random.rand(1, 3, 224, 224).astype("float32")
+})
+
+print(results[0].shape)