diff --git a/benchmark/cpp/CMakeLists.txt b/benchmark/cpp/CMakeLists.txt
index c79e679c3..23e1ccf79 100755
--- a/benchmark/cpp/CMakeLists.txt
+++ b/benchmark/cpp/CMakeLists.txt
@@ -10,11 +10,14 @@ include_directories(${FASTDEPLOY_INCS})
 add_executable(benchmark_yolov5 ${PROJECT_SOURCE_DIR}/benchmark_yolov5.cc)
 add_executable(benchmark_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov8.cc)
+add_executable(benchmark_ppcls ${PROJECT_SOURCE_DIR}/benchmark_ppcls.cc)
 
 if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
   target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread)
   target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread)
+  target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags pthread)
 else()
   target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags)
   target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags)
+  target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags)
 endif()
diff --git a/benchmark/cpp/benchmark_ppcls.cc b/benchmark/cpp/benchmark_ppcls.cc
new file mode 100755
index 000000000..a62fcf80f
--- /dev/null
+++ b/benchmark/cpp/benchmark_ppcls.cc
@@ -0,0 +1,36 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "flags.h"
+#include "macros.h"
+#include "option.h"
+
+int main(int argc, char* argv[]) {
+  // Initialization
+  auto option = fastdeploy::RuntimeOption();
+  if (!CreateRuntimeOption(&option, argc, argv, true)) {
+    return -1;
+  }
+  auto im = cv::imread(FLAGS_image);
+  // Set max_batch_size 1 for best performance
+  option.trt_option.max_batch_size = 1;
+  auto model_file = FLAGS_model + sep + "inference.pdmodel";
+  auto params_file = FLAGS_model + sep + "inference.pdiparams";
+  auto config_file = FLAGS_model + sep + "inference_cls.yaml";
+  auto model_ppcls = fastdeploy::vision::classification::PaddleClasModel(
+      model_file, params_file, config_file, option);
+  fastdeploy::vision::ClassifyResult res;
+  BENCHMARK_MODEL(model_ppcls, model_ppcls.Predict(im, &res))
+  return 0;
+}
\ No newline at end of file
diff --git a/benchmark/cpp/benchmark_ppyolov8.cc b/benchmark/cpp/benchmark_ppyolov8.cc
index 5541696d5..b93121f54 100755
--- a/benchmark/cpp/benchmark_ppyolov8.cc
+++ b/benchmark/cpp/benchmark_ppyolov8.cc
@@ -12,20 +12,17 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "macros.h" #include "flags.h" +#include "macros.h" #include "option.h" int main(int argc, char* argv[]) { - google::ParseCommandLineFlags(&argc, &argv, true); - auto im = cv::imread(FLAGS_image); // Initialization auto option = fastdeploy::RuntimeOption(); - if (!CreateRuntimeOption(&option)) { - PrintUsage(); - return false; + if (!CreateRuntimeOption(&option, argc, argv, true)) { + return -1; } - PrintBenchmarkInfo(); + auto im = cv::imread(FLAGS_image); auto model_file = FLAGS_model + sep + "model.pdmodel"; auto params_file = FLAGS_model + sep + "model.pdiparams"; auto config_file = FLAGS_model + sep + "infer_cfg.yml"; diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc index 6ab3c5990..3dc84c487 100755 --- a/benchmark/cpp/benchmark_yolov5.cc +++ b/benchmark/cpp/benchmark_yolov5.cc @@ -12,20 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "macros.h" #include "flags.h" +#include "macros.h" #include "option.h" int main(int argc, char* argv[]) { - google::ParseCommandLineFlags(&argc, &argv, true); - auto im = cv::imread(FLAGS_image); // Initialization auto option = fastdeploy::RuntimeOption(); - if (!CreateRuntimeOption(&option)) { - PrintUsage(); - return false; + if (!CreateRuntimeOption(&option, argc, argv, true)) { + return -1; } - PrintBenchmarkInfo(); + auto im = cv::imread(FLAGS_image); auto model_yolov5 = fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option); fastdeploy::vision::DetectionResult res; @@ -34,4 +31,4 @@ int main(int argc, char* argv[]) { cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; return 0; -} +} \ No newline at end of file diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h index fd20e685c..e4c2f8d19 100755 --- a/benchmark/cpp/flags.h +++ b/benchmark/cpp/flags.h @@ -15,11 +15,12 @@ #pragma once #include "gflags/gflags.h" +#include "fastdeploy/benchmark/utils.h" #ifdef WIN32 -const char sep = '\\'; +static const char sep = '\\'; #else -const char sep = '/'; +static const char sep = '/'; #endif DEFINE_string(model, "", "Directory of the inference model."); @@ -44,7 +45,7 @@ DEFINE_bool( collect_memory_info, false, "Whether to collect memory info"); DEFINE_int32(sampling_interval, 50, "How often to collect memory info(ms)."); -void PrintUsage() { +static void PrintUsage() { std::cout << "Usage: infer_demo --model model_path --image img_path --device " "[cpu|gpu|xpu] --backend " "[default|ort|paddle|ov|trt|paddle_trt|lite] " @@ -55,7 +56,7 @@ void PrintUsage() { std::cout << "Default value of use_fp16: false" << std::endl; } -void PrintBenchmarkInfo() { +static void PrintBenchmarkInfo() { // Get model name std::vector model_names; fastdeploy::benchmark::Split(FLAGS_model, model_names, sep); @@ -76,7 +77,9 @@ void PrintBenchmarkInfo() { ss << "device_id: " << FLAGS_device_id << std::endl; } ss << "backend: " << FLAGS_backend << std::endl; - ss << "cpu_thread_nums: " << FLAGS_cpu_thread_nums << std::endl; + if (FLAGS_device == "cpu") { + ss << "cpu_thread_nums: " << FLAGS_cpu_thread_nums << std::endl; + } ss << "use_fp16: " << FLAGS_use_fp16 << std::endl; ss << "collect_memory_info: " << FLAGS_collect_memory_info << std::endl; if (FLAGS_collect_memory_info) { diff --git a/benchmark/cpp/option.h b/benchmark/cpp/option.h index 9989255e5..ef865ed6e 100755 --- a/benchmark/cpp/option.h +++ b/benchmark/cpp/option.h @@ -16,7 +16,9 @@ #include "fastdeploy/vision.h" -static bool 
+static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
+                                int argc, char* argv[], bool remove_flags) {
+  google::ParseCommandLineFlags(&argc, &argv, remove_flags);
   if (FLAGS_profile_mode == "runtime") {
     option->EnableProfiling(FLAGS_include_h2d_d2h, FLAGS_repeat, FLAGS_warmup);
   }
@@ -29,10 +31,11 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
   } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") {
     option->UseTrtBackend();
     if (FLAGS_backend == "paddle_trt") {
-      option->EnablePaddleToTrt();
+      option->UsePaddleInferBackend();
+      option->paddle_infer_option.enable_trt = true;
     }
     if (FLAGS_use_fp16) {
-      option->EnableTrtFP16();
+      option->trt_option.enable_fp16 = true;
     }
   } else if (FLAGS_backend == "default") {
     return true;
@@ -40,6 +43,7 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
       std::cout << "While inference with GPU, only support "
                    "default/ort/paddle/trt/paddle_trt now, "
                 << FLAGS_backend << " is not supported." << std::endl;
+      PrintUsage();
       return false;
     }
   } else if (FLAGS_device == "cpu") {
@@ -53,7 +57,7 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
     } else if (FLAGS_backend == "lite") {
       option->UsePaddleLiteBackend();
       if (FLAGS_use_fp16) {
-        option->EnableLiteFP16();
+        option->paddle_lite_option.enable_fp16 = true;
       }
     } else if (FLAGS_backend == "default") {
       return true;
@@ -61,6 +65,7 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
       std::cout << "While inference with CPU, only support "
                    "default/ort/ov/paddle/lite now, "
                 << FLAGS_backend << " is not supported." << std::endl;
+      PrintUsage();
       return false;
     }
   } else if (FLAGS_device == "xpu") {
@@ -72,7 +77,7 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
     } else if (FLAGS_backend == "lite") {
       option->UsePaddleLiteBackend();
       if (FLAGS_use_fp16) {
-        option->EnableLiteFP16();
+        option->paddle_lite_option.enable_fp16 = true;
      }
     } else if (FLAGS_backend == "default") {
       return true;
@@ -80,13 +85,15 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
       std::cout << "While inference with XPU, only support "
                    "default/ort/paddle/lite now, "
                 << FLAGS_backend << " is not supported." << std::endl;
+      PrintUsage();
       return false;
     }
   } else {
     std::cerr << "Only support device CPU/GPU/XPU now, " << FLAGS_device
               << " is not supported." << std::endl;
+    PrintUsage();
     return false;
   }
-
+  PrintBenchmarkInfo();
   return true;
 }
diff --git a/benchmark/python/benchmark_ppcls.py b/benchmark/python/benchmark_ppcls.py
index 20a62c9fc..34477456b 100755
--- a/benchmark/python/benchmark_ppcls.py
+++ b/benchmark/python/benchmark_ppcls.py
@@ -102,7 +102,10 @@ def build_option(args):
     elif backend in ["trt", "paddle_trt"]:
         option.use_trt_backend()
         if backend == "paddle_trt":
-            option.enable_paddle_to_trt()
+            option.use_paddle_infer_backend()
+            option.paddle_infer_option.enable_trt = True
+            # Set max_batch_size 1 for best performance
+            option.trt_option.max_batch_size = 1
         if enable_trt_fp16:
             option.enable_trt_fp16()
     elif backend == "default":
diff --git a/benchmark/python/benchmark_ppdet.py b/benchmark/python/benchmark_ppdet.py
index c2b1da6b1..eca33df09 100755
--- a/benchmark/python/benchmark_ppdet.py
+++ b/benchmark/python/benchmark_ppdet.py
@@ -115,7 +115,8 @@ def build_option(args):
     elif backend in ["trt", "paddle_trt"]:
         option.use_trt_backend()
         if backend == "paddle_trt":
-            option.enable_paddle_to_trt()
+            option.use_paddle_infer_backend()
+            option.paddle_infer_option.enable_trt = True
         if enable_trt_fp16:
             option.enable_trt_fp16()
     elif backend == "default":
diff --git a/benchmark/python/benchmark_ppocr.py b/benchmark/python/benchmark_ppocr.py
old mode 100644
new mode 100755
index 2de86be68..90f11de34
--- a/benchmark/python/benchmark_ppocr.py
+++ b/benchmark/python/benchmark_ppocr.py
@@ -92,8 +92,9 @@ def build_option(args):
     elif backend in ["trt", "paddle_trt"]:
         option.use_trt_backend()
         if backend == "paddle_trt":
-            option.enable_paddle_trt_collect_shape()
-            option.enable_paddle_to_trt()
+            option.paddle_infer_option.collect_trt_shape = True
+            option.use_paddle_infer_backend()
+            option.paddle_infer_option.enable_trt = True
         if enable_trt_fp16:
             option.enable_trt_fp16()
     elif backend == "default":
diff --git a/benchmark/python/benchmark_ppseg.py b/benchmark/python/benchmark_ppseg.py
index 9408e7b64..4ff4a3808 100755
--- a/benchmark/python/benchmark_ppseg.py
+++ b/benchmark/python/benchmark_ppseg.py
@@ -96,8 +96,9 @@ def build_option(args):
             option.set_trt_input_shape("x", [1, 3, 192, 192],
                                        [1, 3, 192, 192], [1, 3, 192, 192])
         if backend == "paddle_trt":
-            option.enable_paddle_trt_collect_shape()
-            option.enable_paddle_to_trt()
+            option.paddle_infer_option.collect_trt_shape = True
+            option.use_paddle_infer_backend()
+            option.paddle_infer_option.enable_trt = True
         if enable_trt_fp16:
             option.enable_trt_fp16()
     elif backend == "default":
diff --git a/benchmark/python/benchmark_uie.py b/benchmark/python/benchmark_uie.py
old mode 100644
new mode 100755
index 44c562d7e..e197ef16e
--- a/benchmark/python/benchmark_uie.py
+++ b/benchmark/python/benchmark_uie.py
@@ -76,8 +76,9 @@ def build_option(args):
         else:
             option.use_trt_backend()
             if args.backend == 'paddle_trt':
-                option.enable_paddle_to_trt()
-                option.enable_paddle_trt_collect_shape()
+                option.paddle_infer_option.collect_trt_shape = True
+                option.use_paddle_infer_backend()
+                option.paddle_infer_option.enable_trt = True
             trt_file = os.path.join(args.model_dir, "infer.trt")
             option.set_trt_input_shape(
                 'input_ids',
diff --git a/benchmark/python/benchmark_yolo.py b/benchmark/python/benchmark_yolo.py
index e0ee0ce23..a07127200 100755
--- a/benchmark/python/benchmark_yolo.py
+++ b/benchmark/python/benchmark_yolo.py
@@ -85,7 +85,8 @@ def build_option(args):
     elif backend in ["trt", "paddle_trt"]:
         option.use_trt_backend()
         if backend == "paddle_trt":
-            option.enable_paddle_to_trt()
+            option.use_paddle_infer_backend()
+            option.paddle_infer_option.enable_trt = True
         if enable_trt_fp16:
             option.enable_trt_fp16()
     elif backend == "default":
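
Usage sketch (illustrative, not part of the patch): after this change the C++ benchmarks parse gflags inside CreateRuntimeOption, so the new classification binary is run directly with the flags declared in benchmark/cpp/flags.h and option.h (--model, --image, --device, --backend, --use_fp16, --profile_mode, --repeat, --warmup). The model directory, image name, and repeat/warmup counts below are placeholders, assuming a PaddleClas export that contains inference.pdmodel, inference.pdiparams, and inference_cls.yaml.

  # Hypothetical invocation; all paths and counts are examples only.
  ./benchmark_ppcls --model ResNet50_vd_infer --image test.jpg \
      --device gpu --backend paddle_trt --use_fp16 true \
      --profile_mode runtime --repeat 1000 --warmup 50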