Revert "[Benchmark]Benchmark cpp for YOLOv5" (#1250)

Revert "[Benchmark]Benchmark cpp for YOLOv5 (#1224)"

This reverts commit c487359e33.
Author:    Jason
Date:      2023-02-07 22:14:48 +08:00
Committer: GitHub
Parent:    c487359e33
Commit:    c25aa71fa9
27 changed files with 44 additions and 422 deletions


@@ -19,7 +19,6 @@ import numpy as np
 import time
 from tqdm import tqdm
 def parse_arguments():
 import argparse
 import ast

@@ -281,8 +280,7 @@ if __name__ == '__main__':
 result = model.predict(im_ori)
 profile_time = model.get_profile_time()
 dump_result["runtime"] = profile_time * 1000
-f.writelines("Runtime(ms): {} \n".format(
-    str(dump_result["runtime"])))
+f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"])))
 print("Runtime(ms): {} \n".format(str(dump_result["runtime"])))
 else:
 # end2end

@@ -294,8 +292,7 @@ if __name__ == '__main__':
 result = model.predict(im_ori)
 end = time.time()
 dump_result["end2end"] = ((end - start) / args.repeat) * 1000.0
-f.writelines("End2End(ms): {} \n".format(
-    str(dump_result["end2end"])))
+f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"])))
 print("End2End(ms): {} \n".format(str(dump_result["end2end"])))
 if enable_collect_memory_info:


@@ -17,9 +17,9 @@ import cv2
 import os
 import numpy as np
 import time
-from sympy import EX
 from tqdm import tqdm
 def parse_arguments():
 import argparse
 import ast

@@ -164,8 +164,8 @@ def build_option(args):
 format(backend))
 else:
 raise Exception(
-"Only support device CPU/GPU/Kunlunxin/Ascend now, {} is not supported.".
-    format(device))
+"Only support device CPU/GPU/Kunlunxin/Ascend now, {} is not supported.".format(
+    device))
 return option

@@ -340,8 +340,7 @@ if __name__ == '__main__':
 result = model.predict(im_ori)
 profile_time = model.get_profile_time()
 dump_result["runtime"] = profile_time * 1000
-f.writelines("Runtime(ms): {} \n".format(
-    str(dump_result["runtime"])))
+f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"])))
 print("Runtime(ms): {} \n".format(str(dump_result["runtime"])))
 else:
 # end2end

@@ -353,8 +352,7 @@ if __name__ == '__main__':
 result = model.predict(im_ori)
 end = time.time()
 dump_result["end2end"] = ((end - start) / args.repeat) * 1000.0
-f.writelines("End2End(ms): {} \n".format(
-    str(dump_result["end2end"])))
+f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"])))
 print("End2End(ms): {} \n".format(str(dump_result["end2end"])))
 if enable_collect_memory_info:


@@ -1,17 +0,0 @@
PROJECT(infer_demo C CXX)
CMAKE_MINIMUM_REQUIRED (VERSION 3.10)

# specify the decompress directory of FastDeploy SDK
option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")

include(${FASTDEPLOY_INSTALL_DIR}/utils/gflags.cmake)
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})

add_executable(benchmark_yolov5 ${PROJECT_SOURCE_DIR}/benchmark_yolov5.cc)

if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
  target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread)
else()
  target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags)
endif()


@@ -1,110 +0,0 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/benchmark/utils.h"
#include "fastdeploy/vision.h"
#include "flags.h"
bool RunModel(std::string model_file, std::string image_file, size_t warmup,
size_t repeats, size_t dump_period, std::string cpu_mem_file_name,
std::string gpu_mem_file_name) {
// Initialization
auto option = fastdeploy::RuntimeOption();
if (!CreateRuntimeOption(&option)) {
PrintUsage();
return false;
}
if (FLAGS_profile_mode == "runtime") {
option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup);
}
auto model = fastdeploy::vision::detection::YOLOv5(model_file, "", option);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return false;
}
auto im = cv::imread(image_file);
// For Runtime
if (FLAGS_profile_mode == "runtime") {
fastdeploy::vision::DetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return false;
}
double profile_time = model.GetProfileTime() * 1000;
std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl;
auto vis_im = fastdeploy::vision::VisDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
} else {
// For End2End
// Step1: warm up for warmup times
std::cout << "Warmup " << warmup << " times..." << std::endl;
for (int i = 0; i < warmup; i++) {
fastdeploy::vision::DetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return false;
}
}
std::vector<float> end2end_statis;
// Step2: repeat for repeats times
std::cout << "Counting time..." << std::endl;
fastdeploy::TimeCounter tc;
fastdeploy::vision::DetectionResult res;
for (int i = 0; i < repeats; i++) {
if (FLAGS_collect_memory_info && i % dump_period == 0) {
fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
FLAGS_device_id);
}
tc.Start();
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return false;
}
tc.End();
end2end_statis.push_back(tc.Duration() * 1000);
}
float end2end = std::accumulate(end2end_statis.end() - repeats,
end2end_statis.end(), 0.f) /
repeats;
std::cout << "End2End(ms): " << end2end << "ms." << std::endl;
auto vis_im = fastdeploy::vision::VisDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
return true;
}
int main(int argc, char* argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
int repeats = FLAGS_repeat;
int warmup = FLAGS_warmup;
int dump_period = FLAGS_dump_period;
std::string cpu_mem_file_name = "result_cpu.txt";
std::string gpu_mem_file_name = "result_gpu.txt";
// Run model
if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period,
cpu_mem_file_name, gpu_mem_file_name) != true) {
exit(1);
}
if (FLAGS_collect_memory_info) {
float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
std::cout << "cpu_rss_mb: " << cpu_mem << "MB." << std::endl;
std::cout << "gpu_rss_mb: " << gpu_mem << "MB." << std::endl;
}
return 0;
}
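For reference, the removed file above follows a standard warmup/repeat/average timing scheme. A minimal, self-contained sketch of the same pattern using only std::chrono is shown below; DummyPredict is an illustrative stand-in for model.Predict and is not part of this commit:

#include <chrono>
#include <iostream>
#include <numeric>
#include <vector>

// Placeholder for the real inference call (illustrative only).
bool DummyPredict() { return true; }

int main() {
  const int warmup = 200;
  const int repeats = 1000;
  // Step1: warm up so one-off initialization costs stay out of the timings.
  for (int i = 0; i < warmup; i++) DummyPredict();
  // Step2: time each repeat and average, as the removed benchmark did.
  std::vector<float> end2end_statis;
  for (int i = 0; i < repeats; i++) {
    auto start = std::chrono::high_resolution_clock::now();
    DummyPredict();
    auto end = std::chrono::high_resolution_clock::now();
    std::chrono::duration<float, std::milli> ms = end - start;
    end2end_statis.push_back(ms.count());
  }
  float end2end = std::accumulate(end2end_statis.begin(),
                                  end2end_statis.end(), 0.f) / repeats;
  std::cout << "End2End(ms): " << end2end << std::endl;
  return 0;
}

Averaging over many repeats after a warmup phase keeps model loading and kernel setup out of the reported latency, which is why the removed benchmark separates the two loops.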


@@ -1,99 +0,0 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "gflags/gflags.h"
#include "fastdeploy/utils/perf.h"

DEFINE_string(model, "", "Directory of the inference model.");
DEFINE_string(image, "", "Path of the image file.");
DEFINE_string(device, "cpu",
              "Type of inference device, support 'cpu' or 'gpu'.");
DEFINE_int32(device_id, 0, "device(gpu) id.");
DEFINE_int32(warmup, 200, "Number of warmup for profiling.");
DEFINE_int32(repeat, 1000, "Number of repeats for profiling.");
DEFINE_string(profile_mode, "runtime", "runtime or end2end.");
DEFINE_string(backend, "default",
              "The inference runtime backend, support: ['default', 'ort', "
              "'paddle', 'ov', 'trt', 'paddle_trt']");
DEFINE_int32(cpu_thread_nums, 8, "Set numbers of cpu thread.");
DEFINE_bool(
    include_h2d_d2h, false, "Whether run profiling with h2d and d2h.");
DEFINE_bool(
    use_fp16, false,
    "Whether to use FP16 mode, only support 'trt' and 'paddle_trt' backend");
DEFINE_bool(
    collect_memory_info, false, "Whether to collect memory info");
DEFINE_int32(dump_period, 100, "How often to collect memory info.");

void PrintUsage() {
  std::cout << "Usage: infer_demo --model model_path --image img_path --device "
               "[cpu|gpu] --backend "
               "[default|ort|paddle|ov|trt|paddle_trt] "
               "--use_fp16 false"
            << std::endl;
  std::cout << "Default value of device: cpu" << std::endl;
  std::cout << "Default value of backend: default" << std::endl;
  std::cout << "Default value of use_fp16: false" << std::endl;
}

bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
  if (FLAGS_device == "gpu") {
    option->UseGpu();
    if (FLAGS_backend == "ort") {
      option->UseOrtBackend();
    } else if (FLAGS_backend == "paddle") {
      option->UsePaddleInferBackend();
    } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") {
      option->UseTrtBackend();
      option->SetTrtInputShape("input", {1, 3, 112, 112});
      if (FLAGS_backend == "paddle_trt") {
        option->EnablePaddleToTrt();
      }
      if (FLAGS_use_fp16) {
        option->EnableTrtFP16();
      }
    } else if (FLAGS_backend == "default") {
      return true;
    } else {
      std::cout << "While inference with GPU, only support "
                   "default/ort/paddle/trt/paddle_trt now, "
                << FLAGS_backend << " is not supported." << std::endl;
      return false;
    }
  } else if (FLAGS_device == "cpu") {
    option->SetCpuThreadNum(FLAGS_cpu_thread_nums);
    if (FLAGS_backend == "ort") {
      option->UseOrtBackend();
    } else if (FLAGS_backend == "ov") {
      option->UseOpenVINOBackend();
    } else if (FLAGS_backend == "paddle") {
      option->UsePaddleInferBackend();
    } else if (FLAGS_backend == "default") {
      return true;
    } else {
      std::cout << "While inference with CPU, only support "
                   "default/ort/ov/paddle now, "
                << FLAGS_backend << " is not supported." << std::endl;
      return false;
    }
  } else {
    std::cerr << "Only support device CPU/GPU now, " << FLAGS_device
              << " is not supported." << std::endl;
    return false;
  }
  return true;
}
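As background for the removed flags header above: it uses the standard gflags pattern, where each DEFINE_* macro creates a global FLAGS_* variable that google::ParseCommandLineFlags fills from the command line. A minimal standalone sketch, reusing two of the flag names defined above (the surrounding program is illustrative, not part of this commit):

#include <iostream>
#include "gflags/gflags.h"

// Each DEFINE_* creates a global FLAGS_<name> variable with a default value.
DEFINE_string(model, "", "Directory of the inference model.");
DEFINE_int32(repeat, 1000, "Number of repeats for profiling.");

int main(int argc, char* argv[]) {
  // Fills FLAGS_model / FLAGS_repeat from e.g. --model=yolov5_dir --repeat=50
  google::ParseCommandLineFlags(&argc, &argv, true);
  std::cout << "model: " << FLAGS_model << ", repeat: " << FLAGS_repeat
            << std::endl;
  return 0;
}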

fastdeploy/benchmark/benchmark.h  Executable file → Normal file  (0 lines changed)

fastdeploy/benchmark/option.h  Executable file → Normal file  (2 lines changed)

@@ -26,7 +26,7 @@ struct BenchmarkOption {
   int warmup = 50; ///< Warmup for backend inference.
   int repeats = 100; ///< Repeats for backend inference.
   bool enable_profile = false; ///< Whether to use profile or not.
-  bool include_h2d_d2h = false; ///< Whether to include time of H2D_D2H for time of runtime. // NOLINT
+  bool include_h2d_d2h = false; ///< Whether to include time of H2D_D2H for time of runtime.
 
   friend std::ostream& operator<<(
       std::ostream& output, const BenchmarkOption &option) {


@@ -1,93 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/benchmark/utils.h"
namespace fastdeploy {
namespace benchmark {
void DumpCurrentCpuMemoryUsage(const std::string& name) {
int iPid = static_cast<int>(getpid());
std::string command = "pmap -x " + std::to_string(iPid) + " | grep total";
FILE* pp = popen(command.data(), "r");
if (!pp) return;
char tmp[1024];
while (fgets(tmp, sizeof(tmp), pp) != NULL) {
std::ofstream write;
write.open(name, std::ios::app);
write << tmp;
write.close();
}
pclose(pp);
return;
}
void DumpCurrentGpuMemoryUsage(const std::string& name, int device_id) {
std::string command = "nvidia-smi --id=" + std::to_string(device_id) +
" --query-gpu=index,uuid,name,timestamp,memory.total,"
"memory.free,memory.used,utilization.gpu,utilization."
"memory --format=csv,noheader,nounits";
FILE* pp = popen(command.data(), "r");
if (!pp) return;
char tmp[1024];
while (fgets(tmp, sizeof(tmp), pp) != NULL) {
std::ofstream write;
write.open(name, std::ios::app);
write << tmp;
write.close();
}
pclose(pp);
return;
}
float GetCpuMemoryUsage(const std::string& name) {
std::ifstream read(name);
std::string line;
float max_cpu_mem = -1;
while (getline(read, line)) {
std::stringstream ss(line);
std::string tmp;
std::vector<std::string> nums;
while (getline(ss, tmp, ' ')) {
tmp = strip(tmp);
if (tmp.empty()) continue;
nums.push_back(tmp);
}
max_cpu_mem = std::max(max_cpu_mem, stof(nums[3]));
}
return max_cpu_mem / 1024;
}
float GetGpuMemoryUsage(const std::string& name) {
std::ifstream read(name);
std::string line;
float max_gpu_mem = -1;
while (getline(read, line)) {
std::stringstream ss(line);
std::string tmp;
std::vector<std::string> nums;
while (getline(ss, tmp, ',')) {
tmp = strip(tmp);
if (tmp.empty()) continue;
nums.push_back(tmp);
}
max_gpu_mem = std::max(max_gpu_mem, stof(nums[6]));
}
return max_gpu_mem;
}
} // namespace benchmark
} // namespace fastdeploy
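The two Dump* functions above capture the output of an external command (pmap or nvidia-smi) via POSIX popen and append it to a log file, which the Get* functions later parse. A minimal sketch of that capture loop, with a harmless echo command substituted for illustration (the output file name is illustrative):

#include <cstdio>
#include <fstream>
#include <string>

int main() {
  // Illustrative command; the removed code used "pmap -x <pid> | grep total".
  std::string command = "echo total 1024 512 256";
  FILE* pp = popen(command.c_str(), "r");
  if (!pp) return 1;
  char tmp[1024];
  // Append every line of the command's output to a log file for later parsing.
  while (fgets(tmp, sizeof(tmp), pp) != NULL) {
    std::ofstream write("result_cpu.txt", std::ios::app);
    write << tmp;
  }
  pclose(pp);
  return 0;
}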


@@ -1,53 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <sys/types.h>
#include <unistd.h>
#include <cmath>

#include "fastdeploy/utils/utils.h"

namespace fastdeploy {
namespace benchmark {

// Remove the ch characters at both ends of str
std::string strip(const std::string& str, char ch = ' ') {
  int i = 0;
  while (str[i] == ch) {
    i++;
  }
  int j = str.size() - 1;
  while (str[j] == ch) {
    j--;
  }
  return str.substr(i, j + 1 - i);
}

// Record current cpu memory usage into file
FASTDEPLOY_DECL void DumpCurrentCpuMemoryUsage(const std::string& name);

// Record current gpu memory usage into file
FASTDEPLOY_DECL void DumpCurrentGpuMemoryUsage(const std::string& name,
                                               int device_id);

// Get Max cpu memory usage
FASTDEPLOY_DECL float GetCpuMemoryUsage(const std::string& name);

// Get Max gpu memory usage
FASTDEPLOY_DECL float GetGpuMemoryUsage(const std::string& name);

}  // namespace benchmark
}  // namespace fastdeploy
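For illustration, a standalone copy of the strip() helper declared above together with a small usage example; the parsers in the memory-usage code use it to drop empty fields when splitting pmap and nvidia-smi output on spaces or commas (the main function here is illustrative only):

#include <iostream>
#include <string>

// Standalone copy of the strip() helper from the removed header.
std::string strip(const std::string& str, char ch = ' ') {
  int i = 0;
  while (str[i] == ch) i++;
  int j = str.size() - 1;
  while (str[j] == ch) j--;
  return str.substr(i, j + 1 - i);
}

int main() {
  std::cout << "[" << strip("  1024  ") << "]" << std::endl;  // prints [1024]
  return 0;
}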


@@ -81,5 +81,4 @@ struct LiteBackendOption {
       nnadapter_dynamic_shape_info = {{"", {{0}}}};
   std::vector<std::string> nnadapter_device_names = {};
 };
 }  // namespace fastdeploy

fastdeploy/runtime/runtime_option.h  Executable file → Normal file  (0 lines changed)

fastdeploy/utils/utils.h  Executable file → Normal file  (0 lines changed)