From 47b1d27fbb11e88dd4ccfe13d11e4561c1d9eb4e Mon Sep 17 00:00:00 2001 From: WJJ1995 Date: Mon, 13 Feb 2023 16:12:54 +0800 Subject: [PATCH] [Benchmark] Add macros for benchmark (#1301) * add GPL lisence * add GPL-3.0 lisence * add GPL-3.0 lisence * add GPL-3.0 lisence * support yolov8 * add pybind for yolov8 * add yolov8 readme * add cpp benchmark * add cpu and gpu mem * public part split * add runtime mode * fixed bugs * add cpu_thread_nums * deal with comments * deal with comments * deal with comments * rm useless code * add FASTDEPLOY_DECL * add FASTDEPLOY_DECL * fixed for windows * mv rss to pss * mv rss to pss * Update utils.cc * use thread to collect mem * Add ResourceUsageMonitor * rm useless code * fixed bug * fixed typo * update ResourceUsageMonitor * fixed bug * fixed bug * add note for ResourceUsageMonitor * deal with comments * add macros * deal with comments * deal with comments * deal with comments * re-lint --------- Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com> --- benchmark/cpp/benchmark_ppyolov8.cc | 110 ++++------------------------ benchmark/cpp/benchmark_yolov5.cc | 97 ++++-------------------- benchmark/cpp/flags.h | 73 ------------------ benchmark/cpp/macros.h | 70 ++++++++++++++++++ benchmark/cpp/option.h | 92 +++++++++++++++++++++++ 5 files changed, 190 insertions(+), 252 deletions(-) mode change 100755 => 100644 benchmark/cpp/benchmark_yolov5.cc create mode 100755 benchmark/cpp/macros.h create mode 100755 benchmark/cpp/option.h diff --git a/benchmark/cpp/benchmark_ppyolov8.cc b/benchmark/cpp/benchmark_ppyolov8.cc index cff374200..545474635 100644 --- a/benchmark/cpp/benchmark_ppyolov8.cc +++ b/benchmark/cpp/benchmark_ppyolov8.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "fastdeploy/benchmark/utils.h" -#include "fastdeploy/vision.h" #include "flags.h" +#include "macros.h" +#include "option.h" #ifdef WIN32 const char sep = '\\'; @@ -22,104 +22,24 @@ const char sep = '\\'; const char sep = '/'; #endif -bool RunModel(std::string model_dir, std::string image_file, size_t warmup, - size_t repeats, size_t dump_period, std::string cpu_mem_file_name, - std::string gpu_mem_file_name) { +int main(int argc, char* argv[]) { + google::ParseCommandLineFlags(&argc, &argv, true); + auto im = cv::imread(FLAGS_image); // Initialization auto option = fastdeploy::RuntimeOption(); if (!CreateRuntimeOption(&option)) { PrintUsage(); return false; } - auto model_file = model_dir + sep + "model.pdmodel"; - auto params_file = model_dir + sep + "model.pdiparams"; - auto config_file = model_dir + sep + "infer_cfg.yml"; - - if (FLAGS_profile_mode == "runtime") { - option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup); - } - auto model = fastdeploy::vision::detection::PaddleYOLOv8( + auto model_file = FLAGS_model + sep + "model.pdmodel"; + auto params_file = FLAGS_model + sep + "model.pdiparams"; + auto config_file = FLAGS_model + sep + "infer_cfg.yml"; + auto model_ppyolov8 = fastdeploy::vision::detection::PaddleYOLOv8( model_file, params_file, config_file, option); - if (!model.Initialized()) { - std::cerr << "Failed to initialize." << std::endl; - return false; - } - auto im = cv::imread(image_file); - // For Runtime - if (FLAGS_profile_mode == "runtime") { - fastdeploy::vision::DetectionResult res; - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." 
<< std::endl; - return false; - } - double profile_time = model.GetProfileTime() * 1000; - std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im, res); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; - } else { - // For End2End - // Step1: warm up for warmup times - std::cout << "Warmup " << warmup << " times..." << std::endl; - for (int i = 0; i < warmup; i++) { - fastdeploy::vision::DetectionResult res; - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return false; - } - } - std::vector end2end_statis; - // Step2: repeat for repeats times - std::cout << "Counting time..." << std::endl; - fastdeploy::TimeCounter tc; - fastdeploy::vision::DetectionResult res; - for (int i = 0; i < repeats; i++) { - if (FLAGS_collect_memory_info && i % dump_period == 0) { - fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name); -#if defined(WITH_GPU) - fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name, - FLAGS_device_id); -#endif - } - tc.Start(); - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return false; - } - tc.End(); - end2end_statis.push_back(tc.Duration() * 1000); - } - float end2end = std::accumulate(end2end_statis.end() - repeats, - end2end_statis.end(), 0.f) / - repeats; - std::cout << "End2End(ms): " << end2end << "ms." << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im, res); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; - } - - return true; -} - -int main(int argc, char* argv[]) { - google::ParseCommandLineFlags(&argc, &argv, true); - int repeats = FLAGS_repeat; - int warmup = FLAGS_warmup; - int dump_period = FLAGS_dump_period; - std::string cpu_mem_file_name = "result_cpu.txt"; - std::string gpu_mem_file_name = "result_gpu.txt"; - // Run model - if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period, - cpu_mem_file_name, gpu_mem_file_name) != true) { - exit(1); - } - if (FLAGS_collect_memory_info) { - float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name); - std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl; -#if defined(WITH_GPU) - float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name); - std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl; -#endif - } + fastdeploy::vision::DetectionResult res; + BENCHMARK_MODEL(model_ppyolov8, model_ppyolov8.Predict(im, &res)) + auto vis_im = fastdeploy::vision::VisDetection(im, res); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; return 0; -} +} \ No newline at end of file diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc old mode 100755 new mode 100644 index 2e5df6b1c..5b2cab855 --- a/benchmark/cpp/benchmark_yolov5.cc +++ b/benchmark/cpp/benchmark_yolov5.cc @@ -12,96 +12,25 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "fastdeploy/benchmark/utils.h" -#include "fastdeploy/vision.h" #include "flags.h" +#include "macros.h" +#include "option.h" -bool RunModel(std::string model_file, std::string image_file, size_t warmup, - size_t repeats, size_t sampling_interval) { +int main(int argc, char* argv[]) { + google::ParseCommandLineFlags(&argc, &argv, true); + auto im = cv::imread(FLAGS_image); // Initialization auto option = fastdeploy::RuntimeOption(); if (!CreateRuntimeOption(&option)) { PrintUsage(); return false; } - if (FLAGS_profile_mode == "runtime") { - option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup); - } - auto model = fastdeploy::vision::detection::YOLOv5(model_file, "", option); - if (!model.Initialized()) { - std::cerr << "Failed to initialize." << std::endl; - return false; - } - auto im = cv::imread(image_file); - // For collect memory info - fastdeploy::benchmark::ResourceUsageMonitor resource_moniter( - sampling_interval, FLAGS_device_id); - if (FLAGS_collect_memory_info) { - resource_moniter.Start(); - } - // For Runtime - if (FLAGS_profile_mode == "runtime") { - fastdeploy::vision::DetectionResult res; - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return false; - } - double profile_time = model.GetProfileTime() * 1000; - std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im, res); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; - } else { - // For End2End - // Step1: warm up for warmup times - std::cout << "Warmup " << warmup << " times..." << std::endl; - for (int i = 0; i < warmup; i++) { - fastdeploy::vision::DetectionResult res; - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return false; - } - } - // Step2: repeat for repeats times - std::cout << "Counting time..." << std::endl; - std::cout << "Repeat " << repeats << " times..." << std::endl; - fastdeploy::vision::DetectionResult res; - fastdeploy::TimeCounter tc; - tc.Start(); - for (int i = 0; i < repeats; i++) { - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return false; - } - } - tc.End(); - double end2end = tc.Duration() / repeats * 1000; - std::cout << "End2End(ms): " << end2end << "ms." << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im, res); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; - } - if (FLAGS_collect_memory_info) { - float cpu_mem = resource_moniter.GetMaxCpuMem(); - float gpu_mem = resource_moniter.GetMaxGpuMem(); - float gpu_util = resource_moniter.GetMaxGpuUtil(); - std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl; - std::cout << "gpu_pss_mb: " << gpu_mem << "MB." 
<< std::endl; - std::cout << "gpu_util: " << gpu_util << std::endl; - resource_moniter.Stop(); - } - - return true; -} - -int main(int argc, char* argv[]) { - google::ParseCommandLineFlags(&argc, &argv, true); - int repeats = FLAGS_repeat; - int warmup = FLAGS_warmup; - int sampling_interval = FLAGS_sampling_interval; - // Run model - if (!RunModel(FLAGS_model, FLAGS_image, warmup, repeats, sampling_interval)) { - exit(1); - } + auto model_yolov5 = + fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option); + fastdeploy::vision::DetectionResult res; + BENCHMARK_MODEL(model_yolov5, model_yolov5.Predict(im, &res)) + auto vis_im = fastdeploy::vision::VisDetection(im, res); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; return 0; -} \ No newline at end of file +} diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h index 64f22c702..7f8c3a29f 100755 --- a/benchmark/cpp/flags.h +++ b/benchmark/cpp/flags.h @@ -15,7 +15,6 @@ #pragma once #include "gflags/gflags.h" -#include "fastdeploy/utils/perf.h" DEFINE_string(model, "", "Directory of the inference model."); DEFINE_string(image, "", "Path of the image file."); @@ -49,75 +48,3 @@ void PrintUsage() { std::cout << "Default value of backend: default" << std::endl; std::cout << "Default value of use_fp16: false" << std::endl; } - -bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { - if (FLAGS_device == "gpu") { - option->UseGpu(FLAGS_device_id); - if (FLAGS_backend == "ort") { - option->UseOrtBackend(); - } else if (FLAGS_backend == "paddle") { - option->UsePaddleInferBackend(); - } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") { - option->UseTrtBackend(); - if (FLAGS_backend == "paddle_trt") { - option->EnablePaddleToTrt(); - } - if (FLAGS_use_fp16) { - option->EnableTrtFP16(); - } - } else if (FLAGS_backend == "default") { - return true; - } else { - std::cout << "While inference with GPU, only support " - "default/ort/paddle/trt/paddle_trt now, " - << FLAGS_backend << " is not supported." << std::endl; - return false; - } - } else if (FLAGS_device == "cpu") { - option->SetCpuThreadNum(FLAGS_cpu_thread_nums); - if (FLAGS_backend == "ort") { - option->UseOrtBackend(); - } else if (FLAGS_backend == "ov") { - option->UseOpenVINOBackend(); - } else if (FLAGS_backend == "paddle") { - option->UsePaddleInferBackend(); - } else if (FLAGS_backend == "lite") { - option->UsePaddleLiteBackend(); - if (FLAGS_use_fp16) { - option->EnableLiteFP16(); - } - } else if (FLAGS_backend == "default") { - return true; - } else { - std::cout << "While inference with CPU, only support " - "default/ort/ov/paddle/lite now, " - << FLAGS_backend << " is not supported." << std::endl; - return false; - } - } else if (FLAGS_device == "xpu") { - option->UseKunlunXin(FLAGS_device_id); - if (FLAGS_backend == "ort") { - option->UseOrtBackend(); - } else if (FLAGS_backend == "paddle") { - option->UsePaddleInferBackend(); - } else if (FLAGS_backend == "lite") { - option->UsePaddleLiteBackend(); - if (FLAGS_use_fp16) { - option->EnableLiteFP16(); - } - } else if (FLAGS_backend == "default") { - return true; - } else { - std::cout << "While inference with XPU, only support " - "default/ort/paddle/lite now, " - << FLAGS_backend << " is not supported." << std::endl; - return false; - } - } else { - std::cerr << "Only support device CPU/GPU/XPU now, " << FLAGS_device - << " is not supported." 
<< std::endl; - return false; - } - - return true; -} diff --git a/benchmark/cpp/macros.h b/benchmark/cpp/macros.h new file mode 100755 index 000000000..bebd26e0d --- /dev/null +++ b/benchmark/cpp/macros.h @@ -0,0 +1,70 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "fastdeploy/benchmark/utils.h" +#include "fastdeploy/utils/perf.h" + +#define BENCHMARK_MODEL(MODEL_NAME, BENCHMARK_FUNC) \ +{ \ + std::cout << "====" << #MODEL_NAME << "====" << std::endl; \ + if (!MODEL_NAME.Initialized()) { \ + std::cerr << "Failed to initialize." << std::endl; \ + return 0; \ + } \ + auto __im__ = cv::imread(FLAGS_image); \ + fastdeploy::benchmark::ResourceUsageMonitor __resource_moniter__( \ + FLAGS_sampling_interval, FLAGS_device_id); \ + if (FLAGS_collect_memory_info) { \ + __resource_moniter__.Start(); \ + } \ + if (FLAGS_profile_mode == "runtime") { \ + if (!BENCHMARK_FUNC) { \ + std::cerr << "Failed to predict." << std::endl; \ + return 0; \ + } \ + double __profile_time__ = MODEL_NAME.GetProfileTime() * 1000; \ + std::cout << "Runtime(ms): " << __profile_time__ << "ms." << std::endl; \ + } else { \ + std::cout << "Warmup " << FLAGS_warmup << " times..." << std::endl; \ + for (int __i__ = 0; __i__ < FLAGS_warmup; __i__++) { \ + if (!BENCHMARK_FUNC) { \ + std::cerr << "Failed to predict." << std::endl; \ + return 0; \ + } \ + } \ + std::cout << "Counting time..." << std::endl; \ + std::cout << "Repeat " << FLAGS_repeat << " times..." << std::endl; \ + fastdeploy::TimeCounter __tc__; \ + __tc__.Start(); \ + for (int __i__ = 0; __i__ < FLAGS_repeat; __i__++) { \ + if (!BENCHMARK_FUNC) { \ + std::cerr << "Failed to predict." << std::endl; \ + return 0; \ + } \ + } \ + __tc__.End(); \ + double __end2end__ = __tc__.Duration() / FLAGS_repeat * 1000; \ + std::cout << "End2End(ms): " << __end2end__ << "ms." << std::endl; \ + } \ + if (FLAGS_collect_memory_info) { \ + float __cpu_mem__ = __resource_moniter__.GetMaxCpuMem(); \ + float __gpu_mem__ = __resource_moniter__.GetMaxGpuMem(); \ + float __gpu_util__ = __resource_moniter__.GetMaxGpuUtil(); \ + std::cout << "cpu_pss_mb: " << __cpu_mem__ << "MB." << std::endl; \ + std::cout << "gpu_pss_mb: " << __gpu_mem__ << "MB." << std::endl; \ + std::cout << "gpu_util: " << __gpu_util__ << std::endl; \ + __resource_moniter__.Stop(); \ + } \ +} diff --git a/benchmark/cpp/option.h b/benchmark/cpp/option.h new file mode 100755 index 000000000..9989255e5 --- /dev/null +++ b/benchmark/cpp/option.h @@ -0,0 +1,92 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "fastdeploy/vision.h" + +static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { + if (FLAGS_profile_mode == "runtime") { + option->EnableProfiling(FLAGS_include_h2d_d2h, FLAGS_repeat, FLAGS_warmup); + } + if (FLAGS_device == "gpu") { + option->UseGpu(FLAGS_device_id); + if (FLAGS_backend == "ort") { + option->UseOrtBackend(); + } else if (FLAGS_backend == "paddle") { + option->UsePaddleInferBackend(); + } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") { + option->UseTrtBackend(); + if (FLAGS_backend == "paddle_trt") { + option->EnablePaddleToTrt(); + } + if (FLAGS_use_fp16) { + option->EnableTrtFP16(); + } + } else if (FLAGS_backend == "default") { + return true; + } else { + std::cout << "While inference with GPU, only support " + "default/ort/paddle/trt/paddle_trt now, " + << FLAGS_backend << " is not supported." << std::endl; + return false; + } + } else if (FLAGS_device == "cpu") { + option->SetCpuThreadNum(FLAGS_cpu_thread_nums); + if (FLAGS_backend == "ort") { + option->UseOrtBackend(); + } else if (FLAGS_backend == "ov") { + option->UseOpenVINOBackend(); + } else if (FLAGS_backend == "paddle") { + option->UsePaddleInferBackend(); + } else if (FLAGS_backend == "lite") { + option->UsePaddleLiteBackend(); + if (FLAGS_use_fp16) { + option->EnableLiteFP16(); + } + } else if (FLAGS_backend == "default") { + return true; + } else { + std::cout << "While inference with CPU, only support " + "default/ort/ov/paddle/lite now, " + << FLAGS_backend << " is not supported." << std::endl; + return false; + } + } else if (FLAGS_device == "xpu") { + option->UseKunlunXin(FLAGS_device_id); + if (FLAGS_backend == "ort") { + option->UseOrtBackend(); + } else if (FLAGS_backend == "paddle") { + option->UsePaddleInferBackend(); + } else if (FLAGS_backend == "lite") { + option->UsePaddleLiteBackend(); + if (FLAGS_use_fp16) { + option->EnableLiteFP16(); + } + } else if (FLAGS_backend == "default") { + return true; + } else { + std::cout << "While inference with XPU, only support " + "default/ort/paddle/lite now, " + << FLAGS_backend << " is not supported." << std::endl; + return false; + } + } else { + std::cerr << "Only support device CPU/GPU/XPU now, " << FLAGS_device + << " is not supported." << std::endl; + return false; + } + + return true; +}
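
Note (not part of the patch): with these changes, a new C++ benchmark target only needs to include flags.h, macros.h and option.h and wrap the model's Predict call in BENCHMARK_MODEL; the macro handles the initialization check, warmup/repeat loops, runtime profiling, and the optional CPU/GPU memory collection driven by the gflags defined in flags.h. The sketch below mirrors the refactored benchmark_ppyolov8.cc/benchmark_yolov5.cc from this patch; the file name benchmark_example.cc is hypothetical and the path separator is simplified to '/'.

// benchmark_example.cc -- hypothetical usage sketch modeled on this patch,
// not part of the diff above.
#include "flags.h"
#include "macros.h"
#include "option.h"

int main(int argc, char* argv[]) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  auto im = cv::imread(FLAGS_image);
  // Build the runtime option from --device/--backend/--profile_mode, etc.
  auto option = fastdeploy::RuntimeOption();
  if (!CreateRuntimeOption(&option)) {
    PrintUsage();
    return -1;
  }
  // Any FastDeploy vision model can be plugged in here; PaddleYOLOv8 is shown
  // because it appears in this patch. The real benchmarks choose '\\' or '/'
  // as the separator depending on the platform.
  auto model = fastdeploy::vision::detection::PaddleYOLOv8(
      FLAGS_model + "/model.pdmodel", FLAGS_model + "/model.pdiparams",
      FLAGS_model + "/infer_cfg.yml", option);
  fastdeploy::vision::DetectionResult res;
  // Runs the warmup/repeat loops (or runtime profiling) and prints
  // Runtime(ms)/End2End(ms), plus memory stats when --collect_memory_info=true.
  BENCHMARK_MODEL(model, model.Predict(im, &res))
  auto vis_im = fastdeploy::vision::VisDetection(im, res);
  cv::imwrite("vis_result.jpg", vis_im);
  return 0;
}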