diff --git a/benchmark/.gitignore b/benchmark/.gitignore
deleted file mode 100644
index 89096b6cd..000000000
--- a/benchmark/.gitignore
+++ /dev/null
@@ -1,13 +0,0 @@
-*.tgz
-*.zip
-*.tar
-*.tar.gz
-*.tgz
-*.jpg
-*.png
-*.jpeg
-*.txt
-*.log
-yolov8_s_*
-._yolov8_s_*
-Mobile*
\ No newline at end of file
diff --git a/benchmark/cpp/CMakeLists.txt b/benchmark/cpp/CMakeLists.txt
new file mode 100755
index 000000000..9706587d3
--- /dev/null
+++ b/benchmark/cpp/CMakeLists.txt
@@ -0,0 +1,17 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
+
+# specify the decompress directory of FastDeploy SDK
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+include(${FASTDEPLOY_INSTALL_DIR}/utils/gflags.cmake)
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(benchmark_yolov5 ${PROJECT_SOURCE_DIR}/benchmark_yolov5.cc)
+
+if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
+  target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread)
+else()
+  target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags)
+endif()
diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc
new file mode 100755
index 000000000..d84292536
--- /dev/null
+++ b/benchmark/cpp/benchmark_yolov5.cc
@@ -0,0 +1,110 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/benchmark/utils.h"
+#include "fastdeploy/vision.h"
+#include "flags.h"
+
+bool RunModel(std::string model_file, std::string image_file, size_t warmup,
+              size_t repeats, size_t dump_period, std::string cpu_mem_file_name,
+              std::string gpu_mem_file_name) {
+  // Initialization
+  auto option = fastdeploy::RuntimeOption();
+  if (!CreateRuntimeOption(&option)) {
+    PrintUsage();
+    return false;
+  }
+  if (FLAGS_profile_mode == "runtime") {
+    option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup);
+  }
+  auto model = fastdeploy::vision::detection::YOLOv5(model_file, "", option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return false;
+  }
+  auto im = cv::imread(image_file);
+  // For Runtime
+  if (FLAGS_profile_mode == "runtime") {
+    fastdeploy::vision::DetectionResult res;
+    if (!model.Predict(im, &res)) {
+      std::cerr << "Failed to predict." << std::endl;
+      return false;
+    }
+    double profile_time = model.GetProfileTime() * 1000;
+    std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl;
+    auto vis_im = fastdeploy::vision::VisDetection(im, res);
+    cv::imwrite("vis_result.jpg", vis_im);
+    std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+  } else {
+    // For End2End
+    // Step1: warm up for warmup times
+    std::cout << "Warmup " << warmup << " times..." << std::endl;
+    for (int i = 0; i < warmup; i++) {
+      fastdeploy::vision::DetectionResult res;
+      if (!model.Predict(im, &res)) {
+        std::cerr << "Failed to predict." << std::endl;
+        return false;
+      }
+    }
+    std::vector<float> end2end_statis;
+    // Step2: repeat for repeats times
+    std::cout << "Counting time..." << std::endl;
+    fastdeploy::TimeCounter tc;
+    fastdeploy::vision::DetectionResult res;
+    for (int i = 0; i < repeats; i++) {
+      if (FLAGS_collect_memory_info && i % dump_period == 0) {
+        fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
+        fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
+                                                         FLAGS_device_id);
+      }
+      tc.Start();
+      if (!model.Predict(im, &res)) {
+        std::cerr << "Failed to predict." << std::endl;
+        return false;
+      }
+      tc.End();
+      end2end_statis.push_back(tc.Duration() * 1000);
+    }
+    float end2end = std::accumulate(end2end_statis.end() - repeats,
+                                    end2end_statis.end(), 0.f) /
+                    repeats;
+    std::cout << "End2End(ms): " << end2end << "ms." << std::endl;
+    auto vis_im = fastdeploy::vision::VisDetection(im, res);
+    cv::imwrite("vis_result.jpg", vis_im);
+    std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+  }
+
+  return true;
+}
+
+int main(int argc, char* argv[]) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  int repeats = FLAGS_repeat;
+  int warmup = FLAGS_warmup;
+  int dump_period = FLAGS_dump_period;
+  std::string cpu_mem_file_name = "result_cpu.txt";
+  std::string gpu_mem_file_name = "result_gpu.txt";
+  // Run model
+  if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period,
+               cpu_mem_file_name, gpu_mem_file_name) != true) {
+    exit(1);
+  }
+  if (FLAGS_collect_memory_info) {
+    float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
+    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
+    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
+    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h
new file mode 100755
index 000000000..3d35eb313
--- /dev/null
+++ b/benchmark/cpp/flags.h
@@ -0,0 +1,99 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "gflags/gflags.h"
+#include "fastdeploy/utils/perf.h"
+
+DEFINE_string(model, "", "Directory of the inference model.");
+DEFINE_string(image, "", "Path of the image file.");
+DEFINE_string(device, "cpu",
+              "Type of inference device, support 'cpu' or 'gpu'.");
+DEFINE_int32(device_id, 0, "device(gpu) id.");
+DEFINE_int32(warmup, 200, "Number of warmup for profiling.");
+DEFINE_int32(repeat, 1000, "Number of repeats for profiling.");
+DEFINE_string(profile_mode, "runtime", "runtime or end2end.");
+DEFINE_string(backend, "default",
+              "The inference runtime backend, support: ['default', 'ort', "
+              "'paddle', 'ov', 'trt', 'paddle_trt']");
+DEFINE_int32(cpu_thread_nums, 8, "Set numbers of cpu thread.");
+DEFINE_bool(
+    include_h2d_d2h, false, "Whether run profiling with h2d and d2h.");
+DEFINE_bool(
+    use_fp16, false,
+    "Whether to use FP16 mode, only support 'trt' and 'paddle_trt' backend");
+DEFINE_bool(
+    collect_memory_info, false, "Whether to collect memory info");
+DEFINE_int32(dump_period, 100, "How often to collect memory info.");
+
+void PrintUsage() {
+  std::cout << "Usage: infer_demo --model model_path --image img_path --device "
+               "[cpu|gpu] --backend "
+               "[default|ort|paddle|ov|trt|paddle_trt] "
+               "--use_fp16 false"
+            << std::endl;
+  std::cout << "Default value of device: cpu" << std::endl;
+  std::cout << "Default value of backend: default" << std::endl;
+  std::cout << "Default value of use_fp16: false" << std::endl;
+}
+
+bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
+  if (FLAGS_device == "gpu") {
+    option->UseGpu();
+    if (FLAGS_backend == "ort") {
+      option->UseOrtBackend();
+    } else if (FLAGS_backend == "paddle") {
+      option->UsePaddleInferBackend();
+    } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") {
+      option->UseTrtBackend();
+      option->SetTrtInputShape("input", {1, 3, 112, 112});
+      if (FLAGS_backend == "paddle_trt") {
+        option->EnablePaddleToTrt();
+      }
+      if (FLAGS_use_fp16) {
+        option->EnableTrtFP16();
+      }
+    } else if (FLAGS_backend == "default") {
+      return true;
+    } else {
+      std::cout << "While inference with GPU, only support "
+                   "default/ort/paddle/trt/paddle_trt now, "
+                << FLAGS_backend << " is not supported." << std::endl;
+      return false;
+    }
+  } else if (FLAGS_device == "cpu") {
+    option->SetCpuThreadNum(FLAGS_cpu_thread_nums);
+    if (FLAGS_backend == "ort") {
+      option->UseOrtBackend();
+    } else if (FLAGS_backend == "ov") {
+      option->UseOpenVINOBackend();
+    } else if (FLAGS_backend == "paddle") {
+      option->UsePaddleInferBackend();
+    } else if (FLAGS_backend == "default") {
+      return true;
+    } else {
+      std::cout << "While inference with CPU, only support "
+                   "default/ort/ov/paddle now, "
+                << FLAGS_backend << " is not supported." << std::endl;
+      return false;
+    }
+  } else {
+    std::cerr << "Only support device CPU/GPU now, " << FLAGS_device
+              << " is not supported." << std::endl;
+    return false;
+  }
+
+  return true;
+}
diff --git a/benchmark/README.md b/benchmark/python/README.md
similarity index 100%
rename from benchmark/README.md
rename to benchmark/python/README.md
diff --git a/benchmark/benchmark_ernie_seq_cls.py b/benchmark/python/benchmark_ernie_seq_cls.py
similarity index 100%
rename from benchmark/benchmark_ernie_seq_cls.py
rename to benchmark/python/benchmark_ernie_seq_cls.py
diff --git a/benchmark/benchmark_ppcls.py b/benchmark/python/benchmark_ppcls.py
similarity index 96%
rename from benchmark/benchmark_ppcls.py
rename to benchmark/python/benchmark_ppcls.py
index a8219b028..20a62c9fc 100755
--- a/benchmark/benchmark_ppcls.py
+++ b/benchmark/python/benchmark_ppcls.py
@@ -17,7 +17,8 @@ import cv2
 import os
 import numpy as np
 import time
-from tqdm import tqdm
+from tqdm import tqdm
+
 
 def parse_arguments():
     import argparse
@@ -38,19 +39,19 @@ def parse_arguments():
         "--profile_mode",
         type=str,
         default="runtime",
-        help="runtime or end2end.")
+        help="runtime or end2end.")
     parser.add_argument(
         "--repeat",
         required=True,
         type=int,
         default=1000,
-        help="number of repeats for profiling.")
+        help="number of repeats for profiling.")
     parser.add_argument(
         "--warmup",
         required=True,
         type=int,
         default=50,
-        help="number of warmup for profiling.")
+        help="number of warmup for profiling.")
     parser.add_argument(
         "--device",
         default="cpu",
@@ -74,7 +75,7 @@ def parse_arguments():
         "--include_h2d_d2h",
         type=ast.literal_eval,
         default=False,
-        help="whether run profiling with h2d and d2h")
+        help="whether run profiling with h2d and d2h")
     args = parser.parse_args()
     return args
 
@@ -85,7 +86,7 @@ def build_option(args):
     backend = args.backend
     enable_trt_fp16 = args.enable_trt_fp16
     if args.profile_mode == "runtime":
-        option.enable_profiling(args.include_h2d_d2h, args.repeat, args.warmup)
+        option.enable_profiling(args.include_h2d_d2h, args.repeat, args.warmup)
     option.set_cpu_thread_num(args.cpu_num_thread)
     if device == "gpu":
         option.use_gpu()
@@ -274,25 +275,27 @@ if __name__ == '__main__':
         enable_gpu = args.device == "gpu"
         monitor = Monitor(enable_gpu, gpu_id)
         monitor.start()
-    
+
     im_ori = cv2.imread(args.image)
     if args.profile_mode == "runtime":
         result = model.predict(im_ori)
         profile_time = model.get_profile_time()
         dump_result["runtime"] = profile_time * 1000
-        f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"])))
+        f.writelines("Runtime(ms): {} \n".format(
+            str(dump_result["runtime"])))
         print("Runtime(ms): {} \n".format(str(dump_result["runtime"])))
     else:
         # end2end
         for i in range(args.warmup):
            result = model.predict(im_ori)
-    
+
        start = time.time()
        for i in tqdm(range(args.repeat)):
            result = model.predict(im_ori)
        end = time.time()
        dump_result["end2end"] = ((end - start) / args.repeat) * 1000.0
-        f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"])))
+        f.writelines("End2End(ms): {} \n".format(
+            str(dump_result["end2end"])))
        print("End2End(ms): {} \n".format(str(dump_result["end2end"])))
 
     if enable_collect_memory_info:
@@ -304,7 +307,7 @@ if __name__ == '__main__':
             'memory.used'] if 'gpu' in mem_info else 0
         dump_result["gpu_util"] = mem_info['gpu'][
             'utilization.gpu'] if 'gpu' in mem_info else 0
-    
+
     if enable_collect_memory_info:
         f.writelines("cpu_rss_mb: {} \n".format(
             str(dump_result["cpu_rss_mb"])))
diff --git a/benchmark/benchmark_ppdet.py b/benchmark/python/benchmark_ppdet.py
similarity index 95%
rename from benchmark/benchmark_ppdet.py
rename to benchmark/python/benchmark_ppdet.py
index eb0b0f4a8..c2b1da6b1 100755
--- a/benchmark/benchmark_ppdet.py
+++ b/benchmark/python/benchmark_ppdet.py
@@ -20,6 +20,7 @@ import time
 from sympy import EX
 from tqdm import tqdm
+
 
 def parse_arguments():
     import argparse
     import ast
@@ -39,19 +40,19 @@ def parse_arguments():
         "--profile_mode",
         type=str,
         default="runtime",
-        help="runtime or end2end.")
+        help="runtime or end2end.")
     parser.add_argument(
         "--repeat",
         required=True,
         type=int,
         default=1000,
-        help="number of repeats for profiling.")
+        help="number of repeats for profiling.")
     parser.add_argument(
         "--warmup",
         required=True,
         type=int,
         default=50,
-        help="number of warmup for profiling.")
+        help="number of warmup for profiling.")
     parser.add_argument(
         "--device",
         default="cpu",
@@ -70,7 +71,7 @@ def parse_arguments():
         "--enable_lite_fp16",
         type=ast.literal_eval,
         default=False,
-        help="whether enable fp16 in Paddle Lite backend")
+        help="whether enable fp16 in Paddle Lite backend")
     parser.add_argument(
         "--enable_collect_memory_info",
         type=ast.literal_eval,
@@ -80,7 +81,7 @@ def parse_arguments():
         "--include_h2d_d2h",
         type=ast.literal_eval,
         default=False,
-        help="whether run profiling with h2d and d2h")
+        help="whether run profiling with h2d and d2h")
     args = parser.parse_args()
     return args
 
@@ -92,7 +93,7 @@ def build_option(args):
     enable_trt_fp16 = args.enable_trt_fp16
     enable_lite_fp16 = args.enable_lite_fp16
     if args.profile_mode == "runtime":
-        option.enable_profiling(args.include_h2d_d2h, args.repeat, args.warmup)
+        option.enable_profiling(args.include_h2d_d2h, args.repeat, args.warmup)
     option.set_cpu_thread_num(args.cpu_num_thread)
     if device == "gpu":
         option.use_gpu()
@@ -149,7 +150,7 @@ def build_option(args):
         else:
             raise Exception(
                 "While inference with CPU, only support default/ort/lite/paddle now, {} is not supported.".
-                format(backend))
+                format(backend))
     elif device == "ascend":
         option.use_ascend()
         if backend == "lite":
@@ -161,11 +162,11 @@ def build_option(args):
         else:
             raise Exception(
                 "While inference with CPU, only support default/lite now, {} is not supported.".
-                format(backend))
+                format(backend))
     else:
         raise Exception(
-            "Only support device CPU/GPU/Kunlunxin/Ascend now, {} is not supported.".format(
-                device))
+            "Only support device CPU/GPU/Kunlunxin/Ascend now, {} is not supported.".
+            format(device))
 
     return option
 
@@ -340,19 +341,21 @@ if __name__ == '__main__':
         result = model.predict(im_ori)
         profile_time = model.get_profile_time()
         dump_result["runtime"] = profile_time * 1000
-        f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"])))
+        f.writelines("Runtime(ms): {} \n".format(
+            str(dump_result["runtime"])))
         print("Runtime(ms): {} \n".format(str(dump_result["runtime"])))
     else:
         # end2end
         for i in range(args.warmup):
             result = model.predict(im_ori)
-    
+
         start = time.time()
         for i in tqdm(range(args.repeat)):
             result = model.predict(im_ori)
         end = time.time()
         dump_result["end2end"] = ((end - start) / args.repeat) * 1000.0
-        f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"])))
+        f.writelines("End2End(ms): {} \n".format(
+            str(dump_result["end2end"])))
         print("End2End(ms): {} \n".format(str(dump_result["end2end"])))
 
     if enable_collect_memory_info:
@@ -364,7 +367,7 @@ if __name__ == '__main__':
             'memory.used'] if 'gpu' in mem_info else 0
         dump_result["gpu_util"] = mem_info['gpu'][
             'utilization.gpu'] if 'gpu' in mem_info else 0
-    
+
     if enable_collect_memory_info:
         f.writelines("cpu_rss_mb: {} \n".format(
             str(dump_result["cpu_rss_mb"])))
diff --git a/benchmark/benchmark_ppocr.py b/benchmark/python/benchmark_ppocr.py
similarity index 100%
rename from benchmark/benchmark_ppocr.py
rename to benchmark/python/benchmark_ppocr.py
diff --git a/benchmark/benchmark_ppseg.py b/benchmark/python/benchmark_ppseg.py
similarity index 100%
rename from benchmark/benchmark_ppseg.py
rename to benchmark/python/benchmark_ppseg.py
diff --git a/benchmark/benchmark_uie.py b/benchmark/python/benchmark_uie.py
similarity index 100%
rename from benchmark/benchmark_uie.py
rename to benchmark/python/benchmark_uie.py
diff --git a/benchmark/benchmark_yolo.py b/benchmark/python/benchmark_yolo.py
similarity index 100%
rename from benchmark/benchmark_yolo.py
rename to benchmark/python/benchmark_yolo.py
diff --git a/benchmark/convert_info.py b/benchmark/python/convert_info.py
similarity index 100%
rename from benchmark/convert_info.py
rename to benchmark/python/convert_info.py
diff --git a/benchmark/requirements.txt b/benchmark/python/requirements.txt
similarity index 100%
rename from benchmark/requirements.txt
rename to benchmark/python/requirements.txt
diff --git a/benchmark/run_benchmark_ernie_seq_cls.sh b/benchmark/python/run_benchmark_ernie_seq_cls.sh
similarity index 100%
rename from benchmark/run_benchmark_ernie_seq_cls.sh
rename to benchmark/python/run_benchmark_ernie_seq_cls.sh
diff --git a/benchmark/run_benchmark_ppcls.sh b/benchmark/python/run_benchmark_ppcls.sh
similarity index 100%
rename from benchmark/run_benchmark_ppcls.sh
rename to benchmark/python/run_benchmark_ppcls.sh
diff --git a/benchmark/run_benchmark_ppdet.sh b/benchmark/python/run_benchmark_ppdet.sh
similarity index 100%
rename from benchmark/run_benchmark_ppdet.sh
rename to benchmark/python/run_benchmark_ppdet.sh
diff --git a/benchmark/run_benchmark_ppocr.sh b/benchmark/python/run_benchmark_ppocr.sh
similarity index 100%
rename from benchmark/run_benchmark_ppocr.sh
rename to benchmark/python/run_benchmark_ppocr.sh
diff --git a/benchmark/run_benchmark_ppseg.sh b/benchmark/python/run_benchmark_ppseg.sh
similarity index 100%
rename from benchmark/run_benchmark_ppseg.sh
rename to benchmark/python/run_benchmark_ppseg.sh
diff --git a/benchmark/run_benchmark_uie.sh b/benchmark/python/run_benchmark_uie.sh
similarity index 100%
rename from benchmark/run_benchmark_uie.sh
rename to benchmark/python/run_benchmark_uie.sh
diff --git a/benchmark/run_benchmark_yolo.sh b/benchmark/python/run_benchmark_yolo.sh
similarity index 100%
rename from benchmark/run_benchmark_yolo.sh
rename to benchmark/python/run_benchmark_yolo.sh
diff --git a/fastdeploy/benchmark/benchmark.h b/fastdeploy/benchmark/benchmark.h
old mode 100644
new mode 100755
index 825fc4f54..b7463d3e9
--- a/fastdeploy/benchmark/benchmark.h
+++ b/fastdeploy/benchmark/benchmark.h
@@ -18,7 +18,7 @@
 #include "fastdeploy/benchmark/option.h"
 #include "fastdeploy/benchmark/results.h"
 
-#ifdef ENABLE_BENCHMARK 
+#ifdef ENABLE_BENCHMARK
 #define __RUNTIME_PROFILE_LOOP_BEGIN(option, base_loop)              \
   int __p_loop = (base_loop);                                        \
   const bool __p_enable_profile = option.enable_profile;             \
@@ -75,12 +75,12 @@
       result.time_of_runtime =                                       \
           __p_tc_duration_h / static_cast<double>(__p_repeats_h);    \
     }                                                                \
-  } 
+  }
 #else
 #define __RUNTIME_PROFILE_LOOP_BEGIN(option, base_loop) \
-  for (int __p_i = 0; __p_i < (base_loop); ++ __p_i) {
+  for (int __p_i = 0; __p_i < (base_loop); ++__p_i) {
 #define __RUNTIME_PROFILE_LOOP_END(result) }
 #define __RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN(option, base_loop) \
-  for (int __p_i_h = 0; __p_i_h < (base_loop); ++ __p_i_h) {
+  for (int __p_i_h = 0; __p_i_h < (base_loop); ++__p_i_h) {
 #define __RUNTIME_PROFILE_LOOP_H2D_D2H_END(result) }
-#endif
\ No newline at end of file
+#endif
diff --git a/fastdeploy/benchmark/option.h b/fastdeploy/benchmark/option.h
old mode 100644
new mode 100755
index 6df9b473c..5af9f1585
--- a/fastdeploy/benchmark/option.h
+++ b/fastdeploy/benchmark/option.h
@@ -26,22 +26,22 @@ struct BenchmarkOption {
   int warmup = 50;  ///< Warmup for backend inference.
   int repeats = 100;  ///< Repeats for backend inference.
   bool enable_profile = false;  ///< Whether to use profile or not.
-  bool include_h2d_d2h = false; ///< Whether to include time of H2D_D2H for time of runtime.
+  bool include_h2d_d2h = false;  ///< Whether to include time of H2D_D2H for time of runtime. // NOLINT
 
   friend std::ostream& operator<<(
       std::ostream& output, const BenchmarkOption &option) {
-      if (!option.include_h2d_d2h) {
-        output << "Running profiling for Runtime "
-               << "without H2D and D2H, ";
-      } else {
-        output << "Running profiling for Runtime "
-               << "with H2D and D2H, ";
-      }
-      output << "Repeats: " << option.repeats << ", "
-             << "Warmup: " << option.warmup;
-      return output;
+    if (!option.include_h2d_d2h) {
+      output << "Running profiling for Runtime "
+             << "without H2D and D2H, ";
+    } else {
+      output << "Running profiling for Runtime "
+             << "with H2D and D2H, ";
+    }
+    output << "Repeats: " << option.repeats << ", "
+           << "Warmup: " << option.warmup;
+    return output;
   }
 };
 
-}  // namespace benchmark
-}  // namespace fastdeploy
\ No newline at end of file
+}  // namespace benchmark
+}  // namespace fastdeploy
diff --git a/fastdeploy/benchmark/utils.cc b/fastdeploy/benchmark/utils.cc
new file mode 100755
index 000000000..2b0bd9df1
--- /dev/null
+++ b/fastdeploy/benchmark/utils.cc
@@ -0,0 +1,122 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdio>
+#if defined(__linux__) || defined(__ANDROID__)
+#include <unistd.h>
+#endif
+#include <fstream>
+
+#include "fastdeploy/benchmark/utils.h"
+
+namespace fastdeploy {
+namespace benchmark {
+
+// Remove the ch characters at both ends of str
+static std::string strip(const std::string& str, char ch = ' ') {
+  int i = 0;
+  while (str[i] == ch) {
+    i++;
+  }
+  int j = str.size() - 1;
+  while (str[j] == ch) {
+    j--;
+  }
+  return str.substr(i, j + 1 - i);
+}
+
+void DumpCurrentCpuMemoryUsage(const std::string& name) {
+#if defined(__linux__) || defined(__ANDROID__)
+  int iPid = static_cast<int>(getpid());
+  std::string command = "pmap -x " + std::to_string(iPid) + " | grep total";
+  FILE* pp = popen(command.data(), "r");
+  if (!pp) return;
+  char tmp[1024];
+
+  while (fgets(tmp, sizeof(tmp), pp) != NULL) {
+    std::ofstream write;
+    write.open(name, std::ios::app);
+    write << tmp;
+    write.close();
+  }
+  pclose(pp);
+#else
+  FDASSERT(false,
+           "Currently collect cpu memory info only supports Linux and ANDROID.")
+#endif
+  return;
+}
+
+void DumpCurrentGpuMemoryUsage(const std::string& name, int device_id) {
+#if defined(__linux__) && defined(WITH_GPU)
+  std::string command = "nvidia-smi --id=" + std::to_string(device_id) +
+                        " --query-gpu=index,uuid,name,timestamp,memory.total,"
+                        "memory.free,memory.used,utilization.gpu,utilization."
+                        "memory --format=csv,noheader,nounits";
+  FILE* pp = popen(command.data(), "r");
+  if (!pp) return;
+  char tmp[1024];
+
+  while (fgets(tmp, sizeof(tmp), pp) != NULL) {
+    std::ofstream write;
+    write.open(name, std::ios::app);
+    write << tmp;
+    write.close();
+  }
+  pclose(pp);
+#else
+  FDASSERT(false,
+           "Currently collect gpu memory info only supports Linux in GPU.")
+#endif
+  return;
+}
+
+float GetCpuMemoryUsage(const std::string& name) {
+  std::ifstream read(name);
+  std::string line;
+  float max_cpu_mem = -1;
+  while (getline(read, line)) {
+    std::stringstream ss(line);
+    std::string tmp;
+    std::vector<std::string> nums;
+    while (getline(ss, tmp, ' ')) {
+      tmp = strip(tmp);
+      if (tmp.empty()) continue;
+      nums.push_back(tmp);
+    }
+    max_cpu_mem = std::max(max_cpu_mem, stof(nums[3]));
+  }
+  return max_cpu_mem / 1024;
+}
+
+float GetGpuMemoryUsage(const std::string& name) {
+  std::ifstream read(name);
+  std::string line;
+  float max_gpu_mem = -1;
+  while (getline(read, line)) {
+    std::stringstream ss(line);
+    std::string tmp;
+    std::vector<std::string> nums;
+    while (getline(ss, tmp, ',')) {
+      tmp = strip(tmp);
+      if (tmp.empty()) continue;
+      nums.push_back(tmp);
+    }
+    max_gpu_mem = std::max(max_gpu_mem, stof(nums[6]));
+  }
+  return max_gpu_mem;
+}
+
+}  // namespace benchmark
+}  // namespace fastdeploy
diff --git a/fastdeploy/benchmark/utils.h b/fastdeploy/benchmark/utils.h
new file mode 100755
index 000000000..12770f365
--- /dev/null
+++ b/fastdeploy/benchmark/utils.h
@@ -0,0 +1,35 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include "fastdeploy/utils/utils.h"
+
+namespace fastdeploy {
+namespace benchmark {
+
+// Record current cpu memory usage into file
+FASTDEPLOY_DECL void DumpCurrentCpuMemoryUsage(const std::string& name);
+
+// Record current gpu memory usage into file
+FASTDEPLOY_DECL void DumpCurrentGpuMemoryUsage(const std::string& name,
+                                               int device_id);
+
+// Get Max cpu memory usage
+FASTDEPLOY_DECL float GetCpuMemoryUsage(const std::string& name);
+
+// Get Max gpu memory usage
+FASTDEPLOY_DECL float GetGpuMemoryUsage(const std::string& name);
+
+}  // namespace benchmark
+}  // namespace fastdeploy
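
Usage note (a minimal sketch, not part of the patch itself): based on the CMakeLists.txt and flags.h added above, the new C++ benchmark is expected to be built and run roughly as follows. The SDK path, model file, and image file are illustrative placeholders, not values taken from this diff.

    # assumes the FastDeploy C++ SDK has already been downloaded and extracted
    cd benchmark/cpp
    mkdir build && cd build
    cmake .. -DFASTDEPLOY_INSTALL_DIR=/path/to/fastdeploy-sdk
    make -j

    # backend-only ("runtime") profiling on GPU with TensorRT and FP16
    ./benchmark_yolov5 --model yolov5s.onnx --image test.jpg \
        --device gpu --backend trt --use_fp16=true --profile_mode runtime

    # end-to-end profiling with periodic memory sampling; this writes
    # result_cpu.txt / result_gpu.txt and prints cpu_pss_mb / gpu_pss_mb
    ./benchmark_yolov5 --model yolov5s.onnx --image test.jpg \
        --device gpu --backend paddle --profile_mode end2end \
        --repeat 1000 --warmup 200 --collect_memory_info=true --dump_period 100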