[Benchmark] Add PaddleYOLOv8 cpp benchmark example & lite flags option (#1270)

* [Android] Add PaddleYOLOv8 cpp benchmark example & lite flags option

* [Benchmark] add linux x86_64 gpu benchmark build script
Author: DefTruth
Date: 2023-02-09 11:58:07 +08:00
Committed by: GitHub
Parent: 3eb571a047
Commit: ab5377b5fa
8 changed files with 345 additions and 7 deletions

benchmark/cpp/CMakeLists.txt

@@ -9,9 +9,12 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
 include_directories(${FASTDEPLOY_INCS})
 add_executable(benchmark_yolov5 ${PROJECT_SOURCE_DIR}/benchmark_yolov5.cc)
+add_executable(benchmark_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov8.cc)
 if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
   target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread)
+  target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread)
 else()
   target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags)
+  target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags)
 endif()
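
Note: gflags is linked explicitly because flags.h (further below) declares every benchmark option with the DEFINE_* macros; on Linux, gflags additionally needs pthread, hence the conditional link. A minimal, self-contained sketch of that pattern, reusing two of the flags declared below (the default values here are illustrative placeholders, not the repo's):

// Sketch of the gflags pattern these targets depend on; not the benchmark's
// actual flags.h. Help strings and defaults are placeholders.
#include <iostream>
#include "gflags/gflags.h"

DEFINE_string(model, "", "Directory of the paddle model.");
DEFINE_int32(repeat, 1000, "Number of repeats for profiling.");

int main(int argc, char* argv[]) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  std::cout << "model=" << FLAGS_model
            << " repeat=" << FLAGS_repeat << std::endl;
  return 0;
}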

benchmark/cpp/benchmark_ppyolov8.cc (new file)

@@ -0,0 +1,125 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/benchmark/utils.h"
#include "fastdeploy/vision.h"
#include "flags.h"
#ifdef WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif
bool RunModel(std::string model_dir, std::string image_file, size_t warmup,
              size_t repeats, size_t dump_period, std::string cpu_mem_file_name,
              std::string gpu_mem_file_name) {
  // Initialization
  auto option = fastdeploy::RuntimeOption();
  if (!CreateRuntimeOption(&option)) {
    PrintUsage();
    return false;
  }
  auto model_file = model_dir + sep + "model.pdmodel";
  auto params_file = model_dir + sep + "model.pdiparams";
  auto config_file = model_dir + sep + "infer_cfg.yml";
  if (FLAGS_profile_mode == "runtime") {
    option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup);
  }
  auto model = fastdeploy::vision::detection::PaddleYOLOv8(
      model_file, params_file, config_file, option);
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize." << std::endl;
    return false;
  }
  auto im = cv::imread(image_file);
  // For Runtime
  if (FLAGS_profile_mode == "runtime") {
    fastdeploy::vision::DetectionResult res;
    if (!model.Predict(im, &res)) {
      std::cerr << "Failed to predict." << std::endl;
      return false;
    }
    double profile_time = model.GetProfileTime() * 1000;
    std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl;
    auto vis_im = fastdeploy::vision::VisDetection(im, res);
    cv::imwrite("vis_result.jpg", vis_im);
    std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
  } else {
    // For End2End
    // Step1: warm up for warmup times
    std::cout << "Warmup " << warmup << " times..." << std::endl;
    for (size_t i = 0; i < warmup; i++) {
      fastdeploy::vision::DetectionResult res;
      if (!model.Predict(im, &res)) {
        std::cerr << "Failed to predict." << std::endl;
        return false;
      }
    }
    std::vector<float> end2end_statis;
    // Step2: repeat for repeats times
    std::cout << "Counting time..." << std::endl;
    fastdeploy::TimeCounter tc;
    fastdeploy::vision::DetectionResult res;
    for (size_t i = 0; i < repeats; i++) {
      if (FLAGS_collect_memory_info && i % dump_period == 0) {
        fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
#if defined(WITH_GPU)
        fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
                                                         FLAGS_device_id);
#endif
      }
      tc.Start();
      if (!model.Predict(im, &res)) {
        std::cerr << "Failed to predict." << std::endl;
        return false;
      }
      tc.End();
      end2end_statis.push_back(tc.Duration() * 1000);
    }
    // Mean latency (ms) over the timed repeats; warmup runs are excluded.
    float end2end = std::accumulate(end2end_statis.end() - repeats,
                                    end2end_statis.end(), 0.f) /
                    repeats;
    std::cout << "End2End(ms): " << end2end << "ms." << std::endl;
    auto vis_im = fastdeploy::vision::VisDetection(im, res);
    cv::imwrite("vis_result.jpg", vis_im);
    std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
  }
  return true;
}

int main(int argc, char* argv[]) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  int repeats = FLAGS_repeat;
  int warmup = FLAGS_warmup;
  int dump_period = FLAGS_dump_period;
  std::string cpu_mem_file_name = "result_cpu.txt";
  std::string gpu_mem_file_name = "result_gpu.txt";
  // Run model
  if (!RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period,
                cpu_mem_file_name, gpu_mem_file_name)) {
    exit(1);
  }
  if (FLAGS_collect_memory_info) {
    float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
#if defined(WITH_GPU)
    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
#endif
  }
  return 0;
}
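
For reference, the end-to-end path above times each Predict call with fastdeploy::TimeCounter (Start/End/Duration). A rough, hypothetical equivalent in plain std::chrono, only to make explicit what is being measured (this is not FastDeploy's implementation):

// Hypothetical stand-in for fastdeploy::TimeCounter, for illustration only.
#include <chrono>

struct SimpleTimer {
  std::chrono::steady_clock::time_point begin_;
  double duration_ = 0.0;  // seconds; Duration() * 1000 gives ms, as used above
  void Start() { begin_ = std::chrono::steady_clock::now(); }
  void End() {
    duration_ = std::chrono::duration<double>(
                    std::chrono::steady_clock::now() - begin_)
                    .count();
  }
  double Duration() const { return duration_; }
};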

benchmark/cpp/benchmark_yolov5.cc (Executable file → Normal file)

@@ -65,8 +65,10 @@ bool RunModel(std::string model_file, std::string image_file, size_t warmup,
     for (int i = 0; i < repeats; i++) {
       if (FLAGS_collect_memory_info && i % dump_period == 0) {
         fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
+#if defined(WITH_GPU)
         fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
                                                          FLAGS_device_id);
+#endif
       }
       tc.Start();
       if (!model.Predict(im, &res)) {
@@ -102,9 +104,11 @@ int main(int argc, char* argv[]) {
   }
   if (FLAGS_collect_memory_info) {
     float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
-    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
     std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
+#if defined(WITH_GPU)
+    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
     std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
+#endif
   }
   return 0;
 }
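
The change above (mirrored in benchmark_ppyolov8.cc) guards every GPU memory probe with WITH_GPU so that CPU-only builds still compile. The same gate, isolated into a hypothetical helper for clarity (WITH_GPU is assumed to be defined by the build system in GPU-enabled builds):

// Hypothetical helper illustrating the WITH_GPU compile-time gate used above.
#include <string>
#include "fastdeploy/benchmark/utils.h"

void MaybeDumpGpuMemory(const std::string& gpu_mem_file, int device_id) {
#if defined(WITH_GPU)
  fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file, device_id);
#else
  (void)gpu_mem_file;  // no-op in CPU-only builds
  (void)device_id;
#endif
}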

benchmark/cpp/flags.h

@@ -27,13 +27,14 @@ DEFINE_int32(repeat, 1000, "Number of repeats for profiling.");
 DEFINE_string(profile_mode, "runtime", "runtime or end2end.");
 DEFINE_string(backend, "default",
               "The inference runtime backend, support: ['default', 'ort', "
-              "'paddle', 'ov', 'trt', 'paddle_trt']");
+              "'paddle', 'ov', 'trt', 'paddle_trt', 'lite']");
 DEFINE_int32(cpu_thread_nums, 8, "Set numbers of cpu thread.");
 DEFINE_bool(
     include_h2d_d2h, false, "Whether run profiling with h2d and d2h.");
 DEFINE_bool(
     use_fp16, false,
-    "Whether to use FP16 mode, only support 'trt' and 'paddle_trt' backend");
+    "Whether to use FP16 mode, only support 'trt', 'paddle_trt' "
+    "and 'lite' backend");
 DEFINE_bool(
     collect_memory_info, false, "Whether to collect memory info");
 DEFINE_int32(dump_period, 100, "How often to collect memory info.");
@@ -58,7 +59,6 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
     option->UsePaddleInferBackend();
   } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") {
     option->UseTrtBackend();
-    option->SetTrtInputShape("input", {1, 3, 112, 112});
     if (FLAGS_backend == "paddle_trt") {
       option->EnablePaddleToTrt();
     }
@@ -81,11 +81,16 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
     option->UseOpenVINOBackend();
   } else if (FLAGS_backend == "paddle") {
     option->UsePaddleInferBackend();
+  } else if (FLAGS_backend == "lite") {
+    option->UsePaddleLiteBackend();
+    if (FLAGS_use_fp16) {
+      option->EnableLiteFP16();
+    }
   } else if (FLAGS_backend == "default") {
     return true;
   } else {
     std::cout << "While inference with CPU, only support "
-                 "default/ort/ov/paddle now, "
+                 "default/ort/ov/paddle/lite now, "
               << FLAGS_backend << " is not supported." << std::endl;
     return false;
   }
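
With the 'lite' branch in place, selecting the Paddle Lite backend from user code reduces to the two calls added above. A minimal sketch (the UseCpu() call is an assumption drawn from the fact that the lite branch sits on the CPU path of CreateRuntimeOption):

// Minimal sketch: a RuntimeOption configured for Paddle Lite, optionally FP16.
#include "fastdeploy/vision.h"

fastdeploy::RuntimeOption MakeLiteOption(bool use_fp16) {
  fastdeploy::RuntimeOption option;
  option.UseCpu();  // lite is selected on the CPU path above
  option.UsePaddleLiteBackend();
  if (use_fp16) {
    option.EnableLiteFP16();  // per the updated --use_fp16 help text
  }
  return option;
}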