mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 00:57:33 +08:00

* add GPL license * add GPL-3.0 license * add GPL-3.0 license * add GPL-3.0 license * support yolov8 * add pybind for yolov8 * add yolov8 readme * add cpp benchmark * add cpu and gpu mem * public part split * add runtime mode * fixed bugs * add cpu_thread_nums * deal with comments * deal with comments * deal with comments * rm useless code * add FASTDEPLOY_DECL * add FASTDEPLOY_DECL * fixed for windows * mv rss to pss * mv rss to pss * Update utils.cc * use thread to collect mem * Add ResourceUsageMonitor * rm useless code * fixed bug * fixed typo * update ResourceUsageMonitor * fixed bug * fixed bug * add note for ResourceUsageMonitor * deal with comments * add macros * deal with comments * deal with comments * deal with comments * re-lint * rm pmap and use mem api * rm pmap and use mem api * add mem api * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * deal with comments * fixed enable_paddle_to_trt * add log for paddle_trt * support ppcls benchmark * use new trt option api * update benchmark info * simplify benchmark.cc * simplify benchmark.cc * deal with comments * Add ppseg && ppocr benchmark * add OCR rec img * add ocr benchmark * fixed trt shape * add trt shape * resolve conflict * add ENABLE_BENCHMARK define * Add ClassifyDiff * Add Resize for ClassifyResult * deal with comments * add convert info script * resolve conflict * Add SaveBenchmarkResult func * fixed bug * fixed bug * fixed bug * add config.txt for option * fixed bug * fixed bug * fixed bug * add benchmark.sh * mv thread_nums from 8 to 1 * deal with comments * deal with comments * fixed readme * deal with comments --------- Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
88 lines
5.9 KiB
C++
Executable File
88 lines
5.9 KiB
C++
Executable File
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
#pragma once
|
|
|
|
#include "fastdeploy/benchmark/utils.h"
|
|
#include "fastdeploy/utils/perf.h"
|
|
|
|
// Benchmarks a model and saves the collected measurements.
//
// Parameters:
//   MODEL_NAME     - model object. The macro calls MODEL_NAME.Initialized()
//                    and, in "runtime" profile mode,
//                    MODEL_NAME.GetProfileTime().
//   BENCHMARK_FUNC - expression evaluated once per inference; a result that
//                    converts to false is reported as a failed prediction.
//
// Usage notes:
//   * Expands to a braced block that executes `return 0;` when the model is
//     not initialized or a prediction fails, so it must be used inside a
//     function whose return type accepts 0.
//   * Reads FLAGS_config_path from the enclosing scope and loads the options
//     through ResultManager::LoadBenchmarkConfig.
//   * Config keys read: "sampling_interval", "device_id", "warmup" and
//     "repeat" (all parsed with std::stoi, which throws when the value is not
//     a number), plus "collect_memory_info", "profile_mode" and
//     "result_path".
//   * profile_mode == "runtime": BENCHMARK_FUNC runs once and the model's
//     profile time (multiplied by 1000) is reported. Any other value: warmup
//     iterations followed by timed repeat iterations; the average per-call
//     time is reported.
//   * When collect_memory_info == "true" the resource monitor is started
//     before the run and its max CPU memory, max GPU memory and max GPU
//     utilisation are reported afterwards.
//   * Every reported line is printed to std::cout and also appended to the
//     text handed to ResultManager::SaveBenchmarkResult.
//
// Both macro arguments are parenthesised on expansion so that compound
// expressions keep their intended meaning.
#define BENCHMARK_MODEL(MODEL_NAME, BENCHMARK_FUNC)                           \
  {                                                                           \
    if (!(MODEL_NAME).Initialized()) {                                        \
      std::cerr << "Failed to initialize." << std::endl;                      \
      return 0;                                                               \
    }                                                                         \
    std::unordered_map<std::string, std::string> __config_info__;             \
    fastdeploy::benchmark::ResultManager::LoadBenchmarkConfig(                \
        FLAGS_config_path, &__config_info__);                                 \
    std::stringstream __ss__;                                                 \
    __ss__.precision(6);                                                      \
    fastdeploy::benchmark::ResourceUsageMonitor __resource_moniter__(         \
        std::stoi(__config_info__["sampling_interval"]),                      \
        std::stoi(__config_info__["device_id"]));                             \
    if (__config_info__["collect_memory_info"] == "true") {                   \
      __resource_moniter__.Start();                                           \
    }                                                                         \
    if (__config_info__["profile_mode"] == "runtime") {                       \
      if (!(BENCHMARK_FUNC)) {                                                \
        std::cerr << "Failed to predict." << std::endl;                       \
        return 0;                                                             \
      }                                                                       \
      double __profile_time__ = (MODEL_NAME).GetProfileTime() * 1000;         \
      std::cout << "Runtime(ms): " << __profile_time__ << "ms." << std::endl; \
      __ss__ << "Runtime(ms): " << __profile_time__ << "ms." << std::endl;    \
    } else {                                                                  \
      std::cout << "Warmup "                                                  \
                << __config_info__["warmup"]                                  \
                << " times..." << std::endl;                                  \
      int __warmup__ = std::stoi(__config_info__["warmup"]);                  \
      for (int __i__ = 0; __i__ < __warmup__; __i__++) {                      \
        if (!(BENCHMARK_FUNC)) {                                              \
          std::cerr << "Failed to predict." << std::endl;                     \
          return 0;                                                           \
        }                                                                     \
      }                                                                       \
      std::cout << "Counting time..." << std::endl;                           \
      std::cout << "Repeat "                                                  \
                << __config_info__["repeat"]                                  \
                << " times..." << std::endl;                                  \
      fastdeploy::TimeCounter __tc__;                                         \
      __tc__.Start();                                                         \
      int __repeat__ = std::stoi(__config_info__["repeat"]);                  \
      for (int __i__ = 0; __i__ < __repeat__; __i__++) {                      \
        if (!(BENCHMARK_FUNC)) {                                              \
          std::cerr << "Failed to predict." << std::endl;                     \
          return 0;                                                           \
        }                                                                     \
      }                                                                       \
      __tc__.End();                                                           \
      double __end2end__ = __tc__.Duration() / __repeat__ * 1000;             \
      std::cout << "End2End(ms): " << __end2end__ << "ms." << std::endl;      \
      __ss__ << "End2End(ms): " << __end2end__ << "ms." << std::endl;         \
    }                                                                         \
    if (__config_info__["collect_memory_info"] == "true") {                   \
      float __cpu_mem__ = __resource_moniter__.GetMaxCpuMem();                \
      float __gpu_mem__ = __resource_moniter__.GetMaxGpuMem();                \
      float __gpu_util__ = __resource_moniter__.GetMaxGpuUtil();              \
      std::cout << "cpu_rss_mb: " << __cpu_mem__ << "MB." << std::endl;       \
      __ss__ << "cpu_rss_mb: " << __cpu_mem__ << "MB." << std::endl;          \
      std::cout << "gpu_rss_mb: " << __gpu_mem__ << "MB." << std::endl;       \
      __ss__ << "gpu_rss_mb: " << __gpu_mem__ << "MB." << std::endl;          \
      std::cout << "gpu_util: " << __gpu_util__ << std::endl;                 \
      /* Save the same gpu_util line that is printed to the console. */       \
      __ss__ << "gpu_util: " << __gpu_util__ << std::endl;                    \
      __resource_moniter__.Stop();                                            \
    }                                                                         \
    fastdeploy::benchmark::ResultManager::SaveBenchmarkResult(                \
        __ss__.str(), __config_info__["result_path"]);                        \
  }
|