Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 08:37:06 +08:00)
[Benchmark] Add PaddleYOLOv8 cpp benchmark example & lite flags option (#1270)
* [Android] Add PaddleYOLOv8 cpp benchmark example & lite flags option
* [Benchmark] add linux x86_64 gpu benchmark build script
@@ -9,9 +9,12 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
 include_directories(${FASTDEPLOY_INCS})

 add_executable(benchmark_yolov5 ${PROJECT_SOURCE_DIR}/benchmark_yolov5.cc)
+add_executable(benchmark_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov8.cc)

 if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
   target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread)
+  target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread)
 else()
   target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags)
+  target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags)
 endif()
benchmark/cpp/benchmark_ppyolov8.cc (new file, 125 additions)
@@ -0,0 +1,125 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/benchmark/utils.h"
#include "fastdeploy/vision.h"
#include "flags.h"

#ifdef WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif

bool RunModel(std::string model_dir, std::string image_file, size_t warmup,
              size_t repeats, size_t dump_period, std::string cpu_mem_file_name,
              std::string gpu_mem_file_name) {
  // Initialization
  auto option = fastdeploy::RuntimeOption();
  if (!CreateRuntimeOption(&option)) {
    PrintUsage();
    return false;
  }
  auto model_file = model_dir + sep + "model.pdmodel";
  auto params_file = model_dir + sep + "model.pdiparams";
  auto config_file = model_dir + sep + "infer_cfg.yml";

  if (FLAGS_profile_mode == "runtime") {
    option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup);
  }
  auto model = fastdeploy::vision::detection::PaddleYOLOv8(
      model_file, params_file, config_file, option);
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize." << std::endl;
    return false;
  }
  auto im = cv::imread(image_file);
  // For Runtime
  if (FLAGS_profile_mode == "runtime") {
    fastdeploy::vision::DetectionResult res;
    if (!model.Predict(im, &res)) {
      std::cerr << "Failed to predict." << std::endl;
      return false;
    }
    double profile_time = model.GetProfileTime() * 1000;
    std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl;
    auto vis_im = fastdeploy::vision::VisDetection(im, res);
    cv::imwrite("vis_result.jpg", vis_im);
    std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
  } else {
    // For End2End
    // Step1: warm up for warmup times
    std::cout << "Warmup " << warmup << " times..." << std::endl;
    for (int i = 0; i < warmup; i++) {
      fastdeploy::vision::DetectionResult res;
      if (!model.Predict(im, &res)) {
        std::cerr << "Failed to predict." << std::endl;
        return false;
      }
    }
    std::vector<float> end2end_statis;
    // Step2: repeat for repeats times
    std::cout << "Counting time..." << std::endl;
    fastdeploy::TimeCounter tc;
    fastdeploy::vision::DetectionResult res;
    for (int i = 0; i < repeats; i++) {
      if (FLAGS_collect_memory_info && i % dump_period == 0) {
        fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
#if defined(WITH_GPU)
        fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
                                                         FLAGS_device_id);
#endif
      }
      tc.Start();
      if (!model.Predict(im, &res)) {
        std::cerr << "Failed to predict." << std::endl;
        return false;
      }
      tc.End();
      end2end_statis.push_back(tc.Duration() * 1000);
    }
    float end2end = std::accumulate(end2end_statis.end() - repeats,
                                    end2end_statis.end(), 0.f) /
                    repeats;
    std::cout << "End2End(ms): " << end2end << "ms." << std::endl;
    auto vis_im = fastdeploy::vision::VisDetection(im, res);
    cv::imwrite("vis_result.jpg", vis_im);
    std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
  }

  return true;
}

int main(int argc, char* argv[]) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  int repeats = FLAGS_repeat;
  int warmup = FLAGS_warmup;
  int dump_period = FLAGS_dump_period;
  std::string cpu_mem_file_name = "result_cpu.txt";
  std::string gpu_mem_file_name = "result_gpu.txt";
  // Run model
  if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period,
               cpu_mem_file_name, gpu_mem_file_name) != true) {
    exit(1);
  }
  if (FLAGS_collect_memory_info) {
    float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
#if defined(WITH_GPU)
    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
#endif
  }
  return 0;
}
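For reference, a minimal invocation sketch of the new benchmark binary. The flag names come from the gflags definitions shown later in this diff (flags.h, included by the benchmark sources); the model directory and image path are placeholders, and RunModel expects the directory to contain model.pdmodel, model.pdiparams and infer_cfg.yml:

# Hypothetical paths; flag values are illustrative only.
./benchmark_ppyolov8 --model ./yolov8_export_dir --image ./test.jpg \
    --backend lite --use_fp16 --profile_mode end2end \
    --warmup 50 --repeat 200 --collect_memory_info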
benchmark/cpp/benchmark_yolov5.cc (6 changes, Executable file → Normal file)
@@ -65,8 +65,10 @@ bool RunModel(std::string model_file, std::string image_file, size_t warmup,
     for (int i = 0; i < repeats; i++) {
       if (FLAGS_collect_memory_info && i % dump_period == 0) {
         fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
+#if defined(WITH_GPU)
         fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
                                                          FLAGS_device_id);
+#endif
       }
       tc.Start();
       if (!model.Predict(im, &res)) {
@@ -102,9 +104,11 @@ int main(int argc, char* argv[]) {
   }
   if (FLAGS_collect_memory_info) {
     float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
-    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
     std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
+#if defined(WITH_GPU)
+    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
     std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
+#endif
   }
   return 0;
 }
@@ -27,13 +27,14 @@ DEFINE_int32(repeat, 1000, "Number of repeats for profiling.");
 DEFINE_string(profile_mode, "runtime", "runtime or end2end.");
 DEFINE_string(backend, "default",
               "The inference runtime backend, support: ['default', 'ort', "
-              "'paddle', 'ov', 'trt', 'paddle_trt']");
+              "'paddle', 'ov', 'trt', 'paddle_trt', 'lite']");
 DEFINE_int32(cpu_thread_nums, 8, "Set numbers of cpu thread.");
 DEFINE_bool(
     include_h2d_d2h, false, "Whether run profiling with h2d and d2h.");
 DEFINE_bool(
     use_fp16, false,
-    "Whether to use FP16 mode, only support 'trt' and 'paddle_trt' backend");
+    "Whether to use FP16 mode, only support 'trt', 'paddle_trt' "
+    "and 'lite' backend");
 DEFINE_bool(
     collect_memory_info, false, "Whether to collect memory info");
 DEFINE_int32(dump_period, 100, "How often to collect memory info.");
@@ -58,7 +59,6 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
     option->UsePaddleInferBackend();
   } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") {
     option->UseTrtBackend();
-    option->SetTrtInputShape("input", {1, 3, 112, 112});
     if (FLAGS_backend == "paddle_trt") {
       option->EnablePaddleToTrt();
     }
@@ -81,11 +81,16 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
     option->UseOpenVINOBackend();
   } else if (FLAGS_backend == "paddle") {
     option->UsePaddleInferBackend();
+  } else if (FLAGS_backend == "lite") {
+    option->UsePaddleLiteBackend();
+    if (FLAGS_use_fp16) {
+      option->EnableLiteFP16();
+    }
   } else if (FLAGS_backend == "default") {
     return true;
   } else {
     std::cout << "While inference with CPU, only support "
-                 "default/ort/ov/paddle now, "
+                 "default/ort/ov/paddle/lite now, "
              << FLAGS_backend << " is not supported." << std::endl;
     return false;
   }
@@ -32,7 +32,7 @@ class LiteBackend : public BaseBackend {
   LiteBackend() {}
   virtual ~LiteBackend() = default;

-  bool Init(const RuntimeOption& option);
+  bool Init(const RuntimeOption& option) override;

   bool Infer(std::vector<FDTensor>& inputs,
              std::vector<FDTensor>* outputs,
scripts/android/build_android_cpp_with_benchmark.sh (new executable file, 118 additions)
@@ -0,0 +1,118 @@
#!/bin/bash
set -e
set +x

# -------------------------------------------------------------------------------
# mutable global variables
# -------------------------------------------------------------------------------
TOOLCHAIN=clang  # gcc/clang toolchain

# -------------------------------------------------------------------------------
# readonly global variables
# -------------------------------------------------------------------------------
readonly ROOT_PATH=$(pwd)
readonly ANDROID_ABI=$1
readonly ANDROID_PLATFORM="android-$2"
readonly BUILD_ROOT=build/Android
readonly BUILD_DIR=${BUILD_ROOT}/${ANDROID_ABI}-api-$2

# -------------------------------------------------------------------------------
# tasks
# -------------------------------------------------------------------------------
__make_build_dir() {
  if [ ! -d "${BUILD_DIR}" ]; then
    echo "-- [INFO] BUILD_DIR: ${BUILD_DIR} not exists, setup manually ..."
    if [ ! -d "${BUILD_ROOT}" ]; then
      mkdir -p "${BUILD_ROOT}" && echo "-- [INFO] Created ${BUILD_ROOT} !"
    fi
    mkdir -p "${BUILD_DIR}" && echo "-- [INFO] Created ${BUILD_DIR} !"
  else
    echo "-- [INFO] Found BUILD_DIR: ${BUILD_DIR}"
  fi
}

__check_cxx_envs() {
  if [ $LDFLAGS ]; then
    echo "-- [INFO] Found LDFLAGS: ${LDFLAGS}, \c"
    echo "unset it before crossing compiling ${ANDROID_ABI}"
    unset LDFLAGS
  fi
  if [ $CPPFLAGS ]; then
    echo "-- [INFO] Found CPPFLAGS: ${CPPFLAGS}, \c"
    echo "unset it before crossing compiling ${ANDROID_ABI}"
    unset CPPFLAGS
  fi
  if [ $CPLUS_INCLUDE_PATH ]; then
    echo "-- [INFO] Found CPLUS_INCLUDE_PATH: ${CPLUS_INCLUDE_PATH}, \c"
    echo "unset it before crossing compiling ${ANDROID_ABI}"
    unset CPLUS_INCLUDE_PATH
  fi
  if [ $C_INCLUDE_PATH ]; then
    echo "-- [INFO] Found C_INCLUDE_PATH: ${C_INCLUDE_PATH}, \c"
    echo "unset it before crossing compiling ${ANDROID_ABI}"
    unset C_INCLUDE_PATH
  fi
}

__set_android_ndk() {
  if [ -z $ANDROID_NDK ]; then
    echo "-- [INFO] ANDROID_NDK not exists, please setup manually ..."
    exit 0
  else
    echo "-- [INFO] Found ANDROID_NDK: ${ANDROID_NDK}"
  fi
  if [ "$ANDROID_NDK" ]; then
    NDK_VERSION=$(echo $ANDROID_NDK | egrep -o "[0-9]{2}" | head -n 1)
    if [ "$NDK_VERSION" -gt 17 ]; then
      TOOLCHAIN=clang
    fi
    echo "-- [INFO] Checked ndk version: ${NDK_VERSION}"
    echo "-- [INFO] Selected toolchain: ${TOOLCHAIN}"
  fi
}

__build_fastdeploy_android_shared() {

  local ANDROID_STL=c++_shared  # c++_static
  local ANDROID_TOOLCHAIN=${TOOLCHAIN}
  local TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake
  local FASDEPLOY_INSTALL_DIR="${ROOT_PATH}/${BUILD_DIR}/install"
  cd "${BUILD_DIR}" && echo "-- [INFO] Working Dir: ${PWD}"

  cmake -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
        -DCMAKE_BUILD_TYPE=MinSizeRel \
        -DANDROID_ABI=${ANDROID_ABI} \
        -DANDROID_NDK=${ANDROID_NDK} \
        -DANDROID_PLATFORM=${ANDROID_PLATFORM} \
        -DANDROID_STL=${ANDROID_STL} \
        -DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN} \
        -DENABLE_ORT_BACKEND=OFF \
        -DENABLE_LITE_BACKEND=ON \
        -DENABLE_PADDLE2ONNX=OFF \
        -DENABLE_FLYCV=ON \
        -DENABLE_TEXT=OFF \
        -DENABLE_VISION=ON \
        -DBUILD_EXAMPLES=ON \
        -DENABLE_BENCHMARK=ON \
        -DWITH_OPENCV_STATIC=OFF \
        -DWITH_LITE_STATIC=OFF \
        -DWITH_OPENMP=OFF \
        -DCMAKE_INSTALL_PREFIX=${FASDEPLOY_INSTALL_DIR} \
        -Wno-dev ../../.. && make -j8 && make install

  echo "-- [INFO][built][${ANDROID_ABI}][${BUILD_DIR}/install]"
}

main() {
  __make_build_dir
  __check_cxx_envs
  __set_android_ndk
  __build_fastdeploy_android_shared
  exit 0
}

main

# Usage:
# ./scripts/android/build_android_cpp_with_benchmark.sh arm64-v8a 21
# ./scripts/android/build_android_cpp_with_benchmark.sh armeabi-v7a 21
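A usage sketch for the new Android build script (the NDK path below is a placeholder; the script reads ANDROID_NDK from the environment and takes the ABI and API level as positional arguments):

export ANDROID_NDK=/path/to/android-ndk   # placeholder NDK location
./scripts/android/build_android_cpp_with_benchmark.sh arm64-v8a 21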
@@ -62,7 +62,7 @@ __build_fastdeploy_linux_x86_64_gpu_shared() {
       -DENABLE_OPENVINO_BACKEND=ON \
       -DENABLE_PADDLE2ONNX=ON \
       -DENABLE_VISION=ON \
-      -DENABLE_BENCHMARK=ON \
+      -DENABLE_BENCHMARK=OFF \
       -DBUILD_EXAMPLES=ON \
       -DCMAKE_INSTALL_PREFIX=${FASDEPLOY_INSTALL_DIR} \
       -Wno-dev ../../.. && make -j8 && make install
scripts/linux/build_linux_x86_64_cpp_gpu_with_benchmark.sh (new executable file, 83 additions)
@@ -0,0 +1,83 @@
#!/bin/bash
set -e
set +x

# -------------------------------------------------------------------------------
# readonly global variables
# -------------------------------------------------------------------------------
readonly ROOT_PATH=$(pwd)
readonly BUILD_ROOT=build/Linux
readonly BUILD_DIR="${BUILD_ROOT}/x86_64_gpu"

# -------------------------------------------------------------------------------
# tasks
# -------------------------------------------------------------------------------
__make_build_dir() {
  if [ ! -d "${BUILD_DIR}" ]; then
    echo "-- [INFO] BUILD_DIR: ${BUILD_DIR} not exists, setup manually ..."
    if [ ! -d "${BUILD_ROOT}" ]; then
      mkdir -p "${BUILD_ROOT}" && echo "-- [INFO] Created ${BUILD_ROOT} !"
    fi
    mkdir -p "${BUILD_DIR}" && echo "-- [INFO] Created ${BUILD_DIR} !"
  else
    echo "-- [INFO] Found BUILD_DIR: ${BUILD_DIR}"
  fi
}

__check_cxx_envs() {
  if [ $LDFLAGS ]; then
    echo "-- [INFO] Found LDFLAGS: ${LDFLAGS}, \c"
    echo "unset it before crossing compiling ${BUILD_DIR}"
    unset LDFLAGS
  fi
  if [ $CPPFLAGS ]; then
    echo "-- [INFO] Found CPPFLAGS: ${CPPFLAGS}, \c"
    echo "unset it before crossing compiling ${BUILD_DIR}"
    unset CPPFLAGS
  fi
  if [ $CPLUS_INCLUDE_PATH ]; then
    echo "-- [INFO] Found CPLUS_INCLUDE_PATH: ${CPLUS_INCLUDE_PATH}, \c"
    echo "unset it before crossing compiling ${BUILD_DIR}"
    unset CPLUS_INCLUDE_PATH
  fi
  if [ $C_INCLUDE_PATH ]; then
    echo "-- [INFO] Found C_INCLUDE_PATH: ${C_INCLUDE_PATH}, \c"
    echo "unset it before crossing compiling ${BUILD_DIR}"
    unset C_INCLUDE_PATH
  fi
}

__build_fastdeploy_linux_x86_64_gpu_shared() {

  local FASDEPLOY_INSTALL_DIR="${ROOT_PATH}/${BUILD_DIR}/install"
  cd "${BUILD_DIR}" && echo "-- [INFO] Working Dir: ${PWD}"

  cmake -DCMAKE_BUILD_TYPE=Release \
        -DWITH_GPU=ON \
        -DTRT_DIRECTORY=${TRT_DIRECTORY} \
        -DCUDA_DIRECTORY=${CUDA_DIRECTORY} \
        -DENABLE_ORT_BACKEND=ON \
        -DENABLE_TRT_BACKEND=ON \
        -DENABLE_PADDLE_BACKEND=ON \
        -DENABLE_OPENVINO_BACKEND=ON \
        -DENABLE_PADDLE2ONNX=ON \
        -DENABLE_VISION=ON \
        -DENABLE_BENCHMARK=ON \
        -DBUILD_EXAMPLES=ON \
        -DCMAKE_INSTALL_PREFIX=${FASDEPLOY_INSTALL_DIR} \
        -Wno-dev ../../.. && make -j8 && make install

  echo "-- [INFO][built][x86_64_gpu][${BUILD_DIR}/install]"
}

main() {
  __make_build_dir
  __check_cxx_envs
  __build_fastdeploy_linux_x86_64_gpu_shared
  exit 0
}

main

# Usage:
# ./scripts/linux/build_linux_x86_64_cpp_gpu_with_benchmark.sh
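A usage sketch for the new Linux GPU build script, assuming local CUDA and TensorRT installs (both paths below are placeholders; the script forwards TRT_DIRECTORY and CUDA_DIRECTORY from the environment to CMake):

export CUDA_DIRECTORY=/usr/local/cuda      # placeholder CUDA location
export TRT_DIRECTORY=/path/to/TensorRT     # placeholder TensorRT location
./scripts/linux/build_linux_x86_64_cpp_gpu_with_benchmark.sh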