diff --git a/benchmark/cpp/CMakeLists.txt b/benchmark/cpp/CMakeLists.txt index 9706587d3..c79e679c3 100755 --- a/benchmark/cpp/CMakeLists.txt +++ b/benchmark/cpp/CMakeLists.txt @@ -9,9 +9,12 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) include_directories(${FASTDEPLOY_INCS}) add_executable(benchmark_yolov5 ${PROJECT_SOURCE_DIR}/benchmark_yolov5.cc) +add_executable(benchmark_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov8.cc) if(UNIX AND (NOT APPLE) AND (NOT ANDROID)) target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread) + target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread) else() target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags) + target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags) endif() diff --git a/benchmark/cpp/benchmark_ppyolov8.cc b/benchmark/cpp/benchmark_ppyolov8.cc new file mode 100644 index 000000000..4bd6e0df4 --- /dev/null +++ b/benchmark/cpp/benchmark_ppyolov8.cc @@ -0,0 +1,125 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "fastdeploy/benchmark/utils.h" +#include "fastdeploy/vision.h" +#include "flags.h" + +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +bool RunModel(std::string model_dir, std::string image_file, size_t warmup, + size_t repeats, size_t dump_period, std::string cpu_mem_file_name, + std::string gpu_mem_file_name) { + // Initialization + auto option = fastdeploy::RuntimeOption(); + if (!CreateRuntimeOption(&option)) { + PrintUsage(); + return false; + } + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "infer_cfg.yml"; + + if (FLAGS_profile_mode == "runtime") { + option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup); + } + auto model = fastdeploy::vision::detection::PaddleYOLOv8( + model_file, params_file, config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return false; + } + auto im = cv::imread(image_file); + // For Runtime + if (FLAGS_profile_mode == "runtime") { + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return false; + } + double profile_time = model.GetProfileTime() * 1000; + std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; + } else { + // For End2End + // Step1: warm up for warmup times + std::cout << "Warmup " << warmup << " times..." << std::endl; + for (int i = 0; i < warmup; i++) { + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return false; + } + } + std::vector<float> end2end_statis; + // Step2: repeat for repeats times + std::cout << "Counting time..." 
<< std::endl; + fastdeploy::TimeCounter tc; + fastdeploy::vision::DetectionResult res; + for (int i = 0; i < repeats; i++) { + if (FLAGS_collect_memory_info && i % dump_period == 0) { + fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name); +#if defined(WITH_GPU) + fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name, + FLAGS_device_id); +#endif + } + tc.Start(); + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return false; + } + tc.End(); + end2end_statis.push_back(tc.Duration() * 1000); + } + float end2end = std::accumulate(end2end_statis.end() - repeats, + end2end_statis.end(), 0.f) / + repeats; + std::cout << "End2End(ms): " << end2end << "ms." << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; + } + + return true; +} + +int main(int argc, char* argv[]) { + google::ParseCommandLineFlags(&argc, &argv, true); + int repeats = FLAGS_repeat; + int warmup = FLAGS_warmup; + int dump_period = FLAGS_dump_period; + std::string cpu_mem_file_name = "result_cpu.txt"; + std::string gpu_mem_file_name = "result_gpu.txt"; + // Run model + if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period, + cpu_mem_file_name, gpu_mem_file_name) != true) { + exit(1); + } + if (FLAGS_collect_memory_info) { + float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name); + std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl; +#if defined(WITH_GPU) + float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name); + std::cout << "gpu_pss_mb: " << gpu_mem << "MB." 
<< std::endl; +#endif + } + return 0; +} \ No newline at end of file diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc old mode 100755 new mode 100644 index d84292536..ae16dd8d8 --- a/benchmark/cpp/benchmark_yolov5.cc +++ b/benchmark/cpp/benchmark_yolov5.cc @@ -65,8 +65,10 @@ bool RunModel(std::string model_file, std::string image_file, size_t warmup, for (int i = 0; i < repeats; i++) { if (FLAGS_collect_memory_info && i % dump_period == 0) { fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name); +#if defined(WITH_GPU) fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name, FLAGS_device_id); +#endif } tc.Start(); if (!model.Predict(im, &res)) { @@ -102,9 +104,11 @@ int main(int argc, char* argv[]) { } if (FLAGS_collect_memory_info) { float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name); - float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name); std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl; +#if defined(WITH_GPU) + float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name); std::cout << "gpu_pss_mb: " << gpu_mem << "MB." 
<< std::endl; +#endif } return 0; } \ No newline at end of file diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h index 3d35eb313..c9a8e8d91 100755 --- a/benchmark/cpp/flags.h +++ b/benchmark/cpp/flags.h @@ -27,13 +27,14 @@ DEFINE_int32(repeat, 1000, "Number of repeats for profiling."); DEFINE_string(profile_mode, "runtime", "runtime or end2end."); DEFINE_string(backend, "default", "The inference runtime backend, support: ['default', 'ort', " - "'paddle', 'ov', 'trt', 'paddle_trt']"); + "'paddle', 'ov', 'trt', 'paddle_trt', 'lite']"); DEFINE_int32(cpu_thread_nums, 8, "Set numbers of cpu thread."); DEFINE_bool( include_h2d_d2h, false, "Whether run profiling with h2d and d2h."); DEFINE_bool( use_fp16, false, - "Whether to use FP16 mode, only support 'trt' and 'paddle_trt' backend"); + "Whether to use FP16 mode, only support 'trt', 'paddle_trt' " + "and 'lite' backend"); DEFINE_bool( collect_memory_info, false, "Whether to collect memory info"); DEFINE_int32(dump_period, 100, "How often to collect memory info."); @@ -58,7 +59,6 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { option->UsePaddleInferBackend(); } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") { option->UseTrtBackend(); - option->SetTrtInputShape("input", {1, 3, 112, 112}); if (FLAGS_backend == "paddle_trt") { option->EnablePaddleToTrt(); } @@ -81,11 +81,16 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { option->UseOpenVINOBackend(); } else if (FLAGS_backend == "paddle") { option->UsePaddleInferBackend(); + } else if (FLAGS_backend == "lite") { + option->UsePaddleLiteBackend(); + if (FLAGS_use_fp16) { + option->EnableLiteFP16(); + } } else if (FLAGS_backend == "default") { return true; } else { std::cout << "While inference with CPU, only support " - "default/ort/ov/paddle now, " + "default/ort/ov/paddle/lite now, " << FLAGS_backend << " is not supported." 
<< std::endl; return false; } diff --git a/fastdeploy/runtime/backends/lite/lite_backend.h b/fastdeploy/runtime/backends/lite/lite_backend.h index bd738545a..15e71b50a 100644 --- a/fastdeploy/runtime/backends/lite/lite_backend.h +++ b/fastdeploy/runtime/backends/lite/lite_backend.h @@ -32,7 +32,7 @@ class LiteBackend : public BaseBackend { LiteBackend() {} virtual ~LiteBackend() = default; - bool Init(const RuntimeOption& option); + bool Init(const RuntimeOption& option) override; bool Infer(std::vector& inputs, std::vector* outputs, diff --git a/scripts/android/build_android_cpp_with_benchmark.sh b/scripts/android/build_android_cpp_with_benchmark.sh new file mode 100755 index 000000000..4a2c4084c --- /dev/null +++ b/scripts/android/build_android_cpp_with_benchmark.sh @@ -0,0 +1,118 @@ +#!/bin/bash +set -e +set +x + +# ------------------------------------------------------------------------------- +# mutable global variables +# ------------------------------------------------------------------------------- +TOOLCHAIN=clang # gcc/clang toolchain + +# ------------------------------------------------------------------------------- +# readonly global variables +# ------------------------------------------------------------------------------- +readonly ROOT_PATH=$(pwd) +readonly ANDROID_ABI=$1 +readonly ANDROID_PLATFORM="android-$2" +readonly BUILD_ROOT=build/Android +readonly BUILD_DIR=${BUILD_ROOT}/${ANDROID_ABI}-api-$2 + +# ------------------------------------------------------------------------------- +# tasks +# ------------------------------------------------------------------------------- +__make_build_dir() { + if [ ! -d "${BUILD_DIR}" ]; then + echo "-- [INFO] BUILD_DIR: ${BUILD_DIR} not exists, setup manually ..." + if [ ! -d "${BUILD_ROOT}" ]; then + mkdir -p "${BUILD_ROOT}" && echo "-- [INFO] Created ${BUILD_ROOT} !" + fi + mkdir -p "${BUILD_DIR}" && echo "-- [INFO] Created ${BUILD_DIR} !" 
+ else + echo "-- [INFO] Found BUILD_DIR: ${BUILD_DIR}" + fi +} + +__check_cxx_envs() { + if [ $LDFLAGS ]; then + echo "-- [INFO] Found LDFLAGS: ${LDFLAGS}, \c" + echo "unset it before crossing compiling ${ANDROID_ABI}" + unset LDFLAGS + fi + if [ $CPPFLAGS ]; then + echo "-- [INFO] Found CPPFLAGS: ${CPPFLAGS}, \c" + echo "unset it before crossing compiling ${ANDROID_ABI}" + unset CPPFLAGS + fi + if [ $CPLUS_INCLUDE_PATH ]; then + echo "-- [INFO] Found CPLUS_INCLUDE_PATH: ${CPLUS_INCLUDE_PATH}, \c" + echo "unset it before crossing compiling ${ANDROID_ABI}" + unset CPLUS_INCLUDE_PATH + fi + if [ $C_INCLUDE_PATH ]; then + echo "-- [INFO] Found C_INCLUDE_PATH: ${C_INCLUDE_PATH}, \c" + echo "unset it before crossing compiling ${ANDROID_ABI}" + unset C_INCLUDE_PATH + fi +} + +__set_android_ndk() { + if [ -z $ANDROID_NDK ]; then + echo "-- [INFO] ANDROID_NDK not exists, please setup manually ..." + exit 0 + else + echo "-- [INFO] Found ANDROID_NDK: ${ANDROID_NDK}" + fi + if [ "$ANDROID_NDK" ]; then + NDK_VERSION=$(echo $ANDROID_NDK | egrep -o "[0-9]{2}" | head -n 1) + if [ "$NDK_VERSION" -gt 17 ]; then + TOOLCHAIN=clang + fi + echo "-- [INFO] Checked ndk version: ${NDK_VERSION}" + echo "-- [INFO] Selected toolchain: ${TOOLCHAIN}" + fi +} + +__build_fastdeploy_android_shared() { + + local ANDROID_STL=c++_shared # c++_static + local ANDROID_TOOLCHAIN=${TOOLCHAIN} + local TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake + local FASDEPLOY_INSTALL_DIR="${ROOT_PATH}/${BUILD_DIR}/install" + cd "${BUILD_DIR}" && echo "-- [INFO] Working Dir: ${PWD}" + + cmake -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ + -DCMAKE_BUILD_TYPE=MinSizeRel \ + -DANDROID_ABI=${ANDROID_ABI} \ + -DANDROID_NDK=${ANDROID_NDK} \ + -DANDROID_PLATFORM=${ANDROID_PLATFORM} \ + -DANDROID_STL=${ANDROID_STL} \ + -DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN} \ + -DENABLE_ORT_BACKEND=OFF \ + -DENABLE_LITE_BACKEND=ON \ + -DENABLE_PADDLE2ONNX=OFF \ + -DENABLE_FLYCV=ON \ + -DENABLE_TEXT=OFF \ + 
-DENABLE_VISION=ON \ + -DBUILD_EXAMPLES=ON \ + -DENABLE_BENCHMARK=ON \ + -DWITH_OPENCV_STATIC=OFF \ + -DWITH_LITE_STATIC=OFF \ + -DWITH_OPENMP=OFF \ + -DCMAKE_INSTALL_PREFIX=${FASDEPLOY_INSTALL_DIR} \ + -Wno-dev ../../.. && make -j8 && make install + + echo "-- [INFO][built][${ANDROID_ABI}][${BUILD_DIR}/install]" +} + +main() { + __make_build_dir + __check_cxx_envs + __set_android_ndk + __build_fastdeploy_android_shared + exit 0 +} + +main + +# Usage: +# ./scripts/android/build_android_cpp_with_benchmark.sh arm64-v8a 21 +# ./scripts/android/build_android_cpp_with_benchmark.sh armeabi-v7a 21 diff --git a/scripts/linux/build_linux_x86_64_cpp_gpu.sh b/scripts/linux/build_linux_x86_64_cpp_gpu.sh index 6f2b4ed7d..9ae91921e 100755 --- a/scripts/linux/build_linux_x86_64_cpp_gpu.sh +++ b/scripts/linux/build_linux_x86_64_cpp_gpu.sh @@ -62,7 +62,7 @@ __build_fastdeploy_linux_x86_64_gpu_shared() { -DENABLE_OPENVINO_BACKEND=ON \ -DENABLE_PADDLE2ONNX=ON \ -DENABLE_VISION=ON \ - -DENABLE_BENCHMARK=ON \ + -DENABLE_BENCHMARK=OFF \ -DBUILD_EXAMPLES=ON \ -DCMAKE_INSTALL_PREFIX=${FASDEPLOY_INSTALL_DIR} \ -Wno-dev ../../.. && make -j8 && make install diff --git a/scripts/linux/build_linux_x86_64_cpp_gpu_with_benchmark.sh b/scripts/linux/build_linux_x86_64_cpp_gpu_with_benchmark.sh new file mode 100755 index 000000000..6f2b4ed7d --- /dev/null +++ b/scripts/linux/build_linux_x86_64_cpp_gpu_with_benchmark.sh @@ -0,0 +1,83 @@ +#!/bin/bash +set -e +set +x + +# ------------------------------------------------------------------------------- +# readonly global variables +# ------------------------------------------------------------------------------- +readonly ROOT_PATH=$(pwd) +readonly BUILD_ROOT=build/Linux +readonly BUILD_DIR="${BUILD_ROOT}/x86_64_gpu" + +# ------------------------------------------------------------------------------- +# tasks +# ------------------------------------------------------------------------------- +__make_build_dir() { + if [ ! 
-d "${BUILD_DIR}" ]; then + echo "-- [INFO] BUILD_DIR: ${BUILD_DIR} not exists, setup manually ..." + if [ ! -d "${BUILD_ROOT}" ]; then + mkdir -p "${BUILD_ROOT}" && echo "-- [INFO] Created ${BUILD_ROOT} !" + fi + mkdir -p "${BUILD_DIR}" && echo "-- [INFO] Created ${BUILD_DIR} !" + else + echo "-- [INFO] Found BUILD_DIR: ${BUILD_DIR}" + fi +} + +__check_cxx_envs() { + if [ $LDFLAGS ]; then + echo "-- [INFO] Found LDFLAGS: ${LDFLAGS}, \c" + echo "unset it before crossing compiling ${BUILD_DIR}" + unset LDFLAGS + fi + if [ $CPPFLAGS ]; then + echo "-- [INFO] Found CPPFLAGS: ${CPPFLAGS}, \c" + echo "unset it before crossing compiling ${BUILD_DIR}" + unset CPPFLAGS + fi + if [ $CPLUS_INCLUDE_PATH ]; then + echo "-- [INFO] Found CPLUS_INCLUDE_PATH: ${CPLUS_INCLUDE_PATH}, \c" + echo "unset it before crossing compiling ${BUILD_DIR}" + unset CPLUS_INCLUDE_PATH + fi + if [ $C_INCLUDE_PATH ]; then + echo "-- [INFO] Found C_INCLUDE_PATH: ${C_INCLUDE_PATH}, \c" + echo "unset it before crossing compiling ${BUILD_DIR}" + unset C_INCLUDE_PATH + fi +} + +__build_fastdeploy_linux_x86_64_gpu_shared() { + + local FASDEPLOY_INSTALL_DIR="${ROOT_PATH}/${BUILD_DIR}/install" + cd "${BUILD_DIR}" && echo "-- [INFO] Working Dir: ${PWD}" + + cmake -DCMAKE_BUILD_TYPE=Release \ + -DWITH_GPU=ON \ + -DTRT_DIRECTORY=${TRT_DIRECTORY} \ + -DCUDA_DIRECTORY=${CUDA_DIRECTORY} \ + -DENABLE_ORT_BACKEND=ON \ + -DENABLE_TRT_BACKEND=ON \ + -DENABLE_PADDLE_BACKEND=ON \ + -DENABLE_OPENVINO_BACKEND=ON \ + -DENABLE_PADDLE2ONNX=ON \ + -DENABLE_VISION=ON \ + -DENABLE_BENCHMARK=ON \ + -DBUILD_EXAMPLES=ON \ + -DCMAKE_INSTALL_PREFIX=${FASDEPLOY_INSTALL_DIR} \ + -Wno-dev ../../.. && make -j8 && make install + + echo "-- [INFO][built][x86_64_gpu][${BUILD_DIR}/install]" +} + +main() { + __make_build_dir + __check_cxx_envs + __build_fastdeploy_linux_x86_64_gpu_shared + exit 0 +} + +main + +# Usage: +# ./scripts/linux/build_linux_x86_64_cpp_gpu_with_benchmark.sh