diff --git a/benchmark/cpp/benchmark.cc b/benchmark/cpp/benchmark.cc
index 687e4b584..fef7c267d 100644
--- a/benchmark/cpp/benchmark.cc
+++ b/benchmark/cpp/benchmark.cc
@@ -33,16 +33,11 @@ DEFINE_string(tensors, "tensor_a.txt:tensor_b.txt",
               "The paths to dumped tensors.");
 DEFINE_bool(mem, false, "Whether to force to collect memory info.");
 DEFINE_int32(interval, -1, "Sampling interval for collect memory info.");
-DEFINE_string(model_file, "UNKNOWN",
-              "Optional, set specific model file,"
-              "eg, model.pdmodel, model.onnx");
-DEFINE_string(params_file, "",
-              "Optional, set specific params file,"
-              "eg, model.pdiparams.");
 DEFINE_string(model_format, "PADDLE",
               "Optional, set specific model format,"
               "eg, PADDLE/ONNX/RKNN/TORCHSCRIPT/SOPHGO");
 DEFINE_bool(disable_mkldnn, false, "disable mkldnn for paddle backend");
+DEFINE_string(optimized_model_dir, "", "Set optimized model dir for lite backend.");
 
 #if defined(ENABLE_BENCHMARK)
 static std::vector<int64_t> GetInt64Shape(const std::vector<int>& shape) {
@@ -117,15 +112,25 @@ static void RuntimeProfiling(int argc, char* argv[]) {
   auto model_format = fastdeploy::ModelFormat::PADDLE;
   if (FLAGS_model_file != "UNKNOWN") {
     // Set model file/param/format via command line
-    model_file = FLAGS_model + sep + FLAGS_model_file;
-    params_file = FLAGS_model + sep + FLAGS_params_file;
+    if (FLAGS_model != "") {
+      model_file = FLAGS_model + sep + FLAGS_model_file;
+      params_file = FLAGS_model + sep + FLAGS_params_file;
+    } else {
+      model_file = FLAGS_model_file;
+      params_file = FLAGS_params_file;
+    }
     model_format = GetModelFormat(FLAGS_model_format);
     if (model_format == fastdeploy::ModelFormat::PADDLE &&
         FLAGS_params_file == "") {
-      std::cout << "[ERROR] params_file can not be empty for PADDLE"
+      if (config_info["backend"] != "lite") {
+        std::cout << "[ERROR] params_file cannot be empty for PADDLE"
                 << " format, Please, set your custom params_file manually."
                 << std::endl;
-      return;
+        return;
+      } else {
+        std::cout << "[INFO] Will use the Paddle Lite light API for: "
+                  << model_file << std::endl;
+      }
     }
   } else {  // Set model file/param/format via model dir (only support
@@ -140,6 +145,16 @@ static void RuntimeProfiling(int argc, char* argv[]) {
   option.SetModelPath(model_file, params_file, model_format);
 
+  // Set opt model dir
+  if (config_info["backend"] == "lite") {
+    if (FLAGS_optimized_model_dir != "") {
+      option.paddle_lite_option.optimized_model_dir =
+          FLAGS_optimized_model_dir;
+    } else {
+      option.paddle_lite_option.optimized_model_dir = FLAGS_model;
+    }
+  }
+
   // Get input shapes/names/dtypes
   std::vector<std::vector<int32_t>> input_shapes =
       benchmark::ResultManager::GetInputShapes(FLAGS_shapes);
diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h
index 25ab8fe71..a126ae8fb 100755
--- a/benchmark/cpp/flags.h
+++ b/benchmark/cpp/flags.h
@@ -33,6 +33,12 @@ DEFINE_string(config_path, "config.txt", "Path of benchmark config.");
 DEFINE_int32(warmup, -1, "Number of warmup for profiling.");
 DEFINE_int32(repeat, -1, "Number of repeats for profiling.");
 DEFINE_int32(xpu_l3_cache, -1, "Size xpu l3 cache for profiling.");
+DEFINE_string(model_file, "UNKNOWN",
+              "Optional, set specific model file,"
+              "eg, model.pdmodel, model.onnx");
+DEFINE_string(params_file, "",
+              "Optional, set specific params file,"
+              "eg, model.pdiparams.");
 
 static void PrintUsage() {
   std::cout << "Usage: infer_demo --model model_path --image img_path "
@@ -50,8 +56,12 @@ static void PrintBenchmarkInfo(std::unordered_map<std::string,
   std::vector<std::string> model_names;
   fastdeploy::benchmark::Split(FLAGS_model, model_names, sep);
   if (model_names.empty()) {
-    std::cout << "Directory of the inference model is invalid!!!" << std::endl;
-    return;
+    if (FLAGS_model_file != "UNKNOWN") {
+      model_names.push_back(FLAGS_model_file);
+    } else {
+      std::cout << "[WARNING] Directory of the inference model is empty!!!"
+                << std::endl;
+    }
   }
   // Save benchmark info
   int warmup = std::stoi(config_info["warmup"]);
@@ -65,7 +75,9 @@ static void PrintBenchmarkInfo(std::unordered_map<std::string,
[flags.h hunk body lost in extraction]
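Note: the path-resolution rule the benchmark.cc hunk introduces is easiest to see in isolation. A minimal sketch of the same branching (ResolvePath is a hypothetical helper, not part of the patch):

#include <string>

// Mirrors the new logic: --model_file/--params_file are joined with --model
// only when a model dir is given; otherwise the flag values are used as-is,
// so a single-file model (e.g. a Paddle Lite .nb) can be passed directly.
static std::string ResolvePath(const std::string& model_dir,
                               const std::string& file,
                               const std::string& sep = "/") {
  if (!model_dir.empty()) {
    return model_dir + sep + file;
  }
  return file;  // no dir: treat the flag value as a full path
}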
diff --git a/fastdeploy/runtime/backends/lite/lite_backend.cc b/fastdeploy/runtime/backends/lite/lite_backend.cc
--- a/fastdeploy/runtime/backends/lite/lite_backend.cc
+++ b/fastdeploy/runtime/backends/lite/lite_backend.cc
@@ ... @@ bool LiteBackend::Init(const RuntimeOption& runtime_option) {
-  config_.set_model_file(runtime_option.model_file);
-  config_.set_param_file(runtime_option.params_file);
-  BuildOption(runtime_option.paddle_lite_option);
-  predictor_ =
-      paddle::lite_api::CreatePaddlePredictor<paddle::lite_api::CxxConfig>(
-          config_);
-  if (option_.optimized_model_dir != "") {
-    FDINFO << "Optimzed model dir is not empty, will save optimized model to: "
-           << option_.optimized_model_dir << std::endl;
-    predictor_->SaveOptimizedModel(
-        option_.optimized_model_dir,
-        paddle::lite_api::LiteModelType::kNaiveBuffer);
+  if (runtime_option.params_file == "") {
+    // Use the light API for Arm CPU via MobileConfig.
+    FDASSERT(runtime_option.device == Device::CPU,
+             "In FastDeploy, the Paddle Lite light API is only supported for Arm CPU now!")
+    mobile_config_.set_model_from_file(runtime_option.model_file);
+    mobile_config_.set_threads(runtime_option.paddle_lite_option.cpu_threads);
+    mobile_config_.set_power_mode(static_cast<paddle::lite_api::PowerMode>(
+        runtime_option.paddle_lite_option.power_mode));
+    // TODO(qiuyanjun): Add OpenCL support for mobile gpu.
+    // Paddle-Lite/blob/develop/lite/api/tools/benchmark/benchmark.h#L265
+    // mobile_config_.set_opencl_tune(
+    //     tune_mode, opencl_cache_dir, opencl_tuned_file);
+    // mobile_config_.set_opencl_precision(gpu_precision);
+    predictor_ =
+        paddle::lite_api::CreatePaddlePredictor<paddle::lite_api::MobileConfig>(
+            mobile_config_);
+  } else {
+    // Use the full API for multiple hardware backends via CxxConfig.
+    config_.set_model_file(runtime_option.model_file);
+    config_.set_param_file(runtime_option.params_file);
+    BuildOption(runtime_option.paddle_lite_option);
+    predictor_ =
+        paddle::lite_api::CreatePaddlePredictor<paddle::lite_api::CxxConfig>(
+            config_);
+    if (option_.optimized_model_dir != "") {
+      FDINFO << "Optimized model dir is not empty, will save optimized model to: "
+             << option_.optimized_model_dir << std::endl;
+      predictor_->SaveOptimizedModel(
+          option_.optimized_model_dir,
+          paddle::lite_api::LiteModelType::kNaiveBuffer);
+    }
   }
-
+
   inputs_desc_.clear();
   outputs_desc_.clear();
   inputs_order_.clear();
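For context, the light-API branch above boils down to standard Paddle Lite MobileConfig usage. A minimal standalone sketch (model path and thread count are illustrative):

#include "paddle_api.h"

int main() {
  paddle::lite_api::MobileConfig config;
  // The light API only loads an optimized .nb model, i.e. the output of the
  // opt tool or of SaveOptimizedModel() in the full-API branch above.
  config.set_model_from_file("mobilenet_v1.nb");
  config.set_threads(2);
  config.set_power_mode(paddle::lite_api::LITE_POWER_NO_BIND);
  auto predictor = paddle::lite_api::CreatePaddlePredictor<
      paddle::lite_api::MobileConfig>(config);
  return predictor != nullptr ? 0 : 1;
}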
diff --git a/fastdeploy/runtime/backends/lite/lite_backend.h b/fastdeploy/runtime/backends/lite/lite_backend.h
index aaad37b94..90f0f900e 100644
--- a/fastdeploy/runtime/backends/lite/lite_backend.h
+++ b/fastdeploy/runtime/backends/lite/lite_backend.h
@@ -48,8 +48,9 @@ class LiteBackend : public BaseBackend {
   std::vector<TensorInfo> GetOutputInfos() override;
 
  private:
+  // Build CxxConfig from option for the Paddle Lite full API.
   void BuildOption(const LiteBackendOption& option);
-
+  // Configure the hardware backends for the Paddle Lite full API.
   void ConfigureCpu(const LiteBackendOption& option);
   void ConfigureGpu(const LiteBackendOption& option);
   void ConfigureTimvx(const LiteBackendOption& option);
@@ -59,6 +60,7 @@ class LiteBackend : public BaseBackend {
   paddle::lite_api::CxxConfig config_;
   std::shared_ptr<paddle::lite_api::PaddlePredictor> predictor_;
+  paddle::lite_api::MobileConfig mobile_config_;
   std::vector<TensorInfo> inputs_desc_;
   std::vector<TensorInfo> outputs_desc_;
   std::map<std::string, int> inputs_order_;
diff --git a/fastdeploy/runtime/backends/lite/option.h b/fastdeploy/runtime/backends/lite/option.h
index 410ec6034..9b90c861e 100755
--- a/fastdeploy/runtime/backends/lite/option.h
+++ b/fastdeploy/runtime/backends/lite/option.h
@@ -61,6 +61,9 @@ struct LiteBackendOption {
   Device device = Device::CPU;
   // Index of inference device
   int device_id = 0;
+  // TODO(qiuyanjun): add opencl binary path and cache settings.
+  std::string opencl_cache_dir = "/data/local/tmp/";
+  std::string opencl_tuned_file = "/data/local/tmp/opencl_tuned_kernels.bin";
   /// kunlunxin_l3_workspace_size
   int kunlunxin_l3_workspace_size = 0xfffc00;
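From the caller's side, the new option plugs into RuntimeOption just as the benchmark.cc hunk does. A hedged sketch of typical usage (paths are illustrative; assumes a FastDeploy build with the Lite backend enabled and the UseCpu/UseLiteBackend helpers available):

#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.UseCpu();
  option.UseLiteBackend();
  // An empty params_file routes LiteBackend::Init() into the new
  // MobileConfig/light-API branch; a non-empty one keeps the CxxConfig path.
  option.SetModelPath("mobilenet_v1.nb", "", fastdeploy::ModelFormat::PADDLE);
  option.paddle_lite_option.optimized_model_dir = "/data/local/tmp/opt_models";
  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    return -1;
  }
  return 0;
}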
diff --git a/scripts/android/build_android_cpp_opencl_with_benchmark.sh b/scripts/android/build_android_cpp_opencl_with_benchmark.sh
new file mode 100755
index 000000000..5c9eb25f4
--- /dev/null
+++ b/scripts/android/build_android_cpp_opencl_with_benchmark.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+set -e
+set +x
+
+# -------------------------------------------------------------------------------
+# mutable global variables
+# -------------------------------------------------------------------------------
+TOOLCHAIN=clang  # gcc/clang toolchain
+
+# -------------------------------------------------------------------------------
+# readonly global variables
+# -------------------------------------------------------------------------------
+readonly ROOT_PATH=$(pwd)
+readonly ANDROID_ABI=$1
+readonly ANDROID_PLATFORM="android-$2"
+readonly BUILD_ROOT=build/Android
+readonly BUILD_DIR=${BUILD_ROOT}/${ANDROID_ABI}-api-$2
+
+# -------------------------------------------------------------------------------
+# tasks
+# -------------------------------------------------------------------------------
+__make_build_dir() {
+  if [ ! -d "${BUILD_DIR}" ]; then
+    echo "-- [INFO] BUILD_DIR: ${BUILD_DIR} does not exist, creating it ..."
+    if [ ! -d "${BUILD_ROOT}" ]; then
+      mkdir -p "${BUILD_ROOT}" && echo "-- [INFO] Created ${BUILD_ROOT} !"
+    fi
+    mkdir -p "${BUILD_DIR}" && echo "-- [INFO] Created ${BUILD_DIR} !"
+  else
+    echo "-- [INFO] Found BUILD_DIR: ${BUILD_DIR}"
+  fi
+}
+
+__check_cxx_envs() {
+  if [ "$LDFLAGS" ]; then
+    echo "-- [INFO] Found LDFLAGS: ${LDFLAGS}, \c"
+    echo "unset it before cross-compiling ${ANDROID_ABI}"
+    unset LDFLAGS
+  fi
+  if [ "$CPPFLAGS" ]; then
+    echo "-- [INFO] Found CPPFLAGS: ${CPPFLAGS}, \c"
+    echo "unset it before cross-compiling ${ANDROID_ABI}"
+    unset CPPFLAGS
+  fi
+  if [ "$CPLUS_INCLUDE_PATH" ]; then
+    echo "-- [INFO] Found CPLUS_INCLUDE_PATH: ${CPLUS_INCLUDE_PATH}, \c"
+    echo "unset it before cross-compiling ${ANDROID_ABI}"
+    unset CPLUS_INCLUDE_PATH
+  fi
+  if [ "$C_INCLUDE_PATH" ]; then
+    echo "-- [INFO] Found C_INCLUDE_PATH: ${C_INCLUDE_PATH}, \c"
+    echo "unset it before cross-compiling ${ANDROID_ABI}"
+    unset C_INCLUDE_PATH
+  fi
+}
+
+__set_android_ndk() {
+  if [ -z "$ANDROID_NDK" ]; then
+    echo "-- [INFO] ANDROID_NDK is not set, please set it up manually ..."
+    exit 0
+  else
+    echo "-- [INFO] Found ANDROID_NDK: ${ANDROID_NDK}"
+  fi
+  if [ "$ANDROID_NDK" ]; then
+    NDK_VERSION=$(echo "$ANDROID_NDK" | egrep -o "[0-9]{2}" | head -n 1)
+    if [ "$NDK_VERSION" -gt 17 ]; then
+      TOOLCHAIN=clang
+    fi
+    echo "-- [INFO] Checked ndk version: ${NDK_VERSION}"
+    echo "-- [INFO] Selected toolchain: ${TOOLCHAIN}"
+  fi
+}
+
+__build_fastdeploy_android_shared() {
+
+  local ANDROID_STL=c++_shared  # c++_static
+  local ANDROID_TOOLCHAIN=${TOOLCHAIN}
+  local TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake
+  local FASTDEPLOY_INSTALL_DIR="${ROOT_PATH}/${BUILD_DIR}/install"
+  cd "${BUILD_DIR}" && echo "-- [INFO] Working Dir: ${PWD}"
+
+  cmake -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
+        -DCMAKE_BUILD_TYPE=MinSizeRel \
+        -DANDROID_ABI=${ANDROID_ABI} \
+        -DANDROID_NDK=${ANDROID_NDK} \
+        -DANDROID_PLATFORM=${ANDROID_PLATFORM} \
+        -DANDROID_STL=${ANDROID_STL} \
+        -DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN} \
+        -DENABLE_ORT_BACKEND=OFF \
+        -DENABLE_LITE_BACKEND=ON \
+        -DENABLE_PADDLE2ONNX=OFF \
+        -DENABLE_FLYCV=ON \
+        -DENABLE_TEXT=OFF \
+        -DENABLE_VISION=ON \
+        -DBUILD_EXAMPLES=OFF \
+        -DENABLE_BENCHMARK=ON \
+        -DWITH_OPENCL=ON \
+        -DWITH_ANDROID_OPENCV_STATIC=OFF \
+        -DWITH_ANDROID_LITE_STATIC=OFF \
+        -DWITH_ANDROID_OPENMP=OFF \
+        -DWITH_TESTING=OFF \
+        -DCMAKE_INSTALL_PREFIX=${FASTDEPLOY_INSTALL_DIR} \
+        -Wno-dev ../../.. && make -j8 && make install
+
+  echo "-- [INFO][built][${ANDROID_ABI}][${BUILD_DIR}/install]"
+}
+
+main() {
+  __make_build_dir
+  __check_cxx_envs
+  __set_android_ndk
+  __build_fastdeploy_android_shared
+  exit 0
+}
+
+main
+
+# Usage:
+# ./scripts/android/build_android_cpp_opencl_with_benchmark.sh arm64-v8a 21
+# ./scripts/android/build_android_cpp_opencl_with_benchmark.sh armeabi-v7a 21
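The script builds with -DWITH_OPENCL=ON, while the OpenCL knobs added to option.h above are still marked TODO. A sketch of what that wiring could look like with the Paddle Lite mobile API (the enum values and the cache-dir/tuned-file split are assumptions based on the Paddle Lite benchmark tool referenced in the patch comment, not part of this patch):

#include "paddle_api.h"

// Hypothetical helper: applies the opencl_cache_dir / opencl_tuned_file
// options to a MobileConfig when an OpenCL-capable device is present.
void ConfigureOpenCL(paddle::lite_api::MobileConfig* config) {
  if (!paddle::lite_api::IsOpenCLBackendValid()) {
    return;  // fall back to the Arm CPU path
  }
  config->set_opencl_tune(paddle::lite_api::CL_TUNE_NORMAL,
                          "/data/local/tmp/",           // opencl_cache_dir
                          "opencl_tuned_kernels.bin");  // opencl_tuned_file
  config->set_opencl_precision(paddle::lite_api::CL_PRECISION_FP16);
}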