diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 18ee7cce3..d1569f65a 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,25 +1,7 @@
 name: Build
-on: [push, pull_request]
+on: [pull_request]
 jobs:
-  macOS-latest-cpp:
-    runs-on: macOS-latest
-
-    steps:
-    - name: Clone
-      uses: actions/checkout@v1
-
-    - name: Get CMake
-      uses: lukka/get-cmake@latest
-
-    - name: Build FastDeploy
-      run: |
-        mkdir build
-        cd build
-        cmake .. -DENABLE_ORT_BACKEND=ON -DENABLE_PADDLE_BACKEND=OFF -DENABLE_OPENVINO_BACKEND=OFF -DCMAKE_INSTALL_PREFIX=${PWD}/compiled_fastdeploy_sdk -DENABLE_VISION=ON -DENABLE_TEXT=ON
-        make -j12
-        make install
-        ls -l
   macOS-latest-py:
     runs-on: macos-latest
diff --git a/README_CN.md b/README_CN.md
index 3057359d6..88d0fea59 100755
--- a/README_CN.md
+++ b/README_CN.md
@@ -289,7 +289,7 @@ int main(int argc, char* argv[]) {
 | Classification | [PaddleClas/SqueeezeNetV1.1](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ |
 | Classification | [PaddleClas/Inceptionv3](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ |
 | Classification | [PaddleClas/PP-HGNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ |
-| Detection | [PaddleDetection/PP-YOLOE+](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ |
+| Detection | 🔥🔥[PaddleDetection/PP-YOLOE+](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ |
 | Detection | [🔥PaddleDetection/YOLOv8](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ |
 | Detection | [🔥ultralytics/YOLOv8](./examples/vision/detection/yolov8) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ |❔ | ❔ |
 | Detection | [PaddleDetection/PicoDet](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ |
diff --git a/README_EN.md b/README_EN.md
index 64b349fbd..8390d948e 100644
--- a/README_EN.md
+++ b/README_EN.md
@@ -275,7 +275,7 @@ Notes: ✅: already supported; ❔: to be supported in the future; N/A: Not Ava
 | Classification | [PaddleClas/SqueeezeNetV1.1](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ |
 | Classification | [PaddleClas/Inceptionv3](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ |
 | Classification | [PaddleClas/PP-HGNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ |
-| Detection | [PaddleDetection/PP-YOLOE+](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ |
+| Detection | 🔥🔥[PaddleDetection/PP-YOLOE+](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ |
 | Detection | [🔥PaddleDetection/YOLOv8](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ |
 | Detection | [🔥ultralytics/YOLOv8](./examples/vision/detection/yolov8) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ |❔ | ❔ |
 | Detection | [PaddleDetection/PicoDet](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ |
diff --git a/cmake/ascend.cmake b/cmake/ascend.cmake
index 3e22aa92c..1f4ff80b9 100644
--- a/cmake/ascend.cmake
+++ b/cmake/ascend.cmake
@@ -26,7 +26,7 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
 else ()
message(STATUS "Build FastDeploy Ascend Python library on aarch64 platform.") if(NOT PADDLELITE_URL) - set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux_arm64_huawei_ascend_npu_python_0118.tgz") + set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux_arm64_huawei_ascend_npu_python_0202.tgz") endif() endif() endif() diff --git a/docs/cn/faq/rknpu2/build.md b/docs/cn/faq/rknpu2/build.md index c99bcb17f..7389d2396 100644 --- a/docs/cn/faq/rknpu2/build.md +++ b/docs/cn/faq/rknpu2/build.md @@ -47,8 +47,7 @@ cmake .. -DCMAKE_C_COMPILER=/home/zbc/opt/gcc-linaro-6.3.1-2017.05-x86_64_aarch -DENABLE_ORT_BACKEND=OFF \ -DENABLE_RKNPU2_BACKEND=ON \ -DENABLE_VISION=ON \ - -DRKNN2_TARGET_SOC=RK3588 \ - -DENABLE_FLYCV=ON \ + -DRKNN2_TARGET_SOC=RK356X \ -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.0 make -j8 make install diff --git a/examples/vision/detection/rkyolo/cpp/CMakeLists.txt b/examples/vision/detection/rkyolo/cpp/CMakeLists.txt index 524b94fea..c29281d84 100644 --- a/examples/vision/detection/rkyolo/cpp/CMakeLists.txt +++ b/examples/vision/detection/rkyolo/cpp/CMakeLists.txt @@ -4,34 +4,9 @@ project(rknpu2_test) set(CMAKE_CXX_STANDARD 14) # 指定下载解压后的fastdeploy库路径 -set(FASTDEPLOY_INSTALL_DIR "thirdpartys/fastdeploy-0.0.3") - -include(${FASTDEPLOY_INSTALL_DIR}/FastDeployConfig.cmake) -include_directories(${FastDeploy_INCLUDE_DIRS}) - +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) +# 添加FastDeploy依赖头文件 +include_directories(${FASTDEPLOY_INCS}) add_executable(infer_rkyolo infer_rkyolo.cc) -target_link_libraries(infer_rkyolo ${FastDeploy_LIBS}) - - - -set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/build/install) - -install(TARGETS infer_rkyolo DESTINATION ./) - -install(DIRECTORY model DESTINATION ./) -install(DIRECTORY images DESTINATION ./) - -file(GLOB FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/*) -message("${FASTDEPLOY_LIBS}") -install(PROGRAMS ${FASTDEPLOY_LIBS} DESTINATION lib) - -file(GLOB ONNXRUNTIME_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/onnxruntime/lib/*) -install(PROGRAMS ${ONNXRUNTIME_LIBS} DESTINATION lib) - -install(DIRECTORY ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/opencv/lib DESTINATION ./) - -file(GLOB PADDLETOONNX_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddle2onnx/lib/*) -install(PROGRAMS ${PADDLETOONNX_LIBS} DESTINATION lib) - -file(GLOB RKNPU2_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/rknpu2_runtime/${RKNN2_TARGET_SOC}/lib/*) -install(PROGRAMS ${RKNPU2_LIBS} DESTINATION lib) +target_link_libraries(infer_rkyolo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/detection/rkyolo/cpp/README_CN.md b/examples/vision/detection/rkyolo/cpp/README_CN.md index 924e34984..7d2827173 100644 --- a/examples/vision/detection/rkyolo/cpp/README_CN.md +++ b/examples/vision/detection/rkyolo/cpp/README_CN.md @@ -10,58 +10,12 @@ 以上步骤请参考[RK2代NPU部署库编译](../../../../../docs/cn/build_and_install/rknpu2.md)实现 -## 生成基本目录文件 - -该例程由以下几个部分组成 -```text -. -├── CMakeLists.txt -├── build # 编译文件夹 -├── image # 存放图片的文件夹 -├── infer_rkyolo.cc -├── model # 存放模型文件的文件夹 -└── thirdpartys # 存放sdk的文件夹 -``` - -首先需要先生成目录结构 -```bash -mkdir build -mkdir images -mkdir model -mkdir thirdpartys -``` - -## 编译 - -### 编译并拷贝SDK到thirdpartys文件夹 - -请参考[RK2代NPU部署库编译](../../../../../docs/cn/build_and_install/rknpu2.md)仓库编译SDK,编译完成后,将在build目录下生成 -fastdeploy-0.0.3目录,请移动它至thirdpartys目录下. 
-
-### Copy the model and configuration files into the model folder
-During the Paddle dynamic graph model -> Paddle static graph model -> ONNX model conversion, an ONNX file and a matching yaml configuration file are generated; put the configuration file into the model folder.
-The model file converted to RKNN also needs to be copied into model.
-
-### Prepare a test image in the image folder
 ```bash
 wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
-cp 000000014439.jpg ./images
-```
-
-### Build the example
-
-```bash
 cd build
-cmake ..
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
 make -j8
-make install
-```
-
-## Run the example
-
-```bash
-cd ./build/install
-./infer_picodet model/ images/000000014439.jpg
+./infer_rkyolo /path/to/model 000000014439.jpg
 ```
diff --git a/fastdeploy/runtime/backends/lite/configure_hardware.cc b/fastdeploy/runtime/backends/lite/configure_hardware.cc
new file mode 100644
index 000000000..7c7a9993c
--- /dev/null
+++ b/fastdeploy/runtime/backends/lite/configure_hardware.cc
@@ -0,0 +1,159 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/runtime/backends/lite/lite_backend.h"
+// https://github.com/PaddlePaddle/Paddle-Lite/issues/8290
+// When compiling the FastDeploy dynamic library, namely,
+// WITH_STATIC_LIB=OFF, and depending on the Paddle Lite
+// static library, you need to include the fake registration
+// codes of Paddle Lite. When you compile the FastDeploy static
+// library and depend on the Paddle Lite static library,
+// WITH_STATIC_LIB=ON, you do not need to include the fake
+// registration codes for Paddle Lite, but wait until you
+// use the FastDeploy static library.
+#if (defined(WITH_LITE_STATIC) && (!defined(WITH_STATIC_LIB)))
+#warning You are compiling the FastDeploy dynamic library with \
+Paddle Lite static lib. We will automatically add some registration \
+codes for ops, kernels and passes for Paddle Lite.
+#include "paddle_use_kernels.h"  // NOLINT
+#include "paddle_use_ops.h"      // NOLINT
+#include "paddle_use_passes.h"   // NOLINT
+#endif
+
+#include
+
+namespace fastdeploy {
+
+#if defined(__arm__) || defined(__aarch64__)
+#define FD_LITE_HOST TARGET(kARM)
+#elif defined(__x86_64__)
+#define FD_LITE_HOST TARGET(kX86)
+#endif
+
+std::vector<paddle::lite_api::Place> GetPlacesForCpu(
+    const LiteBackendOption& option) {
+  std::vector<paddle::lite_api::Place> valid_places;
+  valid_places.push_back(
+      paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kInt8)});
+  if (option.enable_fp16) {
+    paddle::lite_api::MobileConfig check_fp16_config;
+    if (check_fp16_config.check_fp16_valid()) {
+      valid_places.push_back(
+          paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFP16)});
+    } else {
+      FDWARNING << "Current CPU doesn't support float16 precision, will "
+                   "fallback to float32."
+ << std::endl; + } + } + valid_places.push_back( + paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFloat)}); + return valid_places; +} + +void LiteBackend::ConfigureCpu(const LiteBackendOption& option) { + config_.set_valid_places(GetPlacesForCpu(option)); +} + +void LiteBackend::ConfigureKunlunXin(const LiteBackendOption& option) { + std::vector valid_places; + valid_places.push_back( + paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)}); + if (option.enable_fp16) { + valid_places.push_back( + paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)}); + } + valid_places.push_back( + paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)}); + + config_.set_xpu_dev_per_thread(option.device_id); + config_.set_xpu_workspace_l3_size_per_thread( + option.kunlunxin_l3_workspace_size); + config_.set_xpu_l3_cache_method(option.kunlunxin_l3_workspace_size, + option.kunlunxin_locked); + config_.set_xpu_conv_autotune(option.kunlunxin_autotune, + option.kunlunxin_autotune_file); + config_.set_xpu_multi_encoder_method(option.kunlunxin_precision, + option.kunlunxin_adaptive_seqlen); + if (option.kunlunxin_enable_multi_stream) { + config_.enable_xpu_multi_stream(); + } + auto cpu_places = GetPlacesForCpu(option); + valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end()); + config_.set_valid_places(valid_places); +} + +void LiteBackend::ConfigureTimvx(const LiteBackendOption& option) { + config_.set_nnadapter_device_names({"verisilicon_timvx"}); + std::vector valid_places; + valid_places.push_back( + paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)}); + valid_places.push_back( + paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)}); + auto cpu_places = GetPlacesForCpu(option); + valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end()); + config_.set_valid_places(valid_places); + ConfigureNNAdapter(option); +} + +void LiteBackend::ConfigureAscend(const LiteBackendOption& option) { + config_.set_nnadapter_device_names({"huawei_ascend_npu"}); + std::vector valid_places; + valid_places.push_back( + paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)}); + valid_places.push_back( + paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)}); + auto cpu_places = GetPlacesForCpu(option); + valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end()); + config_.set_valid_places(valid_places); + ConfigureNNAdapter(option); +} + +void LiteBackend::ConfigureNNAdapter(const LiteBackendOption& option) { + if (!option.nnadapter_subgraph_partition_config_path.empty()) { + std::vector nnadapter_subgraph_partition_config_buffer; + if (ReadFile(option.nnadapter_subgraph_partition_config_path, + &nnadapter_subgraph_partition_config_buffer, false)) { + if (!nnadapter_subgraph_partition_config_buffer.empty()) { + std::string nnadapter_subgraph_partition_config_string( + nnadapter_subgraph_partition_config_buffer.data(), + nnadapter_subgraph_partition_config_buffer.size()); + config_.set_nnadapter_subgraph_partition_config_buffer( + nnadapter_subgraph_partition_config_string); + } + } + } + + if (!option.nnadapter_context_properties.empty()) { + config_.set_nnadapter_context_properties( + option.nnadapter_context_properties); + } + + if (!option.nnadapter_model_cache_dir.empty()) { + config_.set_nnadapter_model_cache_dir(option.nnadapter_model_cache_dir); + } + + if (!option.nnadapter_mixed_precision_quantization_config_path.empty()) { + config_.set_nnadapter_mixed_precision_quantization_config_path( + 
+        option.nnadapter_mixed_precision_quantization_config_path);
+  }
+
+  if (!option.nnadapter_subgraph_partition_config_path.empty()) {
+    config_.set_nnadapter_subgraph_partition_config_path(
+        option.nnadapter_subgraph_partition_config_path);
+  }
+
+  config_.set_nnadapter_dynamic_shape_info(option.nnadapter_dynamic_shape_info);
+}
+}  // namespace fastdeploy
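Worth noting about the new `Configure*` helpers above: the order of `valid_places` is Paddle Lite's kernel-selection precedence, and every accelerator path appends the CPU places as a fallback for ops the accelerator cannot run. A minimal sketch of the KunlunXin ordering (the function name `SketchKunlunXinPlaces` is illustrative, not part of the patch):

```cpp
#include <vector>

#include "paddle_api.h"  // Paddle Lite, NOLINT

// Sketch: XPU kernels are preferred (int8, optional fp16, then float);
// the CPU places from GetPlacesForCpu() are appended afterwards.
std::vector<paddle::lite_api::Place> SketchKunlunXinPlaces(bool enable_fp16) {
  std::vector<paddle::lite_api::Place> places;
  places.push_back({TARGET(kXPU), PRECISION(kInt8)});
  if (enable_fp16) {
    places.push_back({TARGET(kXPU), PRECISION(kFP16)});
  }
  places.push_back({TARGET(kXPU), PRECISION(kFloat)});
  // ...followed by GetPlacesForCpu(option): int8 -> [fp16] -> float.
  return places;
}
```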
<< std::endl; + + if (option_.device == Device::CPU) { + ConfigureCpu(option_); + } else if (option_.device == Device::TIMVX) { + ConfigureTimvx(option_); + } else if (option_.device == Device::KUNLUNXIN) { + ConfigureKunlunXin(option_); + } else if (option_.device == Device::ASCEND) { + ConfigureAscend(option_); } - if (option_.enable_fp16) { - if (option_.enable_kunlunxin) { - valid_places.push_back( - paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)}); - } else { - paddle::lite_api::MobileConfig check_fp16_config; - // Determine whether the device supports the FP16 - // instruction set (or whether it is an arm device - // of the armv8.2 architecture) - supported_fp16_ = check_fp16_config.check_fp16_valid(); - if (supported_fp16_) { - valid_places.push_back( - paddle::lite_api::Place{TARGET(kARM), PRECISION(kFP16)}); - FDINFO << "The device supports FP16, Lite::Backend will inference with " - "FP16 precision." - << std::endl; - } else { - FDWARNING << "The device doesn't support FP16, will fallback to FP32."; - } - } - } - if (!option_.nnadapter_subgraph_partition_config_path.empty()) { - std::vector nnadapter_subgraph_partition_config_buffer; - if (ReadFile(option_.nnadapter_subgraph_partition_config_path, - &nnadapter_subgraph_partition_config_buffer, false)) { - if (!nnadapter_subgraph_partition_config_buffer.empty()) { - std::string nnadapter_subgraph_partition_config_string( - nnadapter_subgraph_partition_config_buffer.data(), - nnadapter_subgraph_partition_config_buffer.size()); - config_.set_nnadapter_subgraph_partition_config_buffer( - nnadapter_subgraph_partition_config_string); - } - } - } - if (option_.enable_timvx) { - config_.set_nnadapter_device_names({"verisilicon_timvx"}); - valid_places.push_back( - paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)}); - valid_places.push_back( - paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)}); - valid_places.push_back( - paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)}); - } - - if (option_.enable_ascend) { - if (option_.nnadapter_device_names.empty()) { - config_.set_nnadapter_device_names({"huawei_ascend_npu"}); - } else { - config_.set_nnadapter_device_names(option_.nnadapter_device_names); - } - - if (!option_.nnadapter_context_properties.empty()) { - config_.set_nnadapter_context_properties( - option_.nnadapter_context_properties); - } - - if (!option_.nnadapter_model_cache_dir.empty()) { - config_.set_nnadapter_model_cache_dir(option_.nnadapter_model_cache_dir); - } - - if (!option_.nnadapter_mixed_precision_quantization_config_path.empty()) { - config_.set_nnadapter_mixed_precision_quantization_config_path( - option_.nnadapter_mixed_precision_quantization_config_path); - } - - if (!option_.nnadapter_subgraph_partition_config_path.empty()) { - config_.set_nnadapter_subgraph_partition_config_path( - option_.nnadapter_subgraph_partition_config_path); - } - - valid_places.push_back( - paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)}); - valid_places.push_back( - paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)}); - valid_places.push_back( - paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)}); - } - - if (option_.enable_kunlunxin) { - valid_places.push_back( - paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)}); - valid_places.push_back( - paddle::lite_api::Place{TARGET(kX86), PRECISION(kFloat)}); - config_.set_xpu_dev_per_thread(option_.device_id); - config_.set_xpu_workspace_l3_size_per_thread( - option_.kunlunxin_l3_workspace_size); - 
-    config_.set_xpu_l3_cache_method(option_.kunlunxin_l3_workspace_size,
-                                    option_.kunlunxin_locked);
-    config_.set_xpu_conv_autotune(option_.kunlunxin_autotune,
-                                  option_.kunlunxin_autotune_file);
-    config_.set_xpu_multi_encoder_method(option_.kunlunxin_precision,
-                                         option_.kunlunxin_adaptive_seqlen);
-    if (option_.kunlunxin_enable_multi_stream) {
-      config_.enable_xpu_multi_stream();
-    }
-  } else {
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)});
-  }
-  config_.set_valid_places(valid_places);
-  if (option_.threads > 0) {
-    config_.set_threads(option_.threads);
+  if (option_.cpu_threads > 0) {
+    config_.set_threads(option_.cpu_threads);
   }
   if (option_.power_mode > 0) {
     config_.set_power_mode(
@@ -179,29 +56,6 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
   }
 }
 
-bool LiteBackend::ReadFile(const std::string& filename,
-                           std::vector<char>* contents, const bool binary) {
-  FILE* fp = fopen(filename.c_str(), binary ? "rb" : "r");
-  if (!fp) {
-    FDERROR << "Cannot open file " << filename << "." << std::endl;
-    return false;
-  }
-  fseek(fp, 0, SEEK_END);
-  size_t size = ftell(fp);
-  fseek(fp, 0, SEEK_SET);
-  contents->clear();
-  contents->resize(size);
-  size_t offset = 0;
-  char* ptr = reinterpret_cast<char*>(&(contents->at(0)));
-  while (offset < size) {
-    size_t already_read = fread(ptr, 1, size - offset, fp);
-    offset += already_read;
-    ptr += already_read;
-  }
-  fclose(fp);
-  return true;
-}
-
 bool LiteBackend::InitFromPaddle(const std::string& model_file,
                                  const std::string& params_file,
                                  const LiteBackendOption& option) {
@@ -246,7 +100,7 @@ bool LiteBackend::InitFromPaddle(const std::string& model_file,
     auto shape = tensor->shape();
     info.shape.assign(shape.begin(), shape.end());
     info.name = output_names[i];
-    if (!option_.enable_kunlunxin) {
+    if (option_.device != Device::KUNLUNXIN) {
       info.dtype = LiteDataTypeToFD(tensor->precision());
     }
     outputs_desc_.emplace_back(info);
@@ -337,4 +191,49 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
   return true;
 }
 
+bool ReadFile(const std::string& filename, std::vector<char>* contents,
+              bool binary) {
+  FILE* fp = fopen(filename.c_str(), binary ? "rb" : "r");
+  if (!fp) {
+    FDERROR << "Cannot open file " << filename << "." << std::endl;
<< std::endl; + return false; + } + fseek(fp, 0, SEEK_END); + size_t size = ftell(fp); + fseek(fp, 0, SEEK_SET); + contents->clear(); + contents->resize(size); + size_t offset = 0; + char* ptr = reinterpret_cast(&(contents->at(0))); + while (offset < size) { + size_t already_read = fread(ptr, 1, size - offset, fp); + offset += already_read; + ptr += already_read; + } + fclose(fp); + return true; +} + +// Convert data type from paddle lite to fastdeploy +FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype) { + if (dtype == paddle::lite_api::PrecisionType::kFloat) { + return FDDataType::FP32; + } else if (dtype == paddle::lite_api::PrecisionType::kInt8) { + return FDDataType::INT8; + } else if (dtype == paddle::lite_api::PrecisionType::kInt32) { + return FDDataType::INT32; + } else if (dtype == paddle::lite_api::PrecisionType::kInt64) { + return FDDataType::INT64; + } else if (dtype == paddle::lite_api::PrecisionType::kInt16) { + return FDDataType::INT16; + } else if (dtype == paddle::lite_api::PrecisionType::kUInt8) { + return FDDataType::UINT8; + } else if (dtype == paddle::lite_api::PrecisionType::kFP64) { + return FDDataType::FP64; + } + FDASSERT(false, "Unexpected data type of %s.", + paddle::lite_api::PrecisionToStr(dtype).c_str()); + return FDDataType::FP32; +} + } // namespace fastdeploy diff --git a/fastdeploy/runtime/backends/lite/lite_backend.h b/fastdeploy/runtime/backends/lite/lite_backend.h index 8a7b2651c..bb01551a0 100755 --- a/fastdeploy/runtime/backends/lite/lite_backend.h +++ b/fastdeploy/runtime/backends/lite/lite_backend.h @@ -19,13 +19,12 @@ #include #include -#include "fastdeploy/runtime/backends/backend.h" -#include "fastdeploy/runtime/backends/lite/option.h" #include "paddle_api.h" // NOLINT +#include "fastdeploy/runtime/backends/backend.h" +#include "fastdeploy/runtime/backends/lite/option.h" + namespace fastdeploy { -// Convert data type from paddle lite to fastdeploy -FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype); class LiteBackend : public BaseBackend { public: @@ -51,15 +50,26 @@ class LiteBackend : public BaseBackend { std::vector GetOutputInfos() override; private: + void ConfigureCpu(const LiteBackendOption& option); + void ConfigureTimvx(const LiteBackendOption& option); + void ConfigureAscend(const LiteBackendOption& option); + void ConfigureKunlunXin(const LiteBackendOption& option); + void ConfigureNNAdapter(const LiteBackendOption& option); + paddle::lite_api::CxxConfig config_; std::shared_ptr predictor_; std::vector inputs_desc_; std::vector outputs_desc_; std::map inputs_order_; LiteBackendOption option_; - bool supported_fp16_ = false; - bool ReadFile(const std::string& filename, - std::vector* contents, - const bool binary = true); }; + +// Convert data type from paddle lite to fastdeploy +FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype); + +// Helper function to read file +bool ReadFile(const std::string& filename, + std::vector* contents, + bool binary = true); + } // namespace fastdeploy diff --git a/fastdeploy/runtime/backends/lite/option.h b/fastdeploy/runtime/backends/lite/option.h index fc1dfa919..879cb3472 100755 --- a/fastdeploy/runtime/backends/lite/option.h +++ b/fastdeploy/runtime/backends/lite/option.h @@ -17,7 +17,7 @@ #include "fastdeploy/core/fd_type.h" // https://github.com/PaddlePaddle/Paddle-Lite/issues/8290 #if (defined(WITH_LITE_STATIC) && defined(WITH_STATIC_LIB)) -// Whether to output some warning messages when using the +// Whether to output some 
diff --git a/fastdeploy/runtime/backends/lite/option.h b/fastdeploy/runtime/backends/lite/option.h
index fc1dfa919..879cb3472 100755
--- a/fastdeploy/runtime/backends/lite/option.h
+++ b/fastdeploy/runtime/backends/lite/option.h
@@ -17,7 +17,7 @@
 #include "fastdeploy/core/fd_type.h"
 // https://github.com/PaddlePaddle/Paddle-Lite/issues/8290
 #if (defined(WITH_LITE_STATIC) && defined(WITH_STATIC_LIB))
-// Whether to output some warning messages when using the 
+// Whether to output some warning messages when using the
 // FastDeploy static library, default OFF. These messages
 // are only reserved for debugging.
 #if defined(WITH_STATIC_WARNING)
@@ -36,7 +36,7 @@ ops, kernels and passes for Paddle Lite.
 #include
 #include
 #include
-#include
+#include
 
 namespace fastdeploy {
 
@@ -51,24 +51,19 @@ enum LitePowerMode {
 };
 
 struct LiteBackendOption {
-  // cpu num threads
-  int threads = 1;
-  // lite power mode
-  // 0: LITE_POWER_HIGH
-  // 1: LITE_POWER_LOW
-  // 2: LITE_POWER_FULL
-  // 3: LITE_POWER_NO_BIND
-  // 4: LITE_POWER_RAND_HIGH
-  // 5: LITE_POWER_RAND_LOW
-  int power_mode = 3;
-  // enable fp16
+  /// Paddle Lite power mode for mobile devices.
+  LitePowerMode power_mode = LITE_POWER_NO_BIND;
+  /// Number of threads while using the CPU
+  int cpu_threads = 1;
+  /// Enable half precision (fp16) inference
   bool enable_fp16 = false;
-  // enable int8
+  /// Enable int8 precision for quantized models
  bool enable_int8 = false;
+  /// Target device for the Paddle Lite backend
+  Device device = Device::CPU;
+
   // optimized model dir for CxxConfig
   std::string optimized_model_dir = "";
-  // TODO(qiuyanjun): support more options for lite backend.
-  // Such as fp16, different device target (kARM/kXPU/kNPU/...)
   std::string nnadapter_subgraph_partition_config_path = "";
   std::string nnadapter_subgraph_partition_config_buffer = "";
   std::string nnadapter_context_properties = "";
@@ -77,9 +72,6 @@ struct LiteBackendOption {
   std::map<std::string, std::vector<std::vector<int>>>
       nnadapter_dynamic_shape_info = {{"", {{0}}}};
   std::vector<std::string> nnadapter_device_names = {};
-  bool enable_timvx = false;
-  bool enable_ascend = false;
-  bool enable_kunlunxin = false;
   int device_id = 0;
   int kunlunxin_l3_workspace_size = 0xfffc00;
   bool kunlunxin_locked = false;
diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.cc b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
index 7b96aeff9..90bd27682 100644
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -92,23 +92,19 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
 bool PaddleBackend::InitFromPaddle(const std::string& model_buffer,
                                    const std::string& params_buffer,
                                    const PaddleBackendOption& option) {
-  // bool PaddleBackend::InitFromPaddle(const std::string& contents) {
   if (initialized_) {
     FDERROR << "PaddleBackend is already initialized, cannot initialize again."
<< std::endl; return false; } - - // The input/output information get from predictor is not right, use - // PaddleReader instead now - std::string contents; - config_.SetModelBuffer(model_buffer.c_str(), model_buffer.size(), params_buffer.c_str(), params_buffer.size()); - contents = model_buffer; config_.EnableMemoryOptim(); BuildOption(option); - auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size()); + + // The input/output information get from predictor is not right, use + // PaddleReader instead now + auto reader = paddle2onnx::PaddleReader(model_buffer.c_str(), model_buffer.size()); // If it's a quantized model, and use cpu with mkldnn, automaticaly switch to // int8 mode if (reader.is_quantize_model) { diff --git a/fastdeploy/runtime/runtime_option.cc b/fastdeploy/runtime/runtime_option.cc index 0798786b9..7dee5365a 100644 --- a/fastdeploy/runtime/runtime_option.cc +++ b/fastdeploy/runtime/runtime_option.cc @@ -58,7 +58,7 @@ void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name, void RuntimeOption::UseTimVX() { device = Device::TIMVX; - paddle_lite_option.enable_timvx = true; + paddle_lite_option.device = device; } void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size, @@ -68,7 +68,7 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size, bool adaptive_seqlen, bool enable_multi_stream) { device = Device::KUNLUNXIN; - paddle_lite_option.enable_kunlunxin = true; + paddle_lite_option.device = device; paddle_lite_option.device_id = kunlunxin_id; paddle_lite_option.kunlunxin_l3_workspace_size = l3_workspace_size; paddle_lite_option.kunlunxin_locked = locked; @@ -81,7 +81,7 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size, void RuntimeOption::UseAscend() { device = Device::ASCEND; - paddle_lite_option.enable_ascend = true; + paddle_lite_option.device = device; } void RuntimeOption::UseSophgo() { @@ -96,7 +96,7 @@ void RuntimeOption::SetExternalStream(void* external_stream) { void RuntimeOption::SetCpuThreadNum(int thread_num) { FDASSERT(thread_num > 0, "The thread_num must be greater than 0."); cpu_thread_num = thread_num; - paddle_lite_option.threads = thread_num; + paddle_lite_option.cpu_threads = thread_num; ort_option.intra_op_num_threads = thread_num; } diff --git a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.cc b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.cc index 581cac2e8..7aae742ea 100755 --- a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.cc +++ b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.cc @@ -141,7 +141,6 @@ int RKYOLOPostprocessor::ProcessFP16(float* input, int* anchor, int grid_h, } else { limit_score = box_conf_f32 * class_prob_f32; } - // printf("limit score: %f", limit_score); if (limit_score > conf_threshold_) { float box_x, box_y, box_w, box_h; if (anchor_per_branch_ == 1) { diff --git a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h index b65479415..eea3fe521 100755 --- a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h +++ b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h @@ -55,26 +55,30 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor { /// Get nms_threshold, default 0.45 float GetNMSThreshold() const { return nms_threshold_; } - // Set height and weight + /// Set height and weight void SetHeightAndWeight(int& height, int& width) { height_ = height; width_ = width; } - // Set pad_hw_values + /// Set pad_hw_values void 
diff --git a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.cc b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.cc
index 581cac2e8..7aae742ea 100755
--- a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.cc
+++ b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.cc
@@ -141,7 +141,6 @@ int RKYOLOPostprocessor::ProcessFP16(float* input, int* anchor, int grid_h,
       } else {
         limit_score = box_conf_f32 * class_prob_f32;
       }
-      // printf("limit score: %f", limit_score);
       if (limit_score > conf_threshold_) {
         float box_x, box_y, box_w, box_h;
         if (anchor_per_branch_ == 1) {
diff --git a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
index b65479415..eea3fe521 100755
--- a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
+++ b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
@@ -55,26 +55,30 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor {
   /// Get nms_threshold, default 0.45
   float GetNMSThreshold() const { return nms_threshold_; }
 
-  // Set height and weight
+  /// Set height and width
   void SetHeightAndWeight(int& height, int& width) {
     height_ = height;
     width_ = width;
   }
 
-  // Set pad_hw_values
+  /// Set pad_hw_values
   void SetPadHWValues(std::vector<std::vector<int>> pad_hw_values) {
     pad_hw_values_ = pad_hw_values;
   }
 
-  // Set scale
-  void SetScale(std::vector<float> scale) {
-    scale_ = scale;
+  /// Set scale
+  void SetScale(std::vector<float> scale) { scale_ = scale; }
+
+  /// Set Anchor
+  void SetAnchor(std::vector<int> anchors, int anchor_per_branch) {
+    anchors_ = anchors;
+    anchor_per_branch_ = anchor_per_branch;
   }
 
-  // Set Anchor
-  void SetAnchor(std::vector<int> anchors, int anchor_per_branch) {
-    anchors_ = anchors;
-    anchor_per_branch_ = anchor_per_branch;
+  /// Set the number of classes
+  void SetClassNum(int num) {
+    obj_class_num_ = num;
+    prob_box_size_ = obj_class_num_ + 5;
   }
 
  private:
@@ -85,12 +89,9 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor {
   int width_ = 0;
   int anchor_per_branch_ = 0;
 
-  int ProcessFP16(float *input, int *anchor, int grid_h,
-                  int grid_w, int stride,
-                  std::vector<float> &boxes,
-                  std::vector<float> &boxScores,
-                  std::vector<int> &classId,
-                  float threshold);
+  int ProcessFP16(float* input, int* anchor, int grid_h, int grid_w, int stride,
+                  std::vector<float>& boxes, std::vector<float>& boxScores,
+                  std::vector<int>& classId, float threshold);
   // Model
   int QuickSortIndiceInverse(std::vector<float>& input, int left, int right,
                              std::vector<int>& indices);
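The new `SetClassNum` setter makes the class count configurable instead of hard-coded. A usage sketch for a custom 3-class RKYOLO model, using only the setters shown in the header above (the size values are placeholders):

```cpp
#include "fastdeploy/vision.h"

void Configure(fastdeploy::vision::detection::RKYOLOPostprocessor* post) {
  int height = 640, width = 640;  // placeholder input size
  post->SetHeightAndWeight(height, width);
  post->SetClassNum(3);  // prob_box_size_ becomes 3 + 5 internally
}
```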