Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-13 20:34:02 +08:00
Merge branch 'develop' of github.com:yunyaoXYY/FastDeploy into huawei
.github/workflows/build.yml | 20 lines changed (vendored)
@@ -1,25 +1,7 @@
 name: Build
-on: [push, pull_request]
+on: [pull_request]
 
 jobs:
-  macOS-latest-cpp:
-    runs-on: macOS-latest
-
-    steps:
-    - name: Clone
-      uses: actions/checkout@v1
-
-    - name: Get CMake
-      uses: lukka/get-cmake@latest
-
-    - name: Build FastDeploy
-      run: |
-        mkdir build
-        cd build
-        cmake .. -DENABLE_ORT_BACKEND=ON -DENABLE_PADDLE_BACKEND=OFF -DENABLE_OPENVINO_BACKEND=OFF -DCMAKE_INSTALL_PREFIX=${PWD}/compiled_fastdeploy_sdk -DENABLE_VISION=ON -DENABLE_TEXT=ON
-        make -j12
-        make install
-        ls -l
   macOS-latest-py:
     runs-on: macos-latest
 
@@ -289,7 +289,7 @@ int main(int argc, char* argv[]) {
 | Classification | [PaddleClas/SqueeezeNetV1.1](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ |
 | Classification | [PaddleClas/Inceptionv3](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ |
 | Classification | [PaddleClas/PP-HGNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ |
-| Detection | [PaddleDetection/PP-YOLOE+](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ |
+| Detection | 🔥🔥[PaddleDetection/PP-YOLOE+](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ |
 | Detection | [🔥PaddleDetection/YOLOv8](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ |
 | Detection | [🔥ultralytics/YOLOv8](./examples/vision/detection/yolov8) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ |❔ | ❔ |
 | Detection | [PaddleDetection/PicoDet](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ |
@@ -275,7 +275,7 @@ Notes: ✅: already supported; ❔: to be supported in the future; N/A: Not Ava
 | Classification | [PaddleClas/SqueeezeNetV1.1](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ |
 | Classification | [PaddleClas/Inceptionv3](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ |
 | Classification | [PaddleClas/PP-HGNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ |
-| Detection | [PaddleDetection/PP-YOLOE+](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ |
+| Detection | 🔥🔥[PaddleDetection/PP-YOLOE+](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ |
 | Detection | [🔥PaddleDetection/YOLOv8](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ |
 | Detection | [🔥ultralytics/YOLOv8](./examples/vision/detection/yolov8) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ |❔ | ❔ |
 | Detection | [PaddleDetection/PicoDet](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ |
@@ -26,7 +26,7 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
 else ()
   message(STATUS "Build FastDeploy Ascend Python library on aarch64 platform.")
   if(NOT PADDLELITE_URL)
-    set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux_arm64_huawei_ascend_npu_python_0118.tgz")
+    set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux_arm64_huawei_ascend_npu_python_0202.tgz")
   endif()
 endif()
 endif()
@@ -47,8 +47,7 @@ cmake .. -DCMAKE_C_COMPILER=/home/zbc/opt/gcc-linaro-6.3.1-2017.05-x86_64_aarch
 -DENABLE_ORT_BACKEND=OFF \
 -DENABLE_RKNPU2_BACKEND=ON \
 -DENABLE_VISION=ON \
--DRKNN2_TARGET_SOC=RK3588 \
--DENABLE_FLYCV=ON \
+-DRKNN2_TARGET_SOC=RK356X \
 -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.0
 make -j8
 make install
@@ -4,34 +4,9 @@ project(rknpu2_test)
 set(CMAKE_CXX_STANDARD 14)
 
 # Path to the downloaded and extracted fastdeploy SDK
-set(FASTDEPLOY_INSTALL_DIR "thirdpartys/fastdeploy-0.0.3")
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
-include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeployConfig.cmake)
 # Include the FastDeploy dependency headers
-include_directories(${FastDeploy_INCLUDE_DIRS})
+include_directories(${FASTDEPLOY_INCS})
 
 add_executable(infer_rkyolo infer_rkyolo.cc)
-target_link_libraries(infer_rkyolo ${FastDeploy_LIBS})
-
-
-
-set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/build/install)
-
-install(TARGETS infer_rkyolo DESTINATION ./)
-
-install(DIRECTORY model DESTINATION ./)
-install(DIRECTORY images DESTINATION ./)
-
-file(GLOB FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/*)
-message("${FASTDEPLOY_LIBS}")
-install(PROGRAMS ${FASTDEPLOY_LIBS} DESTINATION lib)
-
-file(GLOB ONNXRUNTIME_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/onnxruntime/lib/*)
-install(PROGRAMS ${ONNXRUNTIME_LIBS} DESTINATION lib)
-
-install(DIRECTORY ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/opencv/lib DESTINATION ./)
-
-file(GLOB PADDLETOONNX_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddle2onnx/lib/*)
-install(PROGRAMS ${PADDLETOONNX_LIBS} DESTINATION lib)
-
-file(GLOB RKNPU2_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/rknpu2_runtime/${RKNN2_TARGET_SOC}/lib/*)
-install(PROGRAMS ${RKNPU2_LIBS} DESTINATION lib)
+target_link_libraries(infer_rkyolo ${FASTDEPLOY_LIBS})
@@ -10,58 +10,12 @@
 
 For the steps above, refer to [Build the RKNPU2 deployment library](../../../../../docs/cn/build_and_install/rknpu2.md).
 
-## Generate the basic directory layout
-
-This example consists of the following parts:
-```text
-.
-├── CMakeLists.txt
-├── build        # build folder
-├── image        # folder for test images
-├── infer_rkyolo.cc
-├── model        # folder for model files
-└── thirdpartys  # folder for the SDK
-```
-
-First create the directory structure:
-```bash
-mkdir build
-mkdir images
-mkdir model
-mkdir thirdpartys
-```
-
-## Build
-
-### Build the SDK and copy it into the thirdpartys folder
-
-Refer to [Build the RKNPU2 deployment library](../../../../../docs/cn/build_and_install/rknpu2.md) to build the SDK. After the build finishes, a
-fastdeploy-0.0.3 directory is generated under the build directory; move it into the thirdpartys directory.
-
-### Copy the model file and the configuration file into the model folder
-During the Paddle dynamic graph model -> Paddle static graph model -> ONNX model conversion, an ONNX file and the matching yaml configuration file are generated; put the configuration file into the model folder.
-The model file converted to RKNN also needs to be copied into model.
-
-### Prepare a test image in the image folder
 ```bash
 wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
-cp 000000014439.jpg ./images
-```
-
-### Build the example
-
-```bash
 cd build
-cmake ..
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
 make -j8
-make install
-
-## Run the example
-
-```bash
-cd ./build/install
-./infer_picodet model/ images/000000014439.jpg
+./infer_rkyolo /path/to/model 000000014439.jpg
 ```
fastdeploy/runtime/backends/lite/configure_hardware.cc | 159 lines (new file)
@@ -0,0 +1,159 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/runtime/backends/lite/lite_backend.h"
// https://github.com/PaddlePaddle/Paddle-Lite/issues/8290
// When compiling the FastDeploy dynamic library, namely,
// WITH_STATIC_LIB=OFF, and depending on the Paddle Lite
// static library, you need to include the fake registration
// codes of Paddle Lite. When you compile the FastDeploy static
// library and depends on the Paddle Lite static library,
// WITH_STATIC_LIB=ON, you do not need to include the fake
// registration codes for Paddle Lite, but wait until you
// use the FastDeploy static library.
#if (defined(WITH_LITE_STATIC) && (!defined(WITH_STATIC_LIB)))
#warning You are compiling the FastDeploy dynamic library with \
Paddle Lite static lib We will automatically add some registration \
codes for ops, kernels and passes for Paddle Lite.
#include "paddle_use_kernels.h"  // NOLINT
#include "paddle_use_ops.h"      // NOLINT
#include "paddle_use_passes.h"   // NOLINT
#endif

#include <cstring>

namespace fastdeploy {

#if defined(__arm__) || defined(__aarch64__)
#define FD_LITE_HOST TARGET(kARM)
#elif defined(__x86_64__)
#define FD_LITE_HOST TARGET(kX86)
#endif

std::vector<paddle::lite_api::Place> GetPlacesForCpu(
    const LiteBackendOption& option) {
  std::vector<paddle::lite_api::Place> valid_places;
  valid_places.push_back(
      paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kInt8)});
  if (option.enable_fp16) {
    paddle::lite_api::MobileConfig check_fp16_config;
    if (check_fp16_config.check_fp16_valid()) {
      valid_places.push_back(
          paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFP16)});
    } else {
      FDWARNING << "Current CPU doesn't support float16 precision, will "
                   "fallback to float32."
                << std::endl;
    }
  }
  valid_places.push_back(
      paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFloat)});
  return valid_places;
}

void LiteBackend::ConfigureCpu(const LiteBackendOption& option) {
  config_.set_valid_places(GetPlacesForCpu(option));
}

void LiteBackend::ConfigureKunlunXin(const LiteBackendOption& option) {
  std::vector<paddle::lite_api::Place> valid_places;
  valid_places.push_back(
      paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)});
  if (option.enable_fp16) {
    valid_places.push_back(
        paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)});
  }
  valid_places.push_back(
      paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)});

  config_.set_xpu_dev_per_thread(option.device_id);
  config_.set_xpu_workspace_l3_size_per_thread(
      option.kunlunxin_l3_workspace_size);
  config_.set_xpu_l3_cache_method(option.kunlunxin_l3_workspace_size,
                                  option.kunlunxin_locked);
  config_.set_xpu_conv_autotune(option.kunlunxin_autotune,
                                option.kunlunxin_autotune_file);
  config_.set_xpu_multi_encoder_method(option.kunlunxin_precision,
                                       option.kunlunxin_adaptive_seqlen);
  if (option.kunlunxin_enable_multi_stream) {
    config_.enable_xpu_multi_stream();
  }
  auto cpu_places = GetPlacesForCpu(option);
  valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end());
  config_.set_valid_places(valid_places);
}

void LiteBackend::ConfigureTimvx(const LiteBackendOption& option) {
  config_.set_nnadapter_device_names({"verisilicon_timvx"});
  std::vector<paddle::lite_api::Place> valid_places;
  valid_places.push_back(
      paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
  valid_places.push_back(
      paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
  auto cpu_places = GetPlacesForCpu(option);
  valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end());
  config_.set_valid_places(valid_places);
  ConfigureNNAdapter(option);
}

void LiteBackend::ConfigureAscend(const LiteBackendOption& option) {
  config_.set_nnadapter_device_names({"huawei_ascend_npu"});
  std::vector<paddle::lite_api::Place> valid_places;
  valid_places.push_back(
      paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
  valid_places.push_back(
      paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
  auto cpu_places = GetPlacesForCpu(option);
  valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end());
  config_.set_valid_places(valid_places);
  ConfigureNNAdapter(option);
}

void LiteBackend::ConfigureNNAdapter(const LiteBackendOption& option) {
  if (!option.nnadapter_subgraph_partition_config_path.empty()) {
    std::vector<char> nnadapter_subgraph_partition_config_buffer;
    if (ReadFile(option.nnadapter_subgraph_partition_config_path,
                 &nnadapter_subgraph_partition_config_buffer, false)) {
      if (!nnadapter_subgraph_partition_config_buffer.empty()) {
        std::string nnadapter_subgraph_partition_config_string(
            nnadapter_subgraph_partition_config_buffer.data(),
            nnadapter_subgraph_partition_config_buffer.size());
        config_.set_nnadapter_subgraph_partition_config_buffer(
            nnadapter_subgraph_partition_config_string);
      }
    }
  }

  if (!option.nnadapter_context_properties.empty()) {
    config_.set_nnadapter_context_properties(
        option.nnadapter_context_properties);
  }

  if (!option.nnadapter_model_cache_dir.empty()) {
    config_.set_nnadapter_model_cache_dir(option.nnadapter_model_cache_dir);
  }

  if (!option.nnadapter_mixed_precision_quantization_config_path.empty()) {
    config_.set_nnadapter_mixed_precision_quantization_config_path(
        option.nnadapter_mixed_precision_quantization_config_path);
  }

  if (!option.nnadapter_subgraph_partition_config_path.empty()) {
    config_.set_nnadapter_subgraph_partition_config_path(
        option.nnadapter_subgraph_partition_config_path);
  }

  config_.set_nnadapter_dynamic_shape_info(option.nnadapter_dynamic_shape_info);
}
}  // namespace fastdeploy
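The helper above fixes the precision preference for CPU kernels: kInt8 first, kFP16 only when `check_fp16_valid()` passes, and kFloat as the fallback. A minimal sketch of exercising it in isolation; the forward declaration is ours for illustration only, since the commit defines the function at namespace scope without declaring it in a header:

```cpp
#include <iostream>
#include <vector>

#include "fastdeploy/runtime/backends/lite/option.h"
#include "paddle_api.h"  // NOLINT

namespace fastdeploy {
// Our own forward declaration of the helper defined in configure_hardware.cc.
std::vector<paddle::lite_api::Place> GetPlacesForCpu(
    const LiteBackendOption& option);
}  // namespace fastdeploy

int main() {
  fastdeploy::LiteBackendOption option;
  option.enable_fp16 = true;  // kFP16 is appended only if the CPU supports it
  auto places = fastdeploy::GetPlacesForCpu(option);
  // Order: kInt8, [kFP16 if supported], kFloat on FD_LITE_HOST (kARM or kX86).
  std::cout << "valid places: " << places.size() << std::endl;
  return 0;
}
```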
@@ -26,8 +26,8 @@
 #warning You are compiling the FastDeploy dynamic library with \
 Paddle Lite static lib We will automatically add some registration \
 codes for ops, kernels and passes for Paddle Lite.
-#include "paddle_use_ops.h"  // NOLINT
 #include "paddle_use_kernels.h"  // NOLINT
+#include "paddle_use_ops.h"  // NOLINT
 #include "paddle_use_passes.h"  // NOLINT
 #endif
 
@@ -35,143 +35,20 @@ codes for ops, kernels and passes for Paddle Lite.
 
 namespace fastdeploy {
 
-// Convert data type from paddle lite to fastdeploy
-FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype) {
-  if (dtype == paddle::lite_api::PrecisionType::kFloat) {
-    return FDDataType::FP32;
-  } else if (dtype == paddle::lite_api::PrecisionType::kInt8) {
-    return FDDataType::INT8;
-  } else if (dtype == paddle::lite_api::PrecisionType::kInt32) {
-    return FDDataType::INT32;
-  } else if (dtype == paddle::lite_api::PrecisionType::kInt64) {
-    return FDDataType::INT64;
-  } else if (dtype == paddle::lite_api::PrecisionType::kInt16) {
-    return FDDataType::INT16;
-  } else if (dtype == paddle::lite_api::PrecisionType::kUInt8) {
-    return FDDataType::UINT8;
-  } else if (dtype == paddle::lite_api::PrecisionType::kFP64) {
-    return FDDataType::FP64;
-  }
-  FDASSERT(false, "Unexpected data type of %d.", dtype);
-  return FDDataType::FP32;
-}
-
 void LiteBackend::BuildOption(const LiteBackendOption& option) {
   option_ = option;
-  std::vector<paddle::lite_api::Place> valid_places;
-  if (option_.enable_int8) {
-    if (option_.enable_kunlunxin) {
-      valid_places.push_back(
-          paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)});
-    } else {
-      valid_places.push_back(
-          paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
-    }
-    FDINFO << "Lite::Backend enable_int8 option is ON ! Lite::Backend will "
-           << "inference with int8 precision!" << std::endl;
-  }
-  if (option_.enable_fp16) {
-    if (option_.enable_kunlunxin) {
-      valid_places.push_back(
-          paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)});
-    } else {
-      paddle::lite_api::MobileConfig check_fp16_config;
-      // Determine whether the device supports the FP16
-      // instruction set (or whether it is an arm device
-      // of the armv8.2 architecture)
-      supported_fp16_ = check_fp16_config.check_fp16_valid();
-      if (supported_fp16_) {
-        valid_places.push_back(
-            paddle::lite_api::Place{TARGET(kARM), PRECISION(kFP16)});
-        FDINFO << "The device supports FP16, Lite::Backend will inference with "
-                  "FP16 precision."
-               << std::endl;
-      } else {
-        FDWARNING << "The device doesn't support FP16, will fallback to FP32.";
-      }
-    }
-  }
-  if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
-    std::vector<char> nnadapter_subgraph_partition_config_buffer;
-    if (ReadFile(option_.nnadapter_subgraph_partition_config_path,
-                 &nnadapter_subgraph_partition_config_buffer, false)) {
-      if (!nnadapter_subgraph_partition_config_buffer.empty()) {
-        std::string nnadapter_subgraph_partition_config_string(
-            nnadapter_subgraph_partition_config_buffer.data(),
-            nnadapter_subgraph_partition_config_buffer.size());
-        config_.set_nnadapter_subgraph_partition_config_buffer(
-            nnadapter_subgraph_partition_config_string);
-      }
-    }
-  }
-  if (option_.enable_timvx) {
-    config_.set_nnadapter_device_names({"verisilicon_timvx"});
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
-  }
-
-  if (option_.enable_ascend) {
-    if (option_.nnadapter_device_names.empty()) {
-      config_.set_nnadapter_device_names({"huawei_ascend_npu"});
-    } else {
-      config_.set_nnadapter_device_names(option_.nnadapter_device_names);
-    }
-    if (!option_.nnadapter_context_properties.empty()) {
-      config_.set_nnadapter_context_properties(
-          option_.nnadapter_context_properties);
-    }
-    if (!option_.nnadapter_model_cache_dir.empty()) {
-      config_.set_nnadapter_model_cache_dir(option_.nnadapter_model_cache_dir);
-    }
-    if (!option_.nnadapter_mixed_precision_quantization_config_path.empty()) {
-      config_.set_nnadapter_mixed_precision_quantization_config_path(
-          option_.nnadapter_mixed_precision_quantization_config_path);
-    }
-    if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
-      config_.set_nnadapter_subgraph_partition_config_path(
-          option_.nnadapter_subgraph_partition_config_path);
-    }
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
-  }
-
-  if (option_.enable_kunlunxin) {
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)});
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kX86), PRECISION(kFloat)});
-    config_.set_xpu_dev_per_thread(option_.device_id);
-    config_.set_xpu_workspace_l3_size_per_thread(
-        option_.kunlunxin_l3_workspace_size);
-    config_.set_xpu_l3_cache_method(option_.kunlunxin_l3_workspace_size,
-                                    option_.kunlunxin_locked);
-    config_.set_xpu_conv_autotune(option_.kunlunxin_autotune,
-                                  option_.kunlunxin_autotune_file);
-    config_.set_xpu_multi_encoder_method(option_.kunlunxin_precision,
-                                         option_.kunlunxin_adaptive_seqlen);
-    if (option_.kunlunxin_enable_multi_stream) {
-      config_.enable_xpu_multi_stream();
-    }
-  } else {
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)});
-  }
-  config_.set_valid_places(valid_places);
-  if (option_.threads > 0) {
-    config_.set_threads(option_.threads);
+  if (option_.device == Device::CPU) {
+    ConfigureCpu(option_);
+  } else if (option_.device == Device::TIMVX) {
+    ConfigureTimvx(option_);
+  } else if (option_.device == Device::KUNLUNXIN) {
+    ConfigureKunlunXin(option_);
+  } else if (option_.device == Device::ASCEND) {
+    ConfigureAscend(option_);
+  }
+  if (option_.cpu_threads > 0) {
+    config_.set_threads(option_.cpu_threads);
   }
   if (option_.power_mode > 0) {
     config_.set_power_mode(
@@ -179,29 +56,6 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
   }
 }
-
-bool LiteBackend::ReadFile(const std::string& filename,
-                           std::vector<char>* contents, const bool binary) {
-  FILE* fp = fopen(filename.c_str(), binary ? "rb" : "r");
-  if (!fp) {
-    FDERROR << "Cannot open file " << filename << "." << std::endl;
-    return false;
-  }
-  fseek(fp, 0, SEEK_END);
-  size_t size = ftell(fp);
-  fseek(fp, 0, SEEK_SET);
-  contents->clear();
-  contents->resize(size);
-  size_t offset = 0;
-  char* ptr = reinterpret_cast<char*>(&(contents->at(0)));
-  while (offset < size) {
-    size_t already_read = fread(ptr, 1, size - offset, fp);
-    offset += already_read;
-    ptr += already_read;
-  }
-  fclose(fp);
-  return true;
-}
-
 bool LiteBackend::InitFromPaddle(const std::string& model_file,
                                  const std::string& params_file,
                                  const LiteBackendOption& option) {
@@ -246,7 +100,7 @@ bool LiteBackend::InitFromPaddle(const std::string& model_file,
     auto shape = tensor->shape();
     info.shape.assign(shape.begin(), shape.end());
     info.name = output_names[i];
-    if (!option_.enable_kunlunxin) {
+    if (!option_.device == Device::KUNLUNXIN) {
       info.dtype = LiteDataTypeToFD(tensor->precision());
     }
     outputs_desc_.emplace_back(info);
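One detail worth noting in this hunk: `!` binds tighter than `==` in C++, so `!option_.device == Device::KUNLUNXIN` evaluates as `(!option_.device) == Device::KUNLUNXIN` rather than negating the comparison. A sketch of the condition that the old `!option_.enable_kunlunxin` guard expressed, written against the new Device-based option:

```cpp
// Equivalent of the previous `if (!option_.enable_kunlunxin)` check:
if (option_.device != Device::KUNLUNXIN) {
  info.dtype = LiteDataTypeToFD(tensor->precision());
}
```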
@@ -337,4 +191,49 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
   return true;
 }
+
+bool ReadFile(const std::string& filename, std::vector<char>* contents,
+              bool binary) {
+  FILE* fp = fopen(filename.c_str(), binary ? "rb" : "r");
+  if (!fp) {
+    FDERROR << "Cannot open file " << filename << "." << std::endl;
+    return false;
+  }
+  fseek(fp, 0, SEEK_END);
+  size_t size = ftell(fp);
+  fseek(fp, 0, SEEK_SET);
+  contents->clear();
+  contents->resize(size);
+  size_t offset = 0;
+  char* ptr = reinterpret_cast<char*>(&(contents->at(0)));
+  while (offset < size) {
+    size_t already_read = fread(ptr, 1, size - offset, fp);
+    offset += already_read;
+    ptr += already_read;
+  }
+  fclose(fp);
+  return true;
+}
+
+// Convert data type from paddle lite to fastdeploy
+FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype) {
+  if (dtype == paddle::lite_api::PrecisionType::kFloat) {
+    return FDDataType::FP32;
+  } else if (dtype == paddle::lite_api::PrecisionType::kInt8) {
+    return FDDataType::INT8;
+  } else if (dtype == paddle::lite_api::PrecisionType::kInt32) {
+    return FDDataType::INT32;
+  } else if (dtype == paddle::lite_api::PrecisionType::kInt64) {
+    return FDDataType::INT64;
+  } else if (dtype == paddle::lite_api::PrecisionType::kInt16) {
+    return FDDataType::INT16;
+  } else if (dtype == paddle::lite_api::PrecisionType::kUInt8) {
+    return FDDataType::UINT8;
+  } else if (dtype == paddle::lite_api::PrecisionType::kFP64) {
+    return FDDataType::FP64;
+  }
+  FDASSERT(false, "Unexpected data type of %s.",
+           paddle::lite_api::PrecisionToStr(dtype).c_str());
+  return FDDataType::FP32;
+}
+
 }  // namespace fastdeploy
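With this hunk, `ReadFile` and `LiteDataTypeToFD` become free functions in the `fastdeploy` namespace (declared in the lite_backend.h hunks below), so the NNAdapter helpers in configure_hardware.cc can reuse them. A minimal sketch of the text-mode read path used for the subgraph partition config; the file name is illustrative:

```cpp
#include <string>
#include <vector>

#include "fastdeploy/runtime/backends/lite/lite_backend.h"

int main() {
  std::vector<char> buffer;
  // binary=false opens with "r" instead of "rb"; the declared default is true.
  if (fastdeploy::ReadFile("subgraph_partition.txt", &buffer, false)) {
    std::string config(buffer.data(), buffer.size());
    // `config` is what ends up in
    // set_nnadapter_subgraph_partition_config_buffer(...).
  }
  return 0;
}
```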
@@ -19,13 +19,12 @@
 #include <string>
 #include <vector>
 
-#include "fastdeploy/runtime/backends/backend.h"
-#include "fastdeploy/runtime/backends/lite/option.h"
 #include "paddle_api.h"  // NOLINT
 
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/lite/option.h"
+
 namespace fastdeploy {
-// Convert data type from paddle lite to fastdeploy
-FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype);
 
 class LiteBackend : public BaseBackend {
  public:
@@ -51,15 +50,26 @@ class LiteBackend : public BaseBackend {
   std::vector<TensorInfo> GetOutputInfos() override;
 
  private:
+  void ConfigureCpu(const LiteBackendOption& option);
+  void ConfigureTimvx(const LiteBackendOption& option);
+  void ConfigureAscend(const LiteBackendOption& option);
+  void ConfigureKunlunXin(const LiteBackendOption& option);
+  void ConfigureNNAdapter(const LiteBackendOption& option);
+
   paddle::lite_api::CxxConfig config_;
   std::shared_ptr<paddle::lite_api::PaddlePredictor> predictor_;
   std::vector<TensorInfo> inputs_desc_;
   std::vector<TensorInfo> outputs_desc_;
   std::map<std::string, int> inputs_order_;
   LiteBackendOption option_;
-  bool supported_fp16_ = false;
-  bool ReadFile(const std::string& filename,
-                std::vector<char>* contents,
-                const bool binary = true);
 };
+
+// Convert data type from paddle lite to fastdeploy
+FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype);
+
+// Helper function to read file
+bool ReadFile(const std::string& filename,
+              std::vector<char>* contents,
+              bool binary = true);
+
 }  // namespace fastdeploy
@@ -51,24 +51,19 @@ enum LitePowerMode {
 };
 
 struct LiteBackendOption {
-  // cpu num threads
-  int threads = 1;
-  // lite power mode
-  // 0: LITE_POWER_HIGH
-  // 1: LITE_POWER_LOW
-  // 2: LITE_POWER_FULL
-  // 3: LITE_POWER_NO_BIND
-  // 4: LITE_POWER_RAND_HIGH
-  // 5: LITE_POWER_RAND_LOW
-  int power_mode = 3;
-  // enable fp16
+  /// Paddle Lite power mode for mobile device.
+  LitePowerMode power_mode = LITE_POWER_NO_BIND;
+  /// Number of threads while use CPU
+  int cpu_threads = 1;
+  /// Enable use half precision
   bool enable_fp16 = false;
-  // enable int8
+  /// Enable use int8 precision for quantized model
   bool enable_int8 = false;
 
+  Device device = Device::CPU;
+
   // optimized model dir for CxxConfig
   std::string optimized_model_dir = "";
-  // TODO(qiuyanjun): support more options for lite backend.
-  // Such as fp16, different device target (kARM/kXPU/kNPU/...)
   std::string nnadapter_subgraph_partition_config_path = "";
   std::string nnadapter_subgraph_partition_config_buffer = "";
   std::string nnadapter_context_properties = "";
@@ -77,9 +72,6 @@ struct LiteBackendOption {
   std::map<std::string, std::vector<std::vector<int64_t>>>
       nnadapter_dynamic_shape_info = {{"", {{0}}}};
   std::vector<std::string> nnadapter_device_names = {};
-  bool enable_timvx = false;
-  bool enable_ascend = false;
-  bool enable_kunlunxin = false;
   int device_id = 0;
   int kunlunxin_l3_workspace_size = 0xfffc00;
   bool kunlunxin_locked = false;
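Taken together, the two option.h hunks replace the per-device `enable_*` flags with a single `Device` field and rename `threads` to `cpu_threads`. A hedged sketch of configuring the struct directly under the new layout, assuming `Device` and the `LitePowerMode` enum are visible in namespace `fastdeploy` as in this header:

```cpp
#include "fastdeploy/runtime/backends/lite/option.h"

int main() {
  fastdeploy::LiteBackendOption option;        // device defaults to Device::CPU
  option.device = fastdeploy::Device::ASCEND;  // replaces enable_ascend = true
  option.cpu_threads = 4;                      // renamed from `threads`
  option.power_mode = fastdeploy::LITE_POWER_NO_BIND;  // now a LitePowerMode
  return 0;
}
```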
@@ -92,23 +92,19 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
 bool PaddleBackend::InitFromPaddle(const std::string& model_buffer,
                                    const std::string& params_buffer,
                                    const PaddleBackendOption& option) {
-  // bool PaddleBackend::InitFromPaddle(const std::string& contents) {
   if (initialized_) {
     FDERROR << "PaddleBackend is already initlized, cannot initialize again."
             << std::endl;
     return false;
   }
+  config_.SetModelBuffer(model_buffer.c_str(), model_buffer.size(),
+                         params_buffer.c_str(), params_buffer.size());
+  config_.EnableMemoryOptim();
+  BuildOption(option);
 
   // The input/output information get from predictor is not right, use
   // PaddleReader instead now
-  std::string contents;
-  config_.SetModelBuffer(model_buffer.c_str(), model_buffer.size(),
-                         params_buffer.c_str(), params_buffer.size());
-  contents = model_buffer;
-  config_.EnableMemoryOptim();
-  BuildOption(option);
-  auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size());
+  auto reader =
+      paddle2onnx::PaddleReader(model_buffer.c_str(), model_buffer.size());
   // If it's a quantized model, and use cpu with mkldnn, automaticaly switch to
   // int8 mode
   if (reader.is_quantize_model) {
@@ -58,7 +58,7 @@ void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
 
 void RuntimeOption::UseTimVX() {
   device = Device::TIMVX;
-  paddle_lite_option.enable_timvx = true;
+  paddle_lite_option.device = device;
 }
 
 void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
@@ -68,7 +68,7 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
                                  bool adaptive_seqlen,
                                  bool enable_multi_stream) {
   device = Device::KUNLUNXIN;
-  paddle_lite_option.enable_kunlunxin = true;
+  paddle_lite_option.device = device;
   paddle_lite_option.device_id = kunlunxin_id;
   paddle_lite_option.kunlunxin_l3_workspace_size = l3_workspace_size;
   paddle_lite_option.kunlunxin_locked = locked;
@@ -81,7 +81,7 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
 
 void RuntimeOption::UseAscend() {
   device = Device::ASCEND;
-  paddle_lite_option.enable_ascend = true;
+  paddle_lite_option.device = device;
 }
 
 void RuntimeOption::UseSophgo() {
@@ -96,7 +96,7 @@ void RuntimeOption::SetExternalStream(void* external_stream) {
 void RuntimeOption::SetCpuThreadNum(int thread_num) {
   FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
   cpu_thread_num = thread_num;
-  paddle_lite_option.threads = thread_num;
+  paddle_lite_option.cpu_threads = thread_num;
   ort_option.intra_op_num_threads = thread_num;
 }
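These four hunks are the public-API side of the same refactor: each `Use*` entry point now forwards the runtime `device` into `paddle_lite_option.device`, and `SetCpuThreadNum` feeds the renamed `cpu_threads`. A minimal end-to-end sketch, assuming the usual `fastdeploy/runtime.h` entry points of this era (`SetModelPath`, `Runtime::Init`); the model paths are placeholders, not part of this commit:

```cpp
#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");
  option.UseAscend();         // device = Device::ASCEND, mirrored into paddle_lite_option
  option.SetCpuThreadNum(4);  // also sets paddle_lite_option.cpu_threads

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    return -1;  // on success, LiteBackend::BuildOption dispatches to ConfigureAscend
  }
  return 0;
}
```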
@@ -141,7 +141,6 @@ int RKYOLOPostprocessor::ProcessFP16(float* input, int* anchor, int grid_h,
   } else {
     limit_score = box_conf_f32 * class_prob_f32;
   }
-  // printf("limit score: %f", limit_score);
   if (limit_score > conf_threshold_) {
     float box_x, box_y, box_w, box_h;
     if (anchor_per_branch_ == 1) {
@@ -55,28 +55,32 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor {
   /// Get nms_threshold, default 0.45
   float GetNMSThreshold() const { return nms_threshold_; }
 
-  // Set height and weight
+  /// Set height and weight
   void SetHeightAndWeight(int& height, int& width) {
     height_ = height;
     width_ = width;
   }
 
-  // Set pad_hw_values
+  /// Set pad_hw_values
   void SetPadHWValues(std::vector<std::vector<int>> pad_hw_values) {
     pad_hw_values_ = pad_hw_values;
   }
 
-  // Set scale
-  void SetScale(std::vector<float> scale) {
-    scale_ = scale;
-  }
+  /// Set scale
+  void SetScale(std::vector<float> scale) { scale_ = scale; }
 
-  // Set Anchor
+  /// Set Anchor
   void SetAnchor(std::vector<int> anchors, int anchor_per_branch) {
     anchors_ = anchors;
     anchor_per_branch_ = anchor_per_branch;
   }
 
+  /// Set the number of class
+  void SetClassNum(int num) {
+    obj_class_num_ = num;
+    prob_box_size_ = obj_class_num_ + 5;
+  }
+
  private:
   std::vector<int> anchors_ = {10, 13, 16, 30, 33, 23, 30, 61, 62,
                                45, 59, 119, 116, 90, 156, 198, 373, 326};
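The new `SetClassNum` setter keeps `prob_box_size_` in sync with the class count (classes + 5: four box coordinates plus objectness). A short usage sketch, assuming the postprocessor lives in the `fastdeploy::vision::detection` namespace used by the RKYOLO examples and is exported through the umbrella vision header:

```cpp
#include "fastdeploy/vision.h"

int main() {
  fastdeploy::vision::detection::RKYOLOPostprocessor postprocessor;
  // For an 80-class model, prob_box_size_ becomes 85 (80 + 4 box + 1 objectness).
  postprocessor.SetClassNum(80);
  return 0;
}
```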
@@ -85,12 +89,9 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor {
   int width_ = 0;
   int anchor_per_branch_ = 0;
 
-  int ProcessFP16(float *input, int *anchor, int grid_h,
-                  int grid_w, int stride,
-                  std::vector<float> &boxes,
-                  std::vector<float> &boxScores,
-                  std::vector<int> &classId,
-                  float threshold);
+  int ProcessFP16(float* input, int* anchor, int grid_h, int grid_w, int stride,
+                  std::vector<float>& boxes, std::vector<float>& boxScores,
+                  std::vector<int>& classId, float threshold);
   // Model
   int QuickSortIndiceInverse(std::vector<float>& input, int left, int right,
                              std::vector<int>& indices);