From 8ec1353cd8397c6a62e2b1c230ecf61aff027d0a Mon Sep 17 00:00:00 2001
From: Jason
Date: Tue, 7 Mar 2023 18:46:48 +0800
Subject: [PATCH 1/2] [Doc] Add notes for tensors (#1533)

Add notes for tensors
---
 fastdeploy/benchmark/option.h       |   4 +-
 fastdeploy/core/fd_tensor.h         | 114 ++++++++++++++++++++--------
 fastdeploy/runtime/runtime_option.h |  28 +++----
 3 files changed, 97 insertions(+), 49 deletions(-)

diff --git a/fastdeploy/benchmark/option.h b/fastdeploy/benchmark/option.h
index 5af9f1585..653bd4029 100755
--- a/fastdeploy/benchmark/option.h
+++ b/fastdeploy/benchmark/option.h
@@ -20,8 +20,8 @@ namespace fastdeploy {
  */
 namespace benchmark {
-/*! @brief Option object used to control the behavior of the benchmark profiling.
- */
+// @brief Option object used to control the behavior of the benchmark profiling.
+//
 struct BenchmarkOption {
   int warmup = 50;    ///< Warmup for backend inference.
   int repeats = 100;  ///< Repeats for backend inference.

diff --git a/fastdeploy/core/fd_tensor.h b/fastdeploy/core/fd_tensor.h
index 95a603dd8..dc86d8d83 100644
--- a/fastdeploy/core/fd_tensor.h
+++ b/fastdeploy/core/fd_tensor.h
@@ -25,11 +25,89 @@
 
 namespace fastdeploy {
 
+/*! @brief FDTensor object used to represent a data matrix
+ *
+ */
 struct FASTDEPLOY_DECL FDTensor {
-  // std::vector data;
+  /** \brief Set the data buffer for a FDTensor, e.g.
+   * ```
+   * std::vector<float> buffer(1 * 3 * 224 * 224, 0);
+   * FDTensor tensor;
+   * tensor.SetData({1, 3, 224, 224}, FDDataType::FLOAT, buffer.data());
+   * ```
+   * \param[in] tensor_shape The shape of the tensor
+   * \param[in] data_type The data type of the tensor
+   * \param[in] data_buffer The pointer to the data buffer memory
+   * \param[in] copy Whether to copy the memory from data_buffer into the tensor; if false, the tensor shares memory with data_buffer, and the data buffer is managed by the user
+   * \param[in] data_device The device of data_buffer, e.g. if data_buffer points to GPU memory, the device should be Device::GPU
+   * \param[in] data_device_id The device id of data_buffer
+   */
+  void SetData(const std::vector<int64_t>& tensor_shape, const FDDataType& data_type, void* data_buffer, bool copy = false, const Device& data_device = Device::CPU, int data_device_id = -1) {
+    SetExternalData(tensor_shape, data_type, data_buffer, data_device, data_device_id);
+    if (copy) {
+      StopSharing();
+    }
+  }
+
+  /// Get the data pointer of the tensor
+  void* GetData() {
+    return MutableData();
+  }
+  /// Get the const data pointer of the tensor
+  const void* GetData() const {
+    return Data();
+  }
+
+  /// Expand the shape of the tensor; this does not change the data memory, it only modifies its `shape` attribute
+  void ExpandDim(int64_t axis = 0);
+
+  /// Squeeze the shape of the tensor; this does not change the data memory, it only modifies its `shape` attribute
+  void Squeeze(int64_t axis = 0);
+
+  /// Reshape the tensor; this does not change the data memory, it only modifies its `shape` attribute
+  bool Reshape(const std::vector<int64_t>& new_shape);
+
+  /// Total size of the tensor memory buffer, in bytes
+  int Nbytes() const;
+
+  /// Total number of elements in the tensor
+  int Numel() const;
+
+  /// Get the shape of the tensor
+  std::vector<int64_t> Shape() const { return shape; }
+
+  /// Get the dtype of the tensor
+  FDDataType Dtype() const { return dtype; }
+
+  /** \brief Allocate a CPU data buffer for a FDTensor, e.g.
+   * ```
+   * FDTensor tensor;
+   * tensor.Allocate(FDDataType::FLOAT, {1, 3, 224, 224});
+   * ```
+   * \param[in] data_type The data type of the tensor
+   * \param[in] data_shape The shape of the tensor
+   */
+  void Allocate(const FDDataType& data_type, const std::vector<int64_t>& data_shape) {
+    Allocate(data_shape, data_type, name);
+  }
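+
+  // A minimal usage sketch of the interfaces above (illustrative only; the
+  // shapes and buffer values are arbitrary):
+  //
+  //   std::vector<float> buffer(1 * 3 * 224 * 224, 0);
+  //   FDTensor tensor;
+  //   tensor.SetData({1, 3, 224, 224}, FDDataType::FLOAT, buffer.data());
+  //   tensor.ExpandDim(0);   // shape becomes {1, 1, 3, 224, 224}
+  //   tensor.Squeeze(0);     // shape back to {1, 3, 224, 224}
+  //   float* ptr = static_cast<float*>(tensor.GetData());
+  //   tensor.PrintInfo();    // prints shape, dtype, mean, max, min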
+
+  /// Debug function: print the shape, dtype, mean, max and min of the tensor
+  void PrintInfo(const std::string& prefix = "Debug TensorInfo: ") const;
+
+  /// Name of the tensor; when the tensor is fed to the runtime, this needs to be defined
+  std::string name = "";
+
+  /// Whether the tensor owns its data buffer or shares it from outside
+  bool IsShared() { return external_data_ptr != nullptr; }
+  /// If the tensor shares its data buffer with outside memory, `StopSharing` will copy the data into a buffer owned by the tensor; otherwise it does nothing
+  void StopSharing();
+
+
+  // ******************************************************
+  // The following members and functions are only used inside FastDeploy and may be removed in a future version
+
   void* buffer_ = nullptr;
   std::vector<int64_t> shape = {0};
-  std::string name = "";
   FDDataType dtype = FDDataType::INT8;
 
   // This use to skip memory copy step
@@ -64,10 +142,6 @@ struct FASTDEPLOY_DECL FDTensor {
 
   void* Data();
 
-  bool IsShared() { return external_data_ptr != nullptr; }
-
-  void StopSharing();
-
   const void* Data() const;
 
   // Use this data to get the tensor data to process
@@ -78,6 +152,7 @@ struct FASTDEPLOY_DECL FDTensor {
   // will copy to cpu store in `temporary_cpu_buffer`
   const void* CpuData() const;
 
+  // void SetDataBuffer(const std::vector<int64_t>& new_shape, const FDDataType& data_type, void* data_buffer, bool copy = false, const Device& new_device = Device::CPU, int new_device_id = -1);
   // Set user memory buffer for Tensor, the memory is managed by
   // the user it self, but the Tensor will share the memory with user
   // So take care with the user buffer
   void SetExternalData(const std::vector<int64_t>& new_shape,
                        const FDDataType& data_type, void* data_buffer,
                        const Device& new_device = Device::CPU,
                        int new_device_id = -1);
-
-  // Expand the shape of a Tensor. Insert a new axis that will appear
-  // at the `axis` position in the expanded Tensor shape.
-  void ExpandDim(int64_t axis = 0);
-
-  // Squeeze the shape of a Tensor. Erase the axis that will appear
-  // at the `axis` position in the squeezed Tensor shape.
-  void Squeeze(int64_t axis = 0);
-
   // Initialize Tensor
   // Include setting attribute for tensor
   // and allocate cpu memory buffer
@@ -102,18 +168,6 @@ struct FASTDEPLOY_DECL FDTensor {
                  const std::string& tensor_name = "",
                  const Device& new_device = Device::CPU);
 
-  // Total size of tensor memory buffer in bytes
-  int Nbytes() const;
-
-  // Total number of elements in this tensor
-  int Numel() const;
-
-  // Get shape of FDTensor
-  std::vector<int64_t> Shape() const { return shape; }
-
-  // Get dtype of FDTensor
-  FDDataType Dtype() const { return dtype; }
-
   void Resize(size_t nbytes);
 
   void Resize(const std::vector<int64_t>& new_shape);
 
@@ -122,12 +176,6 @@ struct FASTDEPLOY_DECL FDTensor {
                const FDDataType& data_type,
                const std::string& tensor_name = "",
                const Device& new_device = Device::CPU);
 
-  bool Reshape(const std::vector<int64_t>& new_shape);
-  // Debug function
-  // Use this function to print shape, dtype, mean, max, min
-  // prefix will also be printed as tag
-  void PrintInfo(const std::string& prefix = "TensorInfo: ") const;
-
   bool ReallocFn(size_t nbytes);
 
   void FreeFn();

diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h
index a7d7492fb..a1c2d152d 100644
--- a/fastdeploy/runtime/runtime_option.h
+++ b/fastdeploy/runtime/runtime_option.h
@@ -158,12 +158,12 @@ struct FASTDEPLOY_DECL RuntimeOption {
   /// Option to configure RKNPU2 backend
   RKNPU2BackendOption rknpu2_option;
 
-  /** \brief Set the profile mode as 'true'.
-   *
-   * \param[in] inclue_h2d_d2h Whether to include time of H2D_D2H for time of runtime.
-   * \param[in] repeat Repeat times for runtime inference.
-   * \param[in] warmup Warmup times for runtime inference.
-   */
+  // \brief Set the profile mode as 'true'.
+  //
+  // \param[in] inclue_h2d_d2h Whether to include time of H2D_D2H for time of runtime.
+  // \param[in] repeat Repeat times for runtime inference.
+  // \param[in] warmup Warmup times for runtime inference.
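+  //
+  // Illustrative call (a sketch, not an official example; `option` is a
+  // hypothetical RuntimeOption instance): profile with H2D/D2H time included,
+  // 200 repeats and 20 warmup runs:
+  //   option.EnableProfiling(true, 200, 20);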
+  //
   void EnableProfiling(bool inclue_h2d_d2h = false,
                        int repeat = 100, int warmup = 50) {
     benchmark_option.enable_profile = true;
@@ -172,24 +172,24 @@ struct FASTDEPLOY_DECL RuntimeOption {
     benchmark_option.include_h2d_d2h = inclue_h2d_d2h;
   }
 
-  /** \brief Set the profile mode as 'false'.
-   */
+  // \brief Set the profile mode as 'false'.
+  //
   void DisableProfiling() {
     benchmark_option.enable_profile = false;
   }
 
-  /** \brief Enable to check if current backend set by user can be found at valid_xxx_backend.
-   */
+  // \brief Enable to check if current backend set by user can be found at valid_xxx_backend.
+  //
   void EnableValidBackendCheck() {
     enable_valid_backend_check = true;
  }
-  /** \brief Disable to check if current backend set by user can be found at valid_xxx_backend.
-   */
+  // \brief Disable to check if current backend set by user can be found at valid_xxx_backend.
+  //
   void DisableValidBackendCheck() {
     enable_valid_backend_check = false;
   }
 
-  /// Benchmark option
+  // Benchmark option
   benchmark::BenchmarkOption benchmark_option;
   // enable the check for valid backend, default true.
   bool enable_valid_backend_check = true;
@@ -200,7 +200,7 @@ struct FASTDEPLOY_DECL RuntimeOption {
   std::string model_file = "";
   std::string params_file = "";
   bool model_from_memory_ = false;
-  /// format of input model
+  // format of input model
   ModelFormat model_format = ModelFormat::PADDLE;
   std::string encryption_key_ = "";

From c2d377a1d6f994232e250a2afae945be944b1cfa Mon Sep 17 00:00:00 2001
From: Zheng-Bicheng <58363586+Zheng-Bicheng@users.noreply.github.com>
Date: Tue, 7 Mar 2023 18:54:27 +0800
Subject: [PATCH 2/2] [Doc] update rknn and ppocr example docs (#1538)

* update docs

* update
---
 docs/cn/build_and_install/rknpu2.md          | 146 +++++++++++++++++-
 .../vision/ocr/PP-OCRv3/rknpu2/cpp/README.md |   6 +-
 .../ocr/PP-OCRv3/rknpu2/python/README.md     |   8 +-
 .../ocr/PP-OCRv3/rknpu2/python/README_CN.md  |   8 +-
 4 files changed, 155 insertions(+), 13 deletions(-)

diff --git a/docs/cn/build_and_install/rknpu2.md b/docs/cn/build_and_install/rknpu2.md
index f432c94ce..9e91cd7c9 100644
--- a/docs/cn/build_and_install/rknpu2.md
+++ b/docs/cn/build_and_install/rknpu2.md
@@ -1,11 +1,153 @@
 [English](../../en/build_and_install/rknpu2.md) | 简体中文
-# FastDeploy RKNPU2 Resource Navigation
-## Preface
+# FastDeploy RKNPU2 Navigation
+
 RKNPU2 refers to the NPU on Rockchip's RK356X and RK3588 series chips.
 FastDeploy currently has preliminary support for deploying models with RKNPU2.
 If you run into problems while using it, please report them in the Issues, together with your runtime environment.
 
+## Overview of the FastDeploy RKNPU2 Environment Setup
+
+To use the RKNPU2 inference engine in FastDeploy, you need to set up the following environments.
+
+| Tool         | Required | Installed on | Purpose                                                                    |
+|--------------|----------|--------------|-----------------------------------------------------------------------------|
+| Paddle2ONNX  | Required | PC           | Converts Paddle Inference models to ONNX models                            |
+| RKNNToolkit2 | Required | PC           | Converts ONNX models to RKNN models                                        |
+| RKNPU2       | Optional | Board        | Base RKNPU2 driver; FastDeploy already integrates it, so it can be skipped |
+
+## Installing the Model Conversion Environment
+
+Model conversion has to be done on Ubuntu. We recommend using conda to manage your Python environments, with Python 3.6 for the conversion environment.
+For example, the following commands create a Python 3.6 environment:
+
+```bash
+conda create -n rknn2 python=3.6
+conda activate rknn2
+```
+
+### Installing the Required Dependencies
+
+Before installing RKNNToolkit2, we need to install the following required packages:
+
+```bash
+sudo apt-get install libxslt1-dev zlib1g zlib1g-dev libglib2.0-0 libsm6 \
+    libgl1-mesa-glx libprotobuf-dev gcc g++
+```
+
+### Installing RKNNToolkit2
+
+FastDeploy currently uses version 1.4.2b3 of the conversion tool. If you need the latest version of the conversion tool, you can find it on the [Baidu Netdisk (extraction code: rknn)](https://eyun.baidu.com/s/3eTDMk6Y) provided by Rockchip.
+
+```bash
+# rknn_toolkit2 has a hard dependency on a specific numpy, so install numpy==1.16.6 first
+pip install numpy==1.16.6
+
+# Download and install rknn_toolkit2-1.4.2b3+0bdd72ff-cp36-cp36m-linux_x86_64.whl
+wget https://bj.bcebos.com/fastdeploy/third_libs/rknn_toolkit2-1.4.2b3+0bdd72ff-cp36-cp36m-linux_x86_64.whl
+pip install rknn_toolkit2-1.4.2b3+0bdd72ff-cp36-cp36m-linux_x86_64.whl
+```
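+
+After the toolkit is installed, an ONNX model exported by Paddle2ONNX can be converted to the RKNN format with a few lines of Python. The snippet below is only a minimal sketch: the file names and target platform are placeholders, and quantization plus the remaining options are covered in Rockchip's RKNNToolkit2 documentation.
+
+```python
+# Minimal ONNX -> RKNN conversion sketch (file names are placeholders)
+from rknn.api import RKNN
+
+rknn = RKNN()
+rknn.config(target_platform="rk3588")  # use "rk3566"/"rk3568" for RK356X boards
+rknn.load_onnx(model="model.onnx")     # ONNX model exported by Paddle2ONNX
+rknn.build(do_quantization=False)      # build the graph without quantization
+rknn.export_rknn("model.rknn")         # write the deployable .rknn file
+rknn.release()
+```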
+
+## Installing the FastDeploy C++ SDK
+
+Because the RK356X and RK3588 differ in performance, we provide two ways to build FastDeploy.
+
+### Building the FastDeploy C++ SDK on the Board
+
+The RK3588 has a fairly strong CPU and on-board compilation speed is acceptable, so for it we recommend compiling directly on the board. The following tutorial was completed on RK356X (Debian 10) and RK3588 (Debian 11).
+
+```bash
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy
+
+# If you are using the develop branch, run the following command
+git checkout develop
+
+mkdir build && cd build
+cmake .. -DENABLE_ORT_BACKEND=ON \
+         -DENABLE_RKNPU2_BACKEND=ON \
+         -DENABLE_VISION=ON \
+         -DRKNN2_TARGET_SOC=RK3588 \
+         -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.0
+make -j8
+make install
+```
+
+### Cross-Compiling the FastDeploy C++ SDK
+
+The RK356X has a relatively weak CPU, so for it we recommend cross-compiling. The following tutorial was completed on Ubuntu 22.04.
+
+```bash
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy
+
+# If you are using the develop branch, run the following command
+git checkout develop
+
+mkdir build && cd build
+cmake .. -DCMAKE_C_COMPILER=/home/zbc/opt/gcc-linaro-6.3.1-2017.05-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu-gcc \
+         -DCMAKE_CXX_COMPILER=/home/zbc/opt/gcc-linaro-6.3.1-2017.05-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu-g++ \
+         -DCMAKE_TOOLCHAIN_FILE=./../cmake/toolchain.cmake \
+         -DTARGET_ABI=arm64 \
+         -DENABLE_ORT_BACKEND=OFF \
+         -DENABLE_RKNPU2_BACKEND=ON \
+         -DENABLE_VISION=ON \
+         -DRKNN2_TARGET_SOC=RK356X \
+         -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.0
+make -j8
+make install
+```
+
+If you cannot find a suitable compiler, you can download the [cross-compilation toolchain](https://bj.bcebos.com/paddle2onnx/libs/gcc-linaro-6.3.1-2017.zip) from this link.
+
+### Configuring Environment Variables
+
+To make configuring environment variables easier, FastDeploy provides a one-step setup script. Before running your program, execute the following commands:
+
+```bash
+# Temporary configuration
+source PathToFastDeploySDK/fastdeploy_init.sh
+
+# Permanent configuration
+source PathToFastDeploySDK/fastdeploy_init.sh
+sudo cp PathToFastDeploySDK/fastdeploy_libs.conf /etc/ld.so.conf.d/
+sudo ldconfig
+```
+
+## Building the FastDeploy Python SDK
+
+Besides the NPU, Rockchip's chips provide several other features, most of which require C/C++ programming, so if you use those modules we do not recommend the Python SDK.
+For now, the Python SDK can only be built on the board. The following tutorial was completed on RK3568 (Debian 10) and RK3588 (Debian 11). Python packaging depends on `wheel`, so run `pip install wheel` before building.
+
+```bash
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy
+
+# If you are using the develop branch, run the following command
+git checkout develop
+
+cd python
+export ENABLE_ORT_BACKEND=ON
+export ENABLE_RKNPU2_BACKEND=ON
+export ENABLE_VISION=ON
+
+# Choose RK3588 or RK356X according to your development board
+export RKNN2_TARGET_SOC=RK3588
+
+# If your board has 8 GB of RAM or more, we recommend building with the following command.
+python3 setup.py build
+# Note that if your board has less than 8 GB of RAM, we recommend building with the following command instead.
+python3 setup.py build -j1
+
+python3 setup.py bdist_wheel
+cd dist
+pip3 install fastdeploy_python-0.0.0-cp39-cp39-linux_aarch64.whl
+```
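+
+After the wheel is installed, a quick sanity check on the board confirms that the package imports and that the RKNPU2 backend can be selected. This is only a sketch; `use_rknpu2` follows the naming used in FastDeploy's RKNPU2 Python examples, so verify it against your installed version.
+
+```python
+# Sanity-check the freshly built wheel (illustrative)
+import fastdeploy as fd
+
+print(fd.__version__)   # should print the installed FastDeploy version
+
+option = fd.RuntimeOption()
+option.use_rknpu2()     # select the RKNPU2 backend for subsequent model loading
+```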
+
 ## Navigation
 
 * [Setting up the RKNPU2 development environment](../faq/rknpu2/environment.md)

diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md
index af5be5360..986462781 100755
--- a/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md
@@ -5,8 +5,8 @@ This directory provides examples that `infer.cc` fast finishes the deployment of
 
 Two steps before deployment
 
-- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
-- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
+- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
+- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
 
 Taking the CPU inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. FastDeploy version 0.7.0 or above (x.x.x>=0.7.0) is required to support this model.
@@ -49,7 +49,7 @@ The visualized result after running is as follows
 ## Other Documents
 - [C++ API Reference](https://baidu-paddle.github.io/fastdeploy-api/cpp/html/)
-- [PPOCR Model Description](../../)
+- [PPOCR Model Description](../README.md)
 - [PPOCRv3 Python Deployment](../python)
 - [Model Prediction Results](../../../../../../docs/en/faq/how_to_change_backend.md)
 - [How to switch the model inference backend engine](../../../../../../docs/en/faq/how_to_change_backend.md)

diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md b/examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md
index d281daf83..b12cef661 100755
--- a/examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md
@@ -3,8 +3,8 @@ English | [简体中文](README_CN.md)
 
 Two steps before deployment
 
-- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
-- 2. Install FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
+- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
+- 2. Install FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
 
 This directory provides examples that `infer.py` fast finishes the deployment of PPOCRv3 on CPU/GPU and GPU accelerated by TensorRT. The script is as follows
@@ -43,7 +43,7 @@ The visualized result after running is as follows
 ## Other Documents
 - [Python API reference](https://baidu-paddle.github.io/fastdeploy-api/python/html/)
-- [PPOCR Model Description](../../)
+- [PPOCR Model Description](../README.md)
 - [PPOCRv3 C++ Deployment](../cpp)
-- [Model Prediction Results](../../../../../../docs/api/vision_results/)
+- [Model Prediction Results](../../../../../../docs/api/vision_results/README.md)
 - [How to switch the model inference backend engine](../../../../../../docs/en/faq/how_to_change_backend.md)

diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md b/examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md
index 663a6b62d..e2f6e3d43 100644
--- a/examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md
@@ -3,8 +3,8 @@
 
 Before deployment, confirm the following two steps
 
-- 1. The software and hardware environment meets the requirements. See [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
-- 2. Install the FastDeploy Python whl package. See [FastDeploy Python Installation](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
+- 1. The software and hardware environment meets the requirements. See [FastDeploy Environment Requirements](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
+- 2. Install the FastDeploy Python whl package. See [FastDeploy Python Installation](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
 
 This directory provides an example in which `infer.py` quickly finishes deploying PPOCRv3 on CPU/GPU, or on GPU with TensorRT acceleration. Run the following script to complete it
@@ -56,7 +56,7 @@ python3 infer_static_shape.py \
 ## Other Documents
 - [Python API Reference](https://baidu-paddle.github.io/fastdeploy-api/python/html/)
-- [PPOCR Model Description](../../)
+- [PPOCR Model Description](../README.md)
 - [PPOCRv3 C++ Deployment](../cpp)
-- [Model Prediction Results](../../../../../../docs/api/vision_results/)
+- [Model Prediction Results](../../../../../../docs/api/vision_results/README_CN.md)
 - [How to switch the model inference backend engine](../../../../../../docs/cn/faq/how_to_change_backend.md)