From 631f94d27602f7033eb3419e3d8ca73bc26cd30b Mon Sep 17 00:00:00 2001 From: guxukai <44280887+GodIsBoom@users.noreply.github.com> Date: Fri, 24 Mar 2023 16:57:35 +0800 Subject: [PATCH] [CVCUDA] Update CV-CUDA to v0.2.1, add vision processor C++ tutorial (#1678) * update cvcuda 0.2.0 -> 0.2.1 * add cpp tutorials demo * fix reviewed problem --- FastDeploy.cmake.in | 2 + cmake/cvcuda.cmake | 9 +-- docs/api_docs/python/index.rst | 7 ++ fastdeploy/vision/common/processors/cast.cc | 2 +- .../vision/common/processors/center_crop.cc | 2 +- .../vision/common/processors/cvcuda_utils.cc | 6 +- .../vision/common/processors/cvcuda_utils.h | 2 +- .../vision/common/processors/hwc2chw.cc | 2 +- fastdeploy/vision/common/processors/pad.cc | 2 +- .../vision/common/processors/pad_to_size.cc | 6 +- fastdeploy/vision/common/processors/resize.cc | 2 +- .../common/processors/resize_by_short.cc | 2 +- .../vision/common/processors/stride_pad.cc | 2 +- tutorials/vision_processor/cpp/CMakeLists.txt | 11 +++ tutorials/vision_processor/cpp/README.md | 27 +++++++ tutorials/vision_processor/cpp/README_CN.md | 27 +++++++ tutorials/vision_processor/cpp/main.cc | 78 +++++++++++++++++++ 17 files changed, 170 insertions(+), 19 deletions(-) create mode 100644 tutorials/vision_processor/cpp/CMakeLists.txt create mode 100644 tutorials/vision_processor/cpp/README.md create mode 100644 tutorials/vision_processor/cpp/README_CN.md create mode 100644 tutorials/vision_processor/cpp/main.cc diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in index 0e20fb68c..ee6b4d283 100644 --- a/FastDeploy.cmake.in +++ b/FastDeploy.cmake.in @@ -321,6 +321,8 @@ if(ENABLE_VISION) find_library(CVCUDA_LIB cvcuda ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/cvcuda/lib NO_DEFAULT_PATH) find_library(NVCV_TYPES_LIB nvcv_types ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/cvcuda/lib NO_DEFAULT_PATH) list(APPEND FASTDEPLOY_LIBS ${CVCUDA_LIB} ${NVCV_TYPES_LIB}) + list(APPEND FASTDEPLOY_INCS 
${CMAKE_CURRENT_LIST_DIR}/third_libs/install/cvcuda/include NO_DEFAULT_PATH) + add_definitions(-DENABLE_CVCUDA) endif() endif() diff --git a/cmake/cvcuda.cmake b/cmake/cvcuda.cmake index 002af9021..946dbb5d6 100644 --- a/cmake/cvcuda.cmake +++ b/cmake/cvcuda.cmake @@ -24,10 +24,10 @@ if(NOT (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64")) message(FATAL_ERROR "CV-CUDA only support x86_64.") endif() -set(CVCUDA_LIB_URL https://github.com/CVCUDA/CV-CUDA/releases/download/v0.2.0-alpha/nvcv-lib-0.2.0_alpha-cuda11-x86_64-linux.tar.xz) -set(CVCUDA_LIB_FILENAME nvcv-lib-0.2.0_alpha-cuda11-x86_64-linux.tar.xz) -set(CVCUDA_DEV_URL https://github.com/CVCUDA/CV-CUDA/releases/download/v0.2.0-alpha/nvcv-dev-0.2.0_alpha-cuda11-x86_64-linux.tar.xz) -set(CVCUDA_DEV_FILENAME nvcv-dev-0.2.0_alpha-cuda11-x86_64-linux.tar.xz) +set(CVCUDA_LIB_URL https://github.com/CVCUDA/CV-CUDA/releases/download/v0.2.1-alpha/nvcv-lib-0.2.1_alpha-cuda11-x86_64-linux.tar.xz) +set(CVCUDA_LIB_FILENAME nvcv-lib-0.2.1_alpha-cuda11-x86_64-linux.tar.xz) +set(CVCUDA_DEV_URL https://github.com/CVCUDA/CV-CUDA/releases/download/v0.2.1-alpha/nvcv-dev-0.2.1_alpha-cuda11-x86_64-linux.tar.xz) +set(CVCUDA_DEV_FILENAME nvcv-dev-0.2.1_alpha-cuda11-x86_64-linux.tar.xz) download_and_decompress(${CVCUDA_LIB_URL} ${CMAKE_CURRENT_BINARY_DIR}/${CVCUDA_LIB_FILENAME} ${THIRD_PARTY_PATH}/cvcuda) download_and_decompress(${CVCUDA_DEV_URL} ${CMAKE_CURRENT_BINARY_DIR}/${CVCUDA_DEV_FILENAME} ${THIRD_PARTY_PATH}/cvcuda) @@ -40,4 +40,3 @@ execute_process(COMMAND cp -r ${THIRD_PARTY_PATH}/cvcuda/opt/nvidia/cvcuda0/incl link_directories(${THIRD_PARTY_PATH}/install/cvcuda/lib) include_directories(${THIRD_PARTY_PATH}/install/cvcuda/include) -set(CMAKE_CXX_STANDARD 17) diff --git a/docs/api_docs/python/index.rst b/docs/api_docs/python/index.rst index 77d7d64ea..58d3529e6 100644 --- a/docs/api_docs/python/index.rst +++ b/docs/api_docs/python/index.rst @@ -38,3 +38,10 @@ FastDeploy runtime.md runtime_option.md + +.. 
toctree:: + :caption: Vision Processor (图像预处理库) + :maxdepth: 2 + :titlesonly: + + vision_processor.md diff --git a/fastdeploy/vision/common/processors/cast.cc b/fastdeploy/vision/common/processors/cast.cc index cb206185c..765f6e2bb 100644 --- a/fastdeploy/vision/common/processors/cast.cc +++ b/fastdeploy/vision/common/processors/cast.cc @@ -96,7 +96,7 @@ bool Cast::ImplByCvCuda(FDMat* mat) { auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache), mat->layout); - cvcuda_convert_op_(mat->Stream(), src_tensor, dst_tensor, 1.0f, 0.0f); + cvcuda_convert_op_(mat->Stream(), *src_tensor, *dst_tensor, 1.0f, 0.0f); mat->SetTensor(mat->output_cache); mat->mat_type = ProcLib::CVCUDA; diff --git a/fastdeploy/vision/common/processors/center_crop.cc b/fastdeploy/vision/common/processors/center_crop.cc index f220ac376..16e0d4724 100644 --- a/fastdeploy/vision/common/processors/center_crop.cc +++ b/fastdeploy/vision/common/processors/center_crop.cc @@ -70,7 +70,7 @@ bool CenterCrop::ImplByCvCuda(FDMat* mat) { int offset_x = static_cast((mat->Width() - width_) / 2); int offset_y = static_cast((mat->Height() - height_) / 2); NVCVRectI crop_roi = {offset_x, offset_y, width_, height_}; - cvcuda_crop_op_(mat->Stream(), src_tensor, dst_tensor, crop_roi); + cvcuda_crop_op_(mat->Stream(), *src_tensor, *dst_tensor, crop_roi); mat->SetTensor(mat->output_cache); mat->SetWidth(width_); diff --git a/fastdeploy/vision/common/processors/cvcuda_utils.cc b/fastdeploy/vision/common/processors/cvcuda_utils.cc index 017ad15ee..228325e8b 100644 --- a/fastdeploy/vision/common/processors/cvcuda_utils.cc +++ b/fastdeploy/vision/common/processors/cvcuda_utils.cc @@ -43,8 +43,8 @@ nvcv::ImageFormat CreateCvCudaImageFormat(FDDataType type, int channel, return nvcv::FMT_BGRf32; } -nvcv::TensorWrapData CreateCvCudaTensorWrapData(const FDTensor& tensor, - Layout layout) { +std::shared_ptr CreateCvCudaTensorWrapData( + const FDTensor& tensor, Layout layout) { FDASSERT(tensor.shape.size() == 3, "When 
create CVCUDA tensor from FD tensor," "tensor shape should be 3-Dim,"); @@ -76,7 +76,7 @@ nvcv::TensorWrapData CreateCvCudaTensorWrapData(const FDTensor& tensor, nvcv::TensorDataStridedCuda tensor_data( nvcv::TensorShape{req.shape, req.rank, req.layout}, nvcv::DataType{req.dtype}, buf); - return nvcv::TensorWrapData(tensor_data); + return std::make_shared(tensor_data, nullptr); } void* GetCvCudaTensorDataPtr(const nvcv::TensorWrapData& tensor) { diff --git a/fastdeploy/vision/common/processors/cvcuda_utils.h b/fastdeploy/vision/common/processors/cvcuda_utils.h index a3a62e702..392bf94b5 100644 --- a/fastdeploy/vision/common/processors/cvcuda_utils.h +++ b/fastdeploy/vision/common/processors/cvcuda_utils.h @@ -27,7 +27,7 @@ namespace vision { nvcv::ImageFormat CreateCvCudaImageFormat(FDDataType type, int channel, bool interleaved = true); -nvcv::TensorWrapData CreateCvCudaTensorWrapData(const FDTensor& tensor, +std::shared_ptr CreateCvCudaTensorWrapData(const FDTensor& tensor, Layout layout = Layout::HWC); void* GetCvCudaTensorDataPtr(const nvcv::TensorWrapData& tensor); nvcv::ImageWrapData CreateImageWrapData(const FDTensor& tensor); diff --git a/fastdeploy/vision/common/processors/hwc2chw.cc b/fastdeploy/vision/common/processors/hwc2chw.cc index af13da129..86a5211c6 100644 --- a/fastdeploy/vision/common/processors/hwc2chw.cc +++ b/fastdeploy/vision/common/processors/hwc2chw.cc @@ -75,7 +75,7 @@ bool HWC2CHW::ImplByCvCuda(FDMat* mat) { auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache), Layout::CHW); - cvcuda_reformat_op_(mat->Stream(), src_tensor, dst_tensor); + cvcuda_reformat_op_(mat->Stream(), *src_tensor, *dst_tensor); mat->layout = Layout::CHW; mat->SetTensor(mat->output_cache); diff --git a/fastdeploy/vision/common/processors/pad.cc b/fastdeploy/vision/common/processors/pad.cc index 044668e12..341305a3d 100644 --- a/fastdeploy/vision/common/processors/pad.cc +++ b/fastdeploy/vision/common/processors/pad.cc @@ -133,7 +133,7 @@ bool 
Pad::ImplByCvCuda(FDMat* mat) { "output_cache", Device::GPU); auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache)); - cvcuda_pad_op_(mat->Stream(), src_tensor, dst_tensor, top_, left_, + cvcuda_pad_op_(mat->Stream(), *src_tensor, *dst_tensor, top_, left_, NVCV_BORDER_CONSTANT, value); mat->SetTensor(mat->output_cache); diff --git a/fastdeploy/vision/common/processors/pad_to_size.cc b/fastdeploy/vision/common/processors/pad_to_size.cc index 1f456dfc7..745916379 100644 --- a/fastdeploy/vision/common/processors/pad_to_size.cc +++ b/fastdeploy/vision/common/processors/pad_to_size.cc @@ -206,7 +206,7 @@ static bool PadHWCByCvCuda(cvcuda::CopyMakeBorder& pad_op, FDMat* mat, "output_cache", Device::GPU); auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache)); - pad_op(mat->Stream(), src_tensor, dst_tensor, 0, 0, NVCV_BORDER_CONSTANT, + pad_op(mat->Stream(), *src_tensor, *dst_tensor, 0, 0, NVCV_BORDER_CONSTANT, border_value); mat->SetTensor(mat->output_cache); @@ -238,8 +238,8 @@ static bool PadCHWByCvCuda(cvcuda::CopyMakeBorder& pad_op, FDMat* mat, input->device, input->device_id); auto dst_tensor = CreateCvCudaTensorWrapData(dst); - pad_op(mat->Stream(), src_tensor, dst_tensor, 0, 0, NVCV_BORDER_CONSTANT, - border_value); + pad_op(mat->Stream(), (*src_tensor), (*dst_tensor), 0, 0, + NVCV_BORDER_CONSTANT, border_value); } mat->SetTensor(mat->output_cache); mat->mat_type = ProcLib::CVCUDA; diff --git a/fastdeploy/vision/common/processors/resize.cc b/fastdeploy/vision/common/processors/resize.cc index 538ed419f..046525b97 100644 --- a/fastdeploy/vision/common/processors/resize.cc +++ b/fastdeploy/vision/common/processors/resize.cc @@ -146,7 +146,7 @@ bool Resize::ImplByCvCuda(FDMat* mat) { auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache)); // CV-CUDA Interp value is compatible with OpenCV - cvcuda_resize_op_(mat->Stream(), src_tensor, dst_tensor, + cvcuda_resize_op_(mat->Stream(), *src_tensor, *dst_tensor, 
CreateCvCudaInterp(interp_)); mat->SetTensor(mat->output_cache); diff --git a/fastdeploy/vision/common/processors/resize_by_short.cc b/fastdeploy/vision/common/processors/resize_by_short.cc index 8ac650dd7..f9a0fb72b 100644 --- a/fastdeploy/vision/common/processors/resize_by_short.cc +++ b/fastdeploy/vision/common/processors/resize_by_short.cc @@ -95,7 +95,7 @@ bool ResizeByShort::ImplByCvCuda(FDMat* mat) { "output_cache", Device::GPU); auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache)); - cvcuda_resize_op_(mat->Stream(), src_tensor, dst_tensor, + cvcuda_resize_op_(mat->Stream(), *src_tensor, *dst_tensor, CreateCvCudaInterp(interp_)); mat->SetTensor(mat->output_cache); diff --git a/fastdeploy/vision/common/processors/stride_pad.cc b/fastdeploy/vision/common/processors/stride_pad.cc index 6b9506e85..cb8a4623c 100644 --- a/fastdeploy/vision/common/processors/stride_pad.cc +++ b/fastdeploy/vision/common/processors/stride_pad.cc @@ -167,7 +167,7 @@ bool StridePad::ImplByCvCuda(FDMat* mat) { "output_cache", Device::GPU); auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache)); - cvcuda_pad_op_(mat->Stream(), src_tensor, dst_tensor, 0, 0, + cvcuda_pad_op_(mat->Stream(), *src_tensor, *dst_tensor, 0, 0, NVCV_BORDER_CONSTANT, value); mat->SetTensor(mat->output_cache); diff --git a/tutorials/vision_processor/cpp/CMakeLists.txt b/tutorials/vision_processor/cpp/CMakeLists.txt new file mode 100644 index 000000000..f41fe59e7 --- /dev/null +++ b/tutorials/vision_processor/cpp/CMakeLists.txt @@ -0,0 +1,11 @@ +PROJECT(preprocessor_demo C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) + +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +include_directories(${FASTDEPLOY_INCS}) + +add_executable(preprocessor_demo ${PROJECT_SOURCE_DIR}/main.cc) +target_link_libraries(preprocessor_demo ${FASTDEPLOY_LIBS}) diff --git a/tutorials/vision_processor/cpp/README.md 
b/tutorials/vision_processor/cpp/README.md new file mode 100644 index 000000000..e9fb14de5 --- /dev/null +++ b/tutorials/vision_processor/cpp/README.md @@ -0,0 +1,27 @@ +English | [中文](README_CN.md) + +# Preprocessor C++ Demo + +1. Compile FastDeploy with the CV-CUDA option enabled + > [Compile FastDeploy](../../../docs/cn/build_and_install/gpu.md) + > [Enable the CV-CUDA option](../../../docs/cn/faq/use_cv_cuda.md) + +2. Run the demo +```bash +# Download the test image +wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/ImageNet/ILSVRC2012_val_00000010.jpeg + +# Compile the Demo +mkdir build +cd build +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/../../../../build/compiled_fastdeploy_sdk/ # adjust this path if the SDK was not built in `FastDeploy/build/compiled_fastdeploy_sdk` +make -j + +# Run the demo + +# Use OpenCV +./preprocessor_demo ILSVRC2012_val_00000010.jpeg 0 + +# Use CV-CUDA +./preprocessor_demo ILSVRC2012_val_00000010.jpeg 1 +``` diff --git a/tutorials/vision_processor/cpp/README_CN.md b/tutorials/vision_processor/cpp/README_CN.md new file mode 100644 index 000000000..1be46e524 --- /dev/null +++ b/tutorials/vision_processor/cpp/README_CN.md @@ -0,0 +1,27 @@ +中文 | [English](README.md) + +# Preprocessor C++ 示例代码 + +1. 编译FastDeploy并开启CV-CUDA选项 + > [编译FastDeploy](../../../docs/cn/build_and_install/gpu.md) + > [开启CV-CUDA选项](../../../docs/cn/faq/use_cv_cuda.md) + +2. 运行示例代码 +```bash +# 下载测试图片 +wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/ImageNet/ILSVRC2012_val_00000010.jpeg + +# 编译示例代码 +mkdir build +cd build +cmake .. 
-DFASTDEPLOY_INSTALL_DIR=${PWD}/../../../../build/compiled_fastdeploy_sdk/ # 若编译FastDeploy在其他文件夹,请替换为相应的sdk路径 +make -j + +# 运行示例代码 + +# 使用OpenCV处理图片 +./preprocessor_demo ILSVRC2012_val_00000010.jpeg 0 + +# 使用CV-CUDA处理图片 +./preprocessor_demo ILSVRC2012_val_00000010.jpeg 1 +``` diff --git a/tutorials/vision_processor/cpp/main.cc b/tutorials/vision_processor/cpp/main.cc new file mode 100644 index 000000000..9c06f8095 --- /dev/null +++ b/tutorials/vision_processor/cpp/main.cc @@ -0,0 +1,78 @@ +#include "fastdeploy/vision.h" +#include "fastdeploy/vision/common/processors/manager.h" +#include "fastdeploy/vision/common/processors/transform.h" + +namespace fd = fastdeploy; + +// Define our custom processor +class CustomPreprocessor : public fd::vision::ProcessorManager { + public: + explicit CustomPreprocessor(){}; + ~CustomPreprocessor(){}; + + virtual bool Apply(fd::vision::FDMatBatch* image_batch, + std::vector* outputs); + + private: + // Create op + int width = 160; + int height = 160; + std::shared_ptr resize_op = + std::make_shared(width, height, -1.0, -1.0, 1, false); + std::shared_ptr crop = + std::make_shared(50, 50); + std::vector mean = {0.485f, 0.456f, 0.406f}; + std::vector std = {0.229f, 0.224f, 0.225f}; + std::shared_ptr normalize = + std::make_shared(mean, std); +}; + +// Implement our custom processor's Apply() method +bool CustomPreprocessor::Apply(fd::vision::FDMatBatch* image_batch, + std::vector* outputs) { + // Use op to transform the images + bool resize_ret = (*resize_op)(&(image_batch->mats->at(0))); + bool crop_ret = (*crop)(image_batch); + bool normalize_ret = (*normalize)(image_batch); + + outputs->resize(1); + fd::FDTensor* tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + return resize_ret && crop_ret && normalize_ret; // report failure if any op failed +} + +int main(int argc, char* argv[]) { + if (argc < 3) { // argv[1] (image path) and argv[2] (run_option) are both read below + std::cout << "Usage: ./preprocessor_demo path/to/image run_option, " + "e.g 
././preprocessor_demo ./test.jpeg 0" + << std::endl; + std::cout << "Run_option 0: OpenCV; 1: CV-CUDA " << std::endl; + return -1; + } + + // Prepare input images + auto im = cv::imread(argv[1]); + std::vector images = {im, im}; + std::vector mats = fd::vision::WrapMat(images); + std::vector outputs; + + // CustomPreprocessor processor; + CustomPreprocessor processor = CustomPreprocessor(); + + // Use CV-CUDA if parameter passed and detected + if (std::atoi(argv[2]) == 1) { + processor.UseCuda(true, 0); + } + + // Run the processor + bool ret = processor.Run(&mats, &outputs); + + // Print output + for (size_t i = 0; i < outputs.size(); i++) { + outputs[i].PrintInfo("out"); + } + + return ret ? 0 : -1; // non-zero exit code when preprocessing failed +} \ No newline at end of file