diff --git a/CMakeLists.txt b/CMakeLists.txt index 3e4f917c3..0dab56839 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,25 +103,8 @@ if(NOT GIT_URL) set(GIT_URL "https://github.com") endif() -# Check for 32bit system -if(WIN32) - if(NOT CMAKE_CL_64) - message("***********************Compile on non 64-bit system now**********************") - add_definitions(-DNON_64_PLATFORM) - if(WITH_GPU) - message(FATAL_ERROR "-DWITH_GPU=ON doesn't support on non 64-bit system now.") - endif() - if(ENABLE_PADDLE_BACKEND) - message(FATAL_ERROR "-DENABLE_PADDLE_BACKEND=ON doesn't support on non 64-bit system now.") - endif() - if(ENABLE_POROS_BACKEND) - message(FATAL_ERROR "-DENABLE_POROS_BACKEND=ON doesn't support on non 64-bit system now.") - endif() - if(ENABLE_VISION) - message(FATAL_ERROR "-DENABLE_VISION=ON doesn't support on non 64-bit system now.") - endif() - endif() -endif() +# check build options +include(${PROJECT_SOURCE_DIR}/cmake/check.cmake) if(WIN32 AND ENABLE_VISION) add_definitions(-DYAML_CPP_DLL) @@ -149,30 +132,15 @@ if (WITH_ASCEND) endif() if (WITH_KUNLUNXIN) - if(NOT ENABLE_LITE_BACKEND) - set(ENABLE_LITE_BACKEND ON) - endif() - if(NOT CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64") - message(FATAL_ERROR "KunlunXin XPU is only supported on Linux x64 platform") - endif() - if(NOT PADDLELITE_URL) - set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20221215.tgz") - endif() + include(${PROJECT_SOURCE_DIR}/cmake/kunlunxin.cmake) endif() -if(ANDROID OR IOS) - if(ENABLE_ORT_BACKEND) - message(FATAL_ERROR "Not support ONNXRuntime backend for Andorid/IOS now. Please set ENABLE_ORT_BACKEND=OFF.") - endif() - if(ENABLE_PADDLE_BACKEND) - message(FATAL_ERROR "Not support Paddle backend for Andorid/IOS now. Please set ENABLE_PADDLE_BACKEND=OFF.") - endif() - if(ENABLE_OPENVINO_BACKEND) - message(FATAL_ERROR "Not support OpenVINO backend for Andorid/IOS now. Please set ENABLE_OPENVINO_BACKEND=OFF.") - endif() - if(ENABLE_TRT_BACKEND) - message(FATAL_ERROR "Not support TensorRT backend for Andorid/IOS now. 
Please set ENABLE_TRT_BACKEND=OFF.") +if(WITH_IPU) + if(NOT ENABLE_PADDLE_BACKEND) + message("Will force to set ENABLE_PADDLE_BACKEND when build with GraphCore IPU.") + set(ENABLE_PADDLE_BACKEND ON) endif() + add_definitions(-DWITH_IPU) endif() # Check for macOS architecture @@ -208,7 +176,6 @@ set(DEPEND_LIBS "") file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" FASTDEPLOY_VERSION) string(STRIP "${FASTDEPLOY_VERSION}" FASTDEPLOY_VERSION) - # Add eigen lib include_directories(${PROJECT_SOURCE_DIR}/third_party/eigen) if(WIN32) @@ -221,7 +188,7 @@ if(WITH_SW) endif() if(ENABLE_ORT_BACKEND) - set(ENABLE_PADDLE_FRONTEND ON) + set(ENABLE_PADDLE2ONNX ON) add_definitions(-DENABLE_ORT_BACKEND) list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS}) include(${PROJECT_SOURCE_DIR}/cmake/onnxruntime.cmake) @@ -236,7 +203,7 @@ if(ENABLE_LITE_BACKEND) endif() if(ENABLE_PADDLE_BACKEND) - set(ENABLE_PADDLE_FRONTEND ON) + set(ENABLE_PADDLE2ONNX ON) add_definitions(-DENABLE_PADDLE_BACKEND) list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_PADDLE_SRCS}) include(${PROJECT_SOURCE_DIR}/cmake/paddle_inference.cmake) @@ -250,7 +217,7 @@ if(ENABLE_PADDLE_BACKEND) endif() if(ENABLE_OPENVINO_BACKEND) - set(ENABLE_PADDLE_FRONTEND ON) + set(ENABLE_PADDLE2ONNX ON) add_definitions(-DENABLE_OPENVINO_BACKEND) list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_OPENVINO_SRCS}) include(${PROJECT_SOURCE_DIR}/cmake/openvino.cmake) @@ -313,36 +280,24 @@ if(ENABLE_POROS_BACKEND) endif() if(WITH_GPU) - if(APPLE) - message(FATAL_ERROR "Cannot enable GPU while compling in Mac OSX.") - set(WITH_GPU OFF) - elseif(ANDROID OR IOS) - message(FATAL_ERROR "Cannot enable GPU while compling in Android or IOS.") - set(WITH_GPU OFF) + add_definitions(-DWITH_GPU) + include_directories(${CUDA_DIRECTORY}/include) + if(WIN32) + find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib/x64) else() - add_definitions(-DWITH_GPU) - include_directories(${CUDA_DIRECTORY}/include) - if(WIN32) - find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib/x64) - else() - find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64) - endif() - list(APPEND DEPEND_LIBS ${CUDA_LIB}) - - # build CUDA source files in fastdeploy, CUDA source files include CUDA preprocessing, TRT plugins, etc. - enable_language(CUDA) - message(STATUS "CUDA compiler: ${CMAKE_CUDA_COMPILER}, version: " - "${CMAKE_CUDA_COMPILER_ID} ${CMAKE_CUDA_COMPILER_VERSION}") - include(${PROJECT_SOURCE_DIR}/cmake/cuda.cmake) + find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64) endif() -endif() + list(APPEND DEPEND_LIBS ${CUDA_LIB}) -if(WITH_IPU) - add_definitions(-DWITH_IPU) + # build CUDA source files in fastdeploy, CUDA source files include CUDA preprocessing, TRT plugins, etc. 
+  enable_language(CUDA)
+  message(STATUS "CUDA compiler: ${CMAKE_CUDA_COMPILER}, version: "
+                 "${CMAKE_CUDA_COMPILER_ID} ${CMAKE_CUDA_COMPILER_VERSION}")
+  include(${PROJECT_SOURCE_DIR}/cmake/cuda.cmake)
 endif()
 
 if(ENABLE_TRT_BACKEND)
-  set(ENABLE_PADDLE_FRONTEND ON)
+  set(ENABLE_PADDLE2ONNX ON)
   if(APPLE OR ANDROID OR IOS)
     message(FATAL_ERROR "Cannot enable tensorrt backend in mac/ios/android os, please set -DENABLE_TRT_BACKEND=OFF.")
   endif()
@@ -406,7 +361,6 @@ endif()
 
 if(ENABLE_VISION)
   add_definitions(-DENABLE_VISION)
-  add_definitions(-DENABLE_VISION_VISUALIZE)
   add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp)
   list(APPEND DEPEND_LIBS yaml-cpp)
   list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_VISION_SRCS})
@@ -435,11 +389,11 @@ if(ENABLE_ENCRYPTION)
   list(APPEND DEPEND_LIBS ${OPENSSL_LIBRARIES})
 endif()
 
-if(ENABLE_PADDLE_FRONTEND)
-  add_definitions(-DENABLE_PADDLE_FRONTEND)
+if(ENABLE_PADDLE2ONNX)
+  add_definitions(-DENABLE_PADDLE2ONNX)
   include(${PROJECT_SOURCE_DIR}/cmake/paddle2onnx.cmake)
   list(APPEND DEPEND_LIBS external_paddle2onnx)
-endif(ENABLE_PADDLE_FRONTEND)
+endif(ENABLE_PADDLE2ONNX)
 
 configure_file(${PROJECT_SOURCE_DIR}/FastDeploy.cmake.in ${PROJECT_SOURCE_DIR}/FastDeploy.cmake @ONLY)
diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in
index a9f52d2e3..76b8f747c 100755
--- a/FastDeploy.cmake.in
+++ b/FastDeploy.cmake.in
@@ -10,7 +10,7 @@ set(ENABLE_OPENVINO_BACKEND @ENABLE_OPENVINO_BACKEND@)
 set(ENABLE_POROS_BACKEND @ENABLE_POROS_BACKEND@)
 set(POROS_VERSION @POROS_VERSION@)
 set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@)
-set(ENABLE_PADDLE_FRONTEND @ENABLE_PADDLE_FRONTEND@)
+set(ENABLE_PADDLE2ONNX @ENABLE_PADDLE2ONNX@)
 set(ENABLE_VISION @ENABLE_VISION@)
 set(ENABLE_FLYCV @ENABLE_FLYCV@)
 set(ENABLE_TEXT @ENABLE_TEXT@)
@@ -238,7 +238,7 @@ if (ENABLE_TEXT)
   list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/third_party/include)
 endif()
 
-if(ENABLE_PADDLE_FRONTEND)
+if(ENABLE_PADDLE2ONNX)
   if(ANDROID)
     message(FATAL_ERROR "Not support fastdeploy-paddle2onnx APIs with Android now!")
   endif()
diff --git a/cmake/check.cmake b/cmake/check.cmake
new file mode 100644
index 000000000..690149a9e
--- /dev/null
+++ b/cmake/check.cmake
@@ -0,0 +1,44 @@
+# Check for 32bit system
+if(WIN32)
+  if(NOT CMAKE_CL_64)
+    message("***********************Compile on non 64-bit system now**********************")
+    add_definitions(-DNON_64_PLATFORM)
+    if(WITH_GPU)
+      message(FATAL_ERROR "-DWITH_GPU=ON is not supported on non-64-bit systems now.")
+    endif()
+    if(ENABLE_PADDLE_BACKEND)
+      message(FATAL_ERROR "-DENABLE_PADDLE_BACKEND=ON is not supported on non-64-bit systems now.")
+    endif()
+    if(ENABLE_POROS_BACKEND)
+      message(FATAL_ERROR "-DENABLE_POROS_BACKEND=ON is not supported on non-64-bit systems now.")
+    endif()
+    if(ENABLE_VISION)
+      message(FATAL_ERROR "-DENABLE_VISION=ON is not supported on non-64-bit systems now.")
+    endif()
+  endif()
+endif()
+
+if(ANDROID OR IOS)
+  if(ENABLE_ORT_BACKEND)
+    message(FATAL_ERROR "The ONNXRuntime backend is not supported on Android/iOS now. Please set ENABLE_ORT_BACKEND=OFF.")
+  endif()
+  if(ENABLE_PADDLE_BACKEND)
+    message(FATAL_ERROR "The Paddle backend is not supported on Android/iOS now. Please set ENABLE_PADDLE_BACKEND=OFF.")
+  endif()
+  if(ENABLE_OPENVINO_BACKEND)
+    message(FATAL_ERROR "The OpenVINO backend is not supported on Android/iOS now. Please set ENABLE_OPENVINO_BACKEND=OFF.")
+  endif()
+  if(ENABLE_TRT_BACKEND)
+    message(FATAL_ERROR "The TensorRT backend is not supported on Android/iOS now. Please set ENABLE_TRT_BACKEND=OFF.")
+  endif()
+endif()
+
+if(WITH_GPU)
+  if(APPLE)
+    message(FATAL_ERROR "Cannot enable GPU while compiling on Mac OSX.")
+    set(WITH_GPU OFF)
+  elseif(ANDROID OR IOS)
+    message(FATAL_ERROR "Cannot enable GPU while compiling for Android or iOS.")
+    set(WITH_GPU OFF)
+  endif()
+endif()
diff --git a/cmake/kunlunxin.cmake b/cmake/kunlunxin.cmake
new file mode 100644
index 000000000..5a12526f9
--- /dev/null
+++ b/cmake/kunlunxin.cmake
@@ -0,0 +1,12 @@
+if(NOT ENABLE_LITE_BACKEND)
+  message("Will force to set ENABLE_LITE_BACKEND when building with KunlunXin.")
+  set(ENABLE_LITE_BACKEND ON)
+endif()
+
+if(NOT CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64")
+  message(FATAL_ERROR "KunlunXin XPU is only supported on the Linux x64 platform.")
+endif()
+
+if(NOT PADDLELITE_URL)
+  set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20221215.tgz")
+endif()
\ No newline at end of file
diff --git a/cmake/paddle_inference.cmake b/cmake/paddle_inference.cmake
index 6d5c8f0c1..efd65394e 100644
--- a/cmake/paddle_inference.cmake
+++ b/cmake/paddle_inference.cmake
@@ -13,6 +13,10 @@
 # limitations under the License.
 include(ExternalProject)
 
+if(WITH_GPU AND WITH_IPU)
+  message(FATAL_ERROR "Cannot build with WITH_GPU=ON and WITH_IPU=ON at the same time.")
+endif()
+
 option(PADDLEINFERENCE_DIRECTORY "Directory of Paddle Inference library" OFF)
 
 set(PADDLEINFERENCE_PROJECT "extern_paddle_inference")
diff --git a/cmake/timvx.cmake b/cmake/timvx.cmake
index aae1fba1a..99c7a8b6e 100755
--- a/cmake/timvx.cmake
+++ b/cmake/timvx.cmake
@@ -3,9 +3,9 @@ if(NOT ${ENABLE_LITE_BACKEND})
   message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_LITE_BACKEND=ON")
   set(ENABLE_LITE_BACKEND ON)
 endif()
-if(${ENABLE_PADDLE_FRONTEND})
-  message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_PADDLE_FRONTEND=OFF")
-  set(ENABLE_PADDLE_FRONTEND OFF)
+if(${ENABLE_PADDLE2ONNX})
+  message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_PADDLE2ONNX=OFF")
+  set(ENABLE_PADDLE2ONNX OFF)
 endif()
 if(${ENABLE_ORT_BACKEND})
   message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_ORT_BACKEND=OFF")
diff --git a/docs/api_docs/cpp/Doxyfile b/docs/api_docs/cpp/Doxyfile
index afd956162..4bd48f09d 100644
--- a/docs/api_docs/cpp/Doxyfile
+++ b/docs/api_docs/cpp/Doxyfile
@@ -2100,7 +2100,7 @@ INCLUDE_FILE_PATTERNS  =
 # recursively expanded use the := operator instead of the = operator.
 # This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
 
-PREDEFINED             = protected=private ENABLE_VISION_VISUALIZE=1
+PREDEFINED             = protected=private
 
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
 # tag can be used to specify a list of macro names that should be expanded.
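
The hunks above rename the ENABLE_PADDLE_FRONTEND option and macro to ENABLE_PADDLE2ONNX across CMakeLists.txt, FastDeploy.cmake.in, cmake/timvx.cmake and the generated config header. A minimal sketch of how downstream C++ code would follow the rename is given below; the model files, backend choice, and program structure are illustrative assumptions, not part of this patch.

#include <iostream>

#include "fastdeploy/core/config.h"  // defines ENABLE_PADDLE2ONNX in builds with paddle2onnx support
#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  // Placeholder model files; any Paddle model would do for this illustration.
  option.SetModelPath("model.pdmodel", "model.pdiparams",
                      fastdeploy::ModelFormat::PADDLE);
#ifdef ENABLE_PADDLE2ONNX  // was: #ifdef ENABLE_PADDLE_FRONTEND
  // This build can convert Paddle models to ONNX, so the ORT backend is usable.
  option.UseOrtBackend();
#else
  option.UsePaddleBackend();
#endif
  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    std::cerr << "Failed to initialize the FastDeploy runtime." << std::endl;
    return -1;
  }
  std::cout << "Runtime ready with " << runtime.NumInputs() << " input(s)."
            << std::endl;
  return 0;
}

On the CMake side, the equivalent change for consumers of the exported FastDeploy.cmake is simply switching any if(ENABLE_PADDLE_FRONTEND) checks to if(ENABLE_PADDLE2ONNX).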
The diff --git a/fastdeploy/core/config.h.in b/fastdeploy/core/config.h.in index c2541cc46..e6f202961 100755 --- a/fastdeploy/core/config.h.in +++ b/fastdeploy/core/config.h.in @@ -17,8 +17,8 @@ #cmakedefine FASTDEPLOY_LIB #endif -#ifndef ENABLE_PADDLE_FRONTEND -#cmakedefine ENABLE_PADDLE_FRONTEND +#ifndef ENABLE_PADDLE2ONNX +#cmakedefine ENABLE_PADDLE2ONNX #endif #ifndef ENABLE_ORT_BACKEND @@ -56,13 +56,3 @@ #ifndef ENABLE_TEXT #cmakedefine ENABLE_TEXT #endif - -#ifdef ENABLE_VISION -#ifndef ENABLE_VISION_VISUALIZE -#define ENABLE_VISION_VISUALIZE -#endif -#endif - -#ifndef ENABLE_FDTENSOR_FUNC -#cmakedefine ENABLE_FDTENSOR_FUNC -#endif diff --git a/fastdeploy/runtime/backends/openvino/ov_backend.cc b/fastdeploy/runtime/backends/openvino/ov_backend.cc index b394e0a0e..c36e4c174 100755 --- a/fastdeploy/runtime/backends/openvino/ov_backend.cc +++ b/fastdeploy/runtime/backends/openvino/ov_backend.cc @@ -13,7 +13,7 @@ // limitations under the License. #include "fastdeploy/runtime/backends/openvino/ov_backend.h" -#ifdef ENABLE_PADDLE_FRONTEND +#ifdef ENABLE_PADDLE2ONNX #include "paddle2onnx/converter.h" #endif diff --git a/fastdeploy/runtime/backends/ort/ort_backend.cc b/fastdeploy/runtime/backends/ort/ort_backend.cc index f19d209f9..b97656add 100644 --- a/fastdeploy/runtime/backends/ort/ort_backend.cc +++ b/fastdeploy/runtime/backends/ort/ort_backend.cc @@ -21,7 +21,7 @@ #include "fastdeploy/runtime/backends/ort/ops/multiclass_nms.h" #include "fastdeploy/runtime/backends/ort/utils.h" #include "fastdeploy/utils/utils.h" -#ifdef ENABLE_PADDLE_FRONTEND +#ifdef ENABLE_PADDLE2ONNX #include "paddle2onnx/converter.h" #endif @@ -84,7 +84,7 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file, char* model_content_ptr; int model_content_size = 0; bool save_external = false; -#ifdef ENABLE_PADDLE_FRONTEND +#ifdef ENABLE_PADDLE2ONNX std::vector ops; ops.resize(2); strcpy(ops[0].op_name, "multiclass_nms3"); diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.h b/fastdeploy/runtime/backends/paddle/paddle_backend.h index 9f8df5730..2f93b22ed 100644 --- a/fastdeploy/runtime/backends/paddle/paddle_backend.h +++ b/fastdeploy/runtime/backends/paddle/paddle_backend.h @@ -21,7 +21,7 @@ #include "fastdeploy/runtime/backends/backend.h" #include "fastdeploy/runtime/backends/paddle/option.h" -#ifdef ENABLE_PADDLE_FRONTEND +#ifdef ENABLE_PADDLE2ONNX #include "paddle2onnx/converter.h" #endif #include "fastdeploy/utils/unique_ptr.h" diff --git a/fastdeploy/runtime/backends/tensorrt/trt_backend.cc b/fastdeploy/runtime/backends/tensorrt/trt_backend.cc index d1302f949..5f498b41d 100644 --- a/fastdeploy/runtime/backends/tensorrt/trt_backend.cc +++ b/fastdeploy/runtime/backends/tensorrt/trt_backend.cc @@ -20,7 +20,7 @@ #include "NvInferRuntime.h" #include "fastdeploy/function/cuda_cast.h" #include "fastdeploy/utils/utils.h" -#ifdef ENABLE_PADDLE_FRONTEND +#ifdef ENABLE_PADDLE2ONNX #include "paddle2onnx/converter.h" #endif @@ -123,7 +123,7 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file, } option_ = option; -#ifdef ENABLE_PADDLE_FRONTEND +#ifdef ENABLE_PADDLE2ONNX std::vector ops; ops.resize(1); strcpy(ops[0].op_name, "pool2d"); diff --git a/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc b/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc old mode 100755 new mode 100644 index 781a8973c..a6de59c9e --- a/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc +++ b/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc @@ -1,7 +1,8 @@ #include 
"fastdeploy/vision/keypointdet/pptinypose/pptinypose.h" + #include "fastdeploy/vision/utils/utils.h" #include "yaml-cpp/yaml.h" -#ifdef ENABLE_PADDLE_FRONTEND +#ifdef ENABLE_PADDLE2ONNX #include "paddle2onnx/converter.h" #endif #include "fastdeploy/vision.h" @@ -16,7 +17,8 @@ PPTinyPose::PPTinyPose(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { config_file_ = config_file; - valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE}; + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; valid_kunlunxin_backends = {Backend::LITE}; runtime_option = custom_option; @@ -100,11 +102,11 @@ bool PPTinyPose::Preprocess(Mat* mat, std::vector* outputs) { int resize_height = -1; std::tie(resize_width, resize_height) = processor->GetWidthAndHeight(); cv::Mat trans_matrix(2, 3, CV_64FC1); - GetAffineTransform(center, scale, 0, {resize_width, resize_height}, &trans_matrix, 0); + GetAffineTransform(center, scale, 0, {resize_width, resize_height}, + &trans_matrix, 0); if (!(processor->SetTransformMatrix(trans_matrix))) { - FDERROR << "Failed to set transform matrix of " - << processors_[i]->Name() - << " processor." << std::endl; + FDERROR << "Failed to set transform matrix of " + << processors_[i]->Name() << " processor." << std::endl; } } if (!(*(processors_[i].get()))(mat)) { @@ -139,7 +141,7 @@ bool PPTinyPose::Postprocess(std::vector& infer_result, int idxdata_size = std::accumulate(infer_result[1].shape.begin(), infer_result[1].shape.end(), 1, std::multiplies()); - + if (outdata_size < 6) { FDWARNING << "PPTinyPose No object detected." << std::endl; } @@ -160,7 +162,9 @@ bool PPTinyPose::Postprocess(std::vector& infer_result, std::copy(static_cast(idx_data), static_cast(idx_data) + idxdata_size, idxout.begin()); } else { - FDERROR << "Only support process inference result with INT32/INT64 data type, but now it's " << idx_dtype << "." << std::endl; + FDERROR << "Only support process inference result with INT32/INT64 data " + "type, but now it's " + << idx_dtype << "." 
<< std::endl; } GetFinalPredictions(heatmap, out_data_shape, idxout, center, scale, &preds, this->use_dark); @@ -176,7 +180,8 @@ bool PPTinyPose::Postprocess(std::vector& infer_result, bool PPTinyPose::Predict(cv::Mat* im, KeyPointDetectionResult* result) { std::vector center = {round(im->cols / 2.0f), round(im->rows / 2.0f)}; - std::vector scale = {static_cast(im->cols), static_cast(im->rows)}; + std::vector scale = {static_cast(im->cols), + static_cast(im->rows)}; Mat mat(*im); std::vector processed_data; if (!Preprocess(&mat, &processed_data)) { diff --git a/fastdeploy/vision/vision_pybind.cc b/fastdeploy/vision/vision_pybind.cc index aa387b430..0bd2f0067 100644 --- a/fastdeploy/vision/vision_pybind.cc +++ b/fastdeploy/vision/vision_pybind.cc @@ -29,9 +29,7 @@ void BindKeyPointDetection(pybind11::module& m); void BindHeadPose(pybind11::module& m); void BindSR(pybind11::module& m); void BindGeneration(pybind11::module& m); -#ifdef ENABLE_VISION_VISUALIZE void BindVisualize(pybind11::module& m); -#endif void BindVision(pybind11::module& m) { pybind11::class_(m, "Mask") @@ -39,20 +37,20 @@ void BindVision(pybind11::module& m) { .def_readwrite("data", &vision::Mask::data) .def_readwrite("shape", &vision::Mask::shape) .def(pybind11::pickle( - [](const vision::Mask &m) { + [](const vision::Mask& m) { return pybind11::make_tuple(m.data, m.shape); - }, - [](pybind11::tuple t) { + }, + [](pybind11::tuple t) { if (t.size() != 2) - throw std::runtime_error("vision::Mask pickle with invalid state!"); + throw std::runtime_error( + "vision::Mask pickle with invalid state!"); vision::Mask m; m.data = t[0].cast>(); m.shape = t[1].cast>(); return m; - } - )) + })) .def("__repr__", &vision::Mask::Str) .def("__str__", &vision::Mask::Str); @@ -61,20 +59,20 @@ void BindVision(pybind11::module& m) { .def_readwrite("label_ids", &vision::ClassifyResult::label_ids) .def_readwrite("scores", &vision::ClassifyResult::scores) .def(pybind11::pickle( - [](const vision::ClassifyResult &c) { + [](const vision::ClassifyResult& c) { return pybind11::make_tuple(c.label_ids, c.scores); - }, - [](pybind11::tuple t) { + }, + [](pybind11::tuple t) { if (t.size() != 2) - throw std::runtime_error("vision::ClassifyResult pickle with invalid state!"); + throw std::runtime_error( + "vision::ClassifyResult pickle with invalid state!"); vision::ClassifyResult c; c.label_ids = t[0].cast>(); c.scores = t[1].cast>(); return c; - } - )) + })) .def("__repr__", &vision::ClassifyResult::Str) .def("__str__", &vision::ClassifyResult::Str); @@ -86,12 +84,14 @@ void BindVision(pybind11::module& m) { .def_readwrite("masks", &vision::DetectionResult::masks) .def_readwrite("contain_masks", &vision::DetectionResult::contain_masks) .def(pybind11::pickle( - [](const vision::DetectionResult &d) { - return pybind11::make_tuple(d.boxes, d.scores, d.label_ids, d.masks, d.contain_masks); - }, - [](pybind11::tuple t) { + [](const vision::DetectionResult& d) { + return pybind11::make_tuple(d.boxes, d.scores, d.label_ids, d.masks, + d.contain_masks); + }, + [](pybind11::tuple t) { if (t.size() != 5) - throw std::runtime_error("vision::DetectionResult pickle with Invalid state!"); + throw std::runtime_error( + "vision::DetectionResult pickle with Invalid state!"); vision::DetectionResult d; d.boxes = t[0].cast>>(); @@ -101,8 +101,7 @@ void BindVision(pybind11::module& m) { d.contain_masks = t[4].cast(); return d; - } - )) + })) .def("__repr__", &vision::DetectionResult::Str) .def("__str__", &vision::DetectionResult::Str); @@ -152,14 +151,17 @@ void 
BindVision(pybind11::module& m) { .def_readwrite("label_map", &vision::SegmentationResult::label_map) .def_readwrite("score_map", &vision::SegmentationResult::score_map) .def_readwrite("shape", &vision::SegmentationResult::shape) - .def_readwrite("contain_score_map", &vision::SegmentationResult::contain_score_map) + .def_readwrite("contain_score_map", + &vision::SegmentationResult::contain_score_map) .def(pybind11::pickle( - [](const vision::SegmentationResult &s) { - return pybind11::make_tuple(s.label_map, s.score_map, s.shape, s.contain_score_map); - }, - [](pybind11::tuple t) { + [](const vision::SegmentationResult& s) { + return pybind11::make_tuple(s.label_map, s.score_map, s.shape, + s.contain_score_map); + }, + [](pybind11::tuple t) { if (t.size() != 4) - throw std::runtime_error("vision::SegmentationResult pickle with Invalid state!"); + throw std::runtime_error( + "vision::SegmentationResult pickle with Invalid state!"); vision::SegmentationResult s; s.label_map = t[0].cast>(); @@ -168,8 +170,7 @@ void BindVision(pybind11::module& m) { s.contain_score_map = t[3].cast(); return s; - } - )) + })) .def("__repr__", &vision::SegmentationResult::Str) .def("__str__", &vision::SegmentationResult::Str); @@ -178,7 +179,8 @@ void BindVision(pybind11::module& m) { .def_readwrite("alpha", &vision::MattingResult::alpha) .def_readwrite("foreground", &vision::MattingResult::foreground) .def_readwrite("shape", &vision::MattingResult::shape) - .def_readwrite("contain_foreground", &vision::MattingResult::contain_foreground) + .def_readwrite("contain_foreground", + &vision::MattingResult::contain_foreground) .def("__repr__", &vision::MattingResult::Str) .def("__str__", &vision::MattingResult::Str); @@ -215,8 +217,6 @@ void BindVision(pybind11::module& m) { BindHeadPose(m); BindSR(m); BindGeneration(m); -#ifdef ENABLE_VISION_VISUALIZE BindVisualize(m); -#endif } } // namespace fastdeploy diff --git a/fastdeploy/vision/visualize/classification.cc b/fastdeploy/vision/visualize/classification.cc index cd2cb547c..e7a852adc 100644 --- a/fastdeploy/vision/visualize/classification.cc +++ b/fastdeploy/vision/visualize/classification.cc @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifdef ENABLE_VISION_VISUALIZE - #include #include "fastdeploy/vision/visualize/visualize.h" @@ -46,7 +44,7 @@ cv::Mat VisClassification(const cv::Mat& im, const ClassifyResult& result, cv::Point origin; origin.x = w_sep; origin.y = h_sep * (i + 1); - cv::putText(vis_im, text, origin, font, font_size, + cv::putText(vis_im, text, origin, font, font_size, cv::Scalar(255, 255, 255), 1); } return vis_im; @@ -54,9 +52,8 @@ cv::Mat VisClassification(const cv::Mat& im, const ClassifyResult& result, // Visualize ClassifyResult with custom labels. 
cv::Mat VisClassification(const cv::Mat& im, const ClassifyResult& result, - const std::vector& labels, - int top_k, float score_threshold, - float font_size) { + const std::vector& labels, int top_k, + float score_threshold, float font_size) { int h = im.rows; int w = im.cols; auto vis_im = im.clone(); @@ -78,8 +75,8 @@ cv::Mat VisClassification(const cv::Mat& im, const ClassifyResult& result, if (labels.size() > result.label_ids[i]) { text = labels[result.label_ids[i]] + "," + text; } else { - FDWARNING << "The label_id: " << result.label_ids[i] - << " in DetectionResult should be less than length of labels:" + FDWARNING << "The label_id: " << result.label_ids[i] + << " in DetectionResult should be less than length of labels:" << labels.size() << "." << std::endl; } if (text.size() > 16) { @@ -89,7 +86,7 @@ cv::Mat VisClassification(const cv::Mat& im, const ClassifyResult& result, cv::Point origin; origin.x = w_sep; origin.y = h_sep * (i + 1); - cv::putText(vis_im, text, origin, font, font_size, + cv::putText(vis_im, text, origin, font, font_size, cv::Scalar(255, 255, 255), 1); } return vis_im; @@ -97,4 +94,3 @@ cv::Mat VisClassification(const cv::Mat& im, const ClassifyResult& result, } // namespace vision } // namespace fastdeploy -#endif diff --git a/fastdeploy/vision/visualize/detection.cc b/fastdeploy/vision/visualize/detection.cc index fc681cabf..e8180cafe 100644 --- a/fastdeploy/vision/visualize/detection.cc +++ b/fastdeploy/vision/visualize/detection.cc @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifdef ENABLE_VISION_VISUALIZE - #include #include "fastdeploy/vision/visualize/visualize.h" @@ -105,10 +103,9 @@ cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result, } // Visualize DetectionResult with custom labels. -cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result, +cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result, const std::vector& labels, - float score_threshold, int line_size, - float font_size) { + float score_threshold, int line_size, float font_size) { if (result.contain_masks) { FDASSERT(result.boxes.size() == result.masks.size(), "The size of masks must be equal to the size of boxes, but now " @@ -145,8 +142,8 @@ cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result, if (labels.size() > result.label_ids[i]) { text = labels[result.label_ids[i]] + "," + text; } else { - FDWARNING << "The label_id: " << result.label_ids[i] - << " in DetectionResult should be less than length of labels:" + FDWARNING << "The label_id: " << result.label_ids[i] + << " in DetectionResult should be less than length of labels:" << labels.size() << "." << std::endl; } if (text.size() > 16) { @@ -287,4 +284,3 @@ cv::Mat Visualize::VisDetection(const cv::Mat& im, } // namespace vision } // namespace fastdeploy -#endif diff --git a/fastdeploy/vision/visualize/face_alignment.cc b/fastdeploy/vision/visualize/face_alignment.cc index 9b6d43cff..5cad8ba96 100644 --- a/fastdeploy/vision/visualize/face_alignment.cc +++ b/fastdeploy/vision/visualize/face_alignment.cc @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifdef ENABLE_VISION_VISUALIZE - #include "fastdeploy/vision/visualize/visualize.h" #include "opencv2/imgproc/imgproc.hpp" @@ -28,16 +26,12 @@ cv::Mat VisFaceAlignment(const cv::Mat& im, const FaceAlignmentResult& result, cv::Scalar landmark_color = cv::Scalar(0, 255, 0); for (size_t i = 0; i < result.landmarks.size(); ++i) { cv::Point landmark; - landmark.x = static_cast( - result.landmarks[i][0]); - landmark.y = static_cast( - result.landmarks[i][1]); + landmark.x = static_cast(result.landmarks[i][0]); + landmark.y = static_cast(result.landmarks[i][1]); cv::circle(vis_im, landmark, line_size, landmark_color, -1); } return vis_im; } } // namespace vision -} // namespace fastdeploy - -#endif +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/vision/visualize/face_detection.cc b/fastdeploy/vision/visualize/face_detection.cc index edc5b545c..f4cf3d492 100644 --- a/fastdeploy/vision/visualize/face_detection.cc +++ b/fastdeploy/vision/visualize/face_detection.cc @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifdef ENABLE_VISION_VISUALIZE - #include "fastdeploy/vision/visualize/visualize.h" #include "opencv2/imgproc/imgproc.hpp" @@ -137,5 +135,3 @@ cv::Mat Visualize::VisFaceDetection(const cv::Mat& im, } // namespace vision } // namespace fastdeploy - -#endif diff --git a/fastdeploy/vision/visualize/headpose.cc b/fastdeploy/vision/visualize/headpose.cc index 389d11136..ab04b655b 100644 --- a/fastdeploy/vision/visualize/headpose.cc +++ b/fastdeploy/vision/visualize/headpose.cc @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifdef ENABLE_VISION_VISUALIZE - #include "fastdeploy/vision/visualize/visualize.h" #include "opencv2/imgproc/imgproc.hpp" @@ -21,8 +19,8 @@ namespace fastdeploy { namespace vision { -cv::Mat VisHeadPose(const cv::Mat& im, const HeadPoseResult& result, - int size, int line_size) { +cv::Mat VisHeadPose(const cv::Mat& im, const HeadPoseResult& result, int size, + int line_size) { const float PI = 3.1415926535; auto vis_im = im.clone(); int h = im.rows; @@ -37,23 +35,28 @@ cv::Mat VisHeadPose(const cv::Mat& im, const HeadPoseResult& result, // X-Axis | drawn in red int x1 = static_cast(size * std::cos(yaw) * std::cos(roll)) + tdx; - int y1 = static_cast(size * (std::cos(pitch) * std::sin(roll) + - std::cos(roll) * std::sin(pitch) * std::sin(yaw))) + tdy; + int y1 = static_cast( + size * (std::cos(pitch) * std::sin(roll) + + std::cos(roll) * std::sin(pitch) * std::sin(yaw))) + + tdy; // Y-Axis | drawn in green int x2 = static_cast(-size * std::cos(yaw) * std::sin(roll)) + tdx; - int y2 = static_cast(size * (std::cos(pitch) * std::cos(roll) - - std::sin(pitch) * std::sin(yaw) * std::sin(roll))) + tdy; + int y2 = static_cast( + size * (std::cos(pitch) * std::cos(roll) - + std::sin(pitch) * std::sin(yaw) * std::sin(roll))) + + tdy; // Z-Axis | drawn in blue int x3 = static_cast(size * std::sin(yaw)) + tdx; int y3 = static_cast(-size * std::cos(yaw) * std::sin(pitch)) + tdy; - cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x1, y1), cv::Scalar(0, 0, 255), line_size); - cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x2, y2), cv::Scalar(0, 255, 0), line_size); - cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x3, y3), cv::Scalar(255, 0, 0), line_size); + cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x1, y1), + cv::Scalar(0, 0, 255), line_size); + cv::line(vis_im, cv::Point2i(tdx, tdy), 
cv::Point2i(x2, y2), + cv::Scalar(0, 255, 0), line_size); + cv::line(vis_im, cv::Point2i(tdx, tdy), cv::Point2i(x3, y3), + cv::Scalar(255, 0, 0), line_size); return vis_im; } } // namespace vision -} // namespace fastdeploy - -#endif \ No newline at end of file +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/vision/visualize/keypoint.cc b/fastdeploy/vision/visualize/keypoint.cc index 21be32561..37d547732 100644 --- a/fastdeploy/vision/visualize/keypoint.cc +++ b/fastdeploy/vision/visualize/keypoint.cc @@ -11,9 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. - -#ifdef ENABLE_VISION_VISUALIZE - #include "fastdeploy/vision/visualize/visualize.h" namespace fastdeploy { @@ -29,7 +26,7 @@ cv::Mat VisKeypointDetection(const cv::Mat& im, auto colormap = GenerateColorMap(); cv::Mat vis_img = im.clone(); int detection_nums = results.keypoints.size() / 17; - for (int i = 0; i < detection_nums; i++){ + for (int i = 0; i < detection_nums; i++) { int index = i * 17; bool is_over_threshold = true; for (int j = 0; j < results.num_joints; j++) { @@ -43,20 +40,18 @@ cv::Mat VisKeypointDetection(const cv::Mat& im, int x_coord = int(results.keypoints[index + k][0]); int y_coord = int(results.keypoints[index + k][1]); cv::circle(vis_img, cv::Point2d(x_coord, y_coord), 1, - cv::Scalar(0, 0, 255), 2); + cv::Scalar(0, 0, 255), 2); int x_start = int(results.keypoints[index + edge[k][0]][0]); int y_start = int(results.keypoints[index + edge[k][0]][1]); int x_end = int(results.keypoints[index + edge[k][1]][0]); int y_end = int(results.keypoints[index + edge[k][1]][1]); - cv::line(vis_img, cv::Point2d(x_start, y_start), cv::Point2d(x_end, y_end), - colormap[k], 1); + cv::line(vis_img, cv::Point2d(x_start, y_start), + cv::Point2d(x_end, y_end), colormap[k], 1); } } - } return vis_img; } } // namespace vision } // namespace fastdeploy -#endif diff --git a/fastdeploy/vision/visualize/matting.cc b/fastdeploy/vision/visualize/matting.cc index 54b9ea440..14514a5a3 100644 --- a/fastdeploy/vision/visualize/matting.cc +++ b/fastdeploy/vision/visualize/matting.cc @@ -11,9 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. - -#ifdef ENABLE_VISION_VISUALIZE - #include "fastdeploy/vision/visualize/visualize.h" #include "opencv2/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" @@ -120,4 +117,3 @@ cv::Mat Visualize::VisMattingAlpha(const cv::Mat& im, } // namespace vision } // namespace fastdeploy -#endif diff --git a/fastdeploy/vision/visualize/mot.cc b/fastdeploy/vision/visualize/mot.cc index a04fda8e7..eb5572b98 100644 --- a/fastdeploy/vision/visualize/mot.cc +++ b/fastdeploy/vision/visualize/mot.cc @@ -12,21 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifdef ENABLE_VISION_VISUALIZE -#include "fastdeploy/vision/visualize/visualize.h" #include +#include "fastdeploy/vision/visualize/visualize.h" + namespace fastdeploy { namespace vision { cv::Scalar GetMOTBoxColor(int idx) { idx = idx * 3; - cv::Scalar color = cv::Scalar((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255); + cv::Scalar color = + cv::Scalar((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255); return color; } cv::Mat VisMOT(const cv::Mat &img, const MOTResult &results, - float score_threshold, tracking::TrailRecorder* recorder) { + float score_threshold, tracking::TrailRecorder *recorder) { cv::Mat vis_img = img.clone(); int im_h = img.rows; int im_w = img.cols; @@ -35,56 +36,44 @@ cv::Mat VisMOT(const cv::Mat &img, const MOTResult &results, float line_thickness = std::max(1, static_cast(im_w / 500.)); for (int i = 0; i < results.boxes.size(); ++i) { if (results.scores[i] < score_threshold) { - continue; + continue; } const int obj_id = results.ids[i]; const float score = results.scores[i]; cv::Scalar color = GetMOTBoxColor(obj_id); - if (recorder != nullptr){ + if (recorder != nullptr) { int id = results.ids[i]; auto iter = recorder->records.find(id); if (iter != recorder->records.end()) { for (int j = 0; j < iter->second.size(); j++) { - cv::Point center(iter->second[j][0], iter->second[j][1]); - cv::circle(vis_img, center, text_thickness, color); + cv::Point center(iter->second[j][0], iter->second[j][1]); + cv::circle(vis_img, center, text_thickness, color); } } } cv::Point pt1 = cv::Point(results.boxes[i][0], results.boxes[i][1]); cv::Point pt2 = cv::Point(results.boxes[i][2], results.boxes[i][3]); - cv::Point id_pt = - cv::Point(results.boxes[i][0], results.boxes[i][1] + 10); + cv::Point id_pt = cv::Point(results.boxes[i][0], results.boxes[i][1] + 10); cv::Point score_pt = - cv::Point(results.boxes[i][0], results.boxes[i][1] - 10); + cv::Point(results.boxes[i][0], results.boxes[i][1] - 10); cv::rectangle(vis_img, pt1, pt2, color, line_thickness); std::ostringstream idoss; idoss << std::setiosflags(std::ios::fixed) << std::setprecision(4); idoss << obj_id; std::string id_text = idoss.str(); - cv::putText(vis_img, - id_text, - id_pt, - cv::FONT_HERSHEY_PLAIN, - text_scale, - color, - text_thickness); + cv::putText(vis_img, id_text, id_pt, cv::FONT_HERSHEY_PLAIN, text_scale, + color, text_thickness); std::ostringstream soss; soss << std::setiosflags(std::ios::fixed) << std::setprecision(2); soss << score; std::string score_text = soss.str(); - cv::putText(vis_img, - score_text, - score_pt, - cv::FONT_HERSHEY_PLAIN, - text_scale, - color, - text_thickness); + cv::putText(vis_img, score_text, score_pt, cv::FONT_HERSHEY_PLAIN, + text_scale, color, text_thickness); } return vis_img; } -}// namespace vision -} //namespace fastdepoly -#endif +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/visualize/ocr.cc b/fastdeploy/vision/visualize/ocr.cc index ac8f36312..4946e08e7 100644 --- a/fastdeploy/vision/visualize/ocr.cc +++ b/fastdeploy/vision/visualize/ocr.cc @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifdef ENABLE_VISION_VISUALIZE - #include "fastdeploy/vision/visualize/visualize.h" namespace fastdeploy { @@ -63,4 +61,3 @@ cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) { } // namespace vision } // namespace fastdeploy -#endif diff --git a/fastdeploy/vision/visualize/remove_small_connnected_area.cc b/fastdeploy/vision/visualize/remove_small_connnected_area.cc index d4b4afd4b..bbcc74459 100644 --- a/fastdeploy/vision/visualize/remove_small_connnected_area.cc +++ b/fastdeploy/vision/visualize/remove_small_connnected_area.cc @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifdef ENABLE_VISION_VISUALIZE - #include "fastdeploy/vision/visualize/visualize.h" #include "opencv2/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" @@ -110,4 +108,3 @@ cv::Mat Visualize::RemoveSmallConnectedArea(const cv::Mat& alpha_pred, } // namespace vision } // namespace fastdeploy -#endif diff --git a/fastdeploy/vision/visualize/segmentation.cc b/fastdeploy/vision/visualize/segmentation.cc index 5fa8c7891..cfdf4a2e3 100644 --- a/fastdeploy/vision/visualize/segmentation.cc +++ b/fastdeploy/vision/visualize/segmentation.cc @@ -12,19 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifdef ENABLE_VISION_VISUALIZE - -#include "fastdeploy/vision/visualize/visualize.h" #include "fastdeploy/vision/visualize/segmentation_arm.h" +#include "fastdeploy/vision/visualize/visualize.h" #include "opencv2/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" namespace fastdeploy { namespace vision { -static cv::Mat VisSegmentationCommonCpu( - const cv::Mat& im, const SegmentationResult& result, - float weight) { +static cv::Mat VisSegmentationCommonCpu(const cv::Mat& im, + const SegmentationResult& result, + float weight) { // Use the native c++ version without any optimization. auto color_map = GenerateColorMap(1000); int64_t height = result.shape[0]; @@ -52,12 +50,12 @@ static cv::Mat VisSegmentationCommonCpu( cv::Mat VisSegmentation(const cv::Mat& im, const SegmentationResult& result, float weight) { - // TODO: Support SSE/AVX on x86_64 platforms -#ifdef __ARM_NEON + // TODO: Support SSE/AVX on x86_64 platforms +#ifdef __ARM_NEON return VisSegmentationNEON(im, result, weight, true); -#else +#else return VisSegmentationCommonCpu(im, result, weight); -#endif +#endif } cv::Mat Visualize::VisSegmentation(const cv::Mat& im, @@ -65,14 +63,13 @@ cv::Mat Visualize::VisSegmentation(const cv::Mat& im, FDWARNING << "DEPRECATED: fastdeploy::vision::Visualize::VisSegmentation is " "deprecated, please use fastdeploy::vision:VisSegmentation " "function instead." - << std::endl; -#ifdef __ARM_NEON + << std::endl; +#ifdef __ARM_NEON return VisSegmentationNEON(im, result, 0.5f, true); -#else +#else return VisSegmentationCommonCpu(im, result, 0.5f); -#endif +#endif } } // namespace vision } // namespace fastdeploy -#endif diff --git a/fastdeploy/vision/visualize/segmentation_arm.cc b/fastdeploy/vision/visualize/segmentation_arm.cc index 154883f77..b4856b3db 100644 --- a/fastdeploy/vision/visualize/segmentation_arm.cc +++ b/fastdeploy/vision/visualize/segmentation_arm.cc @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifdef ENABLE_VISION_VISUALIZE - #include "fastdeploy/vision/visualize/segmentation_arm.h" #ifdef __ARM_NEON #include @@ -24,8 +22,9 @@ namespace vision { static constexpr int _OMP_THREADS = 2; -static inline void QuantizeBlendingWeight8( - float weight, uint8_t* old_multi_factor, uint8_t* new_multi_factor) { +static inline void QuantizeBlendingWeight8(float weight, + uint8_t* old_multi_factor, + uint8_t* new_multi_factor) { // Quantize the weight to boost blending performance. // if 0.0 < w <= 1/8, w ~ 1/8=1/(2^3) shift right 3 mul 1, 7 // if 1/8 < w <= 2/8, w ~ 2/8=1/(2^3) shift right 3 mul 2, 6 @@ -39,34 +38,34 @@ static inline void QuantizeBlendingWeight8( *old_multi_factor = (8 - weight_quantize); } -cv::Mat VisSegmentationNEON( - const cv::Mat& im, const SegmentationResult& result, - float weight, bool quantize_weight) { -#ifndef __ARM_NEON - FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!") +cv::Mat VisSegmentationNEON(const cv::Mat& im, const SegmentationResult& result, + float weight, bool quantize_weight) { +#ifndef __ARM_NEON + FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!") #else int64_t height = result.shape[0]; int64_t width = result.shape[1]; auto vis_img = cv::Mat(height, width, CV_8UC3); - + int32_t size = static_cast(height * width); - uint8_t *vis_ptr = static_cast(vis_img.data); - const uint8_t *label_ptr = static_cast(result.label_map.data()); - const uint8_t *im_ptr = static_cast(im.data); + uint8_t* vis_ptr = static_cast(vis_img.data); + const uint8_t* label_ptr = + static_cast(result.label_map.data()); + const uint8_t* im_ptr = static_cast(im.data); if (!quantize_weight) { uint8x16_t zerox16 = vdupq_n_u8(0); - #pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS) +#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS) for (int i = 0; i < size - 15; i += 16) { uint8x16x3_t bgrx16x3 = vld3q_u8(im_ptr + i * 3); // 48 bytes - uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes + uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes uint8x16_t ibx16 = bgrx16x3.val[0]; uint8x16_t igx16 = bgrx16x3.val[1]; uint8x16_t irx16 = bgrx16x3.val[2]; // e.g 0b00000001 << 7 -> 0b10000000 128; - uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7); - uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4); - uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3); + uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7); + uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4); + uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3); uint8x16x3_t vbgrx16x3; // Keep the pixels of input im if mask = 0 uint8x16_t cezx16 = vceqq_u8(labelx16, zerox16); @@ -77,35 +76,34 @@ cv::Mat VisSegmentationNEON( } for (int i = size - 15; i < size; i++) { uint8_t label = label_ptr[i]; - vis_ptr[i * 3 + 0] = (label << 7); - vis_ptr[i * 3 + 1] = (label << 4); - vis_ptr[i * 3 + 2] = (label << 3); + vis_ptr[i * 3 + 0] = (label << 7); + vis_ptr[i * 3 + 1] = (label << 4); + vis_ptr[i * 3 + 2] = (label << 3); } // Blend the colors use OpenCV cv::addWeighted(im, 1.0 - weight, vis_img, weight, 0, vis_img); return vis_img; } - + // Quantize the weight to boost blending performance. // After that, we can directly use shift instructions - // to blend the colors from input im and mask. Please + // to blend the colors from input im and mask. Please // check QuantizeBlendingWeight8 for more details. 
uint8_t old_multi_factor, new_multi_factor; - QuantizeBlendingWeight8(weight, &old_multi_factor, - &new_multi_factor); + QuantizeBlendingWeight8(weight, &old_multi_factor, &new_multi_factor); if (new_multi_factor == 0) { - return im; // Only keep origin image. - } - + return im; // Only keep origin image. + } + if (new_multi_factor == 8) { - // Only keep mask, no need to blending with origin image. - #pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS) +// Only keep mask, no need to blending with origin image. +#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS) for (int i = 0; i < size - 15; i += 16) { - uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes + uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes // e.g 0b00000001 << 7 -> 0b10000000 128; - uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7); - uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4); - uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3); + uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7); + uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4); + uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3); uint8x16x3_t vbgr16x3; vbgr16x3.val[0] = mbx16; vbgr16x3.val[1] = mgx16; @@ -114,36 +112,36 @@ cv::Mat VisSegmentationNEON( } for (int i = size - 15; i < size; i++) { uint8_t label = label_ptr[i]; - vis_ptr[i * 3 + 0] = (label << 7); - vis_ptr[i * 3 + 1] = (label << 4); - vis_ptr[i * 3 + 2] = (label << 3); - } - return vis_img; + vis_ptr[i * 3 + 0] = (label << 7); + vis_ptr[i * 3 + 1] = (label << 4); + vis_ptr[i * 3 + 2] = (label << 3); + } + return vis_img; } - + uint8x16_t zerox16 = vdupq_n_u8(0); uint8x16_t old_fx16 = vdupq_n_u8(old_multi_factor); uint8x16_t new_fx16 = vdupq_n_u8(new_multi_factor); - // Blend the two colors together with quantize 'weight'. - #pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS) +// Blend the two colors together with quantize 'weight'. 
+#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS) for (int i = 0; i < size - 15; i += 16) { uint8x16x3_t bgrx16x3 = vld3q_u8(im_ptr + i * 3); // 48 bytes - uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes + uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes uint8x16_t ibx16 = bgrx16x3.val[0]; uint8x16_t igx16 = bgrx16x3.val[1]; uint8x16_t irx16 = bgrx16x3.val[2]; // e.g 0b00000001 << 7 -> 0b10000000 128; - uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7); - uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4); - uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3); + uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7); + uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4); + uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3); // Moving 7 bits to the right tends to result in zero, - // So, We choose to shift 3 bits to get an approximation + // So, We choose to shift 3 bits to get an approximation uint8x16_t ibx16_mshr = vmulq_u8(vshrq_n_u8(ibx16, 3), old_fx16); - uint8x16_t igx16_mshr = vmulq_u8(vshrq_n_u8(igx16, 3), old_fx16); + uint8x16_t igx16_mshr = vmulq_u8(vshrq_n_u8(igx16, 3), old_fx16); uint8x16_t irx16_mshr = vmulq_u8(vshrq_n_u8(irx16, 3), old_fx16); uint8x16_t mbx16_mshr = vmulq_u8(vshrq_n_u8(mbx16, 3), new_fx16); uint8x16_t mgx16_mshr = vmulq_u8(vshrq_n_u8(mgx16, 3), new_fx16); - uint8x16_t mrx16_mshr = vmulq_u8(vshrq_n_u8(mrx16, 3), new_fx16); + uint8x16_t mrx16_mshr = vmulq_u8(vshrq_n_u8(mrx16, 3), new_fx16); uint8x16_t qbx16 = vqaddq_u8(ibx16_mshr, mbx16_mshr); uint8x16_t qgx16 = vqaddq_u8(igx16_mshr, mgx16_mshr); uint8x16_t qrx16 = vqaddq_u8(irx16_mshr, mrx16_mshr); @@ -152,10 +150,10 @@ cv::Mat VisSegmentationNEON( uint8x16_t abx16 = vandq_u8(cezx16, ibx16); uint8x16_t agx16 = vandq_u8(cezx16, igx16); uint8x16_t arx16 = vandq_u8(cezx16, irx16); - uint8x16x3_t vbgr16x3; - // Reset qx values to 0 if label is 0, then, keep mask values - // if label is not 0 - uint8x16_t ncezx16 = vmvnq_u8(cezx16); + uint8x16x3_t vbgr16x3; + // Reset qx values to 0 if label is 0, then, keep mask values + // if label is not 0 + uint8x16_t ncezx16 = vmvnq_u8(cezx16); vbgr16x3.val[0] = vorrq_u8(abx16, vandq_u8(ncezx16, qbx16)); vbgr16x3.val[1] = vorrq_u8(agx16, vandq_u8(ncezx16, qgx16)); vbgr16x3.val[2] = vorrq_u8(arx16, vandq_u8(ncezx16, qrx16)); @@ -164,18 +162,16 @@ cv::Mat VisSegmentationNEON( } for (int i = size - 15; i < size; i++) { uint8_t label = label_ptr[i]; - vis_ptr[i * 3 + 0] = (im_ptr[i * 3 + 0] >> 3) * old_multi_factor - + ((label << 7) >> 3) * new_multi_factor; - vis_ptr[i * 3 + 1] = (im_ptr[i * 3 + 1] >> 3) * old_multi_factor - + ((label << 4) >> 3) * new_multi_factor; - vis_ptr[i * 3 + 2] = (im_ptr[i * 3 + 2] >> 3) * old_multi_factor - + ((label << 3) >> 3) * new_multi_factor; - } + vis_ptr[i * 3 + 0] = (im_ptr[i * 3 + 0] >> 3) * old_multi_factor + + ((label << 7) >> 3) * new_multi_factor; + vis_ptr[i * 3 + 1] = (im_ptr[i * 3 + 1] >> 3) * old_multi_factor + + ((label << 4) >> 3) * new_multi_factor; + vis_ptr[i * 3 + 2] = (im_ptr[i * 3 + 2] >> 3) * old_multi_factor + + ((label << 3) >> 3) * new_multi_factor; + } return vis_img; -#endif +#endif } } // namespace vision -} // namespace fastdeploy - -#endif \ No newline at end of file +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/vision/visualize/segmentation_arm.h b/fastdeploy/vision/visualize/segmentation_arm.h index 15c91eb54..89e6bad5f 100644 --- a/fastdeploy/vision/visualize/segmentation_arm.h +++ b/fastdeploy/vision/visualize/segmentation_arm.h @@ -12,7 +12,6 @@ // See the License for the specific language 
governing permissions and // limitations under the License. -#ifdef ENABLE_VISION_VISUALIZE #pragma once #include "fastdeploy/vision/common/result.h" @@ -26,6 +25,3 @@ cv::Mat VisSegmentationNEON(const cv::Mat& im, const SegmentationResult& result, } // namespace vision } // namespace fastdeploy - -#endif - diff --git a/fastdeploy/vision/visualize/swap_background.cc b/fastdeploy/vision/visualize/swap_background.cc index c7669332b..f5714e2e0 100644 --- a/fastdeploy/vision/visualize/swap_background.cc +++ b/fastdeploy/vision/visualize/swap_background.cc @@ -12,20 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifdef ENABLE_VISION_VISUALIZE - -#include "fastdeploy/vision/visualize/visualize.h" +#include "fastdeploy/utils/utils.h" #include "fastdeploy/vision/visualize/swap_background_arm.h" +#include "fastdeploy/vision/visualize/visualize.h" #include "opencv2/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" -#include "fastdeploy/utils/utils.h" namespace fastdeploy { namespace vision { -static cv::Mat SwapBackgroundCommonCpu( - const cv::Mat& im, const cv::Mat& background, - const MattingResult& result, bool remove_small_connected_area) { +static cv::Mat SwapBackgroundCommonCpu(const cv::Mat& im, + const cv::Mat& background, + const MattingResult& result, + bool remove_small_connected_area) { FDASSERT((!im.empty()), "Image can't be empty!"); FDASSERT((im.channels() == 3), "Only support 3 channels image mat!"); FDASSERT((!background.empty()), "Background image can't be empty!"); @@ -77,9 +76,10 @@ static cv::Mat SwapBackgroundCommonCpu( return vis_img; } -static cv::Mat SwapBackgroundCommonCpu( - const cv::Mat& im, const cv::Mat& background, - const SegmentationResult& result, int background_label) { +static cv::Mat SwapBackgroundCommonCpu(const cv::Mat& im, + const cv::Mat& background, + const SegmentationResult& result, + int background_label) { FDASSERT((!im.empty()), "Image can't be empty!"); FDASSERT((im.channels() == 3), "Only support 3 channels image mat!"); FDASSERT((!background.empty()), "Background image can't be empty!"); @@ -129,25 +129,25 @@ static cv::Mat SwapBackgroundCommonCpu( cv::Mat SwapBackground(const cv::Mat& im, const cv::Mat& background, const MattingResult& result, bool remove_small_connected_area) { - // TODO: Support SSE/AVX on x86_64 platforms -#ifdef __ARM_NEON - return SwapBackgroundNEON(im, background, result, - remove_small_connected_area); -#else - return SwapBackgroundCommonCpu(im, background, result, - remove_small_connected_area); -#endif + // TODO: Support SSE/AVX on x86_64 platforms +#ifdef __ARM_NEON + return SwapBackgroundNEON(im, background, result, + remove_small_connected_area); +#else + return SwapBackgroundCommonCpu(im, background, result, + remove_small_connected_area); +#endif } cv::Mat SwapBackground(const cv::Mat& im, const cv::Mat& background, const SegmentationResult& result, int background_label) { - // TODO: Support SSE/AVX on x86_64 platforms -#ifdef __ARM_NEON + // TODO: Support SSE/AVX on x86_64 platforms +#ifdef __ARM_NEON // return SwapBackgroundNEON(im, background, result, background_label); return SwapBackgroundNEON(im, background, result, background_label); -#else +#else return SwapBackgroundCommonCpu(im, background, result, background_label); -#endif +#endif } // DEPRECATED @@ -155,27 +155,26 @@ cv::Mat Visualize::SwapBackgroundMatting(const cv::Mat& im, const cv::Mat& background, const MattingResult& result, bool remove_small_connected_area) { -// TODO: Support 
SSE/AVX on x86_64 platforms
-#ifdef __ARM_NEON
-  return SwapBackgroundNEON(im, background, result,
+// TODO: Support SSE/AVX on x86_64 platforms
+#ifdef __ARM_NEON
+  return SwapBackgroundNEON(im, background, result,
                             remove_small_connected_area);
-#else
-  return SwapBackgroundCommonCpu(im, background, result,
+#else
+  return SwapBackgroundCommonCpu(im, background, result,
                                  remove_small_connected_area);
-#endif
+#endif
 }
 cv::Mat Visualize::SwapBackgroundSegmentation(
     const cv::Mat& im, const cv::Mat& background, int background_label,
     const SegmentationResult& result) {
-  // TODO: Support SSE/AVX on x86_64 platforms
-#ifdef __ARM_NEON
+  // TODO: Support SSE/AVX on x86_64 platforms
+#ifdef __ARM_NEON
   return SwapBackgroundNEON(im, background, result, background_label);
-#else
+#else
   return SwapBackgroundCommonCpu(im, background, result, background_label);
-#endif
+#endif
 }
 }  // namespace vision
 }  // namespace fastdeploy
-#endif
diff --git a/fastdeploy/vision/visualize/swap_background_arm.cc b/fastdeploy/vision/visualize/swap_background_arm.cc
index 3abbffd95..5faa683d4 100644
--- a/fastdeploy/vision/visualize/swap_background_arm.cc
+++ b/fastdeploy/vision/visualize/swap_background_arm.cc
@@ -12,9 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#ifdef ENABLE_VISION_VISUALIZE
-#include "fastdeploy/vision/visualize/visualize.h"
 #include "fastdeploy/vision/visualize/swap_background_arm.h"
+
+#include "fastdeploy/vision/visualize/visualize.h"
 #ifdef __ARM_NEON
 #include <arm_neon.h>
 #endif
@@ -25,209 +25,214 @@ namespace vision {
 static constexpr int _OMP_THREADS = 2;
-cv::Mat SwapBackgroundNEON(const cv::Mat& im,
-                           const cv::Mat& background,
-                           const MattingResult& result,
+cv::Mat SwapBackgroundNEON(const cv::Mat& im, const cv::Mat& background,
+                           const MattingResult& result,
                            bool remove_small_connected_area) {
-#ifndef __ARM_NEON
-  FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!");
+#ifndef __ARM_NEON
+  FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!");
 #else
-  FDASSERT((!im.empty()), "Image can't be empty!");
-  FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
-  FDASSERT((!background.empty()), "Background image can't be empty!");
-  FDASSERT((background.channels() == 3),
-           "Only support 3 channels background image mat!");
-  int out_h = static_cast<int>(result.shape[0]);
-  int out_w = static_cast<int>(result.shape[1]);
-  int height = im.rows;
-  int width = im.cols;
-  int bg_height = background.rows;
-  int bg_width = background.cols;
-
-  // WARN: may change the original alpha
-  float* alpha_ptr = const_cast<float*>(result.alpha.data());
+  FDASSERT((!im.empty()), "Image can't be empty!");
+  FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
+  FDASSERT((!background.empty()), "Background image can't be empty!");
+  FDASSERT((background.channels() == 3),
+           "Only support 3 channels background image mat!");
+  int out_h = static_cast<int>(result.shape[0]);
+  int out_w = static_cast<int>(result.shape[1]);
+  int height = im.rows;
+  int width = im.cols;
+  int bg_height = background.rows;
+  int bg_width = background.cols;
-  cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_ptr);
-  if (remove_small_connected_area) {
-    alpha = Visualize::RemoveSmallConnectedArea(alpha, 0.05f);
-  }
-  auto vis_img = cv::Mat(height, width, CV_8UC3);
-
-  cv::Mat background_ref;
-  if ((bg_height != height) || (bg_width != width)) {
-    cv::resize(background, background_ref, cv::Size(width, height));
-  } else {
-    background_ref = background;  // ref only
-  }
-  if ((background_ref).type() != CV_8UC3) {
-    (background_ref).convertTo((background_ref), CV_8UC3);
-  }
+  // WARN: may change the original alpha
+  float* alpha_ptr = const_cast<float*>(result.alpha.data());
-  if ((out_h != height) || (out_w != width)) {
-    cv::resize(alpha, alpha, cv::Size(width, height));
-  }
+  cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_ptr);
+  if (remove_small_connected_area) {
+    alpha = Visualize::RemoveSmallConnectedArea(alpha, 0.05f);
+  }
+  auto vis_img = cv::Mat(height, width, CV_8UC3);
-  uint8_t* vis_data = static_cast<uint8_t*>(vis_img.data);
-  const uint8_t* background_data = static_cast<const uint8_t*>(background_ref.data);
-  const uint8_t* im_data = static_cast<const uint8_t*>(im.data);
-  const float* alpha_data = reinterpret_cast<const float*>(alpha.data);
+  cv::Mat background_ref;
+  if ((bg_height != height) || (bg_width != width)) {
+    cv::resize(background, background_ref, cv::Size(width, height));
+  } else {
+    background_ref = background;  // ref only
+  }
+  if ((background_ref).type() != CV_8UC3) {
+    (background_ref).convertTo((background_ref), CV_8UC3);
+  }
-  const int32_t size = static_cast<int32_t>(height * width);
-  #pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
-  for(int i = 0; i < size - 7; i += 8) {
-    uint8x8x3_t ibgrx8x3 = vld3_u8(im_data + i * 3);  // 24 bytes
-    // u8 -> u16 -> u32 -> f32
-    uint16x8_t ibx8 = vmovl_u8(ibgrx8x3.val[0]);
-    uint16x8_t igx8 = vmovl_u8(ibgrx8x3.val[1]);
-    uint16x8_t irx8 = vmovl_u8(ibgrx8x3.val[2]);
-    uint8x8x3_t bbgrx8x3 = vld3_u8(background_data + i * 3);  // 24 bytes
-    uint16x8_t bbx8 = vmovl_u8(bbgrx8x3.val[0]);
-    uint16x8_t bgx8 = vmovl_u8(bbgrx8x3.val[1]);
-    uint16x8_t brx8 = vmovl_u8(bbgrx8x3.val[2]);
+  if ((out_h != height) || (out_w != width)) {
+    cv::resize(alpha, alpha, cv::Size(width, height));
+  }
-    uint32x4_t hibx4 = vmovl_u16(vget_high_u16(ibx8));
-    uint32x4_t higx4 = vmovl_u16(vget_high_u16(igx8));
-    uint32x4_t hirx4 = vmovl_u16(vget_high_u16(irx8));
-    uint32x4_t libx4 = vmovl_u16(vget_low_u16(ibx8));
-    uint32x4_t ligx4 = vmovl_u16(vget_low_u16(igx8));
-    uint32x4_t lirx4 = vmovl_u16(vget_low_u16(irx8));
+  uint8_t* vis_data = static_cast<uint8_t*>(vis_img.data);
+  const uint8_t* background_data =
+      static_cast<const uint8_t*>(background_ref.data);
+  const uint8_t* im_data = static_cast<const uint8_t*>(im.data);
+  const float* alpha_data = reinterpret_cast<const float*>(alpha.data);
-    uint32x4_t hbbx4 = vmovl_u16(vget_high_u16(bbx8));
-    uint32x4_t hbgx4 = vmovl_u16(vget_high_u16(bgx8));
-    uint32x4_t hbrx4 = vmovl_u16(vget_high_u16(brx8));
-    uint32x4_t lbbx4 = vmovl_u16(vget_low_u16(bbx8));
-    uint32x4_t lbgx4 = vmovl_u16(vget_low_u16(bgx8));
-    uint32x4_t lbrx4 = vmovl_u16(vget_low_u16(brx8));
+  const int32_t size = static_cast<int32_t>(height * width);
+#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
+  for (int i = 0; i < size - 7; i += 8) {
+    uint8x8x3_t ibgrx8x3 = vld3_u8(im_data + i * 3);  // 24 bytes
+    // u8 -> u16 -> u32 -> f32
+    uint16x8_t ibx8 = vmovl_u8(ibgrx8x3.val[0]);
+    uint16x8_t igx8 = vmovl_u8(ibgrx8x3.val[1]);
+    uint16x8_t irx8 = vmovl_u8(ibgrx8x3.val[2]);
+    uint8x8x3_t bbgrx8x3 = vld3_u8(background_data + i * 3);  // 24 bytes
+    uint16x8_t bbx8 = vmovl_u8(bbgrx8x3.val[0]);
+    uint16x8_t bgx8 = vmovl_u8(bbgrx8x3.val[1]);
+    uint16x8_t brx8 = vmovl_u8(bbgrx8x3.val[2]);
-    float32x4_t fhibx4 = vcvtq_f32_u32(hibx4);
-    float32x4_t fhigx4 = vcvtq_f32_u32(higx4);
-    float32x4_t fhirx4 = vcvtq_f32_u32(hirx4);
-    float32x4_t flibx4 = vcvtq_f32_u32(libx4);
-    float32x4_t fligx4 = vcvtq_f32_u32(ligx4);
-    float32x4_t flirx4 = vcvtq_f32_u32(lirx4);
+    uint32x4_t hibx4 = vmovl_u16(vget_high_u16(ibx8));
+    uint32x4_t higx4 = vmovl_u16(vget_high_u16(igx8));
+    uint32x4_t hirx4 = vmovl_u16(vget_high_u16(irx8));
+    uint32x4_t libx4 = vmovl_u16(vget_low_u16(ibx8));
+    uint32x4_t ligx4 = vmovl_u16(vget_low_u16(igx8));
+    uint32x4_t lirx4 = vmovl_u16(vget_low_u16(irx8));
-    float32x4_t fhbbx4 = vcvtq_f32_u32(hbbx4);
-    float32x4_t fhbgx4 = vcvtq_f32_u32(hbgx4);
-    float32x4_t fhbrx4 = vcvtq_f32_u32(hbrx4);
-    float32x4_t flbbx4 = vcvtq_f32_u32(lbbx4);
-    float32x4_t flbgx4 = vcvtq_f32_u32(lbgx4);
-    float32x4_t flbrx4 = vcvtq_f32_u32(lbrx4);
-
-    // alpha load from little end
-    float32x4_t lalpx4 = vld1q_f32(alpha_data + i);      // low bits
-    float32x4_t halpx4 = vld1q_f32(alpha_data + i + 4);  // high bits
-    float32x4_t rlalpx4 = vsubq_f32(vdupq_n_f32(1.0f), lalpx4);
-    float32x4_t rhalpx4 = vsubq_f32(vdupq_n_f32(1.0f), halpx4);
+    uint32x4_t hbbx4 = vmovl_u16(vget_high_u16(bbx8));
+    uint32x4_t hbgx4 = vmovl_u16(vget_high_u16(bgx8));
+    uint32x4_t hbrx4 = vmovl_u16(vget_high_u16(brx8));
+    uint32x4_t lbbx4 = vmovl_u16(vget_low_u16(bbx8));
+    uint32x4_t lbgx4 = vmovl_u16(vget_low_u16(bgx8));
+    uint32x4_t lbrx4 = vmovl_u16(vget_low_u16(brx8));
-    // blending
-    float32x4_t fhvbx4 = vaddq_f32(vmulq_f32(fhibx4, halpx4), vmulq_f32(fhbbx4, rhalpx4));
-    float32x4_t fhvgx4 = vaddq_f32(vmulq_f32(fhigx4, halpx4), vmulq_f32(fhbgx4, rhalpx4));
-    float32x4_t fhvrx4 = vaddq_f32(vmulq_f32(fhirx4, halpx4), vmulq_f32(fhbrx4, rhalpx4));
-    float32x4_t flvbx4 = vaddq_f32(vmulq_f32(flibx4, lalpx4), vmulq_f32(flbbx4, rlalpx4));
-    float32x4_t flvgx4 = vaddq_f32(vmulq_f32(fligx4, lalpx4), vmulq_f32(flbgx4, rlalpx4));
-    float32x4_t flvrx4 = vaddq_f32(vmulq_f32(flirx4, lalpx4), vmulq_f32(flbrx4, rlalpx4));
+    float32x4_t fhibx4 = vcvtq_f32_u32(hibx4);
+    float32x4_t fhigx4 = vcvtq_f32_u32(higx4);
+    float32x4_t fhirx4 = vcvtq_f32_u32(hirx4);
+    float32x4_t flibx4 = vcvtq_f32_u32(libx4);
+    float32x4_t fligx4 = vcvtq_f32_u32(ligx4);
+    float32x4_t flirx4 = vcvtq_f32_u32(lirx4);
-    // f32 -> u32 -> u16 -> u8
-    uint8x8x3_t vbgrx8x3;
-    // combine low 64 bits and high 64 bits into one 128 neon register
-    vbgrx8x3.val[0] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvbx4)),
-                                vmovn_u32(vcvtq_u32_f32(fhvbx4))));
-    vbgrx8x3.val[1] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvgx4)),
-                                vmovn_u32(vcvtq_u32_f32(fhvgx4))));
-    vbgrx8x3.val[2] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvrx4)),
-                                vmovn_u32(vcvtq_u32_f32(fhvrx4))));
-    vst3_u8(vis_data + i * 3, vbgrx8x3);
-  }
+    float32x4_t fhbbx4 = vcvtq_f32_u32(hbbx4);
+    float32x4_t fhbgx4 = vcvtq_f32_u32(hbgx4);
+    float32x4_t fhbrx4 = vcvtq_f32_u32(hbrx4);
+    float32x4_t flbbx4 = vcvtq_f32_u32(lbbx4);
+    float32x4_t flbgx4 = vcvtq_f32_u32(lbgx4);
+    float32x4_t flbrx4 = vcvtq_f32_u32(lbrx4);
-  for (int i = size - 7; i < size; i++) {
-    float alp = alpha_data[i];
-    for (int c = 0; c < 3; ++c) {
-      vis_data[i * 3 + 0] = cv::saturate_cast<uchar>(
-          static_cast<float>(im_data[i * 3 + c]) * alp + (1.0f - alp)
-          * static_cast<float>(background_data[i * 3 + c]));
-    }
-  }
+    // alpha load from little end
+    float32x4_t lalpx4 = vld1q_f32(alpha_data + i);      // low bits
+    float32x4_t halpx4 = vld1q_f32(alpha_data + i + 4);  // high bits
+    float32x4_t rlalpx4 = vsubq_f32(vdupq_n_f32(1.0f), lalpx4);
+    float32x4_t rhalpx4 = vsubq_f32(vdupq_n_f32(1.0f), halpx4);
-  return vis_img;
+    // blending
+    float32x4_t fhvbx4 =
+        vaddq_f32(vmulq_f32(fhibx4, halpx4), vmulq_f32(fhbbx4, rhalpx4));
+    float32x4_t fhvgx4 =
+        vaddq_f32(vmulq_f32(fhigx4, halpx4), vmulq_f32(fhbgx4, rhalpx4));
+    float32x4_t fhvrx4 =
+        vaddq_f32(vmulq_f32(fhirx4, halpx4), vmulq_f32(fhbrx4, rhalpx4));
+    float32x4_t flvbx4 =
+        vaddq_f32(vmulq_f32(flibx4, lalpx4), vmulq_f32(flbbx4, rlalpx4));
+    float32x4_t flvgx4 =
+        vaddq_f32(vmulq_f32(fligx4, lalpx4), vmulq_f32(flbgx4, rlalpx4));
+    float32x4_t flvrx4 =
+        vaddq_f32(vmulq_f32(flirx4, lalpx4), vmulq_f32(flbrx4, rlalpx4));
+
+    // f32 -> u32 -> u16 -> u8
+    uint8x8x3_t vbgrx8x3;
+    // combine low 64 bits and high 64 bits into one 128 neon register
+    vbgrx8x3.val[0] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvbx4)),
+                                             vmovn_u32(vcvtq_u32_f32(fhvbx4))));
+    vbgrx8x3.val[1] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvgx4)),
+                                             vmovn_u32(vcvtq_u32_f32(fhvgx4))));
+    vbgrx8x3.val[2] = vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(flvrx4)),
+                                             vmovn_u32(vcvtq_u32_f32(fhvrx4))));
+    vst3_u8(vis_data + i * 3, vbgrx8x3);
+  }
+
+  for (int i = size - 7; i < size; i++) {
+    float alp = alpha_data[i];
+    for (int c = 0; c < 3; ++c) {
+      vis_data[i * 3 + 0] = cv::saturate_cast<uchar>(
+          static_cast<float>(im_data[i * 3 + c]) * alp +
+          (1.0f - alp) * static_cast<float>(background_data[i * 3 + c]));
+    }
+  }
+
+  return vis_img;
 #endif
 }
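// NOTE (illustrative, not part of this patch): the vectorized loop above is a
// plain per-pixel alpha blend, vis = alpha * image + (1 - alpha) * background,
// processed 8 pixels per iteration. A minimal scalar sketch of the same blend,
// assuming continuous CV_8UC3 image/background mats and a CV_32FC1 alpha map
// of equal size (exactly what the NEON path prepares), could look like this:
#include <opencv2/core.hpp>

static cv::Mat SwapBackgroundScalarSketch(const cv::Mat& im, const cv::Mat& bg,
                                          const cv::Mat& alpha) {
  cv::Mat vis(im.rows, im.cols, CV_8UC3);
  const float* a = reinterpret_cast<const float*>(alpha.data);
  for (int i = 0; i < im.rows * im.cols; ++i) {
    for (int c = 0; c < 3; ++c) {
      float fg = static_cast<float>(im.data[i * 3 + c]);
      float bk = static_cast<float>(bg.data[i * 3 + c]);
      // Blend foreground over background, clamped back to 8 bits.
      vis.data[i * 3 + c] = cv::saturate_cast<uchar>(a[i] * fg + (1.0f - a[i]) * bk);
    }
  }
  return vis;
}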
-cv::Mat SwapBackgroundNEON(const cv::Mat& im,
-                           const cv::Mat& background,
+cv::Mat SwapBackgroundNEON(const cv::Mat& im, const cv::Mat& background,
                            const SegmentationResult& result,
                            int background_label) {
-#ifndef __ARM_NEON
-  FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!")
+#ifndef __ARM_NEON
+  FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!")
 #else
-  FDASSERT((!im.empty()), "Image can't be empty!");
-  FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
-  FDASSERT((!background.empty()), "Background image can't be empty!");
-  FDASSERT((background.channels() == 3),
-           "Only support 3 channels background image mat!");
-  int out_h = static_cast<int>(result.shape[0]);
-  int out_w = static_cast<int>(result.shape[1]);
-  int height = im.rows;
-  int width = im.cols;
-  int bg_height = background.rows;
-  int bg_width = background.cols;
-  auto vis_img = cv::Mat(height, width, CV_8UC3);
-
-  cv::Mat background_ref;
-  if ((bg_height != height) || (bg_width != width)) {
-    cv::resize(background, background_ref, cv::Size(width, height));
-  } else {
-    background_ref = background;  // ref only
-  }
-  if ((background_ref).type() != CV_8UC3) {
-    (background_ref).convertTo((background_ref), CV_8UC3);
-  }
-
-  uint8_t* vis_data = static_cast<uint8_t*>(vis_img.data);
-  const uint8_t* background_data = static_cast<const uint8_t*>(background_ref.data);
-  const uint8_t* im_data = static_cast<const uint8_t*>(im.data);
-  const uint8_t *label_data = static_cast<const uint8_t*>(result.label_map.data());
+  FDASSERT((!im.empty()), "Image can't be empty!");
+  FDASSERT((im.channels() == 3), "Only support 3 channels image mat!");
+  FDASSERT((!background.empty()), "Background image can't be empty!");
+  FDASSERT((background.channels() == 3),
+           "Only support 3 channels background image mat!");
+  int out_h = static_cast<int>(result.shape[0]);
+  int out_w = static_cast<int>(result.shape[1]);
+  int height = im.rows;
+  int width = im.cols;
+  int bg_height = background.rows;
+  int bg_width = background.cols;
+  auto vis_img = cv::Mat(height, width, CV_8UC3);
-  const uint8_t background_label_ = static_cast<uint8_t>(background_label);
-  const int32_t size = static_cast<int32_t>(height * width);
+  cv::Mat background_ref;
+  if ((bg_height != height) || (bg_width != width)) {
+    cv::resize(background, background_ref, cv::Size(width, height));
+  } else {
+    background_ref = background;  // ref only
+  }
+  if ((background_ref).type() != CV_8UC3) {
+    (background_ref).convertTo((background_ref), CV_8UC3);
+  }
-  uint8x16_t backgroundx16 = vdupq_n_u8(background_label_);
-  #pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
-  for (int i = 0; i < size - 15; i += 16) {
-    uint8x16x3_t ibgr16x3 = vld3q_u8(im_data + i * 3);  // 48 bytes
-    uint8x16x3_t bbgr16x3 = vld3q_u8(background_data + i * 3);
-    uint8x16_t labelx16 = vld1q_u8(label_data + i);  // 16 bytes
-    // Set mask bit = 1 if label != background_label
-    uint8x16_t nkeepx16 = vceqq_u8(labelx16, backgroundx16);
-    uint8x16_t keepx16 = vmvnq_u8(nkeepx16);  // keep_value = 1
-    uint8x16x3_t vbgr16x3;
-    vbgr16x3.val[0] = vorrq_u8(vandq_u8(ibgr16x3.val[0], keepx16),
-                               vandq_u8(bbgr16x3.val[0], nkeepx16));
-    vbgr16x3.val[1] = vorrq_u8(vandq_u8(ibgr16x3.val[1], keepx16),
-                               vandq_u8(bbgr16x3.val[1], nkeepx16));
-    vbgr16x3.val[2] = vorrq_u8(vandq_u8(ibgr16x3.val[2], keepx16),
-                               vandq_u8(bbgr16x3.val[2], nkeepx16));
-    // Store the blended pixels to vis img
-    vst3q_u8(vis_data + i * 3, vbgr16x3);
-  }
+  uint8_t* vis_data = static_cast<uint8_t*>(vis_img.data);
+  const uint8_t* background_data =
+      static_cast<const uint8_t*>(background_ref.data);
+  const uint8_t* im_data = static_cast<const uint8_t*>(im.data);
+  const uint8_t* label_data =
+      static_cast<const uint8_t*>(result.label_map.data());
-  for (int i = size - 15; i < size; i++) {
-    uint8_t label = label_data[i];
-    if (label != background_label_) {
-      vis_data[i * 3 + 0] = im_data[i * 3 + 0];
-      vis_data[i * 3 + 1] = im_data[i * 3 + 1];
-      vis_data[i * 3 + 2] = im_data[i * 3 + 2];
-    } else {
-      vis_data[i * 3 + 0] = background_data[i * 3 + 0];
-      vis_data[i * 3 + 1] = background_data[i * 3 + 1];
-      vis_data[i * 3 + 2] = background_data[i * 3 + 2];
-    }
-  }
+  const uint8_t background_label_ = static_cast<uint8_t>(background_label);
+  const int32_t size = static_cast<int32_t>(height * width);
-  return vis_img;
+  uint8x16_t backgroundx16 = vdupq_n_u8(background_label_);
+#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
+  for (int i = 0; i < size - 15; i += 16) {
+    uint8x16x3_t ibgr16x3 = vld3q_u8(im_data + i * 3);  // 48 bytes
+    uint8x16x3_t bbgr16x3 = vld3q_u8(background_data + i * 3);
+    uint8x16_t labelx16 = vld1q_u8(label_data + i);  // 16 bytes
+    // Set mask bit = 1 if label != background_label
+    uint8x16_t nkeepx16 = vceqq_u8(labelx16, backgroundx16);
+    uint8x16_t keepx16 = vmvnq_u8(nkeepx16);  // keep_value = 1
+    uint8x16x3_t vbgr16x3;
+    vbgr16x3.val[0] = vorrq_u8(vandq_u8(ibgr16x3.val[0], keepx16),
+                               vandq_u8(bbgr16x3.val[0], nkeepx16));
+    vbgr16x3.val[1] = vorrq_u8(vandq_u8(ibgr16x3.val[1], keepx16),
+                               vandq_u8(bbgr16x3.val[1], nkeepx16));
+    vbgr16x3.val[2] = vorrq_u8(vandq_u8(ibgr16x3.val[2], keepx16),
+                               vandq_u8(bbgr16x3.val[2], nkeepx16));
+    // Store the blended pixels to vis img
+    vst3q_u8(vis_data + i * 3, vbgr16x3);
+  }
+
+  for (int i = size - 15; i < size; i++) {
+    uint8_t label = label_data[i];
+    if (label != background_label_) {
+      vis_data[i * 3 + 0] = im_data[i * 3 + 0];
+      vis_data[i * 3 + 1] = im_data[i * 3 + 1];
+      vis_data[i * 3 + 2] = im_data[i * 3 + 2];
+    } else {
+      vis_data[i * 3 + 0] = background_data[i * 3 + 0];
+      vis_data[i * 3 + 1] = background_data[i * 3 + 1];
+      vis_data[i * 3 + 2] = background_data[i * 3 + 2];
+    }
+  }
+
+  return vis_img;
 #endif
 }
 }  // namespace vision
-}  // namespace fastdeploy
-
-#endif
\ No newline at end of file
+}  // namespace fastdeploy
\ No newline at end of file
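The segmentation variant above builds its keep/replace mask with vceqq_u8/vmvnq_u8 and then selects each output byte with a vandq_u8/vorrq_u8 pair. As a side note, NEON also provides a single bitwise-select intrinsic, vbslq_u8, which computes (mask & a) | (~mask & b); a hypothetical helper using it (not part of this patch) would express the same selection:

#ifdef __ARM_NEON
#include <arm_neon.h>

// Take image bytes where keepx16 is all-ones (label != background_label),
// otherwise take background bytes; equivalent to the vandq/vorrq pair above.
static inline uint8x16x3_t SelectPixelsBsl(uint8x16_t keepx16,
                                           uint8x16x3_t ibgr16x3,
                                           uint8x16x3_t bbgr16x3) {
  uint8x16x3_t out;
  out.val[0] = vbslq_u8(keepx16, ibgr16x3.val[0], bbgr16x3.val[0]);
  out.val[1] = vbslq_u8(keepx16, ibgr16x3.val[1], bbgr16x3.val[1]);
  out.val[2] = vbslq_u8(keepx16, ibgr16x3.val[2], bbgr16x3.val[2]);
  return out;
}
#endif  // __ARM_NEON

Either spelling expresses the same per-byte selection; the patch keeps the explicit AND/OR form.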
diff --git a/fastdeploy/vision/visualize/swap_background_arm.h b/fastdeploy/vision/visualize/swap_background_arm.h
index eb401e656..99595f338 100644
--- a/fastdeploy/vision/visualize/swap_background_arm.h
+++ b/fastdeploy/vision/visualize/swap_background_arm.h
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#ifdef ENABLE_VISION_VISUALIZE
 #pragma once
 #include "fastdeploy/vision/common/result.h"
@@ -21,18 +20,15 @@ namespace fastdeploy {
 namespace vision {
-cv::Mat SwapBackgroundNEON(const cv::Mat& im,
-                           const cv::Mat& background,
-                           const MattingResult& result,
+cv::Mat SwapBackgroundNEON(const cv::Mat& im,
+                           const cv::Mat& background,
+                           const MattingResult& result,
                            bool remove_small_connected_area = false);
 cv::Mat SwapBackgroundNEON(const cv::Mat& im, const cv::Mat& background,
                            const SegmentationResult& result,
-                           int background_label);
+                           int background_label);
 }  // namespace vision
 }  // namespace fastdeploy
-
-#endif
-
diff --git a/fastdeploy/vision/visualize/visualize.cc b/fastdeploy/vision/visualize/visualize.cc
index bf0fdcb88..4e955ba9c 100644
--- a/fastdeploy/vision/visualize/visualize.cc
+++ b/fastdeploy/vision/visualize/visualize.cc
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#ifdef ENABLE_VISION_VISUALIZE
 #include "fastdeploy/vision/visualize/visualize.h"
 namespace fastdeploy {
@@ -66,4 +65,3 @@ const std::vector<int>& Visualize::GetColorMap(int num_classes) {
 }  // namespace vision
 }  // namespace fastdeploy
-#endif
diff --git a/fastdeploy/vision/visualize/visualize.h b/fastdeploy/vision/visualize/visualize.h
index 3507c6499..f56a51eec 100755
--- a/fastdeploy/vision/visualize/visualize.h
+++ b/fastdeploy/vision/visualize/visualize.h
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#ifdef ENABLE_VISION_VISUALIZE
 #pragma once
 #include "fastdeploy/vision/common/result.h"
@@ -202,4 +201,3 @@ FASTDEPLOY_DECL cv::Mat VisHeadPose(const cv::Mat& im,
 }  // namespace vision
 }  // namespace fastdeploy
-#endif
diff --git a/scripts/android/build_android_cpp.sh b/scripts/android/build_android_cpp.sh
index 5179a5117..da00eacb7 100755
--- a/scripts/android/build_android_cpp.sh
+++ b/scripts/android/build_android_cpp.sh
@@ -88,11 +88,10 @@ __build_fastdeploy_android_shared() {
         -DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN} \
         -DENABLE_ORT_BACKEND=OFF \
         -DENABLE_LITE_BACKEND=ON \
-        -DENABLE_PADDLE_FRONTEND=OFF \
+        -DENABLE_PADDLE2ONNX=OFF \
         -DENABLE_FLYCV=ON \
         -DENABLE_TEXT=OFF \
        -DENABLE_VISION=ON \
-        -DENABLE_VISION_VISUALIZE=ON \
         -DBUILD_EXAMPLES=ON \
         -DWITH_OPENCV_STATIC=OFF \
         -DWITH_LITE_STATIC=OFF \
diff --git a/scripts/android/build_android_cpp_with_text_api.sh b/scripts/android/build_android_cpp_with_text_api.sh
index ac1ca4e6f..1a002bcee 100755
--- a/scripts/android/build_android_cpp_with_text_api.sh
+++ b/scripts/android/build_android_cpp_with_text_api.sh
@@ -88,11 +88,10 @@ __build_fastdeploy_android_shared() {
         -DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN} \
         -DENABLE_ORT_BACKEND=OFF \
         -DENABLE_LITE_BACKEND=ON \
-        -DENABLE_PADDLE_FRONTEND=OFF \
+        -DENABLE_PADDLE2ONNX=OFF \
         -DENABLE_FLYCV=ON \
         -DENABLE_TEXT=ON \
         -DENABLE_VISION=ON \
-        -DENABLE_VISION_VISUALIZE=ON \
         -DBUILD_EXAMPLES=ON \
         -DWITH_OPENCV_STATIC=OFF \
         -DWITH_LITE_STATIC=OFF \
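The visualize header/source diffs above drop the ENABLE_VISION_VISUALIZE translation-unit guard, and the Android build scripts stop passing the matching CMake flag, so the swap-background helpers are compiled whenever vision support is enabled. A hypothetical caller (illustrative only; the SegmentationResult fields shape and label_map are the ones referenced in this diff, while their exact element types are assumed) now needs nothing beyond the regular vision headers:

#include <opencv2/opencv.hpp>

#include "fastdeploy/vision/common/result.h"
#include "fastdeploy/vision/visualize/visualize.h"

int main() {
  cv::Mat im = cv::imread("portrait.jpg");       // 3-channel input image
  cv::Mat background = cv::imread("beach.jpg");  // 3-channel replacement

  // Hypothetical result; in practice this comes from a segmentation model.
  fastdeploy::vision::SegmentationResult result;
  result.shape = {im.rows, im.cols};
  result.label_map.assign(static_cast<size_t>(im.rows) * im.cols, 0);

  // Pixels whose label equals background_label (0 here) are replaced.
  cv::Mat vis = fastdeploy::vision::Visualize::SwapBackgroundSegmentation(
      im, background, /*background_label=*/0, result);
  cv::imwrite("vis.jpg", vis);
  return 0;
}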
diff --git a/serving/scripts/build.sh b/serving/scripts/build.sh
index fa7c0aacb..1038fe030 100644
--- a/serving/scripts/build.sh
+++ b/serving/scripts/build.sh
@@ -13,32 +13,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-ARGS=`getopt -a -o w:n:h:hs -l WITH_GPU:,docker_name:,http_proxy:,https_proxy: -- "$@"`
+ARGS=`getopt -a -o w:n:h:hs -l WITH_GPU:,docker_name:,http_proxy:,https_proxy: -- "$@"`
-eval set -- "${ARGS}"
+eval set -- "${ARGS}"
 echo "parse start"
-while true
-do
-  case "$1" in
-    -w|--WITH_GPU)
-      WITH_GPU="$2"
+while true
+do
+  case "$1" in
+    -w|--WITH_GPU)
+      WITH_GPU="$2"
       shift;;
-    -n|--docker_name)
-      docker_name="$2"
+    -n|--docker_name)
+      docker_name="$2"
       shift;;
-    -h|--http_proxy)
-      http_proxy="$2"
+    -h|--http_proxy)
+      http_proxy="$2"
       shift;;
-    -hs|--https_proxy)
-      https_proxy="$2"
+    -hs|--https_proxy)
+      https_proxy="$2"
       shift;;
-    --)
+    --)
       shift
-      break;;
+      break;;
   esac
   shift
-done
+done
 if [ -z $WITH_GPU ];then
     WITH_GPU="ON"
@@ -88,7 +88,7 @@ nvidia-docker run -i --rm --name ${docker_name} \
             python setup.py bdist_wheel;
             cd /workspace/fastdeploy;
             rm -rf build; mkdir -p build;cd build;
-            cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
+            cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
             make -j`nproc`;
             make install;
             cd /workspace/fastdeploy/serving;
@@ -121,7 +121,7 @@ docker run -i --rm --name ${docker_name} \
             python setup.py bdist_wheel;
             cd /workspace/fastdeploy;
             rm -rf build; mkdir build; cd build;
-            cmake .. -DENABLE_TRT_BACKEND=OFF -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=OFF -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
+            cmake .. -DENABLE_TRT_BACKEND=OFF -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=OFF -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
             make -j`nproc`;
             make install;
             cd /workspace/fastdeploy/serving;