diff --git a/CMakeLists.txt b/CMakeLists.txt
index ee87c6d4d..14abd7fcb 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -55,6 +55,7 @@ endif()
 ############################# Basic Options for FastDeploy ################################
 option(WITH_GPU "Whether WITH_GPU=ON, will enable onnxruntime-gpu/paddle-infernce-gpu/poros-gpu" OFF)
 option(WITH_IPU "Whether WITH_IPU=ON, will enable paddle-infernce-ipu" OFF)
+option(WITH_OPENCL "Whether WITH_OPENCL=ON, will enable paddle-lite-gpu" OFF)
 option(ENABLE_ORT_BACKEND "Whether to enable onnxruntime backend." OFF)
 option(ENABLE_TRT_BACKEND "Whether to enable tensorrt backend." OFF)
 option(ENABLE_PADDLE_BACKEND "Whether to enable paddle backend." OFF)
@@ -334,6 +335,10 @@ if(WITH_GPU)
   include(${PROJECT_SOURCE_DIR}/cmake/cuda.cmake)
 endif()
 
+if(WITH_OPENCL)
+  add_definitions(-DWITH_OPENCL)
+endif()
+
 if(ENABLE_TRT_BACKEND)
   set(ENABLE_PADDLE2ONNX ON)
   if(APPLE OR ANDROID OR IOS)
diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in
index abdb56015..fea096299 100644
--- a/FastDeploy.cmake.in
+++ b/FastDeploy.cmake.in
@@ -10,6 +10,7 @@ set(NEED_ABI0 @NEED_ABI0@)
 # Hardware and Language API
 set(WITH_GPU @WITH_GPU@)
 set(WITH_IPU @WITH_IPU@)
+set(WITH_OPENCL @WITH_OPENCL@)
 set(WITH_ASCEND @WITH_ASCEND@)
 set(WITH_DIRECTML @WITH_DIRECTML@)
 set(WITH_TIMVX @WITH_TIMVX@)
diff --git a/cmake/check.cmake b/cmake/check.cmake
index 5e0ce1794..1ddcf3de3 100644
--- a/cmake/check.cmake
+++ b/cmake/check.cmake
@@ -33,9 +33,13 @@ endif()
 if(WITH_GPU)
   if(APPLE)
     message(FATAL_ERROR "Cannot enable GPU while compling in Mac OSX.")
-    set(WITH_GPU OFF)
   elseif(ANDROID OR IOS)
     message(FATAL_ERROR "Cannot enable GPU while compling in Android or IOS.")
-    set(WITH_GPU OFF)
+  endif()
+endif()
+
+if(WITH_OPENCL)
+  if(NOT ANDROID OR NOT ENABLE_LITE_BACKEND)
+    message(FATAL_ERROR "Cannot enable OpenCL while compiling unless on Android with the Paddle Lite backend enabled.")
   endif()
 endif()
diff --git a/cmake/paddlelite.cmake b/cmake/paddlelite.cmake
index 672ad3227..da7d81f46 100755
--- a/cmake/paddlelite.cmake
+++ b/cmake/paddlelite.cmake
@@ -83,10 +83,19 @@ else()
   if(WIN32 OR APPLE OR IOS)
     message(FATAL_ERROR "Doesn't support windows/mac/ios platform with backend Paddle Lite now.")
   elseif(ANDROID)
-    set(PADDLELITE_URL "${PADDLELITE_URL_PREFIX}/lite-android-${ANDROID_ABI}-0.0.0.ab000121e.tgz")
-    if(ANDROID_ABI MATCHES "arm64-v8a")
-      set(PADDLELITE_URL "${PADDLELITE_URL_PREFIX}/lite-android-${ANDROID_ABI}-fp16-0.0.0.ab000121e.tgz")
-    endif()
+    # Mobile GPU
+    if(WITH_OPENCL)
+      set(PADDLELITE_URL "${PADDLELITE_URL_PREFIX}/lite-android-${ANDROID_ABI}-opencl-0.0.0.ab000121e.tgz")
+      if(ANDROID_ABI MATCHES "arm64-v8a")
+        set(PADDLELITE_URL "${PADDLELITE_URL_PREFIX}/lite-android-${ANDROID_ABI}-fp16-opencl-0.0.0.ab000121e.tgz")
+      endif()
+    else()
+      # Mobile Arm CPU
+      set(PADDLELITE_URL "${PADDLELITE_URL_PREFIX}/lite-android-${ANDROID_ABI}-0.0.0.ab000121e.tgz")
+      if(ANDROID_ABI MATCHES "arm64-v8a")
+        set(PADDLELITE_URL "${PADDLELITE_URL_PREFIX}/lite-android-${ANDROID_ABI}-fp16-0.0.0.ab000121e.tgz")
+      endif()
+    endif()
     set(PADDLELITE_VERSION 0.0.0.ab000121e)
   else() # Linux
     if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
diff --git a/cmake/summary.cmake b/cmake/summary.cmake
index ebc66032e..076c28dd5 100755
--- a/cmake/summary.cmake
+++ b/cmake/summary.cmake
@@ -48,6 +48,7 @@ function(fastdeploy_summary)
   message(STATUS "  ENABLE_CVCUDA            : ${ENABLE_CVCUDA}")
   message(STATUS "  WITH_GPU                 : ${WITH_GPU}")
   message(STATUS "  WITH_IPU                 : ${WITH_IPU}")
+  message(STATUS "  WITH_OPENCL              : ${WITH_OPENCL}")
   message(STATUS "  WITH_TESTING             : ${WITH_TESTING}")
   message(STATUS "  WITH_ASCEND              : ${WITH_ASCEND}")
   message(STATUS "  WITH_DIRECTML            : ${WITH_DIRECTML}")
diff --git a/fastdeploy/runtime/backends/lite/configure_hardware.cc b/fastdeploy/runtime/backends/lite/configure_hardware.cc
index 28d22d2db..c8a2af83a 100644
--- a/fastdeploy/runtime/backends/lite/configure_hardware.cc
+++ b/fastdeploy/runtime/backends/lite/configure_hardware.cc
@@ -49,6 +49,31 @@ void LiteBackend::ConfigureCpu(const LiteBackendOption& option) {
   config_.set_valid_places(GetPlacesForCpu(option));
 }
 
+void LiteBackend::ConfigureGpu(const LiteBackendOption& option) {
+  std::vector<paddle::lite_api::Place> valid_places;
+  if (option.enable_fp16) {
+    valid_places.emplace_back(paddle::lite_api::Place{
+        TARGET(kOpenCL), PRECISION(kFP16), DATALAYOUT(kImageDefault)});
+    valid_places.emplace_back(paddle::lite_api::Place{
+        TARGET(kOpenCL), PRECISION(kFP16), DATALAYOUT(kImageFolder)});
+  }
+  valid_places.emplace_back(paddle::lite_api::Place{
+      TARGET(kOpenCL), PRECISION(kFloat)});
+  valid_places.emplace_back(paddle::lite_api::Place{
+      TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kImageDefault)});
+  valid_places.emplace_back(paddle::lite_api::Place{
+      TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kImageFolder)});
+  valid_places.emplace_back(paddle::lite_api::Place{
+      TARGET(kOpenCL), PRECISION(kAny)});
+  valid_places.emplace_back(paddle::lite_api::Place{
+      TARGET(kOpenCL), PRECISION(kInt32)});
+  valid_places.emplace_back(paddle::lite_api::Place{
+      TARGET(kARM), PRECISION(kInt8)});
+  valid_places.emplace_back(paddle::lite_api::Place{
+      TARGET(kARM), PRECISION(kFloat)});
+  config_.set_valid_places(valid_places);
+}
+
 void LiteBackend::ConfigureKunlunXin(const LiteBackendOption& option) {
   std::vector<paddle::lite_api::Place> valid_places;
   // TODO(yeliang): Placing kInt8 first may cause accuracy issues of some model
diff --git a/fastdeploy/runtime/backends/lite/lite_backend.cc b/fastdeploy/runtime/backends/lite/lite_backend.cc
index 26fcc0acc..bc6487853 100644
--- a/fastdeploy/runtime/backends/lite/lite_backend.cc
+++ b/fastdeploy/runtime/backends/lite/lite_backend.cc
@@ -40,6 +40,8 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
 
   if (option_.device == Device::CPU) {
     ConfigureCpu(option_);
+  } else if (option_.device == Device::GPU) {
+    ConfigureGpu(option_);
   } else if (option_.device == Device::TIMVX) {
     ConfigureTimvx(option_);
   } else if (option_.device == Device::KUNLUNXIN) {
@@ -70,15 +72,22 @@ bool LiteBackend::Init(const RuntimeOption& runtime_option) {
     return false;
   }
   if (runtime_option.device != Device::CPU &&
+      runtime_option.device != Device::GPU &&
       runtime_option.device != Device::KUNLUNXIN &&
       runtime_option.device != Device::ASCEND &&
       runtime_option.device != Device::TIMVX) {
     FDERROR << "PaddleLiteBackend only supports "
-               "Device::CPU/Device::TIMVX/Device::KUNLUNXIN/Device::ASCEND, "
+               "Device::CPU/Device::GPU/Device::TIMVX/Device::KUNLUNXIN/Device::ASCEND, "
                "but now it's "
             << runtime_option.device << "." << std::endl;
     return false;
   }
+  if (runtime_option.device == Device::GPU &&
+      !paddle::lite_api::IsOpenCLBackendValid()) {
+    FDERROR << "PaddleLiteBackend GPU (OpenCL) is not supported by the current device."
+            << std::endl;
+    return false;
+  }
   if (runtime_option.model_from_memory_) {
     FDERROR << "PaddleLiteBackend doesn't support load model from memory, "
                "please load model from disk."
diff --git a/fastdeploy/runtime/backends/lite/lite_backend.h b/fastdeploy/runtime/backends/lite/lite_backend.h
index 15e71b50a..aaad37b94 100644
--- a/fastdeploy/runtime/backends/lite/lite_backend.h
+++ b/fastdeploy/runtime/backends/lite/lite_backend.h
@@ -51,6 +51,7 @@ class LiteBackend : public BaseBackend {
 
   void BuildOption(const LiteBackendOption& option);
   void ConfigureCpu(const LiteBackendOption& option);
+  void ConfigureGpu(const LiteBackendOption& option);
   void ConfigureTimvx(const LiteBackendOption& option);
   void ConfigureAscend(const LiteBackendOption& option);
   void ConfigureKunlunXin(const LiteBackendOption& option);
diff --git a/fastdeploy/runtime/enum_variables.h b/fastdeploy/runtime/enum_variables.h
index b40f814b2..b8427303e 100644
--- a/fastdeploy/runtime/enum_variables.h
+++ b/fastdeploy/runtime/enum_variables.h
@@ -35,7 +35,7 @@ enum Backend {
   PDINFER,  ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU
   POROS,  ///< Poros, support TorchScript format model, CPU / Nvidia GPU
   OPENVINO,  ///< Intel OpenVINO, support Paddle/ONNX format, CPU only
-  LITE,  ///< Paddle Lite, support Paddle format model, ARM CPU only
+  LITE,  ///< Paddle Lite, support Paddle format model, ARM CPU / ARM GPU
   RKNPU2,  ///< RKNPU2, support RKNN format model, Rockchip NPU only
   SOPHGOTPU,  ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only
   HORIZONNPU,  ///< HORIZONNPU, support Horizon format model, Horizon NPU
@@ -93,7 +93,8 @@ static std::map<Device, std::vector<Backend>>
     s_default_backends_by_device = {
     {Device::CPU, {Backend::LITE, Backend::PDINFER, Backend::ORT,
                    Backend::OPENVINO, Backend::POROS}},
-    {Device::GPU, {Backend::PDINFER, Backend::ORT, Backend::TRT, Backend::POROS}},
+    {Device::GPU, {Backend::LITE, Backend::PDINFER, Backend::ORT,
+                   Backend::TRT, Backend::POROS}},
     {Device::RKNPU, {Backend::RKNPU2}},
     {Device::SUNRISENPU, {Backend::HORIZONNPU}},
     {Device::IPU, {Backend::PDINFER}},
diff --git a/fastdeploy/runtime/runtime_option.cc b/fastdeploy/runtime/runtime_option.cc
index b71360353..4bbc8f721 100644
--- a/fastdeploy/runtime/runtime_option.cc
+++ b/fastdeploy/runtime/runtime_option.cc
@@ -46,9 +46,14 @@ void RuntimeOption::SetEncryptionKey(const std::string& encryption_key) {
 }
 
 void RuntimeOption::UseGpu(int gpu_id) {
-#ifdef WITH_GPU
+#if defined(WITH_GPU) || defined(WITH_OPENCL)
   device = Device::GPU;
   device_id = gpu_id;
+
+#if defined(WITH_OPENCL) && defined(ENABLE_LITE_BACKEND)
+  paddle_lite_option.device = device;
+#endif
+
 #else
   FDWARNING << "The FastDeploy didn't compile with GPU, will force to use CPU."
             << std::endl;