diff --git a/fastdeploy/runtime/backends/backend.h b/fastdeploy/runtime/backends/backend.h
index 5affeb756..6fe1c0c32 100644
--- a/fastdeploy/runtime/backends/backend.h
+++ b/fastdeploy/runtime/backends/backend.h
@@ -55,17 +55,27 @@ class BaseBackend {
   virtual bool Initialized() const { return initialized_; }
 
+  // Get number of inputs of the model
   virtual int NumInputs() const = 0;
+  // Get number of outputs of the model
   virtual int NumOutputs() const = 0;
+  // Get information of input tensor
   virtual TensorInfo GetInputInfo(int index) = 0;
+  // Get information of output tensor
   virtual TensorInfo GetOutputInfo(int index) = 0;
+  // Get information of all the input tensors
   virtual std::vector<TensorInfo> GetInputInfos() = 0;
+  // Get information of all the output tensors
   virtual std::vector<TensorInfo> GetOutputInfos() = 0;
+
+  // If copy_to_fd is true, copy memory data to FDTensor;
   // else share memory to FDTensor(only Paddle、ORT、TRT、OpenVINO support it)
   virtual bool Infer(std::vector<FDTensor>& inputs,
                      std::vector<FDTensor>* outputs,
                      bool copy_to_fd = true) = 0;
+
+  // Optional: for those backends which can share memory
+  // while creating multiple inference engines with the same model file
   virtual std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
                                              int device_id = -1) {
     FDERROR << "Clone no support" << std::endl;
diff --git a/fastdeploy/runtime/enum_variables.cc b/fastdeploy/runtime/enum_variables.cc
index 6f474c883..22afeb9cd 100644
--- a/fastdeploy/runtime/enum_variables.cc
+++ b/fastdeploy/runtime/enum_variables.cc
@@ -78,8 +78,9 @@ std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
     out << "ModelFormat::SOPHGO";
   } else if (format == ModelFormat::TORCHSCRIPT) {
     out << "ModelFormat::TORCHSCRIPT";
+  } else {
+    out << "UNKNOWN-ModelFormat";
   }
-  out << "UNKNOWN-ModelFormat";
   return out;
 }
diff --git a/fastdeploy/runtime/enum_variables.h b/fastdeploy/runtime/enum_variables.h
index ae3450b7f..582ea7418 100644
--- a/fastdeploy/runtime/enum_variables.h
+++ b/fastdeploy/runtime/enum_variables.h
@@ -72,7 +72,8 @@ enum ModelFormat {
 };
 
 /// Describle all the supported backends for specified model format
-static std::map<ModelFormat, std::vector<Backend>> s_default_backends_cfg = {
+static std::map<ModelFormat, std::vector<Backend>>
+    s_default_backends_by_format = {
   {ModelFormat::PADDLE, {Backend::PDINFER, Backend::LITE, Backend::ORT,
                          Backend::OPENVINO, Backend::TRT}},
   {ModelFormat::ONNX, {Backend::ORT, Backend::OPENVINO, Backend::TRT}},
@@ -81,8 +82,22 @@ static std::map<ModelFormat, std::vector<Backend>> s_default_backends_cfg = {
   {ModelFormat::SOPHGO, {Backend::SOPHGOTPU}}
 };
 
+/// Describe all the supported backends for the specified device
+static std::map<Device, std::vector<Backend>>
+    s_default_backends_by_device = {
+  {Device::CPU, {Backend::LITE, Backend::PDINFER, Backend::ORT,
+                 Backend::OPENVINO, Backend::POROS}},
+  {Device::GPU, {Backend::PDINFER, Backend::ORT, Backend::TRT, Backend::POROS}},
+  {Device::RKNPU, {Backend::RKNPU2}},
+  {Device::IPU, {Backend::PDINFER}},
+  {Device::TIMVX, {Backend::LITE}},
+  {Device::KUNLUNXIN, {Backend::LITE}},
+  {Device::ASCEND, {Backend::LITE}},
+  {Device::SOPHGOTPUD, {Backend::SOPHGOTPU}}
+};
+
+
 FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Backend& b);
 FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Device& d);
 FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const ModelFormat& f);
-
 }  // namespace fastdeploy
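
Note: the two tables above drive the new AutoSelectBackend() in runtime.cc below; candidates are collected in the order of the format table, and the first one compiled into the current build wins. A minimal standalone sketch of that intersection, using simplified stand-in enums rather than the real fastdeploy headers:

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    enum class Backend { ORT, TRT, OPENVINO, PDINFER };
    enum class Device { CPU, GPU };

    int main() {
      // Simplified stand-ins for s_default_backends_by_format/_by_device.
      std::map<std::string, std::vector<Backend>> by_format = {
          {"ONNX", {Backend::ORT, Backend::OPENVINO, Backend::TRT}}};
      std::map<Device, std::vector<Backend>> by_device = {
          {Device::GPU, {Backend::PDINFER, Backend::ORT, Backend::TRT}}};

      // Order follows the format table, so for ONNX on GPU the
      // candidates come out as {ORT, TRT}.
      std::vector<Backend> candidates;
      for (auto b0 : by_format["ONNX"])
        for (auto b1 : by_device[Device::GPU])
          if (b0 == b1) candidates.push_back(b0);

      std::cout << candidates.size() << " candidate(s)\n";  // prints "2 candidate(s)"
      return 0;
    }
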
diff --git a/fastdeploy/runtime/runtime.cc b/fastdeploy/runtime/runtime.cc
index ffa135a3a..e247cba5c 100644
--- a/fastdeploy/runtime/runtime.cc
+++ b/fastdeploy/runtime/runtime.cc
@@ -51,55 +51,82 @@ namespace fastdeploy {
-bool Runtime::Init(const RuntimeOption& _option) {
-  option = _option;
-  // Choose default backend by model format
-  if (option.backend == Backend::UNKNOWN) {
-    auto iter = s_default_backends_cfg.find(option.model_format);
-    if (iter == s_default_backends_cfg.end()) {
-      FDERROR << "Cannot found a default backend for model format: "
-              << option.model_format
-              << ", please define the inference backend in RuntimeOption."
-              << std::endl;
-      return false;
-    }
-    for (const auto& b : iter->second) {
-      if (IsBackendAvailable(b)) {
-        option.backend = b;
-        FDINFO << "FastDeploy will choose " << b << " to inference this model."
-               << std::endl;
+bool AutoSelectBackend(RuntimeOption& option) {
+  auto iter0 = s_default_backends_by_format.find(option.model_format);
+  if (iter0 == s_default_backends_by_format.end()) {
+    FDERROR << "Cannot find a default backend for model format: "
+            << option.model_format
+            << ", please define the inference backend in RuntimeOption."
+            << std::endl;
+    return false;
+  }
+
+  auto iter1 = s_default_backends_by_device.find(option.device);
+  if (iter1 == s_default_backends_by_device.end()) {
+    FDERROR << "Cannot find a default backend for device: " << option.device
+            << ", please define the inference backend in RuntimeOption."
+            << std::endl;
+    return false;
+  }
+
+  std::vector<Backend> candidates;
+  for (const auto& b0 : iter0->second) {
+    for (const auto& b1 : iter1->second) {
+      if (b0 == b1) {
+        candidates.push_back(b0);
       }
     }
-    if (option.backend == Backend::UNKNOWN) {
-      FDERROR << "Cannot found available backends for model format: "
-              << option.model_format << "." << std::endl;
+  }
+
+  if (candidates.size() == 0) {
+    FDERROR << "Cannot find available inference backends by model format: "
+            << option.model_format << " with device: " << option.device
+            << std::endl;
+    return false;
+  }
+
+  for (const auto& b : candidates) {
+    if (IsBackendAvailable(b)) {
+      option.backend = b;
+      FDINFO << "FastDeploy will choose " << b
+             << " to run inference on this model." << std::endl;
+      return true;
+    }
+  }
+  std::string debug_message = Str(candidates);
+  FDERROR << "The candidate backends for " << option.model_format << " & "
+          << option.device << " are " << debug_message
+          << ", but none of them has been compiled into the current "
+             "FastDeploy build yet."
+          << std::endl;
+  return false;
+}
+
+bool Runtime::Init(const RuntimeOption& _option) {
+  option = _option;
+
+  // Choose the default backend by model format and device if the backend is
+  // not specified
+  if (option.backend == Backend::UNKNOWN) {
+    if (!AutoSelectBackend(option)) {
       return false;
     }
   }
 
   if (option.backend == Backend::ORT) {
-    FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
-             "Backend::ORT only supports Device::CPU/Device::GPU.");
     CreateOrtBackend();
-    FDINFO << "Runtime initialized with Backend::ORT in " << option.device
-           << "." << std::endl;
   } else if (option.backend == Backend::TRT) {
-    FDASSERT(option.device == Device::GPU,
-             "Backend::TRT only supports Device::GPU.");
     CreateTrtBackend();
-    FDINFO << "Runtime initialized with Backend::TRT in " << option.device
-           << "." << std::endl;
   } else if (option.backend == Backend::PDINFER) {
-    FDASSERT(
-        option.device == Device::CPU || option.device == Device::GPU ||
-            option.device == Device::IPU,
-        "Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU.");
-    FDASSERT(
-        option.model_format == ModelFormat::PADDLE,
-        "Backend::PDINFER only supports model format of ModelFormat::PADDLE.");
     CreatePaddleBackend();
-    FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device
-           << "." << std::endl;
+  } else if (option.backend == Backend::OPENVINO) {
+    CreateOpenVINOBackend();
+  } else if (option.backend == Backend::LITE) {
+    CreateLiteBackend();
+  } else if (option.backend == Backend::RKNPU2) {
+    CreateRKNPU2Backend();
+  } else if (option.backend == Backend::SOPHGOTPU) {
+    CreateSophgoNPUBackend();
   } else if (option.backend == Backend::POROS) {
     FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
              "Backend::POROS only supports Device::CPU/Device::GPU.");
@@ -109,35 +136,6 @@ bool Runtime::Init(const RuntimeOption& _option) {
     FDINFO << "Runtime initialized with Backend::POROS in " << option.device
            << "." << std::endl;
     return true;
-  } else if (option.backend == Backend::OPENVINO) {
-    FDASSERT(option.device == Device::CPU,
-             "Backend::OPENVINO only supports Device::CPU");
-    CreateOpenVINOBackend();
-    FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device
-           << "." << std::endl;
-  } else if (option.backend == Backend::LITE) {
-    FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX ||
-                 option.device == Device::KUNLUNXIN ||
-                 option.device == Device::ASCEND,
-             "Backend::LITE only supports "
-             "Device::CPU/Device::TIMVX/Device::KUNLUNXIN.");
-    CreateLiteBackend();
-    FDINFO << "Runtime initialized with Backend::LITE in " << option.device
-           << "." << std::endl;
-  } else if (option.backend == Backend::RKNPU2) {
-    FDASSERT(option.device == Device::RKNPU,
-             "Backend::RKNPU2 only supports Device::RKNPU2");
-    CreateRKNPU2Backend();
-
-    FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device
-           << "." << std::endl;
-  } else if (option.backend == Backend::SOPHGOTPU) {
-    FDASSERT(option.device == Device::SOPHGOTPUD,
-             "Backend::SOPHGO only supports Device::SOPHGO");
-    CreateSophgoNPUBackend();
-
-    FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device
-           << "." << std::endl;
   } else {
     FDERROR << "Runtime only support "
                "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
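
For reference, the selection path above is driven entirely by RuntimeOption. A hedged usage sketch — SetModelPath and UseGpu are existing RuntimeOption setters, but their exact signatures are not shown in this diff:

    // Leave option.backend as Backend::UNKNOWN and let Init() pick one.
    fastdeploy::RuntimeOption opt;
    opt.SetModelPath("model.onnx", "", fastdeploy::ModelFormat::ONNX);
    opt.UseGpu(0);  // Device::GPU, device_id = 0

    fastdeploy::Runtime runtime;
    if (!runtime.Init(opt)) {
      // Init() now fails here when no candidate backend was compiled in,
      // after logging the candidate list via Str(candidates).
    }
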
@@ -211,6 +209,13 @@ FDTensor* Runtime::GetOutputTensor(const std::string& name) {
 }
 
 void Runtime::CreatePaddleBackend() {
+  FDASSERT(
+      option.device == Device::CPU || option.device == Device::GPU ||
+          option.device == Device::IPU,
+      "Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU.");
+  FDASSERT(
+      option.model_format == ModelFormat::PADDLE,
+      "Backend::PDINFER only supports model format of ModelFormat::PADDLE.");
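+  // These checks used to run in Runtime::Init(); keeping them here means
+  // every caller of CreatePaddleBackend() gets the same validation.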
 #ifdef ENABLE_PADDLE_BACKEND
   auto pd_option = PaddleBackendOption();
   pd_option.model_file = option.model_file;
@@ -265,8 +270,6 @@ void Runtime::CreatePaddleBackend() {
     pd_option.ipu_option = ipu_option;
   }
 #endif
-  FDASSERT(option.model_format == ModelFormat::PADDLE,
-           "PaddleBackend only support model format of ModelFormat::PADDLE.");
   backend_ = utils::make_unique<PaddleBackend>();
   auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
   if (pd_option.model_from_memory_) {
@@ -283,9 +286,17 @@ void Runtime::CreatePaddleBackend() {
            "PaddleBackend is not available, please compiled with "
            "ENABLE_PADDLE_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device
+         << "." << std::endl;
 }
 
 void Runtime::CreateOpenVINOBackend() {
+  FDASSERT(option.device == Device::CPU,
+           "Backend::OPENVINO only supports Device::CPU.");
+  FDASSERT(option.model_format == ModelFormat::PADDLE ||
+               option.model_format == ModelFormat::ONNX,
+           "OpenVINOBackend only supports model format of ModelFormat::PADDLE "
+           "/ ModelFormat::ONNX.");
 #ifdef ENABLE_OPENVINO_BACKEND
   auto ov_option = OpenVINOBackendOption();
   ov_option.cpu_thread_num = option.cpu_thread_num;
@@ -295,10 +306,6 @@ void Runtime::CreateOpenVINOBackend() {
   for (const auto& op : option.ov_cpu_operators) {
     ov_option.cpu_operators.insert(op);
   }
-  FDASSERT(option.model_format == ModelFormat::PADDLE ||
-               option.model_format == ModelFormat::ONNX,
-           "OpenVINOBackend only support model format of ModelFormat::PADDLE / "
-           "ModelFormat::ONNX.");
 
   backend_ = utils::make_unique<OpenVINOBackend>();
   auto casted_backend = dynamic_cast<OpenVINOBackend*>(backend_.get());
@@ -315,9 +322,17 @@ void Runtime::CreateOpenVINOBackend() {
            "OpenVINOBackend is not available, please compiled with "
            "ENABLE_OPENVINO_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device
+         << "." << std::endl;
 }
 
 void Runtime::CreateOrtBackend() {
+  FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
+           "Backend::ORT only supports Device::CPU/Device::GPU.");
+  FDASSERT(option.model_format == ModelFormat::PADDLE ||
+               option.model_format == ModelFormat::ONNX,
+           "OrtBackend only supports model format of ModelFormat::PADDLE / "
+           "ModelFormat::ONNX.");
 #ifdef ENABLE_ORT_BACKEND
   auto ort_option = OrtBackendOption();
   ort_option.graph_optimization_level = option.ort_graph_opt_level;
@@ -328,10 +343,6 @@ void Runtime::CreateOrtBackend() {
   ort_option.gpu_id = option.device_id;
   ort_option.external_stream_ = option.external_stream_;
 
-  FDASSERT(option.model_format == ModelFormat::PADDLE ||
-               option.model_format == ModelFormat::ONNX,
-           "OrtBackend only support model format of ModelFormat::PADDLE / "
-           "ModelFormat::ONNX.");
   backend_ = utils::make_unique<OrtBackend>();
   auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get());
   if (option.model_format == ModelFormat::ONNX) {
@@ -347,9 +358,17 @@ void Runtime::CreateOrtBackend() {
            "OrtBackend is not available, please compiled with "
            "ENABLE_ORT_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::ORT in " << option.device << "."
+         << std::endl;
 }
 
 void Runtime::CreateTrtBackend() {
+  FDASSERT(option.device == Device::GPU,
+           "Backend::TRT only supports Device::GPU.");
+  FDASSERT(option.model_format == ModelFormat::PADDLE ||
+               option.model_format == ModelFormat::ONNX,
+           "TrtBackend only supports model format of ModelFormat::PADDLE / "
+           "ModelFormat::ONNX.");
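+  // Fail fast before any TensorRT state is created below: TRT is GPU-only
+  // and consumes Paddle or ONNX models.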
 #ifdef ENABLE_TRT_BACKEND
   auto trt_option = TrtBackendOption();
   trt_option.model_file = option.model_file;
@@ -367,10 +386,6 @@ void Runtime::CreateTrtBackend() {
   trt_option.enable_pinned_memory = option.enable_pinned_memory;
   trt_option.external_stream_ = option.external_stream_;
 
-  FDASSERT(option.model_format == ModelFormat::PADDLE ||
-               option.model_format == ModelFormat::ONNX,
-           "TrtBackend only support model format of ModelFormat::PADDLE / "
-           "ModelFormat::ONNX.");
   backend_ = utils::make_unique<TrtBackend>();
   auto casted_backend = dynamic_cast<TrtBackend*>(backend_.get());
   if (option.model_format == ModelFormat::ONNX) {
@@ -386,12 +401,19 @@ void Runtime::CreateTrtBackend() {
            "TrtBackend is not available, please compiled with "
            "ENABLE_TRT_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::TRT in " << option.device << "."
+         << std::endl;
 }
 
 void Runtime::CreateLiteBackend() {
-#ifdef ENABLE_LITE_BACKEND
+  FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX ||
+               option.device == Device::KUNLUNXIN ||
+               option.device == Device::ASCEND,
+           "Backend::LITE only supports "
+           "Device::CPU/Device::TIMVX/Device::KUNLUNXIN/Device::ASCEND.");
   FDASSERT(option.model_format == ModelFormat::PADDLE,
            "LiteBackend only support model format of ModelFormat::PADDLE");
+#ifdef ENABLE_LITE_BACKEND
   backend_ = utils::make_unique<LiteBackend>();
   auto casted_backend = dynamic_cast<LiteBackend*>(backend_.get());
   FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
@@ -402,15 +424,19 @@ void Runtime::CreateLiteBackend() {
            "LiteBackend is not available, please compiled with "
            "ENABLE_LITE_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::LITE in " << option.device
+         << "." << std::endl;
 }
 
 void Runtime::CreateRKNPU2Backend() {
+  FDASSERT(option.device == Device::RKNPU,
+           "Backend::RKNPU2 only supports Device::RKNPU.");
+  FDASSERT(option.model_format == ModelFormat::RKNN,
+           "RKNPU2Backend only supports model format of ModelFormat::RKNN.");
 #ifdef ENABLE_RKNPU2_BACKEND
   auto rknpu2_option = RKNPU2BackendOption();
   rknpu2_option.cpu_name = option.rknpu2_cpu_name_;
   rknpu2_option.core_mask = option.rknpu2_core_mask_;
-  FDASSERT(option.model_format == ModelFormat::RKNN,
-           "RKNPU2Backend only support model format of ModelFormat::RKNN");
   backend_ = utils::make_unique<RKNPU2Backend>();
   auto casted_backend = dynamic_cast<RKNPU2Backend*>(backend_.get());
   FDASSERT(casted_backend->InitFromRKNN(option.model_file, rknpu2_option),
@@ -420,13 +446,17 @@ void Runtime::CreateRKNPU2Backend() {
            "RKNPU2Backend is not available, please compiled with "
            "ENABLE_RKNPU2_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device
+         << "." << std::endl;
 }
 
 void Runtime::CreateSophgoNPUBackend() {
-#ifdef ENABLE_SOPHGO_BACKEND
-  auto sophgo_option = SophgoBackendOption();
+  FDASSERT(option.device == Device::SOPHGOTPUD,
+           "Backend::SOPHGOTPU only supports Device::SOPHGOTPUD.");
   FDASSERT(option.model_format == ModelFormat::SOPHGO,
            "SophgoBackend only support model format of ModelFormat::SOPHGO");
+#ifdef ENABLE_SOPHGO_BACKEND
+  auto sophgo_option = SophgoBackendOption();
   backend_ = utils::make_unique<SophgoBackend>();
   auto casted_backend = dynamic_cast<SophgoBackend*>(backend_.get());
   FDASSERT(casted_backend->InitFromSophgo(option.model_file, sophgo_option),
@@ -436,6 +466,8 @@ void Runtime::CreateSophgoNPUBackend() {
            "SophgoBackend is not available, please compiled with "
            "ENABLE_SOPHGO_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device
+         << "." << std::endl;
 }
 
 Runtime* Runtime::Clone(void* stream, int device_id) {
@@ -458,4 +490,36 @@ Runtime* Runtime::Clone(void* stream, int device_id) {
   return runtime;
 }
 
+// Only for the Poros backend
+bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
+                      const RuntimeOption& _option) {
+#ifdef ENABLE_POROS_BACKEND
+  option = _option;
+  auto poros_option = PorosBackendOption();
+  poros_option.use_gpu = (option.device == Device::GPU) ? true : false;
+  poros_option.gpu_id = option.device_id;
+  poros_option.long_to_int = option.long_to_int;
+  poros_option.use_nvidia_tf32 = option.use_nvidia_tf32;
+  poros_option.unconst_ops_thres = option.unconst_ops_thres;
+  poros_option.poros_file = option.poros_file;
+  poros_option.is_dynamic = option.is_dynamic;
+  poros_option.enable_fp16 = option.trt_enable_fp16;
+  poros_option.max_batch_size = option.trt_max_batch_size;
+  poros_option.max_workspace_size = option.trt_max_workspace_size;
+  FDASSERT(
+      option.model_format == ModelFormat::TORCHSCRIPT,
+      "PorosBackend only supports model format of ModelFormat::TORCHSCRIPT.");
+  backend_ = utils::make_unique<PorosBackend>();
+  auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
+  FDASSERT(
+      casted_backend->Compile(option.model_file, prewarm_tensors, poros_option),
+      "Load model from TorchScript failed while initializing PorosBackend.");
+#else
+  FDASSERT(false,
+           "PorosBackend is not available, please compiled with "
+           "ENABLE_POROS_BACKEND=ON.");
+#endif
+  return true;
+}
+
 }  // namespace fastdeploy
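
A hedged sketch of driving the relocated Compile() entry point; UsePorosBackend(), FDDataType::FP32, and the prewarm shape are assumptions based on the surrounding API, not taken from this diff:

    fastdeploy::RuntimeOption opt;
    opt.SetModelPath("model.pt", "", fastdeploy::ModelFormat::TORCHSCRIPT);
    opt.UsePorosBackend();  // assumed RuntimeOption helper

    // One prewarm batch with a single illustrative 1x3x224x224 FP32 input.
    std::vector<std::vector<fastdeploy::FDTensor>> prewarm(1);
    prewarm[0].resize(1);
    prewarm[0][0].Resize({1, 3, 224, 224}, fastdeploy::FDDataType::FP32);

    fastdeploy::Runtime runtime;
    bool ok = runtime.Compile(prewarm, opt);  // asserts on TORCHSCRIPT format
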
diff --git a/fastdeploy/runtime/runtime.h b/fastdeploy/runtime/runtime.h
index f256c23d5..b3438caa4 100755
--- a/fastdeploy/runtime/runtime.h
+++ b/fastdeploy/runtime/runtime.h
@@ -51,15 +51,6 @@ struct FASTDEPLOY_DECL Runtime {
    */
   bool Infer();
 
-  /** \brief Compile TorchScript Module, only for Poros backend
-   *
-   * \param[in] prewarm_tensors Prewarm datas for compile
-   * \param[in] _option Runtime option
-   * \return true if compile successed, otherwise false
-   */
-  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
-               const RuntimeOption& _option);
-
   /** \brief Get number of inputs
    */
   int NumInputs() { return backend_->NumInputs(); }
@@ -94,6 +85,15 @@ struct FASTDEPLOY_DECL Runtime {
 
   RuntimeOption option;
 
+  /** \brief Compile TorchScript Module, only for Poros backend
+   *
+   * \param[in] prewarm_tensors Prewarm data for compilation
+   * \param[in] _option Runtime option
+   * \return true if compilation succeeded, otherwise false
+   */
+  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
+               const RuntimeOption& _option);
+
  private:
   void CreateOrtBackend();
   void CreatePaddleBackend();
diff --git a/fastdeploy/runtime/runtime_option.cc b/fastdeploy/runtime/runtime_option.cc
index 1f1bfa1ad..42594474e 100644
--- a/fastdeploy/runtime/runtime_option.cc
+++ b/fastdeploy/runtime/runtime_option.cc
@@ -337,37 +337,6 @@ void RuntimeOption::SetOpenVINOStreams(int num_streams) {
   ov_num_streams = num_streams;
 }
 
-bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
-                      const RuntimeOption& _option) {
-#ifdef ENABLE_POROS_BACKEND
-  option = _option;
-  auto poros_option = PorosBackendOption();
-  poros_option.use_gpu = (option.device == Device::GPU) ? true : false;
-  poros_option.gpu_id = option.device_id;
-  poros_option.long_to_int = option.long_to_int;
-  poros_option.use_nvidia_tf32 = option.use_nvidia_tf32;
-  poros_option.unconst_ops_thres = option.unconst_ops_thres;
-  poros_option.poros_file = option.poros_file;
-  poros_option.is_dynamic = option.is_dynamic;
-  poros_option.enable_fp16 = option.trt_enable_fp16;
-  poros_option.max_batch_size = option.trt_max_batch_size;
-  poros_option.max_workspace_size = option.trt_max_workspace_size;
-  FDASSERT(
-      option.model_format == ModelFormat::TORCHSCRIPT,
-      "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
-  backend_ = utils::make_unique<PorosBackend>();
-  auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
-  FDASSERT(
-      casted_backend->Compile(option.model_file, prewarm_tensors, poros_option),
-      "Load model from Torchscript failed while initliazing PorosBackend.");
-#else
-  FDASSERT(false,
-           "PorosBackend is not available, please compiled with "
-           "ENABLE_POROS_BACKEND=ON.");
-#endif
-  return true;
-}
-
 void RuntimeOption::EnablePaddleTrtCollectShape() { pd_collect_shape = true; }
 
 void RuntimeOption::DisablePaddleTrtCollectShape() { pd_collect_shape = false; }
diff --git a/fastdeploy/utils/utils.h b/fastdeploy/utils/utils.h
index a2ea3a3a7..1306d2f60 100644
--- a/fastdeploy/utils/utils.h
+++ b/fastdeploy/utils/utils.h
@@ -203,8 +203,7 @@ FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file,
 FASTDEPLOY_DECL std::vector<int64_t> GetStride(const std::vector<int64_t>& dims);
 
-template <typename T,
-          typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
+template <typename T>
 std::string Str(const std::vector<T>& shape) {
   std::ostringstream oss;
   oss << "[ " << shape[0];
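
The dropped std::is_integral constraint is what allows the new AutoSelectBackend() to call Str() on a std::vector<Backend>: the only requirement left on T is that it can be streamed, which Backend satisfies via the operator<< declared in enum_variables.h. A minimal sketch of the same pattern, with a simplified Str() body (the real separator formatting is not shown in this hunk):

    #include <ostream>
    #include <sstream>
    #include <string>
    #include <vector>

    enum class Backend { ORT, TRT };

    // Stand-in for the operator<< that fastdeploy declares for Backend.
    std::ostream& operator<<(std::ostream& os, const Backend& b) {
      return os << (b == Backend::ORT ? "Backend::ORT" : "Backend::TRT");
    }

    template <typename T>
    std::string Str(const std::vector<T>& v) {
      std::ostringstream oss;
      oss << "[ " << v[0];
      for (size_t i = 1; i < v.size(); ++i) oss << ", " << v[i];
      oss << " ]";
      return oss.str();
    }

    int main() {
      std::vector<Backend> candidates = {Backend::ORT, Backend::TRT};
      std::string msg = Str(candidates);  // "[ Backend::ORT, Backend::TRT ]"
      return msg.empty();
    }
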