diff --git a/fastdeploy/runtime/backends/lite/lite_backend.cc b/fastdeploy/runtime/backends/lite/lite_backend.cc
index 39cf2ebdd..26fcc0acc 100644
--- a/fastdeploy/runtime/backends/lite/lite_backend.cc
+++ b/fastdeploy/runtime/backends/lite/lite_backend.cc
@@ -56,18 +56,39 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
   }
 }
 
-bool LiteBackend::InitFromPaddle(const std::string& model_file,
-                                 const std::string& params_file,
-                                 const LiteBackendOption& option) {
+bool LiteBackend::Init(const RuntimeOption& runtime_option) {
   if (initialized_) {
     FDERROR << "LiteBackend is already initialized, cannot initialize again."
             << std::endl;
     return false;
   }
-  config_.set_model_file(model_file);
-  config_.set_param_file(params_file);
-  BuildOption(option);
+  if (runtime_option.model_format != ModelFormat::PADDLE) {
+    FDERROR
+        << "PaddleLiteBackend only supports model format PADDLE, but now it's "
+        << runtime_option.model_format << "." << std::endl;
+    return false;
+  }
+  if (runtime_option.device != Device::CPU &&
+      runtime_option.device != Device::KUNLUNXIN &&
+      runtime_option.device != Device::ASCEND &&
+      runtime_option.device != Device::TIMVX) {
+    FDERROR << "PaddleLiteBackend only supports "
+               "Device::CPU/Device::TIMVX/Device::KUNLUNXIN/Device::ASCEND, "
+               "but now it's "
+            << runtime_option.device << "." << std::endl;
+    return false;
+  }
+  if (runtime_option.model_from_memory_) {
+    FDERROR << "PaddleLiteBackend doesn't support loading a model from "
+               "memory, please load the model from disk."
+            << std::endl;
+    return false;
+  }
+
+  config_.set_model_file(runtime_option.model_file);
+  config_.set_param_file(runtime_option.params_file);
+  BuildOption(runtime_option.paddle_lite_option);
   predictor_ =
       paddle::lite_api::CreatePaddlePredictor<paddle::lite_api::CxxConfig>(
           config_);
@@ -177,7 +198,7 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
       FDASSERT(false, "Unexpected data type of %d.", inputs[i].dtype);
     }
   }
-
+  RUNTIME_PROFILE_LOOP_BEGIN(1)
   predictor_->Run();
   RUNTIME_PROFILE_LOOP_END
diff --git a/fastdeploy/runtime/backends/lite/lite_backend.h b/fastdeploy/runtime/backends/lite/lite_backend.h
index bb01551a0..bd738545a 100644
--- a/fastdeploy/runtime/backends/lite/lite_backend.h
+++ b/fastdeploy/runtime/backends/lite/lite_backend.h
@@ -22,6 +22,7 @@
 #include "paddle_api.h"  // NOLINT
 
 #include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/runtime_option.h"
 #include "fastdeploy/runtime/backends/lite/option.h"
 
 namespace fastdeploy {
@@ -30,11 +31,8 @@ class LiteBackend : public BaseBackend {
  public:
   LiteBackend() {}
   virtual ~LiteBackend() = default;
-  void BuildOption(const LiteBackendOption& option);
 
-  bool InitFromPaddle(const std::string& model_file,
-                      const std::string& params_file,
-                      const LiteBackendOption& option = LiteBackendOption());
+  bool Init(const RuntimeOption& option);
 
   bool Infer(std::vector<FDTensor>& inputs,
              std::vector<FDTensor>* outputs,
@@ -50,6 +48,8 @@ class LiteBackend : public BaseBackend {
   std::vector<TensorInfo> GetOutputInfos() override;
 
  private:
+  void BuildOption(const LiteBackendOption& option);
+
   void ConfigureCpu(const LiteBackendOption& option);
   void ConfigureTimvx(const LiteBackendOption& option);
   void ConfigureAscend(const LiteBackendOption& option);
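With these two files changed, Paddle Lite initialization becomes a single polymorphic call: `LiteBackend::Init()` now performs the format/device/memory checks itself before building the Paddle Lite config. A minimal caller-side sketch of the new entry point (the model paths are hypothetical placeholders, and this assumes a FastDeploy build with ENABLE_LITE_BACKEND=ON):

```cpp
// Sketch only: drives the new LiteBackend::Init() entry point directly.
#include "fastdeploy/runtime/backends/lite/lite_backend.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.model_file = "model.pdmodel";     // hypothetical path
  option.params_file = "model.pdiparams";  // hypothetical path
  option.model_format = fastdeploy::ModelFormat::PADDLE;
  option.device = fastdeploy::Device::CPU;

  fastdeploy::LiteBackend backend;
  // Init() rejects non-PADDLE formats, unsupported devices, and
  // in-memory models before it ever touches the Paddle Lite config.
  if (!backend.Init(option)) {
    return -1;
  }
  return 0;
}
```

Previously this validation lived in `Runtime::CreateLiteBackend()`, where a bad device/format combination aborted via FDASSERT; now `Init()` reports the error through FDERROR and returns false.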
diff --git a/fastdeploy/runtime/backends/tensorrt/trt_backend.cc b/fastdeploy/runtime/backends/tensorrt/trt_backend.cc
index d64a946f7..74bd3ae4f 100644
--- a/fastdeploy/runtime/backends/tensorrt/trt_backend.cc
+++ b/fastdeploy/runtime/backends/tensorrt/trt_backend.cc
@@ -113,6 +113,50 @@ bool TrtBackend::LoadTrtCache(const std::string& trt_engine_file) {
   return true;
 }
 
+bool TrtBackend::Init(const RuntimeOption& runtime_option) {
+  if (runtime_option.device != Device::GPU) {
+    FDERROR << "TrtBackend only supports Device::GPU, but now it's "
+            << runtime_option.device << "." << std::endl;
+    return false;
+  }
+  if (runtime_option.model_format != ModelFormat::PADDLE &&
+      runtime_option.model_format != ModelFormat::ONNX) {
+    FDERROR
+        << "TrtBackend only supports model format PADDLE/ONNX, but now it's "
+        << runtime_option.model_format << "." << std::endl;
+    return false;
+  }
+  if (runtime_option.model_format == ModelFormat::PADDLE) {
+    if (runtime_option.model_from_memory_) {
+      return InitFromPaddle(runtime_option.model_file,
+                            runtime_option.params_file,
+                            runtime_option.trt_option);
+    } else {
+      std::string model_buffer;
+      std::string params_buffer;
+      FDASSERT(ReadBinaryFromFile(runtime_option.model_file, &model_buffer),
+               "Failed to read model file %s.",
+               runtime_option.model_file.c_str());
+      FDASSERT(ReadBinaryFromFile(runtime_option.params_file, &params_buffer),
+               "Failed to read parameters file %s.",
+               runtime_option.params_file.c_str());
+      return InitFromPaddle(model_buffer, params_buffer,
+                            runtime_option.trt_option);
+    }
+  } else {
+    if (runtime_option.model_from_memory_) {
+      return InitFromOnnx(runtime_option.model_file,
+                          runtime_option.trt_option);
+    } else {
+      std::string model_buffer;
+      FDASSERT(ReadBinaryFromFile(runtime_option.model_file, &model_buffer),
+               "Failed to read model file %s.",
+               runtime_option.model_file.c_str());
+      return InitFromOnnx(model_buffer, runtime_option.trt_option);
+    }
+  }
+  return true;
+}
+
 bool TrtBackend::InitFromPaddle(const std::string& model_buffer,
                                 const std::string& params_buffer,
                                 const TrtBackendOption& option, bool verbose) {
@@ -291,14 +335,14 @@ bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
   cudaSetDevice(option_.gpu_id);
   SetInputs(inputs);
   AllocateOutputsBuffer(outputs, copy_to_fd);
-
+  RUNTIME_PROFILE_LOOP_BEGIN(1)
   if (!context_->enqueueV2(bindings_.data(), stream_, nullptr)) {
     FDERROR << "Failed to Infer with TensorRT." << std::endl;
     return false;
   }
   RUNTIME_PROFILE_LOOP_END
-
+
   for (size_t i = 0; i < outputs->size(); ++i) {
     // if the final output tensor's dtype is different from the model output
    // tensor's dtype, then we need cast the data to the final output's dtype
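One convention worth calling out: `Init()` assumes that when `model_from_memory_` is true, `model_file` and `params_file` already hold the serialized model bytes rather than filesystem paths, so they are forwarded to `InitFromPaddle()`/`InitFromOnnx()` unchanged; otherwise it reads the files into buffers first. A sketch of the file-backed ONNX path from a caller's perspective (the path and the helper name are hypothetical):

```cpp
// Sketch: the file-backed ONNX path through the unified entry point.
// MakeTrtBackend is a hypothetical helper, not a FastDeploy API.
#include "fastdeploy/runtime/backends/tensorrt/trt_backend.h"

bool MakeTrtBackend(fastdeploy::TrtBackend* backend) {
  fastdeploy::RuntimeOption option;
  option.model_file = "model.onnx";  // hypothetical path
  option.model_format = fastdeploy::ModelFormat::ONNX;
  option.device = fastdeploy::Device::GPU;  // anything else now fails fast

  // Init() itself calls ReadBinaryFromFile() and then InitFromOnnx(),
  // so the caller no longer manages the intermediate model buffer.
  return backend->Init(option);
}
```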
diff --git a/fastdeploy/runtime/backends/tensorrt/trt_backend.h b/fastdeploy/runtime/backends/tensorrt/trt_backend.h
index 84698ac9f..74d1da36f 100755
--- a/fastdeploy/runtime/backends/tensorrt/trt_backend.h
+++ b/fastdeploy/runtime/backends/tensorrt/trt_backend.h
@@ -70,14 +70,8 @@ FDDataType GetFDDataType(const nvinfer1::DataType& dtype);
 class TrtBackend : public BaseBackend {
  public:
   TrtBackend() : engine_(nullptr), context_(nullptr) {}
-  void BuildOption(const TrtBackendOption& option);
 
-  bool InitFromPaddle(const std::string& model_buffer,
-                      const std::string& params_buffer,
-                      const TrtBackendOption& option = TrtBackendOption(),
-                      bool verbose = false);
-  bool InitFromOnnx(const std::string& model_buffer,
-                    const TrtBackendOption& option = TrtBackendOption());
+  bool Init(const RuntimeOption& runtime_option);
 
   bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
              bool copy_to_fd = true) override;
@@ -98,6 +92,15 @@ class TrtBackend : public BaseBackend {
   }
 
  private:
+  void BuildOption(const TrtBackendOption& option);
+
+  bool InitFromPaddle(const std::string& model_buffer,
+                      const std::string& params_buffer,
+                      const TrtBackendOption& option = TrtBackendOption(),
+                      bool verbose = false);
+  bool InitFromOnnx(const std::string& model_buffer,
+                    const TrtBackendOption& option = TrtBackendOption());
+
   TrtBackendOption option_;
   std::shared_ptr<nvinfer1::ICudaEngine> engine_;
   std::shared_ptr<nvinfer1::IExecutionContext> context_;
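The header-side change mirrors the Lite backend: `Init(const RuntimeOption&)` is the only public initialization entry, while `BuildOption()` and the format-specific loaders move to the private section. A minimal sketch of the resulting class shape (simplified names; not the actual FastDeploy declarations):

```cpp
// Sketch of the resulting pattern only; not the real FastDeploy headers.
#include <string>

namespace fastdeploy {
struct RuntimeOption;  // full definition lives in runtime_option.h

class BaseBackend {
 public:
  virtual ~BaseBackend() = default;
  // Single polymorphic entry point: each backend validates the option
  // (device, model format, memory vs. disk) and dispatches internally.
  virtual bool Init(const RuntimeOption& option) = 0;
};

class SomeBackend : public BaseBackend {
 public:
  bool Init(const RuntimeOption& option) override;  // the only public entry

 private:
  // Format-specific loaders are no longer reachable from Runtime, which
  // is what lets Runtime talk to backends through BaseBackend* alone.
  bool InitFromPaddle(const std::string& model_buffer,
                      const std::string& params_buffer);
  bool InitFromOnnx(const std::string& model_buffer);
};
}  // namespace fastdeploy
```

Making the loaders private is what allows the runtime.cc changes below to drop the `dynamic_cast<TrtBackend*>`/`dynamic_cast<LiteBackend*>` calls and drive every backend through the base-class pointer.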
diff --git a/fastdeploy/runtime/runtime.cc b/fastdeploy/runtime/runtime.cc
index e6bd14456..2d68f7c87 100644
--- a/fastdeploy/runtime/runtime.cc
+++ b/fastdeploy/runtime/runtime.cc
@@ -324,12 +324,6 @@ void Runtime::CreateOrtBackend() {
 }
 
 void Runtime::CreateTrtBackend() {
-  FDASSERT(option.device == Device::GPU,
-           "Backend::TRT only supports Device::GPU.");
-  FDASSERT(option.model_format == ModelFormat::PADDLE ||
-               option.model_format == ModelFormat::ONNX,
-           "TrtBackend only support model format of ModelFormat::PADDLE / "
-           "ModelFormat::ONNX.");
 #ifdef ENABLE_TRT_BACKEND
   option.trt_option.model_file = option.model_file;
   option.trt_option.params_file = option.params_file;
@@ -338,40 +332,8 @@ void Runtime::CreateTrtBackend() {
   option.trt_option.enable_pinned_memory = option.enable_pinned_memory;
   option.trt_option.external_stream_ = option.external_stream_;
   backend_ = utils::make_unique<TrtBackend>();
-  auto casted_backend = dynamic_cast<TrtBackend*>(backend_.get());
-  casted_backend->benchmark_option_ = option.benchmark_option;
-
-  if (option.model_format == ModelFormat::ONNX) {
-    if (option.model_from_memory_) {
-      FDASSERT(
-          casted_backend->InitFromOnnx(option.model_file, option.trt_option),
-          "Load model from ONNX failed while initliazing TrtBackend.");
-      ReleaseModelMemoryBuffer();
-    } else {
-      std::string model_buffer = "";
-      FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
-               "Fail to read binary from model file");
-      FDASSERT(casted_backend->InitFromOnnx(model_buffer, option.trt_option),
-               "Load model from ONNX failed while initliazing TrtBackend.");
-    }
-  } else {
-    if (option.model_from_memory_) {
-      FDASSERT(casted_backend->InitFromPaddle(
-                   option.model_file, option.params_file, option.trt_option),
-               "Load model from Paddle failed while initliazing TrtBackend.");
-      ReleaseModelMemoryBuffer();
-    } else {
-      std::string model_buffer = "";
-      std::string params_buffer = "";
-
-      FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
-               "Fail to read binary from model file");
-      FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
-               "Fail to read binary from parameter file");
-      FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
-                                              option.trt_option),
-               "Load model from Paddle failed while initliazing TrtBackend.");
-    }
-  }
+  backend_->benchmark_option_ = option.benchmark_option;
+  FDASSERT(backend_->Init(option), "Failed to initialize TensorRT backend.");
 #else
   FDASSERT(false,
            "TrtBackend is not available, please compiled with "
@@ -383,29 +345,18 @@ void Runtime::CreateTrtBackend() {
 
 void Runtime::CreateLiteBackend() {
 #ifdef ENABLE_LITE_BACKEND
-  FDASSERT(option.model_from_memory_ == false,
-           "LiteBackend don't support to load model from memory");
-  FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX ||
-               option.device == Device::KUNLUNXIN ||
-               option.device == Device::ASCEND,
-           "Backend::LITE only supports "
-           "Device::CPU/Device::TIMVX/Device::KUNLUNXIN/Device::ASCEND.");
-  FDASSERT(option.model_format == ModelFormat::PADDLE,
-           "LiteBackend only support model format of ModelFormat::PADDLE");
   backend_ = utils::make_unique<LiteBackend>();
-  auto casted_backend = dynamic_cast<LiteBackend*>(backend_.get());
-  casted_backend->benchmark_option_ = option.benchmark_option;
+  backend_->benchmark_option_ = option.benchmark_option;
 
-  FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
-                                          option.paddle_lite_option),
+  FDASSERT(backend_->Init(option),
           "Load model from nb file failed while initializing LiteBackend.");
 #else
   FDASSERT(false,
            "LiteBackend is not available, please compiled with "
            "ENABLE_LITE_BACKEND=ON.");
 #endif
-  FDINFO << "Runtime initialized with Backend::LITE in " << option.device << "."
-         << std::endl;
+  FDINFO << "Runtime initialized with Backend::PDLITE in " << option.device
+         << "." << std::endl;
 }
 
 void Runtime::CreateRKNPU2Backend() {
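End-user code is unaffected by the refactor: a typical Runtime setup still looks like the sketch below (model path hypothetical, assuming a TensorRT-enabled build), the only difference being that device/format validation now happens inside `backend_->Init(option)` rather than in `CreateTrtBackend()` itself:

```cpp
// Sketch: typical end-user setup; validation now happens in backend Init().
#include "fastdeploy/runtime/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.model_file = "model.onnx";  // hypothetical path
  option.model_format = fastdeploy::ModelFormat::ONNX;
  option.device = fastdeploy::Device::GPU;
  option.backend = fastdeploy::Backend::TRT;

  fastdeploy::Runtime runtime;
  // CreateTrtBackend() now only copies the TRT options, allocates the
  // backend, and asserts on backend_->Init(option).
  if (!runtime.Init(option)) {
    return -1;
  }
  return 0;
}
```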