diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.cc b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
index dc804e926..1fc45e990 100755
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -98,6 +98,33 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
   }
 }
 
+bool PaddleBackend::Init(const RuntimeOption& runtime_option) {
+  if (!(Supported(runtime_option.model_format, Backend::PDINFER) && Supported(runtime_option.device, Backend::PDINFER))) {
+    return false;
+  }
+
+  auto option = runtime_option;
+  option.paddle_infer_option.model_file = runtime_option.model_file;
+  option.paddle_infer_option.params_file = runtime_option.params_file;
+  option.paddle_infer_option.model_from_memory_ = runtime_option.model_from_memory_;
+  option.paddle_infer_option.device = runtime_option.device;
+  option.paddle_infer_option.device_id = runtime_option.device_id;
+  option.paddle_infer_option.enable_pinned_memory = runtime_option.enable_pinned_memory;
+  option.paddle_infer_option.external_stream_ = runtime_option.external_stream_;
+  option.paddle_infer_option.trt_option = runtime_option.trt_option;
+  option.paddle_infer_option.trt_option.gpu_id = runtime_option.device_id;
+  if (option.model_from_memory_) {
+    return InitFromPaddle(option.model_file, option.params_file, option.paddle_infer_option);
+  } else {
+    std::string model_buffer = "";
+    std::string params_buffer = "";
+    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), "Failed to read model file from %s.", option.model_file.c_str());
+    FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer), "Failed to read parameters file from %s.", option.params_file.c_str());
+    return InitFromPaddle(model_buffer, params_buffer, option.paddle_infer_option);
+  }
+  return false;
+}
+
 bool PaddleBackend::InitFromPaddle(const std::string& model_buffer,
                                    const std::string& params_buffer,
                                    const PaddleBackendOption& option) {
diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.h b/fastdeploy/runtime/backends/paddle/paddle_backend.h
index 60079fed6..f662ca2b6 100755
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.h
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.h
@@ -54,12 +54,7 @@ class PaddleBackend : public BaseBackend {
  public:
   PaddleBackend() {}
   virtual ~PaddleBackend() = default;
-  void BuildOption(const PaddleBackendOption& option);
-
-  bool InitFromPaddle(const std::string& model_buffer,
-                      const std::string& params_buffer,
-                      const PaddleBackendOption& option = PaddleBackendOption());
-
+  bool Init(const RuntimeOption& option);
   bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
              bool copy_to_fd = true) override;
 
@@ -77,6 +72,12 @@ class PaddleBackend : public BaseBackend {
   std::vector<TensorInfo> GetOutputInfos() override;
 
  private:
+  void BuildOption(const PaddleBackendOption& option);
+
+  bool InitFromPaddle(const std::string& model_buffer,
+                      const std::string& params_buffer,
+                      const PaddleBackendOption& option = PaddleBackendOption());
+
   void CollectShapeRun(paddle_infer::Predictor* predictor,
                        const std::map<std::string, std::vector<int>>& shape) const;
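With BuildOption and InitFromPaddle moved into the private section, the backend now configures itself from the full RuntimeOption. A minimal caller-side sketch of the new path, assuming the usual FastDeploy helpers (SetModelPath, UseGpu, UsePaddleInferBackend) and placeholder model paths; none of this code is part of the patch:

// Illustrative sketch only: Runtime hands the whole RuntimeOption to the
// backend, and PaddleBackend::Init() copies what it needs into
// paddle_infer_option itself.
#include <iostream>
#include "fastdeploy/runtime.h"  // assumed umbrella header

int main() {
  fastdeploy::RuntimeOption opt;
  opt.SetModelPath("model.pdmodel", "model.pdiparams");  // placeholder paths
  opt.UseGpu(0);                // device / device_id are read by PaddleBackend::Init
  opt.UsePaddleInferBackend();  // selects Backend::PDINFER
  fastdeploy::Runtime runtime;
  if (!runtime.Init(opt)) {
    std::cerr << "Runtime init failed." << std::endl;
    return -1;
  }
  return 0;
}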
diff --git a/fastdeploy/runtime/backends/tensorrt/option.h b/fastdeploy/runtime/backends/tensorrt/option.h
index 5cee0a7e3..ff28e3e3b 100755
--- a/fastdeploy/runtime/backends/tensorrt/option.h
+++ b/fastdeploy/runtime/backends/tensorrt/option.h
@@ -30,6 +30,9 @@ struct TrtBackendOption {
   /// `max_workspace_size` for TensorRT
   size_t max_workspace_size = 1 << 30;
+  /// Enable log info while converting an ONNX model to TensorRT
+  bool enable_log_info = false;
+
   /*
   * @brief Enable half precison inference, on some device not support half precision, it will fallback to float32 mode
   */
diff --git a/fastdeploy/runtime/backends/tensorrt/option_pybind.cc b/fastdeploy/runtime/backends/tensorrt/option_pybind.cc
index d781256a5..f46f27f95 100644
--- a/fastdeploy/runtime/backends/tensorrt/option_pybind.cc
+++ b/fastdeploy/runtime/backends/tensorrt/option_pybind.cc
@@ -21,6 +21,7 @@ void BindTrtOption(pybind11::module& m) {
   pybind11::class_<TrtBackendOption>(m, "TrtBackendOption")
       .def(pybind11::init())
       .def_readwrite("enable_fp16", &TrtBackendOption::enable_fp16)
+      .def_readwrite("enable_log_info", &TrtBackendOption::enable_log_info)
       .def_readwrite("max_batch_size", &TrtBackendOption::max_batch_size)
       .def_readwrite("max_workspace_size",
                      &TrtBackendOption::max_workspace_size)
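The new flag is exposed both in C++ and through the Python binding above. A hedged C++ usage sketch (the RuntimeOption helpers are the standard FastDeploy ones; only enable_log_info comes from this patch):

#include "fastdeploy/runtime.h"  // assumed umbrella header

// Sketch: request TensorRT conversion/build logging via the new option.
fastdeploy::RuntimeOption MakeVerboseTrtOption() {
  fastdeploy::RuntimeOption opt;
  opt.UseGpu(0);
  opt.UseTrtBackend();
  opt.trt_option.enable_log_info = true;  // consumed by TrtBackend::BuildTrtEngine()
  return opt;
}

On the Python side the same field should be reachable as option.trt_option.enable_log_info = True once the binding above is in place.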
diff --git a/fastdeploy/runtime/backends/tensorrt/trt_backend.cc b/fastdeploy/runtime/backends/tensorrt/trt_backend.cc
index 74bd3ae4f..99ccbe4c7 100644
--- a/fastdeploy/runtime/backends/tensorrt/trt_backend.cc
+++ b/fastdeploy/runtime/backends/tensorrt/trt_backend.cc
@@ -114,6 +114,13 @@ bool TrtBackend::LoadTrtCache(const std::string& trt_engine_file) {
 }
 
 bool TrtBackend::Init(const RuntimeOption& runtime_option) {
+  auto trt_option = runtime_option.trt_option;
+  trt_option.model_file = runtime_option.model_file;
+  trt_option.params_file = runtime_option.params_file;
+  trt_option.model_format = runtime_option.model_format;
+  trt_option.gpu_id = runtime_option.device_id;
+  trt_option.enable_pinned_memory = runtime_option.enable_pinned_memory;
+  trt_option.external_stream_ = runtime_option.external_stream_;
   if (runtime_option.device != Device::GPU) {
     FDERROR << "TrtBackend only supports Device::GPU, but now it's "
             << runtime_option.device << "." << std::endl;
@@ -130,7 +137,7 @@ bool TrtBackend::Init(const RuntimeOption& runtime_option) {
     if (runtime_option.model_from_memory_) {
       return InitFromPaddle(runtime_option.model_file,
                             runtime_option.params_file,
-                            runtime_option.trt_option);
+                            trt_option);
     } else {
       std::string model_buffer;
       std::string params_buffer;
@@ -141,17 +148,17 @@ bool TrtBackend::Init(const RuntimeOption& runtime_option) {
                "Failed to read parameters file %s.",
                runtime_option.params_file.c_str());
       return InitFromPaddle(model_buffer, params_buffer,
-                            runtime_option.trt_option);
+                            trt_option);
     }
   } else {
     if (runtime_option.model_from_memory_) {
-      return InitFromOnnx(runtime_option.model_file, runtime_option.trt_option);
+      return InitFromOnnx(runtime_option.model_file, trt_option);
     } else {
       std::string model_buffer;
       FDASSERT(ReadBinaryFromFile(runtime_option.model_file, &model_buffer),
                "Failed to read model file %s.",
                runtime_option.model_file.c_str());
-      return InitFromOnnx(model_buffer, runtime_option.trt_option);
+      return InitFromOnnx(model_buffer, trt_option);
     }
   }
   return true;
@@ -525,6 +532,9 @@ void TrtBackend::AllocateOutputsBuffer(std::vector<FDTensor>* outputs,
 }
 
 bool TrtBackend::BuildTrtEngine() {
+  if (option_.enable_log_info) {
+    FDTrtLogger::Get()->SetLog(true, true);
+  }
   auto config =
       FDUniquePtr<nvinfer1::IBuilderConfig>(builder_->createBuilderConfig());
   if (!config) {
diff --git a/fastdeploy/runtime/backends/tensorrt/utils.h b/fastdeploy/runtime/backends/tensorrt/utils.h
index 3d4c11f31..b2fe8ee99 100644
--- a/fastdeploy/runtime/backends/tensorrt/utils.h
+++ b/fastdeploy/runtime/backends/tensorrt/utils.h
@@ -220,20 +220,30 @@ class FDTrtLogger : public nvinfer1::ILogger {
     logger = new FDTrtLogger();
     return logger;
   }
+  void SetLog(bool enable_info = false, bool enable_warning = false) {
+    enable_info_ = enable_info;
+    enable_warning_ = enable_warning;
+  }
+
   void log(nvinfer1::ILogger::Severity severity,
            const char* msg) noexcept override {
     if (severity == nvinfer1::ILogger::Severity::kINFO) {
-      // Disable this log
-      // FDINFO << msg << std::endl;
+      if (enable_info_) {
+        FDINFO << msg << std::endl;
+      }
     } else if (severity == nvinfer1::ILogger::Severity::kWARNING) {
-      // Disable this log
-      // FDWARNING << msg << std::endl;
+      if (enable_warning_) {
+        FDWARNING << msg << std::endl;
+      }
     } else if (severity == nvinfer1::ILogger::Severity::kERROR) {
       FDERROR << msg << std::endl;
     } else if (severity == nvinfer1::ILogger::Severity::kINTERNAL_ERROR) {
       FDASSERT(false, "%s", msg);
     }
   }
+ private:
+  bool enable_info_ = false;
+  bool enable_warning_ = false;
 };
 
 struct ShapeRangeInfo {
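FDTrtLogger is a process-wide singleton, so the flag flipped in BuildTrtEngine affects every later TensorRT message. A small sketch of the gating, assuming the fastdeploy namespace and the internal header path shown in the diff:

#include "fastdeploy/runtime/backends/tensorrt/utils.h"  // internal header, for illustration only

void DemoTrtLoggerGate() {
  // INFO/WARNING are dropped until SetLog() re-enables them;
  // ERROR and INTERNAL_ERROR always pass through.
  auto* logger = fastdeploy::FDTrtLogger::Get();
  logger->log(nvinfer1::ILogger::Severity::kINFO, "hidden by default");
  logger->SetLog(/*enable_info=*/true, /*enable_warning=*/true);
  logger->log(nvinfer1::ILogger::Severity::kINFO, "now printed via FDINFO");
}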
diff --git a/fastdeploy/runtime/runtime.cc b/fastdeploy/runtime/runtime.cc
index 0e6eecf32..df000c9ac 100644
--- a/fastdeploy/runtime/runtime.cc
+++ b/fastdeploy/runtime/runtime.cc
@@ -154,19 +154,10 @@ bool Runtime::Init(const RuntimeOption& _option) {
   } else if (option.backend == Backend::SOPHGOTPU) {
     CreateSophgoNPUBackend();
   } else if (option.backend == Backend::POROS) {
-    FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
-             "Backend::POROS only supports Device::CPU/Device::GPU.");
-    FDASSERT(option.model_format == ModelFormat::TORCHSCRIPT,
-             "Backend::POROS only supports model format of "
-             "ModelFormat::TORCHSCRIPT.");
-    FDINFO << "Runtime initialized with Backend::POROS in " << option.device
-           << "." << std::endl;
-    return true;
+    CreatePorosBackend();
   } else {
-    FDERROR << "Runtime only support "
-               "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
-               "backend now."
-            << std::endl;
+    std::string msg = Str(GetAvailableBackends());
+    FDERROR << "The compiled FastDeploy only supports " << msg << ", " << option.backend << " is not supported now." << std::endl;
     return false;
   }
   backend_->benchmark_option_ = option.benchmark_option;
@@ -264,43 +255,9 @@ void Runtime::ReleaseModelMemoryBuffer() {
 }
 
 void Runtime::CreatePaddleBackend() {
-  FDASSERT(
-      option.device == Device::CPU || option.device == Device::GPU ||
-          option.device == Device::IPU,
-      "Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU.");
-  FDASSERT(
-      option.model_format == ModelFormat::PADDLE,
-      "Backend::PDINFER only supports model format of ModelFormat::PADDLE.");
 #ifdef ENABLE_PADDLE_BACKEND
-  option.paddle_infer_option.model_file = option.model_file;
-  option.paddle_infer_option.params_file = option.params_file;
-  option.paddle_infer_option.model_from_memory_ = option.model_from_memory_;
-  option.paddle_infer_option.device = option.device;
-  option.paddle_infer_option.device_id = option.device_id;
-  option.paddle_infer_option.enable_pinned_memory = option.enable_pinned_memory;
-  option.paddle_infer_option.external_stream_ = option.external_stream_;
-  option.paddle_infer_option.trt_option = option.trt_option;
-  option.paddle_infer_option.trt_option.gpu_id = option.device_id;
   backend_ = utils::make_unique<PaddleBackend>();
-  auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
-
-  if (option.model_from_memory_) {
-    FDASSERT(
-        casted_backend->InitFromPaddle(option.model_file, option.params_file,
-                                       option.paddle_infer_option),
-        "Load model from Paddle failed while initliazing PaddleBackend.");
-    ReleaseModelMemoryBuffer();
-  } else {
-    std::string model_buffer = "";
-    std::string params_buffer = "";
-    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
-             "Fail to read binary from model file");
-    FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
-             "Fail to read binary from parameter file");
-    FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
-                                            option.paddle_infer_option),
-             "Load model from Paddle failed while initliazing PaddleBackend.");
-  }
+  FDASSERT(backend_->Init(option), "Failed to initialize Paddle Inference backend.");
 #else
   FDASSERT(false,
            "PaddleBackend is not available, please compiled with "
@@ -339,12 +296,6 @@ void Runtime::CreateOrtBackend() {
 
 void Runtime::CreateTrtBackend() {
 #ifdef ENABLE_TRT_BACKEND
-  option.trt_option.model_file = option.model_file;
-  option.trt_option.params_file = option.params_file;
-  option.trt_option.model_format = option.model_format;
-  option.trt_option.gpu_id = option.device_id;
-  option.trt_option.enable_pinned_memory = option.enable_pinned_memory;
-  option.trt_option.external_stream_ = option.external_stream_;
   backend_ = utils::make_unique<TrtBackend>();
   FDASSERT(backend_->Init(option), "Failed to initialize TensorRT backend.");
 #else