diff --git a/examples/runtime/cpp/infer_torchscript_poros.cc b/examples/runtime/cpp/infer_torchscript_poros.cc
index d9bf4ebad..0257513dc 100644
--- a/examples/runtime/cpp/infer_torchscript_poros.cc
+++ b/examples/runtime/cpp/infer_torchscript_poros.cc
@@ -84,11 +84,13 @@ int main(int argc, char* argv[]) {
   runtime_option.SetModelPath(model_file, "", fd::ModelFormat::TORCHSCRIPT);
   runtime_option.UsePorosBackend();
   runtime_option.UseGpu(0);
-  runtime_option.is_dynamic = true;
 
   // Compile runtime
   std::unique_ptr<fd::Runtime> runtime =
       std::unique_ptr<fd::Runtime>(new fd::Runtime());
-  if (!runtime->Compile(prewarm_datas, runtime_option)) {
+
+  runtime->Init(runtime_option);
+
+  if (!runtime->Compile(prewarm_datas)) {
     std::cerr << "--- Init FastDeploy Runitme Failed! "
               << "\n--- Model: " << model_file << std::endl;
@@ -114,4 +116,4 @@ int main(int argc, char* argv[]) {
 
   output_tensors[0].PrintInfo();
   return 0;
-}
\ No newline at end of file
+}
diff --git a/fastdeploy/pybind/runtime.cc b/fastdeploy/pybind/runtime.cc
index 408c3ced2..a47181dff 100644
--- a/fastdeploy/pybind/runtime.cc
+++ b/fastdeploy/pybind/runtime.cc
@@ -51,7 +51,7 @@ void BindRuntime(pybind11::module& m) {
                         warm_datas[i][j].nbytes());
               }
             }
-            return self.Compile(warm_tensors, _option);
+            return self.Compile(warm_tensors);
           })
      .def("infer",
           [](Runtime& self, std::map<std::string, pybind11::array>& data) {
diff --git a/fastdeploy/runtime/backends/backend.h b/fastdeploy/runtime/backends/backend.h
index 802db6fa1..7566806e5 100644
--- a/fastdeploy/runtime/backends/backend.h
+++ b/fastdeploy/runtime/backends/backend.h
@@ -58,7 +58,10 @@ class BaseBackend {
   virtual bool Initialized() const { return initialized_; }
 
   virtual bool Init(const RuntimeOption& option) {
-    FDERROR << "Not Implement Yet." << std::endl;
+    FDERROR << "Not implemented for "
+            << option.backend << " in "
+            << option.device << "."
+            << std::endl;
     return false;
   }
@@ -89,59 +92,59 @@ class BaseBackend {
     return nullptr;
   }
 
-  benchmark::BenchmarkOption benchmark_option_; 
-  benchmark::BenchmarkResult benchmark_result_; 
+  benchmark::BenchmarkOption benchmark_option_;
+  benchmark::BenchmarkResult benchmark_result_;
 };
 
-/** \brief Macros for Runtime benchmark profiling. 
- * The param 'base_loop' for 'RUNTIME_PROFILE_LOOP_BEGIN' 
- * indicates that the least number of times the loop 
+/** \brief Macros for Runtime benchmark profiling.
+ * The param 'base_loop' for 'RUNTIME_PROFILE_LOOP_BEGIN'
+ * indicates the least number of times the loop
  * will repeat when profiling mode is not enabled.
- * In most cases, the value should be 1, i.e., results are 
- * obtained by running the inference process once, when 
- * the profile mode is turned off, such as ONNX Runtime, 
- * OpenVINO, TensorRT, Paddle Inference, Paddle Lite, 
- * RKNPU2, SOPHGO etc. 
- * 
+ * In most cases, the value should be 1, i.e., results are
+ * obtained by running the inference process once, when
+ * the profile mode is turned off, such as ONNX Runtime,
+ * OpenVINO, TensorRT, Paddle Inference, Paddle Lite,
+ * RKNPU2, SOPHGO, etc.
+ *
 * example code @code
- * // OpenVINOBackend::Infer 
+ * // OpenVINOBackend::Infer
 * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
- * // do something .... 
+ * // do something ....
 * RUNTIME_PROFILE_LOOP_BEGIN(1)
- * // The codes which wrapped by 'BEGIN(1) ~ END' scope 
+ * // The code wrapped by the 'BEGIN(1) ~ END' scope
 * // will only run once when profiling mode is not enabled.
- * request_.infer(); 
+ * request_.infer();
 * RUNTIME_PROFILE_LOOP_END
- * // do something .... 
+ * // do something ....
 * RUNTIME_PROFILE_LOOP_H2D_D2H_END
- * 
- * @endcode In this case, No global variables inside a function
- * are wrapped by BEGIN and END, which may be required for 
- * subsequent tasks. But, some times we need to set 'base_loop'
+ *
+ * @endcode In this case, no global variables inside a function
+ * are wrapped by BEGIN and END, which may be required for
+ * subsequent tasks. But sometimes we need to set 'base_loop'
 * as 0, such as POROS.
- * 
+ *
 *
 * example code @code
 * // PorosBackend::Infer
 * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
- * // do something .... 
+ * // do something ....
 * RUNTIME_PROFILE_LOOP_BEGIN(0) // set 'base_loop' as 0
- * // The codes which wrapped by 'BEGIN(0) ~ END' scope 
+ * // The code wrapped by the 'BEGIN(0) ~ END' scope
 * // will not run when profiling mode is not enabled.
- * auto poros_outputs = _poros_module->forward(poros_inputs); 
+ * auto poros_outputs = _poros_module->forward(poros_inputs);
 * RUNTIME_PROFILE_LOOP_END
 * // Run another inference beyond the scope of 'BEGIN ~ END'
 * // to get valid outputs for subsequent tasks.
- * auto poros_outputs = _poros_module->forward(poros_inputs); 
+ * auto poros_outputs = _poros_module->forward(poros_inputs);
 * // do something .... will use 'poros_outputs' ...
 * if (poros_outputs.isTensor()) {
 *   // ...
 * }
 * RUNTIME_PROFILE_LOOP_H2D_D2H_END
- * 
+ *
 * @endcode In this case, 'poros_outputs' inside a function
- * are wrapped by BEGIN and END, which may be required for 
- * subsequent tasks. So, we set 'base_loop' as 0 and lanuch
- * another infer to get the valid outputs beyond the scope 
+ * are wrapped by BEGIN and END, which may be required for
+ * subsequent tasks. So, we set 'base_loop' as 0 and launch
+ * another infer to get the valid outputs beyond the scope
 * of 'BEGIN ~ END' for subsequent tasks.
 */
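The new `Init` hook gives every backend a uniform place to validate the requested format/device combination before any session is built. As a hedged illustration of the contract (not code from this PR), a custom backend might override it as follows, mirroring the `Supported(...)` checks that `PorosBackend::Init` adds below; `MyBackend` and the `Backend::ORT` stand-in are hypothetical:

```cpp
// Sketch of overriding the new BaseBackend::Init hook. 'MyBackend' is
// hypothetical and Backend::ORT is only a stand-in enum value; the
// Supported(...) calls mirror the PorosBackend::Init added in this diff.
class MyBackend : public BaseBackend {
 public:
  bool Init(const RuntimeOption& option) override {
    // Reject unsupported formats/devices up front, so the failure
    // surfaces at Runtime::Init rather than at the first Infer() call.
    if (!(Supported(option.model_format, Backend::ORT) &&
          Supported(option.device, Backend::ORT))) {
      return false;
    }
    // ... backend-specific session/engine construction would go here ...
    initialized_ = true;
    return true;
  }
};
```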
diff --git a/fastdeploy/runtime/backends/poros/poros_backend.h b/fastdeploy/runtime/backends/poros/poros_backend.h
index 0d01a6884..91268efdb 100755
--- a/fastdeploy/runtime/backends/poros/poros_backend.h
+++ b/fastdeploy/runtime/backends/poros/poros_backend.h
@@ -51,6 +51,20 @@ class PorosBackend : public BaseBackend {
 
   void BuildOption(const PorosBackendOption& option);
 
+  bool Init(const RuntimeOption& option) {
+    if (!(Supported(option.model_format, Backend::POROS)
+          && Supported(option.device, Backend::POROS))) {
+      return false;
+    }
+    if (option.model_from_memory_) {
+      FDERROR << "Poros backend doesn't support loading a model "
+              << "from memory; please load the model from disk."
+              << std::endl;
+      return false;
+    }
+    return true;
+  }
+
   bool Compile(const std::string& model_file,
                std::vector<std::vector<FDTensor>>& prewarm_tensors,
                const PorosBackendOption& option = PorosBackendOption());
diff --git a/fastdeploy/runtime/runtime.cc b/fastdeploy/runtime/runtime.cc
index ceb7c590c..0e6eecf32 100644
--- a/fastdeploy/runtime/runtime.cc
+++ b/fastdeploy/runtime/runtime.cc
@@ -417,25 +417,28 @@ Runtime* Runtime::Clone(void* stream, int device_id) {
   return runtime;
 }
 
-// only for poros backend
-bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
-                      const RuntimeOption& _option) {
+void Runtime::CreatePorosBackend() {
+#ifdef ENABLE_POROS_BACKEND
+  backend_ = utils::make_unique<PorosBackend>();
+  FDASSERT(backend_->Init(option), "Failed to initialize Poros backend.");
+#else
+  FDASSERT(false,
+           "PorosBackend is not available; please compile with "
+           "ENABLE_POROS_BACKEND=ON.");
+#endif
+  FDINFO << "Runtime initialized with Backend::POROS in " << option.device
+         << "." << std::endl;
+}
+
+// only for poros backend
+bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors) {
 #ifdef ENABLE_POROS_BACKEND
-  FDASSERT(
-      option.model_format == ModelFormat::TORCHSCRIPT,
-      "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
-  if (option.device != Device::CPU && option.device != Device::GPU) {
-    FDERROR << "PorosBackend only supports CPU/GPU, but now its "
-            << option.device << "." << std::endl;
-    return false;
-  }
   option.poros_option.device = option.device;
   option.poros_option.device_id = option.device_id;
   option.poros_option.enable_fp16 = option.trt_option.enable_fp16;
   option.poros_option.max_batch_size = option.trt_option.max_batch_size;
   option.poros_option.max_workspace_size = option.trt_option.max_workspace_size;
-  backend_ = utils::make_unique<PorosBackend>();
   auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
   FDASSERT(
       casted_backend->Compile(option.model_file, prewarm_tensors,
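The call site for `CreatePorosBackend()` sits outside these hunks; presumably `Runtime::Init` now routes `Backend::POROS` through it alongside the other `Create*Backend()` helpers, so the format/device checks that used to live in `Compile` run at init time instead. A rough sketch of that assumption (not part of the patch):

```cpp
// Presumed dispatch inside Runtime::Init (outside this diff): POROS gets
// its backend object at Init time, and Compile() is left to compile only.
if (option.backend == Backend::POROS) {
  CreatePorosBackend();  // FDASSERTs that PorosBackend::Init() succeeded
} else if (option.backend == Backend::LITE) {
  CreateLiteBackend();
}  // ... remaining backends elided ...
```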
diff --git a/fastdeploy/runtime/runtime.h b/fastdeploy/runtime/runtime.h
index 4d045684e..e34b520f8 100755
--- a/fastdeploy/runtime/runtime.h
+++ b/fastdeploy/runtime/runtime.h
@@ -99,11 +99,9 @@ struct FASTDEPLOY_DECL Runtime {
   /** \brief Compile TorchScript Module, only for Poros backend
    *
-   * \param[in] prewarm_tensors Prewarm datas for compile
-   * \param[in] _option Runtime option
-   * \return true if compile successed, otherwise false
+   * \param[in] prewarm_tensors Prewarm data for compilation
+   * \return true if compilation succeeded, otherwise false
    */
-  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
-               const RuntimeOption& _option);
+  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors);
 
   /** \brief Get profile time of Runtime after the profile process is done. */
   double GetProfileTime() {
@@ -118,6 +116,7 @@
   void CreateLiteBackend();
   void CreateRKNPU2Backend();
   void CreateSophgoNPUBackend();
+  void CreatePorosBackend();
   std::unique_ptr<BaseBackend> backend_;
   std::vector<FDTensor> input_tensors_;
   std::vector<FDTensor> output_tensors_;
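Taken together, the Poros calling convention becomes: configure `RuntimeOption`, `Init` the runtime, then `Compile` with prewarm tensors only. A minimal end-to-end sketch under those assumptions; the model path and input shape are illustrative, and the prewarm construction assumes the `FDTensor::Resize`/`MutableData` helpers:

```cpp
#include <algorithm>
#include <iostream>
#include <vector>

#include "fastdeploy/runtime.h"

namespace fd = fastdeploy;

int main() {
  fd::RuntimeOption option;
  option.SetModelPath("model.pt", "", fd::ModelFormat::TORCHSCRIPT);
  option.UsePorosBackend();
  option.UseGpu(0);

  // One prewarm batch; dtype and shape are placeholders for the real inputs.
  fd::FDTensor input;
  input.Resize({1, 3, 224, 224}, fd::FDDataType::FP32);
  std::fill_n(static_cast<float*>(input.MutableData()), input.Numel(), 0.5f);
  std::vector<std::vector<fd::FDTensor>> prewarm_datas = {{input}};

  fd::Runtime runtime;
  if (!runtime.Init(option)) {  // new: backend is created and checked here
    std::cerr << "Init failed." << std::endl;
    return -1;
  }
  if (!runtime.Compile(prewarm_datas)) {  // new: RuntimeOption no longer passed
    std::cerr << "Compile failed." << std::endl;
    return -1;
  }
  return 0;
}
```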