[Other] Optimize runtime module (#1195)

* Optimize code

* optimize code

* fix bug
This commit is contained in:
Jason
2023-01-31 10:11:16 +08:00
committed by GitHub
parent c232ceca77
commit 54ceb93670
7 changed files with 188 additions and 130 deletions

View File

@@ -55,17 +55,27 @@ class BaseBackend {
virtual bool Initialized() const { return initialized_; } virtual bool Initialized() const { return initialized_; }
// Get number of inputs of the model
virtual int NumInputs() const = 0; virtual int NumInputs() const = 0;
// Get number of outputs of the model
virtual int NumOutputs() const = 0; virtual int NumOutputs() const = 0;
// Get information of input tensor
virtual TensorInfo GetInputInfo(int index) = 0; virtual TensorInfo GetInputInfo(int index) = 0;
// Get information of output tensor
virtual TensorInfo GetOutputInfo(int index) = 0; virtual TensorInfo GetOutputInfo(int index) = 0;
// Get information of all the input tensors
virtual std::vector<TensorInfo> GetInputInfos() = 0; virtual std::vector<TensorInfo> GetInputInfos() = 0;
// Get information of all the output tensors
virtual std::vector<TensorInfo> GetOutputInfos() = 0; virtual std::vector<TensorInfo> GetOutputInfos() = 0;
// if copy_to_fd is true, copy memory data to FDTensor // if copy_to_fd is true, copy memory data to FDTensor
// else share memory to FDTensor(only Paddle、ORT、TRT、OpenVINO support it) // else share memory to FDTensor(only Paddle、ORT、TRT、OpenVINO support it)
virtual bool Infer(std::vector<FDTensor>& inputs, virtual bool Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs, std::vector<FDTensor>* outputs,
bool copy_to_fd = true) = 0; bool copy_to_fd = true) = 0;
// Optional: For those backends which can share memory
// while creating multiple inference engines with same model file
virtual std::unique_ptr<BaseBackend> Clone(void *stream = nullptr, virtual std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
int device_id = -1) { int device_id = -1) {
FDERROR << "Clone no support" << std::endl; FDERROR << "Clone no support" << std::endl;

View File

@@ -78,8 +78,9 @@ std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
out << "ModelFormat::SOPHGO"; out << "ModelFormat::SOPHGO";
} else if (format == ModelFormat::TORCHSCRIPT) { } else if (format == ModelFormat::TORCHSCRIPT) {
out << "ModelFormat::TORCHSCRIPT"; out << "ModelFormat::TORCHSCRIPT";
} else {
out << "UNKNOWN-ModelFormat";
} }
out << "UNKNOWN-ModelFormat";
return out; return out;
} }

View File

@@ -72,7 +72,8 @@ enum ModelFormat {
}; };
/// Describle all the supported backends for specified model format /// Describle all the supported backends for specified model format
static std::map<ModelFormat, std::vector<Backend>> s_default_backends_cfg = { static std::map<ModelFormat, std::vector<Backend>>
s_default_backends_by_format = {
{ModelFormat::PADDLE, {Backend::PDINFER, Backend::LITE, {ModelFormat::PADDLE, {Backend::PDINFER, Backend::LITE,
Backend::ORT, Backend::OPENVINO, Backend::TRT}}, Backend::ORT, Backend::OPENVINO, Backend::TRT}},
{ModelFormat::ONNX, {Backend::ORT, Backend::OPENVINO, Backend::TRT}}, {ModelFormat::ONNX, {Backend::ORT, Backend::OPENVINO, Backend::TRT}},
@@ -81,8 +82,22 @@ static std::map<ModelFormat, std::vector<Backend>> s_default_backends_cfg = {
{ModelFormat::SOPHGO, {Backend::SOPHGOTPU}} {ModelFormat::SOPHGO, {Backend::SOPHGOTPU}}
}; };
/// Describe all the supported backends for each specified device.
/// Key: a Device value; value: the list of Backend values registered for it.
/// Looked up by device when no backend is specified in RuntimeOption.
static std::map<Device, std::vector<Backend>>
s_default_backends_by_device = {
{Device::CPU, {Backend::LITE, Backend::PDINFER, Backend::ORT,
Backend::OPENVINO, Backend::POROS}},
{Device::GPU, {Backend::PDINFER, Backend::ORT, Backend::TRT, Backend::POROS}},
{Device::RKNPU, {Backend::RKNPU2}},
{Device::IPU, {Backend::PDINFER}},
{Device::TIMVX, {Backend::LITE}},
{Device::KUNLUNXIN, {Backend::LITE}},
{Device::ASCEND, {Backend::LITE}},
{Device::SOPHGOTPUD, {Backend::SOPHGOTPU}}
};
FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Backend& b); FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Backend& b);
FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Device& d); FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Device& d);
FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const ModelFormat& f); FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const ModelFormat& f);
} // namespace fastdeploy } // namespace fastdeploy

View File

@@ -51,55 +51,82 @@
namespace fastdeploy { namespace fastdeploy {
bool Runtime::Init(const RuntimeOption& _option) { bool AutoSelectBackend(RuntimeOption& option) {
option = _option; auto iter0 = s_default_backends_by_format.find(option.model_format);
// Choose default backend by model format if (iter0 == s_default_backends_by_format.end()) {
if (option.backend == Backend::UNKNOWN) { FDERROR << "Cannot found a default backend for model format: "
auto iter = s_default_backends_cfg.find(option.model_format); << option.model_format
if (iter == s_default_backends_cfg.end()) { << ", please define the inference backend in RuntimeOption."
FDERROR << "Cannot found a default backend for model format: " << std::endl;
<< option.model_format return false;
<< ", please define the inference backend in RuntimeOption." }
<< std::endl;
return false; auto iter1 = s_default_backends_by_device.find(option.device);
} if (iter1 == s_default_backends_by_device.end()) {
for (const auto& b : iter->second) { FDERROR << "Cannot found a default backend for device: " << option.device
if (IsBackendAvailable(b)) { << ", please define the inference backend in RuntimeOption."
option.backend = b; << std::endl;
FDINFO << "FastDeploy will choose " << b << " to inference this model." return false;
<< std::endl; }
std::vector<Backend> candidates;
for (const auto& b0 : iter0->second) {
for (const auto& b1 : iter1->second) {
if (b0 == b1) {
candidates.push_back(b0);
} }
} }
if (option.backend == Backend::UNKNOWN) { }
FDERROR << "Cannot found available backends for model format: "
<< option.model_format << "." << std::endl; if (candidates.size() == 0) {
FDERROR << "Cannot found availabel inference backends by model format: "
<< option.model_format << " with device: " << option.device
<< std::endl;
return false;
}
for (const auto& b : candidates) {
if (IsBackendAvailable(b)) {
option.backend = b;
FDINFO << "FastDeploy will choose " << b << " to inference this model."
<< std::endl;
return true;
}
}
std::string debug_message = Str(candidates);
FDERROR << "The candiate backends for " << option.model_format << " & "
<< option.device << " are " << debug_message
<< ", but both of them have not been compiled with current "
"FastDeploy yet."
<< std::endl;
return false;
}
bool Runtime::Init(const RuntimeOption& _option) {
option = _option;
// Choose default backend by model format and device if backend is not
// specified
if (option.backend == Backend::UNKNOWN) {
if (!AutoSelectBackend(option)) {
return false; return false;
} }
} }
if (option.backend == Backend::ORT) { if (option.backend == Backend::ORT) {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
"Backend::ORT only supports Device::CPU/Device::GPU.");
CreateOrtBackend(); CreateOrtBackend();
FDINFO << "Runtime initialized with Backend::ORT in " << option.device
<< "." << std::endl;
} else if (option.backend == Backend::TRT) { } else if (option.backend == Backend::TRT) {
FDASSERT(option.device == Device::GPU,
"Backend::TRT only supports Device::GPU.");
CreateTrtBackend(); CreateTrtBackend();
FDINFO << "Runtime initialized with Backend::TRT in " << option.device
<< "." << std::endl;
} else if (option.backend == Backend::PDINFER) { } else if (option.backend == Backend::PDINFER) {
FDASSERT(
option.device == Device::CPU || option.device == Device::GPU ||
option.device == Device::IPU,
"Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU.");
FDASSERT(
option.model_format == ModelFormat::PADDLE,
"Backend::PDINFER only supports model format of ModelFormat::PADDLE.");
CreatePaddleBackend(); CreatePaddleBackend();
FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device } else if (option.backend == Backend::OPENVINO) {
<< "." << std::endl; CreateOpenVINOBackend();
} else if (option.backend == Backend::LITE) {
CreateLiteBackend();
} else if (option.backend == Backend::RKNPU2) {
CreateRKNPU2Backend();
} else if (option.backend == Backend::SOPHGOTPU) {
CreateSophgoNPUBackend();
} else if (option.backend == Backend::POROS) { } else if (option.backend == Backend::POROS) {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU, FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
"Backend::POROS only supports Device::CPU/Device::GPU."); "Backend::POROS only supports Device::CPU/Device::GPU.");
@@ -109,35 +136,6 @@ bool Runtime::Init(const RuntimeOption& _option) {
FDINFO << "Runtime initialized with Backend::POROS in " << option.device FDINFO << "Runtime initialized with Backend::POROS in " << option.device
<< "." << std::endl; << "." << std::endl;
return true; return true;
} else if (option.backend == Backend::OPENVINO) {
FDASSERT(option.device == Device::CPU,
"Backend::OPENVINO only supports Device::CPU");
CreateOpenVINOBackend();
FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device
<< "." << std::endl;
} else if (option.backend == Backend::LITE) {
FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX ||
option.device == Device::KUNLUNXIN ||
option.device == Device::ASCEND,
"Backend::LITE only supports "
"Device::CPU/Device::TIMVX/Device::KUNLUNXIN.");
CreateLiteBackend();
FDINFO << "Runtime initialized with Backend::LITE in " << option.device
<< "." << std::endl;
} else if (option.backend == Backend::RKNPU2) {
FDASSERT(option.device == Device::RKNPU,
"Backend::RKNPU2 only supports Device::RKNPU2");
CreateRKNPU2Backend();
FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device
<< "." << std::endl;
} else if (option.backend == Backend::SOPHGOTPU) {
FDASSERT(option.device == Device::SOPHGOTPUD,
"Backend::SOPHGO only supports Device::SOPHGO");
CreateSophgoNPUBackend();
FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device
<< "." << std::endl;
} else { } else {
FDERROR << "Runtime only support " FDERROR << "Runtime only support "
"Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as " "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
@@ -211,6 +209,13 @@ FDTensor* Runtime::GetOutputTensor(const std::string& name) {
} }
void Runtime::CreatePaddleBackend() { void Runtime::CreatePaddleBackend() {
FDASSERT(
option.device == Device::CPU || option.device == Device::GPU ||
option.device == Device::IPU,
"Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU.");
FDASSERT(
option.model_format == ModelFormat::PADDLE,
"Backend::PDINFER only supports model format of ModelFormat::PADDLE.");
#ifdef ENABLE_PADDLE_BACKEND #ifdef ENABLE_PADDLE_BACKEND
auto pd_option = PaddleBackendOption(); auto pd_option = PaddleBackendOption();
pd_option.model_file = option.model_file; pd_option.model_file = option.model_file;
@@ -265,8 +270,6 @@ void Runtime::CreatePaddleBackend() {
pd_option.ipu_option = ipu_option; pd_option.ipu_option = ipu_option;
} }
#endif #endif
FDASSERT(option.model_format == ModelFormat::PADDLE,
"PaddleBackend only support model format of ModelFormat::PADDLE.");
backend_ = utils::make_unique<PaddleBackend>(); backend_ = utils::make_unique<PaddleBackend>();
auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get()); auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
if (pd_option.model_from_memory_) { if (pd_option.model_from_memory_) {
@@ -283,9 +286,17 @@ void Runtime::CreatePaddleBackend() {
"PaddleBackend is not available, please compiled with " "PaddleBackend is not available, please compiled with "
"ENABLE_PADDLE_BACKEND=ON."); "ENABLE_PADDLE_BACKEND=ON.");
#endif #endif
FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device
<< "." << std::endl;
} }
void Runtime::CreateOpenVINOBackend() { void Runtime::CreateOpenVINOBackend() {
FDASSERT(option.device == Device::CPU,
"Backend::OPENVINO only supports Device::CPU");
FDASSERT(option.model_format == ModelFormat::PADDLE ||
option.model_format == ModelFormat::ONNX,
"OpenVINOBackend only support model format of ModelFormat::PADDLE / "
"ModelFormat::ONNX.");
#ifdef ENABLE_OPENVINO_BACKEND #ifdef ENABLE_OPENVINO_BACKEND
auto ov_option = OpenVINOBackendOption(); auto ov_option = OpenVINOBackendOption();
ov_option.cpu_thread_num = option.cpu_thread_num; ov_option.cpu_thread_num = option.cpu_thread_num;
@@ -295,10 +306,6 @@ void Runtime::CreateOpenVINOBackend() {
for (const auto& op : option.ov_cpu_operators) { for (const auto& op : option.ov_cpu_operators) {
ov_option.cpu_operators.insert(op); ov_option.cpu_operators.insert(op);
} }
FDASSERT(option.model_format == ModelFormat::PADDLE ||
option.model_format == ModelFormat::ONNX,
"OpenVINOBackend only support model format of ModelFormat::PADDLE / "
"ModelFormat::ONNX.");
backend_ = utils::make_unique<OpenVINOBackend>(); backend_ = utils::make_unique<OpenVINOBackend>();
auto casted_backend = dynamic_cast<OpenVINOBackend*>(backend_.get()); auto casted_backend = dynamic_cast<OpenVINOBackend*>(backend_.get());
@@ -315,9 +322,17 @@ void Runtime::CreateOpenVINOBackend() {
"OpenVINOBackend is not available, please compiled with " "OpenVINOBackend is not available, please compiled with "
"ENABLE_OPENVINO_BACKEND=ON."); "ENABLE_OPENVINO_BACKEND=ON.");
#endif #endif
FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device
<< "." << std::endl;
} }
void Runtime::CreateOrtBackend() { void Runtime::CreateOrtBackend() {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
"Backend::ORT only supports Device::CPU/Device::GPU.");
FDASSERT(option.model_format == ModelFormat::PADDLE ||
option.model_format == ModelFormat::ONNX,
"OrtBackend only support model format of ModelFormat::PADDLE / "
"ModelFormat::ONNX.");
#ifdef ENABLE_ORT_BACKEND #ifdef ENABLE_ORT_BACKEND
auto ort_option = OrtBackendOption(); auto ort_option = OrtBackendOption();
ort_option.graph_optimization_level = option.ort_graph_opt_level; ort_option.graph_optimization_level = option.ort_graph_opt_level;
@@ -328,10 +343,6 @@ void Runtime::CreateOrtBackend() {
ort_option.gpu_id = option.device_id; ort_option.gpu_id = option.device_id;
ort_option.external_stream_ = option.external_stream_; ort_option.external_stream_ = option.external_stream_;
FDASSERT(option.model_format == ModelFormat::PADDLE ||
option.model_format == ModelFormat::ONNX,
"OrtBackend only support model format of ModelFormat::PADDLE / "
"ModelFormat::ONNX.");
backend_ = utils::make_unique<OrtBackend>(); backend_ = utils::make_unique<OrtBackend>();
auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get()); auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get());
if (option.model_format == ModelFormat::ONNX) { if (option.model_format == ModelFormat::ONNX) {
@@ -347,9 +358,17 @@ void Runtime::CreateOrtBackend() {
"OrtBackend is not available, please compiled with " "OrtBackend is not available, please compiled with "
"ENABLE_ORT_BACKEND=ON."); "ENABLE_ORT_BACKEND=ON.");
#endif #endif
FDINFO << "Runtime initialized with Backend::ORT in " << option.device << "."
<< std::endl;
} }
void Runtime::CreateTrtBackend() { void Runtime::CreateTrtBackend() {
FDASSERT(option.device == Device::GPU,
"Backend::TRT only supports Device::GPU.");
FDASSERT(option.model_format == ModelFormat::PADDLE ||
option.model_format == ModelFormat::ONNX,
"TrtBackend only support model format of ModelFormat::PADDLE / "
"ModelFormat::ONNX.");
#ifdef ENABLE_TRT_BACKEND #ifdef ENABLE_TRT_BACKEND
auto trt_option = TrtBackendOption(); auto trt_option = TrtBackendOption();
trt_option.model_file = option.model_file; trt_option.model_file = option.model_file;
@@ -367,10 +386,6 @@ void Runtime::CreateTrtBackend() {
trt_option.enable_pinned_memory = option.enable_pinned_memory; trt_option.enable_pinned_memory = option.enable_pinned_memory;
trt_option.external_stream_ = option.external_stream_; trt_option.external_stream_ = option.external_stream_;
FDASSERT(option.model_format == ModelFormat::PADDLE ||
option.model_format == ModelFormat::ONNX,
"TrtBackend only support model format of ModelFormat::PADDLE / "
"ModelFormat::ONNX.");
backend_ = utils::make_unique<TrtBackend>(); backend_ = utils::make_unique<TrtBackend>();
auto casted_backend = dynamic_cast<TrtBackend*>(backend_.get()); auto casted_backend = dynamic_cast<TrtBackend*>(backend_.get());
if (option.model_format == ModelFormat::ONNX) { if (option.model_format == ModelFormat::ONNX) {
@@ -386,12 +401,19 @@ void Runtime::CreateTrtBackend() {
"TrtBackend is not available, please compiled with " "TrtBackend is not available, please compiled with "
"ENABLE_TRT_BACKEND=ON."); "ENABLE_TRT_BACKEND=ON.");
#endif #endif
FDINFO << "Runtime initialized with Backend::TRT in " << option.device << "."
<< std::endl;
} }
void Runtime::CreateLiteBackend() { void Runtime::CreateLiteBackend() {
#ifdef ENABLE_LITE_BACKEND FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX ||
option.device == Device::KUNLUNXIN ||
option.device == Device::ASCEND,
"Backend::LITE only supports "
"Device::CPU/Device::TIMVX/Device::KUNLUNXIN/Device::ASCEND.");
FDASSERT(option.model_format == ModelFormat::PADDLE, FDASSERT(option.model_format == ModelFormat::PADDLE,
"LiteBackend only support model format of ModelFormat::PADDLE"); "LiteBackend only support model format of ModelFormat::PADDLE");
#ifdef ENABLE_LITE_BACKEND
backend_ = utils::make_unique<LiteBackend>(); backend_ = utils::make_unique<LiteBackend>();
auto casted_backend = dynamic_cast<LiteBackend*>(backend_.get()); auto casted_backend = dynamic_cast<LiteBackend*>(backend_.get());
FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file, FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
@@ -402,15 +424,19 @@ void Runtime::CreateLiteBackend() {
"LiteBackend is not available, please compiled with " "LiteBackend is not available, please compiled with "
"ENABLE_LITE_BACKEND=ON."); "ENABLE_LITE_BACKEND=ON.");
#endif #endif
FDINFO << "Runtime initialized with Backend::LITE in " << option.device << "."
<< std::endl;
} }
void Runtime::CreateRKNPU2Backend() { void Runtime::CreateRKNPU2Backend() {
FDASSERT(option.device == Device::RKNPU,
"Backend::RKNPU2 only supports Device::RKNPU2");
FDASSERT(option.model_format == ModelFormat::RKNN,
"RKNPU2Backend only support model format of ModelFormat::RKNN");
#ifdef ENABLE_RKNPU2_BACKEND #ifdef ENABLE_RKNPU2_BACKEND
auto rknpu2_option = RKNPU2BackendOption(); auto rknpu2_option = RKNPU2BackendOption();
rknpu2_option.cpu_name = option.rknpu2_cpu_name_; rknpu2_option.cpu_name = option.rknpu2_cpu_name_;
rknpu2_option.core_mask = option.rknpu2_core_mask_; rknpu2_option.core_mask = option.rknpu2_core_mask_;
FDASSERT(option.model_format == ModelFormat::RKNN,
"RKNPU2Backend only support model format of ModelFormat::RKNN");
backend_ = utils::make_unique<RKNPU2Backend>(); backend_ = utils::make_unique<RKNPU2Backend>();
auto casted_backend = dynamic_cast<RKNPU2Backend*>(backend_.get()); auto casted_backend = dynamic_cast<RKNPU2Backend*>(backend_.get());
FDASSERT(casted_backend->InitFromRKNN(option.model_file, rknpu2_option), FDASSERT(casted_backend->InitFromRKNN(option.model_file, rknpu2_option),
@@ -420,13 +446,17 @@ void Runtime::CreateRKNPU2Backend() {
"RKNPU2Backend is not available, please compiled with " "RKNPU2Backend is not available, please compiled with "
"ENABLE_RKNPU2_BACKEND=ON."); "ENABLE_RKNPU2_BACKEND=ON.");
#endif #endif
FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device
<< "." << std::endl;
} }
void Runtime::CreateSophgoNPUBackend() { void Runtime::CreateSophgoNPUBackend() {
#ifdef ENABLE_SOPHGO_BACKEND FDASSERT(option.device == Device::SOPHGOTPUD,
auto sophgo_option = SophgoBackendOption(); "Backend::SOPHGO only supports Device::SOPHGO");
FDASSERT(option.model_format == ModelFormat::SOPHGO, FDASSERT(option.model_format == ModelFormat::SOPHGO,
"SophgoBackend only support model format of ModelFormat::SOPHGO"); "SophgoBackend only support model format of ModelFormat::SOPHGO");
#ifdef ENABLE_SOPHGO_BACKEND
auto sophgo_option = SophgoBackendOption();
backend_ = utils::make_unique<SophgoBackend>(); backend_ = utils::make_unique<SophgoBackend>();
auto casted_backend = dynamic_cast<SophgoBackend*>(backend_.get()); auto casted_backend = dynamic_cast<SophgoBackend*>(backend_.get());
FDASSERT(casted_backend->InitFromSophgo(option.model_file, sophgo_option), FDASSERT(casted_backend->InitFromSophgo(option.model_file, sophgo_option),
@@ -436,6 +466,8 @@ void Runtime::CreateSophgoNPUBackend() {
"SophgoBackend is not available, please compiled with " "SophgoBackend is not available, please compiled with "
"ENABLE_SOPHGO_BACKEND=ON."); "ENABLE_SOPHGO_BACKEND=ON.");
#endif #endif
FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device
<< "." << std::endl;
} }
Runtime* Runtime::Clone(void* stream, int device_id) { Runtime* Runtime::Clone(void* stream, int device_id) {
@@ -458,4 +490,36 @@ Runtime* Runtime::Clone(void* stream, int device_id) {
return runtime; return runtime;
} }
// Poros-only entry point: stores `_option`, converts it into a
// PorosBackendOption and asks a freshly created PorosBackend to compile the
// TorchScript model, passing `prewarm_tensors` through to the backend.
// Without ENABLE_POROS_BACKEND the call trips an assertion instead.
bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
                      const RuntimeOption& _option) {
#ifdef ENABLE_POROS_BACKEND
  option = _option;

  // Translate the generic runtime option into the Poros-specific option.
  auto compile_cfg = PorosBackendOption();
  compile_cfg.poros_file = option.poros_file;
  compile_cfg.is_dynamic = option.is_dynamic;
  compile_cfg.long_to_int = option.long_to_int;
  compile_cfg.use_nvidia_tf32 = option.use_nvidia_tf32;
  compile_cfg.unconst_ops_thres = option.unconst_ops_thres;
  // Device placement.
  compile_cfg.use_gpu = (option.device == Device::GPU);
  compile_cfg.gpu_id = option.device_id;
  // These three values are read from the trt_* fields of RuntimeOption.
  compile_cfg.enable_fp16 = option.trt_enable_fp16;
  compile_cfg.max_batch_size = option.trt_max_batch_size;
  compile_cfg.max_workspace_size = option.trt_max_workspace_size;

  FDASSERT(
      option.model_format == ModelFormat::TORCHSCRIPT,
      "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");

  // Install the backend first, then drive it through its concrete type.
  backend_ = utils::make_unique<PorosBackend>();
  auto poros_backend = dynamic_cast<PorosBackend*>(backend_.get());
  FDASSERT(
      poros_backend->Compile(option.model_file, prewarm_tensors, compile_cfg),
      "Load model from Torchscript failed while initliazing PorosBackend.");
#else
  FDASSERT(false,
           "PorosBackend is not available, please compiled with "
           "ENABLE_POROS_BACKEND=ON.");
#endif
  return true;
}
} // namespace fastdeploy } // namespace fastdeploy

View File

@@ -51,15 +51,6 @@ struct FASTDEPLOY_DECL Runtime {
*/ */
bool Infer(); bool Infer();
/** \brief Compile TorchScript Module, only for Poros backend
*
* \param[in] prewarm_tensors Prewarm data used for compilation
* \param[in] _option Runtime option
* \return true if compilation succeeded, otherwise false
*/
bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
const RuntimeOption& _option);
/** \brief Get number of inputs /** \brief Get number of inputs
*/ */
int NumInputs() { return backend_->NumInputs(); } int NumInputs() { return backend_->NumInputs(); }
@@ -94,6 +85,15 @@ struct FASTDEPLOY_DECL Runtime {
RuntimeOption option; RuntimeOption option;
/** \brief Compile TorchScript Module, only for Poros backend
*
* \param[in] prewarm_tensors Prewarm data used for compilation
* \param[in] _option Runtime option
* \return true if compilation succeeded, otherwise false
*/
bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
const RuntimeOption& _option);
private: private:
void CreateOrtBackend(); void CreateOrtBackend();
void CreatePaddleBackend(); void CreatePaddleBackend();

View File

@@ -337,37 +337,6 @@ void RuntimeOption::SetOpenVINOStreams(int num_streams) {
ov_num_streams = num_streams; ov_num_streams = num_streams;
} }
// Poros-only entry point: stores `_option`, converts it into a
// PorosBackendOption and asks a freshly created PorosBackend to compile the
// TorchScript model, passing `prewarm_tensors` through to the backend.
// Without ENABLE_POROS_BACKEND the call trips an assertion instead.
bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
                      const RuntimeOption& _option) {
#ifdef ENABLE_POROS_BACKEND
  option = _option;

  // Translate the generic runtime option into the Poros-specific option.
  auto compile_cfg = PorosBackendOption();
  compile_cfg.poros_file = option.poros_file;
  compile_cfg.is_dynamic = option.is_dynamic;
  compile_cfg.long_to_int = option.long_to_int;
  compile_cfg.use_nvidia_tf32 = option.use_nvidia_tf32;
  compile_cfg.unconst_ops_thres = option.unconst_ops_thres;
  // Device placement.
  compile_cfg.use_gpu = (option.device == Device::GPU);
  compile_cfg.gpu_id = option.device_id;
  // These three values are read from the trt_* fields of RuntimeOption.
  compile_cfg.enable_fp16 = option.trt_enable_fp16;
  compile_cfg.max_batch_size = option.trt_max_batch_size;
  compile_cfg.max_workspace_size = option.trt_max_workspace_size;

  FDASSERT(
      option.model_format == ModelFormat::TORCHSCRIPT,
      "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");

  // Install the backend first, then drive it through its concrete type.
  backend_ = utils::make_unique<PorosBackend>();
  auto poros_backend = dynamic_cast<PorosBackend*>(backend_.get());
  FDASSERT(
      poros_backend->Compile(option.model_file, prewarm_tensors, compile_cfg),
      "Load model from Torchscript failed while initliazing PorosBackend.");
#else
  FDASSERT(false,
           "PorosBackend is not available, please compiled with "
           "ENABLE_POROS_BACKEND=ON.");
#endif
  return true;
}
void RuntimeOption::EnablePaddleTrtCollectShape() { pd_collect_shape = true; } void RuntimeOption::EnablePaddleTrtCollectShape() { pd_collect_shape = true; }
void RuntimeOption::DisablePaddleTrtCollectShape() { pd_collect_shape = false; } void RuntimeOption::DisablePaddleTrtCollectShape() { pd_collect_shape = false; }

View File

@@ -203,8 +203,7 @@ FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file,
FASTDEPLOY_DECL std::vector<int64_t> FASTDEPLOY_DECL std::vector<int64_t>
GetStride(const std::vector<int64_t>& dims); GetStride(const std::vector<int64_t>& dims);
template <typename T, typename std::enable_if<std::is_integral<T>::value, template <typename T>
bool>::type = true>
std::string Str(const std::vector<T>& shape) { std::string Str(const std::vector<T>& shape) {
std::ostringstream oss; std::ostringstream oss;
oss << "[ " << shape[0]; oss << "[ " << shape[0];