Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-07 01:22:59 +08:00
[Other] Optimize runtime module (#1195)
* Optimize code
* optimize code
* fix bug
@@ -55,17 +55,27 @@ class BaseBackend {
   virtual bool Initialized() const { return initialized_; }

+  // Get number of inputs of the model
   virtual int NumInputs() const = 0;
+  // Get number of outputs of the model
   virtual int NumOutputs() const = 0;
+  // Get information of input tensor
   virtual TensorInfo GetInputInfo(int index) = 0;
+  // Get information of output tensor
   virtual TensorInfo GetOutputInfo(int index) = 0;
+  // Get information of all the input tensors
   virtual std::vector<TensorInfo> GetInputInfos() = 0;
+  // Get information of all the output tensors
   virtual std::vector<TensorInfo> GetOutputInfos() = 0;

   // if copy_to_fd is true, copy memory data to FDTensor
   // else share memory to FDTensor(only Paddle、ORT、TRT、OpenVINO support it)
   virtual bool Infer(std::vector<FDTensor>& inputs,
                      std::vector<FDTensor>* outputs,
                      bool copy_to_fd = true) = 0;

+  // Optional: For those backends which can share memory
+  // while creating multiple inference engines with same model file
   virtual std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
                                              int device_id = -1) {
     FDERROR << "Clone no support" << std::endl;
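For orientation, here is a minimal sketch of what a concrete backend supplies under this interface. DummyBackend is hypothetical, the include path is an assumption, and only the members visible in this hunk are implemented; a real backend may have further obligations.

#include <vector>
#include "fastdeploy/backends/backend.h"  // assumed include path

// Hypothetical pass-through backend: one input, one output, data copied back.
class DummyBackend : public fastdeploy::BaseBackend {
 public:
  int NumInputs() const override { return 1; }
  int NumOutputs() const override { return 1; }
  fastdeploy::TensorInfo GetInputInfo(int index) override { return info_; }
  fastdeploy::TensorInfo GetOutputInfo(int index) override { return info_; }
  std::vector<fastdeploy::TensorInfo> GetInputInfos() override { return {info_}; }
  std::vector<fastdeploy::TensorInfo> GetOutputInfos() override { return {info_}; }
  bool Infer(std::vector<fastdeploy::FDTensor>& inputs,
             std::vector<fastdeploy::FDTensor>* outputs,
             bool copy_to_fd) override {
    // copy_to_fd == false is only honoured by backends that can share memory
    // (Paddle/ORT/TRT/OpenVINO); a simple backend always hands back a copy.
    *outputs = inputs;
    return true;
  }
  // Clone() is not overridden, so the inherited default logs "Clone no support".
 private:
  fastdeploy::TensorInfo info_;
};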
@@ -78,8 +78,9 @@ std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
     out << "ModelFormat::SOPHGO";
   } else if (format == ModelFormat::TORCHSCRIPT) {
     out << "ModelFormat::TORCHSCRIPT";
-  }
+  } else {
     out << "UNKNOWN-ModelFormat";
+  }
   return out;
 }

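This hunk is the "fix bug" item from the commit message: without the else branch, "UNKNOWN-ModelFormat" was streamed after every branch of the chain, so a known format printed its own label followed by the UNKNOWN suffix. A quick check of the fixed operator; the include path and the out-of-range cast are only illustrative.

#include <iostream>
#include "fastdeploy/runtime.h"  // assumed include path

int main() {
  // Exactly one label is streamed per call after the fix.
  std::cout << fastdeploy::ModelFormat::ONNX << "\n";             // ModelFormat::ONNX
  std::cout << static_cast<fastdeploy::ModelFormat>(99) << "\n";  // UNKNOWN-ModelFormat
  return 0;
}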
@@ -72,7 +72,8 @@ enum ModelFormat {
 };

 /// Describle all the supported backends for specified model format
-static std::map<ModelFormat, std::vector<Backend>> s_default_backends_cfg = {
+static std::map<ModelFormat, std::vector<Backend>>
+    s_default_backends_by_format = {
     {ModelFormat::PADDLE, {Backend::PDINFER, Backend::LITE,
                            Backend::ORT, Backend::OPENVINO, Backend::TRT}},
     {ModelFormat::ONNX, {Backend::ORT, Backend::OPENVINO, Backend::TRT}},
@@ -81,8 +82,22 @@ static std::map<ModelFormat, std::vector<Backend>> s_default_backends_cfg = {
     {ModelFormat::SOPHGO, {Backend::SOPHGOTPU}}
 };

+/// Describle all the supported backends for specified device
+static std::map<Device, std::vector<Backend>>
+    s_default_backends_by_device = {
+    {Device::CPU, {Backend::LITE, Backend::PDINFER, Backend::ORT,
+                   Backend::OPENVINO, Backend::POROS}},
+    {Device::GPU, {Backend::PDINFER, Backend::ORT, Backend::TRT, Backend::POROS}},
+    {Device::RKNPU, {Backend::RKNPU2}},
+    {Device::IPU, {Backend::PDINFER}},
+    {Device::TIMVX, {Backend::LITE}},
+    {Device::KUNLUNXIN, {Backend::LITE}},
+    {Device::ASCEND, {Backend::LITE}},
+    {Device::SOPHGOTPUD, {Backend::SOPHGOTPU}}
+};
+
 FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Backend& b);
 FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Device& d);
 FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const ModelFormat& f);

 }  // namespace fastdeploy
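The commit splits the single format-keyed table into two tables, so a default backend must now be valid for both the model format and the device. A self-contained sketch of the intersection that the new AutoSelectBackend performs; the tables are reduced copies and the enums are trimmed for brevity.

#include <iostream>
#include <map>
#include <vector>

enum class Backend { ORT, OPENVINO, TRT, PDINFER };
enum class ModelFormat { PADDLE, ONNX };
enum class Device { CPU, GPU };

// Reduced copies of the two lookup tables from the diff.
static std::map<ModelFormat, std::vector<Backend>> by_format = {
    {ModelFormat::ONNX, {Backend::ORT, Backend::OPENVINO, Backend::TRT}}};
static std::map<Device, std::vector<Backend>> by_device = {
    {Device::GPU, {Backend::PDINFER, Backend::ORT, Backend::TRT}}};

int main() {
  // Candidates = backends supported by BOTH the model format and the device,
  // kept in the format table's priority order.
  std::vector<Backend> candidates;
  for (auto b0 : by_format[ModelFormat::ONNX])
    for (auto b1 : by_device[Device::GPU])
      if (b0 == b1) candidates.push_back(b0);
  std::cout << candidates.size() << " candidates\n";  // 2 (ORT, TRT)
  return 0;
}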
@@ -51,55 +51,82 @@

 namespace fastdeploy {

-bool Runtime::Init(const RuntimeOption& _option) {
-  option = _option;
-  // Choose default backend by model format
-  if (option.backend == Backend::UNKNOWN) {
-    auto iter = s_default_backends_cfg.find(option.model_format);
-    if (iter == s_default_backends_cfg.end()) {
+bool AutoSelectBackend(RuntimeOption& option) {
+  auto iter0 = s_default_backends_by_format.find(option.model_format);
+  if (iter0 == s_default_backends_by_format.end()) {
     FDERROR << "Cannot found a default backend for model format: "
             << option.model_format
             << ", please define the inference backend in RuntimeOption."
             << std::endl;
     return false;
   }
-  for (const auto& b : iter->second) {
+
+  auto iter1 = s_default_backends_by_device.find(option.device);
+  if (iter1 == s_default_backends_by_device.end()) {
+    FDERROR << "Cannot found a default backend for device: " << option.device
+            << ", please define the inference backend in RuntimeOption."
+            << std::endl;
+    return false;
+  }
+
+  std::vector<Backend> candidates;
+  for (const auto& b0 : iter0->second) {
+    for (const auto& b1 : iter1->second) {
+      if (b0 == b1) {
+        candidates.push_back(b0);
+      }
+    }
+  }
+
+  if (candidates.size() == 0) {
+    FDERROR << "Cannot found availabel inference backends by model format: "
+            << option.model_format << " with device: " << option.device
+            << std::endl;
+    return false;
+  }
+
+  for (const auto& b : candidates) {
     if (IsBackendAvailable(b)) {
       option.backend = b;
       FDINFO << "FastDeploy will choose " << b << " to inference this model."
              << std::endl;
+      return true;
     }
   }
+
+  std::string debug_message = Str(candidates);
+  FDERROR << "The candiate backends for " << option.model_format << " & "
+          << option.device << " are " << debug_message
+          << ", but both of them have not been compiled with current "
+             "FastDeploy yet."
+          << std::endl;
+  return false;
+}
+
+bool Runtime::Init(const RuntimeOption& _option) {
+  option = _option;
+
+  // Choose default backend by model format and device if backend is not
+  // specified
   if (option.backend == Backend::UNKNOWN) {
-    FDERROR << "Cannot found available backends for model format: "
-            << option.model_format << "." << std::endl;
-    return false;
+    if (!AutoSelectBackend(option)) {
+      return false;
     }
   }

   if (option.backend == Backend::ORT) {
-    FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
-             "Backend::ORT only supports Device::CPU/Device::GPU.");
     CreateOrtBackend();
-    FDINFO << "Runtime initialized with Backend::ORT in " << option.device
-           << "." << std::endl;
   } else if (option.backend == Backend::TRT) {
-    FDASSERT(option.device == Device::GPU,
-             "Backend::TRT only supports Device::GPU.");
     CreateTrtBackend();
-    FDINFO << "Runtime initialized with Backend::TRT in " << option.device
-           << "." << std::endl;
   } else if (option.backend == Backend::PDINFER) {
-    FDASSERT(
-        option.device == Device::CPU || option.device == Device::GPU ||
-            option.device == Device::IPU,
-        "Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU.");
-    FDASSERT(
-        option.model_format == ModelFormat::PADDLE,
-        "Backend::PDINFER only supports model format of ModelFormat::PADDLE.");
     CreatePaddleBackend();
-    FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device
-           << "." << std::endl;
+  } else if (option.backend == Backend::OPENVINO) {
+    CreateOpenVINOBackend();
+  } else if (option.backend == Backend::LITE) {
+    CreateLiteBackend();
+  } else if (option.backend == Backend::RKNPU2) {
+    CreateRKNPU2Backend();
+  } else if (option.backend == Backend::SOPHGOTPU) {
+    CreateSophgoNPUBackend();
   } else if (option.backend == Backend::POROS) {
     FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
              "Backend::POROS only supports Device::CPU/Device::GPU.");
@@ -109,35 +136,6 @@ bool Runtime::Init(const RuntimeOption& _option) {
     FDINFO << "Runtime initialized with Backend::POROS in " << option.device
            << "." << std::endl;
     return true;
-  } else if (option.backend == Backend::OPENVINO) {
-    FDASSERT(option.device == Device::CPU,
-             "Backend::OPENVINO only supports Device::CPU");
-    CreateOpenVINOBackend();
-    FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device
-           << "." << std::endl;
-  } else if (option.backend == Backend::LITE) {
-    FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX ||
-                 option.device == Device::KUNLUNXIN ||
-                 option.device == Device::ASCEND,
-             "Backend::LITE only supports "
-             "Device::CPU/Device::TIMVX/Device::KUNLUNXIN.");
-    CreateLiteBackend();
-    FDINFO << "Runtime initialized with Backend::LITE in " << option.device
-           << "." << std::endl;
-  } else if (option.backend == Backend::RKNPU2) {
-    FDASSERT(option.device == Device::RKNPU,
-             "Backend::RKNPU2 only supports Device::RKNPU2");
-    CreateRKNPU2Backend();
-
-    FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device
-           << "." << std::endl;
-  } else if (option.backend == Backend::SOPHGOTPU) {
-    FDASSERT(option.device == Device::SOPHGOTPUD,
-             "Backend::SOPHGO only supports Device::SOPHGO");
-    CreateSophgoNPUBackend();
-
-    FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device
-           << "." << std::endl;
   } else {
     FDERROR << "Runtime only support "
                "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
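With the refactor, a caller that leaves the backend at Backend::UNKNOWN gets automatic selection by format and device. A usage sketch; the model path is a placeholder, and the SetModelPath/UseGpu setters are assumed to exist on RuntimeOption as in FastDeploy releases of this era.

#include "fastdeploy/runtime.h"  // assumed include path

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.onnx", "", fastdeploy::ModelFormat::ONNX);
  option.UseGpu(0);
  // option.backend stays Backend::UNKNOWN, so Init() calls AutoSelectBackend(),
  // intersects the format and device tables, and picks the first available hit.
  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) return -1;
  return 0;
}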
@@ -211,6 +209,13 @@ FDTensor* Runtime::GetOutputTensor(const std::string& name) {
 }

 void Runtime::CreatePaddleBackend() {
+  FDASSERT(
+      option.device == Device::CPU || option.device == Device::GPU ||
+          option.device == Device::IPU,
+      "Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU.");
+  FDASSERT(
+      option.model_format == ModelFormat::PADDLE,
+      "Backend::PDINFER only supports model format of ModelFormat::PADDLE.");
 #ifdef ENABLE_PADDLE_BACKEND
   auto pd_option = PaddleBackendOption();
   pd_option.model_file = option.model_file;
@@ -265,8 +270,6 @@ void Runtime::CreatePaddleBackend() {
     pd_option.ipu_option = ipu_option;
   }
 #endif
-  FDASSERT(option.model_format == ModelFormat::PADDLE,
-           "PaddleBackend only support model format of ModelFormat::PADDLE.");
   backend_ = utils::make_unique<PaddleBackend>();
   auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
   if (pd_option.model_from_memory_) {
@@ -283,9 +286,17 @@ void Runtime::CreatePaddleBackend() {
            "PaddleBackend is not available, please compiled with "
            "ENABLE_PADDLE_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device
+         << "." << std::endl;
 }

 void Runtime::CreateOpenVINOBackend() {
+  FDASSERT(option.device == Device::CPU,
+           "Backend::OPENVINO only supports Device::CPU");
+  FDASSERT(option.model_format == ModelFormat::PADDLE ||
+               option.model_format == ModelFormat::ONNX,
+           "OpenVINOBackend only support model format of ModelFormat::PADDLE / "
+           "ModelFormat::ONNX.");
 #ifdef ENABLE_OPENVINO_BACKEND
   auto ov_option = OpenVINOBackendOption();
   ov_option.cpu_thread_num = option.cpu_thread_num;
@@ -295,10 +306,6 @@ void Runtime::CreateOpenVINOBackend() {
   for (const auto& op : option.ov_cpu_operators) {
     ov_option.cpu_operators.insert(op);
   }
-  FDASSERT(option.model_format == ModelFormat::PADDLE ||
-               option.model_format == ModelFormat::ONNX,
-           "OpenVINOBackend only support model format of ModelFormat::PADDLE / "
-           "ModelFormat::ONNX.");
   backend_ = utils::make_unique<OpenVINOBackend>();
   auto casted_backend = dynamic_cast<OpenVINOBackend*>(backend_.get());

@@ -315,9 +322,17 @@ void Runtime::CreateOpenVINOBackend() {
            "OpenVINOBackend is not available, please compiled with "
            "ENABLE_OPENVINO_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device
+         << "." << std::endl;
 }

 void Runtime::CreateOrtBackend() {
+  FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
+           "Backend::ORT only supports Device::CPU/Device::GPU.");
+  FDASSERT(option.model_format == ModelFormat::PADDLE ||
+               option.model_format == ModelFormat::ONNX,
+           "OrtBackend only support model format of ModelFormat::PADDLE / "
+           "ModelFormat::ONNX.");
 #ifdef ENABLE_ORT_BACKEND
   auto ort_option = OrtBackendOption();
   ort_option.graph_optimization_level = option.ort_graph_opt_level;
@@ -328,10 +343,6 @@ void Runtime::CreateOrtBackend() {
   ort_option.gpu_id = option.device_id;
   ort_option.external_stream_ = option.external_stream_;

-  FDASSERT(option.model_format == ModelFormat::PADDLE ||
-               option.model_format == ModelFormat::ONNX,
-           "OrtBackend only support model format of ModelFormat::PADDLE / "
-           "ModelFormat::ONNX.");
   backend_ = utils::make_unique<OrtBackend>();
   auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get());
   if (option.model_format == ModelFormat::ONNX) {
@@ -347,9 +358,17 @@ void Runtime::CreateOrtBackend() {
            "OrtBackend is not available, please compiled with "
            "ENABLE_ORT_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::ORT in " << option.device << "."
+         << std::endl;
 }

 void Runtime::CreateTrtBackend() {
+  FDASSERT(option.device == Device::GPU,
+           "Backend::TRT only supports Device::GPU.");
+  FDASSERT(option.model_format == ModelFormat::PADDLE ||
+               option.model_format == ModelFormat::ONNX,
+           "TrtBackend only support model format of ModelFormat::PADDLE / "
+           "ModelFormat::ONNX.");
 #ifdef ENABLE_TRT_BACKEND
   auto trt_option = TrtBackendOption();
   trt_option.model_file = option.model_file;
@@ -367,10 +386,6 @@ void Runtime::CreateTrtBackend() {
   trt_option.enable_pinned_memory = option.enable_pinned_memory;
   trt_option.external_stream_ = option.external_stream_;

-  FDASSERT(option.model_format == ModelFormat::PADDLE ||
-               option.model_format == ModelFormat::ONNX,
-           "TrtBackend only support model format of ModelFormat::PADDLE / "
-           "ModelFormat::ONNX.");
   backend_ = utils::make_unique<TrtBackend>();
   auto casted_backend = dynamic_cast<TrtBackend*>(backend_.get());
   if (option.model_format == ModelFormat::ONNX) {
@@ -386,12 +401,19 @@ void Runtime::CreateTrtBackend() {
            "TrtBackend is not available, please compiled with "
            "ENABLE_TRT_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::TRT in " << option.device << "."
+         << std::endl;
 }

 void Runtime::CreateLiteBackend() {
-#ifdef ENABLE_LITE_BACKEND
+  FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX ||
+               option.device == Device::KUNLUNXIN ||
+               option.device == Device::ASCEND,
+           "Backend::LITE only supports "
+           "Device::CPU/Device::TIMVX/Device::KUNLUNXIN/Device::ASCEND.");
   FDASSERT(option.model_format == ModelFormat::PADDLE,
            "LiteBackend only support model format of ModelFormat::PADDLE");
+#ifdef ENABLE_LITE_BACKEND
   backend_ = utils::make_unique<LiteBackend>();
   auto casted_backend = dynamic_cast<LiteBackend*>(backend_.get());
   FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
@@ -402,15 +424,19 @@ void Runtime::CreateLiteBackend() {
            "LiteBackend is not available, please compiled with "
            "ENABLE_LITE_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::LITE in " << option.device << "."
+         << std::endl;
 }

 void Runtime::CreateRKNPU2Backend() {
+  FDASSERT(option.device == Device::RKNPU,
+           "Backend::RKNPU2 only supports Device::RKNPU2");
+  FDASSERT(option.model_format == ModelFormat::RKNN,
+           "RKNPU2Backend only support model format of ModelFormat::RKNN");
 #ifdef ENABLE_RKNPU2_BACKEND
   auto rknpu2_option = RKNPU2BackendOption();
   rknpu2_option.cpu_name = option.rknpu2_cpu_name_;
   rknpu2_option.core_mask = option.rknpu2_core_mask_;
-  FDASSERT(option.model_format == ModelFormat::RKNN,
-           "RKNPU2Backend only support model format of ModelFormat::RKNN");
   backend_ = utils::make_unique<RKNPU2Backend>();
   auto casted_backend = dynamic_cast<RKNPU2Backend*>(backend_.get());
   FDASSERT(casted_backend->InitFromRKNN(option.model_file, rknpu2_option),
@@ -420,13 +446,17 @@ void Runtime::CreateRKNPU2Backend() {
            "RKNPU2Backend is not available, please compiled with "
            "ENABLE_RKNPU2_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device
+         << "." << std::endl;
 }

 void Runtime::CreateSophgoNPUBackend() {
-#ifdef ENABLE_SOPHGO_BACKEND
-  auto sophgo_option = SophgoBackendOption();
+  FDASSERT(option.device == Device::SOPHGOTPUD,
+           "Backend::SOPHGO only supports Device::SOPHGO");
   FDASSERT(option.model_format == ModelFormat::SOPHGO,
            "SophgoBackend only support model format of ModelFormat::SOPHGO");
+#ifdef ENABLE_SOPHGO_BACKEND
+  auto sophgo_option = SophgoBackendOption();
   backend_ = utils::make_unique<SophgoBackend>();
   auto casted_backend = dynamic_cast<SophgoBackend*>(backend_.get());
   FDASSERT(casted_backend->InitFromSophgo(option.model_file, sophgo_option),
@@ -436,6 +466,8 @@ void Runtime::CreateSophgoNPUBackend() {
            "SophgoBackend is not available, please compiled with "
            "ENABLE_SOPHGO_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device
+         << "." << std::endl;
 }

 Runtime* Runtime::Clone(void* stream, int device_id) {
@@ -458,4 +490,36 @@ Runtime* Runtime::Clone(void* stream, int device_id) {
   return runtime;
 }

+// only for poros backend
+bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
+                      const RuntimeOption& _option) {
+#ifdef ENABLE_POROS_BACKEND
+  option = _option;
+  auto poros_option = PorosBackendOption();
+  poros_option.use_gpu = (option.device == Device::GPU) ? true : false;
+  poros_option.gpu_id = option.device_id;
+  poros_option.long_to_int = option.long_to_int;
+  poros_option.use_nvidia_tf32 = option.use_nvidia_tf32;
+  poros_option.unconst_ops_thres = option.unconst_ops_thres;
+  poros_option.poros_file = option.poros_file;
+  poros_option.is_dynamic = option.is_dynamic;
+  poros_option.enable_fp16 = option.trt_enable_fp16;
+  poros_option.max_batch_size = option.trt_max_batch_size;
+  poros_option.max_workspace_size = option.trt_max_workspace_size;
+  FDASSERT(
+      option.model_format == ModelFormat::TORCHSCRIPT,
+      "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
+  backend_ = utils::make_unique<PorosBackend>();
+  auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
+  FDASSERT(
+      casted_backend->Compile(option.model_file, prewarm_tensors, poros_option),
+      "Load model from Torchscript failed while initliazing PorosBackend.");
+#else
+  FDASSERT(false,
+           "PorosBackend is not available, please compiled with "
+           "ENABLE_POROS_BACKEND=ON.");
+#endif
+  return true;
+}
+
 }  // namespace fastdeploy
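Runtime::Compile is unchanged in behaviour here, just relocated to the end of the file. A sketch of how the Poros path is driven; the model path is a placeholder, and UsePorosBackend() plus the two-argument FDTensor::Resize are assumptions about the FastDeploy API of this era rather than something shown in the diff.

#include <vector>
#include "fastdeploy/runtime.h"  // assumed include path

int main() {
  fastdeploy::RuntimeOption option;
  option.UseGpu(0);
  option.UsePorosBackend();  // assumed setter
  option.model_file = "model.pt";
  option.model_format = fastdeploy::ModelFormat::TORCHSCRIPT;

  // One prewarm batch: a single FP32 input of shape [1, 3, 224, 224].
  std::vector<std::vector<fastdeploy::FDTensor>> prewarm(1);
  prewarm[0].resize(1);
  prewarm[0][0].Resize({1, 3, 224, 224}, fastdeploy::FDDataType::FP32);

  fastdeploy::Runtime runtime;
  return runtime.Compile(prewarm, option) ? 0 : -1;
}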
@@ -51,15 +51,6 @@ struct FASTDEPLOY_DECL Runtime {
    */
   bool Infer();

-  /** \brief Compile TorchScript Module, only for Poros backend
-   *
-   * \param[in] prewarm_tensors Prewarm datas for compile
-   * \param[in] _option Runtime option
-   * \return true if compile successed, otherwise false
-   */
-  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
-               const RuntimeOption& _option);
-
   /** \brief Get number of inputs
    */
   int NumInputs() { return backend_->NumInputs(); }
@@ -94,6 +85,15 @@ struct FASTDEPLOY_DECL Runtime {

   RuntimeOption option;

+  /** \brief Compile TorchScript Module, only for Poros backend
+   *
+   * \param[in] prewarm_tensors Prewarm datas for compile
+   * \param[in] _option Runtime option
+   * \return true if compile successed, otherwise false
+   */
+  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
+               const RuntimeOption& _option);
+
  private:
   void CreateOrtBackend();
   void CreatePaddleBackend();
@@ -337,37 +337,6 @@ void RuntimeOption::SetOpenVINOStreams(int num_streams) {
   ov_num_streams = num_streams;
 }

-bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
-                      const RuntimeOption& _option) {
-#ifdef ENABLE_POROS_BACKEND
-  option = _option;
-  auto poros_option = PorosBackendOption();
-  poros_option.use_gpu = (option.device == Device::GPU) ? true : false;
-  poros_option.gpu_id = option.device_id;
-  poros_option.long_to_int = option.long_to_int;
-  poros_option.use_nvidia_tf32 = option.use_nvidia_tf32;
-  poros_option.unconst_ops_thres = option.unconst_ops_thres;
-  poros_option.poros_file = option.poros_file;
-  poros_option.is_dynamic = option.is_dynamic;
-  poros_option.enable_fp16 = option.trt_enable_fp16;
-  poros_option.max_batch_size = option.trt_max_batch_size;
-  poros_option.max_workspace_size = option.trt_max_workspace_size;
-  FDASSERT(
-      option.model_format == ModelFormat::TORCHSCRIPT,
-      "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
-  backend_ = utils::make_unique<PorosBackend>();
-  auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
-  FDASSERT(
-      casted_backend->Compile(option.model_file, prewarm_tensors, poros_option),
-      "Load model from Torchscript failed while initliazing PorosBackend.");
-#else
-  FDASSERT(false,
-           "PorosBackend is not available, please compiled with "
-           "ENABLE_POROS_BACKEND=ON.");
-#endif
-  return true;
-}
-
 void RuntimeOption::EnablePaddleTrtCollectShape() { pd_collect_shape = true; }
 void RuntimeOption::DisablePaddleTrtCollectShape() { pd_collect_shape = false; }
@@ -203,8 +203,7 @@ FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file,
 FASTDEPLOY_DECL std::vector<int64_t>
 GetStride(const std::vector<int64_t>& dims);

-template <typename T, typename std::enable_if<std::is_integral<T>::value,
-                                              bool>::type = true>
+template <typename T>
 std::string Str(const std::vector<T>& shape) {
   std::ostringstream oss;
   oss << "[ " << shape[0];
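Dropping the std::enable_if integral constraint lets Str stringify any element type that has an operator<<, which is exactly what the new AutoSelectBackend needs for its std::vector<Backend> diagnostic. A standalone sketch of the relaxed template; only the first line of the body appears in the diff, so the loop and closing bracket here are a plausible reconstruction, not the exact FastDeploy source.

#include <sstream>
#include <string>
#include <vector>

template <typename T>
std::string Str(const std::vector<T>& shape) {
  std::ostringstream oss;
  oss << "[ " << shape[0];
  // Remaining elements and the closing bracket are guessed for the sketch.
  for (size_t i = 1; i < shape.size(); ++i) oss << ", " << shape[i];
  oss << " ]";
  return oss.str();
}

int main() {
  std::vector<int64_t> dims = {1, 3, 224, 224};
  std::string s = Str(dims);  // e.g. "[ 1, 3, 224, 224 ]"
  // With the constraint removed, enum types with operator<< (e.g. Backend) work too.
  return s.empty() ? 1 : 0;
}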