[Other] Optimize runtime module (#1195)

* Optimize code

* optimize code

* fix bug
This commit is contained in:
Jason
2023-01-31 10:11:16 +08:00
committed by GitHub
parent c232ceca77
commit 54ceb93670
7 changed files with 188 additions and 130 deletions

View File

@@ -55,17 +55,27 @@ class BaseBackend {
virtual bool Initialized() const { return initialized_; }
// Get number of inputs of the model
virtual int NumInputs() const = 0;
// Get number of outputs of the model
virtual int NumOutputs() const = 0;
// Get information of input tensor
virtual TensorInfo GetInputInfo(int index) = 0;
// Get information of output tensor
virtual TensorInfo GetOutputInfo(int index) = 0;
// Get information of all the input tensors
virtual std::vector<TensorInfo> GetInputInfos() = 0;
// Get information of all the output tensors
virtual std::vector<TensorInfo> GetOutputInfos() = 0;
// If copy_to_fd is true, copy the memory data to FDTensor;
// otherwise share the memory with FDTensor (only Paddle, ORT, TRT and OpenVINO support it)
virtual bool Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs,
bool copy_to_fd = true) = 0;
// Optional: For those backends which can share memory
// while creating multiple inference engines with same model file
virtual std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
int device_id = -1) {
FDERROR << "Clone no support" << std::endl;

View File

@@ -78,8 +78,9 @@ std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
out << "ModelFormat::SOPHGO";
} else if (format == ModelFormat::TORCHSCRIPT) {
out << "ModelFormat::TORCHSCRIPT";
}
} else {
out << "UNKNOWN-ModelFormat";
}
return out;
}

View File

@@ -72,7 +72,8 @@ enum ModelFormat {
};
/// Describe all the supported backends for the specified model format
static std::map<ModelFormat, std::vector<Backend>> s_default_backends_cfg = {
static std::map<ModelFormat, std::vector<Backend>>
s_default_backends_by_format = {
{ModelFormat::PADDLE, {Backend::PDINFER, Backend::LITE,
Backend::ORT, Backend::OPENVINO, Backend::TRT}},
{ModelFormat::ONNX, {Backend::ORT, Backend::OPENVINO, Backend::TRT}},
@@ -81,8 +82,22 @@ static std::map<ModelFormat, std::vector<Backend>> s_default_backends_cfg = {
{ModelFormat::SOPHGO, {Backend::SOPHGOTPU}}
};
/// Describes all the supported backends for each device.
///
/// Maps a Device to the list of Backend values that may be used on it.
/// AutoSelectBackend() looks the requested device up in this table and
/// intersects the result with the per-model-format table to build its
/// candidate list; a device missing from this table makes that lookup fail.
static std::map<Device, std::vector<Backend>>
s_default_backends_by_device = {
{Device::CPU, {Backend::LITE, Backend::PDINFER, Backend::ORT,
Backend::OPENVINO, Backend::POROS}},
{Device::GPU, {Backend::PDINFER, Backend::ORT, Backend::TRT, Backend::POROS}},
{Device::RKNPU, {Backend::RKNPU2}},
{Device::IPU, {Backend::PDINFER}},
{Device::TIMVX, {Backend::LITE}},
{Device::KUNLUNXIN, {Backend::LITE}},
{Device::ASCEND, {Backend::LITE}},
{Device::SOPHGOTPUD, {Backend::SOPHGOTPU}}
};
FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Backend& b);
FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Device& d);
FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const ModelFormat& f);
} // namespace fastdeploy

View File

@@ -51,55 +51,82 @@
namespace fastdeploy {
bool Runtime::Init(const RuntimeOption& _option) {
option = _option;
// Choose default backend by model format
if (option.backend == Backend::UNKNOWN) {
auto iter = s_default_backends_cfg.find(option.model_format);
if (iter == s_default_backends_cfg.end()) {
// Chooses an inference backend from option.model_format and option.device.
//
// The candidate list is every backend registered for the model format that is
// also registered for the device, kept in the order of the model-format table.
// The first candidate for which IsBackendAvailable() returns true is written
// to option.backend.
//
// Returns true when a backend was selected. Returns false (after logging via
// FDERROR) when the model format or the device has no table entry, when the
// two lists share no backend, or when no candidate is available.
bool AutoSelectBackend(RuntimeOption& option) {
// Backends registered for the requested model format.
auto iter0 = s_default_backends_by_format.find(option.model_format);
if (iter0 == s_default_backends_by_format.end()) {
FDERROR << "Cannot found a default backend for model format: "
<< option.model_format
<< ", please define the inference backend in RuntimeOption."
<< std::endl;
return false;
}
// NOTE(review): the loop header below refers to `iter`, which is not declared
// in this function (only iter0 / iter1 are) -- it looks like a leftover line
// from the previous implementation; confirm before relying on this text.
for (const auto& b : iter->second) {
// Backends registered for the requested device.
auto iter1 = s_default_backends_by_device.find(option.device);
if (iter1 == s_default_backends_by_device.end()) {
FDERROR << "Cannot found a default backend for device: " << option.device
<< ", please define the inference backend in RuntimeOption."
<< std::endl;
return false;
}
// Intersect the two lists; iterating the format list in the outer loop keeps
// the candidates in model-format order.
std::vector<Backend> candidates;
for (const auto& b0 : iter0->second) {
for (const auto& b1 : iter1->second) {
if (b0 == b1) {
candidates.push_back(b0);
}
}
}
if (candidates.size() == 0) {
FDERROR << "Cannot found availabel inference backends by model format: "
<< option.model_format << " with device: " << option.device
<< std::endl;
return false;
}
// Take the first candidate that IsBackendAvailable() accepts.
for (const auto& b : candidates) {
if (IsBackendAvailable(b)) {
option.backend = b;
FDINFO << "FastDeploy will choose " << b << " to inference this model."
<< std::endl;
return true;
}
}
// Candidates exist but none passed IsBackendAvailable(): report the list.
std::string debug_message = Str(candidates);
FDERROR << "The candiate backends for " << option.model_format << " & "
<< option.device << " are " << debug_message
<< ", but both of them have not been compiled with current "
"FastDeploy yet."
<< std::endl;
return false;
}
bool Runtime::Init(const RuntimeOption& _option) {
option = _option;
// Choose default backend by model format and device if backend is not
// specified
if (option.backend == Backend::UNKNOWN) {
FDERROR << "Cannot found available backends for model format: "
<< option.model_format << "." << std::endl;
if (!AutoSelectBackend(option)) {
return false;
}
}
if (option.backend == Backend::ORT) {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
"Backend::ORT only supports Device::CPU/Device::GPU.");
CreateOrtBackend();
FDINFO << "Runtime initialized with Backend::ORT in " << option.device
<< "." << std::endl;
} else if (option.backend == Backend::TRT) {
FDASSERT(option.device == Device::GPU,
"Backend::TRT only supports Device::GPU.");
CreateTrtBackend();
FDINFO << "Runtime initialized with Backend::TRT in " << option.device
<< "." << std::endl;
} else if (option.backend == Backend::PDINFER) {
FDASSERT(
option.device == Device::CPU || option.device == Device::GPU ||
option.device == Device::IPU,
"Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU.");
FDASSERT(
option.model_format == ModelFormat::PADDLE,
"Backend::PDINFER only supports model format of ModelFormat::PADDLE.");
CreatePaddleBackend();
FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device
<< "." << std::endl;
} else if (option.backend == Backend::OPENVINO) {
CreateOpenVINOBackend();
} else if (option.backend == Backend::LITE) {
CreateLiteBackend();
} else if (option.backend == Backend::RKNPU2) {
CreateRKNPU2Backend();
} else if (option.backend == Backend::SOPHGOTPU) {
CreateSophgoNPUBackend();
} else if (option.backend == Backend::POROS) {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
"Backend::POROS only supports Device::CPU/Device::GPU.");
@@ -109,35 +136,6 @@ bool Runtime::Init(const RuntimeOption& _option) {
FDINFO << "Runtime initialized with Backend::POROS in " << option.device
<< "." << std::endl;
return true;
} else if (option.backend == Backend::OPENVINO) {
FDASSERT(option.device == Device::CPU,
"Backend::OPENVINO only supports Device::CPU");
CreateOpenVINOBackend();
FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device
<< "." << std::endl;
} else if (option.backend == Backend::LITE) {
FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX ||
option.device == Device::KUNLUNXIN ||
option.device == Device::ASCEND,
"Backend::LITE only supports "
"Device::CPU/Device::TIMVX/Device::KUNLUNXIN.");
CreateLiteBackend();
FDINFO << "Runtime initialized with Backend::LITE in " << option.device
<< "." << std::endl;
} else if (option.backend == Backend::RKNPU2) {
FDASSERT(option.device == Device::RKNPU,
"Backend::RKNPU2 only supports Device::RKNPU2");
CreateRKNPU2Backend();
FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device
<< "." << std::endl;
} else if (option.backend == Backend::SOPHGOTPU) {
FDASSERT(option.device == Device::SOPHGOTPUD,
"Backend::SOPHGO only supports Device::SOPHGO");
CreateSophgoNPUBackend();
FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device
<< "." << std::endl;
} else {
FDERROR << "Runtime only support "
"Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
@@ -211,6 +209,13 @@ FDTensor* Runtime::GetOutputTensor(const std::string& name) {
}
void Runtime::CreatePaddleBackend() {
FDASSERT(
option.device == Device::CPU || option.device == Device::GPU ||
option.device == Device::IPU,
"Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU.");
FDASSERT(
option.model_format == ModelFormat::PADDLE,
"Backend::PDINFER only supports model format of ModelFormat::PADDLE.");
#ifdef ENABLE_PADDLE_BACKEND
auto pd_option = PaddleBackendOption();
pd_option.model_file = option.model_file;
@@ -265,8 +270,6 @@ void Runtime::CreatePaddleBackend() {
pd_option.ipu_option = ipu_option;
}
#endif
FDASSERT(option.model_format == ModelFormat::PADDLE,
"PaddleBackend only support model format of ModelFormat::PADDLE.");
backend_ = utils::make_unique<PaddleBackend>();
auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
if (pd_option.model_from_memory_) {
@@ -283,9 +286,17 @@ void Runtime::CreatePaddleBackend() {
"PaddleBackend is not available, please compiled with "
"ENABLE_PADDLE_BACKEND=ON.");
#endif
FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device
<< "." << std::endl;
}
void Runtime::CreateOpenVINOBackend() {
FDASSERT(option.device == Device::CPU,
"Backend::OPENVINO only supports Device::CPU");
FDASSERT(option.model_format == ModelFormat::PADDLE ||
option.model_format == ModelFormat::ONNX,
"OpenVINOBackend only support model format of ModelFormat::PADDLE / "
"ModelFormat::ONNX.");
#ifdef ENABLE_OPENVINO_BACKEND
auto ov_option = OpenVINOBackendOption();
ov_option.cpu_thread_num = option.cpu_thread_num;
@@ -295,10 +306,6 @@ void Runtime::CreateOpenVINOBackend() {
for (const auto& op : option.ov_cpu_operators) {
ov_option.cpu_operators.insert(op);
}
FDASSERT(option.model_format == ModelFormat::PADDLE ||
option.model_format == ModelFormat::ONNX,
"OpenVINOBackend only support model format of ModelFormat::PADDLE / "
"ModelFormat::ONNX.");
backend_ = utils::make_unique<OpenVINOBackend>();
auto casted_backend = dynamic_cast<OpenVINOBackend*>(backend_.get());
@@ -315,9 +322,17 @@ void Runtime::CreateOpenVINOBackend() {
"OpenVINOBackend is not available, please compiled with "
"ENABLE_OPENVINO_BACKEND=ON.");
#endif
FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device
<< "." << std::endl;
}
void Runtime::CreateOrtBackend() {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
"Backend::ORT only supports Device::CPU/Device::GPU.");
FDASSERT(option.model_format == ModelFormat::PADDLE ||
option.model_format == ModelFormat::ONNX,
"OrtBackend only support model format of ModelFormat::PADDLE / "
"ModelFormat::ONNX.");
#ifdef ENABLE_ORT_BACKEND
auto ort_option = OrtBackendOption();
ort_option.graph_optimization_level = option.ort_graph_opt_level;
@@ -328,10 +343,6 @@ void Runtime::CreateOrtBackend() {
ort_option.gpu_id = option.device_id;
ort_option.external_stream_ = option.external_stream_;
FDASSERT(option.model_format == ModelFormat::PADDLE ||
option.model_format == ModelFormat::ONNX,
"OrtBackend only support model format of ModelFormat::PADDLE / "
"ModelFormat::ONNX.");
backend_ = utils::make_unique<OrtBackend>();
auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get());
if (option.model_format == ModelFormat::ONNX) {
@@ -347,9 +358,17 @@ void Runtime::CreateOrtBackend() {
"OrtBackend is not available, please compiled with "
"ENABLE_ORT_BACKEND=ON.");
#endif
FDINFO << "Runtime initialized with Backend::ORT in " << option.device << "."
<< std::endl;
}
void Runtime::CreateTrtBackend() {
FDASSERT(option.device == Device::GPU,
"Backend::TRT only supports Device::GPU.");
FDASSERT(option.model_format == ModelFormat::PADDLE ||
option.model_format == ModelFormat::ONNX,
"TrtBackend only support model format of ModelFormat::PADDLE / "
"ModelFormat::ONNX.");
#ifdef ENABLE_TRT_BACKEND
auto trt_option = TrtBackendOption();
trt_option.model_file = option.model_file;
@@ -367,10 +386,6 @@ void Runtime::CreateTrtBackend() {
trt_option.enable_pinned_memory = option.enable_pinned_memory;
trt_option.external_stream_ = option.external_stream_;
FDASSERT(option.model_format == ModelFormat::PADDLE ||
option.model_format == ModelFormat::ONNX,
"TrtBackend only support model format of ModelFormat::PADDLE / "
"ModelFormat::ONNX.");
backend_ = utils::make_unique<TrtBackend>();
auto casted_backend = dynamic_cast<TrtBackend*>(backend_.get());
if (option.model_format == ModelFormat::ONNX) {
@@ -386,12 +401,19 @@ void Runtime::CreateTrtBackend() {
"TrtBackend is not available, please compiled with "
"ENABLE_TRT_BACKEND=ON.");
#endif
FDINFO << "Runtime initialized with Backend::TRT in " << option.device << "."
<< std::endl;
}
void Runtime::CreateLiteBackend() {
#ifdef ENABLE_LITE_BACKEND
FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX ||
option.device == Device::KUNLUNXIN ||
option.device == Device::ASCEND,
"Backend::LITE only supports "
"Device::CPU/Device::TIMVX/Device::KUNLUNXIN/Device::ASCEND.");
FDASSERT(option.model_format == ModelFormat::PADDLE,
"LiteBackend only support model format of ModelFormat::PADDLE");
#ifdef ENABLE_LITE_BACKEND
backend_ = utils::make_unique<LiteBackend>();
auto casted_backend = dynamic_cast<LiteBackend*>(backend_.get());
FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
@@ -402,15 +424,19 @@ void Runtime::CreateLiteBackend() {
"LiteBackend is not available, please compiled with "
"ENABLE_LITE_BACKEND=ON.");
#endif
FDINFO << "Runtime initialized with Backend::LITE in " << option.device << "."
<< std::endl;
}
void Runtime::CreateRKNPU2Backend() {
FDASSERT(option.device == Device::RKNPU,
"Backend::RKNPU2 only supports Device::RKNPU2");
FDASSERT(option.model_format == ModelFormat::RKNN,
"RKNPU2Backend only support model format of ModelFormat::RKNN");
#ifdef ENABLE_RKNPU2_BACKEND
auto rknpu2_option = RKNPU2BackendOption();
rknpu2_option.cpu_name = option.rknpu2_cpu_name_;
rknpu2_option.core_mask = option.rknpu2_core_mask_;
FDASSERT(option.model_format == ModelFormat::RKNN,
"RKNPU2Backend only support model format of ModelFormat::RKNN");
backend_ = utils::make_unique<RKNPU2Backend>();
auto casted_backend = dynamic_cast<RKNPU2Backend*>(backend_.get());
FDASSERT(casted_backend->InitFromRKNN(option.model_file, rknpu2_option),
@@ -420,13 +446,17 @@ void Runtime::CreateRKNPU2Backend() {
"RKNPU2Backend is not available, please compiled with "
"ENABLE_RKNPU2_BACKEND=ON.");
#endif
FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device
<< "." << std::endl;
}
void Runtime::CreateSophgoNPUBackend() {
#ifdef ENABLE_SOPHGO_BACKEND
auto sophgo_option = SophgoBackendOption();
FDASSERT(option.device == Device::SOPHGOTPUD,
"Backend::SOPHGO only supports Device::SOPHGO");
FDASSERT(option.model_format == ModelFormat::SOPHGO,
"SophgoBackend only support model format of ModelFormat::SOPHGO");
#ifdef ENABLE_SOPHGO_BACKEND
auto sophgo_option = SophgoBackendOption();
backend_ = utils::make_unique<SophgoBackend>();
auto casted_backend = dynamic_cast<SophgoBackend*>(backend_.get());
FDASSERT(casted_backend->InitFromSophgo(option.model_file, sophgo_option),
@@ -436,6 +466,8 @@ void Runtime::CreateSophgoNPUBackend() {
"SophgoBackend is not available, please compiled with "
"ENABLE_SOPHGO_BACKEND=ON.");
#endif
FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device
<< "." << std::endl;
}
Runtime* Runtime::Clone(void* stream, int device_id) {
@@ -458,4 +490,36 @@ Runtime* Runtime::Clone(void* stream, int device_id) {
return runtime;
}
// Poros backend only: compiles a TorchScript module using the given prewarm
// tensors. Stores _option in this runtime, derives the Poros-specific option
// from it and passes the model file to PorosBackend::Compile. Failures are
// reported through FDASSERT; the function returns true whenever control
// reaches its end.
bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
                      const RuntimeOption& _option) {
#ifndef ENABLE_POROS_BACKEND
  FDASSERT(false,
           "PorosBackend is not available, please compiled with "
           "ENABLE_POROS_BACKEND=ON.");
#else
  option = _option;

  // Translate the generic runtime option into the Poros-specific one.
  PorosBackendOption poros_cfg = PorosBackendOption();
  poros_cfg.use_gpu = (option.device == Device::GPU);
  poros_cfg.gpu_id = option.device_id;
  poros_cfg.long_to_int = option.long_to_int;
  poros_cfg.use_nvidia_tf32 = option.use_nvidia_tf32;
  poros_cfg.unconst_ops_thres = option.unconst_ops_thres;
  poros_cfg.poros_file = option.poros_file;
  poros_cfg.is_dynamic = option.is_dynamic;
  // The FP16 / batch-size / workspace settings are taken from the trt_* fields.
  poros_cfg.enable_fp16 = option.trt_enable_fp16;
  poros_cfg.max_batch_size = option.trt_max_batch_size;
  poros_cfg.max_workspace_size = option.trt_max_workspace_size;

  FDASSERT(
      option.model_format == ModelFormat::TORCHSCRIPT,
      "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");

  // Install the backend first, then compile through the concrete type.
  backend_ = utils::make_unique<PorosBackend>();
  PorosBackend* poros_backend = dynamic_cast<PorosBackend*>(backend_.get());
  FDASSERT(
      poros_backend->Compile(option.model_file, prewarm_tensors, poros_cfg),
      "Load model from Torchscript failed while initliazing PorosBackend.");
#endif
  return true;
}
} // namespace fastdeploy

View File

@@ -51,15 +51,6 @@ struct FASTDEPLOY_DECL Runtime {
*/
bool Infer();
/** \brief Compile a TorchScript module, only for the Poros backend
*
* \param[in] prewarm_tensors Prewarm data used for compilation
* \param[in] _option Runtime option
* \return true if compilation succeeded, otherwise false
*/
bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
const RuntimeOption& _option);
/** \brief Get number of inputs
*/
int NumInputs() { return backend_->NumInputs(); }
@@ -94,6 +85,15 @@ struct FASTDEPLOY_DECL Runtime {
RuntimeOption option;
/** \brief Compile a TorchScript module, only for the Poros backend
*
* \param[in] prewarm_tensors Prewarm data used for compilation
* \param[in] _option Runtime option
* \return true if compilation succeeded, otherwise false
*/
bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
const RuntimeOption& _option);
private:
void CreateOrtBackend();
void CreatePaddleBackend();

View File

@@ -337,37 +337,6 @@ void RuntimeOption::SetOpenVINOStreams(int num_streams) {
ov_num_streams = num_streams;
}
// Poros backend only: compiles a TorchScript module using the given prewarm
// tensors. Stores _option in this runtime, derives the Poros-specific option
// from it and passes the model file to PorosBackend::Compile. Failures are
// reported through FDASSERT; the function returns true whenever control
// reaches its end.
bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
                      const RuntimeOption& _option) {
#ifndef ENABLE_POROS_BACKEND
  FDASSERT(false,
           "PorosBackend is not available, please compiled with "
           "ENABLE_POROS_BACKEND=ON.");
#else
  option = _option;

  // Translate the generic runtime option into the Poros-specific one.
  PorosBackendOption poros_cfg = PorosBackendOption();
  poros_cfg.use_gpu = (option.device == Device::GPU);
  poros_cfg.gpu_id = option.device_id;
  poros_cfg.long_to_int = option.long_to_int;
  poros_cfg.use_nvidia_tf32 = option.use_nvidia_tf32;
  poros_cfg.unconst_ops_thres = option.unconst_ops_thres;
  poros_cfg.poros_file = option.poros_file;
  poros_cfg.is_dynamic = option.is_dynamic;
  // The FP16 / batch-size / workspace settings are taken from the trt_* fields.
  poros_cfg.enable_fp16 = option.trt_enable_fp16;
  poros_cfg.max_batch_size = option.trt_max_batch_size;
  poros_cfg.max_workspace_size = option.trt_max_workspace_size;

  FDASSERT(
      option.model_format == ModelFormat::TORCHSCRIPT,
      "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");

  // Install the backend first, then compile through the concrete type.
  backend_ = utils::make_unique<PorosBackend>();
  PorosBackend* poros_backend = dynamic_cast<PorosBackend*>(backend_.get());
  FDASSERT(
      poros_backend->Compile(option.model_file, prewarm_tensors, poros_cfg),
      "Load model from Torchscript failed while initliazing PorosBackend.");
#endif
  return true;
}
void RuntimeOption::EnablePaddleTrtCollectShape() { pd_collect_shape = true; }
void RuntimeOption::DisablePaddleTrtCollectShape() { pd_collect_shape = false; }

View File

@@ -203,8 +203,7 @@ FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file,
FASTDEPLOY_DECL std::vector<int64_t>
GetStride(const std::vector<int64_t>& dims);
template <typename T, typename std::enable_if<std::is_integral<T>::value,
bool>::type = true>
template <typename T>
std::string Str(const std::vector<T>& shape) {
std::ostringstream oss;
oss << "[ " << shape[0];