[Other] Optimize runtime module (#1211)

* modify ort option

* update code

* Unify backend
This commit is contained in:
Jason
2023-02-02 13:10:40 +08:00
committed by GitHub
parent 147cb2c32e
commit a711f99c69
8 changed files with 63 additions and 76 deletions

View File

@@ -56,6 +56,11 @@ class BaseBackend {
// Whether this backend has finished initialization successfully.
virtual bool Initialized() const { return initialized_; }
// Fallback initializer for the unified backend interface.
// Concrete backends are expected to override this; the base
// implementation only reports an error and fails.
// Fix: error message read "Not Implement Yet." (grammar).
virtual bool Init(const RuntimeOption& option) {
  FDERROR << "Not Implemented Yet." << std::endl;
  return false;
}
// Get number of inputs of the model
virtual int NumInputs() const = 0;
// Get number of outputs of the model

View File

@@ -34,12 +34,8 @@ struct OrtBackendOption {
// Execution mode passed to onnxruntime's SessionOptions:
// 0: ORT_SEQUENTIAL
// 1: ORT_PARALLEL
// -1 keeps onnxruntime's default (BuildOption only applies it when >= 0).
int execution_mode = -1;
// NOTE(review): use_gpu/gpu_id are the pre-change fields shown by this
// diff; they are superseded by the unified device/device_id below.
bool use_gpu = false;
int gpu_id = 0;
// Target device, unified with RuntimeOption (CPU by default); GPU selects
// the CUDAExecutionProvider in BuildOption.
Device device = Device::CPU;
int device_id = 0;
// Externally supplied compute stream; when non-null it is handed to the
// CUDA provider as user_compute_stream. nullptr = backend-managed stream.
void* external_stream_ = nullptr;
// inside parameter, maybe remove next version
bool remove_multiclass_nms_ = false;
// Custom operator info table (name -> attribute string); its consumer is
// not visible in this diff.
std::map<std::string, std::string> custom_op_info_;
};
} // namespace fastdeploy

View File

@@ -45,7 +45,7 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
if (option.execution_mode >= 0) {
session_options_.SetExecutionMode(ExecutionMode(option.execution_mode));
}
if (option.use_gpu) {
if (option.device == Device::GPU) {
auto all_providers = Ort::GetAvailableProviders();
bool support_cuda = false;
std::string providers_msg = "";
@@ -60,10 +60,10 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
"support GPU, the available providers are "
<< providers_msg << "will fallback to CPUExecutionProvider."
<< std::endl;
option_.use_gpu = false;
option_.device = Device::CPU;
} else {
OrtCUDAProviderOptions cuda_options;
cuda_options.device_id = option.gpu_id;
cuda_options.device_id = option.device_id;
if (option.external_stream_) {
cuda_options.has_user_compute_stream = 1;
cuda_options.user_compute_stream = option.external_stream_;
@@ -73,6 +73,44 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
}
}
// Initialize the ORT backend from the unified RuntimeOption.
//
// Supported combinations: Device::CPU or Device::GPU, with a model in
// Paddle or ONNX format (from file or from memory). Any other device or
// format logs an error via FDERROR and returns false.
//
// Fix: the original ended with an unreachable `return false;` after an
// exhaustive if / else-if / else chain in which every branch returns.
bool OrtBackend::Init(const RuntimeOption& option) {
  if (option.device != Device::CPU && option.device != Device::GPU) {
    FDERROR
        << "Backend::ORT only supports Device::CPU/Device::GPU, but now its "
        << option.device << "." << std::endl;
    return false;
  }

  // Propagate device placement and the external compute stream from the
  // generic RuntimeOption into the backend-specific option struct.
  OrtBackendOption ort_option = option.ort_option;
  ort_option.device = option.device;
  ort_option.device_id = option.device_id;
  ort_option.external_stream_ = option.external_stream_;

  if (option.model_format == ModelFormat::PADDLE) {
    // When loading from memory, model_file/params_file already hold the
    // raw buffers rather than file paths.
    if (option.model_from_memory_) {
      return InitFromPaddle(option.model_file, option.params_file, ort_option);
    }
    std::string model_buffer, params_buffer;
    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
             "Failed to read model file.");
    FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
             "Failed to read parameters file.");
    return InitFromPaddle(model_buffer, params_buffer, ort_option);
  }

  if (option.model_format == ModelFormat::ONNX) {
    if (option.model_from_memory_) {
      return InitFromOnnx(option.model_file, ort_option);
    }
    std::string model_buffer;
    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
             "Failed to read model file.");
    return InitFromOnnx(model_buffer, ort_option);
  }

  FDERROR << "Only support Paddle/ONNX model format for OrtBackend."
          << std::endl;
  return false;
}
bool OrtBackend::InitFromPaddle(const std::string& model_buffer,
const std::string& params_buffer,
const OrtBackendOption& option, bool verbose) {
@@ -221,7 +259,7 @@ bool OrtBackend::Infer(std::vector<FDTensor>& inputs,
// from FDTensor to Ort Inputs
for (size_t i = 0; i < inputs.size(); ++i) {
auto ort_value = CreateOrtValue(inputs[i], option_.use_gpu);
auto ort_value = CreateOrtValue(inputs[i], option_.device == Device::GPU);
binding_->BindInput(inputs[i].name.c_str(), ort_value);
}
@@ -297,7 +335,7 @@ void OrtBackend::InitCustomOperators() {
if (custom_operators_.size() == 0) {
MultiClassNmsOp* multiclass_nms = new MultiClassNmsOp{};
custom_operators_.push_back(multiclass_nms);
if (option_.use_gpu) {
if (option_.device == Device::GPU) {
AdaptivePool2dOp* adaptive_pool2d =
new AdaptivePool2dOp{"CUDAExecutionProvider"};
custom_operators_.push_back(adaptive_pool2d);

View File

@@ -39,13 +39,7 @@ class OrtBackend : public BaseBackend {
void BuildOption(const OrtBackendOption& option);
bool InitFromPaddle(const std::string& model_buffer,
const std::string& params_buffer,
const OrtBackendOption& option = OrtBackendOption(),
bool verbose = false);
bool InitFromOnnx(const std::string& model_buffer,
const OrtBackendOption& option = OrtBackendOption());
bool Init(const RuntimeOption& option);
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
bool copy_to_fd = true) override;
@@ -62,6 +56,14 @@ class OrtBackend : public BaseBackend {
void InitCustomOperators();
private:
bool InitFromPaddle(const std::string& model_buffer,
const std::string& params_buffer,
const OrtBackendOption& option = OrtBackendOption(),
bool verbose = false);
bool InitFromOnnx(const std::string& model_buffer,
const OrtBackendOption& option = OrtBackendOption());
Ort::Env env_;
Ort::Session session_{nullptr};
Ort::SessionOptions session_options_;