[Backend & Serving] Serving and Runtime support Clone (#464)

* Add Clone support to Serving and Runtime

* Support Clone for the TRT, OpenVINO and Paddle backends

Co-authored-by: Jason <jiangjiajun@baidu.com>
heliqi
2022-11-04 17:16:40 +08:00
committed by GitHub
parent 61634caf28
commit 277bec38c7
13 changed files with 343 additions and 150 deletions
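For context, Clone lets a serving process hand each worker its own backend handle without re-loading or re-compiling the model. A minimal sketch of that pattern, using the BaseBackend::Clone signature added in this diff; the worker-pool harness and function name are illustrative, not part of the commit:

#include <memory>
#include <thread>
#include <vector>

// Hypothetical worker pool: 'prototype' is an already-initialized backend
// (e.g. an OpenVINOBackend). Each clone shares the compiled model but owns
// its own infer request, so workers can run inference concurrently.
void ServeWithClones(fastdeploy::BaseBackend* prototype, int num_workers) {
  std::vector<std::thread> workers;
  for (int i = 0; i < num_workers; ++i) {
    std::unique_ptr<fastdeploy::BaseBackend> backend =
        prototype->Clone(/*stream=*/nullptr, /*device_id=*/-1);
    workers.emplace_back([b = std::move(backend)] {
      // ... fill std::vector<FDTensor> inputs, call b->Infer(inputs, &outputs) ...
    });
  }
  for (auto& w : workers) w.join();
}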

@@ -74,6 +74,8 @@ ov::element::Type FDDataTypeToOV(const FDDataType& type) {
   return ov::element::f32;
 }
 
+ov::Core OpenVINOBackend::core_;
+
 void OpenVINOBackend::InitTensorInfo(
     const std::vector<ov::Output<ov::Node>>& ov_outputs,
     std::map<std::string, TensorInfo>* tensor_infos) {
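Hoisting core_ into a static member is what makes cloning cheap: every OpenVINOBackend instance in the process, clones included, now shares one ov::Core rather than constructing its own. An illustrative sketch of the pattern (not FastDeploy code):

#include <string>
#include <openvino/openvino.hpp>

class SharedCoreBackend {
 public:
  ov::CompiledModel Compile(const std::string& model_path) {
    // All instances reuse the same Core, so plugin/device state is shared.
    return core_.compile_model(model_path, "CPU");
  }

 private:
  static ov::Core core_;  // one Core per process, as in the diff above
};

ov::Core SharedCoreBackend::core_;  // single out-of-class definition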
@@ -96,10 +98,6 @@ bool OpenVINOBackend::InitFromPaddle(const std::string& model_file,
     return false;
   }
   option_ = option;
-  ov::AnyMap properties;
-  if (option_.cpu_thread_num > 0) {
-    properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
-  }
 
   std::shared_ptr<ov::Model> model = core_.read_model(model_file, params_file);
 
@@ -149,7 +147,19 @@ bool OpenVINOBackend::InitFromPaddle(const std::string& model_file,
     output_infos_.push_back(iter->second);
   }
 
+  ov::AnyMap properties;
+  if (option_.cpu_thread_num > 0) {
+    properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
+  }
+  if (option_.ov_num_streams == -1) {
+    properties["NUM_STREAMS"] = ov::streams::AUTO;
+  } else if (option_.ov_num_streams == -2) {
+    properties["NUM_STREAMS"] = ov::streams::NUMA;
+  } else if (option_.ov_num_streams > 0) {
+    properties["NUM_STREAMS"] = option_.ov_num_streams;
+  }
+
   compiled_model_ = core_.compile_model(model, "CPU", properties);
   request_ = compiled_model_.create_infer_request();
   initialized_ = true;
   return true;
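The ov_num_streams option is a sentinel-encoded integer: -1 requests ov::streams::AUTO, -2 requests ov::streams::NUMA, a positive value sets an explicit stream count, and anything else leaves NUM_STREAMS at the device default. Note also that the properties map now moves from before read_model to just before compile_model, since these properties only take effect at compile time. The same mapping as a standalone sketch (the helper name is hypothetical):

#include <openvino/openvino.hpp>

ov::AnyMap BuildStreamProperties(int cpu_thread_num, int ov_num_streams) {
  ov::AnyMap properties;
  if (cpu_thread_num > 0) {
    properties["INFERENCE_NUM_THREADS"] = cpu_thread_num;
  }
  if (ov_num_streams == -1) {
    properties["NUM_STREAMS"] = ov::streams::AUTO;  // let OpenVINO decide
  } else if (ov_num_streams == -2) {
    properties["NUM_STREAMS"] = ov::streams::NUMA;  // one stream per NUMA node
  } else if (ov_num_streams > 0) {
    properties["NUM_STREAMS"] = ov_num_streams;     // explicit stream count
  }
  return properties;  // 0 or other negatives: keep the device default
}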
@@ -185,10 +195,6 @@ bool OpenVINOBackend::InitFromOnnx(const std::string& model_file,
     return false;
   }
   option_ = option;
-  ov::AnyMap properties;
-  if (option_.cpu_thread_num > 0) {
-    properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
-  }
 
   std::shared_ptr<ov::Model> model = core_.read_model(model_file);
 
@@ -238,8 +244,21 @@ bool OpenVINOBackend::InitFromOnnx(const std::string& model_file,
     output_infos_.push_back(iter->second);
   }
 
+  ov::AnyMap properties;
+  if (option_.cpu_thread_num > 0) {
+    properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
+  }
+
+  if (option_.ov_num_streams == -1) {
+    properties["NUM_STREAMS"] = ov::streams::AUTO;
+  } else if (option_.ov_num_streams == -2) {
+    properties["NUM_STREAMS"] = ov::streams::NUMA;
+  } else if (option_.ov_num_streams > 0) {
+    properties["NUM_STREAMS"] = option_.ov_num_streams;
+  }
+
   compiled_model_ = core_.compile_model(model, "CPU", properties);
   request_ = compiled_model_.create_infer_request();
   initialized_ = true;
   return true;
 }
@@ -281,4 +300,14 @@ bool OpenVINOBackend::Infer(std::vector<FDTensor>& inputs,
   return true;
 }
 
+std::unique_ptr<BaseBackend> OpenVINOBackend::Clone(void* stream, int device_id) {
+  std::unique_ptr<BaseBackend> new_backend = utils::make_unique<OpenVINOBackend>();
+  auto casted_backend = dynamic_cast<OpenVINOBackend*>(new_backend.get());
+  casted_backend->option_ = option_;
+  casted_backend->request_ = compiled_model_.create_infer_request();
+  casted_backend->input_infos_.assign(input_infos_.begin(), input_infos_.end());
+  casted_backend->output_infos_.assign(output_infos_.begin(), output_infos_.end());
+  return new_backend;
+}
+
 } // namespace fastdeploy
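One subtlety in Clone above: the clone never assigns its own compiled_model_; it only receives a fresh infer request created from the original's compiled model. This appears to rely on an ov::InferRequest keeping its underlying compiled model alive, so the clone stays usable even though its compiled_model_ member is empty. A sketch of that lifetime assumption (illustrative, not FastDeploy code):

#include <string>
#include <openvino/openvino.hpp>

// Assumes an ov::InferRequest keeps its CompiledModel alive -- the same
// property the Clone implementation above depends on.
ov::InferRequest MakeDetachedRequest(ov::Core& core, const std::string& path) {
  ov::InferRequest request;
  {
    ov::CompiledModel compiled = core.compile_model(path, "CPU");
    request = compiled.create_infer_request();
  }  // the local CompiledModel handle goes out of scope here...
  return request;  // ...but the request remains usable.
}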