[Backend] Support Intel GPU with heterogeneous mode (#701)
* Add some comments for python api
* support openvino gpu
* Add cpu operators
* add interface to specify hetero operators
* remove useless dir
* format code
* remove debug code
* Support GPU for ONNX
@@ -32,6 +32,14 @@ std::vector<int64_t> PartialShapeToVec(const ov::PartialShape& shape) {
   return res;
 }
 
+ov::PartialShape VecToPartialShape(const std::vector<int64_t>& shape) {
+  std::vector<ov::Dimension> dims;
+  for (size_t i = 0; i < shape.size(); ++i) {
+    dims.emplace_back(ov::Dimension(shape[i]));
+  }
+  return ov::PartialShape(dims);
+}
+
 FDDataType OpenVINODataTypeToFD(const ov::element::Type& type) {
   if (type == ov::element::f32) {
     return FDDataType::FP32;
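For orientation, a minimal standalone sketch of what the new VecToPartialShape helper amounts to, assuming only the OpenVINO runtime headers (the shape values are placeholders): every int64_t entry becomes a static ov::Dimension, and the resulting ov::PartialShape is what ov::Model::reshape expects.

#include <openvino/openvino.hpp>

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Mirror of the conversion performed by VecToPartialShape.
  std::vector<int64_t> shape = {1, 3, 224, 224};
  std::vector<ov::Dimension> dims;
  for (int64_t d : shape) {
    dims.emplace_back(d);  // each entry becomes a static dimension
  }
  ov::PartialShape partial_shape(dims);
  std::cout << partial_shape << std::endl;  // prints the reconstructed shape
  return 0;
}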
@@ -100,6 +108,26 @@ bool OpenVINOBackend::InitFromPaddle(const std::string& model_file,
   option_ = option;
 
   std::shared_ptr<ov::Model> model = core_.read_model(model_file, params_file);
+  if (option_.shape_infos.size() > 0) {
+    std::map<std::string, ov::PartialShape> shape_infos;
+    for (const auto& item : option_.shape_infos) {
+      shape_infos[item.first] = VecToPartialShape(item.second);
+    }
+    model->reshape(shape_infos);
+  }
+
+  if (option_.device.find("HETERO") != std::string::npos) {
+    auto supported_ops = core_.query_model(model, option_.device);
+    for (auto&& op : model->get_ops()) {
+      auto& affinity = supported_ops[op->get_friendly_name()];
+      if (option_.cpu_operators.find(op->description()) !=
+          option_.cpu_operators.end()) {
+        op->get_rt_info()["affinity"] = "CPU";
+      } else {
+        op->get_rt_info()["affinity"] = affinity;
+      }
+    }
+  }
 
   // Get inputs/outputs information from loaded model
   const std::vector<ov::Output<ov::Node>> inputs = model->inputs();
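Below is a hedged sketch of how a caller might fill the fields this hunk reads. Only the field names (device, cpu_thread_num, num_streams, cpu_operators, shape_infos) come from the diff; the struct name, defaults, and the "MulticlassNms" operator are illustrative placeholders, not the real FastDeploy option type.

#include <cstdint>
#include <map>
#include <set>
#include <string>
#include <vector>

// Hypothetical stand-in for the backend option consumed above.
struct OpenVINOBackendOptionSketch {
  std::string device = "CPU";
  int cpu_thread_num = -1;
  int num_streams = 0;
  std::set<std::string> cpu_operators;                      // op types pinned to CPU
  std::map<std::string, std::vector<int64_t>> shape_infos;  // input name -> static shape
};

int main() {
  OpenVINOBackendOptionSketch option;
  option.device = "HETERO:GPU,CPU";                // prefer the Intel GPU, fall back to CPU
  option.cpu_operators = {"MulticlassNms"};        // hypothetical op type kept on CPU
  option.shape_infos["image"] = {1, 3, 640, 640};  // fix a dynamic input shape before compile
  return 0;
}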
@@ -151,14 +179,25 @@ bool OpenVINOBackend::InitFromPaddle(const std::string& model_file,
   if (option_.cpu_thread_num > 0) {
     properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
   }
-  if (option_.ov_num_streams == -1) {
-    properties["NUM_STREAMS"] = ov::streams::AUTO;
-  } else if (option_.ov_num_streams == -2) {
-    properties["NUM_STREAMS"] = ov::streams::NUMA;
-  } else if (option_.ov_num_streams > 0) {
-    properties["NUM_STREAMS"] = option_.ov_num_streams;
+  if (option_.device == "CPU") {
+    if (option_.num_streams == -1) {
+      properties["NUM_STREAMS"] = ov::streams::AUTO;
+    } else if (option_.num_streams == -2) {
+      properties["NUM_STREAMS"] = ov::streams::NUMA;
+    } else if (option_.num_streams > 0) {
+      properties["NUM_STREAMS"] = option_.num_streams;
+    }
+  } else {
+    if (option_.num_streams != 0) {
+      FDWARNING << "NUM_STREAMS only available on device CPU, currently the "
+                   "device is set as "
+                << option_.device << ", the NUM_STREAMS will be ignored."
+                << std::endl;
+    }
   }
-  FDINFO << "Compile OpenVINO model on device_name:" << option.device << "." << std::endl;
+
+  FDINFO << "Compile OpenVINO model on device_name:" << option.device << "."
+         << std::endl;
   compiled_model_ = core_.compile_model(model, option.device, properties);
 
   request_ = compiled_model_.create_infer_request();
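For reference, a generic OpenVINO sketch (outside FastDeploy, placeholder model path) of how the properties set above reach the plugin; "NUM_STREAMS" with ov::streams::AUTO or ov::streams::NUMA is exactly what the diff maps -1 and -2 to, and it is a CPU-plugin property, which is why the new code warns and skips it on other devices.

#include <openvino/openvino.hpp>

#include <memory>

int main() {
  ov::Core core;
  std::shared_ptr<ov::Model> model = core.read_model("model.onnx");  // placeholder path

  ov::AnyMap properties;
  properties["INFERENCE_NUM_THREADS"] = 8;        // pin the CPU thread count
  properties["NUM_STREAMS"] = ov::streams::AUTO;  // or ov::streams::NUMA, or a positive count

  ov::CompiledModel compiled = core.compile_model(model, "CPU", properties);
  ov::InferRequest request = compiled.create_infer_request();
  return 0;
}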
@@ -199,6 +238,27 @@ bool OpenVINOBackend::InitFromOnnx(const std::string& model_file,
 
   std::shared_ptr<ov::Model> model = core_.read_model(model_file);
 
+  if (option_.shape_infos.size() > 0) {
+    std::map<std::string, ov::PartialShape> shape_infos;
+    for (const auto& item : option_.shape_infos) {
+      shape_infos[item.first] = VecToPartialShape(item.second);
+    }
+    model->reshape(shape_infos);
+  }
+
+  if (option_.device.find("HETERO") != std::string::npos) {
+    auto supported_ops = core_.query_model(model, option_.device);
+    for (auto&& op : model->get_ops()) {
+      auto& affinity = supported_ops[op->get_friendly_name()];
+      if (option_.cpu_operators.find(op->description()) !=
+          option_.cpu_operators.end()) {
+        op->get_rt_info()["affinity"] = "CPU";
+      } else {
+        op->get_rt_info()["affinity"] = affinity;
+      }
+    }
+  }
+
   // Get inputs/outputs information from loaded model
   const std::vector<ov::Output<ov::Node>> inputs = model->inputs();
   std::map<std::string, TensorInfo> input_infos;
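The heterogeneous-affinity logic added to both InitFromPaddle and InitFromOnnx reduces to the following standalone OpenVINO pattern: ask the HETERO plugin where it would place each operation, override the affinity for selected op types through rt_info, then compile for the combined device. The model path and the pinned op type below are placeholders.

#include <openvino/openvino.hpp>

#include <set>
#include <string>

int main() {
  ov::Core core;
  auto model = core.read_model("model.onnx");   // placeholder path
  const std::string device = "HETERO:GPU,CPU";  // try the GPU first, fall back to CPU

  std::set<std::string> cpu_only = {"MulticlassNms"};  // hypothetical op types to pin to CPU

  // Ask the HETERO plugin for its default placement of every operation.
  ov::SupportedOpsMap supported = core.query_model(model, device);
  for (auto&& op : model->get_ops()) {
    auto& affinity = supported[op->get_friendly_name()];
    // Pin the selected op types to CPU, keep the plugin's choice for the rest.
    op->get_rt_info()["affinity"] =
        cpu_only.count(op->description()) ? std::string("CPU") : affinity;
  }

  ov::CompiledModel compiled = core.compile_model(model, device);
  return 0;
}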
@@ -249,18 +309,29 @@ bool OpenVINOBackend::InitFromOnnx(const std::string& model_file,
   if (option_.cpu_thread_num > 0) {
     properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
   }
-  if (option_.ov_num_streams == -1) {
-    properties["NUM_STREAMS"] = ov::streams::AUTO;
-  } else if (option_.ov_num_streams == -2) {
-    properties["NUM_STREAMS"] = ov::streams::NUMA;
-  } else if (option_.ov_num_streams > 0) {
-    properties["NUM_STREAMS"] = option_.ov_num_streams;
+  if (option_.device == "CPU") {
+    if (option_.num_streams == -1) {
+      properties["NUM_STREAMS"] = ov::streams::AUTO;
+    } else if (option_.num_streams == -2) {
+      properties["NUM_STREAMS"] = ov::streams::NUMA;
+    } else if (option_.num_streams > 0) {
+      properties["NUM_STREAMS"] = option_.num_streams;
+    }
+  } else {
+    if (option_.num_streams != 0) {
+      FDWARNING << "NUM_STREAMS only available on device CPU, currently the "
+                   "device is set as "
+                << option_.device << ", the NUM_STREAMS will be ignored."
+                << std::endl;
+    }
   }
-  FDINFO << "Compile OpenVINO model on device_name:" << option.device << "." << std::endl;
+
+  FDINFO << "Compile OpenVINO model on device_name:" << option.device << "."
+         << std::endl;
   compiled_model_ = core_.compile_model(model, option.device, properties);
 
   request_ = compiled_model_.create_infer_request();
 
 
   initialized_ = true;
   return true;
 }
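If it is unclear whether a given build actually exposes a "GPU" device before requesting "GPU" or "HETERO:GPU,CPU", the core can be queried directly; a generic OpenVINO sketch, not FastDeploy code:

#include <openvino/openvino.hpp>

#include <iostream>
#include <string>

int main() {
  ov::Core core;
  // Typically lists "CPU" and, when the Intel GPU plugin and driver are present, "GPU".
  for (const std::string& device : core.get_available_devices()) {
    std::cout << device << std::endl;
  }
  return 0;
}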
@@ -302,13 +373,16 @@ bool OpenVINOBackend::Infer(std::vector<FDTensor>& inputs,
   return true;
 }
 
-std::unique_ptr<BaseBackend> OpenVINOBackend::Clone(void *stream, int device_id) {
-  std::unique_ptr<BaseBackend> new_backend = utils::make_unique<OpenVINOBackend>();
+std::unique_ptr<BaseBackend> OpenVINOBackend::Clone(void* stream,
+                                                    int device_id) {
+  std::unique_ptr<BaseBackend> new_backend =
+      utils::make_unique<OpenVINOBackend>();
   auto casted_backend = dynamic_cast<OpenVINOBackend*>(new_backend.get());
   casted_backend->option_ = option_;
   casted_backend->request_ = compiled_model_.create_infer_request();
   casted_backend->input_infos_.assign(input_infos_.begin(), input_infos_.end());
-  casted_backend->output_infos_.assign(output_infos_.begin(), output_infos_.end());
+  casted_backend->output_infos_.assign(output_infos_.begin(),
+                                       output_infos_.end());
   return new_backend;
 }
 
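Clone reuses the already compiled model and only creates a fresh infer request, so a cloned backend can run inference concurrently without recompiling. A minimal sketch of the underlying OpenVINO behavior (placeholder model path):

#include <openvino/openvino.hpp>

int main() {
  ov::Core core;
  ov::CompiledModel compiled =
      core.compile_model(core.read_model("model.onnx"), "CPU");  // placeholder path

  // Several infer requests can share one compiled model; each owns its own
  // input/output tensors, which is what keeps cloned backends independent.
  ov::InferRequest request_a = compiled.create_infer_request();
  ov::InferRequest request_b = compiled.create_infer_request();
  return 0;
}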