Support Poros Backend (#188)

* Add poros backend

* Add torch lib

* Add python3 lib

* set c++ 14 for poros

* fixed bugs

* fixed grammer bugs

* fixed grammer bugs

* fixed code bugs

* fixed code bugs

* fixed CreatePorosValue bug

* Add AtType2String for Log

* fixed trt_option

* fixed poros.cmake path

* fixed grammer bug

* fixed grammer bug

* fixed ambiguous reference

* fixed ambiguous reference

* fixed reference error

* fixed include files

* rm ENABLE_TRT_BACKEND in poros

* update CMakeLists.txt

* fixed CMakeLists.txt

* Add libtorch.so in CMakeLists.txt

* Fixed CMakeLists.txt

* Fixed CMakeLists.txt

* Fixed copy bug

* Fixed copy bug

* Fixed copy bug

* Fixed Cmake

* Fixed Cmake

* debug

* debug

* debug

* debug

* debug

* debug

* debug utils

* debug utils

* copy to cpu

* rm log info

* test share mem

* test share mem

* test share mem

* test multi outputs

* test multi outputs

* test multi outputs

* test multi outputs

* test multi outputs

* test multi outputs

* test multi outputs

* time cost

* time cost

* fixed bug

* time collect

* mem copy

* mem copy

* rm time log

* rm share mem

* fixed multi inputs bug

* add set_input_dtypes func

* add SetInputDtypes

* fixed bug

* fixed bug

* fixed prewarm data order

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* fixed bug

* Add compile func

* Add compile func

* Add compile func

* Add is_dynamic option

* Add is_dynamic option

* Add is_dynamic option

* Add is_dynamic option

* rm infer log

* add cuda11.6 poros lib

* fixed bug

* fixed bug

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* rm logs

* test

* test

* test

* add test log

* add test log

* add test log

* add test log

* support cpu

* support cpu

* support cpu

* support cpu

* support member variable definition

* rm useless log

* fixed name

* resolve conflict

* resolve conflict

* resolve conflict

* fixed cmake

* add GetInputInfos&GetOutputInfos

* add GetInputInfos&GetOutputInfos

* fixed bug

* fixed runtime.py

* add compile func

* add np

* deal with comments

* rm to_inter func

* add property
This commit is contained in:
WJJ1995
2022-10-17 15:28:12 +08:00
committed by GitHub
parent c8db2dd1ef
commit f5c94e5471
19 changed files with 1333 additions and 12 deletions

View File

@@ -24,6 +24,7 @@ void BindRuntime(pybind11::module& m) {
.def("use_cpu", &RuntimeOption::UseCpu)
.def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
.def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
.def("use_poros_backend", &RuntimeOption::UsePorosBackend)
.def("use_ort_backend", &RuntimeOption::UseOrtBackend)
.def("set_ort_graph_opt_level", &RuntimeOption::SetOrtGraphOptLevel)
.def("use_trt_backend", &RuntimeOption::UseTrtBackend)
@@ -62,7 +63,12 @@ void BindRuntime(pybind11::module& m) {
.def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8)
.def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size)
.def_readwrite("trt_max_workspace_size",
&RuntimeOption::trt_max_workspace_size);
&RuntimeOption::trt_max_workspace_size)
.def_readwrite("is_dynamic", &RuntimeOption::is_dynamic)
.def_readwrite("long_to_int", &RuntimeOption::long_to_int)
.def_readwrite("use_nvidia_tf32", &RuntimeOption::use_nvidia_tf32)
.def_readwrite("unconst_ops_thres", &RuntimeOption::unconst_ops_thres)
.def_readwrite("poros_file", &RuntimeOption::poros_file);
pybind11::class_<TensorInfo>(m, "TensorInfo")
.def_readwrite("name", &TensorInfo::name)
@@ -72,6 +78,30 @@ void BindRuntime(pybind11::module& m) {
pybind11::class_<Runtime>(m, "Runtime")
.def(pybind11::init())
.def("init", &Runtime::Init)
.def("compile",
[](Runtime& self,
std::vector<std::vector<pybind11::array>>& warm_datas,
const RuntimeOption& _option) {
size_t rows = warm_datas.size();
size_t columns = warm_datas[0].size();
std::vector<std::vector<FDTensor>> warm_tensors(
rows, std::vector<FDTensor>(columns));
for (size_t i = 0; i < rows; ++i) {
for (size_t j = 0; j < columns; ++j) {
auto dtype =
NumpyDataTypeToFDDataType(warm_datas[i][j].dtype());
std::vector<int64_t> data_shape;
data_shape.insert(
data_shape.begin(), warm_datas[i][j].shape(),
warm_datas[i][j].shape() + warm_datas[i][j].ndim());
warm_tensors[i][j].Resize(data_shape, dtype);
memcpy(warm_tensors[i][j].MutableData(),
warm_datas[i][j].mutable_data(),
warm_datas[i][j].nbytes());
}
}
return self.Compile(warm_tensors, _option);
})
.def("infer",
[](Runtime& self, std::vector<FDTensor>& inputs) {
std::vector<FDTensor> outputs(self.NumOutputs());
@@ -121,11 +151,13 @@ void BindRuntime(pybind11::module& m) {
.value("UNKOWN", Backend::UNKNOWN)
.value("ORT", Backend::ORT)
.value("TRT", Backend::TRT)
.value("POROS", Backend::POROS)
.value("PDINFER", Backend::PDINFER)
.value("LITE", Backend::LITE);
pybind11::enum_<ModelFormat>(m, "ModelFormat", pybind11::arithmetic(),
"ModelFormat for inference.")
.value("PADDLE", ModelFormat::PADDLE)
.value("TORCHSCRIPT", ModelFormat::TORCHSCRIPT)
.value("ONNX", ModelFormat::ONNX);
pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(),
"Device for inference.")