Support Poros Backend (#188)

* Add poros backend * Add torch lib * Add python3 lib * set c++ 14 for poros * fixed bugs * fixed grammer bugs * fixed grammer bugs * fixed code bugs * fixed code bugs * fixed CreatePorosValue bug * Add AtType2String for Log * fixed trt_option * fixed poros.cmake path * fixed grammer bug * fixed grammer bug * fixed ambiguous reference * fixed ambiguous reference * fixed reference error * fixed include files * rm ENABLE_TRT_BACKEND in poros * update CMakeLists.txt * fixed CMakeLists.txt * Add libtorch.so in CMakeLists.txt * Fixed CMakeLists.txt * Fixed CMakeLists.txt * Fixed copy bug * Fixed copy bug * Fixed copy bug * Fixed Cmake * Fixed Cmake * debug * debug * debug * debug * debug * debug * debug utils * debug utils * copy to cpu * rm log info * test share mem * test share mem * test share mem * test multi outputs * test multi outputs * test multi outputs * test multi outputs * test multi outputs * test multi outputs * test multi outputs * time cost * time cost * fixed bug * time collect * mem copy * mem copy * rm time log * rm share mem * fixed multi inputs bug * add set_input_dtypes func * add SetInputDtypes * fixed bug * fixed bug * fixed prewarm data order * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * debug * fixed bug * Add compile func * Add compile func * Add compile func * Add is_dynamic option * Add is_dynamic option * Add is_dynamic option * Add is_dynamic option * rm infer log * add cuda11.6 poros lib * fixed bug * fixed bug * fixed multi outputs * fixed multi outputs * fixed multi outputs * fixed multi outputs * fixed multi outputs * fixed multi outputs * fixed multi outputs * fixed multi outputs * fixed multi outputs * fixed multi outputs * fixed multi outputs * rm logs * test * test * test * add test log * add test log * add test log * add test log * support cpu * support cpu * support cpu * support cpu * support member variable definition * rm useless log * fixed name * resolve conflict * resolve conflict * 
resolve conflict * fixed cmake * add GetInputInfos&GetOutputInfos * add GetInputInfos&GetOutputInfos * fixed bug * fixed runtime.py * add compile func * add np * deal with comments * rm to_inter func * add property
2025-10-06 17:17:14 +08:00 · 2022-10-17 15:28:12 +08:00
parent c8db2dd1ef
commit f5c94e5471
19 changed files with 1333 additions and 12 deletions
--- a/fastdeploy/pybind/runtime.cc
+++ b/fastdeploy/pybind/runtime.cc
@@ -24,6 +24,7 @@ void BindRuntime(pybind11::module& m) {
      .def("use_cpu", &RuntimeOption::UseCpu)
      .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
      .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
+      .def("use_poros_backend", &RuntimeOption::UsePorosBackend)
      .def("use_ort_backend", &RuntimeOption::UseOrtBackend)
      .def("set_ort_graph_opt_level", &RuntimeOption::SetOrtGraphOptLevel)
      .def("use_trt_backend", &RuntimeOption::UseTrtBackend)
@@ -62,7 +63,12 @@ void BindRuntime(pybind11::module& m) {
      .def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8)
      .def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size)
      .def_readwrite("trt_max_workspace_size",
-                     &RuntimeOption::trt_max_workspace_size);
+                     &RuntimeOption::trt_max_workspace_size)
+      .def_readwrite("is_dynamic", &RuntimeOption::is_dynamic)
+      .def_readwrite("long_to_int", &RuntimeOption::long_to_int)
+      .def_readwrite("use_nvidia_tf32", &RuntimeOption::use_nvidia_tf32)
+      .def_readwrite("unconst_ops_thres", &RuntimeOption::unconst_ops_thres)
+      .def_readwrite("poros_file", &RuntimeOption::poros_file);

  pybind11::class_<TensorInfo>(m, "TensorInfo")
      .def_readwrite("name", &TensorInfo::name)
@@ -72,6 +78,30 @@ void BindRuntime(pybind11::module& m) {
  pybind11::class_<Runtime>(m, "Runtime")
      .def(pybind11::init())
      .def("init", &Runtime::Init)
+      // Python binding for Runtime::Compile.
+      //
+      // warm_datas is a list of rows, each row being a list of numpy arrays.
+      // Every array is deep-copied into an FDTensor with the same shape and
+      // the dtype mapped by NumpyDataTypeToFDDataType, and the resulting
+      // rows x columns tensor table is forwarded, together with _option, to
+      // Runtime::Compile, whose result is returned unchanged.
+      //
+      // Raises ValueError (pybind11::value_error) when the rows do not all
+      // contain the same number of arrays. An empty warm_datas is forwarded
+      // as an empty table.
+      .def("compile",
+           [](Runtime& self,
+              std::vector<std::vector<pybind11::array>>& warm_datas,
+              const RuntimeOption& _option) {
+             const size_t rows = warm_datas.size();
+             // Do not touch warm_datas[0] when the outer list is empty.
+             const size_t columns = rows == 0 ? 0 : warm_datas[0].size();
+             std::vector<std::vector<FDTensor>> warm_tensors(
+                 rows, std::vector<FDTensor>(columns));
+             for (size_t i = 0; i < rows; ++i) {
+               // A shorter row would be indexed out of bounds below and a
+               // longer one would be silently truncated, so reject both.
+               if (warm_datas[i].size() != columns) {
+                 throw pybind11::value_error(
+                     "compile: every row of warm_datas must contain the "
+                     "same number of arrays.");
+               }
+               for (size_t j = 0; j < columns; ++j) {
+                 const pybind11::array& src = warm_datas[i][j];
+                 auto dtype = NumpyDataTypeToFDDataType(src.dtype());
+                 std::vector<int64_t> data_shape(src.shape(),
+                                                 src.shape() + src.ndim());
+                 warm_tensors[i][j].Resize(data_shape, dtype);
+                 // The source is only read, so use data() rather than
+                 // mutable_data(): the latter rejects non-writeable arrays.
+                 // NOTE(review): the raw byte copy presumes `src` is
+                 // C-contiguous - confirm the Python caller guarantees it.
+                 if (src.nbytes() > 0) {
+                   memcpy(warm_tensors[i][j].MutableData(), src.data(),
+                          src.nbytes());
+                 }
+               }
+             }
+             return self.Compile(warm_tensors, _option);
+           })
      .def("infer",
           [](Runtime& self, std::vector<FDTensor>& inputs) {
             std::vector<FDTensor> outputs(self.NumOutputs());
@@ -121,11 +151,13 @@ void BindRuntime(pybind11::module& m) {
      .value("UNKOWN", Backend::UNKNOWN)
      .value("ORT", Backend::ORT)
      .value("TRT", Backend::TRT)
+      .value("POROS", Backend::POROS)
      .value("PDINFER", Backend::PDINFER)
      .value("LITE", Backend::LITE);
  pybind11::enum_<ModelFormat>(m, "ModelFormat", pybind11::arithmetic(),
                               "ModelFormat for inference.")
      .value("PADDLE", ModelFormat::PADDLE)
+      .value("TORCHSCRIPT", ModelFormat::TORCHSCRIPT)
      .value("ONNX", ModelFormat::ONNX);
  pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(),
                          "Device for inference.")