Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 16:48:03 +08:00)

* 10-29/14:05 * Added cmake * Added rknpu2 backend * 10-29/14:43 * Added RKNPU code for Runtime fd_type * 10-29/15:02 * Added ppseg RKNPU2 inference code * 10-29/15:46 * Added ppseg RKNPU2 cpp example code * 10-29/15:51 * Added README docs * 10-29/15:51 * Revised some comments and variable names as requested * 10-29/15:51 * Fixed a bug where some code in the cc files still used the old function names after renaming * 10-29/22:32 * str(Device::NPU) now prints NPU instead of UNKOWN * Fixed the comment formatting in the runtime files * Added ENABLE_RKNPU2_BACKEND to the Building Summary output * Added rknpu2 support to pybind * Added the Python build option * Added PPSeg Python code * Added and updated various docs * 10-30/14:11 * Attempted to fix errors when building with CUDA * 10-30/19:27 * Moved the CpuName and CoreMask level * Adjusted the ppseg rknn inference hierarchy * Images are now downloaded from the network * 10-30/19:39 * Updated docs * 10-30/19:39 * Updated docs * Updated function naming in the ppseg rknpu2 example * Merged the ppseg rknpu2 example into a single cc file * Fixed a logic error in the disable_normalize_and_permute path * Removed unused parameters from rknpu2 initialization * 10-30/19:39 * Attempted to reset the Python code * 10-30/10:16 * rknpu2_config.h no longer includes the rknn_api header, to prevent import errors * 10-31/14:31 * Updated pybind to support the latest rknpu2 backends * Re-enabled ppseg Python inference * Moved the cpuname and coremask level * 10-31/15:35 * Attempted to fix rknpu2 import errors * 10-31/19:00 * Added RKNPU2 model export code and its documentation * Fixed a large number of documentation errors * 10-31/19:00 * RKNN2_TARGET_SOC no longer needs to be set again after the fastdeploy repo is built * 10-31/19:26 * Fixed some incorrect docs * 10-31/19:26 * Restored sections deleted by mistake * Fixed various incorrect docs * Fixed the message FastDeploy.cmake prints when RKNN2_TARGET_SOC is set incorrectly * Fixed Chinese comments in rknpu2_backend.cc * 10-31/20:45 * Removed useless comments * 10-31/20:45 * Renamed Device::NPU to Device::RKNPU as requested; hardware now shares valid_hardware_backends * Removed useless comments and debug code * 11-01/09:45 * Updated variable naming * 11-01/10:16 * Revised some docs and function naming Co-authored-by: Jason <jiangjiajun@baidu.com>
236 lines
12 KiB
C++
Executable File
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/pybind/main.h"
|
|
|
|
namespace fastdeploy {
|
|
|
|
void BindRuntime(pybind11::module& m) {
|
|
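  // RuntimeOption carries all user-facing inference configuration:
  // device/backend selection, thread counts and per-backend tuning knobs.
  // Each bound method forwards directly to the corresponding C++ member
  // function of RuntimeOption.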
  pybind11::class_<RuntimeOption>(m, "RuntimeOption")
      .def(pybind11::init())
      .def("set_model_path", &RuntimeOption::SetModelPath)
      .def("use_gpu", &RuntimeOption::UseGpu)
      .def("use_cpu", &RuntimeOption::UseCpu)
      .def("use_rknpu2", &RuntimeOption::UseRKNPU2)
      .def("set_external_stream", &RuntimeOption::SetExternalStream)
      .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
      .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
      .def("use_poros_backend", &RuntimeOption::UsePorosBackend)
      .def("use_ort_backend", &RuntimeOption::UseOrtBackend)
      .def("set_ort_graph_opt_level", &RuntimeOption::SetOrtGraphOptLevel)
      .def("use_trt_backend", &RuntimeOption::UseTrtBackend)
      .def("use_openvino_backend", &RuntimeOption::UseOpenVINOBackend)
      .def("use_lite_backend", &RuntimeOption::UseLiteBackend)
      .def("set_paddle_mkldnn", &RuntimeOption::SetPaddleMKLDNN)
      .def("enable_paddle_log_info", &RuntimeOption::EnablePaddleLogInfo)
      .def("disable_paddle_log_info", &RuntimeOption::DisablePaddleLogInfo)
      .def("set_paddle_mkldnn_cache_size",
           &RuntimeOption::SetPaddleMKLDNNCacheSize)
      .def("enable_lite_fp16", &RuntimeOption::EnableLiteFP16)
      .def("disable_lite_fp16", &RuntimeOption::DisableLiteFP16)
      .def("set_lite_power_mode", &RuntimeOption::SetLitePowerMode)
      .def("set_trt_input_shape", &RuntimeOption::SetTrtInputShape)
      .def("set_trt_max_workspace_size", &RuntimeOption::SetTrtMaxWorkspaceSize)
      .def("enable_paddle_to_trt", &RuntimeOption::EnablePaddleToTrt)
      .def("enable_trt_fp16", &RuntimeOption::EnableTrtFP16)
      .def("disable_trt_fp16", &RuntimeOption::DisableTrtFP16)
      .def("set_trt_cache_file", &RuntimeOption::SetTrtCacheFile)
      .def("enable_pinned_memory", &RuntimeOption::EnablePinnedMemory)
      .def("disable_pinned_memory", &RuntimeOption::DisablePinnedMemory)
      .def("enable_paddle_trt_collect_shape",
           &RuntimeOption::EnablePaddleTrtCollectShape)
      .def("disable_paddle_trt_collect_shape",
           &RuntimeOption::DisablePaddleTrtCollectShape)
      .def("use_ipu", &RuntimeOption::UseIpu)
      .def("set_ipu_config", &RuntimeOption::SetIpuConfig)
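      // Plain data members exposed directly as readable/writable attributes.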
.def_readwrite("model_file", &RuntimeOption::model_file)
|
|
.def_readwrite("params_file", &RuntimeOption::params_file)
|
|
.def_readwrite("model_format", &RuntimeOption::model_format)
|
|
.def_readwrite("backend", &RuntimeOption::backend)
|
|
.def_readwrite("backend", &RuntimeOption::external_stream_)
|
|
.def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
|
|
.def_readwrite("device_id", &RuntimeOption::device_id)
|
|
.def_readwrite("device", &RuntimeOption::device)
|
|
.def_readwrite("ort_graph_opt_level", &RuntimeOption::ort_graph_opt_level)
|
|
.def_readwrite("ort_inter_op_num_threads",
|
|
&RuntimeOption::ort_inter_op_num_threads)
|
|
.def_readwrite("ort_execution_mode", &RuntimeOption::ort_execution_mode)
|
|
.def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape)
|
|
.def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape)
|
|
.def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape)
|
|
.def_readwrite("trt_serialize_file", &RuntimeOption::trt_serialize_file)
|
|
.def_readwrite("trt_enable_fp16", &RuntimeOption::trt_enable_fp16)
|
|
.def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8)
|
|
.def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size)
|
|
.def_readwrite("trt_max_workspace_size",
|
|
&RuntimeOption::trt_max_workspace_size)
|
|
.def_readwrite("is_dynamic", &RuntimeOption::is_dynamic)
|
|
.def_readwrite("long_to_int", &RuntimeOption::long_to_int)
|
|
.def_readwrite("use_nvidia_tf32", &RuntimeOption::use_nvidia_tf32)
|
|
.def_readwrite("unconst_ops_thres", &RuntimeOption::unconst_ops_thres)
|
|
.def_readwrite("poros_file", &RuntimeOption::poros_file)
|
|
.def_readwrite("ipu_device_num", &RuntimeOption::ipu_device_num)
|
|
.def_readwrite("ipu_micro_batch_size",
|
|
&RuntimeOption::ipu_micro_batch_size)
|
|
.def_readwrite("ipu_enable_pipelining",
|
|
&RuntimeOption::ipu_enable_pipelining)
|
|
.def_readwrite("ipu_batches_per_step",
|
|
&RuntimeOption::ipu_batches_per_step)
|
|
.def_readwrite("ipu_enable_fp16", &RuntimeOption::ipu_enable_fp16)
|
|
.def_readwrite("ipu_replica_num", &RuntimeOption::ipu_replica_num)
|
|
.def_readwrite("ipu_available_memory_proportion",
|
|
&RuntimeOption::ipu_available_memory_proportion)
|
|
.def_readwrite("ipu_enable_half_partial",
|
|
&RuntimeOption::ipu_enable_half_partial);
|
|
|
|
  pybind11::class_<TensorInfo>(m, "TensorInfo")
      .def_readwrite("name", &TensorInfo::name)
      .def_readwrite("shape", &TensorInfo::shape)
      .def_readwrite("dtype", &TensorInfo::dtype);

  pybind11::class_<Runtime>(m, "Runtime")
      .def(pybind11::init())
      .def("init", &Runtime::Init)
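      // compile(): converts the nested list of numpy warm-up arrays into
      // FDTensor batches (one memcpy per array) before calling
      // Runtime::Compile; used by backends that need warm-up data, such as
      // Poros.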
.def("compile",
|
|
[](Runtime& self,
|
|
std::vector<std::vector<pybind11::array>>& warm_datas,
|
|
const RuntimeOption& _option) {
|
|
size_t rows = warm_datas.size();
|
|
size_t columns = warm_datas[0].size();
|
|
std::vector<std::vector<FDTensor>> warm_tensors(
|
|
rows, std::vector<FDTensor>(columns));
|
|
for (size_t i = 0; i < rows; ++i) {
|
|
for (size_t j = 0; j < columns; ++j) {
|
|
auto dtype =
|
|
NumpyDataTypeToFDDataType(warm_datas[i][j].dtype());
|
|
std::vector<int64_t> data_shape;
|
|
data_shape.insert(
|
|
data_shape.begin(), warm_datas[i][j].shape(),
|
|
warm_datas[i][j].shape() + warm_datas[i][j].ndim());
|
|
warm_tensors[i][j].Resize(data_shape, dtype);
|
|
memcpy(warm_tensors[i][j].MutableData(),
|
|
warm_datas[i][j].mutable_data(),
|
|
warm_datas[i][j].nbytes());
|
|
}
|
|
}
|
|
return self.Compile(warm_tensors, _option);
|
|
})
|
|
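      // infer() overload taking a list of FDTensors; output tensors are
      // allocated here and filled by Runtime::Infer.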
.def("infer",
|
|
[](Runtime& self, std::vector<FDTensor>& inputs) {
|
|
std::vector<FDTensor> outputs(self.NumOutputs());
|
|
self.Infer(inputs, &outputs);
|
|
return outputs;
|
|
})
|
|
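      // infer() overload taking a dict of {name: numpy array}; inputs are
      // copied into FDTensors and outputs are copied back into new numpy
      // arrays.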
.def("infer",
|
|
[](Runtime& self, std::map<std::string, pybind11::array>& data) {
|
|
std::vector<FDTensor> inputs(data.size());
|
|
int index = 0;
|
|
for (auto iter = data.begin(); iter != data.end(); ++iter) {
|
|
std::vector<int64_t> data_shape;
|
|
data_shape.insert(data_shape.begin(), iter->second.shape(),
|
|
iter->second.shape() + iter->second.ndim());
|
|
auto dtype = NumpyDataTypeToFDDataType(iter->second.dtype());
|
|
// TODO(jiangjiajun) Maybe skip memory copy is a better choice
|
|
// use SetExternalData
|
|
inputs[index].Resize(data_shape, dtype);
|
|
memcpy(inputs[index].MutableData(), iter->second.mutable_data(),
|
|
iter->second.nbytes());
|
|
inputs[index].name = iter->first;
|
|
index += 1;
|
|
}
|
|
|
|
std::vector<FDTensor> outputs(self.NumOutputs());
|
|
self.Infer(inputs, &outputs);
|
|
|
|
std::vector<pybind11::array> results;
|
|
results.reserve(outputs.size());
|
|
for (size_t i = 0; i < outputs.size(); ++i) {
|
|
auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
|
|
results.emplace_back(
|
|
pybind11::array(numpy_dtype, outputs[i].shape));
|
|
memcpy(results[i].mutable_data(), outputs[i].Data(),
|
|
outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
|
|
}
|
|
return results;
|
|
})
|
|
.def("num_inputs", &Runtime::NumInputs)
|
|
.def("num_outputs", &Runtime::NumOutputs)
|
|
.def("get_input_info", &Runtime::GetInputInfo)
|
|
.def("get_output_info", &Runtime::GetOutputInfo)
|
|
.def_readonly("option", &Runtime::option);
|
|
|
|
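  // Enumerations mirrored into Python; the exported names must stay in sync
  // with their C++ counterparts.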
  pybind11::enum_<Backend>(m, "Backend", pybind11::arithmetic(),
                           "Backend for inference.")
.value("UNKOWN", Backend::UNKNOWN)
|
|
.value("ORT", Backend::ORT)
|
|
.value("TRT", Backend::TRT)
|
|
.value("POROS", Backend::POROS)
|
|
.value("PDINFER", Backend::PDINFER)
|
|
.value("RKNPU2", Backend::RKNPU2)
|
|
.value("LITE", Backend::LITE);
|
|
pybind11::enum_<ModelFormat>(m, "ModelFormat", pybind11::arithmetic(),
|
|
"ModelFormat for inference.")
|
|
.value("PADDLE", ModelFormat::PADDLE)
|
|
.value("TORCHSCRIPT", ModelFormat::TORCHSCRIPT)
|
|
.value("RKNN", ModelFormat::RKNN)
|
|
.value("ONNX", ModelFormat::ONNX);
|
|
pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(),
|
|
"Device for inference.")
|
|
.value("CPU", Device::CPU)
|
|
.value("GPU", Device::GPU)
|
|
.value("IPU", Device::IPU)
|
|
.value("RKNPU", Device::RKNPU);
|
|
|
|
pybind11::enum_<FDDataType>(m, "FDDataType", pybind11::arithmetic(),
|
|
"Data type of FastDeploy.")
|
|
.value("BOOL", FDDataType::BOOL)
|
|
.value("INT8", FDDataType::INT8)
|
|
.value("INT16", FDDataType::INT16)
|
|
.value("INT32", FDDataType::INT32)
|
|
.value("INT64", FDDataType::INT64)
|
|
.value("FP16", FDDataType::FP16)
|
|
.value("FP32", FDDataType::FP32)
|
|
.value("FP64", FDDataType::FP64)
|
|
.value("UINT8", FDDataType::UINT8);
|
|
|
|
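  // FDTensor: the tensor type shared between the Python and C++ sides.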
  pybind11::class_<FDTensor>(m, "FDTensor", pybind11::buffer_protocol())
      .def(pybind11::init())
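      // cpu_data(): exposes the tensor's CPU buffer as a numpy array. Passing
      // a base object makes pybind11 reference the existing buffer instead of
      // copying it, so the returned array is only valid while this tensor is
      // alive.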
.def("cpu_data",
|
|
[](FDTensor& self) {
|
|
auto ptr = self.CpuData();
|
|
auto numel = self.Numel();
|
|
auto dtype = FDDataTypeToNumpyDataType(self.dtype);
|
|
auto base = pybind11::array(dtype, self.shape);
|
|
return pybind11::array(dtype, self.shape, ptr, base);
|
|
})
|
|
.def("resize", static_cast<void (FDTensor::*)(size_t)>(&FDTensor::Resize))
|
|
.def("resize",
|
|
static_cast<void (FDTensor::*)(const std::vector<int64_t>&)>(
|
|
&FDTensor::Resize))
|
|
.def(
|
|
"resize",
|
|
[](FDTensor& self, const std::vector<int64_t>& shape,
|
|
const FDDataType& dtype, const std::string& name,
|
|
const Device& device) { self.Resize(shape, dtype, name, device); })
|
|
.def("numel", &FDTensor::Numel)
|
|
.def("nbytes", &FDTensor::Nbytes)
|
|
.def_readwrite("name", &FDTensor::name)
|
|
.def_readwrite("is_pinned_memory", &FDTensor::is_pinned_memory)
|
|
.def_readonly("shape", &FDTensor::shape)
|
|
.def_readonly("dtype", &FDTensor::dtype)
|
|
.def_readonly("device", &FDTensor::device);
|
|
|
|
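  // Lists the inference backends compiled into this FastDeploy build.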
m.def("get_available_backends", []() { return GetAvailableBackends(); });
|
|
}
|
|
|
|
} // namespace fastdeploy
|