#include "fastdeploy/runtime/backends/tvm/tvm_backend.h" #include "yaml-cpp/yaml.h" namespace fastdeploy { bool TVMBackend::Init(const fastdeploy::RuntimeOption& runtime_option) { if (!(Supported(runtime_option.model_format, Backend::TVM) && Supported(runtime_option.device, Backend::TVM))) { FDERROR << "TVMBackend only supports model " "ModelFormat::TVMFormat/Backend::TVM, but now its " << runtime_option.model_format << "/" << runtime_option.device << std::endl; return false; } if (runtime_option.model_from_memory_) { FDERROR << "TVMBackend doesn't support load model from memory, please " "load model from disk." << std::endl; return false; } if (!BuildDLDevice(runtime_option.device)) { FDERROR << "TVMBackend only don't support run in this device." << std::endl; return false; } if (!BuildModel(runtime_option)) { FDERROR << "TVMBackend only don't support run with this model path." << std::endl; return false; } if (!InitInputAndOutputTensor()) { FDERROR << "InitInputAndOutputTensor failed." << std::endl; return false; } return true; } bool TVMBackend::InitInputAndOutputTensor() { input_tensor_.resize(NumInputs()); for (int i = 0; i < NumInputs(); ++i) { TensorInfo tensor_info = GetInputInfo(i); tvm::ShapeTuple shape(tensor_info.shape.begin(), tensor_info.shape.end()); input_tensor_[i] = tvm::runtime::NDArray::Empty( shape, FDDataTypeToDLDataType(tensor_info.dtype), dev_); } output_tensor_.resize(NumOutputs()); for (int i = 0; i < NumOutputs(); ++i) { TensorInfo tensor_info = GetOutputInfo(i); tvm::ShapeTuple shape(tensor_info.shape.begin(), tensor_info.shape.end()); output_tensor_[i] = tvm::runtime::NDArray::Empty( shape, FDDataTypeToDLDataType(tensor_info.dtype), dev_); } return true; } bool TVMBackend::BuildModel(const RuntimeOption& runtime_option) { // load in the library tvm::runtime::Module mod_factory = tvm::runtime::Module::LoadFromFile(runtime_option.model_file); // create the graph executor module gmod_ = mod_factory.GetFunction("default")(dev_); // load params std::ifstream params_in(runtime_option.params_file, std::ios::binary); std::string params_data((std::istreambuf_iterator(params_in)), std::istreambuf_iterator()); params_in.close(); TVMByteArray params_arr; params_arr.data = params_data.c_str(); params_arr.size = params_data.length(); tvm::runtime::PackedFunc load_params = gmod_.GetFunction("load_params"); load_params(params_arr); // read input and output info tvm::runtime::PackedFunc get_input_info = gmod_.GetFunction("get_input_info"); tvm::Map input_info = get_input_info(); auto input_info_shape = tvm::Downcast>( input_info["shape"]); inputs_desc_.reserve(input_info_shape.size()); for (auto map_node : input_info_shape) { std::string temp_name = map_node.first; tvm::ShapeTuple tup = map_node.second; std::vector temp_shape{}; temp_shape.resize(tup.size()); for (int j = 0; j < tup.size(); ++j) { temp_shape[j] = static_cast(tup[j]); } FDDataType temp_dtype = fastdeploy::UNKNOWN1; TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype}; inputs_desc_.emplace_back(temp_input_info); } int input_dtype_index = 0; auto input_info_dtype = tvm::Downcast>(input_info["dtype"]); for (auto map_node : input_info_dtype) { tvm::String tup = map_node.second; inputs_desc_[input_dtype_index].dtype = TVMTensorTypeToFDDataType(tup); input_dtype_index++; } tvm::runtime::PackedFunc get_output_info = gmod_.GetFunction("get_output_info"); tvm::Map output_info = get_output_info(); auto output_info_shape = tvm::Downcast>( output_info["shape"]); outputs_desc_.reserve(output_info_shape.size()); 
bool TVMBackend::BuildModel(const RuntimeOption& runtime_option) {
  // load the compiled library
  tvm::runtime::Module mod_factory =
      tvm::runtime::Module::LoadFromFile(runtime_option.model_file);

  // create the graph executor module
  gmod_ = mod_factory.GetFunction("default")(dev_);

  // load params
  std::ifstream params_in(runtime_option.params_file, std::ios::binary);
  std::string params_data((std::istreambuf_iterator<char>(params_in)),
                          std::istreambuf_iterator<char>());
  params_in.close();
  TVMByteArray params_arr;
  params_arr.data = params_data.c_str();
  params_arr.size = params_data.length();
  tvm::runtime::PackedFunc load_params = gmod_.GetFunction("load_params");
  load_params(params_arr);

  // read input info
  tvm::runtime::PackedFunc get_input_info =
      gmod_.GetFunction("get_input_info");
  tvm::Map<tvm::String, tvm::ObjectRef> input_info = get_input_info();
  auto input_info_shape =
      tvm::Downcast<tvm::Map<tvm::String, tvm::ShapeTuple>>(
          input_info["shape"]);
  inputs_desc_.reserve(input_info_shape.size());
  for (auto map_node : input_info_shape) {
    std::string temp_name = map_node.first;
    tvm::ShapeTuple tup = map_node.second;
    std::vector<int> temp_shape{};
    temp_shape.resize(tup.size());
    for (size_t j = 0; j < tup.size(); ++j) {
      temp_shape[j] = static_cast<int>(tup[j]);
    }
    // dtype is unknown here; it is filled in from the "dtype" map below.
    FDDataType temp_dtype = fastdeploy::UNKNOWN1;
    TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
    inputs_desc_.emplace_back(temp_input_info);
  }

  int input_dtype_index = 0;
  auto input_info_dtype =
      tvm::Downcast<tvm::Map<tvm::String, tvm::String>>(input_info["dtype"]);
  for (auto map_node : input_info_dtype) {
    tvm::String tup = map_node.second;
    inputs_desc_[input_dtype_index].dtype = TVMTensorTypeToFDDataType(tup);
    input_dtype_index++;
  }

  // read output info
  tvm::runtime::PackedFunc get_output_info =
      gmod_.GetFunction("get_output_info");
  tvm::Map<tvm::String, tvm::ObjectRef> output_info = get_output_info();
  auto output_info_shape =
      tvm::Downcast<tvm::Map<tvm::String, tvm::ShapeTuple>>(
          output_info["shape"]);
  outputs_desc_.reserve(output_info_shape.size());
  for (auto map_node : output_info_shape) {
    std::string temp_name = map_node.first;
    tvm::ShapeTuple tup = map_node.second;
    std::vector<int> temp_shape{};
    temp_shape.resize(tup.size());
    for (size_t j = 0; j < tup.size(); ++j) {
      temp_shape[j] = static_cast<int>(tup[j]);
    }
    FDDataType temp_dtype = fastdeploy::FP32;
    TensorInfo temp_output_info = {temp_name, temp_shape, temp_dtype};
    outputs_desc_.emplace_back(temp_output_info);
  }

  int output_dtype_index = 0;
  auto output_info_dtype =
      tvm::Downcast<tvm::Map<tvm::String, tvm::String>>(output_info["dtype"]);
  for (auto map_node : output_info_dtype) {
    tvm::String tup = map_node.second;
    outputs_desc_[output_dtype_index].dtype = TVMTensorTypeToFDDataType(tup);
    output_dtype_index++;
  }
  return true;
}

FDDataType TVMBackend::TVMTensorTypeToFDDataType(tvm::String type) {
  if (type == "float32") {
    return FDDataType::FP32;
  }
  FDERROR << "FDDataType doesn't support this type: " << type << std::endl;
  return FDDataType::UNKNOWN1;
}

bool TVMBackend::Infer(std::vector<FDTensor>& inputs,
                       std::vector<FDTensor>* outputs, bool copy_to_fd) {
  // copy input data into the pre-allocated NDArrays
  for (size_t i = 0; i < inputs.size(); ++i) {
    memcpy(input_tensor_[i]->data, inputs[i].Data(), inputs[i].Nbytes());
  }

  // get the function from the module (set input data)
  tvm::runtime::PackedFunc set_input = gmod_.GetFunction("set_input");
  for (int i = 0; i < NumInputs(); ++i) {
    set_input(GetInputInfo(i).name, input_tensor_[i]);
  }

  // get the function from the module (run it)
  tvm::runtime::PackedFunc run = gmod_.GetFunction("run");
  run();

  // get the function from the module (get output data)
  tvm::runtime::PackedFunc get_output = gmod_.GetFunction("get_output");
  for (int i = 0; i < NumOutputs(); ++i) {
    get_output(i, output_tensor_[i]);
  }

  // copy results back into FDTensors (copy_to_fd is effectively always true
  // for this backend: results are always copied out)
  outputs->resize(NumOutputs());
  std::vector<int64_t> temp_shape{};
  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
    temp_shape.resize(outputs_desc_[i].shape.size());
    for (size_t j = 0; j < outputs_desc_[i].shape.size(); ++j) {
      temp_shape[j] = outputs_desc_[i].shape[j];
    }
    (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype,
                         outputs_desc_[i].name);
    memcpy((*outputs)[i].MutableData(),
           static_cast<void*>(output_tensor_[i]->data),
           (*outputs)[i].Nbytes());
  }
  return true;
}

bool TVMBackend::BuildDLDevice(fastdeploy::Device device) {
  if (device == Device::CPU) {
    dev_ = DLDevice{kDLCPU, 0};
  } else {
    FDERROR << "TVMBackend only supports running on CPU." << std::endl;
    return false;
  }
  return true;
}

DLDataType TVMBackend::FDDataTypeToDLDataType(fastdeploy::FDDataType dtype) {
  if (dtype == FDDataType::FP32) {
    return DLDataType{kDLFloat, 32, 1};
  }
  return {};
}
}  // namespace fastdeploy
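// ---------------------------------------------------------------------------
// Illustrative usage sketch (compiled out via #if 0): driving this backend
// through FastDeploy's public Runtime API. The file names "model.so" and
// "model.params", the input name "data", and the 1x3x224x224 float32 shape
// are assumptions for illustration only; ModelFormat::TVMFormat is the
// format checked by TVMBackend::Init() above, and the Resize(shape, dtype,
// name) call mirrors the one used on outputs in TVMBackend::Infer().
#if 0
#include <cstring>
#include <vector>

#include "fastdeploy/runtime/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.UseCpu();  // TVMBackend::BuildDLDevice() only accepts Device::CPU.
  option.SetModelPath("model.so", "model.params",
                      fastdeploy::ModelFormat::TVMFormat);

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    return -1;
  }

  // Prepare a zero-filled float32 input tensor.
  std::vector<fastdeploy::FDTensor> inputs(1);
  inputs[0].Resize({1, 3, 224, 224}, fastdeploy::FDDataType::FP32, "data");
  std::memset(inputs[0].MutableData(), 0, inputs[0].Nbytes());

  std::vector<fastdeploy::FDTensor> outputs;
  if (!runtime.Infer(inputs, &outputs)) {
    return -1;
  }
  return 0;
}
#endif
// ---------------------------------------------------------------------------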