// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/runtime/backends/ort/ort_backend.h"

#include "fastdeploy/core/float16.h"
#include "fastdeploy/runtime/backends/ort/ops/adaptive_pool2d.h"
#include "fastdeploy/runtime/backends/ort/ops/multiclass_nms.h"
#include "fastdeploy/runtime/backends/ort/utils.h"
#include "fastdeploy/utils/utils.h"
#ifdef ENABLE_PADDLE2ONNX
#include "paddle2onnx/converter.h"
#endif

#include <fstream>
#include <memory>

namespace fastdeploy {

std::vector<OrtCustomOp*> OrtBackend::custom_operators_ =
    std::vector<OrtCustomOp*>();

bool OrtBackend::BuildOption(const OrtBackendOption& option) {
  option_ = option;
  if (option.graph_optimization_level >= 0) {
    session_options_.SetGraphOptimizationLevel(
        GraphOptimizationLevel(option.graph_optimization_level));
  }
  if (option.intra_op_num_threads > 0) {
    session_options_.SetIntraOpNumThreads(option.intra_op_num_threads);
  }
  if (option.inter_op_num_threads > 0) {
    session_options_.SetInterOpNumThreads(option.inter_op_num_threads);
  }
  if (option.execution_mode >= 0) {
    session_options_.SetExecutionMode(ExecutionMode(option.execution_mode));
  }

#ifdef WITH_DIRECTML
  // If use DirectML
  if (option.device == Device::DIRECTML) {
    auto all_providers = Ort::GetAvailableProviders();
    bool support_dml = false;
    std::string providers_msg = "";
    for (size_t i = 0; i < all_providers.size(); ++i) {
      providers_msg = providers_msg + all_providers[i] + ", ";
      if (all_providers[i] == "DmlExecutionProvider") {
        support_dml = true;
      }
    }
    if (!support_dml) {
      FDWARNING << "Compiled fastdeploy with onnxruntime doesn't "
                   "support DirectML, the available providers are "
                << providers_msg
                << "will fall back to CPUExecutionProvider. "
                << "Please check if DirectML is installed successfully."
                << std::endl;
      option_.device = Device::CPU;
    } else {
      // Must set as below when use dml.
      session_options_.DisableMemPattern();
      session_options_.SetExecutionMode(ExecutionMode(0));

      // DML session_option
      OrtApi const& ortApi = Ort::GetApi();
      const OrtDmlApi* ortDmlApi;
      ortApi.GetExecutionProviderApi(
          "DML", ORT_API_VERSION, reinterpret_cast<const void**>(&ortDmlApi));
      OrtStatus* onnx_dml_status =
          ortDmlApi->SessionOptionsAppendExecutionProvider_DML(
              session_options_, 0);
      if (onnx_dml_status != nullptr) {
        FDERROR << "DirectML is not supported on this machine, "
                   "the program will exit."
                << std::endl;
        ortApi.ReleaseStatus(onnx_dml_status);
        return false;
      }
    }
    return true;
  }
#endif

  // CUDA
  if (option.device == Device::GPU) {
    auto all_providers = Ort::GetAvailableProviders();
    bool support_cuda = false;
    std::string providers_msg = "";
    for (size_t i = 0; i < all_providers.size(); ++i) {
      providers_msg = providers_msg + all_providers[i] + ", ";
      if (all_providers[i] == "CUDAExecutionProvider") {
        support_cuda = true;
      }
    }
    if (!support_cuda) {
      FDWARNING << "Compiled fastdeploy with onnxruntime doesn't "
                   "support GPU, the available providers are "
                << providers_msg
                << "will fall back to CPUExecutionProvider." << std::endl;
      option_.device = Device::CPU;
    } else {
      OrtCUDAProviderOptions cuda_options;
      cuda_options.device_id = option.device_id;
      if (option.external_stream_) {
        cuda_options.has_user_compute_stream = 1;
        cuda_options.user_compute_stream = option.external_stream_;
      }
      session_options_.AppendExecutionProvider_CUDA(cuda_options);
    }
    return true;
  }
  return true;
}
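
// Initialize the backend from a RuntimeOption. Paddle models are converted to
// ONNX first (see InitFromPaddle); ONNX models are loaded directly.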
bool OrtBackend::Init(const RuntimeOption& option) {
  if (option.device != Device::CPU && option.device != Device::GPU &&
      option.device != Device::DIRECTML) {
    FDERROR << "Backend::ORT only supports Device::CPU/Device::GPU/"
               "Device::DIRECTML, but now it's "
            << option.device << "." << std::endl;
    return false;
  }
  OrtBackendOption ort_option = option.ort_option;
  ort_option.device = option.device;
  ort_option.device_id = option.device_id;
  ort_option.external_stream_ = option.external_stream_;

  if (option.model_format == ModelFormat::PADDLE) {
    if (option.model_from_memory_) {
      return InitFromPaddle(option.model_file, option.params_file, ort_option);
    }
    std::string model_buffer, params_buffer;
    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
             "Failed to read model file.");
    FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
             "Failed to read parameters file.");
    return InitFromPaddle(model_buffer, params_buffer, ort_option);
  } else if (option.model_format == ModelFormat::ONNX) {
    if (option.model_from_memory_) {
      return InitFromOnnx(option.model_file, ort_option);
    }
    std::string model_buffer;
    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
             "Failed to read model file.");
    return InitFromOnnx(model_buffer, ort_option);
  } else {
    FDERROR << "Only Paddle/ONNX model formats are supported by OrtBackend."
            << std::endl;
    return false;
  }
  return false;
}

// Convert a Paddle model to ONNX with paddle2onnx, mapping multiclass_nms3 and
// pool2d to the MultiClassNMS/AdaptivePool2d custom ops, then delegate to
// InitFromOnnx.
bool OrtBackend::InitFromPaddle(const std::string& model_buffer,
                                const std::string& params_buffer,
                                const OrtBackendOption& option, bool verbose) {
  if (initialized_) {
    FDERROR << "OrtBackend is already initialized, cannot initialize again."
            << std::endl;
    return false;
  }
  char* model_content_ptr;
  int model_content_size = 0;
  bool save_external = false;
#ifdef ENABLE_PADDLE2ONNX
  std::vector<paddle2onnx::CustomOp> ops;
  ops.resize(2);
  strcpy(ops[0].op_name, "multiclass_nms3");
  strcpy(ops[0].export_op_name, "MultiClassNMS");
  strcpy(ops[1].op_name, "pool2d");
  strcpy(ops[1].export_op_name, "AdaptivePool2d");
  if (!paddle2onnx::Export(model_buffer.c_str(), model_buffer.size(),
                           params_buffer.c_str(), params_buffer.size(),
                           &model_content_ptr, &model_content_size, 11, true,
                           verbose, true, true, true, ops.data(), 2,
                           "onnxruntime", nullptr, 0, "", &save_external)) {
    FDERROR << "Error occurred while exporting the PaddlePaddle model to "
               "ONNX format."
            << std::endl;
    return false;
  }
  std::string onnx_model_proto(model_content_ptr,
                               model_content_ptr + model_content_size);
  delete[] model_content_ptr;
  model_content_ptr = nullptr;
  if (save_external) {
    std::string model_file_name = "model.onnx";
    std::fstream f(model_file_name, std::ios::out);
    FDASSERT(f.is_open(), "Can not open file: %s to save model.",
             model_file_name.c_str());
    f << onnx_model_proto;
    f.close();
  }
  return InitFromOnnx(onnx_model_proto, option);
#else
  FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
             "call `InitFromOnnx` instead."
          << std::endl;
#endif
  return false;
}
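
// Create the ONNX Runtime session from an in-memory ONNX model and record the
// name, shape and element type of every model input and output.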
bool OrtBackend::InitFromOnnx(const std::string& model_file,
                              const OrtBackendOption& option) {
  if (initialized_) {
    FDERROR << "OrtBackend is already initialized, cannot initialize again."
            << std::endl;
    return false;
  }
  if (!BuildOption(option)) {
    FDERROR << "Failed to create Ort option." << std::endl;
    return false;
  }
  InitCustomOperators();
  session_ = {env_, model_file.data(), model_file.size(), session_options_};
  binding_ = std::make_shared<Ort::IoBinding>(session_);

  Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
  Ort::Allocator allocator(session_, memory_info);
  size_t n_inputs = session_.GetInputCount();
  for (size_t i = 0; i < n_inputs; ++i) {
    auto input_name = session_.GetInputName(i, allocator);
    auto type_info = session_.GetInputTypeInfo(i);
    std::vector<int64_t> shape =
        type_info.GetTensorTypeAndShapeInfo().GetShape();
    ONNXTensorElementDataType data_type =
        type_info.GetTensorTypeAndShapeInfo().GetElementType();
    inputs_desc_.emplace_back(OrtValueInfo{input_name, shape, data_type});
    allocator.Free(input_name);
  }

  size_t n_outputs = session_.GetOutputCount();
  for (size_t i = 0; i < n_outputs; ++i) {
    auto output_name = session_.GetOutputName(i, allocator);
    auto type_info = session_.GetOutputTypeInfo(i);
    std::vector<int64_t> shape =
        type_info.GetTensorTypeAndShapeInfo().GetShape();
    ONNXTensorElementDataType data_type =
        type_info.GetTensorTypeAndShapeInfo().GetElementType();
    outputs_desc_.emplace_back(OrtValueInfo{output_name, shape, data_type});

    Ort::MemoryInfo out_memory_info("Cpu", OrtDeviceAllocator, 0,
                                    OrtMemTypeDefault);
    binding_->BindOutput(output_name, out_memory_info);
    allocator.Free(output_name);
  }
  initialized_ = true;
  return true;
}

// Copy or share the data of an Ort::Value into an FDTensor, translating the
// ONNX Runtime element type into the corresponding FDDataType.
void OrtBackend::OrtValueToFDTensor(const Ort::Value& value, FDTensor* tensor,
                                    const std::string& name, bool copy_to_fd) {
  const auto info = value.GetTensorTypeAndShapeInfo();
  const auto data_type = info.GetElementType();
  size_t numel = info.GetElementCount();
  auto shape = info.GetShape();
  FDDataType dtype;
  if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
    dtype = FDDataType::FP32;
    numel *= sizeof(float);
  } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
    dtype = FDDataType::INT32;
    numel *= sizeof(int32_t);
  } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
    dtype = FDDataType::INT64;
    numel *= sizeof(int64_t);
  } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
    dtype = FDDataType::FP64;
    numel *= sizeof(double);
  } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16) {
    dtype = FDDataType::FP16;
    numel *= sizeof(float16);
  } else {
    FDASSERT(
        false,
        "Unrecognized data type of %d while calling OrtBackend::CopyToCpu().",
        data_type);
  }
  const void* value_ptr = value.GetTensorData<void*>();
  if (copy_to_fd) {
    tensor->Resize(shape, dtype, name);
    memcpy(tensor->MutableData(), value_ptr, numel);
  } else {
    tensor->name = name;
    tensor->SetExternalData(shape, dtype, const_cast<void*>(value_ptr),
                            Device::CPU);
  }
}
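
// Run inference: bind the input FDTensors, execute the session through the
// IoBinding, then convert the resulting Ort::Values back into FDTensors.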
bool OrtBackend::Infer(std::vector<FDTensor>& inputs,
                       std::vector<FDTensor>* outputs, bool copy_to_fd) {
  if (inputs.size() != inputs_desc_.size()) {
    FDERROR << "[OrtBackend] Size of the inputs(" << inputs.size()
            << ") should be equal to the number of inputs of this model("
            << inputs_desc_.size() << ")." << std::endl;
    return false;
  }

  // Convert FDTensor inputs to Ort inputs
  RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
  for (size_t i = 0; i < inputs.size(); ++i) {
    auto ort_value = CreateOrtValue(inputs[i], option_.device == Device::GPU);
    binding_->BindInput(inputs[i].name.c_str(), ort_value);
  }

  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
    Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0,
                                OrtMemTypeDefault);
    binding_->BindOutput(outputs_desc_[i].name.c_str(), memory_info);
  }

  // Inference with inputs
  RUNTIME_PROFILE_LOOP_BEGIN(1)
  try {
    session_.Run({}, *(binding_.get()));
  } catch (const std::exception& e) {
    FDERROR << "Failed to Infer: " << e.what() << std::endl;
    return false;
  }
  RUNTIME_PROFILE_LOOP_END

  // Convert result after inference
  std::vector<Ort::Value> ort_outputs = binding_->GetOutputValues();
  outputs->resize(ort_outputs.size());
  for (size_t i = 0; i < ort_outputs.size(); ++i) {
    OrtValueToFDTensor(ort_outputs[i], &((*outputs)[i]), outputs_desc_[i].name,
                       copy_to_fd);
  }
  RUNTIME_PROFILE_LOOP_H2D_D2H_END
  return true;
}

TensorInfo OrtBackend::GetInputInfo(int index) {
  FDASSERT(index < NumInputs(),
           "The index: %d should be less than the number of inputs: %d.",
           index, NumInputs());
  TensorInfo info;
  info.name = inputs_desc_[index].name;
  info.shape.assign(inputs_desc_[index].shape.begin(),
                    inputs_desc_[index].shape.end());
  info.dtype = GetFdDtype(inputs_desc_[index].dtype);
  return info;
}

std::vector<TensorInfo> OrtBackend::GetInputInfos() {
  auto size = inputs_desc_.size();
  std::vector<TensorInfo> infos;
  infos.reserve(size);
  for (size_t i = 0; i < size; i++) {
    infos.emplace_back(GetInputInfo(i));
  }
  return infos;
}

TensorInfo OrtBackend::GetOutputInfo(int index) {
  FDASSERT(index < NumOutputs(),
           "The index: %d should be less than the number of outputs: %d.",
           index, NumOutputs());
  TensorInfo info;
  info.name = outputs_desc_[index].name;
  info.shape.assign(outputs_desc_[index].shape.begin(),
                    outputs_desc_[index].shape.end());
  info.dtype = GetFdDtype(outputs_desc_[index].dtype);
  return info;
}

std::vector<TensorInfo> OrtBackend::GetOutputInfos() {
  std::vector<TensorInfo> infos;
  for (size_t i = 0; i < outputs_desc_.size(); i++) {
    infos.emplace_back(GetOutputInfo(i));
  }
  return infos;
}

// Register the MultiClassNMS and AdaptivePool2d custom operators with the
// session options; AdaptivePool2d picks its execution provider by device.
void OrtBackend::InitCustomOperators() {
#ifndef NON_64_PLATFORM
  if (custom_operators_.size() == 0) {
    MultiClassNmsOp* multiclass_nms = new MultiClassNmsOp{};
    custom_operators_.push_back(multiclass_nms);
    if (option_.device == Device::GPU) {
      AdaptivePool2dOp* adaptive_pool2d =
          new AdaptivePool2dOp{"CUDAExecutionProvider"};
      custom_operators_.push_back(adaptive_pool2d);
    } else {
      AdaptivePool2dOp* adaptive_pool2d =
          new AdaptivePool2dOp{"CPUExecutionProvider"};
      custom_operators_.push_back(adaptive_pool2d);
    }
  }
  for (size_t i = 0; i < custom_operators_.size(); ++i) {
    custom_op_domain_.Add(custom_operators_[i]);
  }
  session_options_.Add(custom_op_domain_);
#endif
}

}  // namespace fastdeploy