// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/runtime/backends/paddle/paddle_backend.h"

#include <functional>
#include <numeric>
#include <sstream>

#include "fastdeploy/utils/path.h"

namespace fastdeploy {

void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
  option_ = option;
  if (option.device == Device::GPU) {
    auto inference_precision = paddle_infer::PrecisionType::kFloat32;
    if (option_.inference_precision == "float32") {
      FDINFO << "Will use inference precision float32" << std::endl;
      inference_precision = paddle_infer::PrecisionType::kFloat32;
    } else if (option_.inference_precision == "float16") {
      FDINFO << "Will use inference precision float16" << std::endl;
      inference_precision = paddle_infer::PrecisionType::kHalf;
    } else {
      FDERROR << "The Paddle Inference backend only supports inference "
                 "precision float32 or float16, but got "
              << option_.inference_precision << "." << std::endl;
    }
    config_.EnableUseGpu(option.gpu_mem_init_size, option.device_id,
                         inference_precision);
    if (option_.external_stream_) {
      config_.SetExecStream(option_.external_stream_);
    }
    if (option.enable_trt) {
      auto precision = paddle_infer::PrecisionType::kFloat32;
      if (option.trt_option.enable_fp16) {
        FDINFO << "Will try to use tensorrt fp16 inference with Paddle "
                  "Backend."
               << std::endl;
        precision = paddle_infer::PrecisionType::kHalf;
      }
      bool use_static = false;
      if (!option.trt_option.serialize_file.empty()) {
        use_static = true;
        config_.SetOptimCacheDir(
            GetDirFromPath(option.trt_option.serialize_file));
      }
      config_.EnableTensorRtEngine(option.trt_option.max_workspace_size,
                                   option.trt_option.max_batch_size, 3,
                                   precision, use_static);
      SetTRTDynamicShapeToConfig(option);
    }
  } else if (option.device == Device::IPU) {
#ifdef WITH_IPU
    config_.EnableIpu(option.ipu_option.ipu_device_num,
                      option.ipu_option.ipu_micro_batch_size,
                      option.ipu_option.ipu_enable_pipelining,
                      option.ipu_option.ipu_batches_per_step);
#else
    FDWARNING << "FastDeploy was not compiled with IPU support, so the "
                 "Paddle Inference backend will run on CPU."
              << std::endl;
    config_.DisableGpu();
#endif
  } else {
    config_.DisableGpu();
    if (option.enable_mkldnn) {
      config_.EnableMKLDNN();
      config_.SetMkldnnCacheCapacity(option.mkldnn_cache_size);
    }
  }
  config_.SetCpuMathLibraryNumThreads(option.cpu_thread_num);
}

bool PaddleBackend::InitFromPaddle(const std::string& model,
                                   const std::string& params,
                                   bool model_from_memory,
                                   const PaddleBackendOption& option) {
  if (initialized_) {
    FDERROR << "PaddleBackend is already initialized, cannot initialize "
               "again."
            << std::endl;
    return false;
  }
  if (model_from_memory) {
    config_.SetModelBuffer(model.c_str(), model.size(), params.c_str(),
                           params.size());
  } else {
    config_.SetModel(model, params);
  }
  BuildOption(option);

  if (option.enable_trt && option.collect_trt_shape) {
    // Locate (or generate) the shape range info file next to the model.
    std::string curr_model_dir = "./";
    if (!model_from_memory) {
      curr_model_dir = GetDirFromPath(model);
    }
    std::string shape_range_info =
        PathJoin(curr_model_dir, "shape_range_info.pbtxt");
    if (!CheckFileExists(shape_range_info)) {
      FDINFO << "Start generating shape range info file." << std::endl;
      paddle_infer::Config analysis_config;
      if (model_from_memory) {
        analysis_config.SetModelBuffer(model.c_str(), model.size(),
                                       params.c_str(), params.size());
      } else {
        analysis_config.SetModel(model, params);
      }
      analysis_config.CollectShapeRangeInfo(shape_range_info);
      auto predictor_tmp = paddle_infer::CreatePredictor(analysis_config);
      std::map<std::string, std::vector<int>> max_shape;
      std::map<std::string, std::vector<int>> min_shape;
      std::map<std::string, std::vector<int>> opt_shape;
      GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape);
      std::map<std::string, std::vector<float>> max_input_data;
      std::map<std::string, std::vector<float>> min_input_data;
      std::map<std::string, std::vector<float>> opt_input_data;
      if (!option.trt_option.min_input_data.empty()) {
        GetInputDataFromOption(option, &max_input_data, &min_input_data,
                               &opt_input_data);
      }
      // Need to run once per profile to get the shape range info file.
      CollectShapeRun(predictor_tmp.get(), max_shape, max_input_data);
      CollectShapeRun(predictor_tmp.get(), min_shape, min_input_data);
      CollectShapeRun(predictor_tmp.get(), opt_shape, opt_input_data);
      FDINFO << "Finish generating shape range info file." << std::endl;
    }
    FDINFO << "Start loading shape range info file " << shape_range_info
           << " to set TensorRT dynamic shape." << std::endl;
    config_.EnableTunedTensorRtDynamicShape(shape_range_info, false);
  }

  // Note(zhoushunjie): The pass deletion should be executed just before
  // creating the predictor.
  if (!option.delete_pass_names.empty()) {
    auto pass_builder = config_.pass_builder();
    for (size_t i = 0; i < option.delete_pass_names.size(); i++) {
      FDINFO << "Delete pass : " << option.delete_pass_names[i] << std::endl;
      pass_builder->DeletePass(option.delete_pass_names[i]);
    }
  }
  if (option.enable_log_info) {
    FDINFO << "Finish paddle inference config with summary as: " << std::endl
           << config_.Summary() << std::endl;
  }
  predictor_ = paddle_infer::CreatePredictor(config_);

  auto input_names = predictor_->GetInputNames();
  auto output_names = predictor_->GetOutputNames();
  auto input_dtypes = predictor_->GetInputTypes();
#ifdef PADDLEINFERENCE_API_COMPAT_2_4_x
  // Note: GetInputTensorShape, GetOutputTensorShape and GetOutputTypes
  // are not supported when the Paddle Inference API version is 2.4.x.
  std::map<std::string, std::vector<int64_t>> input_shapes;
  std::map<std::string, std::vector<int64_t>> output_shapes;
  std::map<std::string, paddle_infer::DataType> output_dtypes;

  // Get all the input shape info.
  for (size_t i = 0; i < input_names.size(); ++i) {
    std::vector<int64_t> shape;
    auto handle = predictor_->GetInputHandle(input_names[i]);
    for (size_t j = 0; j < handle->shape().size(); ++j) {
      shape.push_back(
          static_cast<int64_t>(handle->shape()[j]));  // int32 -> int64
    }
    input_shapes[input_names[i]] = shape;
  }
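  // In this 2.4.x-compatible path, shapes are read from the per-tensor
  // handles, whose dims are int32, and widened to int64 so the result has
  // the same std::map<std::string, std::vector<int64_t>> layout that
  // GetInputTensorShape/GetOutputTensorShape return on newer API versions.
  // Output dtypes likewise come from handle->type() instead of
  // GetOutputTypes().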
  // Get all the output shape and dtype info.
  for (size_t i = 0; i < output_names.size(); ++i) {
    std::vector<int64_t> shape;
    auto handle = predictor_->GetOutputHandle(output_names[i]);
    for (size_t j = 0; j < handle->shape().size(); ++j) {
      shape.push_back(
          static_cast<int64_t>(handle->shape()[j]));  // int32 -> int64
    }
    output_shapes[output_names[i]] = shape;
    output_dtypes[output_names[i]] = handle->type();
  }
#else
  auto input_shapes = predictor_->GetInputTensorShape();
  auto output_shapes = predictor_->GetOutputTensorShape();
  auto output_dtypes = predictor_->GetOutputTypes();
#endif

  inputs_desc_.resize(input_names.size());
  for (size_t i = 0; i < input_names.size(); ++i) {
    inputs_desc_[i].name = input_names[i];
    auto iter = input_shapes.find(inputs_desc_[i].name);
    FDASSERT(iter != input_shapes.end(), "Cannot find shape for input %s.",
             inputs_desc_[i].name.c_str());
    inputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end());
    auto iter1 = input_dtypes.find(inputs_desc_[i].name);
    FDASSERT(iter1 != input_dtypes.end(),
             "Cannot find data type for input %s.",
             inputs_desc_[i].name.c_str());
    inputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second);
  }
  outputs_desc_.resize(output_names.size());
  for (size_t i = 0; i < output_names.size(); ++i) {
    outputs_desc_[i].name = output_names[i];
    auto iter = output_shapes.find(outputs_desc_[i].name);
    FDASSERT(iter != output_shapes.end(), "Cannot find shape for output %s.",
             outputs_desc_[i].name.c_str());
    outputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end());
    auto iter1 = output_dtypes.find(outputs_desc_[i].name);
    FDASSERT(iter1 != output_dtypes.end(),
             "Cannot find data type for output %s.",
             outputs_desc_[i].name.c_str());
    outputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second);
  }
  initialized_ = true;
  return true;
}

TensorInfo PaddleBackend::GetInputInfo(int index) {
  FDASSERT(index < NumInputs(),
           "Index %d should be less than the number of inputs: %d.", index,
           NumInputs());
  return inputs_desc_[index];
}

std::vector<TensorInfo> PaddleBackend::GetInputInfos() { return inputs_desc_; }

TensorInfo PaddleBackend::GetOutputInfo(int index) {
  FDASSERT(index < NumOutputs(),
           "Index %d should be less than the number of outputs: %d.", index,
           NumOutputs());
  return outputs_desc_[index];
}

std::vector<TensorInfo> PaddleBackend::GetOutputInfos() {
  return outputs_desc_;
}

bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
                          std::vector<FDTensor>* outputs, bool copy_to_fd) {
  if (inputs.size() != inputs_desc_.size()) {
    FDERROR << "[PaddleBackend] The number of inputs (" << inputs.size()
            << ") does not match the number of inputs of this model ("
            << inputs_desc_.size() << ")." << std::endl;
    return false;
  }
  // Sharing output backend memory is only supported on CPU or GPU,
  // so force a copy on IPU.
  if (option_.device == Device::IPU) {
    copy_to_fd = true;
  }
  RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
  for (size_t i = 0; i < inputs.size(); ++i) {
    auto handle = predictor_->GetInputHandle(inputs[i].name);
    ShareTensorFromFDTensor(handle.get(), inputs[i]);
  }
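  // ShareTensorFromFDTensor binds each FDTensor's buffer to the predictor's
  // input handle, so no extra host copy is made before Run(). The
  // commented-out block below sketches the (currently disabled) counterpart
  // for outputs: prebinding an output handle would let Paddle write results
  // directly into a caller-owned FDTensor and skip PaddleTensorToFDTensor
  // afterwards.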
  // Prebound outputs are only supported on GPU.
  // if (!copy_to_fd) {
  //   for (size_t i = 0; i < (*outputs).size(); ++i) {
  //     auto output_name = (*outputs)[i].name;
  //     // If an output is not prebound, its name is expected to be empty,
  //     // so we skip it here.
  //     if (output_name.empty()) {
  //       continue;
  //     }
  //     // Record the prebound output_name. Those outputs do not need
  //     // PaddleTensorToFDTensor after predictor_->Run().
  //     auto handle = predictor_->GetOutputHandle(output_name);
  //     ShareOutTensorFromFDTensor(handle.get(), (*outputs)[i]);
  //   }
  // }
  RUNTIME_PROFILE_LOOP_BEGIN(1)
  predictor_->Run();
  RUNTIME_PROFILE_LOOP_END

  outputs->resize(outputs_desc_.size());
  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
    auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name);
    if (copy_to_fd) {
      (*outputs)[i].is_pinned_memory = option_.enable_pinned_memory;
    }
    PaddleTensorToFDTensor(handle, &((*outputs)[i]), copy_to_fd);
  }
  RUNTIME_PROFILE_LOOP_H2D_D2H_END
  return true;
}

std::unique_ptr<BaseBackend> PaddleBackend::Clone(
    RuntimeOption& runtime_option, void* stream, int device_id) {
  std::unique_ptr<BaseBackend> new_backend =
      utils::make_unique<PaddleBackend>();
  auto casted_backend = dynamic_cast<PaddleBackend*>(new_backend.get());
  if (device_id > 0 && (option_.device == Device::GPU) &&
      device_id != option_.device_id) {
    auto clone_option = option_;
    clone_option.device_id = device_id;
    clone_option.external_stream_ = stream;
    FDASSERT(casted_backend->InitFromPaddle(
                 runtime_option.model_file, runtime_option.params_file,
                 runtime_option.model_from_memory_, clone_option),
             "Clone model from Paddle failed while initializing "
             "PaddleBackend.");
    FDWARNING << "The target device id: " << device_id
              << " is different from the current device id: "
              << option_.device_id
              << ", so the clone cannot share memory with the current engine."
              << std::endl;
    return new_backend;
  }
  casted_backend->inputs_desc_.assign(inputs_desc_.begin(),
                                      inputs_desc_.end());
  casted_backend->outputs_desc_.assign(outputs_desc_.begin(),
                                       outputs_desc_.end());
  casted_backend->predictor_ = predictor_->Clone(stream);
  return new_backend;
}

void PaddleBackend::SetTRTDynamicShapeToConfig(
    const PaddleBackendOption& option) {
  std::map<std::string, std::vector<int>> max_shape;
  std::map<std::string, std::vector<int>> min_shape;
  std::map<std::string, std::vector<int>> opt_shape;
  GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape);
  if (!min_shape.empty()) {
    FDINFO << "Start setting trt dynamic shape." << std::endl;
    config_.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);
    FDINFO << "Finish setting trt dynamic shape." << std::endl;
  }
}
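// A minimal sketch of how the three shape profiles consumed by
// SetTRTDynamicShapeToConfig/GetDynamicShapeFromOption are typically supplied
// from user code. The input name "x" and the exact SetShape argument order
// are illustrative assumptions; consult TrtBackendOption for the
// authoritative signature.
//
//   fastdeploy::RuntimeOption option;
//   option.UseGpu(0);
//   option.UsePaddleInferBackend();
//   option.paddle_infer_option.enable_trt = true;
//   // (name, min_shape, opt_shape, max_shape) -- "x" is hypothetical.
//   option.trt_option.SetShape("x", {1, 3, 224, 224}, {4, 3, 224, 224},
//                              {8, 3, 224, 224});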
void PaddleBackend::GetDynamicShapeFromOption(
    const PaddleBackendOption& option,
    std::map<std::string, std::vector<int>>* max_shape,
    std::map<std::string, std::vector<int>>* min_shape,
    std::map<std::string, std::vector<int>>* opt_shape) const {
  auto print_shape = [](const std::vector<int>& shape) -> std::string {
    std::ostringstream oss;
    oss << "[";
    for (size_t i = 0; i < shape.size(); ++i) {
      oss << shape[i];
      if (i < shape.size() - 1) {
        oss << ", ";
      }
    }
    oss << "]";
    return oss.str();
  };
  for (const auto& item : option.trt_option.min_shape) {
    auto max_iter = option.trt_option.max_shape.find(item.first);
    auto opt_iter = option.trt_option.opt_shape.find(item.first);
    FDASSERT(max_iter != option.trt_option.max_shape.end(),
             "Cannot find %s in TrtBackendOption::max_shape.",
             item.first.c_str());
    FDASSERT(opt_iter != option.trt_option.opt_shape.end(),
             "Cannot find %s in TrtBackendOption::opt_shape.",
             item.first.c_str());
    (*max_shape)[item.first].assign(max_iter->second.begin(),
                                    max_iter->second.end());
    (*opt_shape)[item.first].assign(opt_iter->second.begin(),
                                    opt_iter->second.end());
    (*min_shape)[item.first].assign(item.second.begin(), item.second.end());
    FDINFO << item.first
           << ": the max shape = " << print_shape(max_iter->second)
           << ", the min shape = " << print_shape(item.second)
           << ", the opt shape = " << print_shape(opt_iter->second)
           << std::endl;
  }
}

void PaddleBackend::GetInputDataFromOption(
    const PaddleBackendOption& option,
    std::map<std::string, std::vector<float>>* max_input_data,
    std::map<std::string, std::vector<float>>* min_input_data,
    std::map<std::string, std::vector<float>>* opt_input_data) const {
  for (const auto& item : option.trt_option.min_input_data) {
    auto max_iter = option.trt_option.max_input_data.find(item.first);
    auto opt_iter = option.trt_option.opt_input_data.find(item.first);
    FDASSERT(max_iter != option.trt_option.max_input_data.end(),
             "Cannot find %s in TrtBackendOption::max_input_data.",
             item.first.c_str());
    FDASSERT(opt_iter != option.trt_option.opt_input_data.end(),
             "Cannot find %s in TrtBackendOption::opt_input_data.",
             item.first.c_str());
    (*max_input_data)[item.first].assign(max_iter->second.begin(),
                                         max_iter->second.end());
    (*opt_input_data)[item.first].assign(opt_iter->second.begin(),
                                         opt_iter->second.end());
    (*min_input_data)[item.first].assign(item.second.begin(),
                                         item.second.end());
  }
}

void PaddleBackend::CollectShapeRun(
    paddle_infer::Predictor* predictor,
    const std::map<std::string, std::vector<int>>& shape,
    const std::map<std::string, std::vector<float>>& data) const {
  auto input_names = predictor->GetInputNames();
  auto input_type = predictor->GetInputTypes();
  for (const auto& name : input_names) {
    FDASSERT(shape.find(name) != shape.end() &&
                 input_type.find(name) != input_type.end(),
             "When collect_trt_shape is true, please define max/opt/min shape "
             "for the model's input [\"%s\"] via "
             "(C++) RuntimeOption.trt_option.SetShape / "
             "(Python) RuntimeOption.trt_option.set_shape.",
             name.c_str());
    auto tensor = predictor->GetInputHandle(name);
    auto shape_value = shape.at(name);
    int shape_num = std::accumulate(shape_value.begin(), shape_value.end(), 1,
                                    std::multiplies<int>());
    tensor->Reshape(shape_value);
    if (data.find(name) != data.end()) {
      FDASSERT(data.at(name).size() == static_cast<size_t>(shape_num),
               "The number of elements set for input [\"%s\"] via "
               "(C++) RuntimeOption.trt_option.SetInputData / "
               "(Python) RuntimeOption.trt_option.set_input_data must equal "
               "the product of its shape.",
               name.c_str());
    }
    auto dtype = input_type[name];
    switch (dtype) {
      case paddle_infer::DataType::FLOAT32: {
        if (data.find(name) != data.end()) {
          tensor->CopyFromCpu(data.at(name).data());
        } else {
          // No user-provided data; fill with a dummy value of 1.
          std::vector<float> input_data(shape_num, 1.0f);
          tensor->CopyFromCpu(input_data.data());
        }
        break;
      }
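      // The INT32/INT64 cases below mirror the FLOAT32 case: user-supplied
      // values arrive as std::vector<float> and are converted element-wise
      // to the integer type the tensor expects; without user data, a buffer
      // of ones with shape_num elements is used purely to drive the shape
      // collection run.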
      case paddle_infer::DataType::INT32: {
        if (data.find(name) != data.end()) {
          std::vector<int> input_data(data.at(name).begin(),
                                      data.at(name).end());
          tensor->CopyFromCpu(input_data.data());
        } else {
          std::vector<int> input_data(shape_num, 1);
          tensor->CopyFromCpu(input_data.data());
        }
        break;
      }
      case paddle_infer::DataType::INT64: {
        if (data.find(name) != data.end()) {
          std::vector<int64_t> input_data(data.at(name).begin(),
                                          data.at(name).end());
          tensor->CopyFromCpu(input_data.data());
        } else {
          std::vector<int64_t> input_data(shape_num, 1);
          tensor->CopyFromCpu(input_data.data());
        }
        break;
      }
      default: {
        FDASSERT(false,
                 "Input data for the Paddle backend only supports "
                 "FP32/INT32/INT64 currently.");
        break;
      }
    }
  }
  predictor->Run();
}

}  // namespace fastdeploy
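// Usage sketch (illustrative, not part of this translation unit): running
// this backend with TensorRT shape collection enabled through the public
// FastDeploy API. Member and method names are assumptions based on the
// options referenced above; verify against runtime_option.h.
//
//   fastdeploy::RuntimeOption option;
//   option.SetModelPath("model.pdmodel", "model.pdiparams");
//   option.UseGpu(0);
//   option.UsePaddleInferBackend();
//   option.paddle_infer_option.enable_trt = true;
//   option.paddle_infer_option.collect_trt_shape = true;
//   fastdeploy::Runtime runtime;
//   runtime.Init(option);  // the first Init generates shape_range_info.pbtxt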