From dccb737d8d8159995b86c5ff47d675c7a301920d Mon Sep 17 00:00:00 2001 From: Jack Zhou Date: Thu, 20 Oct 2022 17:02:56 +0800 Subject: [PATCH] [Backend] Add collect shape for pp-trt backend (#372) * Add collect_shape attr * add EnableTunedTensorRtDynamicShape * Add collect shape python api * Fix quant model not set trt dynamic shape * Add shape info print * Fix shape print * Use CopyFromCpu instead of ShareExternalData * Add ENABLE_TRT_BACKEND macro * Add shared data with --- fastdeploy/backends/paddle/paddle_backend.cc | 138 ++++++++++++++++--- fastdeploy/backends/paddle/paddle_backend.h | 10 ++ fastdeploy/backends/paddle/util.cc | 18 ++- fastdeploy/pybind/runtime.cc | 2 + fastdeploy/runtime.cc | 9 ++ fastdeploy/runtime.h | 12 ++ fastdeploy/utils/path.h | 74 ++++++++++ python/fastdeploy/runtime.py | 6 + scripts/patch_paddle_inference.py | 2 +- 9 files changed, 251 insertions(+), 20 deletions(-) create mode 100644 fastdeploy/utils/path.h diff --git a/fastdeploy/backends/paddle/paddle_backend.cc b/fastdeploy/backends/paddle/paddle_backend.cc index ff2e76fb0..674a37954 100644 --- a/fastdeploy/backends/paddle/paddle_backend.cc +++ b/fastdeploy/backends/paddle/paddle_backend.cc @@ -13,6 +13,8 @@ // limitations under the License. 
#include "fastdeploy/backends/paddle/paddle_backend.h" +#include "fastdeploy/utils/path.h" +#include namespace fastdeploy { @@ -31,21 +33,7 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) { use_static = true; } config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, 32, 3, precision, use_static); - std::map> max_shape; - std::map> min_shape; - std::map> opt_shape; - for (const auto& item : option.trt_option.min_shape) { - auto max_iter = option.trt_option.max_shape.find(item.first); - auto opt_iter = option.trt_option.opt_shape.find(item.first); - FDASSERT(max_iter != option.trt_option.max_shape.end(), "Cannot find %s in TrtBackendOption::min_shape.", item.first.c_str()); - FDASSERT(opt_iter != option.trt_option.opt_shape.end(), "Cannot find %s in TrtBackendOption::opt_shape.", item.first.c_str()); - max_shape[item.first].assign(max_iter->second.begin(), max_iter->second.end()); - opt_shape[item.first].assign(opt_iter->second.begin(), opt_iter->second.end()); - min_shape[item.first].assign(item.second.begin(), item.second.end()); - } - if (min_shape.size() > 0) { - config_.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape); - } + SetTRTDynamicShapeToConfig(option); #else FDWARNING << "The FastDeploy is not compiled with TensorRT backend, so will fallback to GPU with Paddle Inference Backend." << std::endl; #endif @@ -97,6 +85,17 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file, if (reader.is_quantize_model) { if (option.use_gpu) { FDWARNING << "The loaded model is a quantized model, while inference on GPU, please use TensorRT backend to get better performance." 
<< std::endl; + if (option.enable_trt) { +#ifdef ENABLE_TRT_BACKEND + bool use_static = false; + if (option.trt_option.serialize_file != "") { + FDWARNING << "Detect that tensorrt cache file has been set to " << option.trt_option.serialize_file << ", but while enable paddle2trt, please notice that the cache file will save to the directory where paddle model saved." << std::endl; + use_static = true; + } + config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, 32, 3, paddle_infer::PrecisionType::kInt8, use_static, false); + SetTRTDynamicShapeToConfig(option); +#endif + } } if (option.enable_mkldnn) { config_.EnableMkldnnInt8(); @@ -123,7 +122,31 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file, outputs_desc_[i].shape.assign(shape.begin(), shape.end()); outputs_desc_[i].dtype = ReaderDataTypeToFD(reader.outputs[i].dtype); } - +#ifdef ENABLE_TRT_BACKEND + if (option.collect_shape) { + // Set the shape info file. + auto curr_model_dir = GetDirFromPath(model_file); + std::string shape_range_info = PathJoin(curr_model_dir, "shape_range_info.pbtxt"); + if (!CheckFileExists(shape_range_info)) { + FDINFO << "Start generating shape range info file." << std::endl; + paddle_infer::Config analysis_config; + analysis_config.SetModel(model_file, params_file); + analysis_config.CollectShapeRangeInfo(shape_range_info); + auto predictor_tmp = paddle_infer::CreatePredictor(analysis_config); + std::map> max_shape; + std::map> min_shape; + std::map> opt_shape; + GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape); + // Need to run once to get the shape range info file. + CollectShapeRun(predictor_tmp.get(), max_shape); + CollectShapeRun(predictor_tmp.get(), min_shape); + CollectShapeRun(predictor_tmp.get(), opt_shape); + FDINFO << "Finish generating shape range info file." << std::endl; + } + FDINFO << "Start loading shape range info file "<< shape_range_info << " to set TensorRT dynamic shape." 
<< std::endl; + config_.EnableTunedTensorRtDynamicShape(shape_range_info, false); + } +#endif predictor_ = paddle_infer::CreatePredictor(config_); initialized_ = true; return true; @@ -172,4 +195,87 @@ bool PaddleBackend::Infer(std::vector<FDTensor>& inputs, return true; } +#ifdef ENABLE_TRT_BACKEND +void PaddleBackend::SetTRTDynamicShapeToConfig(const PaddleBackendOption& option) { + std::map<std::string, std::vector<int>> max_shape; + std::map<std::string, std::vector<int>> min_shape; + std::map<std::string, std::vector<int>> opt_shape; + GetDynamicShapeFromOption(option, &max_shape, &min_shape, &opt_shape); + FDINFO << "Start setting trt dynamic shape." << std::endl; + if (min_shape.size() > 0) { + config_.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape); + } + FDINFO << "Finish setting trt dynamic shape." << std::endl; +} + +void PaddleBackend::GetDynamicShapeFromOption(const PaddleBackendOption& option, + std::map<std::string, std::vector<int>>* max_shape, + std::map<std::string, std::vector<int>>* min_shape, + std::map<std::string, std::vector<int>>* opt_shape) const { + auto print_shape = [](const std::vector<int>& shape) -> std::string { + std::ostringstream oss; + oss << "["; + for (int i = 0; i < shape.size(); ++i) { + oss << shape[i]; + if (i < shape.size() - 1) { + oss << ", "; + } + } + oss << "]"; + return oss.str(); + }; + for (const auto& item : option.trt_option.min_shape) { + auto max_iter = option.trt_option.max_shape.find(item.first); + auto opt_iter = option.trt_option.opt_shape.find(item.first); + FDASSERT(max_iter != option.trt_option.max_shape.end(), "Cannot find %s in TrtBackendOption::max_shape.", item.first.c_str()); + FDASSERT(opt_iter != option.trt_option.opt_shape.end(), "Cannot find %s in TrtBackendOption::opt_shape.", item.first.c_str()); + (*max_shape)[item.first].assign(max_iter->second.begin(), max_iter->second.end()); + (*opt_shape)[item.first].assign(opt_iter->second.begin(), opt_iter->second.end()); + (*min_shape)[item.first].assign(item.second.begin(), item.second.end()); + FDINFO << item.first << ": the max shape = " << print_shape(max_iter->second) + << ", the min shape = " << print_shape(item.second) + << ", 
the opt shape = " << print_shape(opt_iter->second) << std::endl; + } +} + +void PaddleBackend::CollectShapeRun(paddle_infer::Predictor* predictor, + const std::map<std::string, std::vector<int>>& shape) const { + auto input_names = predictor->GetInputNames(); + auto input_type = predictor->GetInputTypes(); + for(auto name : input_names) { + FDASSERT(shape.find(name) != shape.end() && input_type.find(name) != input_type.end(), + "Paddle Input name [%s] is not one of the trt dynamic shape.", name.c_str()); + auto tensor = predictor->GetInputHandle(name); + auto shape_value = shape.at(name); + int shape_num = std::accumulate(shape_value.begin(), shape_value.end(), 1, + std::multiplies<int>()); + tensor->Reshape(shape_value); + auto dtype = input_type[name]; + switch (dtype) { + case paddle_infer::DataType::FLOAT32: { + std::vector<float> input_data(shape_num, 1.0); + tensor->CopyFromCpu(input_data.data()); + break; + } + case paddle_infer::DataType::INT32: { + std::vector<int> input_data(shape_num, 1); + tensor->CopyFromCpu(input_data.data()); + break; + } + case paddle_infer::DataType::INT64: { + std::vector<int64_t> input_data(shape_num, 1); + tensor->CopyFromCpu(input_data.data()); + break; + } + default: { + FDASSERT(false, "Input data Paddle backend only supports FP32/INT32/INT64 currently."); + break; + } + } + } + predictor->Run(); +} +#endif + + } // namespace fastdeploy diff --git a/fastdeploy/backends/paddle/paddle_backend.h b/fastdeploy/backends/paddle/paddle_backend.h index 0d59a8a33..78b939fea 100755 --- a/fastdeploy/backends/paddle/paddle_backend.h +++ b/fastdeploy/backends/paddle/paddle_backend.h @@ -44,6 +44,7 @@ struct PaddleBackendOption { bool enable_trt = false; #ifdef ENABLE_TRT_BACKEND TrtBackendOption trt_option; + bool collect_shape = false; #endif int mkldnn_cache_size = 1; @@ -95,6 +96,15 @@ class PaddleBackend : public BaseBackend { std::vector<TensorInfo> GetOutputInfos() override; private: +#ifdef ENABLE_TRT_BACKEND + void CollectShapeRun(paddle_infer::Predictor* predictor, + const std::map<std::string, std::vector<int>>& shape) const; 
+ void GetDynamicShapeFromOption(const PaddleBackendOption& option, + std::map>* max_shape, + std::map>* min_shape, + std::map>* opt_shape) const; + void SetTRTDynamicShapeToConfig(const PaddleBackendOption& option); +#endif paddle_infer::Config config_; std::shared_ptr predictor_; std::vector inputs_desc_; diff --git a/fastdeploy/backends/paddle/util.cc b/fastdeploy/backends/paddle/util.cc index 498561791..216c788b2 100644 --- a/fastdeploy/backends/paddle/util.cc +++ b/fastdeploy/backends/paddle/util.cc @@ -29,16 +29,28 @@ void ShareTensorFromFDTensor(paddle_infer::Tensor* tensor, tensor->Reshape(shape); auto place = ConvertFDDeviceToPlace(fd_tensor.device); if (fd_tensor.dtype == FDDataType::FP32) { - tensor->ShareExternalData(static_cast(fd_tensor.Data()), + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } return; } else if (fd_tensor.dtype == FDDataType::INT32) { - tensor->ShareExternalData(static_cast(fd_tensor.Data()), + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } return; } else if (fd_tensor.dtype == FDDataType::INT64) { - tensor->ShareExternalData(static_cast(fd_tensor.Data()), + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.Data()), shape, place); + } else { + tensor->CopyFromCpu(static_cast(fd_tensor.Data())); + } return; } FDASSERT(false, "Unexpected data type(%s) while infer with PaddleBackend.", diff --git a/fastdeploy/pybind/runtime.cc b/fastdeploy/pybind/runtime.cc index c2e2df19e..6d8eb7804 100755 --- a/fastdeploy/pybind/runtime.cc +++ b/fastdeploy/pybind/runtime.cc @@ -44,6 +44,8 @@ void BindRuntime(pybind11::module& m) { .def("enable_trt_fp16", &RuntimeOption::EnableTrtFP16) .def("disable_trt_fp16", 
&RuntimeOption::DisableTrtFP16) .def("set_trt_cache_file", &RuntimeOption::SetTrtCacheFile) + .def("enable_paddle_trt_collect_shape", &RuntimeOption::EnablePaddleTrtCollectShape) + .def("disable_paddle_trt_collect_shape", &RuntimeOption::DisablePaddleTrtCollectShape) .def_readwrite("model_file", &RuntimeOption::model_file) .def_readwrite("params_file", &RuntimeOption::params_file) .def_readwrite("model_format", &RuntimeOption::model_format) diff --git a/fastdeploy/runtime.cc b/fastdeploy/runtime.cc index 63ad57eb3..0877402d7 100755 --- a/fastdeploy/runtime.cc +++ b/fastdeploy/runtime.cc @@ -388,6 +388,14 @@ bool Runtime::Compile(std::vector>& prewarm_tensors, "ENABLE_POROS_BACKEND=ON."); #endif return true; +} + +void RuntimeOption::EnablePaddleTrtCollectShape() { + pd_collect_shape = true; +} + +void RuntimeOption::DisablePaddleTrtCollectShape() { + pd_collect_shape = false; } bool Runtime::Init(const RuntimeOption& _option) { @@ -498,6 +506,7 @@ void Runtime::CreatePaddleBackend() { #ifdef ENABLE_TRT_BACKEND if (pd_option.use_gpu && option.pd_enable_trt) { pd_option.enable_trt = true; + pd_option.collect_shape = option.pd_collect_shape; auto trt_option = TrtBackendOption(); trt_option.gpu_id = option.device_id; trt_option.enable_fp16 = option.trt_enable_fp16; diff --git a/fastdeploy/runtime.h b/fastdeploy/runtime.h index 80979218c..32ad1615c 100755 --- a/fastdeploy/runtime.h +++ b/fastdeploy/runtime.h @@ -204,6 +204,17 @@ struct FASTDEPLOY_DECL RuntimeOption { */ void SetTrtCacheFile(const std::string& cache_file_path); + + /** + * @brief Enable to collect shape in paddle trt backend + */ + void EnablePaddleTrtCollectShape(); + + /** + * @brief Disable to collect shape in paddle trt backend + */ + void DisablePaddleTrtCollectShape(); + Backend backend = Backend::UNKNOWN; // for cpu inference and preprocess // default will let the backend choose their own default value @@ -225,6 +236,7 @@ struct FASTDEPLOY_DECL RuntimeOption { bool pd_enable_mkldnn = true; bool 
pd_enable_log_info = false; bool pd_enable_trt = false; + bool pd_collect_shape = false; int pd_mkldnn_cache_size = 1; std::vector pd_delete_pass_names; diff --git a/fastdeploy/utils/path.h b/fastdeploy/utils/path.h new file mode 100644 index 000000000..5c9ed7087 --- /dev/null +++ b/fastdeploy/utils/path.h @@ -0,0 +1,74 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#ifdef _MSC_VER +#define PATH_SEP "\\" +#else +#define PATH_SEP "/" +#endif + +namespace fastdeploy { + +inline std::string PathJoin(const std::vector& paths, + const std::string& sep = PATH_SEP) { + if (paths.size() == 1) { + return paths[0]; + } + std::string filepath = ""; + for (const auto& path : paths) { + if (filepath == "") { + filepath += path; + continue; + } + if (path[0] == sep[0] || filepath.back() == sep[0]) { + filepath += path; + } else { + filepath += sep + path; + } + } + return filepath; +} + +inline std::string PathJoin(const std::string& folder, + const std::string& filename, + const std::string& sep = PATH_SEP) { + return PathJoin(std::vector{folder, filename}, sep); +} + +inline std::string GetDirFromPath(const std::string& path) { + auto pos = path.find_last_of(PATH_SEP); + if (pos == std::string::npos) { + return ""; + } + // The root path in UNIX systems + if (pos == 0) { + return "/"; + } + return path.substr(0, pos); +} + +inline bool 
CheckFileExists(const std::string& path) { + std::fstream fin(path, std::ios::in); + if (!fin) { + return false; + } + return true; +} + +} // namespace fastdeploy diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py index c576369ee..90e64d400 100755 --- a/python/fastdeploy/runtime.py +++ b/python/fastdeploy/runtime.py @@ -329,6 +329,12 @@ class RuntimeOption: """ return self._option.set_trt_max_workspace_size(trt_max_workspace_size) + def enable_paddle_trt_collect_shape(self): + return self._option.enable_paddle_trt_collect_shape() + + def disable_paddle_trt_collect_shape(self): + return self._option.disable_paddle_trt_collect_shape() + def __repr__(self): attrs = dir(self._option) message = "RuntimeOption(\n" diff --git a/scripts/patch_paddle_inference.py b/scripts/patch_paddle_inference.py index 2ee0dee9b..f46ab2491 100644 --- a/scripts/patch_paddle_inference.py +++ b/scripts/patch_paddle_inference.py @@ -26,7 +26,7 @@ def process_paddle_inference(paddle_inference_so_file): rpaths = [ "$ORIGIN", "$ORIGIN/../../third_party/install/mkldnn/lib/", "$ORIGIN/../../third_party/install/mklml/lib/", - "$ORIGIN/../../../tensorrt/lib" + "$ORIGIN/../../../tensorrt/lib/" ] patchelf_exe = os.getenv("PATCHELF_EXE", "patchelf")