From f3d44785c45d3b4c53e281df7085767c5acbf3a5 Mon Sep 17 00:00:00 2001
From: Jason
Date: Thu, 20 Apr 2023 21:12:43 +0800
Subject: [PATCH] Remove Paddle Reader (#1813)

* Remove Paddle Reader

* support pp-infer c++14

* disable trt cache

---------

Co-authored-by: wang-xinyu
---
 CMakeLists.txt                                |  2 +-
 cmake/paddle_inference.cmake                  |  1 +
 fastdeploy/runtime/backends/paddle/option.h   |  2 +
 .../runtime/backends/paddle/option_pybind.cc  |  2 +
 .../runtime/backends/paddle/paddle_backend.cc | 71 +++++++++++++------
 scripts/patch_paddle_inference.py             |  1 +
 serving/src/fastdeploy_runtime.cc             |  4 +-
 7 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 332f382da..9ed16259e 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,7 +36,7 @@ include(${PROJECT_SOURCE_DIR}/cmake/utils.cmake)
 
 # Set C++11 as standard for the whole project
 if(NOT MSVC)
-  set(CMAKE_CXX_STANDARD 11)
+  set(CMAKE_CXX_STANDARD 14)
   set(CMAKE_CXX_FLAGS "-Wno-format -g0 -O3")
   if(NEED_ABI0)
     add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
diff --git a/cmake/paddle_inference.cmake b/cmake/paddle_inference.cmake
index 1aad4dfda..1e9e90081 100755
--- a/cmake/paddle_inference.cmake
+++ b/cmake/paddle_inference.cmake
@@ -43,6 +43,7 @@ if(PADDLEINFERENCE_DIRECTORY)
 endif()
 
 include_directories(${PADDLEINFERENCE_INC_DIR})
+include_directories(${PADDLEINFERENCE_DIRECTORY})
 if(WIN32)
   set(PADDLEINFERENCE_COMPILE_LIB
       "${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/paddle_inference.lib"
diff --git a/fastdeploy/runtime/backends/paddle/option.h b/fastdeploy/runtime/backends/paddle/option.h
index 134fc7ef0..80380e0e2 100755
--- a/fastdeploy/runtime/backends/paddle/option.h
+++ b/fastdeploy/runtime/backends/paddle/option.h
@@ -58,6 +58,8 @@ struct PaddleBackendOption {
   bool enable_memory_optimize = true;
   /// Whether enable ir debug, default false
   bool switch_ir_debug = false;
+  /// Whether the loaded model is a quantized model
+  bool is_quantize_model = false;
 
   /*
    * @brief IPU option, this will configure the IPU hardware, if inference model in IPU
diff --git a/fastdeploy/runtime/backends/paddle/option_pybind.cc b/fastdeploy/runtime/backends/paddle/option_pybind.cc
index a08c9ab9c..3590cb866 100755
--- a/fastdeploy/runtime/backends/paddle/option_pybind.cc
+++ b/fastdeploy/runtime/backends/paddle/option_pybind.cc
@@ -51,6 +51,8 @@ void BindPaddleOption(pybind11::module& m) {
                      &PaddleBackendOption::mkldnn_cache_size)
       .def_readwrite("gpu_mem_init_size",
                      &PaddleBackendOption::gpu_mem_init_size)
+      .def_readwrite("is_quantize_model",
+                     &PaddleBackendOption::is_quantize_model)
       .def("disable_trt_ops", &PaddleBackendOption::DisableTrtOps)
       .def("delete_pass", &PaddleBackendOption::DeletePass)
      .def("set_ipu_config", &PaddleBackendOption::SetIpuConfig);
diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.cc b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
index 99e2ab117..f8ee9b31c 100644
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -148,11 +148,9 @@ bool PaddleBackend::InitFromPaddle(const std::string& model,
     FDASSERT(ReadBinaryFromFile(model, &model_content),
              "Failed to read file %s.", model.c_str());
   }
-  auto reader =
-      paddle2onnx::PaddleReader(model_content.c_str(), model_content.size());
   // If it's a quantized model, and use cpu with mkldnn, automaticaly switch to
   // int8 mode
-  if (reader.is_quantize_model) {
+  if (option.is_quantize_model) {
     if (option.device == Device::GPU) {
       FDWARNING << "The loaded model is a quantized model, while inference on "
                    "GPU, please use TensorRT backend to get better performance."
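
Note: with paddle2onnx::PaddleReader removed, quantized models are no longer
auto-detected; callers must flag them through the new option before the
backend initializes. A minimal caller-side sketch, using only names this
patch introduces (the wrapper function is illustrative):

    // Quantized models must now be flagged explicitly before backend init;
    // previously reader.is_quantize_model supplied this automatically.
    #include "fastdeploy/runtime/backends/paddle/option.h"

    fastdeploy::PaddleBackendOption MakeQuantizedCpuOption() {
      fastdeploy::PaddleBackendOption option;
      option.device = fastdeploy::Device::CPU;  // CPU + MKLDNN int8 path
      option.is_quantize_model = true;
      return option;
    }
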
inference on " "GPU, please use TensorRT backend to get better performance." @@ -184,25 +182,25 @@ bool PaddleBackend::InitFromPaddle(const std::string& model, } } - inputs_desc_.resize(reader.num_inputs); - for (int i = 0; i < reader.num_inputs; ++i) { - std::string name(reader.inputs[i].name); - std::vector shape(reader.inputs[i].shape, - reader.inputs[i].shape + reader.inputs[i].rank); - inputs_desc_[i].name = name; - inputs_desc_[i].shape.assign(shape.begin(), shape.end()); - inputs_desc_[i].dtype = ReaderDataTypeToFD(reader.inputs[i].dtype); - } - outputs_desc_.resize(reader.num_outputs); - for (int i = 0; i < reader.num_outputs; ++i) { - std::string name(reader.outputs[i].name); - std::vector shape( - reader.outputs[i].shape, - reader.outputs[i].shape + reader.outputs[i].rank); - outputs_desc_[i].name = name; - outputs_desc_[i].shape.assign(shape.begin(), shape.end()); - outputs_desc_[i].dtype = ReaderDataTypeToFD(reader.outputs[i].dtype); - } +// inputs_desc_.resize(reader.num_inputs); +// for (int i = 0; i < reader.num_inputs; ++i) { +// std::string name(reader.inputs[i].name); +// std::vector shape(reader.inputs[i].shape, +// reader.inputs[i].shape + reader.inputs[i].rank); +// inputs_desc_[i].name = name; +// inputs_desc_[i].shape.assign(shape.begin(), shape.end()); +// inputs_desc_[i].dtype = ReaderDataTypeToFD(reader.inputs[i].dtype); +// } +// outputs_desc_.resize(reader.num_outputs); +// for (int i = 0; i < reader.num_outputs; ++i) { +// std::string name(reader.outputs[i].name); +// std::vector shape( +// reader.outputs[i].shape, +// reader.outputs[i].shape + reader.outputs[i].rank); +// outputs_desc_[i].name = name; +// outputs_desc_[i].shape.assign(shape.begin(), shape.end()); +// outputs_desc_[i].dtype = ReaderDataTypeToFD(reader.outputs[i].dtype); +// } if (option.collect_trt_shape) { // Set the shape info file. 
@@ -253,6 +251,35 @@ bool PaddleBackend::InitFromPaddle(const std::string& model,
     }
   }
   predictor_ = paddle_infer::CreatePredictor(config_);
+
+  auto input_names = predictor_->GetInputNames();
+  auto output_names = predictor_->GetOutputNames();
+  auto input_dtypes = predictor_->GetInputTypes();
+  auto output_dtypes = predictor_->GetOutputTypes();
+  auto input_shapes = predictor_->GetInputTensorShape();
+  auto output_shapes = predictor_->GetOutputTensorShape();
+
+  inputs_desc_.resize(input_names.size());
+  for (int i = 0; i < input_names.size(); ++i) {
+    inputs_desc_[i].name = input_names[i];
+    auto iter = input_shapes.find(inputs_desc_[i].name);
+    FDASSERT(iter != input_shapes.end(), "Cannot find shape for input %s.", inputs_desc_[i].name.c_str());
+    inputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end());
+    auto iter1 = input_dtypes.find(inputs_desc_[i].name);
+    FDASSERT(iter1 != input_dtypes.end(), "Cannot find data type for input %s.", inputs_desc_[i].name.c_str());
+    inputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second);
+  }
+  outputs_desc_.resize(output_names.size());
+  for (int i = 0; i < output_names.size(); ++i) {
+    outputs_desc_[i].name = output_names[i];
+    auto iter = output_shapes.find(outputs_desc_[i].name);
+    FDASSERT(iter != output_shapes.end(), "Cannot find shape for output %s.", outputs_desc_[i].name.c_str());
+    outputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end());
+    auto iter1 = output_dtypes.find(outputs_desc_[i].name);
+    FDASSERT(iter1 != output_dtypes.end(), "Cannot find data type for output %s.", outputs_desc_[i].name.c_str());
+    outputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second);
+  }
+  initialized_ = true;
   return true;
 }
 
diff --git a/scripts/patch_paddle_inference.py b/scripts/patch_paddle_inference.py
index e85071ffd..9eeabf37f 100644
--- a/scripts/patch_paddle_inference.py
+++ b/scripts/patch_paddle_inference.py
@@ -26,6 +26,7 @@ def process_paddle_inference(paddle_inference_so_file):
     rpaths = [
         "$ORIGIN", "$ORIGIN/../../third_party/install/mkldnn/lib/",
         "$ORIGIN/../../third_party/install/mklml/lib/",
+        "$ORIGIN/../../third_party/install/fdmodel/lib/",
         "$ORIGIN/../../../tensorrt/lib/"
     ]
 
diff --git a/serving/src/fastdeploy_runtime.cc b/serving/src/fastdeploy_runtime.cc
index 03536f58b..bc9c89252 100644
--- a/serving/src/fastdeploy_runtime.cc
+++ b/serving/src/fastdeploy_runtime.cc
@@ -314,7 +314,9 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
         //     value_string,
         //     &runtime_options_->trt_max_workspace_size));
       } else if (param_key == "cache_file") {
-        runtime_options_->trt_option.serialize_file = value_string;
+        LOG_MESSAGE(TRITONSERVER_LOG_INFO,
+                    "Skipping setting TRT cache file.");
+        // runtime_options_->trt_option.serialize_file = value_string;
       } else if (param_key == "use_paddle") {
         runtime_options_->EnablePaddleToTrt();
       } else if (param_key == "use_paddle_log") {
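
Since the I/O descriptions are now collected right after CreatePredictor(),
they are available as soon as runtime initialization succeeds. A hedged
end-to-end usage sketch; the RuntimeOption/Runtime calls and the
paddle_infer_option member are assumptions based on FastDeploy's public
headers, not part of this patch:

    #include <iostream>
    #include "fastdeploy/runtime.h"

    int main() {
      fastdeploy::RuntimeOption option;
      option.SetModelPath("model.pdmodel", "model.pdiparams");
      option.UsePaddleInferBackend();
      // Quantized models must be flagged manually now (see the option.h hunk).
      option.paddle_infer_option.is_quantize_model = false;

      fastdeploy::Runtime runtime;
      if (!runtime.Init(option)) return -1;
      // I/O info now comes from the predictor, not paddle2onnx::PaddleReader.
      for (int i = 0; i < runtime.NumInputs(); ++i) {
        std::cout << runtime.GetInputInfo(i).name << std::endl;
      }
      return 0;
    }
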