mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 00:57:33 +08:00
Remove Paddle Reader (#1813)
* Remove Paddle Reader
* support pp-infer c++14
* disable trt cache
---------
Co-authored-by: wang-xinyu <wangxinyu_es@163.com>
This commit is contained in:
@@ -36,7 +36,7 @@ include(${PROJECT_SOURCE_DIR}/cmake/utils.cmake)
|
|||||||
|
|
||||||
# Set C++11 as standard for the whole project
|
# Set C++14 as standard for the whole project
|
||||||
if(NOT MSVC)
|
if(NOT MSVC)
|
||||||
set(CMAKE_CXX_STANDARD 11)
|
set(CMAKE_CXX_STANDARD 14)
|
||||||
set(CMAKE_CXX_FLAGS "-Wno-format -g0 -O3")
|
set(CMAKE_CXX_FLAGS "-Wno-format -g0 -O3")
|
||||||
if(NEED_ABI0)
|
if(NEED_ABI0)
|
||||||
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
|
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
|
||||||
|
@@ -43,6 +43,7 @@ if(PADDLEINFERENCE_DIRECTORY)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
include_directories(${PADDLEINFERENCE_INC_DIR})
|
include_directories(${PADDLEINFERENCE_INC_DIR})
|
||||||
|
include_directories(${PADDLEINFERENCE_DIRECTORY})
|
||||||
if(WIN32)
|
if(WIN32)
|
||||||
set(PADDLEINFERENCE_COMPILE_LIB
|
set(PADDLEINFERENCE_COMPILE_LIB
|
||||||
"${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/paddle_inference.lib"
|
"${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/paddle_inference.lib"
|
||||||
|
@@ -58,6 +58,8 @@ struct PaddleBackendOption {
|
|||||||
bool enable_memory_optimize = true;
|
bool enable_memory_optimize = true;
|
||||||
/// Whether enable ir debug, default false
|
/// Whether enable ir debug, default false
|
||||||
bool switch_ir_debug = false;
|
bool switch_ir_debug = false;
|
||||||
|
/// Whether the loaded model is a quantized model
|
||||||
|
bool is_quantize_model = false;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* @brief IPU option, this will configure the IPU hardware, if inference model in IPU
|
* @brief IPU option, this will configure the IPU hardware, if inference model in IPU
|
||||||
|
@@ -51,6 +51,8 @@ void BindPaddleOption(pybind11::module& m) {
|
|||||||
&PaddleBackendOption::mkldnn_cache_size)
|
&PaddleBackendOption::mkldnn_cache_size)
|
||||||
.def_readwrite("gpu_mem_init_size",
|
.def_readwrite("gpu_mem_init_size",
|
||||||
&PaddleBackendOption::gpu_mem_init_size)
|
&PaddleBackendOption::gpu_mem_init_size)
|
||||||
|
.def_readwrite("is_quantize_model",
|
||||||
|
&PaddleBackendOption::is_quantize_model)
|
||||||
.def("disable_trt_ops", &PaddleBackendOption::DisableTrtOps)
|
.def("disable_trt_ops", &PaddleBackendOption::DisableTrtOps)
|
||||||
.def("delete_pass", &PaddleBackendOption::DeletePass)
|
.def("delete_pass", &PaddleBackendOption::DeletePass)
|
||||||
.def("set_ipu_config", &PaddleBackendOption::SetIpuConfig);
|
.def("set_ipu_config", &PaddleBackendOption::SetIpuConfig);
|
||||||
|
@@ -148,11 +148,9 @@ bool PaddleBackend::InitFromPaddle(const std::string& model,
|
|||||||
FDASSERT(ReadBinaryFromFile(model, &model_content),
|
FDASSERT(ReadBinaryFromFile(model, &model_content),
|
||||||
"Failed to read file %s.", model.c_str());
|
"Failed to read file %s.", model.c_str());
|
||||||
}
|
}
|
||||||
auto reader =
|
|
||||||
paddle2onnx::PaddleReader(model_content.c_str(), model_content.size());
|
|
||||||
// If it's a quantized model, and use cpu with mkldnn, automatically switch to
|
// If it's a quantized model, and use cpu with mkldnn, automatically switch to
|
||||||
// int8 mode
|
// int8 mode
|
||||||
if (reader.is_quantize_model) {
|
if (option.is_quantize_model) {
|
||||||
if (option.device == Device::GPU) {
|
if (option.device == Device::GPU) {
|
||||||
FDWARNING << "The loaded model is a quantized model, while inference on "
|
FDWARNING << "The loaded model is a quantized model, while inference on "
|
||||||
"GPU, please use TensorRT backend to get better performance."
|
"GPU, please use TensorRT backend to get better performance."
|
||||||
@@ -184,25 +182,25 @@ bool PaddleBackend::InitFromPaddle(const std::string& model,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inputs_desc_.resize(reader.num_inputs);
|
// inputs_desc_.resize(reader.num_inputs);
|
||||||
for (int i = 0; i < reader.num_inputs; ++i) {
|
// for (int i = 0; i < reader.num_inputs; ++i) {
|
||||||
std::string name(reader.inputs[i].name);
|
// std::string name(reader.inputs[i].name);
|
||||||
std::vector<int64_t> shape(reader.inputs[i].shape,
|
// std::vector<int64_t> shape(reader.inputs[i].shape,
|
||||||
reader.inputs[i].shape + reader.inputs[i].rank);
|
// reader.inputs[i].shape + reader.inputs[i].rank);
|
||||||
inputs_desc_[i].name = name;
|
// inputs_desc_[i].name = name;
|
||||||
inputs_desc_[i].shape.assign(shape.begin(), shape.end());
|
// inputs_desc_[i].shape.assign(shape.begin(), shape.end());
|
||||||
inputs_desc_[i].dtype = ReaderDataTypeToFD(reader.inputs[i].dtype);
|
// inputs_desc_[i].dtype = ReaderDataTypeToFD(reader.inputs[i].dtype);
|
||||||
}
|
// }
|
||||||
outputs_desc_.resize(reader.num_outputs);
|
// outputs_desc_.resize(reader.num_outputs);
|
||||||
for (int i = 0; i < reader.num_outputs; ++i) {
|
// for (int i = 0; i < reader.num_outputs; ++i) {
|
||||||
std::string name(reader.outputs[i].name);
|
// std::string name(reader.outputs[i].name);
|
||||||
std::vector<int64_t> shape(
|
// std::vector<int64_t> shape(
|
||||||
reader.outputs[i].shape,
|
// reader.outputs[i].shape,
|
||||||
reader.outputs[i].shape + reader.outputs[i].rank);
|
// reader.outputs[i].shape + reader.outputs[i].rank);
|
||||||
outputs_desc_[i].name = name;
|
// outputs_desc_[i].name = name;
|
||||||
outputs_desc_[i].shape.assign(shape.begin(), shape.end());
|
// outputs_desc_[i].shape.assign(shape.begin(), shape.end());
|
||||||
outputs_desc_[i].dtype = ReaderDataTypeToFD(reader.outputs[i].dtype);
|
// outputs_desc_[i].dtype = ReaderDataTypeToFD(reader.outputs[i].dtype);
|
||||||
}
|
// }
|
||||||
if (option.collect_trt_shape) {
|
if (option.collect_trt_shape) {
|
||||||
// Set the shape info file.
|
// Set the shape info file.
|
||||||
std::string curr_model_dir = "./";
|
std::string curr_model_dir = "./";
|
||||||
@@ -253,6 +251,35 @@ bool PaddleBackend::InitFromPaddle(const std::string& model,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
predictor_ = paddle_infer::CreatePredictor(config_);
|
predictor_ = paddle_infer::CreatePredictor(config_);
|
||||||
|
|
||||||
|
auto input_names = predictor_->GetInputNames();
|
||||||
|
auto output_names = predictor_->GetOutputNames();
|
||||||
|
auto input_dtypes = predictor_->GetInputTypes();
|
||||||
|
auto output_dtypes = predictor_->GetOutputTypes();
|
||||||
|
auto input_shapes = predictor_->GetInputTensorShape();
|
||||||
|
auto output_shapes = predictor_->GetOutputTensorShape();
|
||||||
|
|
||||||
|
inputs_desc_.resize(input_names.size());
|
||||||
|
for (int i = 0; i < input_names.size(); ++i) {
|
||||||
|
inputs_desc_[i].name = input_names[i];
|
||||||
|
auto iter = input_shapes.find(inputs_desc_[i].name);
|
||||||
|
FDASSERT(iter != input_shapes.end(), "Cannot find shape for input %s.", inputs_desc_[i].name.c_str());
|
||||||
|
inputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end());
|
||||||
|
auto iter1 = input_dtypes.find(inputs_desc_[i].name);
|
||||||
|
FDASSERT(iter1 != input_dtypes.end(), "Cannot find data type for input %s.", inputs_desc_[i].name.c_str());
|
||||||
|
inputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second);
|
||||||
|
}
|
||||||
|
outputs_desc_.resize(output_names.size());
|
||||||
|
for (int i = 0; i < output_names.size(); ++i) {
|
||||||
|
outputs_desc_[i].name = output_names[i];
|
||||||
|
auto iter = output_shapes.find(outputs_desc_[i].name);
|
||||||
|
FDASSERT(iter != output_shapes.end(), "Cannot find shape for output %s.", outputs_desc_[i].name.c_str());
|
||||||
|
outputs_desc_[i].shape.assign(iter->second.begin(), iter->second.end());
|
||||||
|
auto iter1 = output_dtypes.find(outputs_desc_[i].name);
|
||||||
|
FDASSERT(iter1 != output_dtypes.end(), "Cannot find data type for output %s.", outputs_desc_[i].name.c_str());
|
||||||
|
outputs_desc_[i].dtype = PaddleDataTypeToFD(iter1->second);
|
||||||
|
}
|
||||||
|
|
||||||
initialized_ = true;
|
initialized_ = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@@ -26,6 +26,7 @@ def process_paddle_inference(paddle_inference_so_file):
|
|||||||
rpaths = [
|
rpaths = [
|
||||||
"$ORIGIN", "$ORIGIN/../../third_party/install/mkldnn/lib/",
|
"$ORIGIN", "$ORIGIN/../../third_party/install/mkldnn/lib/",
|
||||||
"$ORIGIN/../../third_party/install/mklml/lib/",
|
"$ORIGIN/../../third_party/install/mklml/lib/",
|
||||||
|
"$ORIGIN/../../third_party/install/fdmodel/lib/",
|
||||||
"$ORIGIN/../../../tensorrt/lib/"
|
"$ORIGIN/../../../tensorrt/lib/"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@@ -314,7 +314,9 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
|
|||||||
// value_string,
|
// value_string,
|
||||||
// &runtime_options_->trt_max_workspace_size));
|
// &runtime_options_->trt_max_workspace_size));
|
||||||
} else if (param_key == "cache_file") {
|
} else if (param_key == "cache_file") {
|
||||||
runtime_options_->trt_option.serialize_file = value_string;
|
LOG_MESSAGE(TRITONSERVER_LOG_INFO,
|
||||||
|
"Skipping setting TRT cache file.");
|
||||||
|
// runtime_options_->trt_option.serialize_file = value_string;
|
||||||
} else if (param_key == "use_paddle") {
|
} else if (param_key == "use_paddle") {
|
||||||
runtime_options_->EnablePaddleToTrt();
|
runtime_options_->EnablePaddleToTrt();
|
||||||
} else if (param_key == "use_paddle_log") {
|
} else if (param_key == "use_paddle_log") {
|
||||||
|
Reference in New Issue
Block a user