// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/function/functions.h"

#include "flags.h"
#include "macros.h"
#include "option.h"

namespace vision = fastdeploy::vision;
namespace benchmark = fastdeploy::benchmark;

DEFINE_string(shapes, "1,3,224,224",
              "Required, set input shape for model. "
              "default 1,3,224,224");
DEFINE_string(names, "DEFAULT", "Required, set input names for model.");
DEFINE_string(dtypes, "FP32",
              "Required, set input dtypes for model. "
              "default FP32.");
DEFINE_string(trt_shapes, "1,3,224,224:1,3,224,224:1,3,224,224",
              "Optional, set min/opt/max shape for trt/paddle_trt. "
              "default 1,3,224,224:1,3,224,224:1,3,224,224");
DEFINE_int32(batch, 1,
             "Optional, set trt max batch size, "
             "default 1");
DEFINE_bool(dump, false,
            "Optional, whether to dump output tensors, "
            "default false.");
DEFINE_bool(info, false,
            "Optional, only check the input infos of model. "
            "default false.");
DEFINE_bool(diff, false,
            "Optional, check the diff between two tensors. "
            "default false.");
DEFINE_string(tensors, "tensor_a.txt:tensor_b.txt",
              "Optional, the paths to dumped tensors, "
              "default tensor_a.txt:tensor_b.txt");
DEFINE_bool(mem, false,
            "Optional, whether to force to collect memory info, "
            "default false.");
DEFINE_int32(interval, -1,
             "Optional, sampling interval for collecting memory info, "
             "default -1.");
DEFINE_string(model_format, "PADDLE",
              "Optional, set specific model format, "
              "eg, PADDLE/ONNX/RKNN/TORCHSCRIPT/SOPHGO, "
              "default PADDLE.");
DEFINE_bool(disable_mkldnn, false,
            "Optional, disable mkldnn for paddle backend. "
            "default false.");
DEFINE_string(optimized_model_dir, "",
              "Optional, set optimized model dir for lite, "
              "eg: model.opt.nb, "
              "default ''");
DEFINE_bool(collect_trt_shape_by_device, false,
            "Optional, whether to collect trt shape by device. "
            "default false.");
DEFINE_double(custom_tensor_value, 1.0,
              "Optional, set the value for fd tensor, "
              "default 1.0");
DEFINE_bool(collect_trt_shape_by_custom_tensor_value, false,
            "Optional, whether to collect trt shape by custom tensor value. "
            "default false.");
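
// Example invocation (hypothetical model/config paths; the --model,
// --config_path, --model_file and --params_file flags are defined in flags.h):
//   ./benchmark --model ResNet50_infer --config_path config.txt \
//               --names inputs --shapes 1,3,224,224 --dtypes FP32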

#if defined(ENABLE_BENCHMARK)
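// Widen an int32 shape to int64_t, as expected by fastdeploy::function::Full
// when building the constant input tensors below.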
static std::vector<int64_t> GetInt64Shape(const std::vector<int>& shape) {
  std::vector<int64_t> new_shape;
  new_shape.resize(shape.size());
  for (int i = 0; i < shape.size(); ++i) {
    new_shape[i] = static_cast<int64_t>(shape[i]);
  }
  return new_shape;
}
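
// Map the --model_format string to a fastdeploy::ModelFormat enum;
// unrecognized values fall back to PADDLE.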
static fastdeploy::ModelFormat GetModelFormat(const std::string& model_format) {
  if (model_format == "PADDLE") {
    return fastdeploy::ModelFormat::PADDLE;
  } else if (model_format == "ONNX") {
    return fastdeploy::ModelFormat::ONNX;
  } else if (model_format == "RKNN") {
    return fastdeploy::ModelFormat::RKNN;
  } else if (model_format == "TORCHSCRIPT") {
    return fastdeploy::ModelFormat::TORCHSCRIPT;
  } else if (model_format == "SOPHGO") {
    return fastdeploy::ModelFormat::SOPHGO;
  } else {
    return fastdeploy::ModelFormat::PADDLE;
  }
}
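
// Load the two tensors named in --tensors (colon-separated paths) and print
// the mean/max/min statistics of their difference.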
static void CheckTensorDiff(int argc, char* argv[]) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  std::cout << "Check tensor diff ..." << std::endl;
  std::vector<std::string> tensor_paths =
      benchmark::ResultManager::SplitStr(FLAGS_tensors);
  assert(tensor_paths.size() == 2);
  fastdeploy::FDTensor tensor_a, tensor_b;
  benchmark::ResultManager::LoadFDTensor(&tensor_a, tensor_paths[0]);
  benchmark::ResultManager::LoadFDTensor(&tensor_b, tensor_paths[1]);
  auto tensor_diff =
      benchmark::ResultManager::CalculateDiffStatis(tensor_a, tensor_b);
  std::cout << "Tensor diff: mean=" << tensor_diff.data.mean
            << ", max=" << tensor_diff.data.max
            << ", min=" << tensor_diff.data.min << std::endl;
}
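
// Build a Runtime from the command-line flags and the benchmark config file,
// feed constant-valued input tensors, report the profiled runtime and
// (optionally) peak CPU/GPU memory usage, then dump output tensors on request.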
static void RuntimeProfiling(int argc, char* argv[]) {
  // Init runtime option
  auto option = fastdeploy::RuntimeOption();
  if (!CreateRuntimeOption(&option, argc, argv, true)) {
    return;
  }
  if (FLAGS_disable_mkldnn) {
    option.paddle_infer_option.enable_mkldnn = false;
  }
  std::unordered_map<std::string, std::string> config_info;
  benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
                                                &config_info);
  UpdateBaseCustomFlags(config_info);  // see flags.h
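  // Keys read from config_info in this function: "backend", "device_id",
  // "sampling_interval", "collect_memory_info" and "result_path". The config
  // file may carry additional keys consumed by flags.h / option.h.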
  // Init log recorder
  std::stringstream ss;
  ss.precision(6);

  // Memory resource monitor
  int sampling_interval = FLAGS_interval >= 1
                              ? FLAGS_interval
                              : std::stoi(config_info["sampling_interval"]);
  benchmark::ResourceUsageMonitor resource_moniter(
      sampling_interval, std::stoi(config_info["device_id"]));

  // Check model path and model format
  std::string model_name, params_name, config_name;
  std::string model_file, params_file;
  auto model_format = fastdeploy::ModelFormat::PADDLE;
  if (FLAGS_model_file != "UNKNOWN") {
    // Set model file/param/format via command line
    if (FLAGS_model != "") {
      model_file = FLAGS_model + sep + FLAGS_model_file;
      params_file = FLAGS_model + sep + FLAGS_params_file;
    } else {
      model_file = FLAGS_model_file;
      params_file = FLAGS_params_file;
    }
    model_format = GetModelFormat(FLAGS_model_format);
    if (model_format == fastdeploy::ModelFormat::PADDLE &&
        FLAGS_params_file == "") {
      if (config_info["backend"] != "lite") {
        std::cout << "[ERROR] params_file cannot be empty for the PADDLE"
                  << " format. Please set your custom params_file manually."
                  << std::endl;
        return;
      } else {
        std::cout << "[INFO] Will use the lite light api for: " << model_file
                  << std::endl;
      }
    }
  } else {
    // Set model file/param/format via model dir (only supported
    // for the Paddle model format now)
    if (!UpdateModelResourceName(&model_name, &params_name, &config_name,
                                 &model_format, config_info, false)) {
      return;
    }
    model_file = FLAGS_model + sep + model_name;
    params_file = FLAGS_model + sep + params_name;
  }

  option.SetModelPath(model_file, params_file, model_format);

  // Set opt model dir
  if (config_info["backend"] == "lite") {
    if (FLAGS_optimized_model_dir != "") {
      option.paddle_lite_option.optimized_model_dir = FLAGS_optimized_model_dir;
    } else {
      option.paddle_lite_option.optimized_model_dir = FLAGS_model;
    }
  }

  // Get input shapes/names/dtypes
  std::vector<std::vector<int32_t>> input_shapes =
      benchmark::ResultManager::GetInputShapes(FLAGS_shapes);
  std::vector<std::string> input_names =
      benchmark::ResultManager::GetInputNames(FLAGS_names);
  std::vector<fastdeploy::FDDataType> input_dtypes =
      benchmark::ResultManager::GetInputDtypes(FLAGS_dtypes);
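
  // FLAGS_trt_shapes packs three consecutive colon-separated shape groups
  // (min/opt/max) per input, in the same order as the names in FLAGS_names.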
  // Set tensorrt shapes
  if (config_info["backend"] == "paddle_trt") {
    option.paddle_infer_option.collect_trt_shape = true;
    option.paddle_infer_option.collect_trt_shape_by_device =
        FLAGS_collect_trt_shape_by_device;
  }
  if (config_info["backend"] == "paddle_trt" ||
      config_info["backend"] == "trt") {
    option.trt_option.max_batch_size = FLAGS_batch;
    std::vector<std::vector<int32_t>> trt_shapes =
        benchmark::ResultManager::GetInputShapes(FLAGS_trt_shapes);
    if (input_names[0] == "DEFAULT") {
      std::cout << "Please set the input names for TRT/Paddle-TRT backend!"
                << std::endl;
      return;
    }
    assert(input_names.size() == (trt_shapes.size() / 3));
    for (int i = 0; i < input_shapes.size(); ++i) {
      option.trt_option.SetShape(input_names[i], trt_shapes[i * 3],
                                 trt_shapes[i * 3 + 1], trt_shapes[i * 3 + 2]);
      // Set custom input data for collecting trt shapes
      if (FLAGS_collect_trt_shape_by_custom_tensor_value) {
        int min_shape_num = std::accumulate(trt_shapes[i * 3].begin(),
                                            trt_shapes[i * 3].end(), 1,
                                            std::multiplies<int>());
        int opt_shape_num = std::accumulate(trt_shapes[i * 3 + 1].begin(),
                                            trt_shapes[i * 3 + 1].end(), 1,
                                            std::multiplies<int>());
        int max_shape_num = std::accumulate(trt_shapes[i * 3 + 2].begin(),
                                            trt_shapes[i * 3 + 2].end(), 1,
                                            std::multiplies<int>());
        std::vector<float> min_input_data(min_shape_num,
                                          FLAGS_custom_tensor_value);
        std::vector<float> opt_input_data(opt_shape_num,
                                          FLAGS_custom_tensor_value);
        std::vector<float> max_input_data(max_shape_num,
                                          FLAGS_custom_tensor_value);
        option.trt_option.SetInputData(input_names[i], min_input_data,
                                       opt_input_data, max_input_data);
      }
    }
  }

  // Init runtime
  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    std::cout << "Failed to initialize Runtime!" << std::endl;
    return;
  }

  // Check default input names
  if (input_names[0] == "DEFAULT") {
    input_names.clear();
    for (int i = 0; i < runtime.NumInputs(); ++i) {
      input_names.push_back(runtime.GetInputInfo(i).name);
    }
  }

  assert(runtime.NumInputs() == input_shapes.size());
  assert(runtime.NumInputs() == input_names.size());
  assert(runtime.NumInputs() == input_dtypes.size());

  // Feed inputs, all values set to FLAGS_custom_tensor_value (default 1.0).
  std::vector<fastdeploy::FDTensor> inputs(runtime.NumInputs());
  for (int i = 0; i < inputs.size(); ++i) {
    fastdeploy::function::Full(
        FLAGS_custom_tensor_value, GetInt64Shape(input_shapes[i]),
        &inputs[i], input_dtypes[i]);
    inputs[i].name = input_names[i];
  }

  // Start memory resource monitor
  if (config_info["collect_memory_info"] == "true" || FLAGS_mem) {
    resource_moniter.Start();
  }

  // Run runtime profiling
  std::vector<fastdeploy::FDTensor> outputs;
  if (!runtime.Infer(inputs, &outputs)) {
    std::cerr << "Failed to predict." << std::endl;
    ss << "Runtime(ms): Failed" << std::endl;
    if (config_info["collect_memory_info"] == "true") {
      ss << "cpu_rss_mb: Failed" << std::endl;
      ss << "gpu_rss_mb: Failed" << std::endl;
      ss << "gpu_util: Failed" << std::endl;
      resource_moniter.Stop();
    }
    benchmark::ResultManager::SaveBenchmarkResult(ss.str(),
                                                  config_info["result_path"]);
    return;
  }

  double profile_time = runtime.GetProfileTime() * 1000.0;
  std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl;
  ss << "Runtime(ms): " << profile_time << "ms." << std::endl;

  // Collect memory info
  if (config_info["collect_memory_info"] == "true" || FLAGS_mem) {
    float cpu_mem = resource_moniter.GetMaxCpuMem();
    float gpu_mem = resource_moniter.GetMaxGpuMem();
    float gpu_util = resource_moniter.GetMaxGpuUtil();
    std::cout << "cpu_rss_mb: " << cpu_mem << "MB." << std::endl;
    ss << "cpu_rss_mb: " << cpu_mem << "MB." << std::endl;
    std::cout << "gpu_rss_mb: " << gpu_mem << "MB." << std::endl;
    ss << "gpu_rss_mb: " << gpu_mem << "MB." << std::endl;
    std::cout << "gpu_util: " << gpu_util << std::endl;
    ss << "gpu_util: " << gpu_util << std::endl;
    resource_moniter.Stop();
  }
  benchmark::ResultManager::SaveBenchmarkResult(ss.str(),
                                                config_info["result_path"]);
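
  // Tensors dumped here can later be compared across runs/backends via
  // --diff --tensors file_a.txt:file_b.txt (see CheckTensorDiff above).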
  // Dump output tensors
  if (FLAGS_dump) {
    for (int i = 0; i < outputs.size(); ++i) {
      auto name_tokens =
          benchmark::ResultManager::SplitStr(outputs[i].name, '/');
      std::string out_name = name_tokens[0];
      for (int j = 1; j < name_tokens.size(); ++j) {
        out_name += "_";
        out_name += name_tokens[j];
      }
      std::string out_file = config_info["backend"] + "_" + out_name + ".txt";
      benchmark::ResultManager::SaveFDTensor(outputs[i], out_file);
      outputs[i].PrintInfo();
      std::cout << "Saved: " << out_file << std::endl;
    }
  }
}
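
// Build a Runtime for the requested model and print the info of every input
// tensor without running inference (the --info mode).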
static void showInputInfos(int argc, char* argv[]) {
  auto option = fastdeploy::RuntimeOption();
  if (!CreateRuntimeOption(&option, argc, argv, true)) {
    return;
  }
  if (FLAGS_disable_mkldnn) {
    option.paddle_infer_option.enable_mkldnn = false;
  }
  std::unordered_map<std::string, std::string> config_info;
  benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
                                                &config_info);
  std::string model_name, params_name, config_name;
  std::string model_file, params_file;
  auto model_format = fastdeploy::ModelFormat::PADDLE;
  if (FLAGS_model_file != "UNKNOWN") {
    // Set model file/param/format via command line
    if (FLAGS_model != "") {
      model_file = FLAGS_model + sep + FLAGS_model_file;
      params_file = FLAGS_model + sep + FLAGS_params_file;
    } else {
      model_file = FLAGS_model_file;
      params_file = FLAGS_params_file;
    }
    model_format = GetModelFormat(FLAGS_model_format);
    if (model_format == fastdeploy::ModelFormat::PADDLE &&
        FLAGS_params_file == "") {
      if (config_info["backend"] != "lite") {
        std::cout << "[ERROR] params_file cannot be empty for the PADDLE"
                  << " format. Please set your custom params_file manually."
                  << std::endl;
        return;
      } else {
        std::cout << "[INFO] Will use the lite light api for: " << model_file
                  << std::endl;
      }
    }
  } else {
    // Set model file/param/format via model dir (only supported
    // for the Paddle model format now)
    if (!UpdateModelResourceName(&model_name, &params_name, &config_name,
                                 &model_format, config_info, false)) {
      return;
    }
    model_file = FLAGS_model + sep + model_name;
    params_file = FLAGS_model + sep + params_name;
  }

  option.SetModelPath(model_file, params_file, model_format);

  // Init runtime
  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    std::cout << "Failed to initialize Runtime!" << std::endl;
    return;
  }
  // Show input tensor infos
  auto input_infos = runtime.GetInputInfos();
  for (int i = 0; i < input_infos.size(); ++i) {
    std::cout << input_infos[i] << std::endl;
  }
}
#endif
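
// Entry point: --diff compares two dumped tensors, --info prints the model's
// input tensor infos, otherwise a full runtime profiling pass is executed.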
int main(int argc, char* argv[]) {
#if defined(ENABLE_BENCHMARK)
  google::SetVersionString("0.0.0");
  google::SetUsageMessage(
      "./benchmark -[info|diff|check|dump|mem] -model xxx -config_path xxx "
      "-[shapes|dtypes|names|tensors] -[model_file|params_file|model_format]");
  google::ParseCommandLineFlags(&argc, &argv, true);
  if (FLAGS_diff) {
    CheckTensorDiff(argc, argv);
    return 0;
  } else if (FLAGS_info) {
    showInputInfos(argc, argv);
    return 0;
  } else {
    RuntimeProfiling(argc, argv);
    return 0;
  }
#endif
  return 0;
}