FastDeploy/benchmark/cpp/benchmark.cc

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/function/functions.h"
#include "flags.h"
#include "macros.h"
#include "option.h"
namespace vision = fastdeploy::vision;
namespace benchmark = fastdeploy::benchmark;
DEFINE_string(shapes, "1,3,224,224",
"Required, set input shape for model."
"default 1,3,224,224");
DEFINE_string(names, "DEFAULT", "Required, set input names for model.");
DEFINE_string(dtypes, "FP32",
"Required, set input dtypes for model."
"default FP32.");
DEFINE_string(trt_shapes, "1,3,224,224:1,3,224,224:1,3,224,224",
"Optional, set min/opt/max shape for trt/paddle_trt."
"default 1,3,224,224:1,3,224,224:1,3,224,224");
DEFINE_int32(batch, 1,
"Optional, set trt max batch size, "
"default 1");
DEFINE_bool(dump, false,
"Optional, whether to dump output tensors, "
"default false.");
DEFINE_bool(info, false,
"Optional, only check the input infos of model."
"default false.");
DEFINE_bool(diff, false,
"Optional, check the diff between two tensors."
"default false.");
DEFINE_string(tensors, "tensor_a.txt:tensor_b.txt",
"Optional, the paths to dumped tensors, "
"default tensor_a.txt:tensor_b.txt");
DEFINE_bool(mem, false,
"Optional, whether to force to collect memory info, "
"default false.");
DEFINE_int32(interval, -1,
"Optional, sampling interval for collect memory info, "
"default false.");
DEFINE_string(model_format, "PADDLE",
"Optional, set specific model format,"
"eg, PADDLE/ONNX/RKNN/TORCHSCRIPT/SOPHGO"
"default PADDLE.");
DEFINE_bool(disable_mkldnn, false,
"Optional, disable mkldnn for paddle backend. "
"default false.");
DEFINE_string(optimized_model_dir, "",
"Optional, set optimized model dir for lite."
"eg: model.opt.nb, "
"default ''");
DEFINE_bool(collect_trt_shape_by_device, false,
"Optional, whether collect trt shape by device. "
"default false.");
DEFINE_double(custom_tensor_value, 1.0,
"Optional, set the value for fd tensor, "
"default 1.0");
DEFINE_bool(collect_trt_shape_by_custom_tensor_value, false,
"Optional, whether collect trt shape by custom tensor value. "
"default false.");
#if defined(ENABLE_BENCHMARK)
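// Widen an int32 shape vector to int64, the shape element type used by
// fastdeploy::function::Full below.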
static std::vector<int64_t> GetInt64Shape(const std::vector<int>& shape) {
std::vector<int64_t> new_shape;
new_shape.resize(shape.size());
for (int i = 0; i < shape.size(); ++i) {
new_shape[i] = static_cast<int64_t>(shape[i]);
}
return new_shape;
}
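// Map the --model_format flag string to a fastdeploy::ModelFormat value;
// unrecognized strings fall back to PADDLE.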
static fastdeploy::ModelFormat GetModelFormat(const std::string& model_format) {
if (model_format == "PADDLE") {
return fastdeploy::ModelFormat::PADDLE;
} else if (model_format == "ONNX") {
return fastdeploy::ModelFormat::ONNX;
} else if (model_format == "RKNN") {
return fastdeploy::ModelFormat::RKNN;
} else if (model_format == "TORCHSCRIPT") {
return fastdeploy::ModelFormat::TORCHSCRIPT;
} else if (model_format == "SOPHGO") {
return fastdeploy::ModelFormat::SOPHGO;
} else {
return fastdeploy::ModelFormat::PADDLE;
}
}
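// --diff mode: load the two dumped tensors named by --tensors
// (e.g. --tensors tensor_a.txt:tensor_b.txt) and report mean/max/min
// statistics of their difference.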
static void CheckTensorDiff(int argc, char* argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
std::cout << "Check tensor diff ..." << std::endl;
std::vector<std::string> tensor_paths =
benchmark::ResultManager::SplitStr(FLAGS_tensors);
assert(tensor_paths.size() == 2);
fastdeploy::FDTensor tensor_a, tensor_b;
benchmark::ResultManager::LoadFDTensor(&tensor_a, tensor_paths[0]);
benchmark::ResultManager::LoadFDTensor(&tensor_b, tensor_paths[1]);
auto tensor_diff =
benchmark::ResultManager::CalculateDiffStatis(tensor_a, tensor_b);
std::cout << "Tensor diff: mean=" << tensor_diff.data.mean
<< ", max=" << tensor_diff.data.max
<< ", min=" << tensor_diff.data.min << std::endl;
}
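// Default mode: build a Runtime from the CLI flags and the benchmark config
// file, feed constant-valued dummy inputs, run the profiler, and report
// latency (plus memory usage when enabled).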
static void RuntimeProfiling(int argc, char* argv[]) {
// Init runtime option
auto option = fastdeploy::RuntimeOption();
if (!CreateRuntimeOption(&option, argc, argv, true)) {
return;
}
if (FLAGS_disable_mkldnn) {
option.paddle_infer_option.enable_mkldnn = false;
}
std::unordered_map<std::string, std::string> config_info;
benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
&config_info);
UpdateBaseCustomFlags(config_info); // see flags.h
// Init log recorder
std::stringstream ss;
ss.precision(6);
// Memory resource monitor
int sampling_interval = FLAGS_interval >= 1
? FLAGS_interval
: std::stoi(config_info["sampling_interval"]);
benchmark::ResourceUsageMonitor resource_moniter(
sampling_interval, std::stoi(config_info["device_id"]));
// Check model path and model format
std::string model_name, params_name, config_name;
std::string model_file, params_file;
auto model_format = fastdeploy::ModelFormat::PADDLE;
if (FLAGS_model_file != "UNKNOWN") {
// Set model file/param/format via command line
if (FLAGS_model != "") {
model_file = FLAGS_model + sep + FLAGS_model_file;
params_file = FLAGS_model + sep + FLAGS_params_file;
} else {
model_file = FLAGS_model_file;
params_file = FLAGS_params_file;
}
model_format = GetModelFormat(FLAGS_model_format);
if (model_format == fastdeploy::ModelFormat::PADDLE &&
FLAGS_params_file == "") {
if (config_info["backend"] != "lite") {
std::cout << "[ERROR] params_file can not be empty for PADDLE"
<< " format, Please, set your custom params_file manually."
<< std::endl;
return;
} else {
std::cout << "[INFO] Will using the lite light api for: " << model_file
<< std::endl;
}
}
} else {
// Set model file/params/format via the model dir (currently only
// supported for the Paddle model format)
if (!UpdateModelResourceName(&model_name, &params_name, &config_name,
&model_format, config_info, false)) {
return;
}
model_file = FLAGS_model + sep + model_name;
params_file = FLAGS_model + sep + params_name;
}
option.SetModelPath(model_file, params_file, model_format);
// Set opt model dir
if (config_info["backend"] == "lite") {
if (FLAGS_optimized_model_dir != "") {
option.paddle_lite_option.optimized_model_dir = FLAGS_optimized_model_dir;
} else {
option.paddle_lite_option.optimized_model_dir = FLAGS_model;
}
}
// Get input shapes/names/dtypes
std::vector<std::vector<int32_t>> input_shapes =
benchmark::ResultManager::GetInputShapes(FLAGS_shapes);
std::vector<std::string> input_names =
benchmark::ResultManager::GetInputNames(FLAGS_names);
std::vector<fastdeploy::FDDataType> input_dtypes =
benchmark::ResultManager::GetInputDtypes(FLAGS_dtypes);
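// These lists must line up one-to-one with the model inputs; this is
// checked against the runtime after initialization.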
// Set tensorrt shapes
if (config_info["backend"] == "paddle_trt") {
option.paddle_infer_option.collect_trt_shape = true;
option.paddle_infer_option.collect_trt_shape_by_device =
FLAGS_collect_trt_shape_by_device;
}
if (config_info["backend"] == "paddle_trt" ||
config_info["backend"] == "trt") {
option.trt_option.max_batch_size = FLAGS_batch;
std::vector<std::vector<int32_t>> trt_shapes =
benchmark::ResultManager::GetInputShapes(FLAGS_trt_shapes);
if (input_names[0] == "DEFAULT") {
std::cout << "Please set the input names for TRT/Paddle-TRT backend!"
<< std::endl;
return;
}
assert(input_names.size() == (trt_shapes.size() / 3));
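// --trt_shapes packs min/opt/max groups back to back: entries i*3, i*3+1
// and i*3+2 belong to input i.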
for (int i = 0; i < input_shapes.size(); ++i) {
option.trt_option.SetShape(input_names[i], trt_shapes[i * 3],
trt_shapes[i * 3 + 1], trt_shapes[i * 3 + 2]);
// Set custom input data used when collecting trt shapes
if (FLAGS_collect_trt_shape_by_custom_tensor_value) {
int min_shape_num = std::accumulate(trt_shapes[i * 3].begin(),
trt_shapes[i * 3].end(), 1,
std::multiplies<int>());
int opt_shape_num = std::accumulate(trt_shapes[i * 3 + 1].begin(),
trt_shapes[i * 3 + 1].end(), 1,
std::multiplies<int>());
int max_shape_num = std::accumulate(trt_shapes[i * 3 + 2].begin(),
trt_shapes[i * 3 + 2].end(), 1,
std::multiplies<int>());
std::vector<float> min_input_data(min_shape_num, FLAGS_custom_tensor_value);
std::vector<float> opt_input_data(opt_shape_num, FLAGS_custom_tensor_value);
std::vector<float> max_input_data(max_shape_num, FLAGS_custom_tensor_value);
option.trt_option.SetInputData(input_names[i], min_input_data,
opt_input_data, max_input_data);
}
}
}
// Init runtime
fastdeploy::Runtime runtime;
if (!runtime.Init(option)) {
std::cout << "[ERROR] Failed to initialize the runtime!" << std::endl;
return;
}
// Check default input names
if (input_names[0] == "DEFAULT") {
input_names.clear();
for (int i = 0; i < runtime.NumInputs(); ++i) {
input_names.push_back(runtime.GetInputInfo(i).name);
}
}
assert(runtime.NumInputs() == input_shapes.size());
assert(runtime.NumInputs() == input_names.size());
assert(runtime.NumInputs() == input_dtypes.size());
// Feed inputs; every element is set to --custom_tensor_value (default 1.0).
std::vector<fastdeploy::FDTensor> inputs(runtime.NumInputs());
for (int i = 0; i < inputs.size(); ++i) {
fastdeploy::function::Full(
FLAGS_custom_tensor_value, GetInt64Shape(input_shapes[i]),
&inputs[i], input_dtypes[i]);
inputs[i].name = input_names[i];
}
// Start the memory resource monitor
if (config_info["collect_memory_info"] == "true" || FLAGS_mem) {
resource_moniter.Start();
}
// Run runtime profiling
std::vector<fastdeploy::FDTensor> outputs;
if (!runtime.Infer(inputs, &outputs)) {
std::cerr << "Failed to predict." << std::endl;
ss << "Runtime(ms): Failed" << std::endl;
if (config_info["collect_memory_info"] == "true") {
ss << "cpu_rss_mb: Failed" << std::endl;
ss << "gpu_rss_mb: Failed" << std::endl;
ss << "gpu_util: Failed" << std::endl;
resource_moniter.Stop();
}
benchmark::ResultManager::SaveBenchmarkResult(ss.str(),
config_info["result_path"]);
return;
}
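// Report the profiled runtime, converted to milliseconds.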
double profile_time = runtime.GetProfileTime() * 1000.0;
std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl;
ss << "Runtime(ms): " << profile_time << "ms." << std::endl;
// Collect memory info
if (config_info["collect_memory_info"] == "true" || FLAGS_mem) {
float cpu_mem = resource_moniter.GetMaxCpuMem();
float gpu_mem = resource_moniter.GetMaxGpuMem();
float gpu_util = resource_moniter.GetMaxGpuUtil();
std::cout << "cpu_rss_mb: " << cpu_mem << "MB." << std::endl;
ss << "cpu_rss_mb: " << cpu_mem << "MB." << std::endl;
std::cout << "gpu_rss_mb: " << gpu_mem << "MB." << std::endl;
ss << "gpu_rss_mb: " << gpu_mem << "MB." << std::endl;
std::cout << "gpu_util: " << gpu_util << std::endl;
ss << "gpu_util: " << gpu_util << "MB." << std::endl;
resource_moniter.Stop();
}
benchmark::ResultManager::SaveBenchmarkResult(ss.str(),
config_info["result_path"]);
// Dump output tensors
if (FLAGS_dump) {
for (int i = 0; i < outputs.size(); ++i) {
auto name_tokens =
benchmark::ResultManager::SplitStr(outputs[i].name, '/');
std::string out_name = name_tokens[0];
for (int j = 1; j < name_tokens.size(); ++j) {
out_name += "_";
out_name += name_tokens[j];
}
std::string out_file = config_info["backend"] + "_" + out_name + ".txt";
benchmark::ResultManager::SaveFDTensor(outputs[i], out_file);
outputs[i].PrintInfo();
std::cout << "Saved: " << out_file << std::endl;
}
}
}
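// --info mode: build a Runtime from the same flags and print the model's
// input tensor infos instead of running inference.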
static void showInputInfos(int argc, char* argv[]) {
auto option = fastdeploy::RuntimeOption();
if (!CreateRuntimeOption(&option, argc, argv, true)) {
return;
}
if (FLAGS_disable_mkldnn) {
option.paddle_infer_option.enable_mkldnn = false;
}
std::unordered_map<std::string, std::string> config_info;
benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
&config_info);
std::string model_name, params_name, config_name;
std::string model_file, params_file;
auto model_format = fastdeploy::ModelFormat::PADDLE;
if (FLAGS_model_file != "UNKNOWN") {
// Set model file/param/format via command line
if (FLAGS_model != "") {
model_file = FLAGS_model + sep + FLAGS_model_file;
params_file = FLAGS_model + sep + FLAGS_params_file;
} else {
model_file = FLAGS_model_file;
params_file = FLAGS_params_file;
}
model_format = GetModelFormat(FLAGS_model_format);
if (model_format == fastdeploy::ModelFormat::PADDLE &&
FLAGS_params_file == "") {
if (config_info["backend"] != "lite") {
std::cout << "[ERROR] params_file can not be empty for PADDLE"
<< " format, Please, set your custom params_file manually."
<< std::endl;
return;
} else {
std::cout << "[INFO] Will using the lite light api for: " << model_file
<< std::endl;
}
}
} else {
// Set model file/params/format via the model dir (currently only
// supported for the Paddle model format)
if (!UpdateModelResourceName(&model_name, &params_name, &config_name,
&model_format, config_info, false)) {
return;
}
model_file = FLAGS_model + sep + model_name;
params_file = FLAGS_model + sep + params_name;
}
option.SetModelPath(model_file, params_file, model_format);
// Init runtime
fastdeploy::Runtime runtime;
if (!runtime.Init(option)) {
std::cout << "[ERROR] Failed to initialize the runtime!" << std::endl;
return;
}
// Show input tensor infos
auto input_infos = runtime.GetInputInfos();
for (int i = 0; i < input_infos.size(); ++i) {
std::cout << input_infos[i] << std::endl;
}
}
#endif
int main(int argc, char* argv[]) {
#if defined(ENABLE_BENCHMARK)
google::SetVersionString("0.0.0");
google::SetUsageMessage(
"./benchmark -[info|diff|check|dump|mem] -model xxx -config_path xxx "
"-[shapes|dtypes|names|tensors] -[model_file|params_file|model_format]");
google::ParseCommandLineFlags(&argc, &argv, true);
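// Dispatch on the mode flags: --diff compares two dumped tensors, --info
// prints the model's input infos, otherwise run runtime profiling.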
if (FLAGS_diff) {
CheckTensorDiff(argc, argv);
return 0;
} else if (FLAGS_info) {
showInputInfos(argc, argv);
return 0;
} else {
RuntimeProfiling(argc, argv);
return 0;
}
#endif
return 0;
}