// FastDeploy/fastdeploy/fastdeploy_runtime.h
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/utils/perf.h"
namespace fastdeploy {
enum FASTDEPLOY_DECL Backend { UNKNOWN, ORT, TRT, PDINFER, OPENVINO };
// AUTOREC decides which Frontend to use according to the name of
// the model file
enum FASTDEPLOY_DECL Frontend { AUTOREC, PADDLE, ONNX };
FASTDEPLOY_DECL std::string Str(const Backend& b);
FASTDEPLOY_DECL std::string Str(const Frontend& f);
FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
// check whether the model file matches the given model format
bool CheckModelFormat(const std::string& model_file,
                      const Frontend& model_format);
// guess the model format from the name of the model file
Frontend GuessModelFormat(const std::string& model_file);
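// Illustrative usage sketch for the helpers above (not part of the original
// header; the model file name is a placeholder):
//
//   std::vector<fastdeploy::Backend> backends =
//       fastdeploy::GetAvailableBackends();
//   for (const auto& b : backends) {
//     std::cout << fastdeploy::Str(b) << std::endl;  // requires <iostream>
//   }
//   if (fastdeploy::IsBackendAvailable(fastdeploy::Backend::TRT)) {
//     // tensorrt support was compiled into this build
//   }
//   fastdeploy::Frontend fmt = fastdeploy::GuessModelFormat("model.pdmodel");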
struct FASTDEPLOY_DECL RuntimeOption {
// set the paths of the model file and the params file
// for onnx, only model_file needs to be set, but model_format
// must still be defined accordingly
// model_format supports 'paddle' / 'onnx' now
void SetModelPath(const std::string& model_path,
const std::string& params_path = "",
const std::string& _model_format = "paddle");
// set model inference to run on cpu
void UseCpu();
// set model inference to run on gpu
void UseGpu(int gpu_id = 0);
// set the number of threads for inference on cpu
void SetCpuThreadNum(int thread_num);
// use paddle inference backend
void UsePaddleBackend();
// use onnxruntime backend
void UseOrtBackend();
// use tensorrt backend
void UseTrtBackend();
// use openvino backend
void UseOpenVINOBackend();
// enable mkldnn while using the paddle inference backend on cpu
void EnablePaddleMKLDNN();
// disable mkldnn while using the paddle inference backend on cpu
void DisablePaddleMKLDNN();
// delete a pass (specified by name) from the paddle inference backend
void DeletePaddleBackendPass(const std::string& delete_pass_name);
// enable debug information of paddle backend
void EnablePaddleLogInfo();
// disable debug information of paddle backend
void DisablePaddleLogInfo();
// set the size of the shape cache when mkldnn is enabled with the
// paddle inference backend
void SetPaddleMKLDNNCacheSize(int size);
// set tensorrt input shapes when the model inputs have dynamic shape
// min_shape: the minimum shape
// opt_shape: the most common shape during inference, empty by default
// max_shape: the maximum shape, empty by default
// if opt_shape and max_shape are empty, they keep the same value as
// min_shape, which means the shape is fixed to min_shape during inference
void SetTrtInputShape(
const std::string& input_name, const std::vector<int32_t>& min_shape,
const std::vector<int32_t>& opt_shape = std::vector<int32_t>(),
const std::vector<int32_t>& max_shape = std::vector<int32_t>());
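// Illustrative sketch (the input name "image" and the NCHW shapes below are
// placeholders, not taken from this header): configuring a dynamic batch
// dimension for the tensorrt backend.
//
//   fastdeploy::RuntimeOption opt;
//   opt.UseGpu(0);
//   opt.UseTrtBackend();
//   opt.SetTrtInputShape("image", {1, 3, 224, 224},  // min_shape
//                        {4, 3, 224, 224},           // opt_shape (typical)
//                        {8, 3, 224, 224});          // max_shape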
// enable half precision while using the tensorrt backend
void EnableTrtFP16();
// disable half precision, falling back to full precision (float32)
void DisableTrtFP16();
// set the cache file path for the serialized tensorrt engine, so the
// engine does not need to be rebuilt on every run
void SetTrtCacheFile(const std::string& cache_file_path);
Backend backend = Backend::UNKNOWN;
// number of threads for cpu inference and preprocessing
// -1 (default) lets each backend choose its own default value
int cpu_thread_num = -1;
int device_id = 0;
Device device = Device::CPU;
// ======Only for ORT Backend========
// -1 means use the default value chosen by ort
// 0: ORT_DISABLE_ALL  1: ORT_ENABLE_BASIC  2: ORT_ENABLE_EXTENDED
// 3: ORT_ENABLE_ALL
int ort_graph_opt_level = -1;
int ort_inter_op_num_threads = -1;
// 0: ORT_SEQUENTIAL 1: ORT_PARALLEL
int ort_execution_mode = -1;
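// The ort fields above have no setter methods in this header; they are
// assigned directly on RuntimeOption, e.g. (sketch):
//
//   fastdeploy::RuntimeOption opt;
//   opt.UseOrtBackend();
//   opt.ort_graph_opt_level = 2;  // ORT_ENABLE_EXTENDED
//   opt.ort_execution_mode = 0;   // ORT_SEQUENTIAL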
// ======Only for Paddle Backend=====
bool pd_enable_mkldnn = true;
bool pd_enable_log_info = false;
int pd_mkldnn_cache_size = 1;
std::vector<std::string> pd_delete_pass_names;
// ======Only for Trt Backend=======
std::map<std::string, std::vector<int32_t>> trt_max_shape;
std::map<std::string, std::vector<int32_t>> trt_min_shape;
std::map<std::string, std::vector<int32_t>> trt_opt_shape;
std::string trt_serialize_file = "";
bool trt_enable_fp16 = false;
bool trt_enable_int8 = false;
size_t trt_max_batch_size = 32;
size_t trt_max_workspace_size = 1 << 30;
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
Frontend model_format = Frontend::AUTOREC; // format of input model
// internal parameters, only for internal usage
// remove multiclass_nms in Paddle2ONNX
bool remove_multiclass_nms_ = false;
// for Paddle2ONNX to export custom operators
std::map<std::string, std::string> custom_op_info_;
};
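// Illustrative end-to-end configuration sketch (file names are placeholders):
//
//   fastdeploy::RuntimeOption opt;
//   opt.SetModelPath("model.pdmodel", "model.pdiparams", "paddle");
//   opt.UseCpu();
//   opt.SetCpuThreadNum(8);
//   opt.UsePaddleBackend();  // choose the inference backend explicitly
//   opt.EnablePaddleMKLDNN();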
struct FASTDEPLOY_DECL Runtime {
public:
// explicit Runtime(const RuntimeOption& _option = RuntimeOption());
// initialize the runtime according to the given option; returns false
// on failure
bool Init(const RuntimeOption& _option);
// run inference with the given inputs, writing results to
// output_tensors; returns false on failure
bool Infer(std::vector<FDTensor>& input_tensors,
std::vector<FDTensor>* output_tensors);
// create the underlying backend selected in option
void CreateOrtBackend();
void CreatePaddleBackend();
void CreateTrtBackend();
void CreateOpenVINOBackend();
int NumInputs() { return backend_->NumInputs(); }
int NumOutputs() { return backend_->NumOutputs(); }
TensorInfo GetInputInfo(int index);
TensorInfo GetOutputInfo(int index);
RuntimeOption option;
private:
std::unique_ptr<BaseBackend> backend_;
};
} // namespace fastdeploy
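// Illustrative sketch of driving Runtime (construction of FDTensor inputs is
// elided since FDTensor is declared elsewhere; "opt" is a configured
// RuntimeOption as in the sketch above):
//
//   fastdeploy::Runtime runtime;
//   if (!runtime.Init(opt)) { /* handle initialization failure */ }
//   std::vector<fastdeploy::FDTensor> inputs(runtime.NumInputs());
//   // ... fill each input according to runtime.GetInputInfo(i) ...
//   std::vector<fastdeploy::FDTensor> outputs;
//   if (!runtime.Infer(inputs, &outputs)) { /* handle inference failure */ }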