// Mirrored from https://github.com/PaddlePaddle/FastDeploy.git
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "fastdeploy/backends/backend.h"
#include "fastdeploy/utils/perf.h"

#include <map>
#include <string>
#include <vector>
namespace fastdeploy {
|
|
|
|
enum class Backend { UNKNOWN, ORT, TRT, PDRT };
|
|
enum class Frontend { PADDLE, ONNX };
|
|
|
|
FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
|
|
|
|
FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
|
|
|
|
bool ModelFormatCheck(const std::string& model_file,
|
|
const Frontend& model_format);
|
|
|
|
// Options selecting the backend/device a Runtime uses and configuring
// each backend. Plain aggregate: set fields, then pass to Runtime::Init().
struct FASTDEPLOY_DECL RuntimeOption {
  // Backend used for inference; defaults to ONNX Runtime.
  Backend backend = Backend::ORT;

  // for cpu inference and preprocess
  int cpu_thread_num = 8;
  // GPU device id; only meaningful when device == Device::GPU.
  int device_id = 0;

  // Default device follows the build configuration: GPU builds prefer GPU.
#ifdef WITH_GPU
  Device device = Device::GPU;
#else
  Device device = Device::CPU;
#endif

  // ======Only for ORT Backend========
  // -1 means use default value by ort
  // 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3:
  // ORT_ENABLE_ALL
  int ort_graph_opt_level = -1;
  // Threads ORT uses across independent subgraphs (-1 = ORT default).
  int ort_inter_op_num_threads = -1;
  // 0: ORT_SEQUENTIAL 1: ORT_PARALLEL
  int ort_execution_mode = -1;

  // ======Only for Trt Backend=======
  // Per-input-tensor shapes used when building the TensorRT engine.
  // fixed: exact shape; min/opt/max: dynamic-shape profile bounds.
  std::map<std::string, std::vector<int32_t>> trt_fixed_shape;
  std::map<std::string, std::vector<int32_t>> trt_max_shape;
  std::map<std::string, std::vector<int32_t>> trt_min_shape;
  std::map<std::string, std::vector<int32_t>> trt_opt_shape;
  // Path used to cache the serialized TensorRT engine ("" = no caching).
  std::string trt_serialize_file = "";
  // Precision toggles applied while building the engine.
  bool trt_enable_fp16 = false;
  bool trt_enable_int8 = false;
  size_t trt_max_batch_size = 32;
  // Scratch memory TensorRT may allocate while building, in bytes (1 GiB).
  size_t trt_max_workspace_size = 1 << 30;

  std::string model_file = "";   // Path of model file
  std::string params_file = "";  // Path of parameters file, can be empty
  Frontend model_format = Frontend::PADDLE;  // format of input model
};
struct FASTDEPLOY_DECL Runtime {
|
|
public:
|
|
// explicit Runtime(const RuntimeOption& _option = RuntimeOption());
|
|
|
|
bool Init(const RuntimeOption& _option);
|
|
|
|
bool Infer(std::vector<FDTensor>& input_tensors,
|
|
std::vector<FDTensor>* output_tensors);
|
|
|
|
void CreateOrtBackend();
|
|
|
|
void CreateTrtBackend();
|
|
|
|
int NumInputs() { return backend_->NumInputs(); }
|
|
int NumOutputs() { return backend_->NumOutputs(); }
|
|
TensorInfo GetInputInfo(int index);
|
|
TensorInfo GetOutputInfo(int index);
|
|
|
|
RuntimeOption option;
|
|
|
|
private:
|
|
BaseBackend* backend_;
|
|
};
} // namespace fastdeploy