FastDeploy/fastdeploy/runtime/backends/backend.h

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/core/fd_type.h"
#include "fastdeploy/runtime/runtime_option.h"
#include "fastdeploy/benchmark/benchmark.h"
namespace fastdeploy {
/*! @brief Information of Tensor
 */
struct TensorInfo {
  std::string name;        ///< Name of tensor
  std::vector<int> shape;  ///< Shape of tensor
  FDDataType dtype;        ///< Data type of tensor

  friend std::ostream& operator<<(std::ostream& output,
                                  const TensorInfo& info) {
    output << "TensorInfo(name: " << info.name << ", shape: [";
    for (size_t i = 0; i < info.shape.size(); ++i) {
      if (i == info.shape.size() - 1) {
        output << info.shape[i];
      } else {
        output << info.shape[i] << ", ";
      }
    }
    output << "], dtype: " << Str(info.dtype) << ")";
    return output;
  }
};
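// A brief usage sketch (not part of the original header), assuming only the
// stream operator above: a TensorInfo describing a hypothetical input named
// "images" can be logged directly. The exact dtype string depends on Str().
//
// @code
// TensorInfo info;
// info.name = "images";
// info.shape = {1, 3, 224, 224};
// info.dtype = FDDataType::FP32;
// std::cout << info << std::endl;
// // Prints something like:
// // TensorInfo(name: images, shape: [1, 3, 224, 224], dtype: ...)
// @endcode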
/*! @brief Base class of all the inference backends
 */
class BaseBackend {
 public:
  bool initialized_ = false;

  BaseBackend() {}
  virtual ~BaseBackend() = default;

  virtual bool Initialized() const { return initialized_; }

  virtual bool Init(const RuntimeOption& option) {
    FDERROR << "Not implemented for " << option.backend << " in "
            << option.device << "." << std::endl;
    return false;
  }

  // Get number of inputs of the model
  virtual int NumInputs() const = 0;
  // Get number of outputs of the model
  virtual int NumOutputs() const = 0;
  // Get information of input tensor
  virtual TensorInfo GetInputInfo(int index) = 0;
  // Get information of output tensor
  virtual TensorInfo GetOutputInfo(int index) = 0;
  // Get information of all the input tensors
  virtual std::vector<TensorInfo> GetInputInfos() = 0;
  // Get information of all the output tensors
  virtual std::vector<TensorInfo> GetOutputInfos() = 0;

  // If copy_to_fd is true, copy memory data to FDTensor;
  // otherwise share memory with FDTensor (only the Paddle, ORT, TRT and
  // OpenVINO backends support sharing).
  virtual bool Infer(std::vector<FDTensor>& inputs,
                     std::vector<FDTensor>* outputs,
                     bool copy_to_fd = true) = 0;

  // Optional: for backends that can share memory while creating
  // multiple inference engines from the same model file
  virtual std::unique_ptr<BaseBackend> Clone(RuntimeOption& runtime_option,
                                             void* stream = nullptr,
                                             int device_id = -1) {
    FDERROR << "Clone is not supported for " << runtime_option.backend << " "
            << stream << " " << device_id << std::endl;
    return nullptr;
  }

  benchmark::BenchmarkOption benchmark_option_;
  benchmark::BenchmarkResult benchmark_result_;
};
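// A minimal implementation sketch (not part of the original header), assuming
// only the BaseBackend interface above. The class name 'MyBackend', the
// 'inputs_desc_' / 'outputs_desc_' members, and the engine details are
// hypothetical; a real backend would load the model in Init() and drive its
// inference engine in Infer().
//
// @code
// class MyBackend : public BaseBackend {
//  public:
//   bool Init(const RuntimeOption& option) override {
//     // Build the engine from the model referenced by `option`, then fill
//     // inputs_desc_ / outputs_desc_ and mark the backend as ready.
//     initialized_ = true;
//     return true;
//   }
//   int NumInputs() const override {
//     return static_cast<int>(inputs_desc_.size());
//   }
//   int NumOutputs() const override {
//     return static_cast<int>(outputs_desc_.size());
//   }
//   TensorInfo GetInputInfo(int index) override { return inputs_desc_[index]; }
//   TensorInfo GetOutputInfo(int index) override { return outputs_desc_[index]; }
//   std::vector<TensorInfo> GetInputInfos() override { return inputs_desc_; }
//   std::vector<TensorInfo> GetOutputInfos() override { return outputs_desc_; }
//   bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
//              bool copy_to_fd = true) override {
//     // Run the engine on `inputs`; if copy_to_fd is true, copy the results
//     // into `outputs`, otherwise wrap the engine buffers without copying.
//     return true;
//   }
//
//  private:
//   std::vector<TensorInfo> inputs_desc_;
//   std::vector<TensorInfo> outputs_desc_;
// };
// @endcode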
/** \brief Macros for Runtime benchmark profiling.
 * The param 'base_loop' of 'RUNTIME_PROFILE_LOOP_BEGIN' indicates how many
 * times the wrapped code runs when profiling mode is not enabled.
 * In most cases the value should be 1, i.e., when profiling mode is turned
 * off the results are obtained by running the inference process once. This
 * holds for ONNX Runtime, OpenVINO, TensorRT, Paddle Inference, Paddle Lite,
 * RKNPU2, SOPHGO, etc.
 *
 * example code @code
 * // OpenVINOBackend::Infer
 * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
 * // do something ....
 * RUNTIME_PROFILE_LOOP_BEGIN(1)
 * // The code wrapped by the 'BEGIN(1) ~ END' scope
 * // will run exactly once when profiling mode is not enabled.
 * request_.infer();
 * RUNTIME_PROFILE_LOOP_END
 * // do something ....
 * RUNTIME_PROFILE_LOOP_H2D_D2H_END
 *
 * @endcode In this case, no variables that subsequent code depends on are
 * defined inside the 'BEGIN ~ END' scope. Sometimes, however, 'base_loop'
 * needs to be set to 0, e.g. for POROS.
 *
 * example code @code
 * // PorosBackend::Infer
 * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
 * // do something ....
 * RUNTIME_PROFILE_LOOP_BEGIN(0) // set 'base_loop' to 0
 * // The code wrapped by the 'BEGIN(0) ~ END' scope
 * // will not run when profiling mode is not enabled.
 * auto poros_outputs = _poros_module->forward(poros_inputs);
 * RUNTIME_PROFILE_LOOP_END
 * // Run another inference beyond the scope of 'BEGIN ~ END'
 * // to get valid outputs for subsequent tasks.
 * auto poros_outputs = _poros_module->forward(poros_inputs);
 * // do something .... will use 'poros_outputs' ...
 * if (poros_outputs.isTensor()) {
 *   // ...
 * }
 * RUNTIME_PROFILE_LOOP_H2D_D2H_END
 *
 * @endcode In this case, 'poros_outputs' is defined inside the 'BEGIN ~ END'
 * scope but is required by subsequent tasks, so 'base_loop' is set to 0 and
 * another inference is launched beyond the scope of 'BEGIN ~ END' to obtain
 * valid outputs for those tasks.
 */
#define RUNTIME_PROFILE_LOOP_BEGIN(base_loop) \
  __RUNTIME_PROFILE_LOOP_BEGIN(benchmark_option_, (base_loop))
#define RUNTIME_PROFILE_LOOP_END \
  __RUNTIME_PROFILE_LOOP_END(benchmark_result_)
#define RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN \
  __RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN(benchmark_option_, 1)
#define RUNTIME_PROFILE_LOOP_H2D_D2H_END \
  __RUNTIME_PROFILE_LOOP_H2D_D2H_END(benchmark_result_)
} // namespace fastdeploy