// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/core/fd_type.h"
#include "fastdeploy/runtime/runtime_option.h"
#include "fastdeploy/benchmark/benchmark.h"

namespace fastdeploy {

/*! @brief Information of Tensor
 */
struct TensorInfo {
  std::string name;        ///< Name of tensor
  std::vector<int> shape;  ///< Shape of tensor
  FDDataType dtype;        ///< Data type of tensor

  friend std::ostream& operator<<(std::ostream& output,
                                  const TensorInfo& info) {
    output << "TensorInfo(name: " << info.name << ", shape: [";
    for (size_t i = 0; i < info.shape.size(); ++i) {
      if (i == info.shape.size() - 1) {
        output << info.shape[i];
      } else {
        output << info.shape[i] << ", ";
      }
    }
    output << "], dtype: " << Str(info.dtype) << ")";
    return output;
  }
};

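// A minimal usage sketch of the stream operator above (the values are
// illustrative; FDDataType::FP32 is assumed to be one of the dtypes
// defined in fd_type.h):
//
//   TensorInfo info;
//   info.name = "image";
//   info.shape = {1, 3, 224, 224};
//   info.dtype = FDDataType::FP32;
//   std::cout << info << std::endl;
//   // Prints something like:
//   // TensorInfo(name: image, shape: [1, 3, 224, 224], dtype: FP32)
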
class BaseBackend {
 public:
  bool initialized_ = false;

  BaseBackend() {}
  virtual ~BaseBackend() = default;

  virtual bool Initialized() const { return initialized_; }

  virtual bool Init(const RuntimeOption& option) {
    FDERROR << "Not implemented for " << option.backend << " on "
            << option.device << "." << std::endl;
    return false;
  }

  // Get the number of inputs of the model
  virtual int NumInputs() const = 0;
  // Get the number of outputs of the model
  virtual int NumOutputs() const = 0;
  // Get the information of an input tensor
  virtual TensorInfo GetInputInfo(int index) = 0;
  // Get the information of an output tensor
  virtual TensorInfo GetOutputInfo(int index) = 0;
  // Get the information of all the input tensors
  virtual std::vector<TensorInfo> GetInputInfos() = 0;
  // Get the information of all the output tensors
  virtual std::vector<TensorInfo> GetOutputInfos() = 0;

  // If copy_to_fd is true, copy memory data to FDTensor;
  // otherwise, share memory with FDTensor (only Paddle, ORT, TRT and
  // OpenVINO support this).
  virtual bool Infer(std::vector<FDTensor>& inputs,
                     std::vector<FDTensor>* outputs,
                     bool copy_to_fd = true) = 0;

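  // A minimal caller-side sketch of Infer (illustrative; 'backend' is a
  // hypothetical pointer to a concrete BaseBackend implementation and the
  // input tensors are assumed to be prepared elsewhere):
  //
  //   std::vector<FDTensor> inputs;   // filled with model inputs beforehand
  //   std::vector<FDTensor> outputs;
  //   backend->Infer(inputs, &outputs);         // outputs own copied data
  //   backend->Infer(inputs, &outputs, false);  // may share backend memory
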
  // Optional: for backends that can share memory
  // while creating multiple inference engines from the same model file
  virtual std::unique_ptr<BaseBackend> Clone(RuntimeOption& runtime_option,
                                             void* stream = nullptr,
                                             int device_id = -1) {
    FDERROR << "Clone is not supported for " << runtime_option.backend
            << " (stream: " << stream << ", device_id: " << device_id << ")."
            << std::endl;
    return nullptr;
  }

  benchmark::BenchmarkOption benchmark_option_;
  benchmark::BenchmarkResult benchmark_result_;
};

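/** \brief A minimal sketch (not part of the library) of how a concrete
 * backend could derive from BaseBackend. 'MyBackend' and its 'inputs_' /
 * 'outputs_' members are hypothetical; engine construction, error handling,
 * and memory sharing are elided. @code
 * class MyBackend : public BaseBackend {
 *  public:
 *   bool Init(const RuntimeOption& option) override {
 *     // Build or load the underlying engine from 'option' here, and fill
 *     // 'inputs_' / 'outputs_' with the model's tensor descriptions.
 *     initialized_ = true;
 *     return initialized_;
 *   }
 *   int NumInputs() const override { return static_cast<int>(inputs_.size()); }
 *   int NumOutputs() const override { return static_cast<int>(outputs_.size()); }
 *   TensorInfo GetInputInfo(int index) override { return inputs_[index]; }
 *   TensorInfo GetOutputInfo(int index) override { return outputs_[index]; }
 *   std::vector<TensorInfo> GetInputInfos() override { return inputs_; }
 *   std::vector<TensorInfo> GetOutputInfos() override { return outputs_; }
 *   bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
 *              bool copy_to_fd = true) override {
 *     // Bind 'inputs', run the engine, then fill '*outputs' (copying when
 *     // copy_to_fd is true, sharing memory otherwise).
 *     return true;
 *   }
 *  private:
 *   std::vector<TensorInfo> inputs_, outputs_;
 * };
 * @endcode
 */
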
/** \brief Macros for Runtime benchmark profiling.
 * The parameter 'base_loop' of 'RUNTIME_PROFILE_LOOP_BEGIN'
 * indicates the minimum number of times the loop body
 * will run when profiling mode is not enabled.
 * In most cases the value should be 1, i.e., when profiling
 * mode is turned off, the results are obtained by running
 * the inference process once; this applies to ONNX Runtime,
 * OpenVINO, TensorRT, Paddle Inference, Paddle Lite,
 * RKNPU2, SOPHGO, etc.
 *
 * example code @code
 * // OpenVINOBackend::Infer
 * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
 * // do something ....
 * RUNTIME_PROFILE_LOOP_BEGIN(1)
 * // The code wrapped by the 'BEGIN(1) ~ END' scope
 * // will run only once when profiling mode is not enabled.
 * request_.infer();
 * RUNTIME_PROFILE_LOOP_END
 * // do something ....
 * RUNTIME_PROFILE_LOOP_H2D_D2H_END
 *
 * @endcode In this case, no variables that subsequent tasks
 * may require are created inside the 'BEGIN ~ END' scope.
 * But sometimes we need to set 'base_loop' to 0, as for
 * POROS.
 *
 * example code @code
 * // PorosBackend::Infer
 * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
 * // do something ....
 * RUNTIME_PROFILE_LOOP_BEGIN(0) // set 'base_loop' to 0
 * // The code wrapped by the 'BEGIN(0) ~ END' scope
 * // will not run when profiling mode is not enabled.
 * auto poros_outputs = _poros_module->forward(poros_inputs);
 * RUNTIME_PROFILE_LOOP_END
 * // Run another inference beyond the scope of 'BEGIN ~ END'
 * // to get valid outputs for subsequent tasks.
 * auto poros_outputs = _poros_module->forward(poros_inputs);
 * // do something .... will use 'poros_outputs' ...
 * if (poros_outputs.isTensor()) {
 *   // ...
 * }
 * RUNTIME_PROFILE_LOOP_H2D_D2H_END
 *
 * @endcode In this case, 'poros_outputs' is created inside
 * the 'BEGIN ~ END' scope but may be required by subsequent
 * tasks. So we set 'base_loop' to 0 and launch another
 * inference beyond the 'BEGIN ~ END' scope to get valid
 * outputs for subsequent tasks.
 */

#define RUNTIME_PROFILE_LOOP_BEGIN(base_loop) \
  __RUNTIME_PROFILE_LOOP_BEGIN(benchmark_option_, (base_loop))
#define RUNTIME_PROFILE_LOOP_END \
  __RUNTIME_PROFILE_LOOP_END(benchmark_result_)
#define RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN \
  __RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN(benchmark_option_, 1)
#define RUNTIME_PROFILE_LOOP_H2D_D2H_END \
  __RUNTIME_PROFILE_LOOP_H2D_D2H_END(benchmark_result_)

}  // namespace fastdeploy