mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 17:17:14 +08:00

* [backend] support benchmark mode for runtime and backend
* [pybind11] add benchmark methods pybind
* [Other] update build scripts
* [Other] update cmake/summary.cmake
* [Other] add ENABLE_BENCHMARK option -> setup.py
* optimize backend time recording
* [backend] optimize backend_time recording for TRT
* [benchmark] remove redundant logs
* fixed ov_backend conflict
* [benchmark] fixed paddle_backend conflicts
* [benchmark] remove use_gpu option from ORT backend option
* [benchmark] update benchmark_ppdet.py
* [benchmark] update benchmark_ppcls.py
* fixed lite backend conflicts
* [Lite] fixed lite XPU
* add benchmark macros
* add RUNTIME_PROFILE_LOOP macros
* add comments for RUNTIME_PROFILE macros and new APIs
* fixed bugs
* remove unused code
* optimize RUNTIME_PROFILE_LOOP macros
* add comments for benchmark option and result
* add docs for benchmark namespace
117 lines
3.7 KiB
C++
Executable File
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*! \file runtime.h
    \brief Interface of the FastDeploy Runtime, which loads a model and runs
    inference on different backends and devices.
 */

#pragma once

#include "fastdeploy/runtime/backends/backend.h"
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/runtime/runtime_option.h"
#include "fastdeploy/utils/perf.h"

/** \brief All C++ FastDeploy APIs are defined inside this namespace
 */
namespace fastdeploy {

/*! @brief Runtime object used to run inference with a loaded model on different devices
 */
struct FASTDEPLOY_DECL Runtime {
 public:
  /// Initialize a Runtime object with a RuntimeOption
  bool Init(const RuntimeOption& _option);
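
  // Example: a minimal initialization sketch (illustrative only; the model
  // paths are placeholders and the RuntimeOption setters are assumed from
  // the public FastDeploy API):
  //
  //   fastdeploy::RuntimeOption opt;
  //   opt.SetModelPath("model.pdmodel", "model.pdiparams");  // placeholder paths
  //   opt.UseCpu();
  //   fastdeploy::Runtime runtime;
  //   if (!runtime.Init(opt)) {
  //     std::cerr << "Runtime initialization failed." << std::endl;
  //   }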

  /** \brief Run inference with the input data, and write the results to the output
   *
   * \param[in] input_tensors Input data; note that each FDTensor::name must match the corresponding input name of the model
   * \param[out] output_tensors Inference results
   * \return true if the inference succeeded, otherwise false
   */
  bool Infer(std::vector<FDTensor>& input_tensors,
             std::vector<FDTensor>* output_tensors);
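
  // Example: a sketch of tensor-based inference (the FDTensor calls below
  // are assumed from the public FastDeploy API; the shape and data are
  // placeholders):
  //
  //   std::vector<float> data(1 * 3 * 224 * 224, 0.0f);
  //   std::vector<fastdeploy::FDTensor> inputs(1);
  //   inputs[0].SetExternalData({1, 3, 224, 224}, fastdeploy::FDDataType::FP32,
  //                             data.data());
  //   inputs[0].name = runtime.GetInputInfo(0).name;  // names must match the model
  //   std::vector<fastdeploy::FDTensor> outputs;
  //   if (!runtime.Infer(inputs, &outputs)) { /* handle failure */ }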

  /** \brief Run inference without passing parameters.
   *
   * The input and output data must be exchanged through the BindInputTensor
   * and GetOutputTensor interfaces (see the sketch after GetOutputTensor
   * below).
   */
  bool Infer();

  /** \brief Get number of inputs
   */
  int NumInputs() { return backend_->NumInputs(); }
  /** \brief Get number of outputs
   */
  int NumOutputs() { return backend_->NumOutputs(); }
  /** \brief Get input information by index
   */
  TensorInfo GetInputInfo(int index);
  /** \brief Get output information by index
   */
  TensorInfo GetOutputInfo(int index);
  /** \brief Get all the input information
   */
  std::vector<TensorInfo> GetInputInfos();
  /** \brief Get all the output information
   */
  std::vector<TensorInfo> GetOutputInfos();

  /** \brief Bind an input FDTensor by name; no copy is made, the input memory is shared
   */
  void BindInputTensor(const std::string& name, FDTensor& input);
  /** \brief Get an output FDTensor by name; no copy is made, the backend output memory is shared
   */
  FDTensor* GetOutputTensor(const std::string& name);
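
  // Example: a sketch of the zero-copy flow with the parameterless Infer()
  // (tensor names, shapes, and data are placeholders):
  //
  //   fastdeploy::FDTensor input;
  //   input.SetExternalData({1, 3, 224, 224}, fastdeploy::FDDataType::FP32,
  //                         data.data());
  //   runtime.BindInputTensor(runtime.GetInputInfo(0).name, input);
  //   runtime.Infer();  // reads the bound inputs, fills the backend outputs
  //   fastdeploy::FDTensor* out =
  //       runtime.GetOutputTensor(runtime.GetOutputInfo(0).name);
  //   // `out` shares the backend's memory; do not free it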

  /** \brief Clone a new Runtime when multiple instances of the same model are needed
   *
   * \param[in] stream CUDA stream, the default value is nullptr
   * \param[in] device_id Device id for the cloned Runtime, the default value is -1
   * \return pointer to the cloned Runtime
   */
  Runtime* Clone(void* stream = nullptr, int device_id = -1);
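
  // Example: one clone per worker thread (a sketch; error handling is
  // elided and caller ownership of the returned pointer is an assumption):
  //
  //   fastdeploy::Runtime* worker = runtime.Clone();  // shares the loaded model
  //   std::vector<fastdeploy::FDTensor> outs;
  //   worker->Infer(inputs, &outs);
  //   delete worker;  // assuming the caller owns the returned pointer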

  void ReleaseModelMemoryBuffer();

  RuntimeOption option;

  /** \brief Compile a TorchScript module, only for the Poros backend
   *
   * \param[in] prewarm_tensors Prewarm data for the compilation
   * \param[in] _option Runtime option
   * \return true if the compilation succeeded, otherwise false
   */
  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
               const RuntimeOption& _option);
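
  // Example: a hedged Poros compilation sketch (UsePorosBackend() is
  // assumed from RuntimeOption; the prewarm batch is a placeholder):
  //
  //   fastdeploy::RuntimeOption poros_opt;
  //   poros_opt.UsePorosBackend();
  //   std::vector<std::vector<fastdeploy::FDTensor>> prewarm = {inputs};
  //   if (!runtime.Compile(prewarm, poros_opt)) { /* handle failure */ }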

  /** \brief Get the profiled time of the Runtime after the profiling process is done.
   */
  double GetProfileTime() {
    return backend_->benchmark_result_.time_of_runtime;
  }
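
  // Example: reading the profiled runtime (assumes FastDeploy was built
  // with benchmark support and profiling was enabled via RuntimeOption;
  // the exact switch is treated as an assumption here):
  //
  //   for (int i = 0; i < 100; ++i) runtime.Infer(inputs, &outs);
  //   std::cout << "profiled runtime: " << runtime.GetProfileTime()
  //             << std::endl;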

 private:
  void CreateOrtBackend();
  void CreatePaddleBackend();
  void CreateTrtBackend();
  void CreateOpenVINOBackend();
  void CreateLiteBackend();
  void CreateRKNPU2Backend();
  void CreateSophgoNPUBackend();
  std::unique_ptr<BaseBackend> backend_;
  std::vector<FDTensor> input_tensors_;
  std::vector<FDTensor> output_tensors_;
};
}  // namespace fastdeploy