mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[Backend] support benchmark mode for runtime and backend (#1201)
* [backend] support benchmark mode for runtime and backend * [backend] support benchmark mode for runtime and backend * [pybind11] add benchmark methods pybind * [pybind11] add benchmark methods pybind * [Other] Update build scripts * [Other] Update cmake/summary.cmake * [Other] update build scripts * [Other] add ENABLE_BENCHMARK option -> setup.py * optimize backend time recording * optimize backend time recording * optimize trt backend time record * [backend] optimize backend_time recording for trt * [benchmark] remove redundant logs * fixed ov_backend conflict * [benchmark] fixed paddle_backend conflicts * [benchmark] fixed paddle_backend conflicts * [benchmark] fixed paddle_backend conflicts * [benchmark] remove use_gpu option from ort backend option * [benchmark] update benchmark_ppdet.py * [benchmark] update benchmark_ppcls.py * fixed lite backend conflicts * [Lite] fixed lite xpu * add benchmark macro * add RUNTIME_PROFILE_LOOP macros * add comments for RUNTIME_PROFILE macros * add comments for new apis * add comments for new apis * update benchmark_ppdet.py * fixed bugs * remove unused codes * optimize RUNTIME_PROFILE_LOOP macros * optimize RUNTIME_PROFILE_LOOP macros * add comments for benchmark option and result * add docs for benchmark namespace
This commit is contained in:
@@ -22,6 +22,7 @@
|
||||
#include "fastdeploy/core/fd_tensor.h"
|
||||
#include "fastdeploy/core/fd_type.h"
|
||||
#include "fastdeploy/runtime/runtime_option.h"
|
||||
#include "fastdeploy/benchmark/benchmark.h"
|
||||
|
||||
namespace fastdeploy {
|
||||
|
||||
@@ -79,7 +80,6 @@ class BaseBackend {
|
||||
virtual bool Infer(std::vector<FDTensor>& inputs,
|
||||
std::vector<FDTensor>* outputs,
|
||||
bool copy_to_fd = true) = 0;
|
||||
|
||||
// Optional: For those backends which can share memory
|
||||
// while creating multiple inference engines with same model file
|
||||
virtual std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
|
||||
@@ -88,6 +88,70 @@ class BaseBackend {
|
||||
FDERROR << "Clone no support" << std::endl;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
benchmark::BenchmarkOption benchmark_option_;
|
||||
benchmark::BenchmarkResult benchmark_result_;
|
||||
};
|
||||
|
||||
/** \brief Macros for Runtime benchmark profiling.
 * The param 'base_loop' for 'RUNTIME_PROFILE_LOOP_BEGIN'
 * indicates the least number of times the loop
 * will repeat when profiling mode is not enabled.
 * In most cases, the value should be 1, i.e., results are
 * obtained by running the inference process once, when
 * the profile mode is turned off, such as ONNX Runtime,
 * OpenVINO, TensorRT, Paddle Inference, Paddle Lite,
 * RKNPU2, SOPHGO etc.
 *
 * example code @code
 * // OpenVINOBackend::Infer
 * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
 * // do something ....
 * RUNTIME_PROFILE_LOOP_BEGIN(1)
 * // The codes which wrapped by 'BEGIN(1) ~ END' scope
 * // will only run once when profiling mode is not enabled.
 * request_.infer();
 * RUNTIME_PROFILE_LOOP_END
 * // do something ....
 * RUNTIME_PROFILE_LOOP_H2D_D2H_END
 *
 * @endcode In this case, no global variables inside a function
 * are wrapped by BEGIN and END, which may be required for
 * subsequent tasks. But, sometimes we need to set 'base_loop'
 * as 0, such as POROS.
 *
 * example code @code
 * // PorosBackend::Infer
 * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
 * // do something ....
 * RUNTIME_PROFILE_LOOP_BEGIN(0) // set 'base_loop' as 0
 * // The codes which wrapped by 'BEGIN(0) ~ END' scope
 * // will not run when profiling mode is not enabled.
 * auto poros_outputs = _poros_module->forward(poros_inputs);
 * RUNTIME_PROFILE_LOOP_END
 * // Run another inference beyond the scope of 'BEGIN ~ END'
 * // to get valid outputs for subsequent tasks.
 * auto poros_outputs = _poros_module->forward(poros_inputs);
 * // do something .... will use 'poros_outputs' ...
 * if (poros_outputs.isTensor()) {
 * // ...
 * }
 * RUNTIME_PROFILE_LOOP_H2D_D2H_END
 *
 * @endcode In this case, 'poros_outputs' inside a function
 * is wrapped by BEGIN and END, which may be required for
 * subsequent tasks. So, we set 'base_loop' as 0 and launch
 * another infer to get the valid outputs beyond the scope
 * of 'BEGIN ~ END' for subsequent tasks.
 */

// Expand to the benchmark-namespace loop macros, binding them to the
// backend's own 'benchmark_option_' / 'benchmark_result_' members so any
// backend Infer() implementation can use them without extra arguments.
#define RUNTIME_PROFILE_LOOP_BEGIN(base_loop) \
  __RUNTIME_PROFILE_LOOP_BEGIN(benchmark_option_, (base_loop))
#define RUNTIME_PROFILE_LOOP_END \
  __RUNTIME_PROFILE_LOOP_END(benchmark_result_)
#define RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN \
  __RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN(benchmark_option_, 1)
#define RUNTIME_PROFILE_LOOP_H2D_D2H_END \
  __RUNTIME_PROFILE_LOOP_H2D_D2H_END(benchmark_result_)
|
||||
|
||||
} // namespace fastdeploy
|
||||
|
Reference in New Issue
Block a user