Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-05 16:48:03 +08:00
[Other] Optimize poros backend (#1331)

* Optimize poros backend
* Fix pybind error

Co-authored-by: root <root@bjyz-sys-gpu-kongming3.bjyz.baidu.com>
@@ -84,11 +84,13 @@ int main(int argc, char* argv[]) {
   runtime_option.SetModelPath(model_file, "", fd::ModelFormat::TORCHSCRIPT);
   runtime_option.UsePorosBackend();
   runtime_option.UseGpu(0);
-  runtime_option.is_dynamic = true;
 
   // Compile runtime
   std::unique_ptr<fd::Runtime> runtime =
       std::unique_ptr<fd::Runtime>(new fd::Runtime());
+
+  runtime->Init(runtime_option);
+
   if (!runtime->Compile(prewarm_datas, runtime_option)) {
     std::cerr << "--- Init FastDeploy Runitme Failed! "
               << "\n--- Model: " << model_file << std::endl;
@@ -114,4 +116,4 @@ int main(int argc, char* argv[]) {
 
   output_tensors[0].PrintInfo();
   return 0;
 }
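With this commit, the example's flow becomes: configure the RuntimeOption, Init() the Runtime, then Compile() with the prewarm data. Read together with the runtime.h hunk later in this diff (where Compile() drops its RuntimeOption parameter), the intended end-to-end usage looks roughly like the sketch below. The model path and the construction of prewarm_datas are placeholders, not part of this commit, and note that the example's own call site above still passes runtime_option to Compile() as unchanged context.

#include <iostream>
#include <string>
#include <vector>

#include "fastdeploy/runtime.h"

namespace fd = fastdeploy;

int main() {
  std::string model_file = "model.pt";  // placeholder TorchScript model path
  // Prewarm batches for Poros compilation; filled with real inputs in practice.
  std::vector<std::vector<fd::FDTensor>> prewarm_datas;

  fd::RuntimeOption runtime_option;
  runtime_option.SetModelPath(model_file, "", fd::ModelFormat::TORCHSCRIPT);
  runtime_option.UsePorosBackend();
  runtime_option.UseGpu(0);

  fd::Runtime runtime;
  // New step introduced by this commit: Init() creates the Poros backend
  // and validates the option before any compilation happens.
  if (!runtime.Init(runtime_option)) {
    std::cerr << "Runtime Init failed." << std::endl;
    return -1;
  }
  // Compile() now only needs the prewarm tensors (per the new declaration).
  if (!runtime.Compile(prewarm_datas)) {
    std::cerr << "Runtime Compile failed." << std::endl;
    return -1;
  }
  return 0;
}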
@@ -51,7 +51,7 @@ void BindRuntime(pybind11::module& m) {
                        warm_datas[i][j].nbytes());
            }
          }
-         return self.Compile(warm_tensors, _option);
+         return self.Compile(warm_tensors);
        })
       .def("infer",
            [](Runtime& self, std::map<std::string, pybind11::array>& data) {
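For context, the hunk above sits inside the "compile" binding of BindRuntime. Below is a reconstructed, self-contained sketch of that binding pattern; only the lines in the hunk are verbatim, and the rest (including whether the binding still accepts a RuntimeOption argument from Python) is an assumption. NumpyDataTypeToFDDataType is FastDeploy's existing numpy-dtype helper from its pybind utilities.

#include <cstring>
#include <vector>

#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

#include "fastdeploy/pybind/main.h"  // Runtime, FDTensor, dtype helpers

namespace fastdeploy {
void BindCompileSketch(pybind11::class_<Runtime>& runtime_cls) {
  runtime_cls.def(
      "compile",
      [](Runtime& self,
         std::vector<std::vector<pybind11::array>>& warm_datas) {
        std::vector<std::vector<FDTensor>> warm_tensors(warm_datas.size());
        for (size_t i = 0; i < warm_datas.size(); ++i) {
          warm_tensors[i].resize(warm_datas[i].size());
          for (size_t j = 0; j < warm_datas[i].size(); ++j) {
            auto dtype = NumpyDataTypeToFDDataType(warm_datas[i][j].dtype());
            std::vector<int64_t> shape(
                warm_datas[i][j].shape(),
                warm_datas[i][j].shape() + warm_datas[i][j].ndim());
            // Allocate the FDTensor, then copy the numpy buffer into it.
            warm_tensors[i][j].Resize(shape, dtype);
            std::memcpy(warm_tensors[i][j].MutableData(),
                        warm_datas[i][j].mutable_data(),
                        warm_datas[i][j].nbytes());
          }
        }
        // The option argument is gone: Compile() now takes prewarm data only.
        return self.Compile(warm_tensors);
      });
}
}  // namespace fastdeploy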
@@ -58,7 +58,10 @@ class BaseBackend {
   virtual bool Initialized() const { return initialized_; }
 
   virtual bool Init(const RuntimeOption& option) {
-    FDERROR << "Not Implement Yet." << std::endl;
+    FDERROR << "Not Implement for "
+            << option.backend << " in "
+            << option.device << "."
+            << std::endl;
     return false;
   }
 
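The improved default error message hints at the contract this commit establishes: a concrete backend overrides the virtual Init(), validates the RuntimeOption, and returns false rather than aborting when it cannot serve the request. A minimal sketch of that contract follows; DemoBackend and its check are hypothetical, modeled on the PorosBackend::Init() added later in this diff.

#include "fastdeploy/runtime/backends/backend.h"

namespace fastdeploy {
class DemoBackend : public BaseBackend {
 public:
  bool Init(const RuntimeOption& option) override {
    if (option.model_format != ModelFormat::TORCHSCRIPT) {
      FDERROR << "DemoBackend only supports TORCHSCRIPT models." << std::endl;
      return false;  // Runtime surfaces this as an init failure
    }
    initialized_ = true;  // makes BaseBackend::Initialized() report true
    return true;
  }
};
}  // namespace fastdeploy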
@@ -89,59 +92,59 @@ class BaseBackend {
     return nullptr;
   }
 
   benchmark::BenchmarkOption benchmark_option_;
   benchmark::BenchmarkResult benchmark_result_;
 };
 
 /** \brief Macros for Runtime benchmark profiling.
  * The param 'base_loop' for 'RUNTIME_PROFILE_LOOP_BEGIN'
  * indicates that the least number of times the loop
  * will repeat when profiling mode is not enabled.
  * In most cases, the value should be 1, i.e., results are
  * obtained by running the inference process once, when
  * the profile mode is turned off, such as ONNX Runtime,
  * OpenVINO, TensorRT, Paddle Inference, Paddle Lite,
  * RKNPU2, SOPHGO etc.
  *
  * example code @code
  * // OpenVINOBackend::Infer
  * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
  * // do something ....
  * RUNTIME_PROFILE_LOOP_BEGIN(1)
  * // The codes which wrapped by 'BEGIN(1) ~ END' scope
  * // will only run once when profiling mode is not enabled.
  * request_.infer();
  * RUNTIME_PROFILE_LOOP_END
  * // do something ....
  * RUNTIME_PROFILE_LOOP_H2D_D2H_END
  *
  * @endcode In this case, No global variables inside a function
  * are wrapped by BEGIN and END, which may be required for
  * subsequent tasks. But, some times we need to set 'base_loop'
  * as 0, such as POROS.
  *
  * * example code @code
  * // PorosBackend::Infer
  * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
  * // do something ....
  * RUNTIME_PROFILE_LOOP_BEGIN(0) // set 'base_loop' as 0
  * // The codes which wrapped by 'BEGIN(0) ~ END' scope
  * // will not run when profiling mode is not enabled.
  * auto poros_outputs = _poros_module->forward(poros_inputs);
  * RUNTIME_PROFILE_LOOP_END
  * // Run another inference beyond the scope of 'BEGIN ~ END'
  * // to get valid outputs for subsequent tasks.
  * auto poros_outputs = _poros_module->forward(poros_inputs);
  * // do something .... will use 'poros_outputs' ...
  * if (poros_outputs.isTensor()) {
  * // ...
  * }
  * RUNTIME_PROFILE_LOOP_H2D_D2H_END
  *
  * @endcode In this case, 'poros_outputs' inside a function
  * are wrapped by BEGIN and END, which may be required for
  * subsequent tasks. So, we set 'base_loop' as 0 and lanuch
  * another infer to get the valid outputs beyond the scope
  * of 'BEGIN ~ END' for subsequent tasks.
  */
 
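The base_loop semantics described in that comment reduce to a loop bound that depends on profiling mode. The following is an illustration of the idea only, not FastDeploy's actual macro definition (the real macros also record H2D/D2H timings); profiling_enabled_ and repeats_ are assumed member names.

// Illustration (assumed shape, not the real macro): run 'repeats_' times
// when profiling, otherwise 'base_loop' times, so a BEGIN(0)...END body
// is skipped entirely in normal (non-profiling) runs.
#define RUNTIME_PROFILE_LOOP_BEGIN(base_loop)                          \
  for (int _i = 0, _n = (profiling_enabled_ ? repeats_ : (base_loop)); \
       _i < _n; ++_i) {
#define RUNTIME_PROFILE_LOOP_END }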
@@ -51,6 +51,20 @@ class PorosBackend : public BaseBackend {
 
   void BuildOption(const PorosBackendOption& option);
 
+  bool Init(const RuntimeOption& option) {
+    if (!(Supported(option.model_format, Backend::POROS)
+          && Supported(option.device, Backend::POROS))) {
+      return false;
+    }
+    if (option.model_from_memory_) {
+      FDERROR << "Poros backend doesn't support load model "
+              << "from memory, please load model from disk."
+              << std::endl;
+      return false;
+    }
+    return true;
+  }
+
   bool Compile(const std::string& model_file,
                std::vector<std::vector<FDTensor>>& prewarm_tensors,
                const PorosBackendOption& option = PorosBackendOption());
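Note the division of labor this sets up: PorosBackend::Init() only validates the request (supported model format and device, model not loaded from memory), while the TorchScript module is still compiled by Compile() with the prewarm tensors. That split is what lets Runtime create and initialize the backend through the generic Init path in the next hunk, keeping compilation as a separate, explicit step.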
@@ -417,25 +417,28 @@ Runtime* Runtime::Clone(void* stream, int device_id) {
   return runtime;
 }
 
-// only for poros backend
-bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
-                      const RuntimeOption& _option) {
+void Runtime::CreatePorosBackend() {
+#ifdef ENABLE_POROS_BACKEND
+  backend_ = utils::make_unique<PorosBackend>();
+  FDASSERT(backend_->Init(option), "Failed to initialize Poros backend.");
+#else
+  FDASSERT(false,
+           "PorosBackend is not available, please compiled with "
+           "ENABLE_POROS_BACKEND=ON.");
+#endif
+  FDINFO << "Runtime initialized with Backend::POROS in " << option.device
+         << "." << std::endl;
+}
+
+// only for poros backend
+bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors) {
 #ifdef ENABLE_POROS_BACKEND
-  FDASSERT(
-      option.model_format == ModelFormat::TORCHSCRIPT,
-      "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
-  if (option.device != Device::CPU && option.device != Device::GPU) {
-    FDERROR << "PorosBackend only supports CPU/GPU, but now its "
-            << option.device << "." << std::endl;
-    return false;
-  }
   option.poros_option.device = option.device;
   option.poros_option.device_id = option.device_id;
   option.poros_option.enable_fp16 = option.trt_option.enable_fp16;
   option.poros_option.max_batch_size = option.trt_option.max_batch_size;
   option.poros_option.max_workspace_size = option.trt_option.max_workspace_size;
 
-  backend_ = utils::make_unique<PorosBackend>();
   auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
   FDASSERT(
       casted_backend->Compile(option.model_file, prewarm_tensors,
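Two things change here beyond the signature: the model-format and device validation that Compile() used to perform inline now lives in PorosBackend::Init() (asserted inside CreatePorosBackend()), and backend construction itself moves out of Compile(), so Poros follows the same Create*Backend() pattern as the other backends declared in runtime.h below.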
@@ -99,11 +99,9 @@ struct FASTDEPLOY_DECL Runtime {
   /** \brief Compile TorchScript Module, only for Poros backend
    *
    * \param[in] prewarm_tensors Prewarm datas for compile
-   * \param[in] _option Runtime option
    * \return true if compile successed, otherwise false
    */
-  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
-               const RuntimeOption& _option);
+  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors);
   /** \brief Get profile time of Runtime after the profile process is done.
    */
   double GetProfileTime() {
@@ -118,6 +116,7 @@ struct FASTDEPLOY_DECL Runtime {
   void CreateLiteBackend();
   void CreateRKNPU2Backend();
   void CreateSophgoNPUBackend();
+  void CreatePorosBackend();
   std::unique_ptr<BaseBackend> backend_;
   std::vector<FDTensor> input_tensors_;
   std::vector<FDTensor> output_tensors_;