[Other] Optimize poros backend (#1331)

* Optimize poros backend

* Fix pybind error

---------

Co-authored-by: root <root@bjyz-sys-gpu-kongming3.bjyz.baidu.com>
This commit is contained in:
Jason
2023-02-17 14:32:35 +08:00
committed by GitHub
parent ee41944f47
commit db471c3466
6 changed files with 66 additions and 45 deletions

View File

@@ -84,11 +84,13 @@ int main(int argc, char* argv[]) {
runtime_option.SetModelPath(model_file, "", fd::ModelFormat::TORCHSCRIPT); runtime_option.SetModelPath(model_file, "", fd::ModelFormat::TORCHSCRIPT);
runtime_option.UsePorosBackend(); runtime_option.UsePorosBackend();
runtime_option.UseGpu(0); runtime_option.UseGpu(0);
runtime_option.is_dynamic = true;
// Compile runtime // Compile runtime
std::unique_ptr<fd::Runtime> runtime = std::unique_ptr<fd::Runtime> runtime =
std::unique_ptr<fd::Runtime>(new fd::Runtime()); std::unique_ptr<fd::Runtime>(new fd::Runtime());
runtime->Init(runtime_option);
if (!runtime->Compile(prewarm_datas, runtime_option)) { if (!runtime->Compile(prewarm_datas, runtime_option)) {
std::cerr << "--- Init FastDeploy Runtime Failed! " std::cerr << "--- Init FastDeploy Runtime Failed! "
<< "\n--- Model: " << model_file << std::endl; << "\n--- Model: " << model_file << std::endl;
@@ -114,4 +116,4 @@ int main(int argc, char* argv[]) {
output_tensors[0].PrintInfo(); output_tensors[0].PrintInfo();
return 0; return 0;
} }

View File

@@ -51,7 +51,7 @@ void BindRuntime(pybind11::module& m) {
warm_datas[i][j].nbytes()); warm_datas[i][j].nbytes());
} }
} }
return self.Compile(warm_tensors, _option); return self.Compile(warm_tensors);
}) })
.def("infer", .def("infer",
[](Runtime& self, std::map<std::string, pybind11::array>& data) { [](Runtime& self, std::map<std::string, pybind11::array>& data) {

View File

@@ -58,7 +58,10 @@ class BaseBackend {
virtual bool Initialized() const { return initialized_; } virtual bool Initialized() const { return initialized_; }
virtual bool Init(const RuntimeOption& option) { virtual bool Init(const RuntimeOption& option) {
FDERROR << "Not Implement Yet." << std::endl; FDERROR << "Not Implement for "
<< option.backend << " in "
<< option.device << "."
<< std::endl;
return false; return false;
} }
@@ -89,59 +92,59 @@ class BaseBackend {
return nullptr; return nullptr;
} }
benchmark::BenchmarkOption benchmark_option_; benchmark::BenchmarkOption benchmark_option_;
benchmark::BenchmarkResult benchmark_result_; benchmark::BenchmarkResult benchmark_result_;
}; };
/** \brief Macros for Runtime benchmark profiling. /** \brief Macros for Runtime benchmark profiling.
* The param 'base_loop' for 'RUNTIME_PROFILE_LOOP_BEGIN' * The param 'base_loop' for 'RUNTIME_PROFILE_LOOP_BEGIN'
* indicates that the least number of times the loop * indicates that the least number of times the loop
* will repeat when profiling mode is not enabled. * will repeat when profiling mode is not enabled.
* In most cases, the value should be 1, i.e., results are * In most cases, the value should be 1, i.e., results are
* obtained by running the inference process once, when * obtained by running the inference process once, when
* the profile mode is turned off, such as ONNX Runtime, * the profile mode is turned off, such as ONNX Runtime,
* OpenVINO, TensorRT, Paddle Inference, Paddle Lite, * OpenVINO, TensorRT, Paddle Inference, Paddle Lite,
* RKNPU2, SOPHGO etc. * RKNPU2, SOPHGO etc.
* *
* example code @code * example code @code
* // OpenVINOBackend::Infer * // OpenVINOBackend::Infer
* RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
* // do something .... * // do something ....
* RUNTIME_PROFILE_LOOP_BEGIN(1) * RUNTIME_PROFILE_LOOP_BEGIN(1)
* // The codes which wrapped by 'BEGIN(1) ~ END' scope * // The codes which wrapped by 'BEGIN(1) ~ END' scope
* // will only run once when profiling mode is not enabled. * // will only run once when profiling mode is not enabled.
* request_.infer(); * request_.infer();
* RUNTIME_PROFILE_LOOP_END * RUNTIME_PROFILE_LOOP_END
* // do something .... * // do something ....
* RUNTIME_PROFILE_LOOP_H2D_D2H_END * RUNTIME_PROFILE_LOOP_H2D_D2H_END
* *
* @endcode In this case, No global variables inside a function * @endcode In this case, No global variables inside a function
* are wrapped by BEGIN and END, which may be required for * are wrapped by BEGIN and END, which may be required for
* subsequent tasks. But, some times we need to set 'base_loop' * subsequent tasks. But, some times we need to set 'base_loop'
* as 0, such as POROS. * as 0, such as POROS.
* *
* example code @code * example code @code
* // PorosBackend::Infer * // PorosBackend::Infer
* RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
* // do something .... * // do something ....
* RUNTIME_PROFILE_LOOP_BEGIN(0) // set 'base_loop' as 0 * RUNTIME_PROFILE_LOOP_BEGIN(0) // set 'base_loop' as 0
* // The codes which wrapped by 'BEGIN(0) ~ END' scope * // The codes which wrapped by 'BEGIN(0) ~ END' scope
* // will not run when profiling mode is not enabled. * // will not run when profiling mode is not enabled.
* auto poros_outputs = _poros_module->forward(poros_inputs); * auto poros_outputs = _poros_module->forward(poros_inputs);
* RUNTIME_PROFILE_LOOP_END * RUNTIME_PROFILE_LOOP_END
* // Run another inference beyond the scope of 'BEGIN ~ END' * // Run another inference beyond the scope of 'BEGIN ~ END'
* // to get valid outputs for subsequent tasks. * // to get valid outputs for subsequent tasks.
* auto poros_outputs = _poros_module->forward(poros_inputs); * auto poros_outputs = _poros_module->forward(poros_inputs);
* // do something .... will use 'poros_outputs' ... * // do something .... will use 'poros_outputs' ...
* if (poros_outputs.isTensor()) { * if (poros_outputs.isTensor()) {
* // ... * // ...
* } * }
* RUNTIME_PROFILE_LOOP_H2D_D2H_END * RUNTIME_PROFILE_LOOP_H2D_D2H_END
* *
* @endcode In this case, 'poros_outputs' inside a function * @endcode In this case, 'poros_outputs' inside a function
* are wrapped by BEGIN and END, which may be required for * are wrapped by BEGIN and END, which may be required for
* subsequent tasks. So, we set 'base_loop' as 0 and launch * another infer to get the valid outputs beyond the scope
* another infer to get the valid outputs beyond the scope * another infer to get the valid outputs beyond the scope
* of 'BEGIN ~ END' for subsequent tasks. * of 'BEGIN ~ END' for subsequent tasks.
*/ */

View File

@@ -51,6 +51,20 @@ class PorosBackend : public BaseBackend {
void BuildOption(const PorosBackendOption& option); void BuildOption(const PorosBackendOption& option);
// Validate a RuntimeOption for the Poros backend.
// Returns false if the model format/device combination is unsupported
// or the model was loaded from memory; returns true otherwise.
// NOTE(review): this only validates the option — the actual model
// compilation is done separately via Compile() below.
bool Init(const RuntimeOption& option) {
// Supported() checks the option against Backend::POROS capabilities
// for both the model format and the target device.
if (!(Supported(option.model_format, Backend::POROS)
&& Supported(option.device, Backend::POROS))) {
return false;
}
// Poros can only load a model file from disk; in-memory model
// buffers are rejected with an explicit error message.
if (option.model_from_memory_) {
FDERROR << "Poros backend doesn't support load model "
<< "from memory, please load model from disk."
<< std::endl;
return false;
}
return true;
}
bool Compile(const std::string& model_file, bool Compile(const std::string& model_file,
std::vector<std::vector<FDTensor>>& prewarm_tensors, std::vector<std::vector<FDTensor>>& prewarm_tensors,
const PorosBackendOption& option = PorosBackendOption()); const PorosBackendOption& option = PorosBackendOption());

View File

@@ -417,25 +417,28 @@ Runtime* Runtime::Clone(void* stream, int device_id) {
return runtime; return runtime;
} }
// only for poros backend void Runtime::CreatePorosBackend() {
bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors, #ifdef ENABLE_POROS_BACKEND
const RuntimeOption& _option) { backend_ = utils::make_unique<PorosBackend>();
FDASSERT(backend_->Init(option), "Failed to initialize Poros backend.");
#else
FDASSERT(false,
"PorosBackend is not available, please compiled with "
"ENABLE_POROS_BACKEND=ON.");
#endif
FDINFO << "Runtime initialized with Backend::POROS in " << option.device
<< "." << std::endl;
}
// only for poros backend
bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors) {
#ifdef ENABLE_POROS_BACKEND #ifdef ENABLE_POROS_BACKEND
FDASSERT(
option.model_format == ModelFormat::TORCHSCRIPT,
"PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
if (option.device != Device::CPU && option.device != Device::GPU) {
FDERROR << "PorosBackend only supports CPU/GPU, but now its "
<< option.device << "." << std::endl;
return false;
}
option.poros_option.device = option.device; option.poros_option.device = option.device;
option.poros_option.device_id = option.device_id; option.poros_option.device_id = option.device_id;
option.poros_option.enable_fp16 = option.trt_option.enable_fp16; option.poros_option.enable_fp16 = option.trt_option.enable_fp16;
option.poros_option.max_batch_size = option.trt_option.max_batch_size; option.poros_option.max_batch_size = option.trt_option.max_batch_size;
option.poros_option.max_workspace_size = option.trt_option.max_workspace_size; option.poros_option.max_workspace_size = option.trt_option.max_workspace_size;
backend_ = utils::make_unique<PorosBackend>();
auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get()); auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
FDASSERT( FDASSERT(
casted_backend->Compile(option.model_file, prewarm_tensors, casted_backend->Compile(option.model_file, prewarm_tensors,

View File

@@ -99,11 +99,9 @@ struct FASTDEPLOY_DECL Runtime {
/** \brief Compile TorchScript Module, only for Poros backend /** \brief Compile TorchScript Module, only for Poros backend
* *
* \param[in] prewarm_tensors Prewarm datas for compile * \param[in] prewarm_tensors Prewarm datas for compile
* \param[in] _option Runtime option
* \return true if compile successed, otherwise false * \return true if compile successed, otherwise false
*/ */
bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors, bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors);
const RuntimeOption& _option);
/** \brief Get profile time of Runtime after the profile process is done. /** \brief Get profile time of Runtime after the profile process is done.
*/ */
double GetProfileTime() { double GetProfileTime() {
@@ -118,6 +116,7 @@ struct FASTDEPLOY_DECL Runtime {
void CreateLiteBackend(); void CreateLiteBackend();
void CreateRKNPU2Backend(); void CreateRKNPU2Backend();
void CreateSophgoNPUBackend(); void CreateSophgoNPUBackend();
void CreatePorosBackend();
std::unique_ptr<BaseBackend> backend_; std::unique_ptr<BaseBackend> backend_;
std::vector<FDTensor> input_tensors_; std::vector<FDTensor> input_tensors_;
std::vector<FDTensor> output_tensors_; std::vector<FDTensor> output_tensors_;