// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/runtime/runtime.h"

#include "fastdeploy/utils/unique_ptr.h"
#include "fastdeploy/utils/utils.h"

#ifdef ENABLE_ORT_BACKEND
#include "fastdeploy/runtime/backends/ort/ort_backend.h"
#endif

#ifdef ENABLE_TRT_BACKEND
#include "fastdeploy/runtime/backends/tensorrt/trt_backend.h"
#endif

#ifdef ENABLE_PADDLE_BACKEND
#include "fastdeploy/runtime/backends/paddle/paddle_backend.h"
#endif

#ifdef ENABLE_POROS_BACKEND
#include "fastdeploy/runtime/backends/poros/poros_backend.h"
#endif

#ifdef ENABLE_OPENVINO_BACKEND
#include "fastdeploy/runtime/backends/openvino/ov_backend.h"
#endif

#ifdef ENABLE_LITE_BACKEND
#include "fastdeploy/runtime/backends/lite/lite_backend.h"
#endif

#ifdef ENABLE_RKNPU2_BACKEND
#include "fastdeploy/runtime/backends/rknpu2/rknpu2_backend.h"
#endif

#ifdef ENABLE_SOPHGO_BACKEND
#include "fastdeploy/runtime/backends/sophgo/sophgo_backend.h"
#endif

namespace fastdeploy {

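// Picks a default backend when the user does not specify one: intersects
// the default backends registered for the model format with those
// registered for the target device, then selects the first candidate that
// was actually compiled into this FastDeploy build.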
bool AutoSelectBackend(RuntimeOption& option) {
  auto iter0 = s_default_backends_by_format.find(option.model_format);
  if (iter0 == s_default_backends_by_format.end()) {
    FDERROR << "Cannot find a default backend for model format: "
            << option.model_format
            << ", please define the inference backend in RuntimeOption."
            << std::endl;
    return false;
  }

  auto iter1 = s_default_backends_by_device.find(option.device);
  if (iter1 == s_default_backends_by_device.end()) {
    FDERROR << "Cannot find a default backend for device: " << option.device
            << ", please define the inference backend in RuntimeOption."
            << std::endl;
    return false;
  }

  std::vector<Backend> candidates;
  for (const auto& b0 : iter0->second) {
    for (const auto& b1 : iter1->second) {
      if (b0 == b1) {
        candidates.push_back(b0);
      }
    }
  }

  if (candidates.size() == 0) {
    FDERROR << "Cannot find available inference backends for model format: "
            << option.model_format << " with device: " << option.device
            << std::endl;
    return false;
  }

  for (const auto& b : candidates) {
    if (IsBackendAvailable(b)) {
      option.backend = b;
      FDINFO << "FastDeploy will choose " << b
             << " to run inference on this model." << std::endl;
      return true;
    }
  }
  std::string debug_message = Str(candidates);
  FDERROR << "The candidate backends for " << option.model_format << " & "
          << option.device << " are " << debug_message
          << ", but none of them has been compiled into the current "
             "FastDeploy library yet."
          << std::endl;
  return false;
}

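// Initializes the runtime: optionally decrypts an encrypted model,
// auto-selects a backend when none is specified, then constructs the chosen
// backend. An illustrative usage sketch follows; the option setter names
// (SetModelPath, UseGpu) are assumptions about the public RuntimeOption API
// and may differ between FastDeploy versions:
//
//   fastdeploy::RuntimeOption opt;
//   opt.SetModelPath("model.pdmodel", "model.pdiparams");  // hypothetical
//   opt.UseGpu(0);                                         // hypothetical
//   fastdeploy::Runtime runtime;
//   if (!runtime.Init(opt)) { /* handle failure */ }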
bool Runtime::Init(const RuntimeOption& _option) {
  option = _option;
  // Decrypt the encrypted model
  if ("" != option.encryption_key_) {
#ifdef ENABLE_ENCRYPTION
    if (option.model_from_memory_) {
      option.model_file = Decrypt(option.model_file, option.encryption_key_);
      if (!(option.params_file.empty())) {
        option.params_file =
            Decrypt(option.params_file, option.encryption_key_);
      }
    } else {
      std::string model_buffer = "";
      FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
               "Failed to read binary from model file");
      option.model_file = Decrypt(model_buffer, option.encryption_key_);
      if (!(option.params_file.empty())) {
        std::string params_buffer = "";
        FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
                 "Failed to read binary from parameter file");
        option.params_file = Decrypt(params_buffer, option.encryption_key_);
      }
      option.model_from_memory_ = true;
    }
#else
    FDERROR << "FastDeploy was not compiled with the encryption function."
            << std::endl;
#endif
  }
  // Choose a default backend by model format and device if the backend is
  // not specified
  if (option.backend == Backend::UNKNOWN) {
    if (!AutoSelectBackend(option)) {
      return false;
    }
  }

  if (option.backend == Backend::ORT) {
    CreateOrtBackend();
  } else if (option.backend == Backend::TRT) {
    CreateTrtBackend();
  } else if (option.backend == Backend::PDINFER) {
    CreatePaddleBackend();
  } else if (option.backend == Backend::OPENVINO) {
    CreateOpenVINOBackend();
  } else if (option.backend == Backend::LITE) {
    CreateLiteBackend();
  } else if (option.backend == Backend::RKNPU2) {
    CreateRKNPU2Backend();
  } else if (option.backend == Backend::SOPHGOTPU) {
    CreateSophgoNPUBackend();
  } else if (option.backend == Backend::POROS) {
    FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
             "Backend::POROS only supports Device::CPU/Device::GPU.");
    FDASSERT(option.model_format == ModelFormat::TORCHSCRIPT,
             "Backend::POROS only supports model format of "
             "ModelFormat::TORCHSCRIPT.");
    FDINFO << "Runtime initialized with Backend::POROS in " << option.device
           << "." << std::endl;
    return true;
  } else {
    FDERROR << "Runtime only supports "
               "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
               "the backend now."
            << std::endl;
    return false;
  }
  backend_->benchmark_option_ = option.benchmark_option;
  return true;
}

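// Accessors for the input/output tensor metadata reported by the backend.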
TensorInfo Runtime::GetInputInfo(int index) {
  return backend_->GetInputInfo(index);
}

TensorInfo Runtime::GetOutputInfo(int index) {
  return backend_->GetOutputInfo(index);
}

std::vector<TensorInfo> Runtime::GetInputInfos() {
  return backend_->GetInputInfos();
}

std::vector<TensorInfo> Runtime::GetOutputInfos() {
  return backend_->GetOutputInfos();
}

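// Runs inference on explicitly supplied tensors. Each input tensor must
// live on the same device as the runtime; a negative device_id is not
// checked and the tensor is accepted as-is.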
bool Runtime::Infer(std::vector<FDTensor>& input_tensors,
                    std::vector<FDTensor>* output_tensors) {
  for (auto& tensor : input_tensors) {
    FDASSERT(tensor.device_id < 0 || tensor.device_id == option.device_id,
             "Device id of input tensor(%d) and runtime(%d) are not the same.",
             tensor.device_id, option.device_id);
  }
  return backend_->Infer(input_tensors, output_tensors);
}

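// Runs inference on the tensors previously bound via BindInputTensor /
// BindOutputTensor, then stamps the runtime's device id onto the outputs.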
bool Runtime::Infer() {
  bool result = backend_->Infer(input_tensors_, &output_tensors_, false);
  for (auto& tensor : output_tensors_) {
    tensor.device_id = option.device_id;
  }
  return result;
}

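// Binds an input tensor by name without copying: the runtime only records a
// view of the caller's buffer through SetExternalData, so the caller must
// keep `input` alive until inference completes.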
void Runtime::BindInputTensor(const std::string& name, FDTensor& input) {
  bool is_exist = false;
  for (auto& t : input_tensors_) {
    if (t.name == name) {
      is_exist = true;
      t.SetExternalData(input.shape, input.dtype, input.MutableData(),
                        input.device, input.device_id);
      break;
    }
  }
  if (!is_exist) {
    FDTensor new_tensor(name);
    new_tensor.SetExternalData(input.shape, input.dtype, input.MutableData(),
                               input.device, input.device_id);
    input_tensors_.emplace_back(std::move(new_tensor));
  }
}

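// Binds a caller-provided output tensor by name; like BindInputTensor, this
// stores a non-owning view of the caller's buffer.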
void Runtime::BindOutputTensor(const std::string& name, FDTensor& output) {
  bool is_exist = false;
  for (auto& t : output_tensors_) {
    if (t.name == name) {
      FDINFO << "The output name [" << name << "] already exists."
             << std::endl;
      is_exist = true;
      t.SetExternalData(output.shape, output.dtype, output.MutableData(),
                        output.device, output.device_id);
      break;
    }
  }
  if (!is_exist) {
    FDINFO << "The output name [" << name
           << "] is not bound yet, adding it into the output tensor list."
           << std::endl;
    FDTensor new_tensor(name);
    new_tensor.SetExternalData(output.shape, output.dtype,
                               output.MutableData(), output.device,
                               output.device_id);
    output_tensors_.emplace_back(std::move(new_tensor));
  }
}

FDTensor* Runtime::GetOutputTensor(const std::string& name) {
  for (auto& t : output_tensors_) {
    if (t.name == name) {
      return &t;
    }
  }
  FDWARNING << "The output name [" << name << "] does not exist." << std::endl;
  return nullptr;
}

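// Frees the in-memory copies of the model and parameter buffers once the
// backend has consumed them, reducing peak memory usage.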
void Runtime::ReleaseModelMemoryBuffer() {
  if (option.model_from_memory_) {
    option.model_file.clear();
    option.model_file.shrink_to_fit();
    option.params_file.clear();
    option.params_file.shrink_to_fit();
  }
}

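// Each Create*Backend() below follows the same pattern: validate the
// device/model-format combination, copy the relevant fields into the
// backend-specific option struct, construct the backend, and initialize it.
// When the corresponding ENABLE_* flag was off at compile time, the call
// aborts with a descriptive assertion instead.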
void Runtime::CreatePaddleBackend() {
  FDASSERT(
      option.device == Device::CPU || option.device == Device::GPU ||
          option.device == Device::IPU,
      "Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU.");
  FDASSERT(
      option.model_format == ModelFormat::PADDLE,
      "Backend::PDINFER only supports model format of ModelFormat::PADDLE.");
#ifdef ENABLE_PADDLE_BACKEND
  option.paddle_infer_option.model_file = option.model_file;
  option.paddle_infer_option.params_file = option.params_file;
  option.paddle_infer_option.model_from_memory_ = option.model_from_memory_;
  option.paddle_infer_option.device = option.device;
  option.paddle_infer_option.device_id = option.device_id;
  option.paddle_infer_option.enable_pinned_memory = option.enable_pinned_memory;
  option.paddle_infer_option.external_stream_ = option.external_stream_;
  option.paddle_infer_option.trt_option = option.trt_option;
  option.paddle_infer_option.trt_option.gpu_id = option.device_id;
  backend_ = utils::make_unique<PaddleBackend>();
  auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());

  if (option.model_from_memory_) {
    FDASSERT(
        casted_backend->InitFromPaddle(option.model_file, option.params_file,
                                       option.paddle_infer_option),
        "Load model from Paddle failed while initializing PaddleBackend.");
    ReleaseModelMemoryBuffer();
  } else {
    std::string model_buffer = "";
    std::string params_buffer = "";
    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
             "Failed to read binary from model file");
    FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
             "Failed to read binary from parameter file");
    FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
                                            option.paddle_infer_option),
             "Load model from Paddle failed while initializing PaddleBackend.");
  }
#else
  FDASSERT(false,
           "PaddleBackend is not available, please compile with "
           "ENABLE_PADDLE_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device
         << "." << std::endl;
}

void Runtime::CreateOpenVINOBackend() {
#ifdef ENABLE_OPENVINO_BACKEND
  backend_ = utils::make_unique<OpenVINOBackend>();
  FDASSERT(backend_->Init(option), "Failed to initialize OpenVINOBackend.");
#else
  FDASSERT(false,
           "OpenVINOBackend is not available, please compile with "
           "ENABLE_OPENVINO_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device
         << "." << std::endl;
}

void Runtime::CreateOrtBackend() {
#ifdef ENABLE_ORT_BACKEND
  backend_ = utils::make_unique<OrtBackend>();

  FDASSERT(backend_->Init(option), "Failed to initialize Backend::ORT.");
#else
  FDASSERT(false,
           "OrtBackend is not available, please compile with "
           "ENABLE_ORT_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::ORT in " << option.device << "."
         << std::endl;
}

void Runtime::CreateTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
  option.trt_option.model_file = option.model_file;
  option.trt_option.params_file = option.params_file;
  option.trt_option.model_format = option.model_format;
  option.trt_option.gpu_id = option.device_id;
  option.trt_option.enable_pinned_memory = option.enable_pinned_memory;
  option.trt_option.external_stream_ = option.external_stream_;
  backend_ = utils::make_unique<TrtBackend>();
  FDASSERT(backend_->Init(option), "Failed to initialize TensorRT backend.");
#else
  FDASSERT(false,
           "TrtBackend is not available, please compile with "
           "ENABLE_TRT_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::TRT in " << option.device << "."
         << std::endl;
}

void Runtime::CreateLiteBackend() {
#ifdef ENABLE_LITE_BACKEND
  backend_ = utils::make_unique<LiteBackend>();

  FDASSERT(backend_->Init(option),
           "Load model from nb file failed while initializing LiteBackend.");
#else
  FDASSERT(false,
           "LiteBackend is not available, please compile with "
           "ENABLE_LITE_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::PDLITE in " << option.device
         << "." << std::endl;
}

void Runtime::CreateRKNPU2Backend() {
#ifdef ENABLE_RKNPU2_BACKEND
  backend_ = utils::make_unique<RKNPU2Backend>();
  FDASSERT(backend_->Init(option), "Failed to initialize RKNPU2 backend.");
#else
  FDASSERT(false,
           "RKNPU2Backend is not available, please compile with "
           "ENABLE_RKNPU2_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device
         << "." << std::endl;
}

void Runtime::CreateSophgoNPUBackend() {
#ifdef ENABLE_SOPHGO_BACKEND
  backend_ = utils::make_unique<SophgoBackend>();
  FDASSERT(backend_->Init(option), "Failed to initialize Sophgo backend.");
#else
  FDASSERT(false,
           "SophgoBackend is not available, please compile with "
           "ENABLE_SOPHGO_BACKEND=ON.");
#endif
  FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device
         << "." << std::endl;
}

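// Clones this runtime onto the given stream/device. Only the OpenVINO and
// Paddle Inference backends can share the underlying engine (and thus
// memory) with the original; other backends fall back to initializing a
// completely new engine from the same RuntimeOption.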
Runtime* Runtime::Clone(void* stream, int device_id) {
  Runtime* runtime = new Runtime();
  if (option.backend != Backend::OPENVINO &&
      option.backend != Backend::PDINFER) {
    runtime->Init(option);
    FDWARNING << "Only OpenVINO/Paddle Inference support cloning an engine "
                 "to reduce CPU/GPU memory usage now. For "
              << option.backend
              << ", FastDeploy will create a new engine which will not "
                 "share memory with the current runtime."
              << std::endl;
    return runtime;
  }
  FDINFO << "Runtime Clone with Backend: " << option.backend << " in "
         << option.device << "." << std::endl;
  runtime->option = option;
  runtime->backend_ = backend_->Clone(option, stream, device_id);
  return runtime;
}

// Only for the Poros backend.
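// Compiles a TorchScript model ahead of inference. The prewarm_tensors are
// example inputs, one vector per prewarm pass, which Poros presumably uses
// to trace and optimize the graph before deployment.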
bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
                      const RuntimeOption& _option) {
#ifdef ENABLE_POROS_BACKEND
  FDASSERT(
      option.model_format == ModelFormat::TORCHSCRIPT,
      "PorosBackend only supports model format of ModelFormat::TORCHSCRIPT.");
  if (option.device != Device::CPU && option.device != Device::GPU) {
    FDERROR << "PorosBackend only supports CPU/GPU, but now it is "
            << option.device << "." << std::endl;
    return false;
  }
  option.poros_option.device = option.device;
  option.poros_option.device_id = option.device_id;
  option.poros_option.enable_fp16 = option.trt_option.enable_fp16;
  option.poros_option.max_batch_size = option.trt_option.max_batch_size;
  option.poros_option.max_workspace_size = option.trt_option.max_workspace_size;

  backend_ = utils::make_unique<PorosBackend>();
  auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
  FDASSERT(
      casted_backend->Compile(option.model_file, prewarm_tensors,
                              option.poros_option),
      "Load model from TorchScript failed while initializing PorosBackend.");
#else
  FDASSERT(false,
           "PorosBackend is not available, please compile with "
           "ENABLE_POROS_BACKEND=ON.");
#endif
  return true;
}

}  // namespace fastdeploy