mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 17:17:14 +08:00

* [backend] support benchmark mode for runtime and backend
* [pybind11] add benchmark methods pybind
* [Other] update build scripts
* [Other] update cmake/summary.cmake
* [Other] add ENABLE_BENCHMARK option -> setup.py
* optimize backend time recording
* [backend] optimize backend_time recording for TRT
* [benchmark] remove redundant logs
* fixed ov_backend conflict
* [benchmark] fixed paddle_backend conflicts
* [benchmark] remove use_gpu option from ORT backend option
* [benchmark] update benchmark_ppdet.py
* [benchmark] update benchmark_ppcls.py
* fixed lite backend conflicts
* [Lite] fixed lite XPU
* add benchmark macros
* add RUNTIME_PROFILE_LOOP macros
* add comments for RUNTIME_PROFILE macros and new APIs
* fixed bugs
* remove unused code
* optimize RUNTIME_PROFILE_LOOP macros
* add comments for benchmark option and result
* add docs for benchmark namespace
117 lines
3.7 KiB
C++
Executable File
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*! \file runtime.h
    \brief Interface of the FastDeploy Runtime, which loads a model and runs
    inference on different backends and devices.
 */

#pragma once

#include "fastdeploy/runtime/backends/backend.h"
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/runtime/runtime_option.h"
#include "fastdeploy/utils/perf.h"

/** \brief All C++ FastDeploy APIs are defined inside this namespace
 */
namespace fastdeploy {

/*! @brief Runtime object used to run inference with a loaded model on different devices
 */
struct FASTDEPLOY_DECL Runtime {
 public:
  /// Initialize a Runtime object with a RuntimeOption
  bool Init(const RuntimeOption& _option);
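
  // Example: a minimal initialization sketch (illustrative only; the model
  // paths are placeholders and the RuntimeOption setters are assumed from
  // the public FastDeploy API):
  //
  //   fastdeploy::RuntimeOption opt;
  //   opt.SetModelPath("model.pdmodel", "model.pdiparams");  // placeholder paths
  //   opt.UseCpu();
  //   fastdeploy::Runtime runtime;
  //   if (!runtime.Init(opt)) {
  //     std::cerr << "Runtime initialization failed." << std::endl;
  //   }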

  /** \brief Run inference with the input data, and write the results to the output
   *
   * \param[in] input_tensors Input data; note that each FDTensor::name must match the corresponding input name of the model
   * \param[out] output_tensors Inference results
   * \return true if the inference succeeded, otherwise false
   */
  bool Infer(std::vector<FDTensor>& input_tensors,
             std::vector<FDTensor>* output_tensors);
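
  // Example: a sketch of tensor-based inference (the FDTensor calls below
  // are assumed from the public FastDeploy API; the shape and data are
  // placeholders):
  //
  //   std::vector<float> data(1 * 3 * 224 * 224, 0.0f);
  //   std::vector<fastdeploy::FDTensor> inputs(1);
  //   inputs[0].SetExternalData({1, 3, 224, 224}, fastdeploy::FDDataType::FP32,
  //                             data.data());
  //   inputs[0].name = runtime.GetInputInfo(0).name;  // names must match the model
  //   std::vector<fastdeploy::FDTensor> outputs;
  //   if (!runtime.Infer(inputs, &outputs)) { /* handle failure */ }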

  /** \brief Run inference without passing parameters.
   *
   * The input and output data must be exchanged through the BindInputTensor
   * and GetOutputTensor interfaces (see the sketch after GetOutputTensor
   * below).
   */
  bool Infer();

  /** \brief Get number of inputs
   */
  int NumInputs() { return backend_->NumInputs(); }
  /** \brief Get number of outputs
   */
  int NumOutputs() { return backend_->NumOutputs(); }
  /** \brief Get input information by index
   */
  TensorInfo GetInputInfo(int index);
  /** \brief Get output information by index
   */
  TensorInfo GetOutputInfo(int index);
  /** \brief Get all the input information
   */
  std::vector<TensorInfo> GetInputInfos();
  /** \brief Get all the output information
   */
  std::vector<TensorInfo> GetOutputInfos();

  /** \brief Bind an input FDTensor by name; no copy is made, the input memory is shared
   */
  void BindInputTensor(const std::string& name, FDTensor& input);
  /** \brief Get an output FDTensor by name; no copy is made, the backend output memory is shared
   */
  FDTensor* GetOutputTensor(const std::string& name);
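
  // Example: a sketch of the zero-copy flow with the parameterless Infer()
  // (tensor names, shapes, and data are placeholders):
  //
  //   fastdeploy::FDTensor input;
  //   input.SetExternalData({1, 3, 224, 224}, fastdeploy::FDDataType::FP32,
  //                         data.data());
  //   runtime.BindInputTensor(runtime.GetInputInfo(0).name, input);
  //   runtime.Infer();  // reads the bound inputs, fills the backend outputs
  //   fastdeploy::FDTensor* out =
  //       runtime.GetOutputTensor(runtime.GetOutputInfo(0).name);
  //   // `out` shares the backend's memory; do not free it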

  /** \brief Clone a new Runtime when multiple instances of the same model are needed
   *
   * \param[in] stream CUDA stream, the default value is nullptr
   * \param[in] device_id Device id for the cloned Runtime, the default value is -1
   * \return pointer to the cloned Runtime
   */
  Runtime* Clone(void* stream = nullptr, int device_id = -1);
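
  // Example: one clone per worker thread (a sketch; error handling is
  // elided and caller ownership of the returned pointer is an assumption):
  //
  //   fastdeploy::Runtime* worker = runtime.Clone();  // shares the loaded model
  //   std::vector<fastdeploy::FDTensor> outs;
  //   worker->Infer(inputs, &outs);
  //   delete worker;  // assuming the caller owns the returned pointer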

  void ReleaseModelMemoryBuffer();

  RuntimeOption option;

  /** \brief Compile a TorchScript module, only for the Poros backend
   *
   * \param[in] prewarm_tensors Prewarm data for the compilation
   * \param[in] _option Runtime option
   * \return true if the compilation succeeded, otherwise false
   */
  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
               const RuntimeOption& _option);
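
  // Example: a hedged Poros compilation sketch (UsePorosBackend() is
  // assumed from RuntimeOption; the prewarm batch is a placeholder):
  //
  //   fastdeploy::RuntimeOption poros_opt;
  //   poros_opt.UsePorosBackend();
  //   std::vector<std::vector<fastdeploy::FDTensor>> prewarm = {inputs};
  //   if (!runtime.Compile(prewarm, poros_opt)) { /* handle failure */ }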

  /** \brief Get the profiled time of the Runtime after the profiling process is done.
   */
  double GetProfileTime() {
    return backend_->benchmark_result_.time_of_runtime;
  }
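
  // Example: reading the profiled runtime (assumes FastDeploy was built
  // with benchmark support and profiling was enabled via RuntimeOption;
  // the exact switch is treated as an assumption here):
  //
  //   for (int i = 0; i < 100; ++i) runtime.Infer(inputs, &outs);
  //   std::cout << "profiled runtime: " << runtime.GetProfileTime()
  //             << std::endl;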

 private:
  void CreateOrtBackend();
  void CreatePaddleBackend();
  void CreateTrtBackend();
  void CreateOpenVINOBackend();
  void CreateLiteBackend();
  void CreateRKNPU2Backend();
  void CreateSophgoNPUBackend();
  std::unique_ptr<BaseBackend> backend_;
  std::vector<FDTensor> input_tensors_;
  std::vector<FDTensor> output_tensors_;
};
}  // namespace fastdeploy