FastDeploy/fastdeploy/fastdeploy_model.cc

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/utils/utils.h"

namespace fastdeploy {

bool FastDeployModel::InitRuntime() {
  FDASSERT(
      CheckModelFormat(runtime_option.model_file, runtime_option.model_format),
      "ModelFormatCheck Failed.");
  if (runtime_initialized_) {
    FDERROR << "The model is already initialized, cannot be initliazed again."
            << std::endl;
    return false;
  }
  if (runtime_option.backend != Backend::UNKNOWN) {
    if (!IsBackendAvailable(runtime_option.backend)) {
      FDERROR << Str(runtime_option.backend)
              << " is not compiled with current FastDeploy library."
              << std::endl;
      return false;
    }

    bool use_gpu = (runtime_option.device == Device::GPU);
    bool use_ipu = (runtime_option.device == Device::IPU);
#ifndef WITH_GPU
    use_gpu = false;
#endif
#ifndef WITH_IPU
    use_ipu = false;
#endif
    bool use_rknpu = (runtime_option.device == Device::RKNPU);
    bool use_timvx = (runtime_option.device == Device::TIMVX);

    // whether the model is supported by the setted backend
    bool is_supported = false;
    if (use_gpu) {
      for (auto& item : valid_gpu_backends) {
        if (item == runtime_option.backend) {
          is_supported = true;
          break;
        }
      }
    } else if (use_rknpu) {
      for (auto& item : valid_rknpu_backends) {
        if (item == runtime_option.backend) {
          is_supported = true;
          break;
        }
      }
    } else if (use_timvx) {
      for (auto& item : valid_timvx_backends) {
        if (item == runtime_option.backend) {
          is_supported = true;
          break;
        }
      }
    }else if(use_ipu) {
      for (auto& item : valid_ipu_backends) {
        if (item == runtime_option.backend) {
          is_supported = true;
          break;
        }
      }
    } else {
      for (auto& item : valid_cpu_backends) {
        if (item == runtime_option.backend) {
          is_supported = true;
          break;
        }
      }
    }

    if (is_supported) {
      runtime_ = std::shared_ptr<Runtime>(new Runtime());
      if (!runtime_->Init(runtime_option)) {
        return false;
      }
      runtime_initialized_ = true;
      return true;
    } else {
      FDWARNING << ModelName() << " is not supported with backend "
                << Str(runtime_option.backend) << "." << std::endl;
      if (use_gpu) {
        FDASSERT(valid_gpu_backends.size() > 0,
                 "There's no valid gpu backend for %s.", ModelName().c_str());
        FDWARNING << "FastDeploy will choose " << Str(valid_gpu_backends[0])
                  << " for model inference." << std::endl;
      } else {
        FDASSERT(valid_cpu_backends.size() > 0,
                 "There's no valid cpu backend for %s.", ModelName().c_str());
        FDWARNING << "FastDeploy will choose " << Str(valid_cpu_backends[0])
                  << " for model inference." << std::endl;
      }
    }
  }

  if (runtime_option.device == Device::CPU) {
    return CreateCpuBackend();
  } else if (runtime_option.device == Device::GPU) {
#ifdef WITH_GPU
    return CreateGpuBackend();
#else
    FDERROR << "The compiled FastDeploy library doesn't support GPU now."
            << std::endl;
    return false;
#endif
  } else if (runtime_option.device == Device::RKNPU) {
    return CreateRKNPUBackend();
  } else if (runtime_option.device == Device::TIMVX) {
    return CreateTimVXBackend();
  } else if (runtime_option.device == Device::IPU) {
#ifdef WITH_IPU
    return CreateIpuBackend();
#else
    FDERROR << "The compiled FastDeploy library doesn't support IPU now."
            << std::endl;
    return false;
#endif
  }
  FDERROR << "Only support CPU/GPU/NPU now." << std::endl;
  return false;
}

bool FastDeployModel::CreateCpuBackend() {
  if (valid_cpu_backends.size() == 0) {
    FDERROR << "There's no valid cpu backends for model: " << ModelName()
            << std::endl;
    return false;
  }

  for (size_t i = 0; i < valid_cpu_backends.size(); ++i) {
    if (!IsBackendAvailable(valid_cpu_backends[i])) {
      continue;
    }
    runtime_option.backend = valid_cpu_backends[i];
    runtime_ = std::shared_ptr<Runtime>(new Runtime());
    if (!runtime_->Init(runtime_option)) {
      return false;
    }
    runtime_initialized_ = true;
    return true;
  }
  FDERROR << "Found no valid backend for model: " << ModelName() << std::endl;
  return false;
}

bool FastDeployModel::CreateGpuBackend() {
  if (valid_gpu_backends.empty()) {
    FDERROR << "There's no valid gpu backends for model: " << ModelName()
            << std::endl;
    return false;
  }

  for (size_t i = 0; i < valid_gpu_backends.size(); ++i) {
    if (!IsBackendAvailable(valid_gpu_backends[i])) {
      continue;
    }
    runtime_option.backend = valid_gpu_backends[i];
    runtime_ = std::shared_ptr<Runtime>(new Runtime());
    if (!runtime_->Init(runtime_option)) {
      return false;
    }
    runtime_initialized_ = true;
    return true;
  }
  FDERROR << "Cannot find an available gpu backend to load this model."
          << std::endl;
  return false;
}

bool FastDeployModel::CreateRKNPUBackend() {
  if (valid_rknpu_backends.empty()) {
    FDERROR << "There's no valid npu backends for model: " << ModelName()
            << std::endl;
    return false;
  }

  for (size_t i = 0; i < valid_rknpu_backends.size(); ++i) {
    if (!IsBackendAvailable(valid_rknpu_backends[i])) {
      continue;
    }
    runtime_option.backend = valid_rknpu_backends[i];
    runtime_ = std::unique_ptr<Runtime>(new Runtime());
    if (!runtime_->Init(runtime_option)) {
      return false;
    }
    runtime_initialized_ = true;
    return true;
  }
  FDERROR << "Cannot find an available npu backend to load this model."
          << std::endl;
  return false;
}

bool FastDeployModel::CreateTimVXBackend() {
  if (valid_timvx_backends.size() == 0) {
    FDERROR << "There's no valid timvx backends for model: " << ModelName()
            << std::endl;
    return false;
  }

  for (size_t i = 0; i < valid_timvx_backends.size(); ++i) {
    if (!IsBackendAvailable(valid_timvx_backends[i])) {
      continue;
    }
    runtime_option.backend = valid_timvx_backends[i];
    runtime_ = std::unique_ptr<Runtime>(new Runtime());
    if (!runtime_->Init(runtime_option)) {
      return false;
    }
    runtime_initialized_ = true;
    return true;
  }
  FDERROR << "Found no valid backend for model: " << ModelName() << std::endl;
  return false;
}

bool FastDeployModel::CreateIpuBackend() {
  if (valid_ipu_backends.size() == 0) {
    FDERROR << "There's no valid ipu backends for model: " << ModelName()
            << std::endl;
    return false;
  }

  for (size_t i = 0; i < valid_ipu_backends.size(); ++i) {
    if (!IsBackendAvailable(valid_ipu_backends[i])) {
      continue;
    }
    runtime_option.backend = valid_ipu_backends[i];
    runtime_ = std::unique_ptr<Runtime>(new Runtime());
    if (!runtime_->Init(runtime_option)) {
      return false;
    }
    runtime_initialized_ = true;
    return true;
  }
  FDERROR << "Found no valid backend for model: " << ModelName() << std::endl;
  return false;
}

bool FastDeployModel::Infer(std::vector<FDTensor>& input_tensors,
                            std::vector<FDTensor>* output_tensors) {
  TimeCounter tc;
  if (enable_record_time_of_runtime_) {
    tc.Start();
  }
  auto ret = runtime_->Infer(input_tensors, output_tensors);
  if (enable_record_time_of_runtime_) {
    tc.End();
    if (time_of_runtime_.size() > 50000) {
      FDWARNING << "There are already 50000 records of runtime, will force to "
                   "disable record time of runtime now."
                << std::endl;
      enable_record_time_of_runtime_ = false;
    }
    time_of_runtime_.push_back(tc.Duration());
  }
  return ret;
}

bool FastDeployModel::Infer() {
  return Infer(reused_input_tensors_, &reused_output_tensors_);
}

std::map<std::string, float> FastDeployModel::PrintStatisInfoOfRuntime() {
  std::map<std::string, float> statis_info_of_runtime_dict;

  if (time_of_runtime_.size() < 10) {
    FDWARNING << "PrintStatisInfoOfRuntime require the runtime ran 10 times at "
                 "least, but now you only ran "
              << time_of_runtime_.size() << " times." << std::endl;
  }
  double warmup_time = 0.0;
  double remain_time = 0.0;
  int warmup_iter = time_of_runtime_.size() / 5;
  for (size_t i = 0; i < time_of_runtime_.size(); ++i) {
    if (i < warmup_iter) {
      warmup_time += time_of_runtime_[i];
    } else {
      remain_time += time_of_runtime_[i];
    }
  }
  double avg_time = remain_time / (time_of_runtime_.size() - warmup_iter);
  std::cout << "============= Runtime Statis Info(" << ModelName()
            << ") =============" << std::endl;
  std::cout << "Total iterations: " << time_of_runtime_.size() << std::endl;
  std::cout << "Total time of runtime: " << warmup_time + remain_time << "s."
            << std::endl;
  std::cout << "Warmup iterations: " << warmup_iter << std::endl;
  std::cout << "Total time of runtime in warmup step: " << warmup_time << "s."
            << std::endl;
  std::cout << "Average time of runtime exclude warmup step: "
            << avg_time * 1000 << "ms." << std::endl;

  statis_info_of_runtime_dict["total_time"] = warmup_time + remain_time;
  statis_info_of_runtime_dict["warmup_time"] = warmup_time;
  statis_info_of_runtime_dict["remain_time"] = remain_time;
  statis_info_of_runtime_dict["warmup_iter"] = warmup_iter;
  statis_info_of_runtime_dict["avg_time"] = avg_time;
  statis_info_of_runtime_dict["iterations"] = time_of_runtime_.size();
  return statis_info_of_runtime_dict;
}
}  // namespace fastdeploy