// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/runtime/runtime.h"

#include <algorithm>

#include "fastdeploy/utils/unique_ptr.h"
#include "fastdeploy/utils/utils.h"

namespace fastdeploy {

// Collect the inference backends that were enabled at compile time.
std::vector<Backend> GetAvailableBackends() {
  std::vector<Backend> backends;
#ifdef ENABLE_ORT_BACKEND
  backends.push_back(Backend::ORT);
#endif
#ifdef ENABLE_TRT_BACKEND
  backends.push_back(Backend::TRT);
#endif
#ifdef ENABLE_PADDLE_BACKEND
  backends.push_back(Backend::PDINFER);
#endif
#ifdef ENABLE_POROS_BACKEND
  backends.push_back(Backend::POROS);
#endif
#ifdef ENABLE_OPENVINO_BACKEND
  backends.push_back(Backend::OPENVINO);
#endif
#ifdef ENABLE_LITE_BACKEND
  backends.push_back(Backend::LITE);
#endif
#ifdef ENABLE_RKNPU2_BACKEND
  backends.push_back(Backend::RKNPU2);
#endif
#ifdef ENABLE_SOPHGO_BACKEND
  backends.push_back(Backend::SOPHGOTPU);
#endif
  return backends;
}

bool IsBackendAvailable(const Backend& backend) {
  std::vector<Backend> backends = GetAvailableBackends();
  for (size_t i = 0; i < backends.size(); ++i) {
    if (backend == backends[i]) {
      return true;
    }
  }
  return false;
}

// Check that the model file suffix matches the declared model format.
bool CheckModelFormat(const std::string& model_file,
                      const ModelFormat& model_format) {
  if (model_format == ModelFormat::PADDLE) {
    if (model_file.size() < 8 ||
        model_file.substr(model_file.size() - 8, 8) != ".pdmodel") {
      FDERROR << "With model format of ModelFormat::PADDLE, the model file "
                 "should end with `.pdmodel`, but now it's "
              << model_file << std::endl;
      return false;
    }
  } else if (model_format == ModelFormat::ONNX) {
    if (model_file.size() < 5 ||
        model_file.substr(model_file.size() - 5, 5) != ".onnx") {
      FDERROR << "With model format of ModelFormat::ONNX, the model file "
                 "should end with `.onnx`, but now it's "
              << model_file << std::endl;
      return false;
    }
  } else if (model_format == ModelFormat::RKNN) {
    if (model_file.size() < 5 ||
        model_file.substr(model_file.size() - 5, 5) != ".rknn") {
      FDERROR << "With model format of ModelFormat::RKNN, the model file "
                 "should end with `.rknn`, but now it's "
              << model_file << std::endl;
      return false;
    }
  } else if (model_format == ModelFormat::TORCHSCRIPT) {
    if (model_file.size() < 3 ||
        model_file.substr(model_file.size() - 3, 3) != ".pt") {
      FDERROR << "With model format of ModelFormat::TORCHSCRIPT, the model "
                 "file should end with `.pt`, but now it's "
              << model_file << std::endl;
      return false;
    }
  } else if (model_format == ModelFormat::SOPHGO) {
    if (model_file.size() < 7 ||
        model_file.substr(model_file.size() - 7, 7) != ".bmodel") {
      FDERROR << "With model format of ModelFormat::SOPHGO, the model file "
                 "should end with `.bmodel`, but now it's "
              << model_file << std::endl;
      return false;
    }
  } else {
    FDERROR << "Only support model format with frontend ModelFormat::PADDLE / "
               "ModelFormat::ONNX / ModelFormat::RKNN / "
               "ModelFormat::TORCHSCRIPT / ModelFormat::SOPHGO."
            << std::endl;
    return false;
  }
  return true;
}
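// A minimal usage sketch (comment only, not part of this translation unit)
// of the helpers above: callers can probe the available backends and validate
// the model suffix before building a Runtime. The file name below is a
// hypothetical example.
//
//   if (fastdeploy::IsBackendAvailable(fastdeploy::Backend::ORT)) {
//     fastdeploy::CheckModelFormat("model.onnx",
//                                  fastdeploy::ModelFormat::ONNX);
//   }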
// Guess the model format from the file suffix; fall back to PADDLE on failure.
ModelFormat GuessModelFormat(const std::string& model_file) {
  if (model_file.size() > 8 &&
      model_file.substr(model_file.size() - 8, 8) == ".pdmodel") {
    FDINFO << "Model Format: PaddlePaddle." << std::endl;
    return ModelFormat::PADDLE;
  } else if (model_file.size() > 5 &&
             model_file.substr(model_file.size() - 5, 5) == ".onnx") {
    FDINFO << "Model Format: ONNX." << std::endl;
    return ModelFormat::ONNX;
  } else if (model_file.size() > 3 &&
             model_file.substr(model_file.size() - 3, 3) == ".pt") {
    FDINFO << "Model Format: TorchScript." << std::endl;
    return ModelFormat::TORCHSCRIPT;
  } else if (model_file.size() > 5 &&
             model_file.substr(model_file.size() - 5, 5) == ".rknn") {
    FDINFO << "Model Format: RKNN." << std::endl;
    return ModelFormat::RKNN;
  } else if (model_file.size() > 7 &&
             model_file.substr(model_file.size() - 7, 7) == ".bmodel") {
    FDINFO << "Model Format: SOPHGO." << std::endl;
    return ModelFormat::SOPHGO;
  }

  FDERROR << "Cannot guess which model format you are using, please set "
             "RuntimeOption::model_format manually."
          << std::endl;
  return ModelFormat::PADDLE;
}

void RuntimeOption::SetModelPath(const std::string& model_path,
                                 const std::string& params_path,
                                 const ModelFormat& format) {
  if (format == ModelFormat::PADDLE) {
    model_file = model_path;
    params_file = params_path;
    model_format = ModelFormat::PADDLE;
  } else if (format == ModelFormat::ONNX) {
    model_file = model_path;
    model_format = ModelFormat::ONNX;
  } else if (format == ModelFormat::TORCHSCRIPT) {
    model_file = model_path;
    model_format = ModelFormat::TORCHSCRIPT;
  } else {
    FDASSERT(false,
             "The model format can only be "
             "ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
  }
}

void RuntimeOption::SetModelBuffer(const char* model_buffer,
                                   size_t model_buffer_size,
                                   const char* params_buffer,
                                   size_t params_buffer_size,
                                   const ModelFormat& format) {
  model_buffer_size_ = model_buffer_size;
  params_buffer_size_ = params_buffer_size;
  model_from_memory_ = true;
  if (format == ModelFormat::PADDLE) {
    model_buffer_ =
        std::string(model_buffer, model_buffer + model_buffer_size);
    params_buffer_ =
        std::string(params_buffer, params_buffer + params_buffer_size);
    model_format = ModelFormat::PADDLE;
  } else if (format == ModelFormat::ONNX) {
    model_buffer_ =
        std::string(model_buffer, model_buffer + model_buffer_size);
    model_format = ModelFormat::ONNX;
  } else if (format == ModelFormat::TORCHSCRIPT) {
    model_buffer_ =
        std::string(model_buffer, model_buffer + model_buffer_size);
    model_format = ModelFormat::TORCHSCRIPT;
  } else {
    FDASSERT(false,
             "The model format can only be "
             "ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
  }
}

void RuntimeOption::UseGpu(int gpu_id) {
#ifdef WITH_GPU
  device = Device::GPU;
  device_id = gpu_id;
#else
  FDWARNING << "FastDeploy was not compiled with GPU support, will force to "
               "use CPU."
            << std::endl;
  device = Device::CPU;
#endif
}
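// A minimal configuration sketch (comment only, with hypothetical file
// names): a RuntimeOption is typically filled with the model path and the
// target device before a Runtime is initialized from it.
//
//   fastdeploy::RuntimeOption option;
//   option.SetModelPath("model.pdmodel", "model.pdiparams",
//                       fastdeploy::ModelFormat::PADDLE);
//   option.UseGpu(0);  // silently falls back to CPU if not built WITH_GPU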
void RuntimeOption::UseCpu() { device = Device::CPU; }

void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
                              fastdeploy::rknpu2::CoreMask rknpu2_core) {
  rknpu2_cpu_name_ = rknpu2_name;
  rknpu2_core_mask_ = rknpu2_core;
  device = Device::RKNPU;
}

void RuntimeOption::UseTimVX() {
  enable_timvx = true;
  device = Device::TIMVX;
}

void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
                                 bool locked, bool autotune,
                                 const std::string& autotune_file,
                                 const std::string& precision,
                                 bool adaptive_seqlen,
                                 bool enable_multi_stream) {
  enable_kunlunxin = true;
  device_id = kunlunxin_id;
  kunlunxin_l3_workspace_size = l3_workspace_size;
  kunlunxin_locked = locked;
  kunlunxin_autotune = autotune;
  kunlunxin_autotune_file = autotune_file;
  kunlunxin_precision = precision;
  kunlunxin_adaptive_seqlen = adaptive_seqlen;
  kunlunxin_enable_multi_stream = enable_multi_stream;
  device = Device::KUNLUNXIN;
}

void RuntimeOption::UseAscend() {
  enable_ascend = true;
  device = Device::ASCEND;
}

void RuntimeOption::UseSophgo() {
  device = Device::SOPHGOTPUD;
  UseSophgoBackend();
}

void RuntimeOption::SetExternalStream(void* external_stream) {
  external_stream_ = external_stream;
}

void RuntimeOption::SetCpuThreadNum(int thread_num) {
  FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
  cpu_thread_num = thread_num;
}

void RuntimeOption::SetOrtGraphOptLevel(int level) {
  std::vector<int> supported_level{-1, 0, 1, 2};
  auto valid_level = std::find(supported_level.begin(), supported_level.end(),
                               level) != supported_level.end();
  FDASSERT(valid_level, "The level must be -1, 0, 1, 2.");
  ort_graph_opt_level = level;
}

// use paddle inference backend
void RuntimeOption::UsePaddleBackend() {
#ifdef ENABLE_PADDLE_BACKEND
  backend = Backend::PDINFER;
#else
  FDASSERT(false, "FastDeploy was not compiled with Paddle Inference.");
#endif
}

// use onnxruntime backend
void RuntimeOption::UseOrtBackend() {
#ifdef ENABLE_ORT_BACKEND
  backend = Backend::ORT;
#else
  FDASSERT(false, "FastDeploy was not compiled with OrtBackend.");
#endif
}

// use sophgoruntime backend
void RuntimeOption::UseSophgoBackend() {
#ifdef ENABLE_SOPHGO_BACKEND
  backend = Backend::SOPHGOTPU;
#else
  FDASSERT(false, "FastDeploy was not compiled with SophgoBackend.");
#endif
}

// use poros backend
void RuntimeOption::UsePorosBackend() {
#ifdef ENABLE_POROS_BACKEND
  backend = Backend::POROS;
#else
  FDASSERT(false, "FastDeploy was not compiled with PorosBackend.");
#endif
}

// use tensorrt backend
void RuntimeOption::UseTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
  backend = Backend::TRT;
#else
  FDASSERT(false, "FastDeploy was not compiled with TrtBackend.");
#endif
}

// use openvino backend
void RuntimeOption::UseOpenVINOBackend() {
#ifdef ENABLE_OPENVINO_BACKEND
  backend = Backend::OPENVINO;
#else
  FDASSERT(false, "FastDeploy was not compiled with OpenVINO.");
#endif
}

// use paddle lite backend
void RuntimeOption::UseLiteBackend() {
#ifdef ENABLE_LITE_BACKEND
  backend = Backend::LITE;
#else
  FDASSERT(false, "FastDeploy was not compiled with Paddle Lite.");
#endif
}

void RuntimeOption::SetPaddleMKLDNN(bool pd_mkldnn) {
  pd_enable_mkldnn = pd_mkldnn;
}

void RuntimeOption::DeletePaddleBackendPass(const std::string& pass_name) {
  pd_delete_pass_names.push_back(pass_name);
}

void RuntimeOption::EnablePaddleLogInfo() { pd_enable_log_info = true; }

void RuntimeOption::DisablePaddleLogInfo() { pd_enable_log_info = false; }

void RuntimeOption::EnablePaddleToTrt() {
  FDASSERT(backend == Backend::TRT,
           "Should call UseTrtBackend() before calling EnablePaddleToTrt().");
#ifdef ENABLE_PADDLE_BACKEND
  FDINFO << "While using TrtBackend with EnablePaddleToTrt, FastDeploy will "
            "change to use Paddle Inference Backend."
         << std::endl;
  backend = Backend::PDINFER;
  pd_enable_trt = true;
#else
  FDASSERT(false,
           "While using TrtBackend with EnablePaddleToTrt, FastDeploy must be "
           "compiled with the Paddle Inference Backend, please rebuild your "
           "FastDeploy.");
#endif
}
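// A minimal backend-selection sketch (comment only): EnablePaddleToTrt()
// expects UseTrtBackend() to have been called first, then switches execution
// to Paddle Inference with its TensorRT subgraph engine.
//
//   fastdeploy::RuntimeOption option;
//   option.UseGpu(0);
//   option.UseTrtBackend();
//   option.EnablePaddleToTrt();  // needs ENABLE_PADDLE_BACKEND at build time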
void RuntimeOption::SetPaddleMKLDNNCacheSize(int size) {
  FDASSERT(size > 0, "Parameter size must be greater than 0.");
  pd_mkldnn_cache_size = size;
}

void RuntimeOption::SetOpenVINODevice(const std::string& name) {
  openvino_device = name;
}

void RuntimeOption::EnableLiteFP16() { lite_enable_fp16 = true; }

void RuntimeOption::DisableLiteFP16() { lite_enable_fp16 = false; }

void RuntimeOption::EnableLiteInt8() { lite_enable_int8 = true; }

void RuntimeOption::DisableLiteInt8() { lite_enable_int8 = false; }

void RuntimeOption::SetLitePowerMode(LitePowerMode mode) {
  lite_power_mode = mode;
}

void RuntimeOption::SetLiteOptimizedModelDir(
    const std::string& optimized_model_dir) {
  lite_optimized_model_dir = optimized_model_dir;
}

void RuntimeOption::SetLiteSubgraphPartitionPath(
    const std::string& nnadapter_subgraph_partition_config_path) {
  lite_nnadapter_subgraph_partition_config_path =
      nnadapter_subgraph_partition_config_path;
}

void RuntimeOption::SetLiteSubgraphPartitionConfigBuffer(
    const std::string& nnadapter_subgraph_partition_config_buffer) {
  lite_nnadapter_subgraph_partition_config_buffer =
      nnadapter_subgraph_partition_config_buffer;
}

void RuntimeOption::SetLiteDeviceNames(
    const std::vector<std::string>& nnadapter_device_names) {
  lite_nnadapter_device_names = nnadapter_device_names;
}

void RuntimeOption::SetLiteContextProperties(
    const std::string& nnadapter_context_properties) {
  lite_nnadapter_context_properties = nnadapter_context_properties;
}

void RuntimeOption::SetLiteModelCacheDir(
    const std::string& nnadapter_model_cache_dir) {
  lite_nnadapter_model_cache_dir = nnadapter_model_cache_dir;
}

void RuntimeOption::SetLiteDynamicShapeInfo(
    const std::map<std::string, std::vector<std::vector<int64_t>>>&
        nnadapter_dynamic_shape_info) {
  lite_nnadapter_dynamic_shape_info = nnadapter_dynamic_shape_info;
}

void RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath(
    const std::string& nnadapter_mixed_precision_quantization_config_path) {
  lite_nnadapter_mixed_precision_quantization_config_path =
      nnadapter_mixed_precision_quantization_config_path;
}

void RuntimeOption::SetTrtInputShape(const std::string& input_name,
                                     const std::vector<int32_t>& min_shape,
                                     const std::vector<int32_t>& opt_shape,
                                     const std::vector<int32_t>& max_shape) {
  trt_min_shape[input_name].clear();
  trt_max_shape[input_name].clear();
  trt_opt_shape[input_name].clear();
  trt_min_shape[input_name].assign(min_shape.begin(), min_shape.end());
  if (opt_shape.size() == 0) {
    trt_opt_shape[input_name].assign(min_shape.begin(), min_shape.end());
  } else {
    trt_opt_shape[input_name].assign(opt_shape.begin(), opt_shape.end());
  }
  if (max_shape.size() == 0) {
    trt_max_shape[input_name].assign(min_shape.begin(), min_shape.end());
  } else {
    trt_max_shape[input_name].assign(max_shape.begin(), max_shape.end());
  }
}

void RuntimeOption::SetTrtMaxWorkspaceSize(size_t max_workspace_size) {
  trt_max_workspace_size = max_workspace_size;
}

void RuntimeOption::SetTrtMaxBatchSize(size_t max_batch_size) {
  trt_max_batch_size = max_batch_size;
}

void RuntimeOption::EnableTrtFP16() { trt_enable_fp16 = true; }

void RuntimeOption::DisableTrtFP16() { trt_enable_fp16 = false; }

void RuntimeOption::EnablePinnedMemory() { enable_pinned_memory = true; }

void RuntimeOption::DisablePinnedMemory() { enable_pinned_memory = false; }
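// A minimal dynamic-shape sketch (comment only, with a hypothetical input
// name "x"): when opt_shape or max_shape is empty, SetTrtInputShape reuses
// min_shape for that profile, so a fully static input needs just one shape.
//
//   fastdeploy::RuntimeOption option;
//   option.UseTrtBackend();
//   option.SetTrtInputShape("x", {1, 3, 224, 224},    // min
//                           {8, 3, 224, 224},         // opt
//                           {16, 3, 224, 224});       // max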
void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) {
  trt_serialize_file = cache_file_path;
}

void RuntimeOption::SetOpenVINOStreams(int num_streams) {
  ov_num_streams = num_streams;
}

bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
                      const RuntimeOption& _option) {
#ifdef ENABLE_POROS_BACKEND
  option = _option;
  auto poros_option = PorosBackendOption();
  poros_option.use_gpu = (option.device == Device::GPU) ? true : false;
  poros_option.gpu_id = option.device_id;
  poros_option.long_to_int = option.long_to_int;
  poros_option.use_nvidia_tf32 = option.use_nvidia_tf32;
  poros_option.unconst_ops_thres = option.unconst_ops_thres;
  poros_option.poros_file = option.poros_file;
  poros_option.is_dynamic = option.is_dynamic;
  poros_option.enable_fp16 = option.trt_enable_fp16;
  poros_option.max_batch_size = option.trt_max_batch_size;
  poros_option.max_workspace_size = option.trt_max_workspace_size;
  FDASSERT(
      option.model_format == ModelFormat::TORCHSCRIPT,
      "PorosBackend only supports model format ModelFormat::TORCHSCRIPT.");
  backend_ = utils::make_unique<PorosBackend>();
  auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
  FDASSERT(casted_backend->Compile(option.model_file, prewarm_tensors,
                                   poros_option),
           "Load model from TorchScript failed while initializing "
           "PorosBackend.");
#else
  FDASSERT(false,
           "PorosBackend is not available, please compile with "
           "ENABLE_POROS_BACKEND=ON.");
#endif
  return true;
}

void RuntimeOption::EnablePaddleTrtCollectShape() { pd_collect_shape = true; }

void RuntimeOption::DisablePaddleTrtCollectShape() {
  pd_collect_shape = false;
}

void RuntimeOption::DisablePaddleTrtOPs(const std::vector<std::string>& ops) {
  trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end());
}

void RuntimeOption::UseIpu(int device_num, int micro_batch_size,
                           bool enable_pipelining, int batches_per_step) {
#ifdef WITH_IPU
  device = Device::IPU;
  ipu_device_num = device_num;
  ipu_micro_batch_size = micro_batch_size;
  ipu_enable_pipelining = enable_pipelining;
  ipu_batches_per_step = batches_per_step;
#else
  FDWARNING << "FastDeploy was not compiled with IPU support, will force to "
               "use CPU."
            << std::endl;
  device = Device::CPU;
#endif
}

void RuntimeOption::SetIpuConfig(bool enable_fp16, int replica_num,
                                 float available_memory_proportion,
                                 bool enable_half_partial) {
  ipu_enable_fp16 = enable_fp16;
  ipu_replica_num = replica_num;
  ipu_available_memory_proportion = available_memory_proportion;
  ipu_enable_half_partial = enable_half_partial;
}

}  // namespace fastdeploy