// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "fastdeploy/runtime/backends/sophgo/sophgo_backend.h" #include namespace fastdeploy { SophgoBackend::~SophgoBackend() { bm_dev_free(handle_); } /*************************************************************** * @name GetSDKAndDeviceVersion * @brief get Sophgo sdk and device version * @param None * @return bool * @note None ***************************************************************/ bool SophgoBackend::GetSDKAndDeviceVersion() { return true; } /*************************************************************** * @name Init * @brief Initialize Sophgo model * @param model_file: Binary data for the Sophgo model. * params_file: None * option: config * @return bool * @note None ***************************************************************/ bool SophgoBackend::Init(const RuntimeOption& option) { if (option.model_from_memory_) { FDERROR << "SophgoBackend doesn't support load model from memory, please " "load model from disk." << std::endl; return false; } if (option.model_format != ModelFormat::SOPHGO) { FDERROR << "SophgoBackend only supports model format SOPHGO, but now it's " << option.model_format << "." << std::endl; return false; } if (option.device != Device::SOPHGOTPUD) { FDERROR << "SophgoBackend only supports device::SOPHGOTPUD, but now it's " << option.device << "." << std::endl; return false; } std::string model_file = option.model_file; // LoadModel if (!this->LoadModel((char*)model_file.data())) { FDERROR << "load model failed" << std::endl; return false; } // GetSDKAndDeviceVersion if (!this->GetSDKAndDeviceVersion()) { FDERROR << "get SDK and device version failed" << std::endl; return false; } // GetModelInputOutputInfos if (!this->GetModelInputOutputInfos()) { FDERROR << "get model input output infos failed" << std::endl; return false; } return true; } /*************************************************************** * @name LoadModel * @brief read Sophgo bmodel * @param model: Binary data for the Sophgo model. * @return bool * @note None ***************************************************************/ bool SophgoBackend::LoadModel(void* model) { unsigned int card_num = 0; bm_status_t status = bm_get_card_num(&card_num); status = bm_dev_request(&handle_, 0); p_bmrt_ = bmrt_create(handle_); assert(NULL != p_bmrt_); bool load_status = bmrt_load_bmodel(p_bmrt_, (char*)model); assert(load_status); int network_num = bmrt_get_network_number(p_bmrt_); const char** net_names = NULL; bmrt_get_network_names(p_bmrt_, &net_names); net_name_ = net_names[0]; free(net_names); net_info_ = bmrt_get_network_info(p_bmrt_, net_name_.c_str()); assert(NULL != net_info_); return true; } /*************************************************************** * @name GetModelInputOutputInfos * @brief Get the detailed input and output infos of Model * @param None * @return bool * @note None ***************************************************************/ bool SophgoBackend::GetModelInputOutputInfos() { inputs_desc_.resize(net_info_->input_num); bm_shape_t* input_shapes = net_info_->stages->input_shapes; for (int idx = 0; idx < net_info_->input_num; idx++) { std::string temp_name = (net_info_->input_names)[idx]; std::vector temp_shape{}; temp_shape.resize(input_shapes[idx].num_dims); for (int i = 0; i < input_shapes[idx].num_dims; i++) { temp_shape[i] = input_shapes[idx].dims[i]; } bm_data_type_t* input_dtypes = net_info_->input_dtypes; // SophgoType to FDDataType FDDataType temp_dtype = SophgoTensorTypeToFDDataType(*input_dtypes); TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype}; inputs_desc_[idx] = temp_input_info; } outputs_desc_.resize(net_info_->output_num); bm_shape_t* output_shapes = net_info_->stages->output_shapes; for (int idx = 0; idx < net_info_->output_num; idx++) { std::string temp_name1 = (net_info_->output_names)[idx]; std::vector temp_shape1{}; temp_shape1.resize(output_shapes[idx].num_dims); for (int i = 0; i < output_shapes[idx].num_dims; i++) { temp_shape1[i] = output_shapes[idx].dims[i]; } bm_data_type_t* output_dtypes = net_info_->output_dtypes; // SophgoType to FDDataType FDDataType temp_dtype1 = SophgoTensorTypeToFDDataType(*output_dtypes); TensorInfo temp_output_info = {temp_name1, temp_shape1, temp_dtype1}; outputs_desc_[idx] = temp_output_info; } return true; } TensorInfo SophgoBackend::GetInputInfo(int index) { FDASSERT(index < NumInputs(), "The index: %d should less than the number of inputs: %d.", index, NumInputs()) return inputs_desc_[index]; } std::vector SophgoBackend::GetInputInfos() { return inputs_desc_; } TensorInfo SophgoBackend::GetOutputInfo(int index) { FDASSERT(index < NumOutputs(), "The index: %d should less than the number of outputs %d.", index, NumOutputs()) return outputs_desc_[index]; } std::vector SophgoBackend::GetOutputInfos() { return outputs_desc_; } bool SophgoBackend::Infer(std::vector& inputs, std::vector* outputs, bool copy_to_fd) { int input_size = inputs.size(); assert(input_size != 0); assert(input_size == NumInputs()); bm_tensor_t input_tensors[input_size]; bm_status_t status = BM_SUCCESS; RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN bm_data_type_t* input_dtypes = net_info_->input_dtypes; for (int i = 0; i < input_size; i++) { status = bm_malloc_device_byte(handle_, &input_tensors[i].device_mem, net_info_->max_input_bytes[i]); assert(BM_SUCCESS == status); input_tensors[i].dtype = input_dtypes[i]; input_tensors[i].st_mode = BM_STORE_1N; input_tensors[i].shape = net_info_->stages[0].input_shapes[i]; unsigned int input_byte = bmrt_tensor_bytesize(&input_tensors[i]); bm_memcpy_s2d_partial(handle_, input_tensors[i].device_mem, (void*)inputs[i].Data(), bmrt_tensor_bytesize(&input_tensors[i])); } int output_size = NumOutputs(); bm_tensor_t output_tensors[output_size]; for (int i = 0; i < output_size; i++) { status = bm_malloc_device_byte(handle_, &output_tensors[i].device_mem, net_info_->max_output_bytes[i]); assert(BM_SUCCESS == status); } RUNTIME_PROFILE_LOOP_BEGIN(1) bool launch_status = bmrt_launch_tensor_ex( p_bmrt_, net_name_.c_str(), input_tensors, net_info_->input_num, output_tensors, net_info_->output_num, true, false); assert(launch_status); status = bm_thread_sync(handle_); assert(status == BM_SUCCESS); RUNTIME_PROFILE_LOOP_END outputs->resize(outputs_desc_.size()); bm_data_type_t* output_dtypes = net_info_->output_dtypes; for (int i = 0; i < output_size; i++) { int temp_bytesize = bmrt_tensor_bytesize(&output_tensors[i]); // Byte float* temp_out = (float*)malloc(temp_bytesize); bm_memcpy_d2s_partial(handle_, temp_out, output_tensors[i].device_mem, temp_bytesize); std::vector temp_shape; temp_shape.resize(outputs_desc_[i].shape.size()); for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) { temp_shape[j] = outputs_desc_[i].shape[j]; } (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype, outputs_desc_[i].name); memcpy((*outputs)[i].MutableData(), temp_out, (*outputs)[i].Nbytes()); free(temp_out); } for (int i = 0; i < input_size; i++) { bm_free_device(handle_, input_tensors[i].device_mem); } for (int i = 0; i < output_size; i++) { bm_free_device(handle_, output_tensors[i].device_mem); } RUNTIME_PROFILE_LOOP_H2D_D2H_END return true; } /*************************************************************** * @name SophgoTensorTypeToFDDataType * @brief Change SophgoTensorType To FDDataType * @param bm_data_type_t * @return None * @note None ***************************************************************/ FDDataType SophgoBackend::SophgoTensorTypeToFDDataType(bm_data_type_t type) { if (type == BM_FLOAT16) { return FDDataType::FP16; } if (type == BM_FLOAT32) { return FDDataType::FP32; } if (type == BM_INT8) { return FDDataType::INT8; } if (type == BM_INT16) { return FDDataType::INT16; } if (type == BM_INT32) { return FDDataType::INT32; } if (type == BM_UINT8) { return FDDataType::UINT8; } FDERROR << "FDDataType don't support this type" << std::endl; return FDDataType::UNKNOWN1; } /*************************************************************** * @name FDDataTypeToSophgoTensorType * @brief Change FDDataType To SophgoTensorType * @param FDDataType * @return None * @note None ***************************************************************/ // Sophgo_tensor_type bm_data_type_t SophgoBackend::FDDataTypeToSophgoTensorType( fastdeploy::FDDataType type) { if (type == FDDataType::FP16) { return BM_FLOAT16; } if (type == FDDataType::FP32) { return BM_FLOAT32; } if (type == FDDataType::INT8) { return BM_INT8; } if (type == FDDataType::INT16) { return BM_INT16; } if (type == FDDataType::INT32) { return BM_INT32; } if (type == FDDataType::UINT8) { return BM_UINT8; } FDERROR << "Sophgo_tensor_type don't support this type" << std::endl; return BM_FLOAT32; } } // namespace fastdeploy