// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/vision/common/processors/mat_batch.h"

namespace fastdeploy {
namespace vision {

#ifdef WITH_GPU
void FDMatBatch::SetStream(cudaStream_t s) {
  stream = s;
  // Propagate the stream to every mat so per-mat CUDA work is serialized
  // on the same stream as the batch-level work.
  for (size_t i = 0; i < mats->size(); ++i) {
    (*mats)[i].SetStream(s);
  }
}
#endif

FDTensor* FDMatBatch::Tensor() {
  if (has_batched_tensor) {
    return fd_tensor.get();
  }
  FDASSERT(mats != nullptr, "Failed to get batched tensor, Mats are empty.");
  FDASSERT(CheckShapeConsistency(mats), "Mats shapes are not consistent.");
  // Each mat has its own tensor; to get a batched tensor, copy the per-mat
  // tensors into one contiguous buffer, prepending the batch dimension.
  FDTensor* src = (*mats)[0].Tensor();
  device = src->device;
  auto new_shape = src->Shape();
  new_shape.insert(new_shape.begin(), mats->size());
  input_cache->Resize(new_shape, src->Dtype(), "batch_input_cache", device);
  for (size_t i = 0; i < mats->size(); ++i) {
    FDASSERT(device == (*mats)[i].Tensor()->device,
             "Mats and MatBatch are not on the same device");
    uint8_t* p = reinterpret_cast<uint8_t*>(input_cache->Data());
    int num_bytes = (*mats)[i].Tensor()->Nbytes();
    FDTensor::CopyBuffer(p + i * num_bytes, (*mats)[i].Tensor()->Data(),
                         num_bytes, device, false);
  }
  SetTensor(input_cache);
  return fd_tensor.get();
}

void FDMatBatch::SetTensor(FDTensor* tensor) {
  fd_tensor->SetExternalData(tensor->Shape(), tensor->Dtype(), tensor->Data(),
                             tensor->device, tensor->device_id);
  device = tensor->device;
  has_batched_tensor = true;
}

FDTensor* CreateCachedGpuInputTensor(FDMatBatch* mat_batch) {
#ifdef WITH_GPU
  // Get the batched tensor
  FDTensor* src = mat_batch->Tensor();
  // Make sure the returned tensor points at input_cache, so output_cache
  // stays free for the next processor in the pipeline.
  if (src->Data() == mat_batch->output_cache->Data()) {
    std::swap(mat_batch->input_cache, mat_batch->output_cache);
    std::swap(mat_batch->input_cache->name, mat_batch->output_cache->name);
  }
  if (src->device == Device::GPU) {
    return src;
  } else if (src->device == Device::CPU) {
    // The batched tensor is on CPU, copy it to GPU
    mat_batch->output_cache->Resize(src->Shape(), src->Dtype(), "output_cache",
                                    Device::GPU);
    FDASSERT(cudaMemcpyAsync(mat_batch->output_cache->Data(), src->Data(),
                             src->Nbytes(), cudaMemcpyHostToDevice,
                             mat_batch->Stream()) == cudaSuccess,
             "[ERROR] Error occurred while copying memory from CPU to GPU.");
    std::swap(mat_batch->input_cache, mat_batch->output_cache);
    std::swap(mat_batch->input_cache->name, mat_batch->output_cache->name);
    return mat_batch->input_cache;
  } else {
    FDASSERT(false, "FDMatBatch is on unsupported device: %d",
             static_cast<int>(src->device));
  }
#else
  FDASSERT(false, "FastDeploy didn't compile with WITH_GPU.");
#endif
  return nullptr;
}

}  // namespace vision
}  // namespace fastdeploy
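
// ---------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of the library). It assumes the
// FDMatBatch(std::vector<FDMat>*) constructor and the public input_cache /
// output_cache pointer members declared in mat_batch.h; `frames`,
// `input_cache`, and `output_cache` are hypothetical caller-side names:
//
//   std::vector<FDMat> frames = ...;        // one FDMat per image
//   FDMatBatch batch(&frames);              // non-owning view over the mats
//   FDTensor input_cache, output_cache;     // caller-owned cache tensors
//   batch.input_cache = &input_cache;
//   batch.output_cache = &output_cache;
//   // Returns a contiguous, GPU-resident batched tensor; performs a
//   // host-to-device copy only when the per-mat tensors live on the CPU.
//   FDTensor* gpu_input = CreateCachedGpuInputTensor(&batch);
// ---------------------------------------------------------------------------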