mirror of https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 17:17:14 +08:00

* ppocr cls preprocessor use manager
* hwc2chw cvcuda
* ppocr rec preproc use manager
* ocr rec preproc cvcuda
* fix rec preproc bug
* ppocr cls&rec preproc set normalize
* fix pybind
* address comment
93 lines
3.5 KiB
C++
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/vision/common/processors/mat_batch.h"
namespace fastdeploy {
namespace vision {

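// Propagate the CUDA stream to every FDMat in the batch, so that the
// per-image GPU preprocessing ops and the batched copies below are all
// enqueued on the same stream.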
#ifdef WITH_GPU
void FDMatBatch::SetStream(cudaStream_t s) {
  stream = s;
  for (size_t i = 0; i < mats->size(); ++i) {
    (*mats)[i].SetStream(s);
  }
}
#endif

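// Lazily materializes the batched tensor: the first call stacks every
// per-Mat tensor into input_cache along a new leading batch dimension and
// caches the result; subsequent calls return the cached tensor directly.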
FDTensor* FDMatBatch::Tensor() {
  if (has_batched_tensor) {
    return fd_tensor.get();
  }
  FDASSERT(mats != nullptr, "Failed to get batched tensor, Mats are empty.");
  FDASSERT(CheckShapeConsistency(mats), "Mats shapes are not consistent.");
  // Each mat has its own tensor; to get a batched tensor, we need to copy
  // these per-mat tensors into one batched tensor.
  FDTensor* src = (*mats)[0].Tensor();
  device = src->device;
  auto new_shape = src->Shape();
  new_shape.insert(new_shape.begin(), mats->size());
  input_cache->Resize(new_shape, src->Dtype(), "batch_input_cache", device);
  for (size_t i = 0; i < mats->size(); ++i) {
    FDASSERT(device == (*mats)[i].Tensor()->device,
             "Mats and MatBatch are not on the same device");
    uint8_t* p = reinterpret_cast<uint8_t*>(input_cache->Data());
    int num_bytes = (*mats)[i].Tensor()->Nbytes();
    FDTensor::CopyBuffer(p + i * num_bytes, (*mats)[i].Tensor()->Data(),
                         num_bytes, device, false);
  }
  SetTensor(input_cache);
  return fd_tensor.get();
}

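// Wraps an existing tensor without copying: fd_tensor only references the
// external buffer, so the backing cache must outlive the batch's use of it.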
void FDMatBatch::SetTensor(FDTensor* tensor) {
  fd_tensor->SetExternalData(tensor->Shape(), tensor->Dtype(), tensor->Data(),
                             tensor->device, tensor->device_id);
  device = tensor->device;
  has_batched_tensor = true;
}

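// Returns a batched tensor that lives on the GPU and is backed by input_cache,
// swapping the input/output caches when necessary so that a following
// processor can write its result into output_cache. A CPU-resident batch is
// copied to the GPU asynchronously on the batch's CUDA stream.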
FDTensor* CreateCachedGpuInputTensor(FDMatBatch* mat_batch) {
#ifdef WITH_GPU
  // Get the batched tensor
  FDTensor* src = mat_batch->Tensor();
  // Make sure the returned tensor points to the input_cache.
  if (src->Data() == mat_batch->output_cache->Data()) {
    std::swap(mat_batch->input_cache, mat_batch->output_cache);
    std::swap(mat_batch->input_cache->name, mat_batch->output_cache->name);
  }
  if (src->device == Device::GPU) {
    return src;
  } else if (src->device == Device::CPU) {
    // The batched tensor is on the CPU, so copy it to the GPU.
    mat_batch->output_cache->Resize(src->Shape(), src->Dtype(), "output_cache",
                                    Device::GPU);
    FDASSERT(cudaMemcpyAsync(mat_batch->output_cache->Data(), src->Data(),
                             src->Nbytes(), cudaMemcpyHostToDevice,
                             mat_batch->Stream()) == 0,
             "[ERROR] Error occurs while copy memory from CPU to GPU.");
    std::swap(mat_batch->input_cache, mat_batch->output_cache);
    std::swap(mat_batch->input_cache->name, mat_batch->output_cache->name);
    return mat_batch->input_cache;
  } else {
    FDASSERT(false, "FDMatBatch is on unsupported device: %d", src->device);
  }
#else
  FDASSERT(false, "FastDeploy didn't compile with WITH_GPU.");
#endif
  return nullptr;
}

}  // namespace vision
}  // namespace fastdeploy
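
For readers who want the batching step in isolation: the sketch below is a plain-CPU analogue of the copy loop inside FDMatBatch::Tensor(), where N equally shaped per-image buffers are copied back to back into one contiguous buffer that gains a leading batch dimension. The Image and BatchImages names are illustrative only and are not part of the FastDeploy API; the real code above uses FDTensor::CopyBuffer and may target CPU or GPU memory.

// Illustrative sketch only: a CPU-only analogue of FDMatBatch::Tensor()'s
// stacking loop, not FastDeploy code.
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

// Hypothetical per-image buffer in HWC layout, standing in for one FDMat tensor.
struct Image {
  int h, w, c;
  std::vector<uint8_t> data;  // h * w * c bytes
};

// Stack N equally shaped images into one contiguous [N, h, w, c] buffer,
// mirroring how input_cache is filled with a new leading batch dimension.
std::vector<uint8_t> BatchImages(const std::vector<Image>& imgs) {
  assert(!imgs.empty());
  const size_t num_bytes = imgs[0].data.size();
  std::vector<uint8_t> batched(imgs.size() * num_bytes);
  for (size_t i = 0; i < imgs.size(); ++i) {
    assert(imgs[i].data.size() == num_bytes);  // shapes must be consistent
    std::memcpy(batched.data() + i * num_bytes, imgs[i].data.data(), num_bytes);
  }
  return batched;
}

int main() {
  // Four 32x32x3 images batched into one 4x32x32x3 buffer.
  std::vector<Image> imgs(4, Image{32, 32, 3, std::vector<uint8_t>(32 * 32 * 3)});
  return BatchImages(imgs).size() == 4u * 32 * 32 * 3 ? 0 : 1;
}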