Files
FastDeploy/fastdeploy/vision/common/processors/mat_batch.cc
Wang Xinyu 91a1c72f98 [CVCUDA] PP-OCR detector preprocessor integrate CV-CUDA (#1382)
* move manager initialized_ flag to ppcls

* update dbdetector preprocess api

* declare processor op

* ppocr detector preprocessor support cvcuda

* move cvcuda op to class member

* ppcls use manager register api

* refactor det preprocessor init api

* add set preprocessor api

* add create processor macro

* new processor call api

* ppcls preprocessor init resize on cpu

* ppocr detector preprocessor set normalize api

* revert ppcls pybind

* remove dbdetector set preprocessor

* refine dbdetector preprocessor includes

* remove mean std in py constructor

* add comments

* update comment

* Update __init__.py
2023-02-22 19:39:11 +08:00

83 lines
2.9 KiB
C++

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/common/processors/mat_batch.h"
namespace fastdeploy {
namespace vision {
#ifdef WITH_GPU
// Records stream handle `s` on the batch itself and then forwards the same
// handle to every mat held in `mats`.
void FDMatBatch::SetStream(cudaStream_t s) {
  stream = s;
  for (auto& mat : *mats) {
    mat.SetStream(s);
  }
}
#endif
// Returns the batched tensor of this FDMatBatch, assembling it when needed.
//
// When has_batched_tensor is already set, the existing fd_tensor is returned
// untouched. Otherwise input_cache is resized to the first mat's tensor shape
// with the batch size prepended, every mat's tensor is copied into it back to
// back, and fd_tensor is pointed at that cache through SetTensor().
//
// Fails through FDASSERT when the batch is unset or empty, when
// CheckShapeConsistency() rejects the mats, or when a mat's tensor is not on
// the same device as the batch.
FDTensor* FDMatBatch::Tensor() {
  if (has_batched_tensor) {
    return fd_tensor.get();
  }
  // Element 0 is dereferenced below to obtain the per-mat shape and dtype, so
  // an unset or empty batch has to be rejected before any indexing happens.
  FDASSERT(mats != nullptr && mats->size() > 0,
           "Cannot build a batched tensor from an empty FDMatBatch.");
  FDASSERT(CheckShapeConsistency(mats), "Mats shapes are not consistent.");

  // Each mat has its own tensor; to get a batched tensor we need to copy
  // these tensors into one contiguous buffer.
  FDTensor* src = (*mats)[0].Tensor();
  auto new_shape = src->Shape();
  new_shape.insert(new_shape.begin(), mats->size());
  input_cache->Resize(new_shape, src->Dtype(), "batch_input_cache", device);

  // The destination base pointer does not change while copying, so it is
  // fetched once instead of on every iteration.
  uint8_t* dst = reinterpret_cast<uint8_t*>(input_cache->Data());
  for (size_t i = 0; i < mats->size(); ++i) {
    // Fetch the mat's tensor once and reuse it for the device check, the
    // byte count and the source pointer.
    FDTensor* mat_tensor = (*mats)[i].Tensor();
    FDASSERT(device == mat_tensor->device,
             "Mats and MatBatch are not on the same device");
    // Keep the byte count in whatever type Nbytes() returns rather than
    // forcing it into int.
    const auto num_bytes = mat_tensor->Nbytes();
    // Slot i starts i * num_bytes into the cache.
    FDTensor::CopyBuffer(dst + i * num_bytes, mat_tensor->Data(), num_bytes,
                         device, false);
  }
  SetTensor(input_cache);
  return fd_tensor.get();
}
// Makes fd_tensor describe the buffer of `tensor` by passing its shape, dtype,
// data pointer, device and device id to SetExternalData(), then marks the
// batch as already having a batched tensor so Tensor() returns it directly.
void FDMatBatch::SetTensor(FDTensor* tensor) {
  FDTensor& source = *tensor;
  auto& target = *fd_tensor;
  target.SetExternalData(source.Shape(), source.Dtype(), source.Data(),
                         source.device, source.device_id);
  has_batched_tensor = true;
}
// Returns a batched tensor for `mat_batch` after making sure its mats are on
// the GPU.
//
// With WITH_GPU defined:
//   - batch on Device::GPU: returns mat_batch->Tensor() directly;
//   - batch on Device::CPU: runs the per-mat CreateCachedGpuInputTensor()
//     overload on every mat, stores the result back with SetTensor(), marks
//     the batch as Device::GPU and returns mat_batch->Tensor();
//   - any other device: fails through FDASSERT.
// Without WITH_GPU the function always fails through FDASSERT.
//
// The trailing `return nullptr` is only reached if FDASSERT returns.
FDTensor* CreateCachedGpuInputTensor(FDMatBatch* mat_batch) {
#ifdef WITH_GPU
  auto mats = mat_batch->mats;
  // An unset or empty batch has no per-mat tensors to move or stack; reject
  // it explicitly instead of indexing into it.
  FDASSERT(mats != nullptr && mats->size() > 0,
           "Cannot create a GPU input tensor from an empty FDMatBatch.");
  FDASSERT(CheckShapeConsistency(mats), "Mats shapes are not consistent.");

  if (mat_batch->device == Device::GPU) {
    return mat_batch->Tensor();
  }
  if (mat_batch->device == Device::CPU) {
    // Mats on CPU: copy each of them to GPU, then build the batched GPU
    // tensor from the per-mat results.
    for (auto& mat : *mats) {
      FDTensor* tensor = CreateCachedGpuInputTensor(&mat);
      mat.SetTensor(tensor);
    }
    mat_batch->device = Device::GPU;
    // NOTE(review): FDMatBatch::Tensor() returns the existing fd_tensor when
    // has_batched_tensor is already set, so a batch that reaches this branch
    // with a batched tensor already in place would not be rebuilt from the
    // per-mat GPU tensors -- confirm callers never do that.
    return mat_batch->Tensor();
  }
  // Report the device value the branches above actually tested, converted to
  // int explicitly so it matches the %d conversion.
  FDASSERT(false, "FDMat is on unsupported device: %d",
           static_cast<int>(mat_batch->device));
#else
  FDASSERT(false, "FastDeploy didn't compile with WITH_GPU.");
#endif
  return nullptr;
}
} // namespace vision
} // namespace fastdeploy