mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[CVCUDA] PaddleDetection preprocessor support CV-CUDA (#1493)
* ppdet preproc use manager * pad_to_size chw opencv * pad_to_size chw flycv * fix pad_to_size flycv * add warning message * cvcuda convert cubic to linear, padToSize cvcuda * stridedpad cvcuda * fix flycv include * fix flycv include * fix flycv build * cast cvcuda * fix pybind * fix normalize permute cuda * base processor move funcs to cc * Update pad_to_size.cc
This commit is contained in:
@@ -114,6 +114,68 @@ bool StridePad::ImplByFlyCV(Mat* mat) {
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_CVCUDA
|
||||
// Pads an HWC image so that its height and width become multiples of
// `stride_`, using the CV-CUDA pad operator held in `cvcuda_pad_op_`.
//
// The image is extended by `pad_h` rows and `pad_w` columns; the source is
// placed at offset (0, 0) of the enlarged output and the new area is filled
// with `value_` (one entry per channel).
//
// Returns false (after logging via FDERROR) when:
//   * the layout is not Layout::HWC,
//   * the mat has more than 4 channels,
//   * the number of channels differs from value_.size().
// Returns true without touching `mat` when no padding is required, and true
// after replacing the mat's tensor with the padded output otherwise.
bool StridePad::ImplByCvCuda(FDMat* mat) {
  if (mat->layout != Layout::HWC) {
    FDERROR << "StridePad: The input data must be Layout::HWC format!"
            << std::endl;
    return false;
  }

  const int channels = mat->Channels();
  if (channels > 4) {
    FDERROR << "StridePad: Only support channels <= 4." << std::endl;
    return false;
  }
  // Explicit cast: same result as the implicit int -> size_t conversion, but
  // without the signed/unsigned comparison warning.
  if (static_cast<size_t>(channels) != value_.size()) {
    FDERROR
        << "StridePad: Require input channels equals to size of padding value, "
           "but now channels = "
        << channels << ", the size of padding values = " << value_.size()
        << "." << std::endl;
    return false;
  }

  const int origin_w = mat->Width();
  const int origin_h = mat->Height();

  // Distance from `extent` up to the next multiple of stride_ (0 if already
  // aligned). Identical to (e / s) * s + (e % s != 0) * s - e.
  // NOTE(review): assumes stride_ is non-zero — confirm it is validated where
  // the processor is constructed.
  const auto pad_to_stride = [this](int extent) {
    const int remainder = extent % stride_;
    return remainder == 0 ? 0 : stride_ - remainder;
  };
  const int pad_h = pad_to_stride(origin_h);
  const int pad_w = pad_to_stride(origin_w);
  if (pad_h == 0 && pad_w == 0) {
    return true;
  }

  // Copy the per-channel fill values into a 4-component border value; unused
  // trailing components stay 0. The checks above guarantee
  // value_.size() <= 4, and bounding the loop by value_.size() avoids reading
  // past the end of value_ when it holds fewer than 4 entries (including 0).
  float fill[4] = {0.0f, 0.0f, 0.0f, 0.0f};
  for (size_t i = 0; i < value_.size(); ++i) {
    fill[i] = value_[i];
  }
  const float4 value = make_float4(fill[0], fill[1], fill[2], fill[3]);

  // Prepare input tensor
  FDTensor* src = CreateCachedGpuInputTensor(mat);
  auto src_tensor = CreateCvCudaTensorWrapData(*src);

  const int height = origin_h + pad_h;
  const int width = origin_w + pad_w;

  // Prepare output tensor
  mat->output_cache->Resize({height, width, channels}, mat->Type(),
                            "output_cache", Device::GPU);
  auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache));

  // The two zeros are presumably the top/left offsets of the source inside
  // the padded output (padding goes to the bottom/right) — confirm against
  // the CV-CUDA operator signature.
  cvcuda_pad_op_(mat->Stream(), src_tensor, dst_tensor, 0, 0,
                 NVCV_BORDER_CONSTANT, value);

  mat->SetTensor(mat->output_cache);
  mat->mat_type = ProcLib::CVCUDA;
  return true;
}
|
||||
#endif
|
||||
|
||||
bool StridePad::Run(Mat* mat, int stride, const std::vector<float>& value,
|
||||
ProcLib lib) {
|
||||
auto p = StridePad(stride, value);
|
||||
|
||||
Reference in New Issue
Block a user