[CVCUDA] PaddleDetection preprocessor support CV-CUDA (#1493)

* ppdet preproc use manager * pad_to_size chw opencv * pad_to_size chw flycv * fix pad_to_size flycv * add warning message * cvcuda convert cubic to linear, padToSize cvcuda * stridedpad cvcuda * fix flycv include * fix flycv include * fix flycv build * cast cvcuda * fix pybind * fix normalize permute cuda * base processor move funcs to cc * Update pad_to_size.cc
2025-12-24 13:28:13 +08:00 · 2023-03-10 12:43:57 +08:00
parent 9ee2118e1f
commit cb7c8a07d4
23 changed files with 537 additions and 239 deletions
--- a/fastdeploy/vision/common/processors/stride_pad.cc
+++ b/fastdeploy/vision/common/processors/stride_pad.cc
@@ -114,6 +114,68 @@ bool StridePad::ImplByFlyCV(Mat* mat) {
 }
 #endif

+#ifdef ENABLE_CVCUDA
+// CV-CUDA implementation: grows an HWC image so that height and width become
+// multiples of stride_, using value_ as the constant border color.
+// Returns false after logging when the layout/channel checks fail, and true
+// without modifying mat when no padding is needed.
+// NOTE(review): assumes stride_ > 0 (not validated here) -- confirm at caller.
+bool StridePad::ImplByCvCuda(FDMat* mat) {
+  if (mat->layout != Layout::HWC) {
+    FDERROR << "StridePad: The input data must be Layout::HWC format!"
+            << std::endl;
+    return false;
+  }
+  const int channels = mat->Channels();
+  if (channels > 4) {
+    FDERROR << "StridePad: Only support channels <= 4." << std::endl;
+    return false;
+  }
+  // Compare in size_t to avoid a signed/unsigned mismatch.
+  if (static_cast<size_t>(channels) != value_.size()) {
+    FDERROR
+        << "StridePad: Require input channels equals to size of padding value, "
+           "but now channels = "
+        << channels << ", the size of padding values = " << value_.size()
+        << "." << std::endl;
+    return false;
+  }
+
+  // Extra rows/cols needed to round each dimension up to a stride_ multiple.
+  const int pad_h = (stride_ - mat->Height() % stride_) % stride_;
+  const int pad_w = (stride_ - mat->Width() % stride_) % stride_;
+  if (pad_h == 0 && pad_w == 0) {
+    return true;
+  }
+
+  // Border color: value_ zero-filled to four components (size <= 4, see above).
+  float border[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+  for (size_t i = 0; i < value_.size(); ++i) {
+    border[i] = value_[i];
+  }
+  const float4 value = make_float4(border[0], border[1], border[2], border[3]);
+
+  // Prepare input tensor
+  FDTensor* src = CreateCachedGpuInputTensor(mat);
+  auto src_tensor = CreateCvCudaTensorWrapData(*src);
+
+  // Prepare output tensor
+  const int height = mat->Height() + pad_h;
+  const int width = mat->Width() + pad_w;
+  mat->output_cache->Resize({height, width, channels}, mat->Type(),
+                            "output_cache", Device::GPU);
+  auto dst_tensor = CreateCvCudaTensorWrapData(*(mat->output_cache));
+
+  // Offsets 0, 0 are presumably top/left, i.e. padding goes bottom/right.
+  cvcuda_pad_op_(mat->Stream(), src_tensor, dst_tensor, 0, 0,
+                 NVCV_BORDER_CONSTANT, value);
+
+  mat->SetTensor(mat->output_cache);
+  mat->mat_type = ProcLib::CVCUDA;
+  return true;
+}
+#endif
+
 bool StridePad::Run(Mat* mat, int stride, const std::vector<float>& value,
                    ProcLib lib) {
  auto p = StridePad(stride, value);