[Backend] Add TensorRT FP16 support for AdaptivePool2d (#1116)

* add fp16 cuda kernel

* fix code bug

* update code
This commit is contained in:
yeliang2258
2023-01-13 19:44:00 +08:00
committed by GitHub
parent d00df3d621
commit 829fe0721d
3 changed files with 64 additions and 25 deletions

View File

@@ -15,6 +15,7 @@
#pragma once
#include <cuda_fp16.h>
#include <cstdint>
#include <cuda.h>
#include <cuda_runtime.h>
@@ -25,8 +26,10 @@
namespace fastdeploy {

// Launches the CUDA adaptive 2D pooling kernel on the given stream.
//
// input_dims / output_dims: NCHW dimensions of the input and output tensors.
// output / input:           device pointers to the tensor buffers. They are
//                           type-erased (void*) so the same entry point can
//                           serve both FP32 and FP16 buffers; the actual
//                           element type is selected by `dtype` / `out_dtype`.
// compute_stream:           an opaque cudaStream_t the kernel is enqueued on.
// pooling_type:             which reduction to apply — presumably "avg" or
//                           "max"; confirm against the .cu implementation.
// dtype / out_dtype:        element-type names for input and output buffers.
//                           Default "float"; per this commit, "float16" (half)
//                           is also supported — TODO confirm the exact accepted
//                           strings against the kernel dispatch code.
//
// NOTE(review): callers passing typed float* pointers from the previous
// signature remain source-compatible, since T* converts implicitly to
// void* / const void* and the new trailing parameters are defaulted.
void CudaAdaptivePool(const std::vector<int64_t>& input_dims,
                      const std::vector<int64_t>& output_dims, void* output,
                      const void* input, void* compute_stream,
                      const std::string& pooling_type,
                      const std::string& dtype = "float",
                      const std::string& out_dtype = "float");
}  // namespace fastdeploy