[Backend] Add TensorRT FP16 support for AdaptivePool2d (#1116)

* add fp16 cuda kernel

* fix code bug

* update code
This commit is contained in:
yeliang2258
2023-01-13 19:44:00 +08:00
committed by GitHub
parent d00df3d621
commit 829fe0721d
3 changed files with 64 additions and 25 deletions

View File

@@ -15,6 +15,7 @@
#pragma once
#include <cuda_fp16.h>
#include <cstdint>
#include <cuda.h>
#include <cuda_runtime.h>
@@ -25,8 +26,10 @@
namespace fastdeploy {

// Launches the CUDA adaptive 2D pooling kernel on the given stream.
//
// input_dims / output_dims: NCHW dimensions of the input and output tensors.
// output / input:           device pointers to the tensor buffers. They are
//                           type-erased (void*) so the same entry point can
//                           serve both FP32 and FP16 buffers; the actual
//                           element type is selected by `dtype` / `out_dtype`.
// compute_stream:           an opaque cudaStream_t the kernel is enqueued on.
// pooling_type:             which reduction to apply — presumably "avg" or
//                           "max"; confirm against the .cu implementation.
// dtype / out_dtype:        element-type names for input and output buffers.
//                           Default "float"; per this commit, "float16" (half)
//                           is also supported — TODO confirm the exact accepted
//                           strings against the kernel dispatch code.
//
// NOTE(review): callers passing typed float* pointers from the previous
// signature remain source-compatible, since T* converts implicitly to
// void* / const void* and the new trailing parameters are defaulted.
void CudaAdaptivePool(const std::vector<int64_t>& input_dims,
                      const std::vector<int64_t>& output_dims, void* output,
                      const void* input, void* compute_stream,
                      const std::string& pooling_type,
                      const std::string& dtype = "float",
                      const std::string& out_dtype = "float");
}  // namespace fastdeploy