Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-06 09:07:10 +08:00)
[Backend] Add TensorRT FP16 support for AdaptivePool2d (#1116)
* add fp16 cuda kernel
* fix code bug
* update code
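The FP16 kernel itself is not part of the header hunks shown below, so the following is only a minimal sketch of what an adaptive average pooling kernel specialized for __half could look like. The kernel name, the NCHW layout, and the one-thread-per-output-element mapping are illustrative assumptions rather than FastDeploy's actual implementation; it accumulates in float to limit rounding error from repeated half-precision additions.

#include <cstdint>
#include <cuda_fp16.h>
#include <cuda_runtime.h>

// Sketch only: one thread per output element, NCHW layout assumed.
__global__ void AdaptiveAvgPool2dHalfKernel(const __half* input,
                                            __half* output,
                                            int in_h, int in_w,
                                            int out_h, int out_w,
                                            int total_out) {
  int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx >= total_out) return;

  // Decompose the flat output index into (batch*channel, oh, ow).
  int ow = idx % out_w;
  int oh = (idx / out_w) % out_h;
  int nc = idx / (out_w * out_h);

  // Adaptive pooling: each output cell averages a variable-sized input window.
  int h_start = (oh * in_h) / out_h;
  int h_end = ((oh + 1) * in_h + out_h - 1) / out_h;
  int w_start = (ow * in_w) / out_w;
  int w_end = ((ow + 1) * in_w + out_w - 1) / out_w;

  // Accumulate in float, convert back to half only when storing the result.
  const __half* plane = input + static_cast<int64_t>(nc) * in_h * in_w;
  float sum = 0.0f;
  for (int h = h_start; h < h_end; ++h) {
    for (int w = w_start; w < w_end; ++w) {
      sum += __half2float(plane[h * in_w + w]);
    }
  }
  int count = (h_end - h_start) * (w_end - w_start);
  output[idx] = __float2half(sum / count);
}

A launch for such a kernel would typically use one thread per output element, e.g. (total_out + 255) / 256 blocks of 256 threads on the caller's CUDA stream.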
@@ -15,6 +15,7 @@
 #pragma once
 
+#include <cuda_fp16.h>
 #include <cstdint>
 #include <cuda.h>
 #include <cuda_runtime.h>
 
@@ -25,8 +26,10 @@
 namespace fastdeploy {
 
 void CudaAdaptivePool(const std::vector<int64_t>& input_dims,
-                      const std::vector<int64_t>& output_dims, float* output,
-                      const float* input, void* compute_stream,
-                      const std::string& pooling_type);
+                      const std::vector<int64_t>& output_dims, void* output,
+                      const void* input, void* compute_stream,
+                      const std::string& pooling_type,
+                      const std::string& dtype = "float",
+                      const std::string& out_dtype = "float");
 
 } // namespace fastdeploy
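As a hedged usage sketch of the widened, type-erased signature: the declaration below is copied from the hunk above, but the caller, the tensor shapes, and the "avg" / "float16" string values are assumptions made for illustration only, not values confirmed by this commit.

#include <cstdint>
#include <string>
#include <vector>
#include <cuda_runtime.h>

namespace fastdeploy {
// Declaration as introduced by this commit (see the hunk above).
void CudaAdaptivePool(const std::vector<int64_t>& input_dims,
                      const std::vector<int64_t>& output_dims, void* output,
                      const void* input, void* compute_stream,
                      const std::string& pooling_type,
                      const std::string& dtype = "float",
                      const std::string& out_dtype = "float");
}  // namespace fastdeploy

// Hypothetical caller (e.g. a TensorRT plugin's enqueue): pools an NCHW FP16
// tensor of shape [1, 64, 32, 32] down to [1, 64, 1, 1] on the given stream.
void RunFp16AvgPool(const void* d_input, void* d_output, cudaStream_t stream) {
  std::vector<int64_t> in_dims = {1, 64, 32, 32};
  std::vector<int64_t> out_dims = {1, 64, 1, 1};
  fastdeploy::CudaAdaptivePool(in_dims, out_dims, d_output, d_input,
                               static_cast<void*>(stream), "avg",
                               /*dtype=*/"float16", /*out_dtype=*/"float16");
}

Passing the buffers as void* together with dtype strings lets the backend pick a float or half kernel at runtime while keeping a single entry point, which matches the header change above.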