Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-06 09:07:10 +08:00)
[Backend] TRT cast GPU input from int64 to int32, output from int32 to int64, and Windows support building CUDA files (#426)
* TRT cast int64 to int32
* windows cmake build cuda src
* fix windows cmake error when build cuda src
* add a notice in windows gpu build doc
* cmake add cuda std=11
* TRT cast output from int32 to int64
* nits
* trt get original input output dtype
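Background for the cast: TensorRT of this era has no int64 binding support, so the backend narrows int64 inputs to int32 before enqueueing and widens int32 outputs back to int64 afterwards, so callers still see the dtype the original ONNX/Paddle model declared. Below is a minimal host-side sketch of that bridging idea; the helper names are hypothetical and this is not FastDeploy's actual implementation, which performs the conversion on GPU buffers.

#include <cstdint>
#include <vector>

// Narrow int64 input data to int32 before binding it to the TRT engine.
// Values are assumed to fit in int32 (typical for index/id tensors).
std::vector<int32_t> CastInputInt64ToInt32(const std::vector<int64_t>& src) {
  std::vector<int32_t> dst(src.size());
  for (size_t i = 0; i < src.size(); ++i) {
    dst[i] = static_cast<int32_t>(src[i]);
  }
  return dst;
}

// Widen the int32 TRT output back to the int64 dtype the original
// ONNX/Paddle model declared.
std::vector<int64_t> CastOutputInt32ToInt64(const std::vector<int32_t>& src) {
  return std::vector<int64_t>(src.begin(), src.end());
}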
@@ -57,7 +57,8 @@ namespace fastdeploy {
 struct TrtValueInfo {
   std::string name;
   std::vector<int> shape;
-  nvinfer1::DataType dtype;
+  nvinfer1::DataType dtype;   // dtype of the TRT model
+  FDDataType original_dtype;  // dtype of the original ONNX/Paddle model
 };
 
 struct TrtBackendOption {
@@ -141,6 +142,13 @@ class TrtBackend : public BaseBackend {
   // Also will update the range information while inferencing
   std::map<std::string, ShapeRangeInfo> shape_range_info_;
 
+  // If the final output tensor's dtype is different from the
+  // model output tensor's dtype, then we need to cast the data
+  // to the final output's dtype,
+  // e.g. when the TRT model output tensor is int32 but the final tensor is int64.
+  // This map stores the casted tensors.
+  std::map<std::string, FDTensor> casted_output_tensors_;
+
   void GetInputOutputInfo();
   bool CreateTrtEngineFromOnnx(const std::string& onnx_model_buffer);
   bool BuildTrtEngine();
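For context, here is a hypothetical sketch of how original_dtype and casted_output_tensors_ could fit together after inference. The Tensor struct below is a simplified stand-in, not FastDeploy's FDTensor, and the function is illustrative only, not the library's API.

#include <cstdint>
#include <map>
#include <string>
#include <vector>

enum class DType { INT32, INT64 };

struct Tensor {              // simplified stand-in for FDTensor
  DType dtype = DType::INT32;
  std::vector<int32_t> i32;  // storage when dtype == INT32
  std::vector<int64_t> i64;  // storage when dtype == INT64
};

// If the TRT engine produced int32 but the original model promised int64,
// widen into a per-output scratch tensor so the caller sees the original
// dtype. Keeping the scratch tensors in a map keyed by output name lets
// them be reused across inference runs instead of reallocated each time.
void CastOutputIfNeeded(const std::string& name, const Tensor& trt_out,
                        DType original_dtype,
                        std::map<std::string, Tensor>* casted_output_tensors) {
  if (trt_out.dtype == DType::INT32 && original_dtype == DType::INT64) {
    Tensor& scratch = (*casted_output_tensors)[name];  // reused across runs
    scratch.dtype = DType::INT64;
    scratch.i64.assign(trt_out.i32.begin(), trt_out.i32.end());
  }
}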