[Backend] TRT: cast GPU input from int64 to int32 and output from int32 to int64; support building CUDA files on Windows (#426)

* TRT: cast input from int64 to int32 (sketched below)

* Windows: CMake builds of CUDA sources

* Fix Windows CMake error when building CUDA sources

* Add a notice to the Windows GPU build doc

* CMake: set the CUDA standard to C++11

* TRT: cast output from int32 to int64

* nits

* TRT: record the original input/output dtypes
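TensorRT has no int64 tensor type, so the backend narrows int64 GPU inputs to int32 before binding them and widens int32 outputs back to int64 afterwards. Below is a minimal host-side illustration of the narrowing step; CastInt64ToInt32 is a hypothetical helper, not FastDeploy's actual implementation (per the title, the real cast runs on the GPU):

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical host-side sketch of the input cast. Values outside the
// int32 range are truncated, which is acceptable for the usual int64
// payloads (indices, token ids).
std::vector<int32_t> CastInt64ToInt32(const int64_t* src, size_t numel) {
  std::vector<int32_t> dst(numel);
  for (size_t i = 0; i < numel; ++i) {
    dst[i] = static_cast<int32_t>(src[i]);
  }
  return dst;
}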
Wang Xinyu authored on 2022-10-28 13:38:06 +08:00; committed by GitHub
parent 04704c8411 · commit caa369f64a
9 changed files with 181 additions and 25 deletions


@@ -57,7 +57,8 @@ namespace fastdeploy {
 struct TrtValueInfo {
   std::string name;
   std::vector<int> shape;
-  nvinfer1::DataType dtype;
+  nvinfer1::DataType dtype;    // dtype of the TRT model
+  FDDataType original_dtype;   // dtype of the original ONNX/Paddle model
 };

 struct TrtBackendOption {
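With both dtypes recorded per tensor, the backend can tell whether a binding needs a cast at all. A minimal sketch of that check, assuming a hypothetical ToFDDataType() conversion from nvinfer1::DataType to FDDataType (FastDeploy's real helper may be named differently):

// Hypothetical: compare the dtype the engine runs with against the dtype
// the original ONNX/Paddle model declared; a mismatch (e.g. kINT32 in the
// engine vs. FDDataType::INT64 in the model) means this binding needs a cast.
bool NeedsCast(const TrtValueInfo& info) {
  return ToFDDataType(info.dtype) != info.original_dtype;  // ToFDDataType assumed
}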
@@ -141,6 +142,13 @@ class TrtBackend : public BaseBackend {
   // Also will update the range information while inferencing
   std::map<std::string, ShapeRangeInfo> shape_range_info_;

+  // If the final output tensor's dtype is different from the model output
+  // tensor's dtype, then we need to cast the data to the final output's
+  // dtype, e.g. when the TRT model's output tensor is int32 but the final
+  // tensor is int64.
+  // This map stores the casted tensors.
+  std::map<std::string, FDTensor> casted_output_tensors_;
+
   void GetInputOutputInfo();
   bool CreateTrtEngineFromOnnx(const std::string& onnx_model_buffer);
   bool BuildTrtEngine();
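On the output side, the int32 engine output is widened back to int64, and the result is cached per output name so the allocation can be reused across inference calls, which is the role casted_output_tensors_ plays. A self-contained host-side sketch under those assumptions (WidenOutput and the std::vector cache are illustrative; the real member stores FDTensor):

#include <cstddef>
#include <cstdint>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-in for casted_output_tensors_: one reusable int64
// buffer per output name.
static std::map<std::string, std::vector<int64_t>> g_casted_outputs;

// Widen an int32 engine output to the int64 the original model promised.
const int64_t* WidenOutput(const std::string& name, const int32_t* src,
                           size_t numel) {
  std::vector<int64_t>& buf = g_casted_outputs[name];  // reused across calls
  buf.resize(numel);
  for (size_t i = 0; i < numel; ++i) {
    buf[i] = static_cast<int64_t>(src[i]);
  }
  return buf.data();
}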