[Backend] TRT: cast GPU input from int64 to int32 and output from int32 to int64; support building CUDA files on Windows (#426)

* TRT: cast input from int64 to int32 (sketched below)

* Windows: CMake builds of CUDA sources

* Fix Windows CMake error when building CUDA sources

* Add a notice to the Windows GPU build doc

* CMake: set the CUDA standard to C++11

* TRT: cast output from int32 to int64

* nits

* TRT: record the original input/output dtypes
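TensorRT has no int64 tensor type, so the backend narrows int64 GPU inputs to int32 before binding them and widens int32 outputs back to int64 afterwards. Below is a minimal host-side illustration of the narrowing step; CastInt64ToInt32 is a hypothetical helper, not FastDeploy's actual implementation (per the title, the real cast runs on the GPU):

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical host-side sketch of the input cast. Values outside the
// int32 range are truncated, which is acceptable for the usual int64
// payloads (indices, token ids).
std::vector<int32_t> CastInt64ToInt32(const int64_t* src, size_t numel) {
  std::vector<int32_t> dst(numel);
  for (size_t i = 0; i < numel; ++i) {
    dst[i] = static_cast<int32_t>(src[i]);
  }
  return dst;
}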
Wang Xinyu authored on 2022-10-28 13:38:06 +08:00; committed by GitHub
parent 04704c8411 · commit caa369f64a
9 changed files with 181 additions and 25 deletions


@@ -57,7 +57,8 @@ namespace fastdeploy {
 struct TrtValueInfo {
   std::string name;
   std::vector<int> shape;
-  nvinfer1::DataType dtype;
+  nvinfer1::DataType dtype;    // dtype of the TRT model
+  FDDataType original_dtype;   // dtype of the original ONNX/Paddle model
 };

 struct TrtBackendOption {
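With both dtypes recorded per tensor, the backend can tell whether a binding needs a cast at all. A minimal sketch of that check, assuming a hypothetical ToFDDataType() conversion from nvinfer1::DataType to FDDataType (FastDeploy's real helper may be named differently):

// Hypothetical: compare the dtype the engine runs with against the dtype
// the original ONNX/Paddle model declared; a mismatch (e.g. kINT32 in the
// engine vs. FDDataType::INT64 in the model) means this binding needs a cast.
bool NeedsCast(const TrtValueInfo& info) {
  return ToFDDataType(info.dtype) != info.original_dtype;  // ToFDDataType assumed
}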
@@ -141,6 +142,13 @@ class TrtBackend : public BaseBackend {
   // Also will update the range information while inferencing
   std::map<std::string, ShapeRangeInfo> shape_range_info_;

+  // If the final output tensor's dtype is different from the model output
+  // tensor's dtype, then we need to cast the data to the final output's
+  // dtype, e.g. when the TRT model's output tensor is int32 but the final
+  // tensor is int64.
+  // This map stores the casted tensors.
+  std::map<std::string, FDTensor> casted_output_tensors_;
+
   void GetInputOutputInfo();
   bool CreateTrtEngineFromOnnx(const std::string& onnx_model_buffer);
   bool BuildTrtEngine();
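On the output side, the int32 engine output is widened back to int64, and the result is cached per output name so the allocation can be reused across inference calls, which is the role casted_output_tensors_ plays. A self-contained host-side sketch under those assumptions (WidenOutput and the std::vector cache are illustrative; the real member stores FDTensor):

#include <cstddef>
#include <cstdint>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-in for casted_output_tensors_: one reusable int64
// buffer per output name.
static std::map<std::string, std::vector<int64_t>> g_casted_outputs;

// Widen an int32 engine output to the int64 the original model promised.
const int64_t* WidenOutput(const std::string& name, const int32_t* src,
                           size_t numel) {
  std::vector<int64_t>& buf = g_casted_outputs[name];  // reused across calls
  buf.resize(numel);
  for (size_t i = 0; i < numel; ++i) {
    buf[i] = static_cast<int64_t>(src[i]);
  }
  return buf.data();
}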