Add Paddle quantized model support for the ORT, TRT and MKLDNN deploy backends (#257)

* add quantized model support for trt and paddle

* fix bugs

* fix

* update paddle2onnx version

* update version

* add quantized model test

Co-authored-by: Jason <jiangjiajun@baidu.com>
yeliang2258, 2022-10-09 20:00:05 +08:00 (committed by GitHub)
parent ff5e798b7f
commit 2a68a23baf
10 changed files with 187 additions and 5 deletions

@@ -131,10 +131,13 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
   }
   char* model_content_ptr;
   int model_content_size = 0;
+  char* calibration_cache_ptr;
+  int calibration_cache_size = 0;
   if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
                            &model_content_ptr, &model_content_size, 11, true,
                            verbose, true, true, true, custom_ops.data(),
-                           custom_ops.size())) {
+                           custom_ops.size(), "tensorrt",
+                           &calibration_cache_ptr, &calibration_cache_size)) {
     FDERROR << "Error occurred while exporting PaddlePaddle to ONNX format."
             << std::endl;
     return false;
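
The string argument added to paddle2onnx::Export selects the deploy backend; per the commit title, the same path is meant to serve ORT and MKLDNN as well. Below is a minimal sketch of the equivalent call targeting ONNX Runtime — the "onnxruntime" backend string and the expectation that it produces no separate calibration cache are assumptions, not shown in this diff:

// Sketch, not part of this commit: the same export call targeting the
// ONNX Runtime backend; only the deploy-backend string differs.
char* model_content_ptr = nullptr;
int model_content_size = 0;
char* calibration_cache_ptr = nullptr;
int calibration_cache_size = 0;
if (paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
                        &model_content_ptr, &model_content_size, 11, true,
                        verbose, true, true, true, custom_ops.data(),
                        custom_ops.size(), "onnxruntime",
                        &calibration_cache_ptr, &calibration_cache_size)) {
  std::string onnx_model_proto(model_content_ptr,
                               model_content_ptr + model_content_size);
  delete[] model_content_ptr;
  // Quantization info is assumed to travel inside the exported ONNX model
  // for ORT/MKLDNN, so calibration_cache_size is expected to stay 0 here.
}
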
@@ -151,6 +154,13 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
   delete[] model_content_ptr;
   std::string onnx_model_proto(new_model, new_model + new_model_size);
   delete[] new_model;
+  if (calibration_cache_size) {
+    std::string calibration_str(
+        calibration_cache_ptr,
+        calibration_cache_ptr + calibration_cache_size);
+    calibration_str_ = calibration_str;
+    delete[] calibration_cache_ptr;
+  }
   return InitFromOnnx(onnx_model_proto, option, true);
 }
@@ -158,6 +168,12 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
                                  model_content_ptr + model_content_size);
   delete[] model_content_ptr;
   model_content_ptr = nullptr;
+  if (calibration_cache_size) {
+    std::string calibration_str(calibration_cache_ptr,
+                                calibration_cache_ptr + calibration_cache_size);
+    calibration_str_ = calibration_str;
+    delete[] calibration_cache_ptr;
+  }
   return InitFromOnnx(onnx_model_proto, option, true);
 #else
   FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
@@ -409,6 +425,7 @@ bool TrtBackend::BuildTrtEngine() {
                  "will use FP32 instead."
               << std::endl;
     } else {
+      FDINFO << "[TrtBackend] Use FP16 for inference." << std::endl;
       config->setFlag(nvinfer1::BuilderFlag::kFP16);
     }
   }
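
This hunk opens mid-branch, so the guarding condition is not visible. By symmetry with the INT8 block in the next hunk, the surrounding context is presumably shaped like the sketch below; the option_.enable_fp16 flag and the platformHasFastFp16() check are assumptions, and only the FDINFO line is actually new in this commit:

// Presumed context around the hunk (assumption; not part of this diff):
if (option_.enable_fp16) {
  if (!builder_->platformHasFastFp16()) {
    FDWARNING << "Detected FP16 is not supported in the current GPU, "
                 "will use FP32 instead."
              << std::endl;
  } else {
    FDINFO << "[TrtBackend] Use FP16 for inference." << std::endl;
    config->setFlag(nvinfer1::BuilderFlag::kFP16);
  }
}
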
@@ -459,6 +476,20 @@ bool TrtBackend::BuildTrtEngine() {
   }
   config->addOptimizationProfile(profile);
+  if (calibration_str_.size()) {
+    if (!builder_->platformHasFastInt8()) {
+      FDWARNING << "Detected INT8 is not supported in the current GPU, "
+                   "will use FP32 instead."
+                << std::endl;
+    } else {
+      FDINFO << "[TrtBackend] Use INT8 for inference." << std::endl;
+      config->setFlag(nvinfer1::BuilderFlag::kINT8);
+      Int8EntropyCalibrator2* calibrator =
+          new Int8EntropyCalibrator2(calibration_str_);
+      config->setInt8Calibrator(calibrator);
+    }
+  }
+
   FDUniquePtr<nvinfer1::IHostMemory> plan{
       builder_->buildSerializedNetwork(*network_, *config)};
   if (!plan) {
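
config->setInt8Calibrator() expects an nvinfer1::IInt8Calibrator, and TensorRT does not take ownership, so the calibrator allocated above must stay alive until buildSerializedNetwork() returns. The diff does not show the Int8EntropyCalibrator2 class it instantiates; a minimal cache-only calibrator consistent with how calibration_str_ is used might look like this sketch (the class internals are assumptions):

#include <NvInfer.h>

#include <cstddef>
#include <string>

// Sketch of a cache-only entropy calibrator: it supplies no live batches;
// INT8 scales come entirely from the serialized cache string.
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
 public:
  explicit Int8EntropyCalibrator2(const std::string& cache) : cache_(cache) {}

  int32_t getBatchSize() const noexcept override { return 0; }

  // Returning false tells TensorRT there is no calibration data to run,
  // so it falls back to readCalibrationCache() below.
  bool getBatch(void* bindings[], const char* names[],
                int32_t nb_bindings) noexcept override {
    return false;
  }

  const void* readCalibrationCache(size_t& length) noexcept override {
    length = cache_.size();
    return length ? cache_.data() : nullptr;
  }

  void writeCalibrationCache(const void* cache,
                             size_t length) noexcept override {
    cache_.assign(static_cast<const char*>(cache), length);
  }

 private:
  std::string cache_;
};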