Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-06 00:57:33 +08:00
Add paddle quantize model support for ORT, TRT and MKLDNN deploy backend (#257)
* add quantize model support for trt and paddle
* fix bugs
* fix
* update paddle2onnx version
* update version
* add quantize test

Co-authored-by: Jason <jiangjiajun@baidu.com>
1 changed file, 28 additions:

fastdeploy/backends/tensorrt/trt_backend.h  (Normal file → Executable file)
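Note on the TensorRT change: nvinfer1::IInt8EntropyCalibrator2 supports two paths, live calibration through getBatch() and replay of a previously serialized scale table through readCalibrationCache(). The calibrator added below implements only the replay path: getBatchSize() returns 0 and getBatch() returns false, so TensorRT skips calibration batches and deserializes the INT8 quantization scales directly from the cache string carried over from the quantized Paddle model.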
@@ -26,6 +26,32 @@
 #include "fastdeploy/backends/backend.h"
 #include "fastdeploy/backends/tensorrt/utils.h"
 
+class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
+ public:
+  explicit Int8EntropyCalibrator2(const std::string& calibration_cache)
+      : calibration_cache_(calibration_cache) {}
+
+  int getBatchSize() const noexcept override { return 0; }
+
+  bool getBatch(void* bindings[], const char* names[],
+                int nbBindings) noexcept override {
+    return false;
+  }
+
+  const void* readCalibrationCache(size_t& length) noexcept override {
+    length = calibration_cache_.size();
+    return length ? calibration_cache_.data() : nullptr;
+  }
+
+  void writeCalibrationCache(const void* cache,
+                             size_t length) noexcept override {
+    std::cout << "NOT IMPLEMENT." << std::endl;
+  }
+
+ private:
+  const std::string calibration_cache_;
+};
+
 namespace fastdeploy {
 
 struct TrtValueInfo {

@@ -95,6 +121,8 @@ class TrtBackend : public BaseBackend {
   std::map<std::string, FDDeviceBuffer> inputs_buffer_;
   std::map<std::string, FDDeviceBuffer> outputs_buffer_;
 
+  std::string calibration_str_;
+
   // Sometimes while the number of outputs > 1
   // the output order of tensorrt may not be same
   // with the original onnx model