Add paddle quantize model support for ORT, TRT and MKLDNN deploy backend (#257)

* add quantize model support for trt and paddle

* fix bugs

* fix

* update paddle2onnx version

* update version

* add quantize test

Co-authored-by: Jason <jiangjiajun@baidu.com>
Authored by yeliang2258 on 2022-10-09 20:00:05 +08:00; committed by GitHub.
parent ff5e798b7f
commit 2a68a23baf
10 changed files with 187 additions and 5 deletions

View File

@@ -16,13 +16,23 @@
namespace fastdeploy {
void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
void PaddleBackend::BuildOption(const PaddleBackendOption& option,
const std::string& model_file) {
if (option.use_gpu) {
config_.EnableUseGpu(option.gpu_mem_init_size, option.gpu_id);
} else {
config_.DisableGpu();
if (option.enable_mkldnn) {
config_.EnableMKLDNN();
std::string contents;
if (!ReadBinaryFromFile(model_file, &contents)) {
return;
}
auto reader =
paddle2onnx::PaddleReader(contents.c_str(), contents.size());
if (reader.is_quantize_model) {
config_.EnableMkldnnInt8();
}
config_.SetMkldnnCacheCapacity(option.mkldnn_cache_size);
}
}
@@ -52,7 +62,7 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
return false;
}
config_.SetModel(model_file, params_file);
BuildOption(option);
BuildOption(option, model_file);
predictor_ = paddle_infer::CreatePredictor(config_);
std::vector<std::string> input_names = predictor_->GetInputNames();
std::vector<std::string> output_names = predictor_->GetOutputNames();