Add paddle quantize model support for ORT, TRT and MKLDNN deploy backend (#257)

* add quantize model support for trt and paddle

* fix bugs

* fix

* update paddle2onnx version

* update version

* add quantize test

Co-authored-by: Jason <jiangjiajun@baidu.com>
This commit is contained in:
yeliang2258
2022-10-09 20:00:05 +08:00
committed by GitHub
parent ff5e798b7f
commit 2a68a23baf
10 changed files with 187 additions and 5 deletions

28
fastdeploy/backends/tensorrt/trt_backend.h Normal file → Executable file
View File

@@ -26,6 +26,32 @@
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/backends/tensorrt/utils.h"
// INT8 calibrator that replays a pre-computed calibration cache so TensorRT
// can build a quantized engine without running calibration inference.
// It never produces calibration batches itself (getBatch always returns
// false); it only serves the serialized cache string given at construction.
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
 public:
  // `calibration_cache` is the raw, serialized calibration table contents
  // (copied and held for the calibrator's lifetime).
  explicit Int8EntropyCalibrator2(const std::string& calibration_cache)
      : calibration_cache_(calibration_cache) {}

  // Batch size is irrelevant when calibrating purely from a cache.
  int getBatchSize() const noexcept override { return 0; }

  // No live calibration data is supplied; returning false tells TensorRT to
  // rely solely on readCalibrationCache().
  bool getBatch(void* bindings[], const char* names[],
                int nbBindings) noexcept override {
    return false;
  }

  // Hands the cached calibration table to TensorRT. Returns nullptr (with
  // length 0) when the cache is empty so TensorRT knows none is available.
  const void* readCalibrationCache(size_t& length) noexcept override {
    length = calibration_cache_.size();
    return length ? calibration_cache_.data() : nullptr;
  }

  // Writing back a new cache is intentionally unsupported: the cache is
  // provided externally and treated as read-only.
  void writeCalibrationCache(const void* cache,
                             size_t length) noexcept override {
    std::cout << "NOT IMPLEMENTED." << '\n';
  }

 private:
  const std::string calibration_cache_;
};
namespace fastdeploy {
struct TrtValueInfo {
@@ -95,6 +121,8 @@ class TrtBackend : public BaseBackend {
std::map<std::string, FDDeviceBuffer> inputs_buffer_;
std::map<std::string, FDDeviceBuffer> outputs_buffer_;
std::string calibration_str_;
  // When a model has more than one output, the output order reported by
  // TensorRT may differ from the output order of the original ONNX model.