Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-06 00:57:33 +08:00
Add paddle quantize model support for ORT, TRT and MKLDNN deploy backend (#257)
* add quantize model support for trt and paddle
* fix bugs
* fix
* update paddle2onnx version
* update version
* add quantize test

Co-authored-by: Jason <jiangjiajun@baidu.com>
1 changed file, 28 additions:

fastdeploy/backends/tensorrt/trt_backend.h  (Normal file → Executable file)
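Note on the TensorRT change: nvinfer1::IInt8EntropyCalibrator2 supports two paths, live calibration through getBatch() and replay of a previously serialized scale table through readCalibrationCache(). The calibrator added below implements only the replay path: getBatchSize() returns 0 and getBatch() returns false, so TensorRT skips calibration batches and deserializes the INT8 quantization scales directly from the cache string carried over from the quantized Paddle model.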
@@ -26,6 +26,32 @@
 #include "fastdeploy/backends/backend.h"
 #include "fastdeploy/backends/tensorrt/utils.h"
 
+class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
+ public:
+  explicit Int8EntropyCalibrator2(const std::string& calibration_cache)
+      : calibration_cache_(calibration_cache) {}
+
+  int getBatchSize() const noexcept override { return 0; }
+
+  bool getBatch(void* bindings[], const char* names[],
+                int nbBindings) noexcept override {
+    return false;
+  }
+
+  const void* readCalibrationCache(size_t& length) noexcept override {
+    length = calibration_cache_.size();
+    return length ? calibration_cache_.data() : nullptr;
+  }
+
+  void writeCalibrationCache(const void* cache,
+                             size_t length) noexcept override {
+    std::cout << "NOT IMPLEMENT." << std::endl;
+  }
+
+ private:
+  const std::string calibration_cache_;
+};
+
 namespace fastdeploy {
 
 struct TrtValueInfo {

@@ -95,6 +121,8 @@ class TrtBackend : public BaseBackend {
   std::map<std::string, FDDeviceBuffer> inputs_buffer_;
   std::map<std::string, FDDeviceBuffer> outputs_buffer_;
 
+  std::string calibration_str_;
+
   // Sometimes while the number of outputs > 1
   // the output order of tensorrt may not be same
   // with the original onnx model