TensorRT support

we0091234
2022-12-10 10:05:16 +08:00
parent 73178d353f
commit 8c4e58f2c8
20 changed files with 2374 additions and 13 deletions

.gitignore (3 lines changed)

@@ -29,4 +29,5 @@ plate/
!*.md
!*.txt
!*.yaml
!*.ttf
!*.cu

export.py

@@ -67,9 +67,10 @@ if __name__ == '__main__':
# elif isinstance(m, models.yolo.Detect):
# m.forward = m.forward_export # assign forward (optional)
model.model[-1].export = not (opt.grid or opt.export_nms) # set Detect() layer grid export
model.model[-1].export_cat = True # onnx export: emit the fused concatenated output
for _ in range(2):
y = model(img) # dry runs
output_names = None
output_names = ["output"]
if opt.export_nms:
nms = models.common.NMS(conf=0.01, kpt_label=4)
nms_export = models.common.NMS_Export(conf=0.01, kpt_label=4)
@@ -83,16 +84,16 @@ if __name__ == '__main__':
print(f"\n{colorstr('PyTorch:')} starting from {opt.weights} ({file_size(opt.weights):.1f} MB)")
# TorchScript export -----------------------------------------------------------------------------------------------
prefix = colorstr('TorchScript:')
try:
print(f'\n{prefix} starting export with torch {torch.__version__}...')
f = opt.weights.replace('.pt', '.torchscript.pt') # filename
ts = torch.jit.trace(model, img, strict=False)
ts = optimize_for_mobile(ts) # https://pytorch.org/tutorials/recipes/script_optimized.html
ts.save(f)
print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
except Exception as e:
print(f'{prefix} export failure: {e}')
# prefix = colorstr('TorchScript:')
# try:
# print(f'\n{prefix} starting export with torch {torch.__version__}...')
# f = opt.weights.replace('.pt', '.torchscript.pt') # filename
# ts = torch.jit.trace(model, img, strict=False)
# ts = optimize_for_mobile(ts) # https://pytorch.org/tutorials/recipes/script_optimized.html
# ts.save(f)
# print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
# except Exception as e:
# print(f'{prefix} export failure: {e}')
# ONNX export ------------------------------------------------------------------------------------------------------
prefix = colorstr('ONNX:')

models/yolo.py

@@ -215,7 +215,7 @@ class IDetect(nn.Module):
class IKeypoint(nn.Module):
stride = None # strides computed during build
export = False # onnx export
export_cat = False # onnx export cat output
def __init__(self, nc=80, anchors=(), nkpt=5, ch=(), inplace=True, dw_conv_kpt=False): # detection layer
super(IKeypoint, self).__init__()
self.nc = nc # number of classes
@@ -254,6 +254,57 @@ class IKeypoint(nn.Module):
# x = x.copy() # for profiling
z = [] # inference output
self.training |= self.export
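# Fused export path: decode boxes and keypoints here so the exported ONNX graph ends in a
# single concatenated (bs, na*ny*nx, no) tensor named "output" for the TensorRT pipeline.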
if self.export_cat:
for i in range(self.nl):
x[i] = torch.cat((self.im[i](self.m[i](self.ia[i](x[i]))), self.m_kpt[i](x[i])), axis=1)
bs, _, ny, nx = map(int,x[i].shape) # x(bs,255,20,20) to x(bs,3,20,20,85)
bs=-1 # use a dynamic batch dimension in the exported graph
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
x_det = x[i][..., :5+self.nc]
x_kpt = x[i][..., 5+self.nc:]
if self.grid[i].shape[2:4] != x[i].shape[2:4]:
self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
kpt_grid_x = self.grid[i][:,:,:,:, 0:1]
kpt_grid_y = self.grid[i][:,:,:,:, 1:2]
y = x_det.sigmoid()
xy = (y[:,:,:,:, 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy
wh = (y[:,:,:,:, 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, self.na, 1, 1, 2) # wh
classify = y[..., 4:]
# x_kpt[:,:,:,:, 0::3] = (x_kpt[:,:,:,:, ::3] * 2. - 0.5 + kpt_grid_x.repeat(1,1,1,1,self.nkpt)) * self.stride[i] # xy
# x_kpt[:,:,:,:,1::3] = (x_kpt[:,:,:,:, 1::3] * 2. - 0.5 + kpt_grid_y.repeat(1,1,1,1,self.nkpt)) * self.stride[i] # xy
# x_kpt[:,:,:,:,2::3] = x_kpt[:,:,:,:, 2::3].sigmoid()
x1=(x_kpt[:,:,:,:, 0:1] * 2. - 0.5 + kpt_grid_x) * self.stride[i]
y1=(x_kpt[:,:,:,:, 1:2] * 2. - 0.5 + kpt_grid_y) * self.stride[i]
s1=x_kpt[:,:,:,:, 2:3].sigmoid()
landmarks1=torch.cat((x1,y1,s1),-1)
x2=(x_kpt[:,:,:,:, 3:4] * 2. - 0.5 + kpt_grid_x) * self.stride[i]
y2=(x_kpt[:,:,:,:, 4:5] * 2. - 0.5 + kpt_grid_y) * self.stride[i]
s2=x_kpt[:,:,:,:, 5:6].sigmoid()
landmarks2=torch.cat((x2,y2,s2),-1)
x3=(x_kpt[:,:,:,:, 6:7] * 2. - 0.5 + kpt_grid_x) * self.stride[i]
y3=(x_kpt[:,:,:,:, 7:8] * 2. - 0.5 + kpt_grid_y) * self.stride[i]
s3=x_kpt[:,:,:,:, 8:9].sigmoid()
landmarks3=torch.cat((x3,y3,s3),-1)
x4=(x_kpt[:,:,:,:, 9:10] * 2. - 0.5 + kpt_grid_x) * self.stride[i]
y4=(x_kpt[:,:,:,:, 10:11] * 2. - 0.5 + kpt_grid_y) * self.stride[i]
s4=x_kpt[:,:,:,:, 11:12].sigmoid()
landmarks4=torch.cat((x4,y4,s4),-1)
y = torch.cat((xy, wh, classify, landmarks1, landmarks2, landmarks3, landmarks4), dim=-1)
z.append(y.view(bs, self.na*nx*ny, self.no))
return torch.cat(z,1)
for i in range(self.nl):
if self.nkpt is None or self.nkpt==0:
x[i] = self.im[i](self.m[i](self.ia[i](x[i]))) # conv

tensorrt/.gitignore (new file, 28 lines)

@@ -0,0 +1,28 @@
# .gitignore
# First, ignore everything
*
# but do not ignore directories
!*/
# ignore these specific directories
ut/
runs/
.vscode/
build/
# do not ignore the following file types
!*.cpp
!*.h
!*.hpp
!*.c
!.gitignore
!*.py
!*.sh
!*.npy
!*.jpg
!*.pth
!*.txt
!*.md
!*.yaml
!*.png
!*.onnx
!*.cu

tensorrt/CMakeLists.txt (new file, 34 lines)

@@ -0,0 +1,34 @@
cmake_minimum_required(VERSION 3.10)
project(plate_rec)
add_definitions(-std=c++11)
add_definitions(-w)
# option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
find_package(CUDA REQUIRED)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Release)
#cuda
include_directories(/mnt/Gu/softWare/cuda-11.0/targets/x86_64-linux/include)
link_directories(/mnt/Gu/softWare/cuda-11.0/targets/x86_64-linux/lib)
#tensorrt
include_directories(/mnt/Gpan/tensorRT/TensorRT-8.2.0.6/include/)
link_directories(/mnt/Gpan/tensorRT/TensorRT-8.2.0.6/lib/)
#opencv
find_package(OpenCV)
include_directories(${OpenCV_INCLUDE_DIRS})
include_directories(${PROJECT_SOURCE_DIR}/include)
#onnx2trt
add_subdirectory(${PROJECT_SOURCE_DIR}/onnx2trt)
cuda_add_executable(plate_rec detect_rec_plate.cpp utils.cpp preprocess.cu)
target_link_libraries(plate_rec nvinfer)
target_link_libraries(plate_rec cudart)
target_link_libraries(plate_rec nvonnxparser)
target_link_libraries(plate_rec ${OpenCV_LIBS})
add_definitions(-O2 -pthread)

tensorrt/README.md (new file, 36 lines)

@@ -0,0 +1,36 @@
# yolov7 license plate recognition with TensorRT
1. Edit CMakeLists.txt and replace the CUDA, TensorRT, and OpenCV paths with your own:
```
#cuda
include_directories(/mnt/Gu/softWare/cuda-11.0/targets/x86_64-linux/include)
link_directories(/mnt/Gu/softWare/cuda-11.0/targets/x86_64-linux/lib)
#tensorrt
include_directories(/mnt/Gpan/tensorRT/TensorRT-8.2.0.6/include/)
link_directories(/mnt/Gpan/tensorRT/TensorRT-8.2.0.6/lib/)
```
2. Build:
```
1. mkdir build
2. cd build
3. cmake ..
4. make
```
3. Convert the ONNX models to TensorRT engines (for the ONNX models, see [license plate recognition](https://github.com/we0091234/Chinese_license_plate_detection_recognition)):
```
# run from the build directory
#1 build the detection engine
./onnx2trt/onnx2trt ../onnx_model/plate_detect.onnx ./plate_detect.trt 1
#2 build the recognition engine
./onnx2trt/onnx2trt ../onnx_model/plate_rec.onnx ./plate_rec.trt 1
```
4. Inference:
```
./plate_rec ./plate_detect.trt ./plate_rec.trt ../test_imgs
```
Results are printed to the console.
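
The C++ code assumes the engines expose an input tensor named `images` and an output tensor named `output` (see the asserts in `doInference`). If a converted engine trips those asserts, dumping the bindings helps; the sketch below is illustrative only (not part of this commit) and uses the same TensorRT 8.2 binding API that the rest of the code relies on:
```
// binding_dump.cpp -- illustrative sketch, not part of this commit.
// Prints each binding's direction, name, and dimensions for a serialized engine.
// Build it like onnx2trt (link nvinfer) and pass the .trt path as argv[1].
#include <fstream>
#include <iostream>
#include <iterator>
#include <vector>
#include "NvInfer.h"
#include "logging.h"

static Logger gLogger;

int main(int argc, char** argv) {
    std::ifstream file(argv[1], std::ios::binary); // serialized engine
    std::vector<char> blob((std::istreambuf_iterator<char>(file)),
                           std::istreambuf_iterator<char>());
    auto* runtime = nvinfer1::createInferRuntime(gLogger);
    auto* engine = runtime->deserializeCudaEngine(blob.data(), blob.size());
    for (int i = 0; i < engine->getNbBindings(); ++i) {
        auto dims = engine->getBindingDimensions(i);
        std::cout << (engine->bindingIsInput(i) ? "input  " : "output ")
                  << engine->getBindingName(i) << " [";
        for (int j = 0; j < dims.nbDims; ++j)
            std::cout << dims.d[j] << (j + 1 < dims.nbDims ? "," : "");
        std::cout << "]" << std::endl;
    }
    engine->destroy();
    runtime->destroy();
    return 0;
}
```
Note that a serialized `.trt` engine is tied to the GPU and TensorRT version it was built with, so regenerate the engines on the target machine rather than copying them across.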

tensorrt/detect_rec_plate.cpp (new file, 897 lines)

@@ -0,0 +1,897 @@
#include <fstream>
#include <iostream>
#include <sstream>
#include <numeric>
#include <chrono>
#include <vector>
#include <opencv2/opencv.hpp>
#include <dirent.h>
#include "NvInfer.h"
#include "cuda_runtime_api.h"
#include "logging.h"
#include "include/utils.hpp"
#include "preprocess.h"
#define MAX_IMAGE_INPUT_SIZE_THRESH 5000 * 5000
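// Upper bound on input image pixels; main() sizes the pinned host and device staging buffers from it.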
struct bbox
{
float x1,x2,y1,y2;
float landmarks[8];
float score;
};
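// Comparator for std::sort: order candidate boxes by descending confidence.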
bool my_func(bbox a,bbox b)
{
return a.score>b.score;
}
float get_IOU(bbox a,bbox b)
{
float x1 = std::max(a.x1,b.x1);
float x2 = std::min(a.x2,b.x2);
float y1 = std::max(a.y1,b.y1);
float y2 = std::min(a.y2,b.y2);
float w = std::max(0.0f,x2-x1);
float h = std::max(0.0f,y2-y1);
float inter_area = w*h;
float union_area = (a.x2-a.x1)*(a.y2-a.y1)+(b.x2-b.x1)*(b.y2-b.y1)-inter_area;
float IOU = 1.0*inter_area/ union_area;
return IOU;
}
#define CHECK(status) \
do\
{\
auto ret = (status);\
if (ret != 0)\
{\
std::cerr << "Cuda failure: " << ret << std::endl;\
abort();\
}\
} while (0)
#define DEVICE 0 // GPU id
#define NMS_THRESH 0.45
#define BBOX_CONF_THRESH 0.3
using namespace nvinfer1;
// stuff we know about the network and the input/output blobs
const std::vector<std::string> plate_string={"#","京","沪","津","渝","冀","晋","蒙","辽","吉","黑","苏","浙","皖", \
"闽","赣","鲁","豫","鄂","湘","粤","桂","琼","川","贵","云","藏","陕","甘","青","宁","新","学","警","港","澳","挂","使","领","民","航","危", \
"0","1","2","3","4","5","6","7","8","9","A","B","C","D","E","F","G","H","J","K","L","M","N","P","Q","R","S","T","U","V","W","X","Y","Z"};
const std::vector<std::string> plate_string_yinwen={"#","<beijing>","<hu>","<tianjin>","<chongqing>","<hebei>","<jing>","<meng>","<liao>","<jilin>","<hei>","<su>","<zhe>","<wan>", \
"<fujian>","<gan>","<lun>","<henan>","<hubei>","<hunan>","<yue>","<guangxi>","<qiong>","<chuan>","<guizhou>","<yun>","<zang>","<shanxi>","<gan>","<qinghai>",\
"<ning>","<xin>","<xue>","<police>","<hongkang>","<Macao>","<gua>","<shi>","<ling>","<min>","<hang>","<shen>", \
"0","1","2","3","4","5","6","7","8","9","A","B","C","D","E","F","G","H","J","K","L","M","N","P","Q","R","S","T","U","V","W","X","Y","Z"};
static const int INPUT_W = 640;
static const int INPUT_H = 640;
static const int NUM_CLASSES = 2; // two classes: single-layer and double-layer plates
const char* INPUT_BLOB_NAME = "images"; // ONNX input name
const char* OUTPUT_BLOB_NAME = "output"; // ONNX output name
static Logger gLogger;
cv::Mat static_resize(cv::Mat& img,int &top,int &left) // letterbox as in YOLOv5: resize keeping aspect ratio, pad to INPUT_W x INPUT_H with gray (114)
{
float r = std::min(INPUT_W / (img.cols*1.0), INPUT_H / (img.rows*1.0));
// r = std::min(r, 1.0f);
int unpad_w = r * img.cols;
int unpad_h = r * img.rows;
left = (INPUT_W-unpad_w)/2;
top = (INPUT_H-unpad_h)/2;
int right = INPUT_W-unpad_w-left;
int bottom = INPUT_H-unpad_h-top;
cv::Mat re(unpad_h, unpad_w, CV_8UC3);
cv::resize(img, re, re.size());
cv::Mat out;
cv::copyMakeBorder(re,out,top,bottom,left,right,cv::BORDER_CONSTANT,cv::Scalar(114,114,114));
return out;
}
struct Object
{
cv::Rect_<float> rect; // bounding box
float landmarks[8]; // 4 keypoints as (x, y) pairs
int label;
float prob;
};
static inline float intersection_area(const Object& a, const Object& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
{
int i = left;
int j = right;
float p = faceobjects[(left + right) / 2].prob;
while (i <= j)
{
while (faceobjects[i].prob > p)
i++;
while (faceobjects[j].prob < p)
j--;
if (i <= j)
{
// swap
std::swap(faceobjects[i], faceobjects[j]);
i++;
j--;
}
}
#pragma omp parallel sections
{
#pragma omp section
{
if (left < j) qsort_descent_inplace(faceobjects, left, j);
}
#pragma omp section
{
if (i < right) qsort_descent_inplace(faceobjects, i, right);
}
}
}
static void qsort_descent_inplace(std::vector<Object>& objects)
{
if (objects.empty())
return;
qsort_descent_inplace(objects, 0, objects.size() - 1);
}
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
{
picked.clear();
const int n = faceobjects.size();
std::vector<float> areas(n);
for (int i = 0; i < n; i++)
{
areas[i] = faceobjects[i].rect.area();
}
for (int i = 0; i < n; i++)
{
const Object& a = faceobjects[i];
int keep = 1;
for (int j = 0; j < (int)picked.size(); j++)
{
const Object& b = faceobjects[picked[j]];
// intersection over union
float inter_area = intersection_area(a, b);
float union_area = areas[i] + areas[picked[j]] - inter_area;
// float IoU = inter_area / union_area
if (inter_area / union_area > nms_threshold)
keep = 0;
}
if (keep)
picked.push_back(i);
}
}
std::vector<int> my_nms(std::vector<bbox> &bboxes, float nms_threshold)
{
std::vector<int> choice;
for(int i = 0; i<bboxes.size(); i++)
{
int keep = 1;
for(int j = 0; j<choice.size(); j++)
{
float IOU = get_IOU(bboxes[i],bboxes[choice[j]]);
if (IOU>nms_threshold)
keep = 0;
}
if (keep)
choice.push_back(i);
}
return choice;
}
int find_max(float *prob,int num) // index of the highest score, i.e. the class id
{
int max= 0;
for(int i=1; i<num; i++)
{
if (prob[max]<prob[i])
max = i;
}
return max;
}
static void generate_yolox_proposals(float *feat_blob, float prob_threshold,
std::vector<Object> &objects,int OUTPUT_CANDIDATES) {
const int num_class = 2;
const int ckpt = 12; // yolov7 uses 12 (4 keypoints x (x, y, score)); yolov5 uses 8 (4 keypoints x (x, y))
const int num_anchors = OUTPUT_CANDIDATES;
for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++) {
// const int basic_pos = anchor_idx * (num_class + 5 + 1);
// float box_objectness = feat_blob[basic_pos + 4];
// int cls_id = feat_blob[basic_pos + 5];
// float score = feat_blob[basic_pos + 5 + 1 + cls_id];
// score *= box_objectness;
const int basic_pos = anchor_idx * (num_class + 5 + ckpt); // 5 = x, y, w, h, object_score; ckpt = keypoint channels
float box_objectness = feat_blob[basic_pos + 4];
// int cls_id = find_max(&feat_blob[basic_pos +5+ckpt],num_class); // class id (v5 layout)
int cls_id = find_max(&feat_blob[basic_pos +5],num_class); // class id (v7 layout)
// float score = feat_blob[basic_pos + 5 +8 + cls_id]; // v5
float score = feat_blob[basic_pos + 5 + cls_id]; // v7
score *= box_objectness;
if (score > prob_threshold) {
// yolox/models/yolo_head.py decode logic
float x_center = feat_blob[basic_pos + 0];
float y_center = feat_blob[basic_pos + 1];
float w = feat_blob[basic_pos + 2];
float h = feat_blob[basic_pos + 3];
float x0 = x_center - w * 0.5f;
float y0 = y_center - h * 0.5f;
// float *landmarks=&feat_blob[basic_pos +5]; //v5
float *landmarks=&feat_blob[basic_pos +5+num_class];
Object obj;
obj.rect.x = x0;
obj.rect.y = y0;
obj.rect.width = w;
obj.rect.height = h;
obj.label = cls_id;
obj.prob = score;
int k = 0;
// for (int i = 0; i<ckpt; i++)
// {
// obj.landmarks[k++]=landmarks[i];
// }
obj.landmarks[0]=landmarks[0];
obj.landmarks[1]=landmarks[1];
obj.landmarks[2]=landmarks[3];
obj.landmarks[3]=landmarks[4];
obj.landmarks[4]=landmarks[6];
obj.landmarks[5]=landmarks[7];
obj.landmarks[6]=landmarks[9];
obj.landmarks[7]=landmarks[10];
objects.push_back(obj);
}
}
}
static void generate_proposals(float *feat_blob, float prob_threshold,
std::vector<bbox> &bboxes,int OUTPUT_CANDIDATES) {
const int num_class = 3;
const int num_anchors = OUTPUT_CANDIDATES;
for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++) {
// const int basic_pos = anchor_idx * (num_class + 5 + 1);
// float box_objectness = feat_blob[basic_pos + 4];
// int cls_id = feat_blob[basic_pos + 5];
// float score = feat_blob[basic_pos + 5 + 1 + cls_id];
// score *= box_objectness;
const int basic_pos = anchor_idx * (num_class + 5 + 8); // 5 = x, y, w, h, object_score; 8 = 4 keypoints as (x, y)
float box_objectness = feat_blob[basic_pos + 4];
int cls_id = find_max(&feat_blob[basic_pos +5+8],num_class); // pick the class with the highest score
float score = feat_blob[basic_pos + 5 +8 + cls_id];
score *= box_objectness;
if (score > prob_threshold) {
// yolox/models/yolo_head.py decode logic
float x_center = feat_blob[basic_pos + 0];
float y_center = feat_blob[basic_pos + 1];
float w = feat_blob[basic_pos + 2];
float h = feat_blob[basic_pos + 3];
float x0 = x_center - w * 0.5f;
float y0 = y_center - h * 0.5f;
float *landmarks=&feat_blob[basic_pos +5];
bbox obj;
obj.x1=x0;
obj.y1=y0;
obj.x2=x0+w;
obj.y2=y0+h;
obj.score = score;
for (int i = 0; i<8; i++)
{
obj.landmarks[i]=landmarks[i];
}
bboxes.push_back(obj);
}
}
}
float* blobFromImage(cv::Mat& img){
float* blob = new float[img.total()*3];
int channels = 3;
int img_h = img.rows;
int img_w = img.cols;
int k = 0;
for (size_t c = 0; c < channels; c++)
{
for (size_t h = 0; h < img_h; h++)
{
for (size_t w = 0; w < img_w; w++)
{
// blob[c * img_w * img_h + h * img_w + w] =
// (float)img.at<cv::Vec3b>(h, w)[c];
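// Index 2-c swaps OpenCV's BGR channel order to RGB while normalizing to [0, 1].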
blob[k++] =
(float)img.at<cv::Vec3b>(h, w)[2-c]/255.0;
}
}
}
return blob;
}
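// Preprocessing for the recognition net: (x/255 - mean) / std per channel, keeping OpenCV's BGR channel order.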
void blobFromImage_plate(cv::Mat& img,float mean_value,float std_value,float *blob)
{
// float* blob = new float[img.total()*3];
// int channels = NUM_CLASSES;
int img_h = img.rows;
int img_w = img.cols;
int k = 0;
for (size_t c = 0; c <3; c++)
{
for (size_t h = 0; h < img_h; h++)
{
for (size_t w = 0; w < img_w; w++)
{
blob[k++] =
((float)img.at<cv::Vec3b>(h, w)[c]/255.0-mean_value)/std_value;
}
}
}
// return blob;
}
static void decode_outputs(float* prob, std::vector<Object>& objects, float scale, const int img_w, const int img_h,int OUTPUT_CANDIDATES,int top,int left) {
std::vector<Object> proposals;
std::vector<bbox> bboxes;
generate_yolox_proposals(prob, BBOX_CONF_THRESH, proposals,OUTPUT_CANDIDATES);
// generate_proposals(prob, BBOX_CONF_THRESH, bboxes,OUTPUT_CANDIDATES);
// std::cout << "num of boxes before nms: " << proposals.size() << std::endl;
qsort_descent_inplace(proposals);
// std::sort(bboxes.begin(),bboxes.end(),my_func);
std::vector<int> picked;
nms_sorted_bboxes(proposals, picked, NMS_THRESH);
// auto choice =my_nms(bboxes, NMS_THRESH);
int count = picked.size();
// std::cout << "num of boxes: " << count << std::endl;
objects.resize(count);
for (int i = 0; i < count; i++)
{
objects[i] = proposals[picked[i]];
// adjust offset to original unpadded
float x0 = (objects[i].rect.x-left) / scale;
float y0 = (objects[i].rect.y-top) / scale;
float x1 = (objects[i].rect.x + objects[i].rect.width-left) / scale;
float y1 = (objects[i].rect.y + objects[i].rect.height-top) / scale;
float *landmarks = objects[i].landmarks;
for(int k = 0; k<8; k++) // undo the letterbox offset/scale for the 4 keypoints (even index = x, odd = y)
{
if(k%2==0)
landmarks[k]=(landmarks[k]-left)/scale;
else
landmarks[k]=(landmarks[k]-top)/scale;
}
// clip
x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);
objects[i].rect.x = x0;
objects[i].rect.y = y0;
objects[i].rect.width = x1 - x0;
objects[i].rect.height = y1 - y0;
}
}
const float color_list[4][3] =
{
{255, 0, 0},
{0, 255, 0},
{0, 0, 255},
{0, 255, 255},
};
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects, std::string f)
{
static const char* class_names[] = {
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
"hair drier", "toothbrush"
};
cv::Mat image = bgr.clone();
for (size_t i = 0; i < objects.size(); i++)
{
const Object& obj = objects[i];
// fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
// obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);
cv::Scalar color = cv::Scalar(color_list[obj.label][0], color_list[obj.label][1], color_list[obj.label][2]);
float c_mean = cv::mean(color)[0];
cv::Scalar txt_color;
if (c_mean > 0.5){
txt_color = cv::Scalar(0, 0, 0);
}else{
txt_color = cv::Scalar(255, 255, 255);
}
cv::rectangle(image, obj.rect, color * 255, 2);
char text[256];
sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100);
int baseLine = 0;
cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
cv::Scalar txt_bk_color = color * 0.7 * 255;
int x = obj.rect.x;
int y = obj.rect.y + 1;
//int y = obj.rect.y - label_size.height - baseLine;
if (y > image.rows)
y = image.rows;
//if (x + label_size.width > image.cols)
//x = image.cols - label_size.width;
cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
txt_bk_color, -1);
cv::putText(image, text, cv::Point(x, y + label_size.height),
cv::FONT_HERSHEY_SIMPLEX, 0.4, txt_color, 1);
}
int pos = f.find_last_of("/");
auto substr = f.substr(pos+1);
std::string savePath = "/mnt/Gpan/Mydata/pytorchPorject/yoloxNew/newYoloxCpp/result_pic/"+substr;
cv::imwrite(savePath, image);
// fprintf(stderr, "save vis file\n");
// cv::imshow("image", image);
// cv::waitKey(0);
}
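// Synchronous single-image inference helper: allocates device buffers, copies the blob in,
// enqueues the batch, copies the result back, then frees everything. (The detection path in
// main() uses doInference_cu with persistent buffers instead.)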
void doInference(IExecutionContext& context, float* input, float* output, const int output_size, cv::Size input_shape,const char *INPUT_BLOB_NAME,const char *OUTPUT_BLOB_NAME) {
const ICudaEngine& engine = context.getEngine();
// Pointers to input and output device buffers to pass to engine.
// Engine requires exactly IEngine::getNbBindings() number of buffers.
assert(engine.getNbBindings() == 2);
void* buffers[2];
// In order to bind the buffers, we need to know the names of the input and output tensors.
// Note that indices are guaranteed to be less than IEngine::getNbBindings()
const int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME);
assert(engine.getBindingDataType(inputIndex) == nvinfer1::DataType::kFLOAT);
const int outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);
assert(engine.getBindingDataType(outputIndex) == nvinfer1::DataType::kFLOAT);
int mBatchSize = engine.getMaxBatchSize();
// Create GPU buffers on device
CHECK(cudaMalloc(&buffers[inputIndex], 3 * input_shape.height * input_shape.width * sizeof(float)));
CHECK(cudaMalloc(&buffers[outputIndex], output_size*sizeof(float)));
// Create stream
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));
// DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
CHECK(cudaMemcpyAsync(buffers[inputIndex], input, 3 * input_shape.height * input_shape.width * sizeof(float), cudaMemcpyHostToDevice, stream));
context.enqueue(1, buffers, stream, nullptr);
// context.enqueueV2( buffers, stream, nullptr);
CHECK(cudaMemcpyAsync(output, buffers[outputIndex], output_size * sizeof(float), cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);
// Release stream and buffers
cudaStreamDestroy(stream);
CHECK(cudaFree(buffers[inputIndex]));
CHECK(cudaFree(buffers[outputIndex]));
}
float getNorm2(float x,float y)
{
return sqrt(x*x+y*y);
}
cv::Mat getTransForm(cv::Mat &src_img, cv::Point2f order_rect[4]) // perspective transform: rectify the plate quadrilateral to an upright rectangle
{
cv::Point2f w1=order_rect[0]-order_rect[1];
cv::Point2f w2=order_rect[2]-order_rect[3];
auto width1 = getNorm2(w1.x,w1.y);
auto width2 = getNorm2(w2.x,w2.y);
auto maxWidth = std::max(width1,width2);
cv::Point2f h1=order_rect[0]-order_rect[3];
cv::Point2f h2=order_rect[1]-order_rect[2];
auto height1 = getNorm2(h1.x,h1.y);
auto height2 = getNorm2(h2.x,h2.y);
auto maxHeight = std::max(height1,height2);
// perspective transform: map the 4 source corners to an axis-aligned rectangle
std::vector<cv::Point2f> pts_ori(4);
std::vector<cv::Point2f> pts_std(4);
pts_ori[0]=order_rect[0];
pts_ori[1]=order_rect[1];
pts_ori[2]=order_rect[2];
pts_ori[3]=order_rect[3];
pts_std[0]=cv::Point2f(0,0);
pts_std[1]=cv::Point2f(maxWidth,0);
pts_std[2]=cv::Point2f(maxWidth,maxHeight);
pts_std[3]=cv::Point2f(0,maxHeight);
cv::Mat M = cv::getPerspectiveTransform(pts_ori,pts_std);
cv::Mat dstimg;
cv::warpPerspective(src_img,dstimg,M,cv::Size(maxWidth,maxHeight));
return dstimg;
}
cv::Mat get_split_merge(cv::Mat &img) // double-layer plate: split the two text rows and stitch them side by side
{
cv::Rect upper_rect_area = cv::Rect(0,0,img.cols,int(5.0/12*img.rows)); // top row: upper 5/12 of the plate
cv::Rect lower_rect_area = cv::Rect(0,int(1.0/3*img.rows),img.cols,img.rows-int(1.0/3*img.rows)); // bottom row: lower 2/3 (the regions overlap slightly)
cv::Mat img_upper = img(upper_rect_area);
cv::Mat img_lower =img(lower_rect_area);
cv::resize(img_upper,img_upper,img_lower.size());
cv::Mat out(img_lower.rows,img_lower.cols+img_upper.cols, CV_8UC3, cv::Scalar(114, 114, 114));
img_upper.copyTo(out(cv::Rect(0,0,img_upper.cols,img_upper.rows)));
img_lower.copyTo(out(cv::Rect(img_upper.cols,0,img_lower.cols,img_lower.rows)));
return out;
}
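// Greedy CTC-style decoding: "#" (index 0) is the blank symbol; blanks are dropped and
// consecutive repeats of the same character are collapsed.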
std::string decode_outputs(float *prob,int output_size)
{
std::string plate ="";
std::string pre_str ="#";
for (int i = 0; i<output_size; i++)
{
int index = int(prob[i]);
if (plate_string[index]!="#" && plate_string[index]!=pre_str)
plate+=plate_string[index];
pre_str = plate_string[index];
}
return plate;
}
std::string decode_outputs_pingyin(float *prob,int output_size) // same decoding, emitting pinyin placeholders instead of Chinese characters
{
std::string plate ="";
std::string pre_str ="#";
for (int i = 0; i<output_size; i++)
{
int index = int(prob[i]);
if (plate_string_yinwen[index]!="#" && plate_string_yinwen[index]!=pre_str)
plate+=plate_string_yinwen[index];
pre_str = plate_string_yinwen[index];
}
return plate;
}
void doInference_cu(IExecutionContext& context, cudaStream_t& stream, void **buffers, float* output, int batchSize,int OUTPUT_SIZE) {
// infer on the batch asynchronously, and DMA output back to host
context.enqueue(batchSize, buffers, stream, nullptr);
CHECK(cudaMemcpyAsync(output, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);
}
int main(int argc, char** argv) {
cudaSetDevice(DEVICE);
char *trtModelStreamDet{nullptr};
char *trtModelStreamRec{nullptr};
size_t size{0};
size_t size_rec{0};
// argv[1]="/mnt/Gu/xiaolei/cplusplus/trt_project/chinese_plate_recoginition/build/plate_detect.trt";
// argv[2]="/mnt/Gu/xiaolei/cplusplus/trt_project/chinese_plate_recoginition/build/plate_rec.trt";
// argv[3]="/mnt/Gu/xiaolei/cplusplus/trt_project/chinese_plate_recoginition/test_imgs/single_blue.jpg";
// argv[4]="output.jpg";
const std::string engine_file_path {argv[1]};
std::ifstream file(engine_file_path, std::ios::binary);
if (file.good()) {
file.seekg(0, file.end);
size = file.tellg();
file.seekg(0, file.beg);
trtModelStreamDet = new char[size];
assert(trtModelStreamDet);
file.read(trtModelStreamDet, size);
file.close();
}
const std::string engine_file_path_rec {argv[2]};
std::ifstream file_rec(engine_file_path_rec, std::ios::binary);
if (file_rec.good()) {
file_rec.seekg(0, file_rec.end);
size_rec = file_rec.tellg();
file_rec.seekg(0, file_rec.beg);
trtModelStreamRec = new char[size_rec];
assert(trtModelStreamRec);
file_rec.read(trtModelStreamRec, size_rec);
file_rec.close();
}
// initialize the TensorRT runtime/engine/context for the detection model
IRuntime* runtime_det = createInferRuntime(gLogger);
assert(runtime_det != nullptr);
ICudaEngine* engine_det = runtime_det->deserializeCudaEngine(trtModelStreamDet, size);
assert(engine_det != nullptr);
IExecutionContext* context_det = engine_det->createExecutionContext();
assert(context_det != nullptr);
delete[] trtModelStreamDet;
// initialize the TensorRT runtime/engine/context for the recognition model
IRuntime* runtime_rec = createInferRuntime(gLogger);
assert(runtime_rec!= nullptr);
ICudaEngine* engine_rec = runtime_rec->deserializeCudaEngine(trtModelStreamRec, size_rec);
assert(engine_rec != nullptr);
IExecutionContext* context_rec = engine_rec->createExecutionContext();
assert(context_rec != nullptr);
delete[] trtModelStreamRec;
float *buffers[2];
const int inputIndex = engine_det->getBindingIndex(INPUT_BLOB_NAME);
const int outputIndex = engine_det->getBindingIndex(OUTPUT_BLOB_NAME);
assert(inputIndex == 0);
assert(outputIndex == 1);
// Create GPU buffers on device
auto out_dims = engine_det->getBindingDimensions(1);
auto output_size = 1;
int OUTPUT_CANDIDATES = out_dims.d[1];
for(int j=0;j<out_dims.nbDims;j++) {
output_size *= out_dims.d[j];
}
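// The detector output binding is [batch, num_candidates, 5 + num_classes + 12 keypoint values];
// flatten it to an element count for the host-side buffer.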
CHECK(cudaMalloc((void**)&buffers[inputIndex], 3 * INPUT_H * INPUT_W * sizeof(float)));
CHECK(cudaMalloc((void**)&buffers[outputIndex], output_size * sizeof(float)));
// Create stream
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));
uint8_t* img_host = nullptr;
uint8_t* img_device = nullptr;
// prepare input data cache in pinned memory
CHECK(cudaMallocHost((void**)&img_host, MAX_IMAGE_INPUT_SIZE_THRESH * 3));
// prepare input data cache in device memory
CHECK(cudaMalloc((void**)&img_device, MAX_IMAGE_INPUT_SIZE_THRESH * 3));
auto out_dims_rec = engine_rec->getBindingDimensions(1);
auto output_size_rec = 1;
int OUTPUT_CANDIDATES_REC = out_dims_rec.d[1];
for(int j=0;j<out_dims_rec.nbDims;j++) {
output_size_rec *= out_dims_rec.d[j];
}
static float* prob = new float[output_size];
static float* prob_rec = new float[output_size_rec];
// recognition model parameters
int plate_rec_input_w = 168;
int plate_rec_input_h = 48;
float* blob_rec=new float[plate_rec_input_w*plate_rec_input_h*3];
float mean_value=0.588;
float std_value =0.193;
const char* plate_rec_input_name = "images"; // ONNX input name
const char* plate_rec_out_name= "output"; // ONNX output name
// end of recognition model parameters
cv::Point2f rect[4];
cv::Point2f order_rect[4];
cv::Point point[1][4];
// std::string imgPath ="/mnt/Gpan/Mydata/pytorchPorject/Chinese_license_plate_detection_recognition/imgs";
std::string input_image_path=argv[3];
std::string imgPath=argv[3];
std::vector<std::string> imagList;
std::vector<std::string>fileType{"jpg","png"};
readFileList(const_cast<char *>(imgPath.c_str()),imagList,fileType);
double sumTime = 0;
int index = 0;
for (auto &input_image_path:imagList)
{
cv::Mat img = cv::imread(input_image_path);
double begin_time = cv::getTickCount();
float *buffer_idx = (float*)buffers[inputIndex];
size_t size_image = img.cols * img.rows * 3;
size_t size_image_dst = INPUT_H * INPUT_W * 3;
memcpy(img_host, img.data, size_image);
CHECK(cudaMemcpyAsync(img_device, img_host, size_image, cudaMemcpyHostToDevice, stream));
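// preprocess_kernel_img (defined in preprocess.cu, not shown in this diff) performs the
// letterbox resize and normalization directly on the GPU.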
preprocess_kernel_img(img_device, img.cols, img.rows, buffer_idx, INPUT_W, INPUT_H, stream);
double time_pre = cv::getTickCount();
double time_pre_=(time_pre-begin_time)/cv::getTickFrequency()*1000;
// std::cout<<"preprocessing time is "<<time_pre_<<" ms"<<std::endl;
doInference_cu(*context_det,stream, (void**)buffers,prob,1,output_size);
float r = std::min(INPUT_W / (img.cols*1.0), INPUT_H / (img.rows*1.0));
// r = std::min(r, 1.0f);
int unpad_w = r * img.cols;
int unpad_h = r * img.rows;
int left = (INPUT_W-unpad_w)/2;
int top = (INPUT_H-unpad_h)/2;
// if (index)
// {
// double use_time =(cv::getTickCount()-begin_time)/cv::getTickFrequency()*1000;
// sumTime+=use_time;
// }
int img_w = img.cols;
int img_h = img.rows;
// int top=0;
// int left= 0;
// cv::Mat pr_img = static_resize(img,top,left);
// float* blob_detect;
// blob_detect = blobFromImage(pr_img);
float scale = std::min(INPUT_W / (img.cols*1.0), INPUT_H / (img.rows*1.0));
//run inference
// auto start = cv::getTickCount();
// doInference(*context_det, blob_detect, prob, output_size, pr_img.size(),INPUT_BLOB_NAME,OUTPUT_BLOB_NAME);
// auto end = cv::getTickCount();
// if (index)
// sumTime+=double((end-begin_time)/cv::getTickFrequency()*1000);
// std::cout << double((end-start)/cv::getTickFrequency()*1000) << "ms" << std::endl;
std::vector<Object> objects;
decode_outputs(prob, objects, scale, img_w, img_h,OUTPUT_CANDIDATES,top,left);
// std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
// std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
std::cout<<input_image_path<<" ";
for (int i = 0; i<objects.size(); i++)
{
// cv::rectangle(img, objects[i].rect, cv::Scalar(0,255,0), 2);
for (int j= 0; j<4; j++)
{
// cv::Scalar color = cv::Scalar(color_list[j][0], color_list[j][1], color_list[j][2]);
// cv::circle(img,cv::Point(objects[i].landmarks[2*j], objects[i].landmarks[2*j+1]),5,color,-1);
order_rect[j]=cv::Point(objects[i].landmarks[2*j],objects[i].landmarks[2*j+1]);
}
cv::Mat roiImg = getTransForm(img,order_rect); // perspective-rectify the plate region using the 4 keypoints
int label = objects[i].label;
if (label) // class 1 = double-layer plate: split and re-stitch before recognition
roiImg=get_split_merge(roiImg);
// cv::imwrite("roi.jpg",roiImg);
cv::resize(roiImg,roiImg,cv::Size(plate_rec_input_w,plate_rec_input_h));
cv::Mat pr_img =roiImg;
// std::cout << "blob image" << std::endl;
auto rec_b = cv::getTickCount();
blobFromImage_plate(pr_img,mean_value,std_value,blob_rec);
auto rec_e = cv::getTickCount();
auto rec_gap = (rec_e-rec_b)/cv::getTickFrequency()*1000;
doInference(*context_rec, blob_rec, prob_rec, output_size_rec, pr_img.size(),plate_rec_input_name,plate_rec_out_name);
auto plate_number = decode_outputs(prob_rec,output_size_rec);
auto plate_number_pinyin= decode_outputs_pingyin(prob_rec,output_size_rec);
cv::Point origin;
origin.x = objects[i].rect.x;
origin.y = objects[i].rect.y;
cv::putText(img, plate_number_pinyin, origin, cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 0), 2, 8, 0);
std::cout<<" "<<plate_number;
}
double end_time = cv::getTickCount();
auto time_gap = (end_time-begin_time)/cv::getTickFrequency()*1000;
std::cout<<" time_gap: "<<time_gap<<"ms ";
if (index)
{
// double use_time =(cv::getTickCount()-begin_time)/cv::getTickFrequency()*1000;
sumTime+=time_gap;
}
std::cout<<std::endl;
// delete [] blob_detect;
index+=1;
}
// cv::imwrite("out.jpg",img);
// destroy the engine
std::cout<<"averageTime:"<<(sumTime/(imagList.size()-1))<<"ms"<<std::endl;
context_det->destroy();
engine_det->destroy();
runtime_det->destroy();
context_rec->destroy();
engine_rec->destroy();
runtime_rec->destroy();
delete [] blob_rec;
cudaStreamDestroy(stream);
CHECK(cudaFree(img_device));
CHECK(cudaFreeHost(img_host));
CHECK(cudaFree(buffers[inputIndex]));
CHECK(cudaFree(buffers[outputIndex]));
return 0;
}

tensorrt/include/logging.h (new file, 505 lines)

@@ -0,0 +1,505 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_LOGGING_H
#define TENSORRT_LOGGING_H
#include "NvInferRuntimeCommon.h"
#include <cassert>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <sstream>
#include <string>
using Severity = nvinfer1::ILogger::Severity;
class LogStreamConsumerBuffer : public std::stringbuf
{
public:
LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog)
: mOutput(stream)
, mPrefix(prefix)
, mShouldLog(shouldLog)
{
}
LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other)
: mOutput(other.mOutput)
{
}
~LogStreamConsumerBuffer()
{
// std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence
// std::streambuf::pptr() gives a pointer to the current position of the output sequence
// if the pointer to the beginning is not equal to the pointer to the current position,
// call putOutput() to log the output to the stream
if (pbase() != pptr())
{
putOutput();
}
}
// synchronizes the stream buffer and returns 0 on success
// synchronizing the stream buffer consists of inserting the buffer contents into the stream,
// resetting the buffer and flushing the stream
virtual int sync()
{
putOutput();
return 0;
}
void putOutput()
{
if (mShouldLog)
{
// prepend timestamp
std::time_t timestamp = std::time(nullptr);
tm* tm_local = std::localtime(&timestamp);
std::cout << "[";
std::cout << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
std::cout << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
// std::stringbuf::str() gets the string contents of the buffer
// insert the buffer contents pre-appended by the appropriate prefix into the stream
mOutput << mPrefix << str();
// set the buffer to empty
str("");
// flush the stream
mOutput.flush();
}
}
void setShouldLog(bool shouldLog)
{
mShouldLog = shouldLog;
}
private:
std::ostream& mOutput;
std::string mPrefix;
bool mShouldLog;
};
//!
//! \class LogStreamConsumerBase
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer
//!
class LogStreamConsumerBase
{
public:
LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog)
: mBuffer(stream, prefix, shouldLog)
{
}
protected:
LogStreamConsumerBuffer mBuffer;
};
//!
//! \class LogStreamConsumer
//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages.
//! Order of base classes is LogStreamConsumerBase and then std::ostream.
//! This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field
//! in LogStreamConsumer and then the address of the buffer is passed to std::ostream.
//! This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream.
//! Please do not change the order of the parent classes.
//!
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream
{
public:
//! \brief Creates a LogStreamConsumer which logs messages with level severity.
//! Reportable severity determines if the messages are severe enough to be logged.
LogStreamConsumer(Severity reportableSeverity, Severity severity)
: LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity)
, std::ostream(&mBuffer) // links the stream buffer with the stream
, mShouldLog(severity <= reportableSeverity)
, mSeverity(severity)
{
}
LogStreamConsumer(LogStreamConsumer&& other)
: LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog)
, std::ostream(&mBuffer) // links the stream buffer with the stream
, mShouldLog(other.mShouldLog)
, mSeverity(other.mSeverity)
{
}
void setReportableSeverity(Severity reportableSeverity)
{
mShouldLog = mSeverity <= reportableSeverity;
mBuffer.setShouldLog(mShouldLog);
}
private:
static std::ostream& severityOstream(Severity severity)
{
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
static std::string severityPrefix(Severity severity)
{
switch (severity)
{
case Severity::kINTERNAL_ERROR: return "[F] ";
case Severity::kERROR: return "[E] ";
case Severity::kWARNING: return "[W] ";
case Severity::kINFO: return "[I] ";
case Severity::kVERBOSE: return "[V] ";
default: assert(0); return "";
}
}
bool mShouldLog;
Severity mSeverity;
};
//! \class Logger
//!
//! \brief Class which manages logging of TensorRT tools and samples
//!
//! \details This class provides a common interface for TensorRT tools and samples to log information to the console,
//! and supports logging two types of messages:
//!
//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal)
//! - Test pass/fail messages
//!
//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is
//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location.
//!
//! In the future, this class could be extended to support dumping test results to a file in some standard format
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run).
//!
//! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger
//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT
//! library and messages coming from the sample.
//!
//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger
//! object.
class Logger : public nvinfer1::ILogger
{
public:
Logger(Severity severity = Severity::kWARNING)
: mReportableSeverity(severity)
{
}
//!
//! \enum TestResult
//! \brief Represents the state of a given test
//!
enum class TestResult
{
kRUNNING, //!< The test is running
kPASSED, //!< The test passed
kFAILED, //!< The test failed
kWAIVED //!< The test was waived
};
//!
//! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger
//! \return The nvinfer1::ILogger associated with this Logger
//!
//! TODO Once all samples are updated to use this method to register the logger with TensorRT,
//! we can eliminate the inheritance of Logger from ILogger
//!
nvinfer1::ILogger& getTRTLogger()
{
return *this;
}
//!
//! \brief Implementation of the nvinfer1::ILogger::log() virtual method
//!
//! Note samples should not be calling this function directly; it will eventually go away once we eliminate the
//! inheritance from nvinfer1::ILogger
//!
// void log(Severity severity, const char* msg) override
void log(Severity severity, nvinfer1::AsciiChar const* msg) noexcept
{
LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl;
}
//!
//! \brief Method for controlling the verbosity of logging output
//!
//! \param severity The logger will only emit messages that have severity of this level or higher.
//!
void setReportableSeverity(Severity severity)
{
mReportableSeverity = severity;
}
//!
//! \brief Opaque handle that holds logging information for a particular test
//!
//! This object is an opaque handle to information used by the Logger to print test results.
//! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used
//! with Logger::reportTest{Start,End}().
//!
class TestAtom
{
public:
TestAtom(TestAtom&&) = default;
private:
friend class Logger;
TestAtom(bool started, const std::string& name, const std::string& cmdline)
: mStarted(started)
, mName(name)
, mCmdline(cmdline)
{
}
bool mStarted;
std::string mName;
std::string mCmdline;
};
//!
//! \brief Define a test for logging
//!
//! \param[in] name The name of the test. This should be a string starting with
//! "TensorRT" and containing dot-separated strings containing
//! the characters [A-Za-z0-9_].
//! For example, "TensorRT.sample_googlenet"
//! \param[in] cmdline The command line used to reproduce the test
//
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
//!
static TestAtom defineTest(const std::string& name, const std::string& cmdline)
{
return TestAtom(false, name, cmdline);
}
//!
//! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments
//! as input
//!
//! \param[in] name The name of the test
//! \param[in] argc The number of command-line arguments
//! \param[in] argv The array of command-line arguments (given as C strings)
//!
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
static TestAtom defineTest(const std::string& name, int argc, char const* const* argv)
{
auto cmdline = genCmdlineString(argc, argv);
return defineTest(name, cmdline);
}
//!
//! \brief Report that a test has started.
//!
//! \pre reportTestStart() has not been called yet for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has started
//!
static void reportTestStart(TestAtom& testAtom)
{
reportTestResult(testAtom, TestResult::kRUNNING);
assert(!testAtom.mStarted);
testAtom.mStarted = true;
}
//!
//! \brief Report that a test has ended.
//!
//! \pre reportTestStart() has been called for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has ended
//! \param[in] result The result of the test. Should be one of TestResult::kPASSED,
//! TestResult::kFAILED, TestResult::kWAIVED
//!
static void reportTestEnd(const TestAtom& testAtom, TestResult result)
{
assert(result != TestResult::kRUNNING);
assert(testAtom.mStarted);
reportTestResult(testAtom, result);
}
static int reportPass(const TestAtom& testAtom)
{
reportTestEnd(testAtom, TestResult::kPASSED);
return EXIT_SUCCESS;
}
static int reportFail(const TestAtom& testAtom)
{
reportTestEnd(testAtom, TestResult::kFAILED);
return EXIT_FAILURE;
}
static int reportWaive(const TestAtom& testAtom)
{
reportTestEnd(testAtom, TestResult::kWAIVED);
return EXIT_SUCCESS;
}
static int reportTest(const TestAtom& testAtom, bool pass)
{
return pass ? reportPass(testAtom) : reportFail(testAtom);
}
Severity getReportableSeverity() const
{
return mReportableSeverity;
}
private:
//!
//! \brief returns an appropriate string for prefixing a log message with the given severity
//!
static const char* severityPrefix(Severity severity)
{
switch (severity)
{
case Severity::kINTERNAL_ERROR: return "[F] ";
case Severity::kERROR: return "[E] ";
case Severity::kWARNING: return "[W] ";
case Severity::kINFO: return "[I] ";
case Severity::kVERBOSE: return "[V] ";
default: assert(0); return "";
}
}
//!
//! \brief returns an appropriate string for prefixing a test result message with the given result
//!
static const char* testResultString(TestResult result)
{
switch (result)
{
case TestResult::kRUNNING: return "RUNNING";
case TestResult::kPASSED: return "PASSED";
case TestResult::kFAILED: return "FAILED";
case TestResult::kWAIVED: return "WAIVED";
default: assert(0); return "";
}
}
//!
//! \brief returns an appropriate output stream (cout or cerr) to use with the given severity
//!
static std::ostream& severityOstream(Severity severity)
{
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
//!
//! \brief method that implements logging test results
//!
static void reportTestResult(const TestAtom& testAtom, TestResult result)
{
severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
<< testAtom.mCmdline << std::endl;
}
//!
//! \brief generate a command line string from the given (argc, argv) values
//!
static std::string genCmdlineString(int argc, char const* const* argv)
{
std::stringstream ss;
for (int i = 0; i < argc; i++)
{
if (i > 0)
ss << " ";
ss << argv[i];
}
return ss.str();
}
Severity mReportableSeverity;
};
namespace
{
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE
//!
//! Example usage:
//!
//! LOG_VERBOSE(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO
//!
//! Example usage:
//!
//! LOG_INFO(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_INFO(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING
//!
//! Example usage:
//!
//! LOG_WARN(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_WARN(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR
//!
//! Example usage:
//!
//! LOG_ERROR(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_ERROR(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR
// ("fatal" severity)
//!
//! Example usage:
//!
//! LOG_FATAL(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_FATAL(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR);
}
} // anonymous namespace
#endif // TENSORRT_LOGGING_H

tensorrt/include/utils.hpp (new file, 115 lines)

@@ -0,0 +1,115 @@
#ifndef _UTILS_H_
#define _UTILS_H_
#include <vector>
#include <string>
#include <iostream>
#include <dirent.h>
#include <sys/types.h>
#include <string.h>
#include <sys/stat.h>
#include <opencv2/opencv.hpp>
struct boundingBox
{
float x;
float y;
float w;
float h;
int label;
float score;
};
std::string getHouZhui(std::string fileName);
int readFileList(char *basePath,std::vector<std::string> &fileList,std::vector<std::string> fileType);
void draw_rect(const cv::Mat& image, const std::vector<boundingBox>bboxes,const char* class_names[]);
bool cmpBox(boundingBox b1, boundingBox b2);
float getIou(boundingBox b1,boundingBox b2) ;
void myNms(std::vector<boundingBox>&bboxes,float score);
const float color_list1[80][3] =
{
{0.000, 0.447, 0.741},
{0.850, 0.325, 0.098},
{0.929, 0.694, 0.125},
{0.494, 0.184, 0.556},
{0.466, 0.674, 0.188},
{0.301, 0.745, 0.933},
{0.635, 0.078, 0.184},
{0.300, 0.300, 0.300},
{0.600, 0.600, 0.600},
{1.000, 0.000, 0.000},
{1.000, 0.500, 0.000},
{0.749, 0.749, 0.000},
{0.000, 1.000, 0.000},
{0.000, 0.000, 1.000},
{0.667, 0.000, 1.000},
{0.333, 0.333, 0.000},
{0.333, 0.667, 0.000},
{0.333, 1.000, 0.000},
{0.667, 0.333, 0.000},
{0.667, 0.667, 0.000},
{0.667, 1.000, 0.000},
{1.000, 0.333, 0.000},
{1.000, 0.667, 0.000},
{1.000, 1.000, 0.000},
{0.000, 0.333, 0.500},
{0.000, 0.667, 0.500},
{0.000, 1.000, 0.500},
{0.333, 0.000, 0.500},
{0.333, 0.333, 0.500},
{0.333, 0.667, 0.500},
{0.333, 1.000, 0.500},
{0.667, 0.000, 0.500},
{0.667, 0.333, 0.500},
{0.667, 0.667, 0.500},
{0.667, 1.000, 0.500},
{1.000, 0.000, 0.500},
{1.000, 0.333, 0.500},
{1.000, 0.667, 0.500},
{1.000, 1.000, 0.500},
{0.000, 0.333, 1.000},
{0.000, 0.667, 1.000},
{0.000, 1.000, 1.000},
{0.333, 0.000, 1.000},
{0.333, 0.333, 1.000},
{0.333, 0.667, 1.000},
{0.333, 1.000, 1.000},
{0.667, 0.000, 1.000},
{0.667, 0.333, 1.000},
{0.667, 0.667, 1.000},
{0.667, 1.000, 1.000},
{1.000, 0.000, 1.000},
{1.000, 0.333, 1.000},
{1.000, 0.667, 1.000},
{0.333, 0.000, 0.000},
{0.500, 0.000, 0.000},
{0.667, 0.000, 0.000},
{0.833, 0.000, 0.000},
{1.000, 0.000, 0.000},
{0.000, 0.167, 0.000},
{0.000, 0.333, 0.000},
{0.000, 0.500, 0.000},
{0.000, 0.667, 0.000},
{0.000, 0.833, 0.000},
{0.000, 1.000, 0.000},
{0.000, 0.000, 0.167},
{0.000, 0.000, 0.333},
{0.000, 0.000, 0.500},
{0.000, 0.000, 0.667},
{0.000, 0.000, 0.833},
{0.000, 0.000, 1.000},
{0.000, 0.000, 0.000},
{0.143, 0.143, 0.143},
{0.286, 0.286, 0.286},
{0.429, 0.429, 0.429},
{0.571, 0.571, 0.571},
{0.714, 0.714, 0.714},
{0.857, 0.857, 0.857},
{0.000, 0.447, 0.741},
{0.314, 0.717, 0.741},
{0.50, 0.5, 0}
};
#endif

tensorrt/onnx2trt/CMakeLists.txt (new file, 7 lines)

@@ -0,0 +1,7 @@
cmake_minimum_required(VERSION 3.10)
project(onnx2trt)
add_executable(onnx2trt onnx2trt.cpp)
target_link_libraries(onnx2trt nvinfer)
target_link_libraries(onnx2trt cudart)
target_link_libraries(onnx2trt nvonnxparser)

tensorrt/onnx2trt/onnx2trt.cpp (new file, 182 lines)

@@ -0,0 +1,182 @@
#include <fstream>
#include <iostream>
#include <sstream>
#include <numeric>
// #include <chrono>
#include <vector>
#include <opencv2/opencv.hpp>
// #include <dirent.h>
#include "NvInfer.h"
#include "NvOnnxParser.h"
// #include "NvInferRuntime.h"
#include "logging.h"
#include "cuda_runtime_api.h"
using namespace nvinfer1;
using namespace std;
static Logger gLogger;
const char* INPUT_BLOB_NAME = "input";
const char* OUTPUT_BLOB_NAME = "output";
void saveToTrtModel(const char * TrtSaveFileName,IHostMemory*trtModelStream)
{
std::ofstream out(TrtSaveFileName, std::ios::binary);
if (!out.is_open())
{
std::cout << "打开文件失败!" <<std:: endl;
}
out.write(reinterpret_cast<const char*>(trtModelStream->data()), trtModelStream->size());
out.close();
}
void onnxToTRTModel(const std::string& modelFile,unsigned int maxBatchSize,IHostMemory*& trtModelStream,const char * TrtSaveFileName)
{
int verbosity = (int) nvinfer1::ILogger::Severity::kWARNING;
// create the builder
IBuilder* builder = createInferBuilder(gLogger); // create the builder (a pointer to an IBuilder object)
IBuilderConfig *config = builder->createBuilderConfig();
const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); // required by the ONNX parser; omitting it causes an error
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(explicitBatch); // creates the INetworkDefinition object that the parser will populate
auto parser = nvonnxparser::createParser(*network, gLogger.getTRTLogger()); // create the ONNX parser
//Optional - uncomment below lines to view network layer information
//config->setPrintLayerInfo(true);
//parser->reportParsingInfo();
if (!parser->parseFromFile(modelFile.c_str(), verbosity)) // parse the ONNX file and populate the network
{
string msg("failed to parse onnx file");
gLogger.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
exit(EXIT_FAILURE);
}
// Build the engine
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(1 << 30);
// builder->setMaxWorkspaceSize(1 << 30);
#ifdef USE_FP16
config->setFlag(BuilderFlag::kFP16);
#endif
// samplesCommon::enableDLA(builder, gUseDLACore);
// TensorRT copies the network's weights when the engine is built
// ICudaEngine* engine = builder->buildCudaEngine(*network); // legacy API: build the ICudaEngine via IBuilder::buildCudaEngine()
ICudaEngine *engine = builder->buildEngineWithConfig(*network,*config);
assert(engine);
// we can destroy the parser
parser->destroy();
// serialize the engine,
// then close everything down
trtModelStream = engine->serialize(); // serialize the engine; saveToTrtModel() writes it to disk
engine->destroy();
network->destroy();
builder->destroy();
config->destroy();
saveToTrtModel(TrtSaveFileName,trtModelStream);
}
void onnxToTRTModelDynamicBatch(const std::string& modelFile, unsigned int maxBatchSize, IHostMemory*& trtModelStream,const char * TrtSaveFileName,int input_h,int input_w) // dynamic-batch variant: builds an engine with an optimization profile instead of a fixed batch
{
int verbosity = (int) nvinfer1::ILogger::Severity::kWARNING;
// create the builder
IBuilder* builder = createInferBuilder(gLogger); // create the builder (a pointer to an IBuilder object)
IBuilderConfig *config = builder->createBuilderConfig();
auto profile = builder->createOptimizationProfile();
const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); // required by the ONNX parser; omitting it causes an error
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(explicitBatch); // creates the INetworkDefinition object that the parser will populate
Dims dims = Dims4{1, 3, input_h, input_w};
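// Optimization profile for the dynamic batch: kMIN = 1, kOPT = kMAX = maxBatchSize, at fixed 3 x input_h x input_w.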
profile->setDimensions(INPUT_BLOB_NAME,
OptProfileSelector::kMIN, Dims4{1, dims.d[1], dims.d[2], dims.d[3]});
profile->setDimensions(INPUT_BLOB_NAME,
OptProfileSelector::kOPT, Dims4{maxBatchSize, dims.d[1], dims.d[2], dims.d[3]});
profile->setDimensions(INPUT_BLOB_NAME,
OptProfileSelector::kMAX, Dims4{maxBatchSize, dims.d[1], dims.d[2], dims.d[3]});
config->addOptimizationProfile(profile);
auto parser = nvonnxparser::createParser(*network, gLogger.getTRTLogger()); // create the ONNX parser
//Optional - uncomment below lines to view network layer information
//config->setPrintLayerInfo(true);
//parser->reportParsingInfo();
if (!parser->parseFromFile(modelFile.c_str(), verbosity)) // parse the ONNX file and populate the network
{
string msg("failed to parse onnx file");
gLogger.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
exit(EXIT_FAILURE);
}
// Build the engine
// builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(1 << 30);
// builder->setMaxWorkspaceSize(1 << 30);
#ifdef USE_FP16
config->setFlag(BuilderFlag::kFP16);
#endif
// samplesCommon::enableDLA(builder, gUseDLACore);
// TensorRT copies the network's weights when the engine is built
// ICudaEngine* engine = builder->buildCudaEngine(*network); // legacy API: build the ICudaEngine via IBuilder::buildCudaEngine()
ICudaEngine *engine = builder->buildEngineWithConfig(*network,*config);
assert(engine);
// we can destroy the parser
parser->destroy();
// serialize the engine,
// then close everything down
trtModelStream = engine->serialize(); // serialize the engine; saveToTrtModel() writes it to disk
engine->destroy();
network->destroy();
builder->destroy();
config->destroy();
saveToTrtModel(TrtSaveFileName,trtModelStream);
}
// void readTrtModel(const char * Trtmodel) // load a serialized TensorRT engine
// {
// size_t size{ 0 };
// std::ifstream file(Trtmodel, std::ios::binary);
// if (file.good()) {
// file.seekg(0, file.end);
// size = file.tellg();
// file.seekg(0, file.beg);
// _trtModelStream = new char[size];
// assert(_trtModelStream);
// file.read(_trtModelStream, size);
// file.close();
// }
// _trtModelStreamSize = size;
// _runtime = createInferRuntime(gLogger);
// _engine1 = _runtime->deserializeCudaEngine(_trtModelStream, _trtModelStreamSize);
// //cudaSetDevice(0);
// context = _engine1->createExecutionContext();
// }
int main(int argc, char** argv)
{
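// Usage: ./onnx2trt <model.onnx> <output.trt> <maxBatchSize> (arguments are not validated here; see the README invocation)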
IHostMemory* trtModelStream{nullptr};
int batchSize = atoi(argv[3]);
// int input_h = atoi(argv[4]);
// int input_w=atoi(argv[5]);
onnxToTRTModel(argv[1],batchSize,trtModelStream,argv[2]);
std::cout<<"convert seccuss!"<<std::endl;
}

Binary file not shown.

Binary file not shown.

tensorrt/plate_rec.cpp (new file, 214 lines)

@@ -0,0 +1,214 @@
#include <fstream>
#include <iostream>
#include <sstream>
#include <numeric>
#include <chrono>
#include <vector>
#include <opencv2/opencv.hpp>
#include <dirent.h>
#include "NvInfer.h"
#include "cuda_runtime_api.h"
#include "logging.h"
#include "include/utils.hpp"
#define CHECK(status) \
do\
{\
auto ret = (status);\
if (ret != 0)\
{\
std::cerr << "Cuda failure: " << ret << std::endl;\
abort();\
}\
} while (0)
#define DEVICE 0 // GPU id
const std::vector<std::string> plate_string={"#","京","沪","津","渝","冀","晋","蒙","辽","吉","黑","苏","浙","皖", \
"闽","赣","鲁","豫","鄂","湘","粤","桂","琼","川","贵","云","藏","陕","甘","青","宁","新","学","警","港","澳","挂","使","领","民","航","危", \
"0","1","2","3","4","5","6","7","8","9","A","B","C","D","E","F","G","H","J","K","L","M","N","P","Q","R","S","T","U","V","W","X","Y","Z"};
using namespace nvinfer1;
// stuff we know about the network and the input/output blobs
static Logger gLogger;
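// Convert an HWC BGR uint8 image into a planar CHW float blob, normalizing
// each value as (x / 255 - mean) / std. The caller owns the returned buffer
// and must release it with delete[].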
float* blobFromImage_plate(cv::Mat& img,float mean_value,float std_value){
float* blob = new float[img.total()*3];
int channels = 3;
int img_h = img.rows;
int img_w = img.cols;
int k = 0;
for (size_t c = 0; c <3; c++)
{
for (size_t h = 0; h < img_h; h++)
{
for (size_t w = 0; w < img_w; w++)
{
blob[k++] =
((float)img.at<cv::Vec3b>(h, w)[c]/255.0-mean_value)/std_value;
}
}
}
return blob;
}
void doInference(IExecutionContext& context, float* input, float* output, const int output_size, cv::Size input_shape,const char *INPUT_BLOB_NAME,const char *OUTPUT_BLOB_NAME) {
const ICudaEngine& engine = context.getEngine();
// Pointers to input and output device buffers to pass to engine.
// Engine requires exactly IEngine::getNbBindings() number of buffers.
assert(engine.getNbBindings() == 2);
void* buffers[2];
// In order to bind the buffers, we need to know the names of the input and output tensors.
// Note that indices are guaranteed to be less than IEngine::getNbBindings()
const int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME);
assert(engine.getBindingDataType(inputIndex) == nvinfer1::DataType::kFLOAT);
const int outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);
assert(engine.getBindingDataType(outputIndex) == nvinfer1::DataType::kFLOAT);
// Create GPU buffers on device
CHECK(cudaMalloc(&buffers[inputIndex], 3 * input_shape.height * input_shape.width * sizeof(float)));
CHECK(cudaMalloc(&buffers[outputIndex], output_size*sizeof(float)));
// Create stream
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));
// DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
CHECK(cudaMemcpyAsync(buffers[inputIndex], input, 3 * input_shape.height * input_shape.width * sizeof(float), cudaMemcpyHostToDevice, stream));
context.enqueue(1, buffers, stream, nullptr);
// context.enqueueV2( buffers, stream, nullptr);
CHECK(cudaMemcpyAsync(output, buffers[outputIndex], output_size * sizeof(float), cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);
// Release stream and buffers
cudaStreamDestroy(stream);
CHECK(cudaFree(buffers[inputIndex]));
CHECK(cudaFree(buffers[outputIndex]));
}
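// Greedy CTC-style decoding: the recognition model emits one arg-maxed
// character index per time step; skip the blank token "#" and collapse
// consecutive repeats to recover the plate string.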
std::string decode_outputs(float *prob,int output_size)
{
std::string plate ="";
std::string pre_str ="#";
for (int i = 0; i<output_size; i++)
{
int index = int(prob[i]);
if (plate_string[index]!="#" && plate_string[index]!=pre_str)
plate+=plate_string[index];
pre_str = plate_string[index];
}
return plate;
}
int main(int argc, char** argv)
{
cudaSetDevice(DEVICE);
// create a model using the API directly and serialize it to a stream
char *trtModelStream{nullptr};
size_t size{0};
int plate_rec_input_w = 168;
int plate_rec_input_h = 48;
float mean_value=0.588;
float std_value =0.193;
const char* plate_rec_input_name = "images"; // ONNX input tensor name
const char* plate_rec_out_name= "output"; // ONNX output tensor name
if (argc == 4 && std::string(argv[2]) == "-i") {
const std::string engine_file_path {argv[1]};
std::ifstream file(engine_file_path, std::ios::binary);
if (file.good()) {
file.seekg(0, file.end);
size = file.tellg();
file.seekg(0, file.beg);
trtModelStream = new char[size];
assert(trtModelStream);
file.read(trtModelStream, size);
file.close();
}
} else {
std::cerr << "arguments not right!" << std::endl;
std::cerr << "run 'python3 yolox/deploy/trt.py -n yolox-{tiny, s, m, l, x}' to serialize model first!" << std::endl;
std::cerr << "Then use the following command:" << std::endl;
std::cerr << "./yolox ../model_trt.engine -i ../../../assets/dog.jpg // deserialize file and run inference" << std::endl;
return -1;
}
const std::string input_image_path {argv[3]};
//std::vector<std::string> file_names;
//if (read_files_in_dir(argv[2], file_names) < 0) {
//std::cout << "read_files_in_dir failed." << std::endl;
//return -1;
//}
IRuntime* runtime = createInferRuntime(gLogger);
assert(runtime != nullptr);
ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
assert(engine != nullptr);
IExecutionContext* context = engine->createExecutionContext();
assert(context != nullptr);
delete[] trtModelStream;
auto out_dims = engine->getBindingDimensions(1);
auto output_size = 1;
int OUTPUT_CANDIDATES = out_dims.d[1];
for(int j=0;j<out_dims.nbDims;j++) {
output_size *= out_dims.d[j];
}
static float* prob = new float[output_size];
std::string imgPath = input_image_path; // image directory from the command line (was a hardcoded local path)
std::vector<std::string> imagList;
std::vector<std::string>fileType{"jpg","png"};
readFileList(const_cast<char *>(imgPath.c_str()),imagList,fileType);
double sumTime = 0;
int right_label = 0;
int file_num = imagList.size();
for (auto &input_image_path:imagList)
{
cv::Mat img = cv::imread(input_image_path);
int img_w = img.cols;
int img_h = img.rows;
int top=0;
int left= 0;
cv::resize(img,img,cv::Size(plate_rec_input_w,plate_rec_input_h));
cv::Mat pr_img =img;
// std::cout << "blob image" << std::endl;
float* blob;
blob = blobFromImage_plate(pr_img,mean_value,std_value);
doInference(*context, blob, prob, output_size, pr_img.size(),plate_rec_input_name,plate_rec_out_name);
auto plate_number = decode_outputs(prob,output_size);
int pos = input_image_path.find_last_of("/");
auto image_name = input_image_path.substr(pos+1);
int pos2= image_name.find_last_of("_");
auto gt=image_name.substr(0,pos2);
if(gt==plate_number)
right_label+=1;
std::cout<<input_image_path<<" "<<right_label<<" "<<plate_number<<std::endl;
delete [] blob; // blob was allocated with new[] in blobFromImage_plate
}
printf("sum is %d, right is %d, accuracy is %.4f\n",file_num,right_label,1.0*right_label/file_num);
// destroy the engine
// std::cout<<"averageTime:"<<sumTime/imagList.size()<<std::endl;
context->destroy();
engine->destroy();
runtime->destroy();
return 0;
}

116
tensorrt/preprocess.cu Normal file
View File

@@ -0,0 +1,116 @@
#include "preprocess.h"
#include <opencv2/opencv.hpp>
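// One thread per destination pixel: apply the inverse (dst -> src) affine
// mapping, bilinearly sample the source image (constant border outside it),
// swap BGR -> RGB, scale to [0,1], and write planar CHW output.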
__global__ void warpaffine_kernel(
uint8_t* src, int src_line_size, int src_width,
int src_height, float* dst, int dst_width,
int dst_height, uint8_t const_value_st,
AffineMatrix d2s, int edge) {
int position = blockDim.x * blockIdx.x + threadIdx.x;
if (position >= edge) return;
float m_x1 = d2s.value[0];
float m_y1 = d2s.value[1];
float m_z1 = d2s.value[2];
float m_x2 = d2s.value[3];
float m_y2 = d2s.value[4];
float m_z2 = d2s.value[5];
int dx = position % dst_width;
int dy = position / dst_width;
float src_x = m_x1 * dx + m_y1 * dy + m_z1 + 0.5f;
float src_y = m_x2 * dx + m_y2 * dy + m_z2 + 0.5f;
float c0, c1, c2;
if (src_x <= -1 || src_x >= src_width || src_y <= -1 || src_y >= src_height) {
// out of range
c0 = const_value_st;
c1 = const_value_st;
c2 = const_value_st;
} else {
int y_low = floorf(src_y);
int x_low = floorf(src_x);
int y_high = y_low + 1;
int x_high = x_low + 1;
uint8_t const_value[] = {const_value_st, const_value_st, const_value_st};
float ly = src_y - y_low;
float lx = src_x - x_low;
float hy = 1 - ly;
float hx = 1 - lx;
float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
uint8_t* v1 = const_value;
uint8_t* v2 = const_value;
uint8_t* v3 = const_value;
uint8_t* v4 = const_value;
if (y_low >= 0) {
if (x_low >= 0)
v1 = src + y_low * src_line_size + x_low * 3;
if (x_high < src_width)
v2 = src + y_low * src_line_size + x_high * 3;
}
if (y_high < src_height) {
if (x_low >= 0)
v3 = src + y_high * src_line_size + x_low * 3;
if (x_high < src_width)
v4 = src + y_high * src_line_size + x_high * 3;
}
c0 = w1 * v1[0] + w2 * v2[0] + w3 * v3[0] + w4 * v4[0];
c1 = w1 * v1[1] + w2 * v2[1] + w3 * v3[1] + w4 * v4[1];
c2 = w1 * v1[2] + w2 * v2[2] + w3 * v3[2] + w4 * v4[2];
}
//bgr to rgb
float t = c2;
c2 = c0;
c0 = t;
//normalization
c0 = c0 / 255.0f;
c1 = c1 / 255.0f;
c2 = c2 / 255.0f;
//rgbrgbrgb to rrrgggbbb
int area = dst_width * dst_height;
float* pdst_c0 = dst + dy * dst_width + dx;
float* pdst_c1 = pdst_c0 + area;
float* pdst_c2 = pdst_c1 + area;
*pdst_c0 = c0;
*pdst_c1 = c1;
*pdst_c2 = c2;
}
void preprocess_kernel_img(
uint8_t* src, int src_width, int src_height,
float* dst, int dst_width, int dst_height,
cudaStream_t stream) {
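// s2d is the letterbox transform (uniform scale, centered padding) from the
// source image to the network input; the kernel needs the reverse mapping,
// so it is inverted into d2s below.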
AffineMatrix s2d,d2s;
float scale = std::min(dst_height / (float)src_height, dst_width / (float)src_width);
s2d.value[0] = scale;
s2d.value[1] = 0;
s2d.value[2] = -scale * src_width * 0.5 + dst_width * 0.5;
s2d.value[3] = 0;
s2d.value[4] = scale;
s2d.value[5] = -scale * src_height * 0.5 + dst_height * 0.5;
cv::Mat m2x3_s2d(2, 3, CV_32F, s2d.value);
cv::Mat m2x3_d2s(2, 3, CV_32F, d2s.value);
cv::invertAffineTransform(m2x3_s2d, m2x3_d2s);
memcpy(d2s.value, m2x3_d2s.ptr<float>(0), sizeof(d2s.value));
int jobs = dst_height * dst_width;
int threads = 256;
int blocks = ceil(jobs / (float)threads);
warpaffine_kernel<<<blocks, threads, 0, stream>>>(
src, src_width*3, src_width,
src_height, dst, dst_width,
dst_height, 128, d2s, jobs);
}
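// A hedged usage sketch of preprocess_kernel_img (buffer names and sizes are
// assumptions, not part of this file):
//
//   uint8_t* img_device;   // HWC BGR source image on the GPU
//   float*   input_blob;   // planar CHW network input
//   cudaMalloc(&img_device, src_w * src_h * 3);
//   cudaMalloc(&input_blob, 3 * dst_w * dst_h * sizeof(float));
//   cudaMemcpyAsync(img_device, img.data, src_w * src_h * 3, cudaMemcpyHostToDevice, stream);
//   preprocess_kernel_img(img_device, src_w, src_h, input_blob, dst_w, dst_h, stream);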

16
tensorrt/preprocess.h Normal file
View File

@@ -0,0 +1,16 @@
#ifndef __PREPROCESS_H
#define __PREPROCESS_H
#include <cuda_runtime.h>
#include <cstdint>
struct AffineMatrix{
float value[6];
};
void preprocess_kernel_img(uint8_t* src, int src_width, int src_height,
float* dst, int dst_width, int dst_height,
cudaStream_t stream);
#endif // __PREPROCESS_H

BIN
tensorrt/test_imgs/14.jpg Normal file


158
tensorrt/utils.cpp Normal file
View File

@@ -0,0 +1,158 @@
#include "utils.hpp"
std::string getHouZhui(std::string fileName)
{
// std::string fileName="/home/xiaolei/23.jpg";
int pos=fileName.find_last_of(std::string("."));
std::string houZui=fileName.substr(pos+1);
return houZui;
}
int readFileList(char *basePath,std::vector<std::string> &fileList,std::vector<std::string> fileType)
{
DIR *dir;
struct dirent *ptr;
char base[1000];
if ((dir=opendir(basePath)) == NULL)
{
perror("Open dir error...");
exit(1);
}
while ((ptr=readdir(dir)) != NULL)
{
if(strcmp(ptr->d_name,".")==0 || strcmp(ptr->d_name,"..")==0) /// current dir or parent dir
continue;
else if(ptr->d_type == 8) /// regular file (DT_REG)
{
if (fileType.size())
{
std::string houZui=getHouZhui(std::string(ptr->d_name));
for (auto &s:fileType)
{
if (houZui==s)
{
fileList.push_back(basePath+std::string("/")+std::string(ptr->d_name));
break;
}
}
}
else
{
fileList.push_back(basePath+std::string("/")+std::string(ptr->d_name));
}
}
else if(ptr->d_type == 10) /// symbolic link (DT_LNK)
printf("d_name:%s/%s\n",basePath,ptr->d_name);
else if(ptr->d_type == 4) /// directory (DT_DIR): recurse into it
{
memset(base,'\0',sizeof(base));
strcpy(base,basePath);
strcat(base,"/");
strcat(base,ptr->d_name);
readFileList(base,fileList,fileType);
}
}
closedir(dir);
return 1;
}
void draw_rect(const cv::Mat& image, const std::vector<boundingBox>bboxes,const char* class_names[])
{
// static const char* class_names[] = {
// "head", "leg", "hand", "back", "nostd", "body", "plate", "logo"};
// cv::Mat image = bgr.clone();
for (size_t i = 0; i < bboxes.size(); i++)
{
// const Object& obj = objects[i];
const boundingBox &obj= bboxes[i];
// fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
// obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);
cv::Scalar color = cv::Scalar(color_list1[obj.label][0], color_list1[obj.label][1], color_list1[obj.label][2]);
float c_mean = cv::mean(color)[0];
cv::Scalar txt_color;
if (c_mean > 0.5){
txt_color = cv::Scalar(0, 0, 0);
}else{
txt_color = cv::Scalar(255, 255, 255);
}
cv::Rect myRect(obj.x,obj.y,obj.w,obj.h);
cv::rectangle(image,myRect, color * 255, 2);
char text[256];
sprintf(text, "%s %.1f%%", class_names[obj.label], obj.score * 100);
int baseLine = 0;
cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
cv::Scalar txt_bk_color = color * 0.7 * 255;
int x = obj.x;
int y = obj.y + 1;
//int y = obj.rect.y - label_size.height - baseLine;
if (y > image.rows)
y = image.rows;
//if (x + label_size.width > image.cols)
//x = image.cols - label_size.width;
cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
txt_bk_color,-1);
cv::putText(image, text, cv::Point(x, y + label_size.height),
cv::FONT_HERSHEY_SIMPLEX, 0.4, txt_color, 1);
}
}
bool cmpBox(boundingBox b1, boundingBox b2)
{
return b1.score > b2.score;
}
float getIou(boundingBox b1,boundingBox b2) // compute IoU of two boxes
{
int xl1 = b1.x; // left
int xr1 = b1.w+b1.x; // right
int yt1 = b1.y; // top
int yb1 = b1.y+b1.h; // bottom
int xl2 = b2.x;
int xr2 = b2.w+b2.x;
int yt2 = b2.y;
int yb2 = b2.y+b2.h;
int x11 = std::max(xl1,xl2);
int y11 = std::max(yt1,yt2);
int x22 = std::min(xr1,xr2);
int y22 = std::min(yb1,yb2);
float intersectionArea = std::max(0,x22-x11)*std::max(0,y22-y11); // clamp so disjoint boxes yield 0, not a negative area
float unionArea = (xr1-xl1)*(yb1-yt1)+(xr2-xl2)*(yb2-yt2)-intersectionArea; // union
return 1.0f*intersectionArea/unionArea;
}
void myNms(std::vector<boundingBox>&bboxes,float score)
{
std::sort(bboxes.begin(),bboxes.end(),cmpBox);
for(int i = 0; i<(int)bboxes.size()-1; i++) // cast avoids size_t underflow when bboxes is empty
{
for(int j = i+1;j<bboxes.size(); j++)
{
if(getIou(bboxes[i],bboxes[j])>score)
{
bboxes.erase(bboxes.begin()+j);
j--;
}
}
}
}
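// Typical call (the threshold value is illustrative, not taken from this file):
//   myNms(bboxes, 0.45f); // drop any box whose IoU with a higher-scoring box exceeds 0.45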