TensorRT support

we0091234
2022-12-10 10:05:16 +08:00
parent 73178d353f
commit 8c4e58f2c8
20 changed files with 2374 additions and 13 deletions

.gitignore (3 lines changed)

@@ -29,4 +29,5 @@ plate/
!*.md
!*.txt
!*.yaml
!*.ttf
!*.cu

export.py

@@ -67,9 +67,10 @@ if __name__ == '__main__':
# elif isinstance(m, models.yolo.Detect):
# m.forward = m.forward_export # assign forward (optional)
model.model[-1].export = not (opt.grid or opt.export_nms) # set Detect() layer grid export
model.model[-1].export_cat = True # onnx export: emit the fused concatenated output
for _ in range(2):
y = model(img) # dry runs
output_names = None
output_names = ["output"]
if opt.export_nms:
nms = models.common.NMS(conf=0.01, kpt_label=4)
nms_export = models.common.NMS_Export(conf=0.01, kpt_label=4)
@@ -83,16 +84,16 @@ if __name__ == '__main__':
print(f"\n{colorstr('PyTorch:')} starting from {opt.weights} ({file_size(opt.weights):.1f} MB)")
# TorchScript export -----------------------------------------------------------------------------------------------
prefix = colorstr('TorchScript:')
try:
print(f'\n{prefix} starting export with torch {torch.__version__}...')
f = opt.weights.replace('.pt', '.torchscript.pt') # filename
ts = torch.jit.trace(model, img, strict=False)
ts = optimize_for_mobile(ts) # https://pytorch.org/tutorials/recipes/script_optimized.html
ts.save(f)
print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
except Exception as e:
print(f'{prefix} export failure: {e}')
# prefix = colorstr('TorchScript:')
# try:
# print(f'\n{prefix} starting export with torch {torch.__version__}...')
# f = opt.weights.replace('.pt', '.torchscript.pt') # filename
# ts = torch.jit.trace(model, img, strict=False)
# ts = optimize_for_mobile(ts) # https://pytorch.org/tutorials/recipes/script_optimized.html
# ts.save(f)
# print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
# except Exception as e:
# print(f'{prefix} export failure: {e}')
# ONNX export ------------------------------------------------------------------------------------------------------
prefix = colorstr('ONNX:')

models/yolo.py

@@ -215,7 +215,7 @@ class IDetect(nn.Module):
class IKeypoint(nn.Module):
stride = None # strides computed during build
export = False # onnx export
export_cat = False # onnx export cat output
def __init__(self, nc=80, anchors=(), nkpt=5, ch=(), inplace=True, dw_conv_kpt=False): # detection layer
super(IKeypoint, self).__init__()
self.nc = nc # number of classes
@@ -254,6 +254,57 @@ class IKeypoint(nn.Module):
# x = x.copy() # for profiling
z = [] # inference output
self.training |= self.export
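# Fused export path: decode boxes and keypoints here so the exported ONNX graph ends in a
# single concatenated (bs, na*ny*nx, no) tensor named "output" for the TensorRT pipeline.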
if self.export_cat:
for i in range(self.nl):
x[i] = torch.cat((self.im[i](self.m[i](self.ia[i](x[i]))), self.m_kpt[i](x[i])), axis=1)
bs, _, ny, nx = map(int,x[i].shape) # x(bs,255,20,20) to x(bs,3,20,20,85)
bs=-1 # use a dynamic batch dimension in the exported graph
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
x_det = x[i][..., :5+self.nc]
x_kpt = x[i][..., 5+self.nc:]
if self.grid[i].shape[2:4] != x[i].shape[2:4]:
self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
kpt_grid_x = self.grid[i][:,:,:,:, 0:1]
kpt_grid_y = self.grid[i][:,:,:,:, 1:2]
y = x_det.sigmoid()
xy = (y[:,:,:,:, 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy
wh = (y[:,:,:,:, 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, self.na, 1, 1, 2) # wh
classify = y[..., 4:]
# x_kpt[:,:,:,:, 0::3] = (x_kpt[:,:,:,:, ::3] * 2. - 0.5 + kpt_grid_x.repeat(1,1,1,1,self.nkpt)) * self.stride[i] # xy
# x_kpt[:,:,:,:,1::3] = (x_kpt[:,:,:,:, 1::3] * 2. - 0.5 + kpt_grid_y.repeat(1,1,1,1,self.nkpt)) * self.stride[i] # xy
# x_kpt[:,:,:,:,2::3] = x_kpt[:,:,:,:, 2::3].sigmoid()
x1=(x_kpt[:,:,:,:, 0:1] * 2. - 0.5 + kpt_grid_x) * self.stride[i]
y1=(x_kpt[:,:,:,:, 1:2] * 2. - 0.5 + kpt_grid_y) * self.stride[i]
s1=x_kpt[:,:,:,:, 2:3].sigmoid()
landmarks1=torch.cat((x1,y1,s1),-1)
x2=(x_kpt[:,:,:,:, 3:4] * 2. - 0.5 + kpt_grid_x) * self.stride[i]
y2=(x_kpt[:,:,:,:, 4:5] * 2. - 0.5 + kpt_grid_y) * self.stride[i]
s2=x_kpt[:,:,:,:, 5:6].sigmoid()
landmarks2=torch.cat((x2,y2,s2),-1)
x3=(x_kpt[:,:,:,:, 6:7] * 2. - 0.5 + kpt_grid_x) * self.stride[i]
y3=(x_kpt[:,:,:,:, 7:8] * 2. - 0.5 + kpt_grid_y) * self.stride[i]
s3=x_kpt[:,:,:,:, 8:9].sigmoid()
landmarks3=torch.cat((x3,y3,s3),-1)
x4=(x_kpt[:,:,:,:, 9:10] * 2. - 0.5 + kpt_grid_x) * self.stride[i]
y4=(x_kpt[:,:,:,:, 10:11] * 2. - 0.5 + kpt_grid_y) * self.stride[i]
s4=x_kpt[:,:,:,:, 11:12].sigmoid()
landmarks4=torch.cat((x4,y4,s4),-1)
y = torch.cat((xy, wh, classify, landmarks1, landmarks2, landmarks3, landmarks4), dim=-1)
z.append(y.view(bs, self.na*nx*ny, self.no))
return torch.cat(z,1)
for i in range(self.nl):
if self.nkpt is None or self.nkpt==0:
x[i] = self.im[i](self.m[i](self.ia[i](x[i]))) # conv

tensorrt/.gitignore (new file, 28 lines)

@@ -0,0 +1,28 @@
# .gitignore
# First, ignore everything
*
# but do not ignore directories
!*/
# ignore these specific directories
ut/
runs/
.vscode/
build/
# do not ignore the following file types
!*.cpp
!*.h
!*.hpp
!*.c
!.gitignore
!*.py
!*.sh
!*.npy
!*.jpg
!*.pth
!*.txt
!*.md
!*.yaml
!*.png
!*.onnx
!*.cu

tensorrt/CMakeLists.txt (new file, 34 lines)

@@ -0,0 +1,34 @@
cmake_minimum_required(VERSION 3.10)
project(plate_rec)
add_definitions(-std=c++11)
add_definitions(-w)
# option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
find_package(CUDA REQUIRED)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Release)
#cuda
include_directories(/mnt/Gu/softWare/cuda-11.0/targets/x86_64-linux/include)
link_directories(/mnt/Gu/softWare/cuda-11.0/targets/x86_64-linux/lib)
#tensorrt
include_directories(/mnt/Gpan/tensorRT/TensorRT-8.2.0.6/include/)
link_directories(/mnt/Gpan/tensorRT/TensorRT-8.2.0.6/lib/)
#opencv
find_package(OpenCV)
include_directories(${OpenCV_INCLUDE_DIRS})
include_directories(${PROJECT_SOURCE_DIR}/include)
#onnx2trt
add_subdirectory(${PROJECT_SOURCE_DIR}/onnx2trt)
cuda_add_executable(plate_rec detect_rec_plate.cpp utils.cpp preprocess.cu)
target_link_libraries(plate_rec nvinfer)
target_link_libraries(plate_rec cudart)
target_link_libraries(plate_rec nvonnxparser)
target_link_libraries(plate_rec ${OpenCV_LIBS})
add_definitions(-O2 -pthread)

tensorrt/README.md (new file, 36 lines)

@@ -0,0 +1,36 @@
# yolov7 license plate recognition with TensorRT
1. Edit CMakeLists.txt and replace the CUDA, TensorRT, and OpenCV paths with your own:
```
#cuda
include_directories(/mnt/Gu/softWare/cuda-11.0/targets/x86_64-linux/include)
link_directories(/mnt/Gu/softWare/cuda-11.0/targets/x86_64-linux/lib)
#tensorrt
include_directories(/mnt/Gpan/tensorRT/TensorRT-8.2.0.6/include/)
link_directories(/mnt/Gpan/tensorRT/TensorRT-8.2.0.6/lib/)
```
2. Build:
```
1. mkdir build
2. cd build
3. cmake ..
4. make
```
3. Convert the ONNX models to TensorRT engines (for the ONNX models, see [license plate recognition](https://github.com/we0091234/Chinese_license_plate_detection_recognition)):
```
# run from the build directory
#1 build the detection engine
./onnx2trt/onnx2trt ../onnx_model/plate_detect.onnx ./plate_detect.trt 1
#2 build the recognition engine
./onnx2trt/onnx2trt ../onnx_model/plate_rec.onnx ./plate_rec.trt 1
```
4. Inference:
```
./plate_rec ./plate_detect.trt ./plate_rec.trt ../test_imgs
```
Results are printed to the console.
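
The C++ code assumes the engines expose an input tensor named `images` and an output tensor named `output` (see the asserts in `doInference`). If a converted engine trips those asserts, dumping the bindings helps; the sketch below is illustrative only (not part of this commit) and uses the same TensorRT 8.2 binding API that the rest of the code relies on:
```
// binding_dump.cpp -- illustrative sketch, not part of this commit.
// Prints each binding's direction, name, and dimensions for a serialized engine.
// Build it like onnx2trt (link nvinfer) and pass the .trt path as argv[1].
#include <fstream>
#include <iostream>
#include <iterator>
#include <vector>
#include "NvInfer.h"
#include "logging.h"

static Logger gLogger;

int main(int argc, char** argv) {
    std::ifstream file(argv[1], std::ios::binary); // serialized engine
    std::vector<char> blob((std::istreambuf_iterator<char>(file)),
                           std::istreambuf_iterator<char>());
    auto* runtime = nvinfer1::createInferRuntime(gLogger);
    auto* engine = runtime->deserializeCudaEngine(blob.data(), blob.size());
    for (int i = 0; i < engine->getNbBindings(); ++i) {
        auto dims = engine->getBindingDimensions(i);
        std::cout << (engine->bindingIsInput(i) ? "input  " : "output ")
                  << engine->getBindingName(i) << " [";
        for (int j = 0; j < dims.nbDims; ++j)
            std::cout << dims.d[j] << (j + 1 < dims.nbDims ? "," : "");
        std::cout << "]" << std::endl;
    }
    engine->destroy();
    runtime->destroy();
    return 0;
}
```
Note that a serialized `.trt` engine is tied to the GPU and TensorRT version it was built with, so regenerate the engines on the target machine rather than copying them across.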

tensorrt/detect_rec_plate.cpp (new file, 897 lines)

@@ -0,0 +1,897 @@
#include <fstream>
#include <iostream>
#include <sstream>
#include <numeric>
#include <chrono>
#include <vector>
#include <opencv2/opencv.hpp>
#include <dirent.h>
#include "NvInfer.h"
#include "cuda_runtime_api.h"
#include "logging.h"
#include "include/utils.hpp"
#include "preprocess.h"
#define MAX_IMAGE_INPUT_SIZE_THRESH 5000 * 5000
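// Upper bound on input image pixels; main() sizes the pinned host and device staging buffers from it.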
struct bbox
{
float x1,x2,y1,y2;
float landmarks[8];
float score;
};
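// Comparator for std::sort: order candidate boxes by descending confidence.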
bool my_func(bbox a,bbox b)
{
return a.score>b.score;
}
float get_IOU(bbox a,bbox b)
{
float x1 = std::max(a.x1,b.x1);
float x2 = std::min(a.x2,b.x2);
float y1 = std::max(a.y1,b.y1);
float y2 = std::min(a.y2,b.y2);
float w = std::max(0.0f,x2-x1);
float h = std::max(0.0f,y2-y1);
float inter_area = w*h;
float union_area = (a.x2-a.x1)*(a.y2-a.y1)+(b.x2-b.x1)*(b.y2-b.y1)-inter_area;
float IOU = 1.0*inter_area/ union_area;
return IOU;
}
#define CHECK(status) \
do\
{\
auto ret = (status);\
if (ret != 0)\
{\
std::cerr << "Cuda failure: " << ret << std::endl;\
abort();\
}\
} while (0)
#define DEVICE 0 // GPU id
#define NMS_THRESH 0.45
#define BBOX_CONF_THRESH 0.3
using namespace nvinfer1;
// stuff we know about the network and the input/output blobs
const std::vector<std::string> plate_string={"#","京","沪","津","渝","冀","晋","蒙","辽","吉","黑","苏","浙","皖", \
"闽","赣","鲁","豫","鄂","湘","粤","桂","琼","川","贵","云","藏","陕","甘","青","宁","新","学","警","港","澳","挂","使","领","民","航","危", \
"0","1","2","3","4","5","6","7","8","9","A","B","C","D","E","F","G","H","J","K","L","M","N","P","Q","R","S","T","U","V","W","X","Y","Z"};
const std::vector<std::string> plate_string_yinwen={"#","<beijing>","<hu>","<tianjin>","<chongqing>","<hebei>","<jing>","<meng>","<liao>","<jilin>","<hei>","<su>","<zhe>","<wan>", \
"<fujian>","<gan>","<lun>","<henan>","<hubei>","<hunan>","<yue>","<guangxi>","<qiong>","<chuan>","<guizhou>","<yun>","<zang>","<shanxi>","<gan>","<qinghai>",\
"<ning>","<xin>","<xue>","<police>","<hongkang>","<Macao>","<gua>","<shi>","<ling>","<min>","<hang>","<shen>", \
"0","1","2","3","4","5","6","7","8","9","A","B","C","D","E","F","G","H","J","K","L","M","N","P","Q","R","S","T","U","V","W","X","Y","Z"};
static const int INPUT_W = 640;
static const int INPUT_H = 640;
static const int NUM_CLASSES = 2; // two classes: single-layer and double-layer plates
const char* INPUT_BLOB_NAME = "images"; // ONNX input name
const char* OUTPUT_BLOB_NAME = "output"; // ONNX output name
static Logger gLogger;
cv::Mat static_resize(cv::Mat& img,int &top,int &left) // letterbox as in YOLOv5: resize keeping aspect ratio, pad to INPUT_W x INPUT_H with gray (114)
{
float r = std::min(INPUT_W / (img.cols*1.0), INPUT_H / (img.rows*1.0));
// r = std::min(r, 1.0f);
int unpad_w = r * img.cols;
int unpad_h = r * img.rows;
left = (INPUT_W-unpad_w)/2;
top = (INPUT_H-unpad_h)/2;
int right = INPUT_W-unpad_w-left;
int bottom = INPUT_H-unpad_h-top;
cv::Mat re(unpad_h, unpad_w, CV_8UC3);
cv::resize(img, re, re.size());
cv::Mat out;
cv::copyMakeBorder(re,out,top,bottom,left,right,cv::BORDER_CONSTANT,cv::Scalar(114,114,114));
return out;
}
struct Object
{
cv::Rect_<float> rect; // bounding box
float landmarks[8]; // 4 keypoints as (x, y) pairs
int label;
float prob;
};
static inline float intersection_area(const Object& a, const Object& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
{
int i = left;
int j = right;
float p = faceobjects[(left + right) / 2].prob;
while (i <= j)
{
while (faceobjects[i].prob > p)
i++;
while (faceobjects[j].prob < p)
j--;
if (i <= j)
{
// swap
std::swap(faceobjects[i], faceobjects[j]);
i++;
j--;
}
}
#pragma omp parallel sections
{
#pragma omp section
{
if (left < j) qsort_descent_inplace(faceobjects, left, j);
}
#pragma omp section
{
if (i < right) qsort_descent_inplace(faceobjects, i, right);
}
}
}
static void qsort_descent_inplace(std::vector<Object>& objects)
{
if (objects.empty())
return;
qsort_descent_inplace(objects, 0, objects.size() - 1);
}
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
{
picked.clear();
const int n = faceobjects.size();
std::vector<float> areas(n);
for (int i = 0; i < n; i++)
{
areas[i] = faceobjects[i].rect.area();
}
for (int i = 0; i < n; i++)
{
const Object& a = faceobjects[i];
int keep = 1;
for (int j = 0; j < (int)picked.size(); j++)
{
const Object& b = faceobjects[picked[j]];
// intersection over union
float inter_area = intersection_area(a, b);
float union_area = areas[i] + areas[picked[j]] - inter_area;
// float IoU = inter_area / union_area
if (inter_area / union_area > nms_threshold)
keep = 0;
}
if (keep)
picked.push_back(i);
}
}
std::vector<int> my_nms(std::vector<bbox> &bboxes, float nms_threshold)
{
std::vector<int> choice;
for(int i = 0; i<bboxes.size(); i++)
{
int keep = 1;
for(int j = 0; j<choice.size(); j++)
{
float IOU = get_IOU(bboxes[i],bboxes[choice[j]]);
if (IOU>nms_threshold)
keep = 0;
}
if (keep)
choice.push_back(i);
}
return choice;
}
int find_max(float *prob,int num) // index of the highest score, i.e. the class id
{
int max= 0;
for(int i=1; i<num; i++)
{
if (prob[max]<prob[i])
max = i;
}
return max;
}
static void generate_yolox_proposals(float *feat_blob, float prob_threshold,
std::vector<Object> &objects,int OUTPUT_CANDIDATES) {
const int num_class = 2;
const int ckpt = 12; // yolov7 uses 12 (4 keypoints x (x, y, score)); yolov5 uses 8 (4 keypoints x (x, y))
const int num_anchors = OUTPUT_CANDIDATES;
for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++) {
// const int basic_pos = anchor_idx * (num_class + 5 + 1);
// float box_objectness = feat_blob[basic_pos + 4];
// int cls_id = feat_blob[basic_pos + 5];
// float score = feat_blob[basic_pos + 5 + 1 + cls_id];
// score *= box_objectness;
const int basic_pos = anchor_idx * (num_class + 5 + ckpt); // 5 = x, y, w, h, object_score; ckpt = keypoint channels
float box_objectness = feat_blob[basic_pos + 4];
// int cls_id = find_max(&feat_blob[basic_pos +5+ckpt],num_class); // class id (v5 layout)
int cls_id = find_max(&feat_blob[basic_pos +5],num_class); // class id (v7 layout)
// float score = feat_blob[basic_pos + 5 +8 + cls_id]; // v5
float score = feat_blob[basic_pos + 5 + cls_id]; // v7
score *= box_objectness;
if (score > prob_threshold) {
// yolox/models/yolo_head.py decode logic
float x_center = feat_blob[basic_pos + 0];
float y_center = feat_blob[basic_pos + 1];
float w = feat_blob[basic_pos + 2];
float h = feat_blob[basic_pos + 3];
float x0 = x_center - w * 0.5f;
float y0 = y_center - h * 0.5f;
// float *landmarks=&feat_blob[basic_pos +5]; //v5
float *landmarks=&feat_blob[basic_pos +5+num_class];
Object obj;
obj.rect.x = x0;
obj.rect.y = y0;
obj.rect.width = w;
obj.rect.height = h;
obj.label = cls_id;
obj.prob = score;
int k = 0;
// for (int i = 0; i<ckpt; i++)
// {
// obj.landmarks[k++]=landmarks[i];
// }
obj.landmarks[0]=landmarks[0];
obj.landmarks[1]=landmarks[1];
obj.landmarks[2]=landmarks[3];
obj.landmarks[3]=landmarks[4];
obj.landmarks[4]=landmarks[6];
obj.landmarks[5]=landmarks[7];
obj.landmarks[6]=landmarks[9];
obj.landmarks[7]=landmarks[10];
objects.push_back(obj);
}
}
}
static void generate_proposals(float *feat_blob, float prob_threshold,
std::vector<bbox> &bboxes,int OUTPUT_CANDIDATES) {
const int num_class = 3;
const int num_anchors = OUTPUT_CANDIDATES;
for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++) {
// const int basic_pos = anchor_idx * (num_class + 5 + 1);
// float box_objectness = feat_blob[basic_pos + 4];
// int cls_id = feat_blob[basic_pos + 5];
// float score = feat_blob[basic_pos + 5 + 1 + cls_id];
// score *= box_objectness;
const int basic_pos = anchor_idx * (num_class + 5 + 8); // 5 = x, y, w, h, object_score; 8 = 4 keypoints as (x, y)
float box_objectness = feat_blob[basic_pos + 4];
int cls_id = find_max(&feat_blob[basic_pos +5+8],num_class); // pick the class with the highest score
float score = feat_blob[basic_pos + 5 +8 + cls_id];
score *= box_objectness;
if (score > prob_threshold) {
// yolox/models/yolo_head.py decode logic
float x_center = feat_blob[basic_pos + 0];
float y_center = feat_blob[basic_pos + 1];
float w = feat_blob[basic_pos + 2];
float h = feat_blob[basic_pos + 3];
float x0 = x_center - w * 0.5f;
float y0 = y_center - h * 0.5f;
float *landmarks=&feat_blob[basic_pos +5];
bbox obj;
obj.x1=x0;
obj.y1=y0;
obj.x2=x0+w;
obj.y2=y0+h;
obj.score = score;
for (int i = 0; i<8; i++)
{
obj.landmarks[i]=landmarks[i];
}
bboxes.push_back(obj);
}
}
}
float* blobFromImage(cv::Mat& img){
float* blob = new float[img.total()*3];
int channels = 3;
int img_h = img.rows;
int img_w = img.cols;
int k = 0;
for (size_t c = 0; c < channels; c++)
{
for (size_t h = 0; h < img_h; h++)
{
for (size_t w = 0; w < img_w; w++)
{
// blob[c * img_w * img_h + h * img_w + w] =
// (float)img.at<cv::Vec3b>(h, w)[c];
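// Index 2-c swaps OpenCV's BGR channel order to RGB while normalizing to [0, 1].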
blob[k++] =
(float)img.at<cv::Vec3b>(h, w)[2-c]/255.0;
}
}
}
return blob;
}
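// Preprocessing for the recognition net: (x/255 - mean) / std per channel, keeping OpenCV's BGR channel order.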
void blobFromImage_plate(cv::Mat& img,float mean_value,float std_value,float *blob)
{
// float* blob = new float[img.total()*3];
// int channels = NUM_CLASSES;
int img_h = img.rows;
int img_w = img.cols;
int k = 0;
for (size_t c = 0; c <3; c++)
{
for (size_t h = 0; h < img_h; h++)
{
for (size_t w = 0; w < img_w; w++)
{
blob[k++] =
((float)img.at<cv::Vec3b>(h, w)[c]/255.0-mean_value)/std_value;
}
}
}
// return blob;
}
static void decode_outputs(float* prob, std::vector<Object>& objects, float scale, const int img_w, const int img_h,int OUTPUT_CANDIDATES,int top,int left) {
std::vector<Object> proposals;
std::vector<bbox> bboxes;
generate_yolox_proposals(prob, BBOX_CONF_THRESH, proposals,OUTPUT_CANDIDATES);
// generate_proposals(prob, BBOX_CONF_THRESH, bboxes,OUTPUT_CANDIDATES);
// std::cout << "num of boxes before nms: " << proposals.size() << std::endl;
qsort_descent_inplace(proposals);
// std::sort(bboxes.begin(),bboxes.end(),my_func);
std::vector<int> picked;
nms_sorted_bboxes(proposals, picked, NMS_THRESH);
// auto choice =my_nms(bboxes, NMS_THRESH);
int count = picked.size();
// std::cout << "num of boxes: " << count << std::endl;
objects.resize(count);
for (int i = 0; i < count; i++)
{
objects[i] = proposals[picked[i]];
// adjust offset to original unpadded
float x0 = (objects[i].rect.x-left) / scale;
float y0 = (objects[i].rect.y-top) / scale;
float x1 = (objects[i].rect.x + objects[i].rect.width-left) / scale;
float y1 = (objects[i].rect.y + objects[i].rect.height-top) / scale;
float *landmarks = objects[i].landmarks;
for(int k = 0; k<8; k++) // undo the letterbox offset/scale for the 4 keypoints (even index = x, odd = y)
{
if(k%2==0)
landmarks[k]=(landmarks[k]-left)/scale;
else
landmarks[k]=(landmarks[k]-top)/scale;
}
// clip
x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);
objects[i].rect.x = x0;
objects[i].rect.y = y0;
objects[i].rect.width = x1 - x0;
objects[i].rect.height = y1 - y0;
}
}
const float color_list[4][3] =
{
{255, 0, 0},
{0, 255, 0},
{0, 0, 255},
{0, 255, 255},
};
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects, std::string f)
{
static const char* class_names[] = {
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
"hair drier", "toothbrush"
};
cv::Mat image = bgr.clone();
for (size_t i = 0; i < objects.size(); i++)
{
const Object& obj = objects[i];
// fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
// obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);
cv::Scalar color = cv::Scalar(color_list[obj.label][0], color_list[obj.label][1], color_list[obj.label][2]);
float c_mean = cv::mean(color)[0];
cv::Scalar txt_color;
if (c_mean > 0.5){
txt_color = cv::Scalar(0, 0, 0);
}else{
txt_color = cv::Scalar(255, 255, 255);
}
cv::rectangle(image, obj.rect, color * 255, 2);
char text[256];
sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100);
int baseLine = 0;
cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
cv::Scalar txt_bk_color = color * 0.7 * 255;
int x = obj.rect.x;
int y = obj.rect.y + 1;
//int y = obj.rect.y - label_size.height - baseLine;
if (y > image.rows)
y = image.rows;
//if (x + label_size.width > image.cols)
//x = image.cols - label_size.width;
cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
txt_bk_color, -1);
cv::putText(image, text, cv::Point(x, y + label_size.height),
cv::FONT_HERSHEY_SIMPLEX, 0.4, txt_color, 1);
}
int pos = f.find_last_of("/");
auto substr = f.substr(pos+1);
std::string savePath = "/mnt/Gpan/Mydata/pytorchPorject/yoloxNew/newYoloxCpp/result_pic/"+substr;
cv::imwrite(savePath, image);
// fprintf(stderr, "save vis file\n");
// cv::imshow("image", image);
// cv::waitKey(0);
}
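// Synchronous single-image inference helper: allocates device buffers, copies the blob in,
// enqueues the batch, copies the result back, then frees everything. (The detection path in
// main() uses doInference_cu with persistent buffers instead.)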
void doInference(IExecutionContext& context, float* input, float* output, const int output_size, cv::Size input_shape,const char *INPUT_BLOB_NAME,const char *OUTPUT_BLOB_NAME) {
const ICudaEngine& engine = context.getEngine();
// Pointers to input and output device buffers to pass to engine.
// Engine requires exactly IEngine::getNbBindings() number of buffers.
assert(engine.getNbBindings() == 2);
void* buffers[2];
// In order to bind the buffers, we need to know the names of the input and output tensors.
// Note that indices are guaranteed to be less than IEngine::getNbBindings()
const int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME);
assert(engine.getBindingDataType(inputIndex) == nvinfer1::DataType::kFLOAT);
const int outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);
assert(engine.getBindingDataType(outputIndex) == nvinfer1::DataType::kFLOAT);
int mBatchSize = engine.getMaxBatchSize();
// Create GPU buffers on device
CHECK(cudaMalloc(&buffers[inputIndex], 3 * input_shape.height * input_shape.width * sizeof(float)));
CHECK(cudaMalloc(&buffers[outputIndex], output_size*sizeof(float)));
// Create stream
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));
// DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
CHECK(cudaMemcpyAsync(buffers[inputIndex], input, 3 * input_shape.height * input_shape.width * sizeof(float), cudaMemcpyHostToDevice, stream));
context.enqueue(1, buffers, stream, nullptr);
// context.enqueueV2( buffers, stream, nullptr);
CHECK(cudaMemcpyAsync(output, buffers[outputIndex], output_size * sizeof(float), cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);
// Release stream and buffers
cudaStreamDestroy(stream);
CHECK(cudaFree(buffers[inputIndex]));
CHECK(cudaFree(buffers[outputIndex]));
}
float getNorm2(float x,float y)
{
return sqrt(x*x+y*y);
}
cv::Mat getTransForm(cv::Mat &src_img, cv::Point2f order_rect[4]) // perspective transform: rectify the plate quadrilateral to an upright rectangle
{
cv::Point2f w1=order_rect[0]-order_rect[1];
cv::Point2f w2=order_rect[2]-order_rect[3];
auto width1 = getNorm2(w1.x,w1.y);
auto width2 = getNorm2(w2.x,w2.y);
auto maxWidth = std::max(width1,width2);
cv::Point2f h1=order_rect[0]-order_rect[3];
cv::Point2f h2=order_rect[1]-order_rect[2];
auto height1 = getNorm2(h1.x,h1.y);
auto height2 = getNorm2(h2.x,h2.y);
auto maxHeight = std::max(height1,height2);
// perspective transform: map the 4 source corners to an axis-aligned rectangle
std::vector<cv::Point2f> pts_ori(4);
std::vector<cv::Point2f> pts_std(4);
pts_ori[0]=order_rect[0];
pts_ori[1]=order_rect[1];
pts_ori[2]=order_rect[2];
pts_ori[3]=order_rect[3];
pts_std[0]=cv::Point2f(0,0);
pts_std[1]=cv::Point2f(maxWidth,0);
pts_std[2]=cv::Point2f(maxWidth,maxHeight);
pts_std[3]=cv::Point2f(0,maxHeight);
cv::Mat M = cv::getPerspectiveTransform(pts_ori,pts_std);
cv::Mat dstimg;
cv::warpPerspective(src_img,dstimg,M,cv::Size(maxWidth,maxHeight));
return dstimg;
}
cv::Mat get_split_merge(cv::Mat &img) // double-layer plate: split the two text rows and stitch them side by side
{
cv::Rect upper_rect_area = cv::Rect(0,0,img.cols,int(5.0/12*img.rows)); // top row: upper 5/12 of the plate
cv::Rect lower_rect_area = cv::Rect(0,int(1.0/3*img.rows),img.cols,img.rows-int(1.0/3*img.rows)); // bottom row: lower 2/3 (the regions overlap slightly)
cv::Mat img_upper = img(upper_rect_area);
cv::Mat img_lower =img(lower_rect_area);
cv::resize(img_upper,img_upper,img_lower.size());
cv::Mat out(img_lower.rows,img_lower.cols+img_upper.cols, CV_8UC3, cv::Scalar(114, 114, 114));
img_upper.copyTo(out(cv::Rect(0,0,img_upper.cols,img_upper.rows)));
img_lower.copyTo(out(cv::Rect(img_upper.cols,0,img_lower.cols,img_lower.rows)));
return out;
}
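// Greedy CTC-style decoding: "#" (index 0) is the blank symbol; blanks are dropped and
// consecutive repeats of the same character are collapsed.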
std::string decode_outputs(float *prob,int output_size)
{
std::string plate ="";
std::string pre_str ="#";
for (int i = 0; i<output_size; i++)
{
int index = int(prob[i]);
if (plate_string[index]!="#" && plate_string[index]!=pre_str)
plate+=plate_string[index];
pre_str = plate_string[index];
}
return plate;
}
std::string decode_outputs_pingyin(float *prob,int output_size) // same decoding, emitting pinyin placeholders instead of Chinese characters
{
std::string plate ="";
std::string pre_str ="#";
for (int i = 0; i<output_size; i++)
{
int index = int(prob[i]);
if (plate_string_yinwen[index]!="#" && plate_string_yinwen[index]!=pre_str)
plate+=plate_string_yinwen[index];
pre_str = plate_string_yinwen[index];
}
return plate;
}
void doInference_cu(IExecutionContext& context, cudaStream_t& stream, void **buffers, float* output, int batchSize,int OUTPUT_SIZE) {
// infer on the batch asynchronously, and DMA output back to host
context.enqueue(batchSize, buffers, stream, nullptr);
CHECK(cudaMemcpyAsync(output, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);
}
int main(int argc, char** argv) {
cudaSetDevice(DEVICE);
char *trtModelStreamDet{nullptr};
char *trtModelStreamRec{nullptr};
size_t size{0};
size_t size_rec{0};
// argv[1]="/mnt/Gu/xiaolei/cplusplus/trt_project/chinese_plate_recoginition/build/plate_detect.trt";
// argv[2]="/mnt/Gu/xiaolei/cplusplus/trt_project/chinese_plate_recoginition/build/plate_rec.trt";
// argv[3]="/mnt/Gu/xiaolei/cplusplus/trt_project/chinese_plate_recoginition/test_imgs/single_blue.jpg";
// argv[4]="output.jpg";
const std::string engine_file_path {argv[1]};
std::ifstream file(engine_file_path, std::ios::binary);
if (file.good()) {
file.seekg(0, file.end);
size = file.tellg();
file.seekg(0, file.beg);
trtModelStreamDet = new char[size];
assert(trtModelStreamDet);
file.read(trtModelStreamDet, size);
file.close();
}
const std::string engine_file_path_rec {argv[2]};
std::ifstream file_rec(engine_file_path_rec, std::ios::binary);
if (file_rec.good()) {
file_rec.seekg(0, file_rec.end);
size_rec = file_rec.tellg();
file_rec.seekg(0, file_rec.beg);
trtModelStreamRec = new char[size_rec];
assert(trtModelStreamRec);
file_rec.read(trtModelStreamRec, size_rec);
file_rec.close();
}
// initialize the TensorRT runtime/engine/context for the detection model
IRuntime* runtime_det = createInferRuntime(gLogger);
assert(runtime_det != nullptr);
ICudaEngine* engine_det = runtime_det->deserializeCudaEngine(trtModelStreamDet, size);
assert(engine_det != nullptr);
IExecutionContext* context_det = engine_det->createExecutionContext();
assert(context_det != nullptr);
delete[] trtModelStreamDet;
// initialize the TensorRT runtime/engine/context for the recognition model
IRuntime* runtime_rec = createInferRuntime(gLogger);
assert(runtime_rec!= nullptr);
ICudaEngine* engine_rec = runtime_rec->deserializeCudaEngine(trtModelStreamRec, size_rec);
assert(engine_rec != nullptr);
IExecutionContext* context_rec = engine_rec->createExecutionContext();
assert(context_rec != nullptr);
delete[] trtModelStreamRec;
float *buffers[2];
const int inputIndex = engine_det->getBindingIndex(INPUT_BLOB_NAME);
const int outputIndex = engine_det->getBindingIndex(OUTPUT_BLOB_NAME);
assert(inputIndex == 0);
assert(outputIndex == 1);
// Create GPU buffers on device
auto out_dims = engine_det->getBindingDimensions(1);
auto output_size = 1;
int OUTPUT_CANDIDATES = out_dims.d[1];
for(int j=0;j<out_dims.nbDims;j++) {
output_size *= out_dims.d[j];
}
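// The detector output binding is [batch, num_candidates, 5 + num_classes + 12 keypoint values];
// flatten it to an element count for the host-side buffer.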
CHECK(cudaMalloc((void**)&buffers[inputIndex], 3 * INPUT_H * INPUT_W * sizeof(float)));
CHECK(cudaMalloc((void**)&buffers[outputIndex], output_size * sizeof(float)));
// Create stream
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));
uint8_t* img_host = nullptr;
uint8_t* img_device = nullptr;
// prepare input data cache in pinned memory
CHECK(cudaMallocHost((void**)&img_host, MAX_IMAGE_INPUT_SIZE_THRESH * 3));
// prepare input data cache in device memory
CHECK(cudaMalloc((void**)&img_device, MAX_IMAGE_INPUT_SIZE_THRESH * 3));
auto out_dims_rec = engine_rec->getBindingDimensions(1);
auto output_size_rec = 1;
int OUTPUT_CANDIDATES_REC = out_dims_rec.d[1];
for(int j=0;j<out_dims_rec.nbDims;j++) {
output_size_rec *= out_dims_rec.d[j];
}
static float* prob = new float[output_size];
static float* prob_rec = new float[output_size_rec];
// recognition model parameters
int plate_rec_input_w = 168;
int plate_rec_input_h = 48;
float* blob_rec=new float[plate_rec_input_w*plate_rec_input_h*3];
float mean_value=0.588;
float std_value =0.193;
const char* plate_rec_input_name = "images"; // ONNX input name
const char* plate_rec_out_name= "output"; // ONNX output name
// end of recognition model parameters
cv::Point2f rect[4];
cv::Point2f order_rect[4];
cv::Point point[1][4];
// std::string imgPath ="/mnt/Gpan/Mydata/pytorchPorject/Chinese_license_plate_detection_recognition/imgs";
std::string input_image_path=argv[3];
std::string imgPath=argv[3];
std::vector<std::string> imagList;
std::vector<std::string>fileType{"jpg","png"};
readFileList(const_cast<char *>(imgPath.c_str()),imagList,fileType);
double sumTime = 0;
int index = 0;
for (auto &input_image_path:imagList)
{
cv::Mat img = cv::imread(input_image_path);
double begin_time = cv::getTickCount();
float *buffer_idx = (float*)buffers[inputIndex];
size_t size_image = img.cols * img.rows * 3;
size_t size_image_dst = INPUT_H * INPUT_W * 3;
memcpy(img_host, img.data, size_image);
CHECK(cudaMemcpyAsync(img_device, img_host, size_image, cudaMemcpyHostToDevice, stream));
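// preprocess_kernel_img (defined in preprocess.cu, not shown in this diff) performs the
// letterbox resize and normalization directly on the GPU.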
preprocess_kernel_img(img_device, img.cols, img.rows, buffer_idx, INPUT_W, INPUT_H, stream);
double time_pre = cv::getTickCount();
double time_pre_=(time_pre-begin_time)/cv::getTickFrequency()*1000;
// std::cout<<"preprocessing time is "<<time_pre_<<" ms"<<std::endl;
doInference_cu(*context_det,stream, (void**)buffers,prob,1,output_size);
float r = std::min(INPUT_W / (img.cols*1.0), INPUT_H / (img.rows*1.0));
// r = std::min(r, 1.0f);
int unpad_w = r * img.cols;
int unpad_h = r * img.rows;
int left = (INPUT_W-unpad_w)/2;
int top = (INPUT_H-unpad_h)/2;
// if (index)
// {
// double use_time =(cv::getTickCount()-begin_time)/cv::getTickFrequency()*1000;
// sumTime+=use_time;
// }
int img_w = img.cols;
int img_h = img.rows;
// int top=0;
// int left= 0;
// cv::Mat pr_img = static_resize(img,top,left);
// float* blob_detect;
// blob_detect = blobFromImage(pr_img);
float scale = std::min(INPUT_W / (img.cols*1.0), INPUT_H / (img.rows*1.0));
//run inference
// auto start = cv::getTickCount();
// doInference(*context_det, blob_detect, prob, output_size, pr_img.size(),INPUT_BLOB_NAME,OUTPUT_BLOB_NAME);
// auto end = cv::getTickCount();
// if (index)
// sumTime+=double((end-begin_time)/cv::getTickFrequency()*1000);
// std::cout << double((end-start)/cv::getTickFrequency()*1000) << "ms" << std::endl;
std::vector<Object> objects;
decode_outputs(prob, objects, scale, img_w, img_h,OUTPUT_CANDIDATES,top,left);
// std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
// std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
std::cout<<input_image_path<<" ";
for (int i = 0; i<objects.size(); i++)
{
// cv::rectangle(img, objects[i].rect, cv::Scalar(0,255,0), 2);
for (int j= 0; j<4; j++)
{
// cv::Scalar color = cv::Scalar(color_list[j][0], color_list[j][1], color_list[j][2]);
// cv::circle(img,cv::Point(objects[i].landmarks[2*j], objects[i].landmarks[2*j+1]),5,color,-1);
order_rect[j]=cv::Point(objects[i].landmarks[2*j],objects[i].landmarks[2*j+1]);
}
cv::Mat roiImg = getTransForm(img,order_rect); // perspective-rectify the plate region using the 4 keypoints
int label = objects[i].label;
if (label) // class 1 = double-layer plate: split and re-stitch before recognition
roiImg=get_split_merge(roiImg);
// cv::imwrite("roi.jpg",roiImg);
cv::resize(roiImg,roiImg,cv::Size(plate_rec_input_w,plate_rec_input_h));
cv::Mat pr_img =roiImg;
// std::cout << "blob image" << std::endl;
auto rec_b = cv::getTickCount();
blobFromImage_plate(pr_img,mean_value,std_value,blob_rec);
auto rec_e = cv::getTickCount();
auto rec_gap = (rec_e-rec_b)/cv::getTickFrequency()*1000;
doInference(*context_rec, blob_rec, prob_rec, output_size_rec, pr_img.size(),plate_rec_input_name,plate_rec_out_name);
auto plate_number = decode_outputs(prob_rec,output_size_rec);
auto plate_number_pinyin= decode_outputs_pingyin(prob_rec,output_size_rec);
cv::Point origin;
origin.x = objects[i].rect.x;
origin.y = objects[i].rect.y;
cv::putText(img, plate_number_pinyin, origin, cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 0), 2, 8, 0);
std::cout<<" "<<plate_number;
}
double end_time = cv::getTickCount();
auto time_gap = (end_time-begin_time)/cv::getTickFrequency()*1000;
std::cout<<" time_gap: "<<time_gap<<"ms ";
if (index)
{
// double use_time =(cv::getTickCount()-begin_time)/cv::getTickFrequency()*1000;
sumTime+=time_gap;
}
std::cout<<std::endl;
// delete [] blob_detect;
index+=1;
}
// cv::imwrite("out.jpg",img);
// destroy the engine
std::cout<<"averageTime:"<<(sumTime/(imagList.size()-1))<<"ms"<<std::endl;
context_det->destroy();
engine_det->destroy();
runtime_det->destroy();
context_rec->destroy();
engine_rec->destroy();
runtime_rec->destroy();
delete [] blob_rec;
cudaStreamDestroy(stream);
CHECK(cudaFree(img_device));
CHECK(cudaFreeHost(img_host));
CHECK(cudaFree(buffers[inputIndex]));
CHECK(cudaFree(buffers[outputIndex]));
return 0;
}

tensorrt/include/logging.h (new file, 505 lines)

@@ -0,0 +1,505 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_LOGGING_H
#define TENSORRT_LOGGING_H
#include "NvInferRuntimeCommon.h"
#include <cassert>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <sstream>
#include <string>
using Severity = nvinfer1::ILogger::Severity;
class LogStreamConsumerBuffer : public std::stringbuf
{
public:
LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog)
: mOutput(stream)
, mPrefix(prefix)
, mShouldLog(shouldLog)
{
}
LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other)
: mOutput(other.mOutput)
{
}
~LogStreamConsumerBuffer()
{
// std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence
// std::streambuf::pptr() gives a pointer to the current position of the output sequence
// if the pointer to the beginning is not equal to the pointer to the current position,
// call putOutput() to log the output to the stream
if (pbase() != pptr())
{
putOutput();
}
}
// synchronizes the stream buffer and returns 0 on success
// synchronizing the stream buffer consists of inserting the buffer contents into the stream,
// resetting the buffer and flushing the stream
virtual int sync()
{
putOutput();
return 0;
}
void putOutput()
{
if (mShouldLog)
{
// prepend timestamp
std::time_t timestamp = std::time(nullptr);
tm* tm_local = std::localtime(&timestamp);
std::cout << "[";
std::cout << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
std::cout << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
// std::stringbuf::str() gets the string contents of the buffer
// insert the buffer contents pre-appended by the appropriate prefix into the stream
mOutput << mPrefix << str();
// set the buffer to empty
str("");
// flush the stream
mOutput.flush();
}
}
void setShouldLog(bool shouldLog)
{
mShouldLog = shouldLog;
}
private:
std::ostream& mOutput;
std::string mPrefix;
bool mShouldLog;
};
//!
//! \class LogStreamConsumerBase
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer
//!
class LogStreamConsumerBase
{
public:
LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog)
: mBuffer(stream, prefix, shouldLog)
{
}
protected:
LogStreamConsumerBuffer mBuffer;
};
//!
//! \class LogStreamConsumer
//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages.
//! Order of base classes is LogStreamConsumerBase and then std::ostream.
//! This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field
//! in LogStreamConsumer and then the address of the buffer is passed to std::ostream.
//! This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream.
//! Please do not change the order of the parent classes.
//!
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream
{
public:
//! \brief Creates a LogStreamConsumer which logs messages with level severity.
//! Reportable severity determines if the messages are severe enough to be logged.
LogStreamConsumer(Severity reportableSeverity, Severity severity)
: LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity)
, std::ostream(&mBuffer) // links the stream buffer with the stream
, mShouldLog(severity <= reportableSeverity)
, mSeverity(severity)
{
}
LogStreamConsumer(LogStreamConsumer&& other)
: LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog)
, std::ostream(&mBuffer) // links the stream buffer with the stream
, mShouldLog(other.mShouldLog)
, mSeverity(other.mSeverity)
{
}
void setReportableSeverity(Severity reportableSeverity)
{
mShouldLog = mSeverity <= reportableSeverity;
mBuffer.setShouldLog(mShouldLog);
}
private:
static std::ostream& severityOstream(Severity severity)
{
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
static std::string severityPrefix(Severity severity)
{
switch (severity)
{
case Severity::kINTERNAL_ERROR: return "[F] ";
case Severity::kERROR: return "[E] ";
case Severity::kWARNING: return "[W] ";
case Severity::kINFO: return "[I] ";
case Severity::kVERBOSE: return "[V] ";
default: assert(0); return "";
}
}
bool mShouldLog;
Severity mSeverity;
};
//! \class Logger
//!
//! \brief Class which manages logging of TensorRT tools and samples
//!
//! \details This class provides a common interface for TensorRT tools and samples to log information to the console,
//! and supports logging two types of messages:
//!
//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal)
//! - Test pass/fail messages
//!
//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is
//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location.
//!
//! In the future, this class could be extended to support dumping test results to a file in some standard format
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run).
//!
//! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger
//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT
//! library and messages coming from the sample.
//!
//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger
//! object.
class Logger : public nvinfer1::ILogger
{
public:
Logger(Severity severity = Severity::kWARNING)
: mReportableSeverity(severity)
{
}
//!
//! \enum TestResult
//! \brief Represents the state of a given test
//!
enum class TestResult
{
kRUNNING, //!< The test is running
kPASSED, //!< The test passed
kFAILED, //!< The test failed
kWAIVED //!< The test was waived
};
//!
//! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger
//! \return The nvinfer1::ILogger associated with this Logger
//!
//! TODO Once all samples are updated to use this method to register the logger with TensorRT,
//! we can eliminate the inheritance of Logger from ILogger
//!
nvinfer1::ILogger& getTRTLogger()
{
return *this;
}
//!
//! \brief Implementation of the nvinfer1::ILogger::log() virtual method
//!
//! Note samples should not be calling this function directly; it will eventually go away once we eliminate the
//! inheritance from nvinfer1::ILogger
//!
// void log(Severity severity, const char* msg) override
void log(Severity severity, nvinfer1::AsciiChar const* msg) noexcept
{
LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl;
}
//!
//! \brief Method for controlling the verbosity of logging output
//!
//! \param severity The logger will only emit messages that have severity of this level or higher.
//!
void setReportableSeverity(Severity severity)
{
mReportableSeverity = severity;
}
//!
//! \brief Opaque handle that holds logging information for a particular test
//!
//! This object is an opaque handle to information used by the Logger to print test results.
//! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used
//! with Logger::reportTest{Start,End}().
//!
class TestAtom
{
public:
TestAtom(TestAtom&&) = default;
private:
friend class Logger;
TestAtom(bool started, const std::string& name, const std::string& cmdline)
: mStarted(started)
, mName(name)
, mCmdline(cmdline)
{
}
bool mStarted;
std::string mName;
std::string mCmdline;
};
//!
//! \brief Define a test for logging
//!
//! \param[in] name The name of the test. This should be a string starting with
//! "TensorRT" and containing dot-separated strings containing
//! the characters [A-Za-z0-9_].
//! For example, "TensorRT.sample_googlenet"
//! \param[in] cmdline The command line used to reproduce the test
//
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
//!
static TestAtom defineTest(const std::string& name, const std::string& cmdline)
{
return TestAtom(false, name, cmdline);
}
//!
//! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments
//! as input
//!
//! \param[in] name The name of the test
//! \param[in] argc The number of command-line arguments
//! \param[in] argv The array of command-line arguments (given as C strings)
//!
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
static TestAtom defineTest(const std::string& name, int argc, char const* const* argv)
{
auto cmdline = genCmdlineString(argc, argv);
return defineTest(name, cmdline);
}
//!
//! \brief Report that a test has started.
//!
//! \pre reportTestStart() has not been called yet for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has started
//!
static void reportTestStart(TestAtom& testAtom)
{
reportTestResult(testAtom, TestResult::kRUNNING);
assert(!testAtom.mStarted);
testAtom.mStarted = true;
}
//!
//! \brief Report that a test has ended.
//!
//! \pre reportTestStart() has been called for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has ended
//! \param[in] result The result of the test. Should be one of TestResult::kPASSED,
//! TestResult::kFAILED, TestResult::kWAIVED
//!
static void reportTestEnd(const TestAtom& testAtom, TestResult result)
{
assert(result != TestResult::kRUNNING);
assert(testAtom.mStarted);
reportTestResult(testAtom, result);
}
static int reportPass(const TestAtom& testAtom)
{
reportTestEnd(testAtom, TestResult::kPASSED);
return EXIT_SUCCESS;
}
static int reportFail(const TestAtom& testAtom)
{
reportTestEnd(testAtom, TestResult::kFAILED);
return EXIT_FAILURE;
}
static int reportWaive(const TestAtom& testAtom)
{
reportTestEnd(testAtom, TestResult::kWAIVED);
return EXIT_SUCCESS;
}
static int reportTest(const TestAtom& testAtom, bool pass)
{
return pass ? reportPass(testAtom) : reportFail(testAtom);
}
Severity getReportableSeverity() const
{
return mReportableSeverity;
}
private:
//!
//! \brief returns an appropriate string for prefixing a log message with the given severity
//!
static const char* severityPrefix(Severity severity)
{
switch (severity)
{
case Severity::kINTERNAL_ERROR: return "[F] ";
case Severity::kERROR: return "[E] ";
case Severity::kWARNING: return "[W] ";
case Severity::kINFO: return "[I] ";
case Severity::kVERBOSE: return "[V] ";
default: assert(0); return "";
}
}
//!
//! \brief returns an appropriate string for prefixing a test result message with the given result
//!
static const char* testResultString(TestResult result)
{
switch (result)
{
case TestResult::kRUNNING: return "RUNNING";
case TestResult::kPASSED: return "PASSED";
case TestResult::kFAILED: return "FAILED";
case TestResult::kWAIVED: return "WAIVED";
default: assert(0); return "";
}
}
//!
//! \brief returns an appropriate output stream (cout or cerr) to use with the given severity
//!
static std::ostream& severityOstream(Severity severity)
{
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
//!
//! \brief method that implements logging test results
//!
static void reportTestResult(const TestAtom& testAtom, TestResult result)
{
severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
<< testAtom.mCmdline << std::endl;
}
//!
//! \brief generate a command line string from the given (argc, argv) values
//!
static std::string genCmdlineString(int argc, char const* const* argv)
{
std::stringstream ss;
for (int i = 0; i < argc; i++)
{
if (i > 0)
ss << " ";
ss << argv[i];
}
return ss.str();
}
Severity mReportableSeverity;
};
namespace
{
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE
//!
//! Example usage:
//!
//! LOG_VERBOSE(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO
//!
//! Example usage:
//!
//! LOG_INFO(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_INFO(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING
//!
//! Example usage:
//!
//! LOG_WARN(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_WARN(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR
//!
//! Example usage:
//!
//! LOG_ERROR(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_ERROR(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR
// ("fatal" severity)
//!
//! Example usage:
//!
//! LOG_FATAL(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_FATAL(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR);
}
} // anonymous namespace
#endif // TENSORRT_LOGGING_H

tensorrt/include/utils.hpp (new file, 115 lines)

@@ -0,0 +1,115 @@
#ifndef _UTILS_H_
#define _UTILS_H_
#include <vector>
#include <string>
#include <iostream>
#include <dirent.h>
#include <sys/types.h>
#include <string.h>
#include <sys/stat.h>
#include <opencv2/opencv.hpp>
struct boundingBox
{
float x;
float y;
float w;
float h;
int label;
float score;
};
std::string getHouZhui(std::string fileName);
int readFileList(char *basePath,std::vector<std::string> &fileList,std::vector<std::string> fileType);
void draw_rect(const cv::Mat& image, const std::vector<boundingBox>bboxes,const char* class_names[]);
bool cmpBox(boundingBox b1, boundingBox b2);
float getIou(boundingBox b1,boundingBox b2) ;
void myNms(std::vector<boundingBox>&bboxes,float score);
const float color_list1[80][3] =
{
{0.000, 0.447, 0.741},
{0.850, 0.325, 0.098},
{0.929, 0.694, 0.125},
{0.494, 0.184, 0.556},
{0.466, 0.674, 0.188},
{0.301, 0.745, 0.933},
{0.635, 0.078, 0.184},
{0.300, 0.300, 0.300},
{0.600, 0.600, 0.600},
{1.000, 0.000, 0.000},
{1.000, 0.500, 0.000},
{0.749, 0.749, 0.000},
{0.000, 1.000, 0.000},
{0.000, 0.000, 1.000},
{0.667, 0.000, 1.000},
{0.333, 0.333, 0.000},
{0.333, 0.667, 0.000},
{0.333, 1.000, 0.000},
{0.667, 0.333, 0.000},
{0.667, 0.667, 0.000},
{0.667, 1.000, 0.000},
{1.000, 0.333, 0.000},
{1.000, 0.667, 0.000},
{1.000, 1.000, 0.000},
{0.000, 0.333, 0.500},
{0.000, 0.667, 0.500},
{0.000, 1.000, 0.500},
{0.333, 0.000, 0.500},
{0.333, 0.333, 0.500},
{0.333, 0.667, 0.500},
{0.333, 1.000, 0.500},
{0.667, 0.000, 0.500},
{0.667, 0.333, 0.500},
{0.667, 0.667, 0.500},
{0.667, 1.000, 0.500},
{1.000, 0.000, 0.500},
{1.000, 0.333, 0.500},
{1.000, 0.667, 0.500},
{1.000, 1.000, 0.500},
{0.000, 0.333, 1.000},
{0.000, 0.667, 1.000},
{0.000, 1.000, 1.000},
{0.333, 0.000, 1.000},
{0.333, 0.333, 1.000},
{0.333, 0.667, 1.000},
{0.333, 1.000, 1.000},
{0.667, 0.000, 1.000},
{0.667, 0.333, 1.000},
{0.667, 0.667, 1.000},
{0.667, 1.000, 1.000},
{1.000, 0.000, 1.000},
{1.000, 0.333, 1.000},
{1.000, 0.667, 1.000},
{0.333, 0.000, 0.000},
{0.500, 0.000, 0.000},
{0.667, 0.000, 0.000},
{0.833, 0.000, 0.000},
{1.000, 0.000, 0.000},
{0.000, 0.167, 0.000},
{0.000, 0.333, 0.000},
{0.000, 0.500, 0.000},
{0.000, 0.667, 0.000},
{0.000, 0.833, 0.000},
{0.000, 1.000, 0.000},
{0.000, 0.000, 0.167},
{0.000, 0.000, 0.333},
{0.000, 0.000, 0.500},
{0.000, 0.000, 0.667},
{0.000, 0.000, 0.833},
{0.000, 0.000, 1.000},
{0.000, 0.000, 0.000},
{0.143, 0.143, 0.143},
{0.286, 0.286, 0.286},
{0.429, 0.429, 0.429},
{0.571, 0.571, 0.571},
{0.714, 0.714, 0.714},
{0.857, 0.857, 0.857},
{0.000, 0.447, 0.741},
{0.314, 0.717, 0.741},
{0.50, 0.5, 0}
};
#endif

tensorrt/onnx2trt/CMakeLists.txt (new file, 7 lines)

@@ -0,0 +1,7 @@
cmake_minimum_required(VERSION 3.10)
project(onnx2trt)
add_executable(onnx2trt onnx2trt.cpp)
target_link_libraries(onnx2trt nvinfer)
target_link_libraries(onnx2trt cudart)
target_link_libraries(onnx2trt nvonnxparser)

tensorrt/onnx2trt/onnx2trt.cpp (new file, 182 lines)

@@ -0,0 +1,182 @@
#include <fstream>
#include <iostream>
#include <sstream>
#include <numeric>
// #include <chrono>
#include <vector>
#include <opencv2/opencv.hpp>
// #include <dirent.h>
#include "NvInfer.h"
#include "NvOnnxParser.h"
// #include "NvInferRuntime.h"
#include "logging.h"
#include "cuda_runtime_api.h"
using namespace nvinfer1;
using namespace std;
static Logger gLogger;
const char* INPUT_BLOB_NAME = "input";
const char* OUTPUT_BLOB_NAME = "output";
void saveToTrtModel(const char * TrtSaveFileName,IHostMemory*trtModelStream)
{
std::ofstream out(TrtSaveFileName, std::ios::binary);
if (!out.is_open())
{
std::cout << "打开文件失败!" <<std:: endl;
}
out.write(reinterpret_cast<const char*>(trtModelStream->data()), trtModelStream->size());
out.close();
}
void onnxToTRTModel(const std::string& modelFile,unsigned int maxBatchSize,IHostMemory*& trtModelStream,const char * TrtSaveFileName)
{
int verbosity = (int) nvinfer1::ILogger::Severity::kWARNING;
// create the builder
IBuilder* builder = createInferBuilder(gLogger); // create the builder (a pointer to an IBuilder object)
IBuilderConfig *config = builder->createBuilderConfig();
const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); // required by the ONNX parser; omitting it causes an error
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(explicitBatch); // creates the INetworkDefinition object that the parser will populate
auto parser = nvonnxparser::createParser(*network, gLogger.getTRTLogger()); // create the ONNX parser
//Optional - uncomment below lines to view network layer information
//config->setPrintLayerInfo(true);
//parser->reportParsingInfo();
if (!parser->parseFromFile(modelFile.c_str(), verbosity)) // parse the ONNX file and populate the network
{
string msg("failed to parse onnx file");
gLogger.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
exit(EXIT_FAILURE);
}
// Build the engine
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(1 << 30);
// builder->setMaxWorkspaceSize(1 << 30);
#ifdef USE_FP16
config->setFlag(BuilderFlag::kFP16);
#endif
// samplesCommon::enableDLA(builder, gUseDLACore);
// TensorRT copies the network's weights when the engine is built
// ICudaEngine* engine = builder->buildCudaEngine(*network); // legacy API: build the ICudaEngine via IBuilder::buildCudaEngine()
ICudaEngine *engine = builder->buildEngineWithConfig(*network,*config);
assert(engine);
// we can destroy the parser
parser->destroy();
// serialize the engine,
// then close everything down
trtModelStream = engine->serialize(); // serialize the engine; saveToTrtModel() writes it to disk
engine->destroy();
network->destroy();
builder->destroy();
config->destroy();
saveToTrtModel(TrtSaveFileName,trtModelStream);
}
void onnxToTRTModelDynamicBatch(const std::string& modelFile, unsigned int maxBatchSize, IHostMemory*& trtModelStream,const char * TrtSaveFileName,int input_h,int input_w) // dynamic-batch variant: builds an engine with an optimization profile instead of a fixed batch
{
int verbosity = (int) nvinfer1::ILogger::Severity::kWARNING;
// create the builder
IBuilder* builder = createInferBuilder(gLogger); // create the builder (a pointer to an IBuilder object)
IBuilderConfig *config = builder->createBuilderConfig();
auto profile = builder->createOptimizationProfile();
const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); // required by the ONNX parser; omitting it causes an error
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(explicitBatch); // creates the INetworkDefinition object that the parser will populate
Dims dims = Dims4{1, 3, input_h, input_w};
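// Optimization profile for the dynamic batch: kMIN = 1, kOPT = kMAX = maxBatchSize, at fixed 3 x input_h x input_w.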
profile->setDimensions(INPUT_BLOB_NAME,
OptProfileSelector::kMIN, Dims4{1, dims.d[1], dims.d[2], dims.d[3]});
profile->setDimensions(INPUT_BLOB_NAME,
OptProfileSelector::kOPT, Dims4{maxBatchSize, dims.d[1], dims.d[2], dims.d[3]});
profile->setDimensions(INPUT_BLOB_NAME,
OptProfileSelector::kMAX, Dims4{maxBatchSize, dims.d[1], dims.d[2], dims.d[3]});
config->addOptimizationProfile(profile);
auto parser = nvonnxparser::createParser(*network, gLogger.getTRTLogger()); // create the ONNX parser
//Optional - uncomment below lines to view network layer information
//config->setPrintLayerInfo(true);
//parser->reportParsingInfo();
if (!parser->parseFromFile(modelFile.c_str(), verbosity)) // parse the ONNX file and populate the network
{
string msg("failed to parse onnx file");
gLogger.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
exit(EXIT_FAILURE);
}
// Build the engine
// builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(1 << 30);
// builder->setMaxWorkspaceSize(1 << 30);
#ifdef USE_FP16
config->setFlag(BuilderFlag::kFP16);
#endif
// samplesCommon::enableDLA(builder, gUseDLACore);
// TensorRT copies the network's weights when the engine is built
// ICudaEngine* engine = builder->buildCudaEngine(*network); // legacy API: build the ICudaEngine via IBuilder::buildCudaEngine()
ICudaEngine *engine = builder->buildEngineWithConfig(*network,*config);
assert(engine);
// we can destroy the parser
parser->destroy();
// serialize the engine,
// then close everything down
trtModelStream = engine->serialize(); // serialize the engine; saveToTrtModel() writes it to disk
engine->destroy();
network->destroy();
builder->destroy();
config->destroy();
saveToTrtModel(TrtSaveFileName,trtModelStream);
}
// void readTrtModel(const char * Trtmodel) // load a serialized TensorRT engine
// {
// size_t size{ 0 };
// std::ifstream file(Trtmodel, std::ios::binary);
// if (file.good()) {
// file.seekg(0, file.end);
// size = file.tellg();
// file.seekg(0, file.beg);
// _trtModelStream = new char[size];
// assert(_trtModelStream);
// file.read(_trtModelStream, size);
// file.close();
// }
// _trtModelStreamSize = size;
// _runtime = createInferRuntime(gLogger);
// _engine1 = _runtime->deserializeCudaEngine(_trtModelStream, _trtModelStreamSize);
// //cudaSetDevice(0);
// context = _engine1->createExecutionContext();
// }
int main(int argc, char** argv)
{
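// Usage: ./onnx2trt <model.onnx> <output.trt> <maxBatchSize> (arguments are not validated here; see the README invocation)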
IHostMemory* trtModelStream{nullptr};
int batchSize = atoi(argv[3]);
// int input_h = atoi(argv[4]);
// int input_w=atoi(argv[5]);
onnxToTRTModel(argv[1],batchSize,trtModelStream,argv[2]);
std::cout<<"convert seccuss!"<<std::endl;
}

Binary file not shown.

Binary file not shown.

tensorrt/plate_rec.cpp (new file, 214 lines)

@@ -0,0 +1,214 @@
#include <fstream>
#include <iostream>
#include <sstream>
#include <numeric>
#include <chrono>
#include <vector>
#include <opencv2/opencv.hpp>
#include <dirent.h>
#include "NvInfer.h"
#include "cuda_runtime_api.h"
#include "logging.h"
#include "include/utils.hpp"
#define CHECK(status) \
do\
{\
auto ret = (status);\
if (ret != 0)\
{\
std::cerr << "Cuda failure: " << ret << std::endl;\
abort();\
}\
} while (0)
#define DEVICE 0 // GPU id
const std::vector<std::string> plate_string={"#","京","沪","津","渝","冀","晋","蒙","辽","吉","黑","苏","浙","皖", \
"闽","赣","鲁","豫","鄂","湘","粤","桂","琼","川","贵","云","藏","陕","甘","青","宁","新","学","警","港","澳","挂","使","领","民","航","危", \
"0","1","2","3","4","5","6","7","8","9","A","B","C","D","E","F","G","H","J","K","L","M","N","P","Q","R","S","T","U","V","W","X","Y","Z"};
using namespace nvinfer1;
// stuff we know about the network and the input/output blobs
static Logger gLogger;
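// Convert an HWC BGR uint8 image into a planar CHW float blob, normalizing
// each value as (x / 255 - mean) / std. The caller owns the returned buffer
// and must release it with delete[].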
float* blobFromImage_plate(cv::Mat& img,float mean_value,float std_value){
float* blob = new float[img.total()*3];
int channels = 3;
int img_h = img.rows;
int img_w = img.cols;
int k = 0;
for (size_t c = 0; c <3; c++)
{
for (size_t h = 0; h < img_h; h++)
{
for (size_t w = 0; w < img_w; w++)
{
blob[k++] =
((float)img.at<cv::Vec3b>(h, w)[c]/255.0-mean_value)/std_value;
}
}
}
return blob;
}
void doInference(IExecutionContext& context, float* input, float* output, const int output_size, cv::Size input_shape,const char *INPUT_BLOB_NAME,const char *OUTPUT_BLOB_NAME) {
const ICudaEngine& engine = context.getEngine();
// Pointers to input and output device buffers to pass to engine.
// Engine requires exactly IEngine::getNbBindings() number of buffers.
assert(engine.getNbBindings() == 2);
void* buffers[2];
// In order to bind the buffers, we need to know the names of the input and output tensors.
// Note that indices are guaranteed to be less than IEngine::getNbBindings()
const int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME);
assert(engine.getBindingDataType(inputIndex) == nvinfer1::DataType::kFLOAT);
const int outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);
assert(engine.getBindingDataType(outputIndex) == nvinfer1::DataType::kFLOAT);
// Create GPU buffers on device
CHECK(cudaMalloc(&buffers[inputIndex], 3 * input_shape.height * input_shape.width * sizeof(float)));
CHECK(cudaMalloc(&buffers[outputIndex], output_size*sizeof(float)));
// Create stream
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));
// DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
CHECK(cudaMemcpyAsync(buffers[inputIndex], input, 3 * input_shape.height * input_shape.width * sizeof(float), cudaMemcpyHostToDevice, stream));
context.enqueue(1, buffers, stream, nullptr);
// context.enqueueV2( buffers, stream, nullptr);
CHECK(cudaMemcpyAsync(output, buffers[outputIndex], output_size * sizeof(float), cudaMemcpyDeviceToHost, stream));
cudaStreamSynchronize(stream);
// Release stream and buffers
cudaStreamDestroy(stream);
CHECK(cudaFree(buffers[inputIndex]));
CHECK(cudaFree(buffers[outputIndex]));
}
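// Greedy CTC-style decoding: the recognition model emits one arg-maxed
// character index per time step; skip the blank token "#" and collapse
// consecutive repeats to recover the plate string.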
std::string decode_outputs(float *prob,int output_size)
{
std::string plate ="";
std::string pre_str ="#";
for (int i = 0; i<output_size; i++)
{
int index = int(prob[i]);
if (plate_string[index]!="#" && plate_string[index]!=pre_str)
plate+=plate_string[index];
pre_str = plate_string[index];
}
return plate;
}
int main(int argc, char** argv)
{
cudaSetDevice(DEVICE);
// create a model using the API directly and serialize it to a stream
char *trtModelStream{nullptr};
size_t size{0};
int plate_rec_input_w = 168;
int plate_rec_input_h = 48;
float mean_value=0.588;
float std_value =0.193;
const char* plate_rec_input_name = "images"; // ONNX input tensor name
const char* plate_rec_out_name= "output"; // ONNX output tensor name
if (argc == 4 && std::string(argv[2]) == "-i") {
const std::string engine_file_path {argv[1]};
std::ifstream file(engine_file_path, std::ios::binary);
if (file.good()) {
file.seekg(0, file.end);
size = file.tellg();
file.seekg(0, file.beg);
trtModelStream = new char[size];
assert(trtModelStream);
file.read(trtModelStream, size);
file.close();
}
} else {
std::cerr << "arguments not right!" << std::endl;
std::cerr << "run 'python3 yolox/deploy/trt.py -n yolox-{tiny, s, m, l, x}' to serialize model first!" << std::endl;
std::cerr << "Then use the following command:" << std::endl;
std::cerr << "./yolox ../model_trt.engine -i ../../../assets/dog.jpg // deserialize file and run inference" << std::endl;
return -1;
}
const std::string input_image_path {argv[3]};
//std::vector<std::string> file_names;
//if (read_files_in_dir(argv[2], file_names) < 0) {
//std::cout << "read_files_in_dir failed." << std::endl;
//return -1;
//}
IRuntime* runtime = createInferRuntime(gLogger);
assert(runtime != nullptr);
ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
assert(engine != nullptr);
IExecutionContext* context = engine->createExecutionContext();
assert(context != nullptr);
delete[] trtModelStream;
auto out_dims = engine->getBindingDimensions(1);
auto output_size = 1;
int OUTPUT_CANDIDATES = out_dims.d[1];
for(int j=0;j<out_dims.nbDims;j++) {
output_size *= out_dims.d[j];
}
static float* prob = new float[output_size];
std::string imgPath = input_image_path; // image directory from the command line (was a hardcoded local path)
std::vector<std::string> imagList;
std::vector<std::string>fileType{"jpg","png"};
readFileList(const_cast<char *>(imgPath.c_str()),imagList,fileType);
double sumTime = 0;
int right_label = 0;
int file_num = imagList.size();
for (auto &input_image_path:imagList)
{
cv::Mat img = cv::imread(input_image_path);
int img_w = img.cols;
int img_h = img.rows;
int top=0;
int left= 0;
cv::resize(img,img,cv::Size(plate_rec_input_w,plate_rec_input_h));
cv::Mat pr_img =img;
// std::cout << "blob image" << std::endl;
float* blob;
blob = blobFromImage_plate(pr_img,mean_value,std_value);
doInference(*context, blob, prob, output_size, pr_img.size(),plate_rec_input_name,plate_rec_out_name);
auto plate_number = decode_outputs(prob,output_size);
int pos = input_image_path.find_last_of("/");
auto image_name = input_image_path.substr(pos+1);
int pos2= image_name.find_last_of("_");
auto gt=image_name.substr(0,pos2);
if(gt==plate_number)
right_label+=1;
std::cout<<input_image_path<<" "<<right_label<<" "<<plate_number<<std::endl;
delete [] blob; // blob was allocated with new[] in blobFromImage_plate
}
printf("sum is %d, right is %d, accuracy is %.4f\n",file_num,right_label,1.0*right_label/file_num);
// destroy the engine
// std::cout<<"averageTime:"<<sumTime/imagList.size()<<std::endl;
context->destroy();
engine->destroy();
runtime->destroy();
return 0;
}

116
tensorrt/preprocess.cu Normal file
View File

@@ -0,0 +1,116 @@
#include "preprocess.h"
#include <opencv2/opencv.hpp>
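// One thread per destination pixel: apply the inverse (dst -> src) affine
// mapping, bilinearly sample the source image (constant border outside it),
// swap BGR -> RGB, scale to [0,1], and write planar CHW output.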
__global__ void warpaffine_kernel(
uint8_t* src, int src_line_size, int src_width,
int src_height, float* dst, int dst_width,
int dst_height, uint8_t const_value_st,
AffineMatrix d2s, int edge) {
int position = blockDim.x * blockIdx.x + threadIdx.x;
if (position >= edge) return;
float m_x1 = d2s.value[0];
float m_y1 = d2s.value[1];
float m_z1 = d2s.value[2];
float m_x2 = d2s.value[3];
float m_y2 = d2s.value[4];
float m_z2 = d2s.value[5];
int dx = position % dst_width;
int dy = position / dst_width;
float src_x = m_x1 * dx + m_y1 * dy + m_z1 + 0.5f;
float src_y = m_x2 * dx + m_y2 * dy + m_z2 + 0.5f;
float c0, c1, c2;
if (src_x <= -1 || src_x >= src_width || src_y <= -1 || src_y >= src_height) {
// out of range
c0 = const_value_st;
c1 = const_value_st;
c2 = const_value_st;
} else {
int y_low = floorf(src_y);
int x_low = floorf(src_x);
int y_high = y_low + 1;
int x_high = x_low + 1;
uint8_t const_value[] = {const_value_st, const_value_st, const_value_st};
float ly = src_y - y_low;
float lx = src_x - x_low;
float hy = 1 - ly;
float hx = 1 - lx;
float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
uint8_t* v1 = const_value;
uint8_t* v2 = const_value;
uint8_t* v3 = const_value;
uint8_t* v4 = const_value;
if (y_low >= 0) {
if (x_low >= 0)
v1 = src + y_low * src_line_size + x_low * 3;
if (x_high < src_width)
v2 = src + y_low * src_line_size + x_high * 3;
}
if (y_high < src_height) {
if (x_low >= 0)
v3 = src + y_high * src_line_size + x_low * 3;
if (x_high < src_width)
v4 = src + y_high * src_line_size + x_high * 3;
}
c0 = w1 * v1[0] + w2 * v2[0] + w3 * v3[0] + w4 * v4[0];
c1 = w1 * v1[1] + w2 * v2[1] + w3 * v3[1] + w4 * v4[1];
c2 = w1 * v1[2] + w2 * v2[2] + w3 * v3[2] + w4 * v4[2];
}
//bgr to rgb
float t = c2;
c2 = c0;
c0 = t;
//normalization
c0 = c0 / 255.0f;
c1 = c1 / 255.0f;
c2 = c2 / 255.0f;
//rgbrgbrgb to rrrgggbbb
int area = dst_width * dst_height;
float* pdst_c0 = dst + dy * dst_width + dx;
float* pdst_c1 = pdst_c0 + area;
float* pdst_c2 = pdst_c1 + area;
*pdst_c0 = c0;
*pdst_c1 = c1;
*pdst_c2 = c2;
}
void preprocess_kernel_img(
uint8_t* src, int src_width, int src_height,
float* dst, int dst_width, int dst_height,
cudaStream_t stream) {
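// s2d is the letterbox transform (uniform scale, centered padding) from the
// source image to the network input; the kernel needs the reverse mapping,
// so it is inverted into d2s below.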
AffineMatrix s2d,d2s;
float scale = std::min(dst_height / (float)src_height, dst_width / (float)src_width);
s2d.value[0] = scale;
s2d.value[1] = 0;
s2d.value[2] = -scale * src_width * 0.5 + dst_width * 0.5;
s2d.value[3] = 0;
s2d.value[4] = scale;
s2d.value[5] = -scale * src_height * 0.5 + dst_height * 0.5;
cv::Mat m2x3_s2d(2, 3, CV_32F, s2d.value);
cv::Mat m2x3_d2s(2, 3, CV_32F, d2s.value);
cv::invertAffineTransform(m2x3_s2d, m2x3_d2s);
memcpy(d2s.value, m2x3_d2s.ptr<float>(0), sizeof(d2s.value));
int jobs = dst_height * dst_width;
int threads = 256;
int blocks = ceil(jobs / (float)threads);
warpaffine_kernel<<<blocks, threads, 0, stream>>>(
src, src_width*3, src_width,
src_height, dst, dst_width,
dst_height, 128, d2s, jobs);
}
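// A hedged usage sketch of preprocess_kernel_img (buffer names and sizes are
// assumptions, not part of this file):
//
//   uint8_t* img_device;   // HWC BGR source image on the GPU
//   float*   input_blob;   // planar CHW network input
//   cudaMalloc(&img_device, src_w * src_h * 3);
//   cudaMalloc(&input_blob, 3 * dst_w * dst_h * sizeof(float));
//   cudaMemcpyAsync(img_device, img.data, src_w * src_h * 3, cudaMemcpyHostToDevice, stream);
//   preprocess_kernel_img(img_device, src_w, src_h, input_blob, dst_w, dst_h, stream);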

16
tensorrt/preprocess.h Normal file
View File

@@ -0,0 +1,16 @@
#ifndef __PREPROCESS_H
#define __PREPROCESS_H
#include <cuda_runtime.h>
#include <cstdint>
struct AffineMatrix{
float value[6];
};
void preprocess_kernel_img(uint8_t* src, int src_width, int src_height,
float* dst, int dst_width, int dst_height,
cudaStream_t stream);
#endif // __PREPROCESS_H

BIN
tensorrt/test_imgs/14.jpg Normal file


158
tensorrt/utils.cpp Normal file
View File

@@ -0,0 +1,158 @@
#include "utils.hpp"
std::string getHouZhui(std::string fileName)
{
// std::string fileName="/home/xiaolei/23.jpg";
int pos=fileName.find_last_of(std::string("."));
std::string houZui=fileName.substr(pos+1);
return houZui;
}
int readFileList(char *basePath,std::vector<std::string> &fileList,std::vector<std::string> fileType)
{
DIR *dir;
struct dirent *ptr;
char base[1000];
if ((dir=opendir(basePath)) == NULL)
{
perror("Open dir error...");
exit(1);
}
while ((ptr=readdir(dir)) != NULL)
{
if(strcmp(ptr->d_name,".")==0 || strcmp(ptr->d_name,"..")==0) /// current dir or parent dir
continue;
else if(ptr->d_type == 8) /// regular file (DT_REG)
{
if (fileType.size())
{
std::string houZui=getHouZhui(std::string(ptr->d_name));
for (auto &s:fileType)
{
if (houZui==s)
{
fileList.push_back(basePath+std::string("/")+std::string(ptr->d_name));
break;
}
}
}
else
{
fileList.push_back(basePath+std::string("/")+std::string(ptr->d_name));
}
}
else if(ptr->d_type == 10) /// symbolic link (DT_LNK)
printf("d_name:%s/%s\n",basePath,ptr->d_name);
else if(ptr->d_type == 4) /// directory (DT_DIR): recurse into it
{
memset(base,'\0',sizeof(base));
strcpy(base,basePath);
strcat(base,"/");
strcat(base,ptr->d_name);
readFileList(base,fileList,fileType);
}
}
closedir(dir);
return 1;
}
void draw_rect(const cv::Mat& image, const std::vector<boundingBox>bboxes,const char* class_names[])
{
// static const char* class_names[] = {
// "head", "leg", "hand", "back", "nostd", "body", "plate", "logo"};
// cv::Mat image = bgr.clone();
for (size_t i = 0; i < bboxes.size(); i++)
{
// const Object& obj = objects[i];
const boundingBox &obj= bboxes[i];
// fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
// obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);
cv::Scalar color = cv::Scalar(color_list1[obj.label][0], color_list1[obj.label][1], color_list1[obj.label][2]);
float c_mean = cv::mean(color)[0];
cv::Scalar txt_color;
if (c_mean > 0.5){
txt_color = cv::Scalar(0, 0, 0);
}else{
txt_color = cv::Scalar(255, 255, 255);
}
cv::Rect myRect(obj.x,obj.y,obj.w,obj.h);
cv::rectangle(image,myRect, color * 255, 2);
char text[256];
sprintf(text, "%s %.1f%%", class_names[obj.label], obj.score * 100);
int baseLine = 0;
cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
cv::Scalar txt_bk_color = color * 0.7 * 255;
int x = obj.x;
int y = obj.y + 1;
//int y = obj.rect.y - label_size.height - baseLine;
if (y > image.rows)
y = image.rows;
//if (x + label_size.width > image.cols)
//x = image.cols - label_size.width;
cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
txt_bk_color,-1);
cv::putText(image, text, cv::Point(x, y + label_size.height),
cv::FONT_HERSHEY_SIMPLEX, 0.4, txt_color, 1);
}
}
bool cmpBox(boundingBox b1, boundingBox b2)
{
return b1.score > b2.score;
}
float getIou(boundingBox b1,boundingBox b2) // compute IoU of two boxes
{
int xl1 = b1.x; // left
int xr1 = b1.w+b1.x; // right
int yt1 = b1.y; // top
int yb1 = b1.y+b1.h; // bottom
int xl2 = b2.x;
int xr2 = b2.w+b2.x;
int yt2 = b2.y;
int yb2 = b2.y+b2.h;
int x11 = std::max(xl1,xl2);
int y11 = std::max(yt1,yt2);
int x22 = std::min(xr1,xr2);
int y22 = std::min(yb1,yb2);
float intersectionArea = std::max(0,x22-x11)*std::max(0,y22-y11); // clamp so disjoint boxes yield 0, not a negative area
float unionArea = (xr1-xl1)*(yb1-yt1)+(xr2-xl2)*(yb2-yt2)-intersectionArea; // union
return 1.0f*intersectionArea/unionArea;
}
void myNms(std::vector<boundingBox>&bboxes,float score)
{
std::sort(bboxes.begin(),bboxes.end(),cmpBox);
for(int i = 0; i<(int)bboxes.size()-1; i++) // cast avoids size_t underflow when bboxes is empty
{
for(int j = i+1;j<bboxes.size(); j++)
{
if(getIou(bboxes[i],bboxes[j])>score)
{
bboxes.erase(bboxes.begin()+j);
j--;
}
}
}
}
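// Typical call (the threshold value is illustrative, not taken from this file):
//   myNms(bboxes, 0.45f); // drop any box whose IoU with a higher-scoring box exceeds 0.45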