Add files via upload

commit 9dad483077
Author: hpc203
Date: 2023-01-14 21:54:06 +08:00
Committed by: GitHub
26 changed files with 872 additions and 0 deletions

onnxruntime/coco.names (Normal file, 80 lines)

@@ -0,0 +1,80 @@
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush

[Ten binary image files added between onnxruntime/coco.names and onnxruntime/main.cpp (not shown in this view); sizes: 130 KiB, 476 KiB, 160 KiB, 111 KiB, 165 KiB, 732 KiB, 1.0 MiB, 696 KiB, 601 KiB, 1.3 MiB; file names were not preserved.]

onnxruntime/main.cpp (Normal file, 267 lines)

@@ -0,0 +1,267 @@
#include <fstream>
#include <sstream>
#include <iostream>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
//#include <cuda_provider_factory.h>
#include <onnxruntime_cxx_api.h>
using namespace std;
using namespace cv;
using namespace Ort;
struct Net_config
{
float confThreshold; // Confidence threshold
float nmsThreshold; // Non-maximum suppression threshold
string modelpath;
string datatype;
};
typedef struct BoxInfo
{
float x1;
float y1;
float x2;
float y2;
float score;
int label;
} BoxInfo;
class FreeYOLO
{
public:
FreeYOLO(Net_config config);
void detect(Mat& frame);
private:
int inpWidth;
int inpHeight;
int nout;
int num_proposal;
vector<string> class_names;
int num_class;
const int num_stride = 3;
int strides[3] = { 8,16,32 };
float confThreshold;
float nmsThreshold;
vector<float> input_image_;
void normalize_(Mat img);
void nms(vector<BoxInfo>& input_boxes);
Env env = Env(ORT_LOGGING_LEVEL_ERROR, "FreeYOLO");
Ort::Session *ort_session = nullptr;
SessionOptions sessionOptions = SessionOptions();
vector<char*> input_names;
vector<char*> output_names;
vector<vector<int64_t>> input_node_dims; // >=1 inputs
vector<vector<int64_t>> output_node_dims; // >=1 outputs
};
FreeYOLO::FreeYOLO(Net_config config)
{
this->confThreshold = config.confThreshold;
this->nmsThreshold = config.nmsThreshold;
string model_path = config.modelpath;
std::wstring widestr = std::wstring(model_path.begin(), model_path.end());
//OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);
sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
ort_session = new Session(env, widestr.c_str(), sessionOptions);
size_t numInputNodes = ort_session->GetInputCount();
size_t numOutputNodes = ort_session->GetOutputCount();
AllocatorWithDefaultOptions allocator;
for (int i = 0; i < numInputNodes; i++)
{
input_names.push_back(ort_session->GetInputName(i, allocator));
Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i);
auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
auto input_dims = input_tensor_info.GetShape();
input_node_dims.push_back(input_dims);
}
for (int i = 0; i < numOutputNodes; i++)
{
output_names.push_back(ort_session->GetOutputName(i, allocator));
Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i);
auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
auto output_dims = output_tensor_info.GetShape();
output_node_dims.push_back(output_dims);
}
this->inpHeight = input_node_dims[0][2];
this->inpWidth = input_node_dims[0][3];
if (config.datatype == "coco")
{
string classesFile = "coco.names";
ifstream ifs(classesFile.c_str());
string line;
while (getline(ifs, line)) this->class_names.push_back(line);
}
else if (config.datatype == "face")
{
this->class_names.push_back("face");
}
else
{
this->class_names.push_back("person");
}
this->num_class = class_names.size();
}
void FreeYOLO::normalize_(Mat img)
{
// img.convertTo(img, CV_32F);
int row = img.rows;
int col = img.cols;
this->input_image_.resize(row * col * img.channels());
for (int c = 0; c < 3; c++)
{
for (int i = 0; i < row; i++)
{
for (int j = 0; j < col; j++)
{
float pix = img.ptr<uchar>(i)[j * 3 + c];
this->input_image_[c * row * col + i * col + j] = pix;
}
}
}
}
void FreeYOLO::nms(vector<BoxInfo>& input_boxes)
{
sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i)
{
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
}
vector<bool> isSuppressed(input_boxes.size(), false);
for (int i = 0; i < int(input_boxes.size()); ++i)
{
if (isSuppressed[i]) { continue; }
for (int j = i + 1; j < int(input_boxes.size()); ++j)
{
if (isSuppressed[j]) { continue; }
float xx1 = (max)(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = (max)(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = (min)(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = (min)(input_boxes[i].y2, input_boxes[j].y2);
float w = (max)(float(0), xx2 - xx1 + 1);
float h = (max)(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= this->nmsThreshold)
{
isSuppressed[j] = true;
}
}
}
// return post_nms;
int idx_t = 0;
input_boxes.erase(remove_if(input_boxes.begin(), input_boxes.end(), [&idx_t, &isSuppressed](const BoxInfo& f) { return isSuppressed[idx_t++]; }), input_boxes.end());
}
void FreeYOLO::detect(Mat& frame)
{
const float ratio = std::min(float(this->inpHeight) / float(frame.rows), float(this->inpWidth) / float(frame.cols));
const int neww = int(frame.cols * ratio);
const int newh = int(frame.rows * ratio);
Mat dstimg;
resize(frame, dstimg, Size(neww, newh));
copyMakeBorder(dstimg, dstimg, 0, this->inpHeight - newh, 0, this->inpWidth - neww, BORDER_CONSTANT, 114);
this->normalize_(dstimg);
array<int64_t, 4> input_shape_{ 1, 3, this->inpHeight, this->inpWidth };
auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
Value input_tensor_ = Value::CreateTensor<float>(allocator_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size());
// run inference
vector<Value> ort_outputs = ort_session->Run(RunOptions{ nullptr }, &input_names[0], &input_tensor_, 1, output_names.data(), output_names.size());
vector<BoxInfo> generate_boxes;
Ort::Value &predictions = ort_outputs.at(0);
auto pred_dims = predictions.GetTensorTypeAndShapeInfo().GetShape();
num_proposal = pred_dims.at(1);
nout = pred_dims.at(2);
const float* pdata = ort_outputs[0].GetTensorMutableData<float>();
int n = 0, i = 0, j = 0, k = 0; ///cx, cy, w, h, box_score, class_score
for (n = 0; n < this->num_stride; n++) /// iterate over feature-map scales
{
int num_grid_x = (int)ceil(float(this->inpWidth) / strides[n]);
int num_grid_y = (int)ceil(float(this->inpHeight) / strides[n]);
for (i = 0; i < num_grid_y; i++)
{
for (j = 0; j < num_grid_x; j++)
{
const float box_score = pdata[4];
int max_ind = 0;
float max_class_score = 0;
for (k = 0; k < num_class; k++)
{
if (pdata[k + 5] > max_class_score)
{
max_class_score = pdata[k + 5];
max_ind = k;
}
}
max_class_score *= box_score;
max_class_score = sqrt(max_class_score);
if (max_class_score > this->confThreshold)
{
float cx = (0.5f + j + pdata[0]) * strides[n]; ///cx
float cy = (0.5f + i + pdata[1]) * strides[n]; ///cy
float w = expf(pdata[2]) * strides[n]; ///w
float h = expf(pdata[3]) * strides[n]; ///h
float xmin = (cx - 0.5 * w) / ratio;
float ymin = (cy - 0.5 * h) / ratio;
float xmax = (cx + 0.5 * w) / ratio;
float ymax = (cy + 0.5 * h) / ratio;
generate_boxes.push_back(BoxInfo{ xmin, ymin, xmax, ymax, max_class_score, max_ind });
}
pdata += nout;
}
}
}
// Perform non maximum suppression to eliminate redundant overlapping boxes with
// lower confidences
nms(generate_boxes);
for (size_t i = 0; i < generate_boxes.size(); ++i)
{
int xmin = int(generate_boxes[i].x1);
int ymin = int(generate_boxes[i].y1);
rectangle(frame, Point(xmin, ymin), Point(int(generate_boxes[i].x2), int(generate_boxes[i].y2)), Scalar(0, 0, 255), 2);
string label = format("%.2f", generate_boxes[i].score);
label = this->class_names[generate_boxes[i].label] + ":" + label;
putText(frame, label, Point(xmin, ymin - 5), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
}
}
int main()
{
Net_config cfg = { 0.6, 0.5, "weights/crowdhuman/yolo_free_huge_crowdhuman_192x320.onnx", "person" };
FreeYOLO net(cfg);
string imgpath = "images/person/1.png";
Mat srcimg = imread(imgpath);
net.detect(srcimg);
static const string kWinName = "Deep learning object detection in ONNXRuntime";
namedWindow(kWinName, WINDOW_NORMAL);
imshow(kWinName, srcimg);
waitKey(0);
destroyAllWindows();
}
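
For reference, the per-cell decode performed by the triple loop above can be written as one vectorized step. The sketch below is an illustration only, not part of the commit; decode_grid and its arguments are hypothetical names, and pred is assumed to be the [num_grid_y*num_grid_x, 5+num_class] slice of the network output for a single stride, in the same row-major (i outer, j inner) order the loop walks:

import numpy as np

def decode_grid(pred, stride, num_grid_y, num_grid_x, ratio):
    # cx = (0.5 + j + tx) * stride, w = exp(tw) * stride,
    # score = sqrt(box_score * class_score), boxes mapped back by 1/ratio,
    # mirroring the arithmetic inside the C++ loop.
    gy, gx = np.meshgrid(np.arange(num_grid_y), np.arange(num_grid_x), indexing='ij')
    grid = np.stack([gx.ravel(), gy.ravel()], axis=-1)  # (j, i) per cell, row-major
    cxcy = (grid + 0.5 + pred[:, :2]) * stride
    wh = np.exp(pred[:, 2:4]) * stride
    scores = np.sqrt(pred[:, 4:5] * pred[:, 5:])
    boxes = np.concatenate([cxcy - 0.5 * wh, cxcy + 0.5 * wh], axis=-1) / ratio
    return boxes, scores  # [M, 4] in xyxy order, [M, num_class]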

onnxruntime/main.py (Normal file, 138 lines)

@@ -0,0 +1,138 @@
import argparse
import cv2
import numpy as np
import onnxruntime as ort
class FreeYOLO():
def __init__(self, model_path, confThreshold=0.4, nmsThreshold=0.85, datatype='coco'):
so = ort.SessionOptions()
so.log_severity_level = 3
self.session = ort.InferenceSession(model_path, so)
model_inputs = self.session.get_inputs()
self.input_name = model_inputs[0].name
self.input_shape = model_inputs[0].shape
self.input_height = int(self.input_shape[2])
self.input_width = int(self.input_shape[3])
self.anchors, self.expand_strides = self.generate_anchors((self.input_height, self.input_width), [8, 16, 32])
if datatype=='coco':
self.classes = list(map(lambda x: x.strip(), open('coco.names', 'r').readlines()))
elif datatype=='face':
self.classes = ['face']
else:
self.classes = ['person']
self.num_class = len(self.classes)
self.confThreshold = confThreshold
self.nmsThreshold = nmsThreshold
def generate_anchors(self, input_shape, strides):
"""
input_shape: (List) [H, W]
"""
all_anchors = []
all_expand_strides = []
for stride in strides:
# generate grid cells
fmp_h, fmp_w = input_shape[0] // stride, input_shape[1] // stride
anchor_x, anchor_y = np.meshgrid(np.arange(fmp_w),
np.arange(fmp_h))
# [H, W, 2]
anchor_xy = np.stack([anchor_x, anchor_y], axis=-1)
shape = anchor_xy.shape[:2]
# [H, W, 2] -> [HW, 2]
anchor_xy = (anchor_xy.reshape(-1, 2) + 0.5) * stride
all_anchors.append(anchor_xy)
# expanded stride
expand_stride = np.full((*shape, 1), stride)
all_expand_strides.append(expand_stride.reshape(-1, 1))
anchors = np.concatenate(all_anchors, axis=0)
expand_strides = np.concatenate(all_expand_strides, axis=0)
return anchors, expand_strides
def decode_boxes(self, anchors, pred_regs, expand_strides):
"""
anchors: (List[Tensor]) [1, M, 2] or [M, 2]
pred_regs: (List[Tensor]) [B, M, 4] or [M, 4]
"""
# center of bbox
pred_ctr_xy = anchors[..., :2] + pred_regs[..., :2] * expand_strides
# size of bbox
pred_box_wh = np.exp(pred_regs[..., 2:]) * expand_strides
pred_x1y1 = pred_ctr_xy - 0.5 * pred_box_wh
# pred_x2y2 = pred_ctr_xy + 0.5 * pred_box_wh
# pred_box = np.concatenate([pred_x1y1, pred_x2y2], axis=-1)
pred_box = np.concatenate([pred_x1y1, pred_box_wh], axis=-1)
return pred_box
def drawPred(self, frame, classId, conf, left, top, right, bottom):
# Draw a bounding box.
cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=2)
label = '%.2f' % conf
label = '%s:%s' % (self.classes[classId], label)
# Display the label at the top of the bounding box
labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
top = max(top, labelSize[1])
# cv.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255,255,255), cv.FILLED)
cv2.putText(frame, label, (left, top - 10), 0, 0.7, (0, 255, 0), thickness=2)
return frame
def detect(self, frame):
padded_image = np.ones((self.input_height, self.input_width, 3), dtype=np.uint8)*114
ratio = min(self.input_height / frame.shape[0], self.input_width / frame.shape[1])
neww, newh = int(frame.shape[1] * ratio), int(frame.shape[0] * ratio)
temp_image = cv2.resize(frame, (neww, newh), interpolation=cv2.INTER_LINEAR)
padded_image[:newh, :neww, :] = temp_image
padded_image = padded_image.transpose(2, 0, 1)
padded_image = np.expand_dims(padded_image, axis=0).astype(np.float32)
# Inference
results = self.session.run(None, {self.input_name: padded_image})
reg_preds = results[0][0][..., :4]
obj_preds = results[0][0][..., 4:5]
cls_preds = results[0][0][..., 5:]
scores = np.sqrt(obj_preds * cls_preds)
# scores & class_ids
class_ids = np.argmax(scores, axis=1) # [M,]
scores = np.max(scores, axis=1)
# bboxes
bboxes = self.decode_boxes(self.anchors, reg_preds, self.expand_strides) # [M, 4]
# thresh
keep = np.where(scores > self.confThreshold)
scores = scores[keep]
class_ids = class_ids[keep]
bboxes = bboxes[keep]
bboxes /= ratio
indices = cv2.dnn.NMSBoxes(bboxes.tolist(), scores.tolist(), self.confThreshold, self.nmsThreshold)
for i in indices:
left, top, width, height = bboxes[i, :].astype(np.int32)
frame = self.drawPred(frame, class_ids[i], scores[i], left, top, left + width, top + height)
return frame
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--modelpath", type=str, default='weights/coco/yolo_free_nano_192x320.onnx', help="model path")
parser.add_argument("--imgpath", type=str, default='images/coco/dog.jpg', help="image path")
parser.add_argument("--confThreshold", default=0.6, type=float, help='class confidence')
parser.add_argument("--nmsThreshold", default=0.5, type=float, help='iou thresh')
parser.add_argument("--datatype", default='coco', type=str, choices=['coco', 'face', 'person'], help='data type')
args = parser.parse_args()
net = FreeYOLO(args.modelpath, confThreshold=args.confThreshold, nmsThreshold=args.nmsThreshold, datatype=args.datatype)
srcimg = cv2.imread(args.imgpath)
srcimg = net.detect(srcimg)
winName = 'Deep learning object detection in ONNXRuntime'
cv2.namedWindow(winName, cv2.WINDOW_NORMAL)
cv2.imshow(winName, srcimg)
cv2.waitKey(0)
cv2.destroyAllWindows()
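
Note that decode_boxes returns boxes as [x1, y1, w, h] (top-left corner plus size), which is the layout cv2.dnn.NMSBoxes expects; the commented-out lines show the alternative corner format. A tiny self-contained check of that decode, with made-up illustrative values (not part of the commit):

import numpy as np

anchor = np.array([[16.0, 16.0]])        # one cell center at stride 32
reg = np.array([[0.0, 0.0, 0.0, 0.0]])   # zero center offset, log-size 0
stride = np.array([[32.0]])
ctr = anchor + reg[..., :2] * stride     # -> (16, 16)
wh = np.exp(reg[..., 2:]) * stride       # -> (32, 32)
box = np.concatenate([ctr - 0.5 * wh, wh], axis=-1)
print(box)                               # [[ 0.  0. 32. 32.]]: x1=0, y1=0, w=32, h=32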

opencv/coco.names (Normal file, 80 lines)

@@ -0,0 +1,80 @@
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush

[Binary image files added (not shown in this view):
opencv/images/coco/bus.jpg (Normal file, 476 KiB)
opencv/images/coco/dog.jpg (Normal file, 160 KiB)
opencv/images/face/1.jpg (Normal file, 732 KiB)
opencv/images/face/2.jpg (Normal file, 1.0 MiB)
opencv/images/face/3.jpg (Normal file, 696 KiB)
opencv/images/face/4.jpg (Normal file, 601 KiB)
opencv/images/person/1.png (Normal file, 1.3 MiB)
plus three files whose names were not preserved (130 KiB, 111 KiB, 165 KiB)]

opencv/main.cpp (Normal file, 174 lines)

@@ -0,0 +1,174 @@
#include <fstream>
#include <sstream>
#include <iostream>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
using namespace std;
using namespace cv;
using namespace dnn;
struct Net_config
{
float confThreshold; // Confidence threshold
float nmsThreshold; // Non-maximum suppression threshold
string modelpath;
string datatype;
};
class FreeYOLO
{
public:
FreeYOLO(Net_config config);
void detect(Mat& frame);
private:
int inpWidth;
int inpHeight;
int nout;
int num_proposal;
vector<string> class_names;
int num_class;
const int num_stride = 3;
int strides[3] = { 8,16,32 };
float confThreshold;
float nmsThreshold;
Net net;
};
FreeYOLO::FreeYOLO(Net_config config)
{
this->confThreshold = config.confThreshold;
this->nmsThreshold = config.nmsThreshold;
this->net = readNet(config.modelpath);
size_t pos = config.modelpath.rfind("_");
size_t pos_ = config.modelpath.rfind(".");
int len = pos_ - pos - 1;
string hxw = config.modelpath.substr(pos + 1, len);
pos = hxw.rfind("x");
string h = hxw.substr(0, pos);
len = hxw.length() - pos;
string w = hxw.substr(pos + 1, len);
this->inpHeight = stoi(h);
this->inpWidth = stoi(w);
if (config.datatype == "coco")
{
string classesFile = "coco.names";
ifstream ifs(classesFile.c_str());
string line;
while (getline(ifs, line)) this->class_names.push_back(line);
}
else if (config.datatype == "face")
{
this->class_names.push_back("face");
}
else
{
this->class_names.push_back("person");
}
this->num_class = class_names.size();
}
void FreeYOLO::detect(Mat& frame)
{
const float ratio = std::min(float(this->inpHeight) / float(frame.rows), float(this->inpWidth) / float(frame.cols));
const int neww = int(frame.cols * ratio);
const int newh = int(frame.rows * ratio);
Mat dstimg;
resize(frame, dstimg, Size(neww, newh));
copyMakeBorder(dstimg, dstimg, 0, this->inpHeight - newh, 0, this->inpWidth - neww, BORDER_CONSTANT, 114);
Mat blob = blobFromImage(dstimg);
this->net.setInput(blob);
vector<Mat> outs;
this->net.forward(outs, this->net.getUnconnectedOutLayersNames()); // run inference
num_proposal = outs[0].size[1];
nout = outs[0].size[2];
const float* pdata = (float*)outs[0].data;
int n = 0, i = 0, j = 0, k = 0; ///cx, cy, w, h, box_score, class_score
vector<float> confidences;
vector<Rect> boxes;
vector<int> classIds;
for (n = 0; n < this->num_stride; n++) /// iterate over feature-map scales
{
int num_grid_x = (int)ceil(float(this->inpWidth) / strides[n]);
int num_grid_y = (int)ceil(float(this->inpHeight) / strides[n]);
for (i = 0; i < num_grid_y; i++)
{
for (j = 0; j < num_grid_x; j++)
{
const float box_score = pdata[4];
int max_ind = 0;
float max_class_score = 0;
for (k = 0; k < num_class; k++)
{
if (pdata[k + 5] > max_class_score)
{
max_class_score = pdata[k + 5];
max_ind = k;
}
}
max_class_score *= box_score;
max_class_score = sqrt(max_class_score);
if (max_class_score > this->confThreshold)
{
float cx = (0.5f + j + pdata[0]) * strides[n]; ///cx
float cy = (0.5f + i + pdata[1]) * strides[n]; ///cy
float w = expf(pdata[2]) * strides[n]; ///w
float h = expf(pdata[3]) * strides[n]; ///h
int left = int((cx - 0.5 * w) / ratio);
int top = int((cy - 0.5 * h) / ratio);
int width = int(w / ratio);
int height = int(h / ratio);
confidences.push_back(max_class_score);
boxes.push_back(Rect(left, top, width, height));
classIds.push_back(max_ind);
}
pdata += nout;
}
}
}
vector<int> indices;
dnn::NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
for (size_t i = 0; i < indices.size(); ++i)
{
int idx = indices[i];
Rect box = boxes[idx];
rectangle(frame, Point(box.x, box.y), Point(box.x + box.width, box.y + box.height), Scalar(0, 0, 255), 3);
string label = format("%.2f", confidences[idx]);
label = this->class_names[classIds[idx]] + ":" + label;
putText(frame, label, Point(box.x, box.y - 10), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 255, 0), 2);
}
}
int main()
{
Net_config cfg = { 0.8, 0.5, "weights/face/yolo_free_huge_widerface_192x320.onnx", "face" };
FreeYOLO net(cfg);
string imgpath = "images/face/1.jpg";
Mat srcimg = imread(imgpath);
net.detect(srcimg);
static const string kWinName = "Deep learning object detection in OpenCV";
namedWindow(kWinName, WINDOW_NORMAL);
imshow(kWinName, srcimg);
waitKey(0);
destroyAllWindows();
}
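
The constructor above recovers the network input size from the model file name (the rfind/substr surgery on names like yolo_free_huge_widerface_192x320.onnx). A minimal sketch of the same convention in Python, assuming only that the file name ends in _<H>x<W>.onnx; parse_input_hw is a hypothetical helper name:

import re

def parse_input_hw(model_path: str):
    # Extract (height, width) from a trailing _<H>x<W>.onnx suffix,
    # mirroring the C++ constructor's string parsing.
    m = re.search(r'_(\d+)x(\d+)\.onnx$', model_path)
    if m is None:
        raise ValueError(f'no HxW suffix in {model_path!r}')
    return int(m.group(1)), int(m.group(2))

assert parse_input_hw('weights/face/yolo_free_huge_widerface_192x320.onnx') == (192, 320)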

opencv/main.py (Normal file, 133 lines)

@@ -0,0 +1,133 @@
import argparse
import cv2
import numpy as np
import os
class FreeYOLO():
def __init__(self, model_path, confThreshold=0.4, nmsThreshold=0.85, datatype='coco'):
self.net = cv2.dnn.readNet(model_path)
filename = os.path.splitext(os.path.basename(model_path))[0]
input_shape = filename.split('_')[-1].split('x')
self.input_height = int(input_shape[0])
self.input_width = int(input_shape[1])
self.anchors, self.expand_strides = self.generate_anchors((self.input_height, self.input_width), [8, 16, 32])
if datatype=='coco':
self.classes = list(map(lambda x: x.strip(), open('coco.names', 'r').readlines()))
elif datatype=='face':
self.classes = ['face']
else:
self.classes = ['person']
self.num_class = len(self.classes)
self.confThreshold = confThreshold
self.nmsThreshold = nmsThreshold
self.output_names = self.net.getUnconnectedOutLayersNames()
def generate_anchors(self, input_shape, strides):
"""
input_shape: (List) [H, W]
"""
all_anchors = []
all_expand_strides = []
for stride in strides:
# generate grid cells
fmp_h, fmp_w = input_shape[0] // stride, input_shape[1] // stride
anchor_x, anchor_y = np.meshgrid(np.arange(fmp_w),
np.arange(fmp_h))
# [H, W, 2]
anchor_xy = np.stack([anchor_x, anchor_y], axis=-1)
shape = anchor_xy.shape[:2]
# [H, W, 2] -> [HW, 2]
anchor_xy = (anchor_xy.reshape(-1, 2) + 0.5) * stride
all_anchors.append(anchor_xy)
# expanded stride
expand_stride = np.full((*shape, 1), stride)
all_expand_strides.append(expand_stride.reshape(-1, 1))
anchors = np.concatenate(all_anchors, axis=0)
expand_strides = np.concatenate(all_expand_strides, axis=0)
return anchors, expand_strides
def decode_boxes(self, anchors, pred_regs, expand_strides):
"""
anchors: (List[Tensor]) [1, M, 2] or [M, 2]
pred_regs: (List[Tensor]) [B, M, 4] or [M, 4]
"""
# center of bbox
pred_ctr_xy = anchors[..., :2] + pred_regs[..., :2] * expand_strides
# size of bbox
pred_box_wh = np.exp(pred_regs[..., 2:]) * expand_strides
pred_x1y1 = pred_ctr_xy - 0.5 * pred_box_wh
# pred_x2y2 = pred_ctr_xy + 0.5 * pred_box_wh
# pred_box = np.concatenate([pred_x1y1, pred_x2y2], axis=-1)
pred_box = np.concatenate([pred_x1y1, pred_box_wh], axis=-1)
return pred_box
def drawPred(self, frame, classId, conf, left, top, right, bottom):
# Draw a bounding box.
cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=2)
label = '%.2f' % conf
label = '%s:%s' % (self.classes[classId], label)
# Display the label at the top of the bounding box
labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
top = max(top, labelSize[1])
# cv.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255,255,255), cv.FILLED)
cv2.putText(frame, label, (left, top - 10), 0, 0.7, (0, 255, 0), thickness=2)
return frame
def detect(self, frame):
padded_image = np.ones((self.input_height, self.input_width, 3), dtype=np.uint8)*114
ratio = min(self.input_height / frame.shape[0], self.input_width / frame.shape[1])
neww, newh = int(frame.shape[1] * ratio), int(frame.shape[0] * ratio)
temp_image = cv2.resize(frame, (neww, newh), interpolation=cv2.INTER_LINEAR)
padded_image[:newh, :neww, :] = temp_image
blob = cv2.dnn.blobFromImage(padded_image)
self.net.setInput(blob)
results = self.net.forward(self.output_names)
reg_preds = results[0][0][..., :4]
obj_preds = results[0][0][..., 4:5]
cls_preds = results[0][0][..., 5:]
scores = np.sqrt(obj_preds * cls_preds)
# scores & class_ids
class_ids = np.argmax(scores, axis=1) # [M,]
scores = np.max(scores, axis=1)
# bboxes
bboxes = self.decode_boxes(self.anchors, reg_preds, self.expand_strides) # [M, 4]
# thresh
keep = np.where(scores > self.confThreshold)
scores = scores[keep]
class_ids = class_ids[keep]
bboxes = bboxes[keep]
bboxes /= ratio
indices = cv2.dnn.NMSBoxes(bboxes.tolist(), scores.tolist(), self.confThreshold, self.nmsThreshold)
for i in indices:
left, top, width, height = bboxes[i, :].astype(np.int32)
frame = self.drawPred(frame, class_ids[i], scores[i], left, top, left + width, top + height)
return frame
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--modelpath", type=str, default='weights/coco/yolo_free_nano_192x320.onnx', help="model path")
parser.add_argument("--imgpath", type=str, default='images/coco/dog.jpg', help="image path")
parser.add_argument("--confThreshold", default=0.6, type=float, help='class confidence')
parser.add_argument("--nmsThreshold", default=0.5, type=float, help='iou thresh')
parser.add_argument("--datatype", default='coco', type=str, choices=['coco', 'face', 'person'], help='data type')
args = parser.parse_args()
net = FreeYOLO(args.modelpath, confThreshold=args.confThreshold, nmsThreshold=args.nmsThreshold, datatype=args.datatype)
srcimg = cv2.imread(args.imgpath)
srcimg = net.detect(srcimg)
winName = 'Deep learning object detection in OpenCV'
cv2.namedWindow(winName, cv2.WINDOW_NORMAL)
cv2.imshow(winName, srcimg)
cv2.waitKey(0)
cv2.destroyAllWindows()
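
Both detect() implementations preprocess the same way: an aspect-preserving resize into the top-left corner of a canvas padded with value 114, after which decoded boxes are mapped back by dividing by the resize ratio. A standalone sketch of that step, assuming nothing beyond the inline code above; letterbox is a hypothetical helper name, not part of the commit:

import numpy as np
import cv2

def letterbox(img, dst_h, dst_w, pad_value=114):
    # Resize with preserved aspect ratio into the top-left of a padded
    # canvas; the returned ratio maps canvas-space boxes back to the
    # original image: box_in_original = box_in_canvas / ratio.
    ratio = min(dst_h / img.shape[0], dst_w / img.shape[1])
    new_w, new_h = int(img.shape[1] * ratio), int(img.shape[0] * ratio)
    canvas = np.full((dst_h, dst_w, 3), pad_value, dtype=np.uint8)
    canvas[:new_h, :new_w] = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    return canvas, ratio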