Add files via upload

2025-10-05 16:16:54 +08:00 · 2023-01-14 21:54:06 +08:00
commit 9dad483077
26 changed files with 872 additions and 0 deletions
--- a/onnxruntime/coco.names
+++ b/onnxruntime/coco.names
@@ -0,0 +1,80 @@
 person
 bicycle
 car
 motorbike
 aeroplane
 bus
 train
 truck
 boat
 traffic light
 fire hydrant
 stop sign
 parking meter
 bench
 bird
 cat
 dog
 horse
 sheep
 cow
 elephant
 bear
 zebra
 giraffe
 backpack
 umbrella
 handbag
 tie
 suitcase
 frisbee
 skis
 snowboard
 sports ball
 kite
 baseball bat
 baseball glove
 skateboard
 surfboard
 tennis racket
 bottle
 wine glass
 cup
 fork
 knife
 spoon
 bowl
 banana
 apple
 sandwich
 orange
 broccoli
 carrot
 hot dog
 pizza
 donut
 cake
 chair
 sofa
 pottedplant
 bed
 diningtable
 toilet
 tvmonitor
 laptop
 mouse
 remote
 keyboard
 cell phone
 microwave
 oven
 toaster
 sink
 refrigerator
 book
 clock
 vase
 scissors
 teddy bear
 hair drier
 toothbrush
--- a/onnxruntime/images/coco/000000000785.jpg
+++ b/onnxruntime/images/coco/000000000785.jpg
--- a/onnxruntime/images/coco/bus.jpg
+++ b/onnxruntime/images/coco/bus.jpg
--- a/onnxruntime/images/coco/dog.jpg
+++ b/onnxruntime/images/coco/dog.jpg
--- a/onnxruntime/images/coco/person.jpg
+++ b/onnxruntime/images/coco/person.jpg
--- a/onnxruntime/images/coco/zidane.jpg
+++ b/onnxruntime/images/coco/zidane.jpg
--- a/onnxruntime/images/face/1.jpg
+++ b/onnxruntime/images/face/1.jpg
--- a/onnxruntime/images/face/2.jpg
+++ b/onnxruntime/images/face/2.jpg
--- a/onnxruntime/images/face/3.jpg
+++ b/onnxruntime/images/face/3.jpg
--- a/onnxruntime/images/face/4.jpg
+++ b/onnxruntime/images/face/4.jpg
--- a/onnxruntime/images/person/1.png
+++ b/onnxruntime/images/person/1.png
--- a/onnxruntime/main.cpp
+++ b/onnxruntime/main.cpp
@@ -0,0 +1,267 @@
 #include <fstream>
 #include <sstream>
 #include <iostream>
 #include <opencv2/imgproc.hpp>
 #include <opencv2/highgui.hpp>
 //#include <cuda_provider_factory.h>
 #include <onnxruntime_cxx_api.h>
 using namespace std;
 using namespace cv;
 using namespace Ort;
 /// Settings handed to the FreeYOLO constructor.
 struct Net_config
 {
 	/// A detection is kept only when its score is above this value.
 	float confThreshold;
 	/// Overlap threshold used by non-maximum suppression.
 	float nmsThreshold;
 	/// Path of the model file to load.
 	std::string modelpath;
 	/// Label set selector; the constructor compares it with "coco" and "face".
 	std::string datatype;
 };
 /// One detection: two box corners, its score and the index of its class.
 struct BoxInfo
 {
 	float x1;    // left edge
 	float y1;    // top edge
 	float x2;    // right edge
 	float y2;    // bottom edge
 	float score; // confidence of the detection
 	int label;   // index into the class-name list
 };
 class FreeYOLO
 {
 public:
 	FreeYOLO(Net_config config);
 	void detect(Mat& frame);
 private:
 	int inpWidth;
 	int inpHeight;
 	int nout;
 	int num_proposal;
 	vector<string> class_names;
 	int num_class;
 	const int num_stride = 3;
 	int strides[3] = { 8,16,32 };
 	float confThreshold;
 	float nmsThreshold;
 	vector<float> input_image_;
 	void normalize_(Mat img);
 	void nms(vector<BoxInfo>& input_boxes);
 	Env env = Env(ORT_LOGGING_LEVEL_ERROR, "FreeYOLO");
 	Ort::Session *ort_session = nullptr;
 	SessionOptions sessionOptions = SessionOptions();
 	vector<char*> input_names;
 	vector<char*> output_names;
 	vector<vector<int64_t>> input_node_dims; // >=1 outputs
 	vector<vector<int64_t>> output_node_dims; // >=1 outputs
 };
 /// Builds the ONNX Runtime session, records the model's input/output names and
 /// shapes, and loads the class labels selected by `config.datatype`.
 ///
 /// Throws cv::Exception when the model input shape is not a fixed 4-D shape or
 /// when the "coco" label file cannot be read.
 FreeYOLO::FreeYOLO(Net_config config)
 {
 	this->confThreshold = config.confThreshold;
 	this->nmsThreshold = config.nmsThreshold;
 	const string& model_path = config.modelpath;
 	//OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);
 	sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
 #ifdef _WIN32
 	// ONNX Runtime takes a wide-character path on Windows and a narrow one elsewhere.
 	const std::wstring widestr(model_path.begin(), model_path.end());
 	ort_session = new Session(env, widestr.c_str(), sessionOptions);
 #else
 	ort_session = new Session(env, model_path.c_str(), sessionOptions);
 #endif
 	const auto fail = [this](const string& message)
 	{
 		// A destructor never runs for an object whose constructor throws,
 		// so the session has to be freed here.
 		delete ort_session;
 		ort_session = nullptr;
 		CV_Error(cv::Error::StsError, message);
 	};
 	const size_t numInputNodes = ort_session->GetInputCount();
 	const size_t numOutputNodes = ort_session->GetOutputCount();
 	AllocatorWithDefaultOptions allocator;
 	for (size_t i = 0; i < numInputNodes; i++)
 	{
 		input_names.push_back(ort_session->GetInputName(i, allocator));
 		Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i);
 		auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
 		input_node_dims.push_back(input_tensor_info.GetShape());
 	}
 	for (size_t i = 0; i < numOutputNodes; i++)
 	{
 		output_names.push_back(ort_session->GetOutputName(i, allocator));
 		Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i);
 		auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
 		output_node_dims.push_back(output_tensor_info.GetShape());
 	}
 	// Height and width are read from positions 2 and 3 of the first input shape;
 	// both must be known, positive values for the padding done in detect().
 	if (input_node_dims.empty() || input_node_dims[0].size() != 4
 		|| input_node_dims[0][2] <= 0 || input_node_dims[0][3] <= 0)
 	{
 		fail("FreeYOLO: the model input must have a fixed 4-D shape");
 	}
 	this->inpHeight = static_cast<int>(input_node_dims[0][2]);
 	this->inpWidth = static_cast<int>(input_node_dims[0][3]);
 	if (config.datatype == "coco")
 	{
 		const string classesFile = "coco.names";
 		ifstream ifs(classesFile.c_str());
 		if (!ifs.is_open())
 		{
 			fail("FreeYOLO: cannot open class file " + classesFile);
 		}
 		string line;
 		while (getline(ifs, line))
 		{
 			// Drop the carriage return left over from CRLF line endings.
 			if (!line.empty() && line.back() == '\r') line.pop_back();
 			this->class_names.push_back(line);
 		}
 		if (this->class_names.empty())
 		{
 			fail("FreeYOLO: class file " + classesFile + " is empty");
 		}
 	}
 	else if (config.datatype == "face")
 	{
 		this->class_names.push_back("face");
 	}
 	else
 	{
 		this->class_names.push_back("person");
 	}
 	this->num_class = static_cast<int>(class_names.size());
 }
 /// Copies the 8-bit interleaved pixels of `img` into `input_image_` as floats in
 /// planar order (all of channel 0, then channel 1, then channel 2). Values are
 /// converted to float without any scaling.
 void FreeYOLO::normalize_(Mat img)
 {
 	//    img.convertTo(img, CV_32F);
 	const int height = img.rows;
 	const int width = img.cols;
 	const int plane = height * width;
 	this->input_image_.resize(plane * img.channels());
 	for (int y = 0; y < height; ++y)
 	{
 		const uchar* src_row = img.ptr<uchar>(y);
 		for (int x = 0; x < width; ++x)
 		{
 			for (int ch = 0; ch < 3; ++ch)
 			{
 				const float value = src_row[x * 3 + ch];
 				this->input_image_[ch * plane + y * width + x] = value;
 			}
 		}
 	}
 }
 /// Greedy non-maximum suppression, applied in place.
 ///
 /// Boxes are ordered by descending score; every surviving box removes each
 /// lower-ranked box whose IoU with it is at least `nmsThreshold`. Class labels
 /// are not consulted. On return `input_boxes` holds only the survivors, still
 /// ordered by descending score.
 void FreeYOLO::nms(vector<BoxInfo>& input_boxes)
 {
 	sort(input_boxes.begin(), input_boxes.end(),
 		[](const BoxInfo& a, const BoxInfo& b) { return a.score > b.score; });
 	const size_t count = input_boxes.size();
 	// Areas use the same "+1" convention as the overlap computed below.
 	vector<float> areas(count);
 	for (size_t i = 0; i < count; ++i)
 	{
 		const BoxInfo& box = input_boxes[i];
 		areas[i] = (box.x2 - box.x1 + 1) * (box.y2 - box.y1 + 1);
 	}
 	vector<bool> suppressed(count, false);
 	// Survivors are collected explicitly instead of erasing through a counting
 	// predicate in remove_if, which only works if the predicate happens to be
 	// invoked once per element in index order.
 	vector<BoxInfo> kept;
 	kept.reserve(count);
 	for (size_t i = 0; i < count; ++i)
 	{
 		if (suppressed[i]) { continue; }
 		const BoxInfo& best = input_boxes[i];
 		kept.push_back(best);
 		for (size_t j = i + 1; j < count; ++j)
 		{
 			if (suppressed[j]) { continue; }
 			const BoxInfo& other = input_boxes[j];
 			const float xx1 = (max)(best.x1, other.x1);
 			const float yy1 = (max)(best.y1, other.y1);
 			const float xx2 = (min)(best.x2, other.x2);
 			const float yy2 = (min)(best.y2, other.y2);
 			const float w = (max)(float(0), xx2 - xx1 + 1);
 			const float h = (max)(float(0), yy2 - yy1 + 1);
 			const float inter = w * h;
 			const float ovr = inter / (areas[i] + areas[j] - inter);
 			if (ovr >= this->nmsThreshold)
 			{
 				suppressed[j] = true;
 			}
 		}
 	}
 	input_boxes.swap(kept);
 }
 /// Detects objects in `frame` and draws the surviving boxes and labels onto it.
 ///
 /// The image is resized with its aspect ratio kept, padded on the bottom/right
 /// with 114 in every channel up to the network input size and run through the
 /// model. Decoded boxes are mapped back to `frame` coordinates before NMS.
 /// Throws cv::Exception for an empty image or an output tensor that is too
 /// small for the configured grid and class count.
 void FreeYOLO::detect(Mat& frame)
 {
 	if (frame.empty())
 	{
 		// Without this guard the ratio below divides by zero rows/columns.
 		CV_Error(cv::Error::StsBadArg, "FreeYOLO::detect: input image is empty");
 	}
 	// class_names is indexed with the winning class further down.
 	CV_Assert(this->num_class > 0);
 	const float ratio = std::min(float(this->inpHeight) / float(frame.rows), float(this->inpWidth) / float(frame.cols));
 	const int neww = int(frame.cols * ratio);
 	const int newh = int(frame.rows * ratio);
 	Mat dstimg;
 	resize(frame, dstimg, Size(neww, newh));
 	// A bare 114 converts to Scalar(114, 0, 0, 0) and would pad only channel 0.
 	copyMakeBorder(dstimg, dstimg, 0, this->inpHeight - newh, 0, this->inpWidth - neww, BORDER_CONSTANT, Scalar::all(114));
 	this->normalize_(dstimg);
 	array<int64_t, 4> input_shape_{ 1, 3, this->inpHeight, this->inpWidth };
 	auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
 	Value input_tensor_ = Value::CreateTensor<float>(allocator_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size());
 	// Run inference.
 	vector<Value> ort_outputs = ort_session->Run(RunOptions{ nullptr }, &input_names[0], &input_tensor_, 1, output_names.data(), output_names.size());
 	vector<BoxInfo> generate_boxes;
 	Ort::Value &predictions = ort_outputs.at(0);
 	const auto pred_dims = predictions.GetTensorTypeAndShapeInfo().GetShape();
 	num_proposal = static_cast<int>(pred_dims.at(1));
 	nout = static_cast<int>(pred_dims.at(2));
 	// The loops below read one record per grid cell and 5 + num_class values per
 	// record, so make sure the output tensor is large enough for that.
 	int grid_cells = 0;
 	for (int n = 0; n < this->num_stride; n++)
 	{
 		grid_cells += (this->inpWidth / strides[n]) * (this->inpHeight / strides[n]);
 	}
 	CV_Assert(nout >= 5 + num_class && num_proposal >= grid_cells);
 	const float* pdata = predictions.GetTensorMutableData<float>();
 	// Record layout: cx, cy, w, h, box_score, class_score...
 	for (int n = 0; n < this->num_stride; n++)   // one pass per feature-map scale
 	{
 		const int stride = strides[n];
 		// Integer division already truncates, so no ceil() is needed here.
 		const int num_grid_x = this->inpWidth / stride;
 		const int num_grid_y = this->inpHeight / stride;
 		for (int i = 0; i < num_grid_y; i++)
 		{
 			for (int j = 0; j < num_grid_x; j++)
 			{
 				const float box_score = pdata[4];
 				int max_ind = 0;
 				float max_class_socre = 0;
 				for (int k = 0; k < num_class; k++)
 				{
 					if (pdata[k + 5] > max_class_socre)
 					{
 						max_class_socre = pdata[k + 5];
 						max_ind = k;
 					}
 				}
 				// Final score is the square root of box score times best class score.
 				max_class_socre *= box_score;
 				max_class_socre = sqrt(max_class_socre);
 				if (max_class_socre > this->confThreshold)
 				{
 					const float cx = (0.5f + j + pdata[0]) * stride;
 					const float cy = (0.5f + i + pdata[1]) * stride;
 					const float w = expf(pdata[2]) * stride;
 					const float h = expf(pdata[3]) * stride;
 					// Dividing by ratio maps network coordinates back to the frame.
 					float xmin = (cx - 0.5 * w) / ratio;
 					float ymin = (cy - 0.5 * h) / ratio;
 					float xmax = (cx + 0.5 * w) / ratio;
 					float ymax = (cy + 0.5 * h) / ratio;
 					generate_boxes.push_back(BoxInfo{ xmin, ymin, xmax, ymax, max_class_socre, max_ind });
 				}
 				pdata += nout;
 			}
 		}
 	}
 	// Perform non maximum suppression to eliminate redundant overlapping boxes with
 	// lower confidences
 	nms(generate_boxes);
 	for (size_t i = 0; i < generate_boxes.size(); ++i)
 	{
 		const BoxInfo& box = generate_boxes[i];
 		int xmin = int(box.x1);
 		int ymin = int(box.y1);
 		rectangle(frame, Point(xmin, ymin), Point(int(box.x2), int(box.y2)), Scalar(0, 0, 255), 2);
 		string label = format("%.2f", box.score);
 		label = this->class_names[box.label] + ":" + label;
 		putText(frame, label, Point(xmin, ymin - 5), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
 	}
 }
 int main()
 {
 	Net_config cfg = { 0.6, 0.5, "weights/crowdhuman/yolo_free_huge_crowdhuman_192x320.onnx", "person" };
 	FreeYOLO net(cfg);
 	string imgpath = "images/person/1.png";
 	Mat srcimg = imread(imgpath);
 	net.detect(srcimg);
 	static const string kWinName = "Deep learning object detection in ONNXRuntime";
 	namedWindow(kWinName, WINDOW_NORMAL);
 	imshow(kWinName, srcimg);
 	waitKey(0);
 	destroyAllWindows();
 }
--- a/onnxruntime/main.py
+++ b/onnxruntime/main.py
@@ -0,0 +1,138 @@
 import argparse
 import cv2
 import numpy as np
 import onnxruntime as ort
 class FreeYOLO():
    def __init__(self, model_path, confThreshold=0.4, nmsThreshold=0.85, datatype='coco'):
        """Create the ONNX Runtime session and prepare anchors and class names.

        model_path:    path handed to ort.InferenceSession.
        confThreshold: detections must score above this value to be kept.
        nmsThreshold:  threshold passed to cv2.dnn.NMSBoxes in detect().
        datatype:      'coco' reads the labels from 'coco.names', 'face' uses the
                       single label 'face', anything else the single label 'person'.
        """
        so = ort.SessionOptions()
        so.log_severity_level = 3
        self.session = ort.InferenceSession(model_path, so)
        model_inputs = self.session.get_inputs()
        self.input_name = model_inputs[0].name
        self.input_shape = model_inputs[0].shape
        # Height and width are read from positions 2 and 3 of the input shape.
        self.input_height = int(self.input_shape[2])
        self.input_width = int(self.input_shape[3])
        self.anchors, self.expand_strides = self.generate_anchors((self.input_height, self.input_width), [8, 16, 32])
        if datatype == 'coco':
            # The context manager closes the label file as soon as it has been read.
            with open('coco.names', 'r') as names_file:
                self.classes = [line.strip() for line in names_file]
        elif datatype == 'face':
            self.classes = ['face']
        else:
            self.classes = ['person']
        self.num_class = len(self.classes)
        self.confThreshold = confThreshold
        self.nmsThreshold = nmsThreshold
    def generate_anchors(self, input_shape, strides):
        """Build the grid-cell centres and per-cell strides for every stride level.

        input_shape: (height, width) of the network input.
        strides:     iterable of strides, one per level.

        Returns (anchors, expand_strides): anchors is [M, 2] holding the (x, y)
        centre of each cell in input pixels, expand_strides is [M, 1] holding
        the stride of each cell. Levels are concatenated in the order given.
        """
        height, width = input_shape[0], input_shape[1]
        centers_per_level = []
        strides_per_level = []
        for level_stride in strides:
            rows, cols = height // level_stride, width // level_stride
            grid_x, grid_y = np.meshgrid(np.arange(cols), np.arange(rows))
            # [rows, cols, 2] -> [rows * cols, 2], x varying fastest
            cell_xy = np.stack([grid_x, grid_y], axis=-1).reshape(-1, 2)
            centers_per_level.append((cell_xy + 0.5) * level_stride)
            strides_per_level.append(np.full((rows * cols, 1), level_stride))
        anchors = np.concatenate(centers_per_level, axis=0)
        expand_strides = np.concatenate(strides_per_level, axis=0)
        return anchors, expand_strides
    def decode_boxes(self, anchors, pred_regs, expand_strides):
        """Turn regression outputs into boxes in (x1, y1, w, h) form.

        anchors:        [..., 2] cell centres.
        pred_regs:      [..., 4] values; the first two are centre offsets in
                        units of the stride, the last two are log sizes.
        expand_strides: stride of each cell, broadcast against the above.
        """
        offsets = pred_regs[..., :2]
        log_sizes = pred_regs[..., 2:]
        box_wh = expand_strides * np.exp(log_sizes)
        box_ctr = anchors[..., :2] + expand_strides * offsets
        top_left = box_ctr - box_wh * 0.5
        # The result carries width/height, not the bottom-right corner.
        return np.concatenate([top_left, box_wh], axis=-1)
    def drawPred(self, frame, classId, conf, left, top, right, bottom):
        """Draw one detection on frame (box plus a 'name:score' caption) and return frame."""
        box_color = (0, 0, 255)
        text_color = (0, 255, 0)
        cv2.rectangle(frame, (left, top), (right, bottom), box_color, thickness=2)
        score_text = '%.2f' % conf
        caption = '%s:%s' % (self.classes[classId], score_text)
        # Keep the caption anchor at least one text height below the image top.
        text_size, _ = cv2.getTextSize(caption, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        caption_top = max(top, text_size[1])
        cv2.putText(frame, caption, (left, caption_top - 10), 0, 0.7, text_color, thickness=2)
        return frame
    def detect(self, frame):
        """Run the detector on frame, draw the kept detections on it and return it.

        The image is resized with its aspect ratio preserved and pasted into the
        top-left corner of a canvas filled with 114. Boxes are decoded in canvas
        coordinates and divided by the resize ratio to map them back to frame.
        Raises ValueError when frame is None or empty.
        """
        if frame is None or frame.size == 0:
            raise ValueError('detect() received an empty image')
        padded_image = np.full((self.input_height, self.input_width, 3), 114, dtype=np.uint8)
        ratio = min(self.input_height / frame.shape[0], self.input_width / frame.shape[1])
        neww, newh = int(frame.shape[1] * ratio), int(frame.shape[0] * ratio)
        temp_image = cv2.resize(frame, (neww, newh), interpolation=cv2.INTER_LINEAR)
        padded_image[:newh, :neww, :] = temp_image
        # HWC -> CHW, then add the batch axis.
        padded_image = padded_image.transpose(2, 0, 1)
        padded_image = np.expand_dims(padded_image, axis=0).astype(np.float32)
        # Inference
        results = self.session.run(None, {self.input_name: padded_image})
        predictions = results[0][0]
        reg_preds = predictions[..., :4]
        obj_preds = predictions[..., 4:5]
        cls_preds = predictions[..., 5:]
        scores = np.sqrt(obj_preds * cls_preds)
        # scores & class_ids
        class_ids = np.argmax(scores, axis=1)  # [M,]
        scores = np.max(scores, axis=1)
        # bboxes
        bboxes = self.decode_boxes(self.anchors, reg_preds, self.expand_strides)  # [M, 4]
        # thresh
        keep = np.where(scores > self.confThreshold)
        scores = scores[keep]
        class_ids = class_ids[keep]
        bboxes = bboxes[keep]
        bboxes /= ratio
        indices = cv2.dnn.NMSBoxes(bboxes.tolist(), scores.tolist(), self.confThreshold, self.nmsThreshold)
        # Depending on the OpenCV version the indices come back as an (N, 1) array,
        # a flat array, or an empty tuple; flattening handles all three.
        for i in np.asarray(indices, dtype=np.int64).reshape(-1):
            left, top, width, height = (int(v) for v in bboxes[i].astype(np.int32))
            frame = self.drawPred(frame, int(class_ids[i]), float(scores[i]), left, top, left + width, top + height)
        return frame
 if __name__ == '__main__':
    # Command-line demo: detect objects in one image and show the result.
    parser = argparse.ArgumentParser()
    parser.add_argument("--modelpath", type=str, default='weights/coco/yolo_free_nano_192x320.onnx', help="model path")
    parser.add_argument("--imgpath", type=str, default='images/coco/dog.jpg', help="image path")
    parser.add_argument("--confThreshold", default=0.6, type=float, help='class confidence')
    parser.add_argument("--nmsThreshold", default=0.5, type=float, help='iou thresh')
    parser.add_argument("--datatype", default='coco', type=str, choices=['coco', 'face', 'person'], help='data type')
    args = parser.parse_args()
    net = FreeYOLO(args.modelpath, confThreshold=args.confThreshold, nmsThreshold=args.nmsThreshold, datatype=args.datatype)
    srcimg = cv2.imread(args.imgpath)
    if srcimg is None:
        # cv2.imread returns None instead of raising when the file cannot be read.
        raise SystemExit('Failed to read image: %s' % args.imgpath)
    srcimg = net.detect(srcimg)
    winName = 'Deep learning object detection in ONNXRuntime'
    cv2.namedWindow(winName, cv2.WINDOW_NORMAL)
    cv2.imshow(winName, srcimg)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
--- a/opencv/coco.names
+++ b/opencv/coco.names
@@ -0,0 +1,80 @@
 person
 bicycle
 car
 motorbike
 aeroplane
 bus
 train
 truck
 boat
 traffic light
 fire hydrant
 stop sign
 parking meter
 bench
 bird
 cat
 dog
 horse
 sheep
 cow
 elephant
 bear
 zebra
 giraffe
 backpack
 umbrella
 handbag
 tie
 suitcase
 frisbee
 skis
 snowboard
 sports ball
 kite
 baseball bat
 baseball glove
 skateboard
 surfboard
 tennis racket
 bottle
 wine glass
 cup
 fork
 knife
 spoon
 bowl
 banana
 apple
 sandwich
 orange
 broccoli
 carrot
 hot dog
 pizza
 donut
 cake
 chair
 sofa
 pottedplant
 bed
 diningtable
 toilet
 tvmonitor
 laptop
 mouse
 remote
 keyboard
 cell phone
 microwave
 oven
 toaster
 sink
 refrigerator
 book
 clock
 vase
 scissors
 teddy bear
 hair drier
 toothbrush
--- a/opencv/images/coco/000000000785.jpg
+++ b/opencv/images/coco/000000000785.jpg
--- a/opencv/images/coco/bus.jpg
+++ b/opencv/images/coco/bus.jpg
--- a/opencv/images/coco/dog.jpg
+++ b/opencv/images/coco/dog.jpg
--- a/opencv/images/coco/person.jpg
+++ b/opencv/images/coco/person.jpg
--- a/opencv/images/coco/zidane.jpg
+++ b/opencv/images/coco/zidane.jpg
--- a/opencv/images/face/1.jpg
+++ b/opencv/images/face/1.jpg
--- a/opencv/images/face/2.jpg
+++ b/opencv/images/face/2.jpg
--- a/opencv/images/face/3.jpg
+++ b/opencv/images/face/3.jpg
--- a/opencv/images/face/4.jpg
+++ b/opencv/images/face/4.jpg
--- a/opencv/images/person/1.png
+++ b/opencv/images/person/1.png
--- a/opencv/main.cpp
+++ b/opencv/main.cpp
@@ -0,0 +1,174 @@
 #include <fstream>
 #include <sstream>
 #include <iostream>
 #include <opencv2/imgproc.hpp>
 #include <opencv2/highgui.hpp>
 #include <opencv2/dnn.hpp>
 using namespace std;
 using namespace cv;
 using namespace dnn;
 /// Settings handed to the FreeYOLO constructor.
 struct Net_config
 {
 	/// A detection is kept only when its score is above this value.
 	float confThreshold;
 	/// Overlap threshold used by non-maximum suppression.
 	float nmsThreshold;
 	/// Path of the model file to load.
 	std::string modelpath;
 	/// Label set selector; the constructor compares it with "coco" and "face".
 	std::string datatype;
 };
 class FreeYOLO
 {
 public:
 	FreeYOLO(Net_config config);
 	void detect(Mat& frame);
 private:
 	int inpWidth;
 	int inpHeight;
 	int nout;
 	int num_proposal;
 	vector<string> class_names;
 	int num_class;
 	const int num_stride = 3;
 	int strides[3] = { 8,16,32 };
 	float confThreshold;
 	float nmsThreshold;
 	Net net;
 };
 /// Loads the network with cv::dnn, derives the input size from the model file
 /// name and loads the class labels selected by `config.datatype`.
 ///
 /// The file name must end in "_<height>x<width>.<ext>", for example
 /// "..._192x320.onnx". Throws cv::Exception when that pattern is missing, the
 /// parsed size is not positive, or the "coco" label file cannot be read.
 FreeYOLO::FreeYOLO(Net_config config)
 {
 	this->confThreshold = config.confThreshold;
 	this->nmsThreshold = config.nmsThreshold;
 	this->net = readNet(config.modelpath);
 	const string& path = config.modelpath;
 	const size_t underscore = path.rfind('_');
 	const size_t dot = path.rfind('.');
 	// rfind returns npos on failure; using it in arithmetic would yield garbage.
 	if (underscore == string::npos || dot == string::npos || dot <= underscore + 1)
 	{
 		CV_Error(cv::Error::StsBadArg, "FreeYOLO: model file name must end in _<height>x<width>.<ext>: " + path);
 	}
 	const string hxw = path.substr(underscore + 1, dot - underscore - 1);
 	const size_t cross = hxw.rfind('x');
 	if (cross == string::npos || cross == 0 || cross + 1 >= hxw.size())
 	{
 		CV_Error(cv::Error::StsBadArg, "FreeYOLO: cannot find <height>x<width> in model file name: " + path);
 	}
 	this->inpHeight = stoi(hxw.substr(0, cross));
 	this->inpWidth = stoi(hxw.substr(cross + 1));
 	if (this->inpHeight <= 0 || this->inpWidth <= 0)
 	{
 		CV_Error(cv::Error::StsBadArg, "FreeYOLO: input size in model file name must be positive: " + path);
 	}
 	if (config.datatype == "coco")
 	{
 		const string classesFile = "coco.names";
 		ifstream ifs(classesFile.c_str());
 		if (!ifs.is_open())
 		{
 			CV_Error(cv::Error::StsError, "FreeYOLO: cannot open class file " + classesFile);
 		}
 		string line;
 		while (getline(ifs, line))
 		{
 			// Drop the carriage return left over from CRLF line endings.
 			if (!line.empty() && line.back() == '\r') line.pop_back();
 			this->class_names.push_back(line);
 		}
 		if (this->class_names.empty())
 		{
 			CV_Error(cv::Error::StsError, "FreeYOLO: class file " + classesFile + " is empty");
 		}
 	}
 	else if (config.datatype == "face")
 	{
 		this->class_names.push_back("face");
 	}
 	else
 	{
 		this->class_names.push_back("person");
 	}
 	this->num_class = static_cast<int>(class_names.size());
 }
 /// Detects objects in `frame` and draws the surviving boxes and labels onto it.
 ///
 /// The image is resized with its aspect ratio kept, padded on the bottom/right
 /// with 114 in every channel up to the network input size and run through the
 /// network. Decoded boxes are mapped back to `frame` coordinates before NMS.
 /// Throws cv::Exception for an empty image or an output blob that is too small
 /// for the configured grid and class count.
 void FreeYOLO::detect(Mat& frame)
 {
 	if (frame.empty())
 	{
 		// Without this guard the ratio below divides by zero rows/columns.
 		CV_Error(cv::Error::StsBadArg, "FreeYOLO::detect: input image is empty");
 	}
 	// class_names is indexed with the winning class further down.
 	CV_Assert(this->num_class > 0);
 	const float ratio = std::min(float(this->inpHeight) / float(frame.rows), float(this->inpWidth) / float(frame.cols));
 	const int neww = int(frame.cols * ratio);
 	const int newh = int(frame.rows * ratio);
 	Mat dstimg;
 	resize(frame, dstimg, Size(neww, newh));
 	// A bare 114 converts to Scalar(114, 0, 0, 0) and would pad only channel 0.
 	copyMakeBorder(dstimg, dstimg, 0, this->inpHeight - newh, 0, this->inpWidth - neww, BORDER_CONSTANT, Scalar::all(114));
 	Mat blob = blobFromImage(dstimg);
 	this->net.setInput(blob);
 	vector<Mat> outs;
 	// Run inference.
 	this->net.forward(outs, this->net.getUnconnectedOutLayersNames());
 	CV_Assert(!outs.empty() && outs[0].dims == 3);
 	num_proposal = outs[0].size[1];
 	nout = outs[0].size[2];
 	// The loops below read one record per grid cell and 5 + num_class values per
 	// record, so make sure the output blob is large enough for that.
 	int grid_cells = 0;
 	for (int n = 0; n < this->num_stride; n++)
 	{
 		grid_cells += (this->inpWidth / strides[n]) * (this->inpHeight / strides[n]);
 	}
 	CV_Assert(nout >= 5 + num_class && num_proposal >= grid_cells);
 	const float* pdata = (float*)outs[0].data;
 	// Record layout: cx, cy, w, h, box_score, class_score...
 	vector<float> confidences;
 	vector<Rect> boxes;
 	vector<int> classIds;
 	for (int n = 0; n < this->num_stride; n++)   // one pass per feature-map scale
 	{
 		const int stride = strides[n];
 		// Integer division already truncates, so no ceil() is needed here.
 		const int num_grid_x = this->inpWidth / stride;
 		const int num_grid_y = this->inpHeight / stride;
 		for (int i = 0; i < num_grid_y; i++)
 		{
 			for (int j = 0; j < num_grid_x; j++)
 			{
 				const float box_score = pdata[4];
 				int max_ind = 0;
 				float max_class_socre = 0;
 				for (int k = 0; k < num_class; k++)
 				{
 					if (pdata[k + 5] > max_class_socre)
 					{
 						max_class_socre = pdata[k + 5];
 						max_ind = k;
 					}
 				}
 				// Final score is the square root of box score times best class score.
 				max_class_socre *= box_score;
 				max_class_socre = sqrt(max_class_socre);
 				if (max_class_socre > this->confThreshold)
 				{
 					const float cx = (0.5f + j + pdata[0]) * stride;
 					const float cy = (0.5f + i + pdata[1]) * stride;
 					const float w = expf(pdata[2]) * stride;
 					const float h = expf(pdata[3]) * stride;
 					// Dividing by ratio maps network coordinates back to the frame.
 					int left = int((cx - 0.5 * w) / ratio);
 					int top = int((cy - 0.5 * h) / ratio);
 					int width = int(w / ratio);
 					int height = int(h / ratio);
 					confidences.push_back(max_class_socre);
 					boxes.push_back(Rect(left, top, width, height));
 					classIds.push_back(max_ind);
 				}
 				pdata += nout;
 			}
 		}
 	}
 	vector<int> indices;
 	dnn::NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
 	for (size_t i = 0; i < indices.size(); ++i)
 	{
 		const int idx = indices[i];
 		const Rect box = boxes[idx];
 		rectangle(frame, Point(box.x, box.y), Point(box.x + box.width, box.y + box.height), Scalar(0, 0, 255), 3);
 		string label = format("%.2f", confidences[idx]);
 		label = this->class_names[classIds[idx]] + ":" + label;
 		putText(frame, label, Point(box.x, box.y - 10), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 255, 0), 2);
 	}
 }
 int main()
 {
 	Net_config cfg = { 0.8, 0.5, "weights/face/yolo_free_huge_widerface_192x320.onnx", "face" };
 	FreeYOLO net(cfg);
 	string imgpath = "images/face/1.jpg";
 	Mat srcimg = imread(imgpath);
 	net.detect(srcimg);
 	static const string kWinName = "Deep learning object detection in OpenCV";
 	namedWindow(kWinName, WINDOW_NORMAL);
 	imshow(kWinName, srcimg);
 	waitKey(0);
 	destroyAllWindows();
 }
--- a/opencv/main.py
+++ b/opencv/main.py
@@ -0,0 +1,133 @@
 import argparse
 import cv2
 import numpy as np
 import os
 class FreeYOLO():
    def __init__(self, model_path, confThreshold=0.4, nmsThreshold=0.85, datatype='coco'):
        """Load the network with cv2.dnn and prepare anchors and class names.

        model_path:    model file; its base name must end in '_<height>x<width>',
                       which is where the input size is taken from.
        confThreshold: detections must score above this value to be kept.
        nmsThreshold:  threshold passed to cv2.dnn.NMSBoxes in detect().
        datatype:      'coco' reads the labels from 'coco.names', 'face' uses the
                       single label 'face', anything else the single label 'person'.
        """
        self.net = cv2.dnn.readNet(model_path)
        filename = os.path.splitext(os.path.basename(model_path))[0]
        input_shape = filename.split('_')[-1].split('x')
        self.input_height = int(input_shape[0])
        self.input_width = int(input_shape[1])
        self.anchors, self.expand_strides = self.generate_anchors((self.input_height, self.input_width), [8, 16, 32])
        if datatype == 'coco':
            # The context manager closes the label file as soon as it has been read.
            with open('coco.names', 'r') as names_file:
                self.classes = [line.strip() for line in names_file]
        elif datatype == 'face':
            self.classes = ['face']
        else:
            self.classes = ['person']
        self.num_class = len(self.classes)
        self.confThreshold = confThreshold
        self.nmsThreshold = nmsThreshold
        self.output_names = self.net.getUnconnectedOutLayersNames()
    def generate_anchors(self, input_shape, strides):
        """Build the grid-cell centres and per-cell strides for every stride level.

        input_shape: (height, width) of the network input.
        strides:     iterable of strides, one per level.

        Returns (anchors, expand_strides): anchors is [M, 2] holding the (x, y)
        centre of each cell in input pixels, expand_strides is [M, 1] holding
        the stride of each cell. Levels are concatenated in the order given.
        """
        height, width = input_shape[0], input_shape[1]
        centers_per_level = []
        strides_per_level = []
        for level_stride in strides:
            rows, cols = height // level_stride, width // level_stride
            grid_x, grid_y = np.meshgrid(np.arange(cols), np.arange(rows))
            # [rows, cols, 2] -> [rows * cols, 2], x varying fastest
            cell_xy = np.stack([grid_x, grid_y], axis=-1).reshape(-1, 2)
            centers_per_level.append((cell_xy + 0.5) * level_stride)
            strides_per_level.append(np.full((rows * cols, 1), level_stride))
        anchors = np.concatenate(centers_per_level, axis=0)
        expand_strides = np.concatenate(strides_per_level, axis=0)
        return anchors, expand_strides
    def decode_boxes(self, anchors, pred_regs, expand_strides):
        """Turn regression outputs into boxes in (x1, y1, w, h) form.

        anchors:        [..., 2] cell centres.
        pred_regs:      [..., 4] values; the first two are centre offsets in
                        units of the stride, the last two are log sizes.
        expand_strides: stride of each cell, broadcast against the above.
        """
        offsets = pred_regs[..., :2]
        log_sizes = pred_regs[..., 2:]
        box_wh = expand_strides * np.exp(log_sizes)
        box_ctr = anchors[..., :2] + expand_strides * offsets
        top_left = box_ctr - box_wh * 0.5
        # The result carries width/height, not the bottom-right corner.
        return np.concatenate([top_left, box_wh], axis=-1)
    def drawPred(self, frame, classId, conf, left, top, right, bottom):
        """Draw one detection on frame (box plus a 'name:score' caption) and return frame."""
        box_color = (0, 0, 255)
        text_color = (0, 255, 0)
        cv2.rectangle(frame, (left, top), (right, bottom), box_color, thickness=2)
        score_text = '%.2f' % conf
        caption = '%s:%s' % (self.classes[classId], score_text)
        # Keep the caption anchor at least one text height below the image top.
        text_size, _ = cv2.getTextSize(caption, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        caption_top = max(top, text_size[1])
        cv2.putText(frame, caption, (left, caption_top - 10), 0, 0.7, text_color, thickness=2)
        return frame
    def detect(self, frame):
        """Run the detector on frame, draw the kept detections on it and return it.

        The image is resized with its aspect ratio preserved and pasted into the
        top-left corner of a canvas filled with 114. Boxes are decoded in canvas
        coordinates and divided by the resize ratio to map them back to frame.
        Raises ValueError when frame is None or empty.
        """
        if frame is None or frame.size == 0:
            raise ValueError('detect() received an empty image')
        padded_image = np.full((self.input_height, self.input_width, 3), 114, dtype=np.uint8)
        ratio = min(self.input_height / frame.shape[0], self.input_width / frame.shape[1])
        neww, newh = int(frame.shape[1] * ratio), int(frame.shape[0] * ratio)
        temp_image = cv2.resize(frame, (neww, newh), interpolation=cv2.INTER_LINEAR)
        padded_image[:newh, :neww, :] = temp_image
        blob = cv2.dnn.blobFromImage(padded_image)
        self.net.setInput(blob)
        results = self.net.forward(self.output_names)
        predictions = results[0][0]
        reg_preds = predictions[..., :4]
        obj_preds = predictions[..., 4:5]
        cls_preds = predictions[..., 5:]
        scores = np.sqrt(obj_preds * cls_preds)
        # scores & class_ids
        class_ids = np.argmax(scores, axis=1)  # [M,]
        scores = np.max(scores, axis=1)
        # bboxes
        bboxes = self.decode_boxes(self.anchors, reg_preds, self.expand_strides)  # [M, 4]
        # thresh
        keep = np.where(scores > self.confThreshold)
        scores = scores[keep]
        class_ids = class_ids[keep]
        bboxes = bboxes[keep]
        bboxes /= ratio
        indices = cv2.dnn.NMSBoxes(bboxes.tolist(), scores.tolist(), self.confThreshold, self.nmsThreshold)
        # Depending on the OpenCV version the indices come back as an (N, 1) array,
        # a flat array, or an empty tuple; flattening handles all three.
        for i in np.asarray(indices, dtype=np.int64).reshape(-1):
            left, top, width, height = (int(v) for v in bboxes[i].astype(np.int32))
            frame = self.drawPred(frame, int(class_ids[i]), float(scores[i]), left, top, left + width, top + height)
        return frame
 if __name__ == '__main__':
    # Command-line demo: detect objects in one image and show the result.
    parser = argparse.ArgumentParser()
    parser.add_argument("--modelpath", type=str, default='weights/coco/yolo_free_nano_192x320.onnx', help="model path")
    parser.add_argument("--imgpath", type=str, default='images/coco/dog.jpg', help="image path")
    parser.add_argument("--confThreshold", default=0.6, type=float, help='class confidence')
    parser.add_argument("--nmsThreshold", default=0.5, type=float, help='iou thresh')
    parser.add_argument("--datatype", default='coco', type=str, choices=['coco', 'face', 'person'], help='data type')
    args = parser.parse_args()
    net = FreeYOLO(args.modelpath, confThreshold=args.confThreshold, nmsThreshold=args.nmsThreshold, datatype=args.datatype)
    srcimg = cv2.imread(args.imgpath)
    if srcimg is None:
        # cv2.imread returns None instead of raising when the file cannot be read.
        raise SystemExit('Failed to read image: %s' % args.imgpath)
    srcimg = net.detect(srcimg)
    winName = 'Deep learning object detection in OpenCV'
    cv2.namedWindow(winName, cv2.WINDOW_NORMAL)
    cv2.imshow(winName, srcimg)
    cv2.waitKey(0)
    cv2.destroyAllWindows()