Add files via upload

2025-09-26 20:11:15 +08:00 · 2023-07-28 22:48:39 +08:00
commit ff8b593f06
4 changed files with 22154 additions and 0 deletions
--- a/desk.jpg
+++ b/desk.jpg
--- a/imagenet_21k_class_names.txt
+++ b/imagenet_21k_class_names.txt
--- a/main.cpp
+++ b/main.cpp
@@ -0,0 +1,193 @@
+#define _CRT_SECURE_NO_WARNINGS
+#include <iostream>
+#include <fstream>
+#include <numeric>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/highgui.hpp>
+//#include <cuda_provider_factory.h>  ///nvidia-cuda<64><61><EFBFBD><EFBFBD>
+#include <onnxruntime_cxx_api.h>
+
+using namespace cv;
+using namespace std;
+using namespace Ort;
+
+/// A single detection: an axis-aligned box in pixel coordinates of the image
+/// that was passed to the detector, plus its score and class label.
+struct BoxInfo
+{
+	int xmin;          ///< smallest x of the box
+	int ymin;          ///< smallest y of the box
+	int xmax;          ///< largest x of the box
+	int ymax;          ///< largest y of the box
+	float score;       ///< score the model reported for this box
+	std::string name;  ///< label taken from the class-name list
+};
+
+/// Wraps an ONNX Runtime session for a Detic detection model and turns the
+/// model's raw outputs into labelled boxes.
+class Detic
+{
+public:
+	/// Creates the inference session for the model file at `modelpath`.
+	Detic(string modelpath);
+	/// Frees the session that the constructor allocated with `new`.
+	~Detic() { delete ort_session; }
+	// ort_session is a raw owning pointer, so a copied object would delete it twice.
+	Detic(const Detic&) = delete;
+	Detic& operator=(const Detic&) = delete;
+	/// Runs detection on `cv_image` (BGR channel order) and returns the boxes in
+	/// the pixel coordinates of that image.
+	vector<BoxInfo> detect(Mat cv_image);
+private:
+	/// Fills input_image_, inpWidth and inpHeight from `srcimg`.
+	void preprocess(Mat srcimg);
+	vector<float> input_image_;   // network input: all of channel 0, then channel 1, then channel 2
+	int inpWidth = 0;             // width of the resized network input, set by preprocess()
+	int inpHeight = 0;            // height of the resized network input, set by preprocess()
+	vector<string> class_names;   // one label per line of the class-name file
+	const int max_size = 800;     // target side length used when resizing
+
+	// ONNX Runtime objects set up at initialisation. env is declared first so it
+	// is constructed before the session that is created from it.
+	// NOTE(review): the log id below looks copied from another project; it only labels log output.
+	Env env = Env(ORT_LOGGING_LEVEL_ERROR, "Head Pose Estimation");
+	Ort::Session *ort_session = nullptr;  // owned; released in ~Detic()
+	SessionOptions sessionOptions = SessionOptions();
+	vector<char*> input_names;
+	vector<char*> output_names;
+	vector<vector<int64_t>> input_node_dims; // shape of each model input (>=1 inputs)
+	vector<vector<int64_t>> output_node_dims; // shape of each model output (>=1 outputs)
+};
+
+Detic::Detic(string model_path)
+{
+	//OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);  ///nvidia-cuda<64><61><EFBFBD><EFBFBD>
+	sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
+	std::wstring widestr = std::wstring(model_path.begin(), model_path.end());   ///<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>windowsϵͳ<CFB5><CDB3><EFBFBD><EFBFBD>ôд
+	ort_session = new Session(env, widestr.c_str(), sessionOptions);   ///<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>windowsϵͳ<CFB5><CDB3><EFBFBD><EFBFBD>ôд
+	///ort_session = new Session(env, model_path.c_str(), sessionOptions);  ///<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>linuxϵͳ<CFB5><CDB3><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ôд
+
+	size_t numInputNodes = ort_session->GetInputCount();
+	size_t numOutputNodes = ort_session->GetOutputCount();
+	AllocatorWithDefaultOptions allocator;
+	for (int i = 0; i < numInputNodes; i++)
+	{
+		input_names.push_back(ort_session->GetInputName(i, allocator));
+		Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i);
+		auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
+		auto input_dims = input_tensor_info.GetShape();
+		input_node_dims.push_back(input_dims);
+	}
+	for (int i = 0; i < numOutputNodes; i++)
+	{
+		output_names.push_back(ort_session->GetOutputName(i, allocator));
+		Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i);
+		auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
+		auto output_dims = output_tensor_info.GetShape();
+		output_node_dims.push_back(output_dims);
+	}
+
+	ifstream ifs("imagenet_21k_class_names.txt");
+	string line;
+	while (getline(ifs, line))
+	{
+		this->class_names.push_back(line);  ///<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ÿ<EFBFBD><C3BF><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>RGBֵ
+	}
+}
+
+/// Prepares `srcimg` (BGR) as network input.
+///
+/// The image is converted to RGB and resized with its aspect ratio kept: the
+/// shorter side is first scaled to max_size, and if the longer side then
+/// exceeds max_size both sides are scaled down again. The resized size is
+/// stored in inpHeight/inpWidth and the pixels are written to input_image_ as
+/// floats, one full channel plane after another (values are not normalised).
+void Detic::preprocess(Mat srcimg)
+{
+	Mat dstimg;
+	cvtColor(srcimg, dstimg, COLOR_BGR2RGB);
+	const int im_h = srcimg.rows;
+	const int im_w = srcimg.cols;
+
+	float oh, ow;
+	if (im_h < im_w)
+	{
+		const float scale = static_cast<float>(max_size) / static_cast<float>(im_h);
+		oh = static_cast<float>(max_size);
+		ow = scale * static_cast<float>(im_w);
+	}
+	else
+	{
+		// The width is the shorter side here, so the scale must be derived from
+		// it. Using the height instead collapses every such image to a
+		// max_size x max_size square and distorts it.
+		const float scale = static_cast<float>(max_size) / static_cast<float>(im_w);
+		oh = scale * static_cast<float>(im_h);
+		ow = static_cast<float>(max_size);
+	}
+	const float max_hw = std::max(oh, ow);
+	if (max_hw > max_size)
+	{
+		const float shrink = static_cast<float>(max_size) / max_hw;
+		oh *= shrink;
+		ow *= shrink;
+	}
+
+	// Keep at least one pixel per side so extreme aspect ratios do not round to an empty size.
+	const int out_w = std::max(1, int(ow + 0.5));
+	const int out_h = std::max(1, int(oh + 0.5));
+	// fx and fy are passed explicitly so INTER_LINEAR lands in the interpolation parameter.
+	resize(dstimg, dstimg, Size(out_w, out_h), 0, 0, INTER_LINEAR);
+	this->inpHeight = dstimg.rows;
+	this->inpWidth = dstimg.cols;
+
+	// Repack interleaved RGB bytes (row, column, channel) into planar floats (channel, row, column).
+	const size_t plane = static_cast<size_t>(this->inpHeight) * static_cast<size_t>(this->inpWidth);
+	this->input_image_.resize(plane * dstimg.channels());
+	for (int c = 0; c < 3; c++)
+	{
+		float *dst = this->input_image_.data() + plane * c;
+		for (int i = 0; i < this->inpHeight; i++)
+		{
+			const uchar *row = dstimg.ptr<uchar>(i);
+			for (int j = 0; j < this->inpWidth; j++)
+			{
+				*dst++ = static_cast<float>(row[j * 3 + c]);
+			}
+		}
+	}
+}
+
+/// Runs the model on `srcimg` (BGR) and converts its outputs into BoxInfo entries.
+///
+/// Output 0 is read as boxes (4 floats each), output 1 as scores and output 2
+/// as class ids. Box coordinates are scaled from the resized network input
+/// back to `srcimg` and clamped to its bounds; boxes whose clamped width or
+/// height is not positive are dropped. No score threshold is applied here.
+/// @return the remaining boxes; empty when `srcimg` is empty.
+/// @throws std::runtime_error if the model yields fewer than three outputs.
+vector<BoxInfo> Detic::detect(Mat srcimg)
+{
+	vector<BoxInfo> preds;
+	if (srcimg.empty())
+	{
+		return preds;
+	}
+	const int im_h = srcimg.rows;
+	const int im_w = srcimg.cols;
+	this->preprocess(srcimg);
+	array<int64_t, 4> input_shape_{ 1, 3, this->inpHeight, this->inpWidth };
+
+	auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
+	Value input_tensor_ = Value::CreateTensor<float>(allocator_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size());
+
+	// Run inference.
+	vector<Value> ort_outputs = ort_session->Run(RunOptions{ nullptr }, input_names.data(), &input_tensor_, 1, output_names.data(), output_names.size());
+	if (ort_outputs.size() < 3)
+	{
+		throw std::runtime_error("model returned fewer than 3 outputs (boxes, scores, classes expected)");
+	}
+
+	const float *pred_boxes = ort_outputs[0].GetTensorMutableData<float>();
+	const float *scores = ort_outputs[1].GetTensorMutableData<float>();
+	//const float *pred_masks = ort_outputs[3].GetTensorMutableData<float>();
+
+	// The class-id tensor may be exported as 32-bit or 64-bit integers. Reading
+	// a 64-bit tensor through an int pointer would yield wrong ids, so pick the
+	// pointer type from the tensor's declared element type.
+	Value &classes_out = ort_outputs[2];
+	const bool classes_are_int64 =
+		classes_out.GetTensorTypeAndShapeInfo().GetElementType() == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
+	const int *classes32 = classes_are_int64 ? nullptr : classes_out.GetTensorMutableData<int>();
+	const int64_t *classes64 = classes_are_int64 ? classes_out.GetTensorMutableData<int64_t>() : nullptr;
+
+	const vector<int64_t> box_shape = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape();
+	const int num_box = box_shape.empty() ? 0 : static_cast<int>(box_shape[0]);
+	const float scale_x = float(im_w) / float(inpWidth);
+	const float scale_y = float(im_h) / float(inpHeight);
+	for (int i = 0; i < num_box; i++)
+	{
+		float xmin = pred_boxes[i * 4] * scale_x;
+		float ymin = pred_boxes[i * 4 + 1] * scale_y;
+		float xmax = pred_boxes[i * 4 + 2] * scale_x;
+		float ymax = pred_boxes[i * 4 + 3] * scale_y;
+		xmin = std::min(std::max(xmin, 0.f), float(im_w));
+		ymin = std::min(std::max(ymin, 0.f), float(im_h));
+		xmax = std::min(std::max(xmax, 0.f), float(im_w));
+		ymax = std::min(std::max(ymax, 0.f), float(im_h));
+
+		const float threshold = 0;
+		const float width = xmax - xmin;
+		const float height = ymax - ymin;
+		if (width > threshold && height > threshold)
+		{
+			const int64_t cls = classes_are_int64 ? classes64[i] : static_cast<int64_t>(classes32[i]);
+			// Fall back to the numeric id when it does not index the loaded label list.
+			const bool known = cls >= 0 && cls < static_cast<int64_t>(class_names.size());
+			const string label = known ? class_names[static_cast<size_t>(cls)] : std::to_string(cls);
+			preds.push_back({ int(xmin), int(ymin), int(xmax), int(ymax), scores[i], label });
+		}
+	}
+	return preds;
+}
+
+/// Demo entry point: loads the model, runs detection on "desk.jpg", draws the
+/// boxes and labels on the image and shows it in a window until a key is pressed.
+/// Returns 1 if the image cannot be read or if loading/inference throws.
+int main()
+{
+	try
+	{
+		Detic mynet("weights/Detic_C2_R50_640_4x_in21k.onnx");
+		const string imgpath = "desk.jpg";
+		Mat srcimg = imread(imgpath);
+		if (srcimg.empty())
+		{
+			// imread reports failure by returning an empty Mat, not by throwing.
+			cerr << "failed to read image: " << imgpath << endl;
+			return 1;
+		}
+
+		const vector<BoxInfo> preds = mynet.detect(srcimg);
+		for (const BoxInfo &box : preds)
+		{
+			rectangle(srcimg, Point(box.xmin, box.ymin), Point(box.xmax, box.ymax), Scalar(0, 0, 255), 2);
+			// Qualified so the call stays unambiguous if std::format is also visible.
+			const string label = box.name + " :" + cv::format("%.2f", box.score);
+			putText(srcimg, label, Point(box.xmin, box.ymin - 5), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 1);
+		}
+
+		//imwrite("result.jpg", srcimg);
+		static const string kWinName = "Deep learning object detection in ONNXRuntime";
+		namedWindow(kWinName, WINDOW_NORMAL);
+		imshow(kWinName, srcimg);
+		waitKey(0);
+		destroyAllWindows();
+	}
+	catch (const std::exception &e)
+	{
+		// Covers errors raised while loading the model, running it, or drawing.
+		cerr << "error: " << e.what() << endl;
+		return 1;
+	}
+	return 0;
+}
--- a/main.py
+++ b/main.py
@@ -0,0 +1,118 @@
+import argparse
+import cv2
+import numpy as np
+import onnxruntime as ort
+
+
+class Detic():
+    def __init__(self, modelpath, detection_width=800, confThreshold=0.8):
+        # net = cv2.dnn.readNet(modelpath)
+        so = ort.SessionOptions()
+        so.log_severity_level = 3
+        self.session = ort.InferenceSession(modelpath, so)
+        model_inputs = self.session.get_inputs()
+        self.input_name = model_inputs[0].name
+        self.max_size = detection_width
+        self.confThreshold = confThreshold
+        self.class_names = list(map(lambda x: x.strip(), open('imagenet_21k_class_names.txt').readlines()))
+        self.assigned_colors = np.random.randint(0,high=256, size=(len(self.class_names), 3)).tolist()
+
+    def preprocess(self, srcimg):
+        im_h, im_w, _ = srcimg.shape
+        dstimg = cv2.cvtColor(srcimg, cv2.COLOR_BGR2RGB)
+        if im_h < im_w:
+            scale = self.max_size / im_h
+            oh, ow = self.max_size, scale * im_w
+        else:
+            scale = self.max_size / im_w
+            oh, ow = scale * im_h, self.max_size
+
+        max_hw = max(oh, ow)
+        if max_hw > self.max_size:
+            scale = self.max_size / max_hw
+            oh *= scale
+            ow *= scale
+        ow = int(ow + 0.5)
+        oh = int(oh + 0.5)
+        dstimg = cv2.resize(dstimg, (ow, oh))
+        return dstimg
+
+    def post_processing(self, pred_boxes, scores, pred_classes, pred_masks, im_hw, pred_hw):
+        scale_x, scale_y = (im_hw[1] / pred_hw[1], im_hw[0] / pred_hw[0])
+
+        pred_boxes[:, 0::2] *= scale_x
+        pred_boxes[:, 1::2] *= scale_y
+        pred_boxes[:, [0, 2]] = np.clip(pred_boxes[:, [0, 2]], 0, im_hw[1])
+        pred_boxes[:, [1, 3]] = np.clip(pred_boxes[:, [1, 3]], 0, im_hw[0])
+
+        threshold = 0
+        widths = pred_boxes[:, 2] - pred_boxes[:, 0]
+        heights = pred_boxes[:, 3] - pred_boxes[:, 1]
+        keep = (widths > threshold) & (heights > threshold)
+
+        pred_boxes = pred_boxes[keep]
+        scores = scores[keep]
+        pred_classes = pred_classes[keep]
+        pred_masks = pred_masks[keep]
+
+        # mask_threshold = 0.5
+        # pred_masks = paste_masks_in_image(
+        #     pred_masks[:, 0, :, :], pred_boxes,
+        #     (im_hw[0], im_hw[1]), mask_threshold
+        # )
+
+        pred = {
+            'pred_boxes': pred_boxes,
+            'scores': scores,
+            'pred_classes': pred_classes,
+            'pred_masks': pred_masks,
+        }
+        return pred
+
+    def draw_predictions(self, img, predictions):
+        height, width = img.shape[:2]
+        default_font_size = int(max(np.sqrt(height * width) // 90, 10))
+        boxes = predictions["pred_boxes"].astype(np.int64)
+        scores = predictions["scores"]
+        classes_id = predictions["pred_classes"].tolist()
+        # masks = predictions["pred_masks"].astype(np.uint8)
+        num_instances = len(boxes)
+        print('detect', num_instances, 'instances')
+        for i in range(num_instances):
+            x0, y0, x1, y1 = boxes[i]
+            color = self.assigned_colors[classes_id[i]]
+            cv2.rectangle(img, (x0, y0), (x1, y1), color=color,thickness=default_font_size // 4)
+            text = "{} {:.0f}%".format(self.class_names[classes_id[i]], round(scores[i],2) * 100)
+            cv2.putText(img, text, (x0, y0 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness=1, lineType=cv2.LINE_AA)
+        return img
+
+    def detect(self, srcimg):
+        im_h, im_w = srcimg.shape[:2]
+        dstimg = self.preprocess(srcimg)
+        pred_hw = dstimg.shape[:2]
+        input_image = np.expand_dims(dstimg.transpose(2, 0, 1), axis=0).astype(np.float32)
+
+        # Inference
+        pred_boxes, scores, pred_classes, pred_masks = self.session.run(None, {self.input_name: input_image})
+        preds = self.post_processing(pred_boxes, scores, pred_classes, pred_masks, (im_h, im_w), pred_hw)
+        return preds
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--imgpath", type=str, default='desk.jpg', help="image path")
+    parser.add_argument("--confThreshold", default=0.5, type=float, help='class confidence')
+    parser.add_argument("--modelpath", type=str, default='weights/Detic_C2_R50_640_4x_in21k.onnx', help="onnxmodel path")
+    args = parser.parse_args()
+
+    mynet = Detic(args.modelpath, confThreshold=args.confThreshold)
+    srcimg = cv2.imread(args.imgpath)
+    preds = mynet.detect(srcimg)
+    srcimg = mynet.draw_predictions(srcimg, preds)
+
+    # cv2.imwrite('result.jpg', srcimg)
+    winName = 'Deep learning Detic in ONNXRuntime'
+    cv2.namedWindow(winName, cv2.WINDOW_NORMAL)
+    cv2.imshow(winName, srcimg)
+    cv2.waitKey(0)
+    cv2.destroyAllWindows()