diff --git a/global.go b/global.go new file mode 100644 index 0000000..bfbc2c9 --- /dev/null +++ b/global.go @@ -0,0 +1,16 @@ +package main + +import ort "github.com/yalue/onnxruntime_go" + +var ( + UseCoreML = false + Blank []float32 + ModelPath = "./yolov8m.onnx" + Yolo8Model ModelSession +) + +type ModelSession struct { + Session *ort.AdvancedSession + Input *ort.Tensor[float32] + Output *ort.Tensor[float32] +} diff --git a/go.mod b/go.mod index 4037770..b97d654 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,7 @@ module object_detector go 1.18 -require github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 - -require github.com/yalue/onnxruntime_go v0.0.0-20230331205425-1acf4f2a2e42 // indirect +require ( + github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 + github.com/yalue/onnxruntime_go v1.3.0 +) diff --git a/imagex.go b/imagex.go new file mode 100644 index 0000000..fd68456 --- /dev/null +++ b/imagex.go @@ -0,0 +1,109 @@ +package main + +import ( + "github.com/nfnt/resize" + "image" + "io" + "math" + "sort" +) + +// Function used to convert RAW output from YOLOv8 to an array +// of detected objects. Each object contain the bounding box of +// this object, the type of object and the probability +// Returns array of detected objects in a format [[x1,y1,x2,y2,object_type,probability],..] +func process_output(output []float32, img_width, img_height int64) [][]interface{} { + boxes := [][]interface{}{} + for index := 0; index < 8400; index++ { + class_id, prob := 0, float32(0.0) + for col := 0; col < 80; col++ { + if output[8400*(col+4)+index] > prob { + prob = output[8400*(col+4)+index] + class_id = col + } + } + if prob < 0.5 { + continue + } + label := yolo_classes[class_id] + xc := output[index] + yc := output[8400+index] + w := output[2*8400+index] + h := output[3*8400+index] + x1 := (xc - w/2) / 640 * float32(img_width) + y1 := (yc - h/2) / 640 * float32(img_height) + x2 := (xc + w/2) / 640 * float32(img_width) + y2 := (yc + h/2) / 640 * float32(img_height) + boxes = append(boxes, []interface{}{float64(x1), float64(y1), float64(x2), float64(y2), label, prob}) + } + + sort.Slice(boxes, func(i, j int) bool { + return boxes[i][5].(float32) < boxes[j][5].(float32) + }) + result := [][]interface{}{} + for len(boxes) > 0 { + result = append(result, boxes[0]) + tmp := [][]interface{}{} + for _, box := range boxes { + if iou(boxes[0], box) < 0.7 { + tmp = append(tmp, box) + } + } + boxes = tmp + } + return result +} + +// Function calculates "Intersection-over-union" coefficient for specified two boxes +// https://pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/. +// Returns Intersection over union ratio as a float number +func iou(box1, box2 []interface{}) float64 { + return intersection(box1, box2) / union(box1, box2) +} + +// Function calculates union area of two boxes +// Returns Area of the boxes union as a float number +func union(box1, box2 []interface{}) float64 { + box1_x1, box1_y1, box1_x2, box1_y2 := box1[0].(float64), box1[1].(float64), box1[2].(float64), box1[3].(float64) + box2_x1, box2_y1, box2_x2, box2_y2 := box2[0].(float64), box2[1].(float64), box2[2].(float64), box2[3].(float64) + box1_area := (box1_x2 - box1_x1) * (box1_y2 - box1_y1) + box2_area := (box2_x2 - box2_x1) * (box2_y2 - box2_y1) + return box1_area + box2_area - intersection(box1, box2) +} + +// Function calculates intersection area of two boxes +// Returns Area of intersection of the boxes as a float number +func intersection(box1, box2 []interface{}) float64 { + box1_x1, box1_y1, box1_x2, box1_y2 := box1[0].(float64), box1[1].(float64), box1[2].(float64), box1[3].(float64) + box2_x1, box2_y1, box2_x2, box2_y2 := box2[0].(float64), box2[1].(float64), box2[2].(float64), box2[3].(float64) + x1 := math.Max(box1_x1, box2_x1) + y1 := math.Max(box1_y1, box2_y1) + x2 := math.Min(box1_x2, box2_x2) + y2 := math.Min(box1_y2, box2_y2) + return (x2 - x1) * (y2 - y1) +} + +// Function used to convert input image to tensor, +// required as an input to YOLOv8 object detection +// network. +// Returns the input tensor, original image width and height +func prepare_input(buf io.Reader) ([]float32, int64, int64) { + img, _, _ := image.Decode(buf) + size := img.Bounds().Size() + img_width, img_height := int64(size.X), int64(size.Y) + img = resize.Resize(640, 640, img, resize.Lanczos3) + red := []float32{} + green := []float32{} + blue := []float32{} + for y := 0; y < 640; y++ { + for x := 0; x < 640; x++ { + r, g, b, _ := img.At(x, y).RGBA() + red = append(red, float32(r/257)/255.0) + green = append(green, float32(g/257)/255.0) + blue = append(blue, float32(b/257)/255.0) + } + } + input := append(red, green...) + input = append(input, blue...) + return input, img_width, img_height +} diff --git a/main.go b/main.go index 1f88a9c..2c3b9c5 100644 --- a/main.go +++ b/main.go @@ -2,17 +2,13 @@ package main import ( "encoding/json" - "github.com/nfnt/resize" - ort "github.com/yalue/onnxruntime_go" - "image" + "fmt" _ "image/gif" _ "image/jpeg" _ "image/png" "io" - "math" "net/http" "os" - "sort" ) // Main function that defines @@ -43,7 +39,10 @@ func index(w http.ResponseWriter, _ *http.Request) { func detect(w http.ResponseWriter, r *http.Request) { r.ParseMultipartForm(0) file, _, _ := r.FormFile("image_file") - boxes := detect_objects_on_image(file) + boxes, err := detect_objects_on_image(file) + if err != nil { + fmt.Println(err.Error()) + } buf, _ := json.Marshal(&boxes) w.Write(buf) } @@ -53,143 +52,33 @@ func detect(w http.ResponseWriter, r *http.Request) { // and returns an array of detected objects // and their bounding boxes // Returns Array of bounding boxes in format [[x1,y1,x2,y2,object_type,probability],..] -func detect_objects_on_image(buf io.Reader) [][]interface{} { +func detect_objects_on_image(buf io.Reader) ([][]interface{}, error) { input, img_width, img_height := prepare_input(buf) - output := run_model(input) - return process_output(output, img_width, img_height) -} - -// Function used to convert input image to tensor, -// required as an input to YOLOv8 object detection -// network. -// Returns the input tensor, original image width and height -func prepare_input(buf io.Reader) ([]float32, int64, int64) { - img, _, _ := image.Decode(buf) - size := img.Bounds().Size() - img_width, img_height := int64(size.X), int64(size.Y) - img = resize.Resize(640, 640, img, resize.Lanczos3) - red := []float32{} - green := []float32{} - blue := []float32{} - for y := 0; y < 640; y++ { - for x := 0; x < 640; x++ { - r, g, b, _ := img.At(x, y).RGBA() - red = append(red, float32(r/257)/255.0) - green = append(green, float32(g/257)/255.0) - blue = append(blue, float32(b/257)/255.0) - } + output, err := run_model(input) + if err != nil { + return nil, err } - input := append(red, green...) - input = append(input, blue...) - return input, img_width, img_height + + data := process_output(output, img_width, img_height) + + return data, nil } // Function used to pass provided input tensor to // YOLOv8 neural network and return result // Returns raw output of YOLOv8 network as a single dimension // array -func run_model(input []float32) []float32 { - ort.SetSharedLibraryPath("./libonnxruntime.so") - _ = ort.InitializeEnvironment() +func run_model(input []float32) ([]float32, error) { - inputShape := ort.NewShape(1, 3, 640, 640) - inputTensor, _ := ort.NewTensor(inputShape, input) + var err error - outputShape := ort.NewShape(1, 84, 8400) - outputTensor, _ := ort.NewEmptyTensor[float32](outputShape) - - session, _ := ort.NewSession[float32]("./yolov8m.onnx", - []string{"images"}, []string{"output0"}, - []*ort.Tensor[float32]{inputTensor}, []*ort.Tensor[float32]{outputTensor}) - - _ = session.Run() - return outputTensor.GetData() -} - -// Function used to convert RAW output from YOLOv8 to an array -// of detected objects. Each object contain the bounding box of -// this object, the type of object and the probability -// Returns array of detected objects in a format [[x1,y1,x2,y2,object_type,probability],..] -func process_output(output []float32, img_width, img_height int64) [][]interface{} { - boxes := [][]interface{}{} - for index := 0; index < 8400; index++ { - class_id, prob := 0, float32(0.0) - for col := 0; col < 80; col++ { - if output[8400*(col+4)+index] > prob { - prob = output[8400*(col+4)+index] - class_id = col - } + if Yolo8Model.Session == nil { + Yolo8Model, err = InitYolo8Session(input) + if err != nil { + return nil, err } - if prob < 0.5 { - continue - } - label := yolo_classes[class_id] - xc := output[index] - yc := output[8400+index] - w := output[2*8400+index] - h := output[3*8400+index] - x1 := (xc - w/2) / 640 * float32(img_width) - y1 := (yc - h/2) / 640 * float32(img_height) - x2 := (xc + w/2) / 640 * float32(img_width) - y2 := (yc + h/2) / 640 * float32(img_height) - boxes = append(boxes, []interface{}{float64(x1), float64(y1), float64(x2), float64(y2), label, prob}) } - sort.Slice(boxes, func(i, j int) bool { - return boxes[i][5].(float32) < boxes[j][5].(float32) - }) - result := [][]interface{}{} - for len(boxes) > 0 { - result = append(result, boxes[0]) - tmp := [][]interface{}{} - for _, box := range boxes { - if iou(boxes[0], box) < 0.7 { - tmp = append(tmp, box) - } - } - boxes = tmp - } - return result -} + return runInference(Yolo8Model, input) -// Function calculates "Intersection-over-union" coefficient for specified two boxes -// https://pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/. -// Returns Intersection over union ratio as a float number -func iou(box1, box2 []interface{}) float64 { - return intersection(box1, box2) / union(box1, box2) -} - -// Function calculates union area of two boxes -// Returns Area of the boxes union as a float number -func union(box1, box2 []interface{}) float64 { - box1_x1, box1_y1, box1_x2, box1_y2 := box1[0].(float64), box1[1].(float64), box1[2].(float64), box1[3].(float64) - box2_x1, box2_y1, box2_x2, box2_y2 := box2[0].(float64), box2[1].(float64), box2[2].(float64), box2[3].(float64) - box1_area := (box1_x2 - box1_x1) * (box1_y2 - box1_y1) - box2_area := (box2_x2 - box2_x1) * (box2_y2 - box2_y1) - return box1_area + box2_area - intersection(box1, box2) -} - -// Function calculates intersection area of two boxes -// Returns Area of intersection of the boxes as a float number -func intersection(box1, box2 []interface{}) float64 { - box1_x1, box1_y1, box1_x2, box1_y2 := box1[0].(float64), box1[1].(float64), box1[2].(float64), box1[3].(float64) - box2_x1, box2_y1, box2_x2, box2_y2 := box2[0].(float64), box2[1].(float64), box2[2].(float64), box2[3].(float64) - x1 := math.Max(box1_x1, box2_x1) - y1 := math.Max(box1_y1, box2_y1) - x2 := math.Min(box1_x2, box2_x2) - y2 := math.Min(box1_y2, box2_y2) - return (x2 - x1) * (y2 - y1) -} - -// Array of YOLOv8 class labels -var yolo_classes = []string{ - "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", - "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", - "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", - "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", - "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", - "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", - "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", - "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", - "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush", } diff --git a/onnx_util.go b/onnx_util.go new file mode 100644 index 0000000..1912abc --- /dev/null +++ b/onnx_util.go @@ -0,0 +1,99 @@ +package main + +import ( + ort "github.com/yalue/onnxruntime_go" + "runtime" +) + +func InitYolo8Session(input []float32) (ModelSession, error) { + ort.SetSharedLibraryPath(getSharedLibPath()) + err := ort.InitializeEnvironment() + if err != nil { + return ModelSession{}, err + } + + inputShape := ort.NewShape(1, 3, 640, 640) + inputTensor, err := ort.NewTensor(inputShape, input) + if err != nil { + return ModelSession{}, err + } + + outputShape := ort.NewShape(1, 84, 8400) + outputTensor, err := ort.NewEmptyTensor[float32](outputShape) + if err != nil { + return ModelSession{}, err + } + + options, e := ort.NewSessionOptions() + if e != nil { + return ModelSession{}, err + } + + if UseCoreML { // If CoreML is enabled, append the CoreML execution provider + e = options.AppendExecutionProviderCoreML(0) + if e != nil { + options.Destroy() + return ModelSession{}, err + } + defer options.Destroy() + } + + session, err := ort.NewAdvancedSession(ModelPath, + []string{"images"}, []string{"output0"}, + []ort.ArbitraryTensor{inputTensor}, []ort.ArbitraryTensor{outputTensor}, options) + + if err != nil { + return ModelSession{}, err + } + + modelSes := ModelSession{ + Session: session, + Input: inputTensor, + Output: outputTensor, + } + + return modelSes, err +} + +func getSharedLibPath() string { + if runtime.GOOS == "windows" { + if runtime.GOARCH == "amd64" { + return "./third_party/onnxruntime.dll" + } + } + if runtime.GOOS == "darwin" { + if runtime.GOARCH == "arm64" { + return "./third_party/onnxruntime_arm64.dylib" + } + } + if runtime.GOOS == "linux" { + if runtime.GOARCH == "arm64" { + return "../third_party/onnxruntime_arm64.so" + } + return "./third_party/onnxruntime.so" + } + panic("Unable to find a version of the onnxruntime library supporting this system.") +} + +// Array of YOLOv8 class labels +var yolo_classes = []string{ + "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", + "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", + "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", + "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", + "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", + "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", + "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", + "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", + "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush", +} + +func runInference(modelSes ModelSession, input []float32) ([]float32, error) { + inTensor := modelSes.Input.GetData() + copy(inTensor, input) + err := modelSes.Session.Run() + if err != nil { + return nil, err + } + return modelSes.Output.GetData(), nil +} diff --git a/third_party/onnxruntime.dll b/third_party/onnxruntime.dll new file mode 100644 index 0000000..872c1a5 Binary files /dev/null and b/third_party/onnxruntime.dll differ diff --git a/libonnxruntime.so b/third_party/onnxruntime.so old mode 100755 new mode 100644 similarity index 66% rename from libonnxruntime.so rename to third_party/onnxruntime.so index eb35d62..fa16542 Binary files a/libonnxruntime.so and b/third_party/onnxruntime.so differ diff --git a/third_party/onnxruntime_arm64.dylib b/third_party/onnxruntime_arm64.dylib new file mode 100644 index 0000000..13721a7 Binary files /dev/null and b/third_party/onnxruntime_arm64.dylib differ diff --git a/libonnxruntime.so.1.14.1 b/third_party/onnxruntime_arm64.so old mode 100755 new mode 100644 similarity index 60% rename from libonnxruntime.so.1.14.1 rename to third_party/onnxruntime_arm64.so index eb35d62..80bfcce Binary files a/libonnxruntime.so.1.14.1 and b/third_party/onnxruntime_arm64.so differ