removed debug from yolov5-seg postprocessing. added readme for yolov5-seg

This commit is contained in:
swdee
2024-08-20 01:49:39 +12:00
parent 9b5b319568
commit fffac2fb8a
8 changed files with 111 additions and 19 deletions


@@ -1,6 +1,9 @@
# go-rknnlite
![go-rknnlite-logo.jpg](go-rknnlite-logo.jpg)
go-rknnlite provides Go language bindings for the [RKNN Toolkit2](https://github.com/airockchip/rknn-toolkit2/tree/master)
C API interface. It aims to provide lite bindings in the spirit of the closed source
Python lite bindings used for running AI Inference models on the Rockchip NPU
@@ -67,11 +70,13 @@ See the [example](example) directory.
* [YOLOv8 Demo](example/yolov8)
* [YOLOv10 Demo](example/yolov10)
* [YOLOX Demo](example/yolox)
* Instance Segmentation
* [YOLOv5-seg Demo](example/yolov5-seg) - Instance Segmentation using YOLOv5.
* License Plate Recognition
* [LPRNet Demo](example/lprnet)
* [ALPR Demo](example/alpr) - Automatic License Plate Recognition combining Yolov8 and LPRNet Models
* [ALPR Demo](example/alpr) - Automatic License Plate Recognition combining Yolov8 and LPRNet Models.
* Text Identification
* [PPOCR Detect](example/ppocr#ppocr-detect) - Takes an image and detects areas of text
* [PPOCR Detect](example/ppocr#ppocr-detect) - Takes an image and detects areas of text.
* [PPOCR Recognise](example/ppocr#ppocr-recognise) - Takes an area of text and performs OCR on it.
* [PPOCR System](example/ppocr#ppocr-system) - Combines both Detect and Recognise.
* Streaming


@@ -0,0 +1,102 @@
# YOLOv5-seg Example
This demo uses a YOLOv5-seg model to detect objects and provide
instance segmentation masks.
## Usage
First download the data files used by the examples.
You only need to do this once for all examples.
```
cd example/
git clone https://github.com/swdee/go-rknnlite-data.git data
```
Run the YOLOv5-seg example.
```
cd example/yolov5-seg
go run yolov5-seg.go
```
This will produce output similar to:
```
Driver Version: 0.8.2, API Version: 1.6.0 (9a7b5d24c@2023-12-13T17:31:11)
Model Input Number: 1, Ouput Number: 7
Input tensors:
index=0, name=images, n_dims=4, dims=[1, 640, 640, 3], n_elems=1228800, size=1228800, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003922
Output tensors:
index=0, name=output0, n_dims=4, dims=[1, 255, 80, 80], n_elems=1632000, size=1632000, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003922
index=1, name=output1, n_dims=4, dims=[1, 96, 80, 80], n_elems=614400, size=614400, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=20, scale=0.022222
index=2, name=376, n_dims=4, dims=[1, 255, 40, 40], n_elems=408000, size=408000, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003922
index=3, name=377, n_dims=4, dims=[1, 96, 40, 40], n_elems=153600, size=153600, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=29, scale=0.023239
index=4, name=379, n_dims=4, dims=[1, 255, 20, 20], n_elems=102000, size=102000, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003918
index=5, name=380, n_dims=4, dims=[1, 96, 20, 20], n_elems=38400, size=38400, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=32, scale=0.024074
index=6, name=371, n_dims=4, dims=[1, 32, 160, 160], n_elems=819200, size=819200, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-116, scale=0.022475
dog @ (197 83 357 299) 0.786010
cat @ (714 101 900 336) 0.706588
dog @ (312 93 526 304) 0.693387
cat @ (28 113 171 292) 0.641764
cat @ (530 141 712 299) 0.616804
Model first run speed: inference=54.833654ms, post processing=54.50291ms, rendering=7.934044ms, total time=117.270608ms
Saved object detection result to ../data/catdog-yolov5-seg-out.jpg
done
```
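The output tensor shapes follow the standard YOLOv5 head layout: each of the
three detection heads outputs 3 anchors x (4 box + 1 objectness + 80 COCO
classes) = 255 channels per grid cell, and the paired mask branch outputs
3 anchors x 32 prototype coefficients = 96 channels, while `n_elems` is simply
the product of the dims. A small sketch of that arithmetic (the `numElems`
helper is illustrative, not part of go-rknnlite):

```go
package main

import "fmt"

// numElems returns the number of elements in a tensor with the given
// dims, i.e. the n_elems value reported for each tensor above.
func numElems(dims []int) int {
	n := 1
	for _, d := range dims {
		n *= d
	}
	return n
}

func main() {
	// 3 anchors x (4 box + 1 objectness + 80 classes) detection channels,
	// and 3 anchors x 32 mask prototype coefficients.
	anchors, classes, coeffs := 3, 80, 32
	fmt.Println(anchors * (5 + classes)) // 255
	fmt.Println(anchors * coeffs)        // 96

	// n_elems for output0 with dims [1, 255, 80, 80]
	fmt.Println(numElems([]int{1, 255, 80, 80})) // 1632000
}
```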
The saved JPG image shows the instance segmentation outlines.
![catdog-outline.jpg](catdog-outline.jpg)
See the help output for all command line parameters.
```
$ go run yolov5-seg.go --help
Usage of /tmp/go-build401282281/b001/exe/yolov5-seg:
-i string
Image file to run object detection on (default "../data/catdog.jpg")
-l string
Text file containing model labels (default "../data/coco_80_labels_list.txt")
-m string
RKNN compiled YOLO model file (default "../data/yolov5s-seg-640-640-rk3588.rknn")
-o string
The output JPG file with object detection markers (default "../data/catdog-yolov5-seg-out.jpg")
-r string
The rendering format used for instance segmentation [outline|mask|dump] (default "outline")
```
## Rendering Methods
The default rendering method draws an outline around the edge of each
detected object, as depicted in the image above. However, this method is the
most expensive to compute, and the cost becomes more noticeable as the number
of objects in the scene increases.
A faster rendering method is also provided which draws bounding boxes around
the objects and a single transparent overlay to indicate the segment mask.
Select this mode with the following flag.
```
go run yolov5-seg.go -r mask
```
![catdog-mask.jpg](catdog-mask.jpg)
For visualisation and debugging purposes, the segmentation mask can also be
dumped to an image.
```
go run yolov5-seg.go -r dump
```
![catdog-dump.jpg](catdog-dump.jpg)
## Background
This YOLOv5-seg example is a Go conversion of the [C API example](https://github.com/airockchip/rknn_model_zoo/blob/main/examples/yolov5_seg/cpp/main.cc)
with improvements made to it inspired by [Ultralytics Instance Segmentation](https://docs.ultralytics.com/guides/instance-segmentation-and-tracking/#what-is-instance-segmentation).
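In that post-processing, each detected box's 32 mask coefficients are
multiplied against the 32x160x160 prototype tensor (output index 6 above),
then passed through a sigmoid and thresholded to produce a binary mask.
A simplified float32 sketch of the maths (the library's actual implementation
operates on quantized uint8 tensors; `boxMask` here is illustrative only):

```go
package main

import (
	"fmt"
	"math"
)

// boxMask computes one box's segmentation mask from its mask
// coefficients and the mask prototypes. protos is laid out
// [coefficient][h*w]; hw is the prototype spatial size.
func boxMask(coeffs []float32, protos [][]float32, hw int, thresh float32) []uint8 {
	mask := make([]uint8, hw)
	for i := 0; i < hw; i++ {
		var sum float32
		for c := range coeffs {
			sum += coeffs[c] * protos[c][i]
		}
		// sigmoid squashes the logit, then threshold to a binary mask
		if 1/(1+float32(math.Exp(float64(-sum)))) > thresh {
			mask[i] = 1
		}
	}
	return mask
}

func main() {
	// toy example: 2 coefficients over a 3 pixel prototype map
	coeffs := []float32{1, -1}
	protos := [][]float32{
		{3, -3, 0}, // prototype channel 0
		{0, 0, 2},  // prototype channel 1
	}
	fmt.Println(boxMask(coeffs, protos, 3, 0.5)) // [1 0 0]
}
```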

Binary file not shown (new image, 35 KiB)

Binary file not shown (new image, 83 KiB)

Binary file not shown (new image, 98 KiB)


@@ -18,9 +18,9 @@ func main() {
// read in cli flags
modelFile := flag.String("m", "../data/yolov5s-seg-640-640-rk3588.rknn", "RKNN compiled YOLO model file")
imgFile := flag.String("i", "../data/bus.jpg", "Image file to run object detection on")
imgFile := flag.String("i", "../data/catdog.jpg", "Image file to run object detection on")
labelFile := flag.String("l", "../data/coco_80_labels_list.txt", "Text file containing model labels")
saveFile := flag.String("o", "../data/bus-yolov5-seg-out.jpg", "The output JPG file with object detection markers")
saveFile := flag.String("o", "../data/catdog-yolov5-seg-out.jpg", "The output JPG file with object detection markers")
renderFormat := flag.String("r", "outline", "The rendering format used for instance segmentation [outline|mask|dump]")
flag.Parse()

go-rknnlite-logo.jpg (new binary file, 120 KiB)


@@ -3,10 +3,8 @@ package postprocess
import (
"github.com/swdee/go-rknnlite"
"github.com/swdee/go-rknnlite/preprocess"
"log"
"runtime"
"sync"
"time"
)
// YOLOv5Seg defines the struct for YOLOv5Seg model inference post processing
@@ -245,8 +243,6 @@ func (y *YOLOv5Seg) DetectObjects(outputs *rknnlite.Outputs,
// greater than 6 boxes. the parallel version has a negative consequence
// in that it effects the performance of the resizeByOpenCVUint8() call
// afterwards due to the overhead of the goroutines being cleaned up.
start := time.Now()
var matmulOut []uint8
if boxesNum > 6 {
matmulOut = y.matmulUint8Parallel(data, boxesNum)
@@ -254,27 +250,18 @@ func (y *YOLOv5Seg) DetectObjects(outputs *rknnlite.Outputs,
matmulOut = y.matmulUint8(data, boxesNum)
}
log.Printf("DEBUG: matmul use: %dms\n", time.Since(start).Milliseconds())
// resize to (boxes_num, model_in_width, model_in_height)
start = time.Now()
segMask := make([]uint8, boxesNum*int(data.height*data.width))
resizeByOpenCVUint8(matmulOut, protoWeight, protoHeight,
boxesNum, segMask, int(data.width), int(data.height))
log.Printf("DEBUG: resize by opencv use: %dms\n", time.Since(start).Milliseconds())
// crop mask
start = time.Now()
allMaskInOne := make([]uint8, data.height*data.width)
cropMaskWithIDUint8(segMask, allMaskInOne, filterBoxesByNMS, boxesNum,
int(data.height), int(data.width))
log.Printf("DEBUG: crop mask use: %dms\n", time.Since(start).Milliseconds())
// get real mask
start = time.Now()
croppedHeight := int(data.height) - resizer.YPad()*2
croppedWidth := int(data.width) - resizer.XPad()*2
@@ -286,8 +273,6 @@ func (y *YOLOv5Seg) DetectObjects(outputs *rknnlite.Outputs,
resizer.SrcHeight(), resizer.SrcWidth(), resizer.YPad(), resizer.XPad(),
)
log.Printf("DEBUG: seg reverse use: %dms\n", time.Since(start).Milliseconds())
return group, SegMask{realSegMask, boxIDs}
}
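The debug timing removed above wrapped the mask matmul dispatch, which picks a
goroutine-parallel implementation only when there are more than 6 boxes, since
the goroutine overhead otherwise outweighs the gain. A simplified float32
sketch of that serial/parallel row-split pattern (the library's
`matmulUint8`/`matmulUint8Parallel` work on quantized data and are not
reproduced here):

```go
package main

import (
	"fmt"
	"runtime"
	"sync"
)

// matmulSerial multiplies an m x k coefficient matrix by a k x n
// prototype matrix, both stored row-major.
func matmulSerial(a, b []float32, m, k, n int) []float32 {
	out := make([]float32, m*n)
	for i := 0; i < m; i++ {
		for j := 0; j < n; j++ {
			var sum float32
			for x := 0; x < k; x++ {
				sum += a[i*k+x] * b[x*n+j]
			}
			out[i*n+j] = sum
		}
	}
	return out
}

// matmulParallel splits output rows (one row per detected box) across
// goroutines; the split only pays off once there are enough boxes.
func matmulParallel(a, b []float32, m, k, n int) []float32 {
	out := make([]float32, m*n)
	var wg sync.WaitGroup
	workers := runtime.NumCPU()
	rowsPer := (m + workers - 1) / workers
	for w := 0; w < workers; w++ {
		start, end := w*rowsPer, (w+1)*rowsPer
		if end > m {
			end = m
		}
		if start >= end {
			break
		}
		wg.Add(1)
		go func(start, end int) {
			defer wg.Done()
			for i := start; i < end; i++ {
				for j := 0; j < n; j++ {
					var sum float32
					for x := 0; x < k; x++ {
						sum += a[i*k+x] * b[x*n+j]
					}
					out[i*n+j] = sum
				}
			}
		}(start, end)
	}
	wg.Wait()
	return out
}

func main() {
	a := []float32{1, 2, 3, 4} // 2x2
	b := []float32{5, 6, 7, 8} // 2x2
	fmt.Println(matmulSerial(a, b, 2, 2, 2))   // [19 22 43 50]
	fmt.Println(matmulParallel(a, b, 2, 2, 2)) // [19 22 43 50]
}
```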