From 51d5c80ce0a24156d8c945dbb48208ee25698c60 Mon Sep 17 00:00:00 2001
From: swdee
Date: Mon, 30 Jun 2025 13:26:10 +1200
Subject: [PATCH 01/16] modified docker toolkit image to compile batch models

---
 toolkit/Dockerfile        | 12 +++++++++---
 toolkit/compile-models.sh |  1 +
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/toolkit/Dockerfile b/toolkit/Dockerfile
index f641bc8..a20d4f1 100644
--- a/toolkit/Dockerfile
+++ b/toolkit/Dockerfile
@@ -73,9 +73,6 @@ RUN git clone --depth 1 https://github.com/swdee/lpd-yolov8.git /opt/lpd-yolov8
     cp /opt/rknn_model_zoo/examples/yolov8/python/convert.py /opt/rknn_model_zoo/examples/yolov8/python/convert-lpd.py && \
     sed -i "s|^DATASET_PATH *= *['\"].*['\"]|DATASET_PATH = '/opt/lpd-yolov8/subset.txt'|" /opt/rknn_model_zoo/examples/yolov8/python/convert-lpd.py

-# download other onnx models
-RUN git clone --depth 1 https://github.com/swdee/go-rknnlite-build.git /opt/go-rknnlite-build
-
 # Upgrade pip to the latest version
 RUN pip install --upgrade pip

@@ -92,6 +89,15 @@ RUN pip install --no-cache-dir \
     pyyaml \
     "tensorflow<=2.16.0rc0"

+# download other onnx models
+RUN git clone --depth 1 https://github.com/swdee/go-rknnlite-build.git /opt/go-rknnlite-build && \
+    git -C /opt/go-rknnlite-build fetch --depth 1 origin 15dd75d4bc23486931e860f8ddd5e505c4003aba && \
+    git -C /opt/go-rknnlite-build checkout FETCH_HEAD
+
+# copy our modified mobilenet-rknn.py script into the rknn_model_zoo directory
+RUN cp /opt/go-rknnlite-build/mobilenet-batch/mobilenet-rknn.py /opt/rknn_model_zoo/examples/mobilenet/python/mobilenet-rknn-batch.py
+
+
 # By default do nothing
 CMD ["bash"]

diff --git a/toolkit/compile-models.sh b/toolkit/compile-models.sh
index 8ef120b..a5b81de 100755
--- a/toolkit/compile-models.sh
+++ b/toolkit/compile-models.sh
@@ -30,6 +30,7 @@ MODELS=(
   "mobilenet_v1 rknn_convert /opt/models/mobilenet_v1/model_config.yml '' '' mobilenet_v1"
   "yolov8 convert-lpd.py /opt/lpd-yolov8/lpd-yolov8n.onnx i8 '' lpd-yolov8n"
   "yolov8 convert.py /opt/go-rknnlite-build/yolonas-s.onnx i8 '' yolonas-s"
+  "mobilenet mobilenet-rknn-batch.py ../model/mobilenetv2-12.onnx i8 --model mobilenetv2-batch8"
 )

 # compile all entries (or just filter) for one platform

From 207a0a5a41d59256e37e259fe9a39f1ac9550b61 Mon Sep 17 00:00:00 2001
From: swdee
Date: Mon, 30 Jun 2025 13:27:08 +1200
Subject: [PATCH 02/16] fix issue in inference logic used to calculate tensor
 size to support 4D (batch) tensors

---
 inference.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/inference.go b/inference.go
index 66b28e4..1ec3ade 100644
--- a/inference.go
+++ b/inference.go
@@ -60,7 +60,7 @@ func (r *Runtime) Inference(mats []gocv.Mat) (*Outputs, error) {
 			Index: uint32(idx),
 			Type:  TensorFloat32,
 			// multiply by 4 for size of float32
-			Size:        uint32(mat.Cols() * mat.Rows() * mat.Channels() * 4),
+			Size:        uint32(len(data) * 4), // bytes = elements * 4
 			Fmt:         TensorNHWC,
 			Buf:         unsafe.Pointer(&data[0]),
 			PassThrough: false,
@@ -77,7 +77,7 @@ func (r *Runtime) Inference(mats []gocv.Mat) (*Outputs, error) {
 		inputs[idx] = Input{
 			Index: uint32(idx),
 			Type:  TensorUint8,
-			Size:        uint32(mat.Cols() * mat.Rows() * mat.Channels()),
+			Size:        uint32(len(data)), // bytes = elements
 			Fmt:         TensorNHWC,
 			Buf:         unsafe.Pointer(&data[0]),
 			PassThrough: false,

From 353d906b73f0e5e1be17669f398f9cec50efa9e9 Mon Sep 17 00:00:00 2001
From: swdee
Date: Mon, 30 Jun 2025 13:28:04 +1200
Subject: [PATCH 03/16] added support for batching images on the runtime with
 batch example

---
 README.md               |  19 +++
 batch.go                | 188 +++++++++++++++++++++++
 batch_test.go           | 329 ++++++++++++++++++++++++++++++++++++++++
 example/batch/README.md | 199 ++++++++++++++++++++++++
 example/batch/batch.go  | 221 +++++++++++++++++++++++++++
 5 files changed, 956 insertions(+)
 create mode 100644 batch.go
 create mode 100644 batch_test.go
 create mode 100644 example/batch/README.md
 create mode 100644 example/batch/batch.go

diff --git a/README.md b/README.md
index c20e6b0..489d7cb 100644
--- a/README.md
+++ b/README.md
@@ -75,6 +75,7 @@ See the [example](example) directory.
 * Image Classification
   * [MobileNet Demo](example/mobilenet)
   * [Pooled Runtime Usage](example/pool)
+  * [Batch Input Usage](example/batch)
 * Object Detection
   * [YOLOv5 Demo](example/yolov5)
   * [YOLOv8 Demo](example/yolov8)
@@ -164,6 +165,24 @@ If you use `rknnlite.NewRuntimeByPlatform()` instead this will be automatically
 set for you.
 
 
+## Runtime Inference
+
+Once a Runtime has been created, inference is performed by passing it the input
+tensors.
+
+```
+rt.Inference([]gocv.Mat{img})
+```
+
+The `Inference()` function takes a slice of gocv.Mats, where the number of
+elements in the slice corresponds to the total number of input tensors the
+Model has. Most models only have a single input tensor, so only a single
+gocv.Mat would be passed here.
+
+If you want to pass multiple images in a single `Inference()` call, you need
+to use [Batching](example/batch).
+
+
 ## CPU Affinity
 
 The performance of the NPU is affected by which CPU cores your program runs on, so
diff --git a/batch.go b/batch.go
new file mode 100644
index 0000000..fe4f65f
--- /dev/null
+++ b/batch.go
@@ -0,0 +1,188 @@
+package rknnlite
+
+import (
+	"fmt"
+	"gocv.io/x/gocv"
+)
+
+// Batch defines a struct used for concatenating a batch of gocv.Mat's
+// together into a single gocv.Mat for use with image batching on
+// a Model
+type Batch struct {
+	mat gocv.Mat
+	// size of the batch
+	size int
+	// width is the input tensor size width
+	width int
+	// height is the input tensor size height
+	height int
+	// channels is the input tensor number of channels
+	channels int
+	// inputTypeFloat32 sets the runtime.inputTypeFloat32 value
+	inputTypeFloat32 bool
+	// matType is the Mat type images must be passed as
+	matType gocv.MatType
+	// matCnt is a counter for how many Mats have been added with Add()
+	matCnt int
+	// imgSize stores an image's size in elements (height * width * channels)
+	imgSize int
+}
+
+// NewBatch creates a batch of concatenated Mats for the given input tensor
+// and batch size
+func (r *Runtime) NewBatch(batchSize, height, width, channels int) *Batch {
+
+	// Choose output Mat type
+	var matType gocv.MatType
+
+	if r.inputTypeFloat32 {
+		matType = gocv.MatTypeCV32F
+	} else {
+		matType = gocv.MatTypeCV8U
+	}
+
+	shape := []int{batchSize, height, width, channels}
+
+	return &Batch{
+		size:             batchSize,
+		height:           height,
+		width:            width,
+		channels:         channels,
+		mat:              gocv.NewMatWithSizes(shape, matType),
+		inputTypeFloat32: r.inputTypeFloat32,
+		matType:          matType,
+		matCnt:           0,
+		imgSize:          height * width * channels,
+	}
+}
+
+// Add a Mat to the batch
+func (b *Batch) Add(img gocv.Mat) error {
+
+	// check if batch is full
+	if b.matCnt >= b.size {
+		return fmt.Errorf("batch full")
+	}
+
+	err := b.addAt(b.matCnt, img)
+
+	if err != nil {
+		return err
+	}
+
+	// increment image counter
+	b.matCnt++
+	return nil
+}
+
+// AddAt adds a Mat to the batch at the specific index location
+func (b *Batch) AddAt(idx int, img gocv.Mat) error {
+
+	if idx < 0 || idx >= b.size {
+		return fmt.Errorf("index %d out of range [0-%d)", idx, b.size)
+	}
+
+	return b.addAt(idx, img)
+}
+
+// addAt adds a Mat to the specified index location
+func (b *Batch) addAt(idx int, img gocv.Mat) error {
+
+	// validate mat dimensions
+	if img.Rows() != b.height || img.Cols() != b.width ||
+		img.Channels() != b.channels {
+		return fmt.Errorf("image does not match batch shape")
+	}
+
+	if !img.IsContinuous() {
+		// clone to get contiguous memory, freed once the data is copied
+		img = img.Clone()
+		defer img.Close()
+	}
+
+	if b.inputTypeFloat32 {
+		// pointer of the batch mat
+		dstAll, err := b.mat.DataPtrFloat32()
+
+		if err != nil {
+			return fmt.Errorf("error accessing float32 batch memory: %w", err)
+		}
+
+		src, err := img.DataPtrFloat32()
+
+		if err != nil {
+			return fmt.Errorf("error getting float32 data from image: %w", err)
+		}
+
+		offset := idx * b.imgSize
+		copy(dstAll[offset:], src)
+
+	} else {
+		// pointer of the batch mat
+		dstAll, err := b.mat.DataPtrUint8()
+
+		if err != nil {
+			return fmt.Errorf("error accessing uint8 batch memory: %w", err)
+		}
+
+		src, err := img.DataPtrUint8()
+
+		if err != nil {
+			return fmt.Errorf("error getting uint8 data from image: %w", err)
+		}
+
+		offset := idx * b.imgSize
+		copy(dstAll[offset:], src)
+	}
+
+	return nil
+}
+
+// GetOutputInt returns the tensor output for the specified image number
+// as an int8 output. idx starts counting from 0 to (batchsize-1)
+func (b *Batch) GetOutputInt(idx int, outputs Output, size int) ([]int8, error) {
+
+	if idx < 0 || idx >= b.size {
+		return nil, fmt.Errorf("index %d out of range [0-%d)", idx, b.size)
+	}
+
+	offset := idx * size
+
+	if offset+size > int(outputs.Size) {
+		return nil, fmt.Errorf("slice [%d,%d) out of range for output size %d", offset, offset+size, outputs.Size)
+	}
+
+	return outputs.BufInt[offset : offset+size], nil
+}
+
+// GetOutputF32 returns the tensor output for the specified image number
+// as a float32 output. idx starts counting from 0 to (batchsize-1)
+func (b *Batch) GetOutputF32(idx int, outputs Output, size int) ([]float32, error) {
+
+	if idx < 0 || idx >= b.size {
+		return nil, fmt.Errorf("index %d out of range [0-%d)", idx, b.size)
+	}
+
+	offset := idx * size
+
+	if offset+size > int(outputs.Size) {
+		return nil, fmt.Errorf("slice [%d,%d) out of range for output size %d", offset, offset+size, outputs.Size)
+	}
+
+	return outputs.BufFloat[offset : offset+size], nil
+}
+
+// Mat returns the concatenated mat
+func (b *Batch) Mat() gocv.Mat {
+	return b.mat
+}
+
+// Clear the batch so it can be reused again
+func (b *Batch) Clear() {
+	// just reset the counter, we don't need to clear the underlying b.mat
+	// as it will be overwritten when Add() is called with new images
+	b.matCnt = 0
+}
+
+// Close the batch and free allocated memory
+func (b *Batch) Close() error {
+	return b.mat.Close()
+}
diff --git a/batch_test.go b/batch_test.go
new file mode 100644
index 0000000..e029b12
--- /dev/null
+++ b/batch_test.go
@@ -0,0 +1,329 @@
+package rknnlite
+
+import (
+	"errors"
+	"flag"
+	"fmt"
+	"gocv.io/x/gocv"
+	"path/filepath"
+	"regexp"
+	"strconv"
+	"strings"
+	"testing"
+	"time"
+)
+
+var modelFiles = flag.String("m", "osnet_x1_0_market_256x128-rk3588-batch{1,4,8,16}.rknn",
+	"RKNN compiled model files in format <name>-batch{N1,N2,...,Nk}.rknn")
+var rkPlatform = flag.String("p", "rk3588",
+	"Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588]")
+
+// expandModelPattern takes a pattern like
+//
+//	"/some/dir/osnet_x1_0_market_256x128-rk3588-batch{1,4,8,16}.rknn"
+//
+// and returns a modelBatches entry for each expanded model file:
+//
+//	"/some/dir/osnet_x1_0_market_256x128-rk3588-batch1.rknn"
+//	"/some/dir/osnet_x1_0_market_256x128-rk3588-batch4.rknn"
+//	"/some/dir/osnet_x1_0_market_256x128-rk3588-batch8.rknn"
+//	"/some/dir/osnet_x1_0_market_256x128-rk3588-batch16.rknn"
+func expandModelPattern(pattern string) ([]modelBatches, error) {
+
+	// split off the directory and file
+	dir, file := filepath.Split(pattern)
+
+	// match exactly "<name>-batch{n1,n2,...}.rknn"
+	re := regexp.MustCompile(`^(.+)-batch\{([\d,]+)\}\.rknn$`)
+	m := re.FindStringSubmatch(file)
+
+	if m == nil {
+		return nil, errors.New("invalid pattern: must be <name>-batch{n1,n2,...}.rknn")
+	}
+
+	prefix := m[1]  // e.g. "osnet_x1_0_market_256x128-rk3588"
+	numsCSV := m[2] // e.g. "1,4,8,16"
+	nums := strings.Split(numsCSV, ",")
+	out := make([]modelBatches, 0, len(nums))
+
+	for _, strNum := range nums {
+
+		num, err := strconv.Atoi(strNum)
+
+		if err != nil {
+			return nil, fmt.Errorf("invalid batch size %q: %w", strNum, err)
+		}
+
+		name := fmt.Sprintf("%s-batch%d.rknn", prefix, num)
+
+		out = append(out, modelBatches{
+			batchSize: num,
+			modelFile: filepath.Join(dir, name),
+		})
+	}
+
+	return out, nil
+}
+
+// modelBatches pairs a batch size with its compiled model file
+type modelBatches struct {
+	batchSize int
+	modelFile string
+}
+
+// BenchmarkBatchSize runs benchmarks against multiple models to work out per
+// image inference time.
+func BenchmarkBatchSize(b *testing.B) {
+
+	flag.Parse()
+
+	// from the modelFiles argument create a table of model files and
+	// corresponding batch sizes
+	cases, err := expandModelPattern(*modelFiles)
+
+	if err != nil {
+		b.Fatalf("Invalid modelFile syntax: %v", err)
+	}
+
+	const (
+		height   = 256
+		width    = 128
+		channels = 3
+	)
+
+	for _, tc := range cases {
+		tc := tc // capture
+
+		b.Run(fmt.Sprintf("Batch%02d", tc.batchSize), func(b *testing.B) {
+
+			// load the RKNN model for this batch size
+			err := SetCPUAffinityByPlatform(*rkPlatform, FastCores)
+
+			if err != nil {
+				b.Fatalf("Failed to set CPU Affinity: %v", err)
+			}
+
+			// if a platform other than the default rk3588 was given,
+			// substitute it into the model file name
+			modelFile := tc.modelFile
+
+			if *rkPlatform != "rk3588" {
+				modelFile = strings.ReplaceAll(modelFile, "rk3588", *rkPlatform)
+			}
+
+			// create rknn runtime instance
+			rt, err := NewRuntimeByPlatform(*rkPlatform, modelFile)
+
+			if err != nil {
+				b.Fatalf("Error initializing RKNN runtime: %v", err)
+			}
+
+			defer rt.Close()
+
+			// set runtime to leave output tensors as int8
+			rt.SetWantFloat(false)
+
+			// prepare zero images
+			imgs := make([]gocv.Mat, tc.batchSize)
+
+			for i := range imgs {
+				m := gocv.Zeros(height, width, gocv.MatTypeCV8UC3)
+				defer m.Close()
+				imgs[i] = m
+			}
+
+			// pre-allocate the batch container
+			batch := rt.NewBatch(tc.batchSize, height, width, channels)
+			defer batch.Close()
+
+			b.ResetTimer()
+			var totalInf time.Duration
+
+			for i := 0; i < b.N; i++ {
+				batch.Clear()
+				start := time.Now()
+
+				for _, img := range imgs {
+					if err := batch.Add(img); err != nil {
+						b.Fatalf("Add() error: %v", err)
+					}
+				}
+
+				if _, err := rt.Inference([]gocv.Mat{batch.Mat()}); err != nil {
+					b.Fatalf("Inference() error: %v", err)
+				}
+
+				totalInf += time.Since(start)
+			}
+
+			b.StopTimer()
+
+			// milliseconds per batch
+			msBatch := float64(totalInf.Nanoseconds()) / 1e6 / float64(b.N)
+			b.ReportMetric(msBatch, "ms/batch")
+
+			// milliseconds per image
+			msImg := msBatch / float64(tc.batchSize)
+			b.ReportMetric(msImg, "ms/img")
+
+		})
+	}
+}
+
+func TestBatchAddAndOverflow(t *testing.T) {
+
+	r := &Runtime{inputTypeFloat32: false}
+
+	batch := r.NewBatch(2, 2, 3, 1)
+	defer batch.Close()
+
+	// create Mats with known data
+	m1 := gocv.NewMatWithSize(2, 3, gocv.MatTypeCV8U)
+	defer m1.Close()
+
+	buf1, _ := m1.DataPtrUint8()
+
+	for i := range buf1 {
+		buf1[i] = uint8(i + 1) // 1,2,3...6
+	}
+
+	m2 := gocv.NewMatWithSize(2, 3, gocv.MatTypeCV8U)
+	defer m2.Close()
+
+	buf2, _ := m2.DataPtrUint8()
+
+	for i := range buf2 {
+		buf2[i] = uint8((i + 1) * 10) // 10,20,...60
+	}
+
+	// Add two images
+	if err := batch.Add(m1); err != nil {
+		t.Fatalf("Add(m1) failed: %v", err)
+	}
+
+	if err := batch.Add(m2); err != nil {
+		t.Fatalf("Add(m2) failed: %v", err)
+	}
+
+	// Underlying batch mat should contain both
+	bMat := batch.Mat()
+	allData, err := bMat.DataPtrUint8()
+
+	if err != nil {
+		t.Fatalf("DataPtrUint8 on batch failed: %v", err)
+	}
+
+	// first 6 from buf1, next 6 from buf2
+	for i := 0; i < 6; i++ {
+		if allData[i] != buf1[i] {
+			t.Errorf("element %d = %d; want %d from img1", i, allData[i], buf1[i])
+		}
+	}
+
+	for i := 0; i < 6; i++ {
+		if allData[6+i] != buf2[i] {
+			t.Errorf("element %d = %d; want %d from img2", 6+i, allData[6+i], buf2[i])
+		}
+	}
+
+	// third Add should overflow
+	m3 := gocv.NewMatWithSize(2, 3, gocv.MatTypeCV8U)
+	defer m3.Close()
+	err3 := batch.Add(m3)
+
+	if err3 == nil {
+		t.Fatal("expected overflow error on third Add, got nil")
got nil") + } +} + +func TestBatchAddAtAndClear(t *testing.T) { + + r := &Runtime{inputTypeFloat32: false} + + batch := r.NewBatch(3, 2, 2, 1) + defer batch.Close() + + m := gocv.NewMatWithSize(2, 2, gocv.MatTypeCV8U) + defer m.Close() + + dat, _ := m.DataPtrUint8() + + for i := range dat { + dat[i] = uint8(i + 5) + } + + // AddAt index 1 + if err := batch.AddAt(1, m); err != nil { + t.Fatalf("AddAt failed: %v", err) + } + + // matCnt should still be zero + if batch.matCnt != 0 { + t.Errorf("matCnt = %d; want 0 after AddAt", batch.matCnt) + } + + // Clear resets matCnt + batch.Clear() + + if batch.matCnt != 0 { + t.Errorf("matCnt = %d; want 0 after Clear", batch.matCnt) + } + + // Add at invalid index + err := batch.AddAt(5, m) + + if err == nil { + t.Error("expected error for AddAt out of range, got nil") + } +} + +func TestGetOutputIntAndF32(t *testing.T) { + + r := &Runtime{inputTypeFloat32: false} + + batch := r.NewBatch(2, 2, 2, 1) + defer batch.Close() + + // Test GetOutputInt bounds + dOut := Output{BufInt: []int8{1, 2, 3, 4}, Size: 4} + + if _, err := batch.GetOutputInt(-1, dOut, 2); err == nil { + t.Error("expected error for GetOutputInt idx<0") + } + + if _, err := batch.GetOutputInt(2, dOut, 2); err == nil { + t.Error("expected error for GetOutputInt idx>=size") + } + + // valid slice + slice, err := batch.GetOutputInt(1, dOut, 2) + + if err != nil { + t.Errorf("GetOutputInt failed: %v", err) + } + + if len(slice) != 2 { + t.Errorf("len(slice) = %d; want 2", len(slice)) + } + + // Test GetOutputF32 bounds + dOutF := Output{BufFloat: []float32{1, 2, 3, 4}, Size: 4} + + if _, err := batch.GetOutputF32(-1, dOutF, 2); err == nil { + t.Error("expected error for GetOutputF32 idx<0") + } + + if _, err := batch.GetOutputF32(2, dOutF, 2); err == nil { + t.Error("expected error for GetOutputF32 idx>=size") + } + + sliceF, err := batch.GetOutputF32(0, dOutF, 2) + + if err != nil { + t.Errorf("GetOutputF32 failed: %v", err) + } + + if len(sliceF) != 2 { + t.Errorf("len(sliceF) = %d; want 2", len(sliceF)) + } +} diff --git a/example/batch/README.md b/example/batch/README.md new file mode 100644 index 0000000..51f99f5 --- /dev/null +++ b/example/batch/README.md @@ -0,0 +1,199 @@ + +# Batch Models + +## Overview + +Typically computer vision inference models have a single input tensor in +the shape of `NHWC` such as `[1,224,224,3]`. The rknn-toolkit2 allows you to +build the model with Batch tensor inputs by setting the `rknn_batch_size` parameter +in the following python conversion script. + +``` +rknn.build(do_quantization=do_quant, dataset=DATASET_PATH, rknn_batch_size=8) +``` + +This results in a .rknn model with modified tensor input dimensions of `[8,224,244,3]`. + +When taking input from a video source frame-by-frame, the use of batching to process +frames has little use case, as your only dealing with a single frame to be +processed as soon as possible. However batching can be useful if you have many +images to process at a single point in time, some examples of this could be; + * Running YOLO object detection on a frame, then passing all detected objects + through a ReIdentification model in batches. + * Some applications will buffer video frames and upon an external signal, it + will then trigger the processing of those buffered frames as a batch. + + +## Batch Sizing + +The NPU's in the different platforms RK356x, RK3576, and RK3588 have different +amounts of SRAM and NPU core numbers, so finding the optimal batch size for your +Model is critical. 
+
+A benchmarking tool has been created to test different batch sizes of your own
+RKNN Models. Use your python conversion script to compile the ONNX model to RKNN
+with the various `rknn_batch_size` values you would like to test. Name those RKNN
+Models using the format `<name>-batch{N1,N2,...,Nk}.rknn`. For example, to test
+batch sizes of 1, 4, 8, and 16 of an OSNet model, create the following files
+and place them in the directory `/tmp/models` on the host OS.
+```
+osnet-batch1.rknn
+osnet-batch4.rknn
+osnet-batch8.rknn
+osnet-batch16.rknn
+```
+
+We can then pass all these Models to the benchmark using the `-m` argument in
+the format of `-m "/tmp/models/osnet-batch{1,4,8,16}.rknn"`.
+
+To run the benchmark of your models on the rk3588 or replace with your Platform model.
+```
+# from project root directory
+
+go test -bench=BenchmarkBatchSize -benchtime=10s \
+  -args -p rk3588 -m "/tmp/models/osnet-batch{1,4,8,16}.rknn"
+```
+
+Similarly using Docker we can mount the `/tmp/models` directory and run.
+```
+# from project root directory
+
+docker run --rm \
+  --device /dev/dri:/dev/dri \
+  -v "$(pwd):/go/src/app" \
+  -v "$(pwd)/example/data:/go/src/data" \
+  -v "/usr/include/rknn_api.h:/usr/include/rknn_api.h" \
+  -v "/usr/lib/librknnrt.so:/usr/lib/librknnrt.so" \
+  -v "/tmp/models/:/tmp/models/" \
+  -w /go/src/app \
+  swdee/go-rknnlite:latest \
+  go test -bench=BenchmarkBatchSize -benchtime=10s \
+    -args -p rk3588 -m "/tmp/models/osnet-batch{1,4,8,16}.rknn"
+```
+
+Running the above benchmark command outputs the following results.
+
+#### rk3588
+
+```
+BenchmarkBatchSize/Batch01-8   1897     8806025 ns/op     8.806 ms/batch    8.806 ms/img
+BenchmarkBatchSize/Batch04-8    885    21555109 ns/op    21.55 ms/batch     5.389 ms/img
+BenchmarkBatchSize/Batch08-8    534    22335645 ns/op    22.34 ms/batch     2.792 ms/img
+BenchmarkBatchSize/Batch16-8    303    40253162 ns/op    40.25 ms/batch     2.516 ms/img
+```
+
+#### rk3576
+
+```
+BenchmarkBatchSize/Batch01-8   1312     8987117 ns/op     8.985 ms/batch    8.985 ms/img
+BenchmarkBatchSize/Batch04-8    640    18836090 ns/op    18.83 ms/batch     4.709 ms/img
+BenchmarkBatchSize/Batch08-8    385    31702649 ns/op    31.70 ms/batch     3.963 ms/img
+BenchmarkBatchSize/Batch16-8    194    63801596 ns/op    63.80 ms/batch     3.988 ms/img
+```
+
+#### rk3566
+
+```
+BenchmarkBatchSize/Batch01-4    661    18658568 ns/op    18.66 ms/batch    18.66 ms/img
+BenchmarkBatchSize/Batch04-4    158    74716574 ns/op    74.71 ms/batch    18.68 ms/img
+BenchmarkBatchSize/Batch08-4     70   155374027 ns/op   155.4 ms/batch     19.42 ms/img
+BenchmarkBatchSize/Batch16-4     37   294969497 ns/op   295.0 ms/batch     18.44 ms/img
+```
+
+
+### Interpreting Benchmark Results
+
+The `ms/batch` metric represents the number of milliseconds it took for the
+whole batch inference to run, and `ms/img` represents the average number of
+milliseconds it took to run inference per image.
+
+As can be seen in the rk3588 results, the ideal batch size is 8 as it gives
+a low `2.792` ms/img inference time (the `22.34` ms/batch divided by the 8
+images in the batch) for a total batch inference time of `22.34ms`. The same
+applies to the rk3576.
+
+The rk3566 has a single core NPU; the results show there is no benefit
+in running batching at all.
+
+These results were for an OSNet Model; it's possible that different Models
+perform differently, so you should run these benchmarks for your own
+application to optimize accordingly.
+
+
+## Usage
+
+An example batch program is provided that combines inferencing on a Pool of
+runtimes. Make sure you have downloaded the data files first for the examples.
+You only need to do this once for all examples.
+
+
+```
+cd example/
+git clone --depth=1 https://github.com/swdee/go-rknnlite-data.git data
+```
+
+
+Run the batch example on rk3588 or replace with your Platform model.
+```
+cd example/bacth
+go run batch.go -s 3 -p rk3588
+```
+
+This will result in the output of:
+```
+Driver Version: 0.9.6, API Version: 2.3.0 (c949ad889d@2024-11-07T11:35:33)
+Model Input Number: 1, Ouput Number: 1
+Input tensors:
+  index=0, name=input, n_dims=4, dims=[8, 224, 224, 3], n_elems=1204224, size=1204224, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=-14, scale=0.018658
+Output tensors:
+  index=0, name=output, n_dims=2, dims=[8, 1000, 0, 0], n_elems=8000, size=8000, fmt=UNDEFINED, type=INT8, qnt_type=AFFINE, zp=-55, scale=0.141923
+Running...
+File ../data/imagenet/n01514859_hen.JPEG, inference time 40ms
+File ../data/imagenet/n01518878_ostrich.JPEG, inference time 40ms
+File ../data/imagenet/n01530575_brambling.JPEG, inference time 40ms
+File ../data/imagenet/n01531178_goldfinch.JPEG, inference time 40ms
+...snip...
+File ../data/imagenet/n13054560_bolete.JPEG, inference time 8ms
+File ../data/imagenet/n13133613_ear.JPEG, inference time 8ms
+File ../data/imagenet/n15075141_toilet_tissue.JPEG, inference time 8ms
+Processed 1000 images in 2.098619346s, average inference per image is 2.10ms
+```
+
+See the help for command line parameters.
+```
+$ go run batch.go -h
+
+Usage of /tmp/go-build1506342544/b001/exe/batch:
+  -d string
+    	A directory of images to run inference on (default "../data/imagenet/")
+  -m string
+    	RKNN compiled model file (default "../data/models/rk3588/mobilenetv2-batch8-rk3588.rknn")
+  -p string
+    	Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588] (default "rk3588")
+  -q	Run in quiet mode, don't display individual inference results
+  -r int
+    	Repeat processing image directory the specified number of times, use this if you don't have enough images (default 1)
+  -s int
+    	Size of RKNN runtime pool, choose 1, 2, 3, or multiples of 3 (default 1)
+```
+
+
+
+### Docker
+
+To run the batch example using the prebuilt docker image, make sure the data
+files have been downloaded first, then run.
+``` +# from project root directory + +docker run --rm \ + --device /dev/dri:/dev/dri \ + -v "$(pwd):/go/src/app" \ + -v "$(pwd)/example/data:/go/src/data" \ + -v "/usr/include/rknn_api.h:/usr/include/rknn_api.h" \ + -v "/usr/lib/librknnrt.so:/usr/lib/librknnrt.so" \ + -w /go/src/app \ + swdee/go-rknnlite:latest \ + go run ./example/batch/batch.go -p rk3588 -s 3 +``` \ No newline at end of file diff --git a/example/batch/batch.go b/example/batch/batch.go new file mode 100644 index 0000000..ef5d1c5 --- /dev/null +++ b/example/batch/batch.go @@ -0,0 +1,221 @@ +package main + +import ( + "flag" + "github.com/swdee/go-rknnlite" + "gocv.io/x/gocv" + "image" + "log" + "os" + "path/filepath" + "strings" + "sync" + "time" +) + +var ( + // model input tensor dimensions, these values will be set + // when runtime queries the modelFile being loaded + height, width, channels, batchSize int +) + +func main() { + // disable logging timestamps + log.SetFlags(0) + + // read in cli flags + modelFile := flag.String("m", "../data/models/rk3588/mobilenetv2-batch8-rk3588.rknn", "RKNN compiled model file") + imgDir := flag.String("d", "../data/imagenet/", "A directory of images to run inference on") + poolSize := flag.Int("s", 1, "Size of RKNN runtime pool, choose 1, 2, 3, or multiples of 3") + repeat := flag.Int("r", 1, "Repeat processing image directory the specified number of times, use this if you don't have enough images") + quiet := flag.Bool("q", false, "Run in quiet mode, don't display individual inference results") + rkPlatform := flag.String("p", "rk3588", "Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3582|rk3588]") + + flag.Parse() + + // set cpu affinity to run on specific CPU cores + err := rknnlite.SetCPUAffinityByPlatform(*rkPlatform, rknnlite.FastCores) + + if err != nil { + log.Printf("Failed to set CPU Affinity: %v\n", err) + } + + // check dir exists + info, err := os.Stat(*imgDir) + + if err != nil { + log.Fatalf("No such image directory %s, error: %v\n", *imgDir, err) + } + + if !info.IsDir() { + log.Fatal("Image path is not a directory") + } + + // check if user specified model file or if default is being used. if default + // then pick the default platform model to use. + if f := flag.Lookup("m"); f != nil && f.Value.String() == f.DefValue && *rkPlatform != "rk3588" { + *modelFile = strings.ReplaceAll(*modelFile, "rk3588", *rkPlatform) + } + + // create new pool, we pass NPUCoreAuto as RKNN does not allow batch Models + // to be pinned to specific NPU cores + useCore := rknnlite.NPUCoreAuto + + if strings.HasPrefix(strings.ToLower(*rkPlatform), "rk356") { + useCore = rknnlite.NPUSkipSetCore + } + + pool, err := rknnlite.NewPool(*poolSize, *modelFile, + []rknnlite.CoreMask{useCore}) + + if err != nil { + log.Fatalf("Error creating RKNN pool: %v\n", err) + } + + // set runtime to leave output tensors as int8 + pool.SetWantFloat(false) + + // get a runtime and query the input tensor dimensions of the model + rt := pool.Get() + + // optional querying of model file tensors and SDK version for printing + // to stdout. 
+	// not necessary for production inference code
+	err = rt.Query(os.Stdout)
+
+	if err != nil {
+		log.Fatal("Error querying runtime: ", err)
+	}
+
+	batchSize = int(rt.InputAttrs()[0].Dims[0])
+	width = int(rt.InputAttrs()[0].Dims[1])
+	height = int(rt.InputAttrs()[0].Dims[2])
+	channels = int(rt.InputAttrs()[0].Dims[3])
+
+	pool.Return(rt)
+
+	// get list of all files in the directory
+	entries, err := os.ReadDir(*imgDir)
+
+	if err != nil {
+		log.Fatalf("Error reading image directory: %v\n", err)
+	}
+
+	var files []string
+
+	for _, e := range entries {
+		if e.IsDir() {
+			continue
+		}
+
+		files = append(files, filepath.Join(*imgDir, e.Name()))
+	}
+
+	log.Println("Running...")
+
+	// waitgroup used to wait for all go-routines to complete before closing
+	// the pool
+	var wg sync.WaitGroup
+
+	start := time.Now()
+
+	// repeat processing image set the specified number of times
+	for i := 0; i < *repeat; i++ {
+		// process image files in groups of batchSize, as queried from the
+		// model's input tensor above
+		for offset := 0; offset < len(files); offset += batchSize {
+
+			end := offset + batchSize
+
+			if end > len(files) {
+				end = len(files)
+			}
+
+			subset := files[offset:end]
+
+			// pool.Get() blocks if no runtimes are available in the pool
+			rt := pool.Get()
+			wg.Add(1)
+
+			go func(rt *rknnlite.Runtime, batchPaths []string) {
+				defer wg.Done()
+				processBatch(rt, batchPaths, *quiet)
+				pool.Return(rt)
+			}(rt, subset)
+		}
+	}
+
+	wg.Wait()
+
+	// calculate average inference
+	numFiles := (*repeat * len(files))
+	end := time.Since(start)
+	avg := (end.Seconds() / float64(numFiles)) * 1000
+
+	log.Printf("Processed %d images in %s, average inference per image is %.2fms\n",
+		numFiles, end.String(), avg)
+
+	pool.Close()
+}
+
+func processBatch(rt *rknnlite.Runtime, paths []string, quiet bool) {
+
+	// create batch
+	batch := rt.NewBatch(batchSize, height, width, channels)
+	defer batch.Close()
+
+	// for each image path, load & preprocess, then Add to batch
+	for idx, file := range paths {
+
+		img := gocv.IMRead(file, gocv.IMReadColor)
+
+		if img.Empty() {
+			log.Printf("Error reading %s\n", file)
+			continue
+		}
+
+		defer img.Close()
+
+		// rgb + resize
+		rgbImg := gocv.NewMat()
+		gocv.CvtColor(img, &rgbImg, gocv.ColorBGRToRGB)
+		defer rgbImg.Close()
+
+		cropImg := gocv.NewMat()
+		gocv.Resize(rgbImg, &cropImg, image.Pt(width, height), 0, 0, gocv.InterpolationArea)
+		defer cropImg.Close()
+
+		if err := batch.AddAt(idx, cropImg); err != nil {
+			log.Printf("Batch.Add error: %v\n", err)
+		}
+	}
+
+	// run inference on the entire batch at once
+	start := time.Now()
+	outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})
+	spent := time.Since(start)
+
+	if err != nil {
+		log.Printf("Inference error: %v\n", err)
+		return
+	}
+
+	defer outputs.Free()
+
+	// unpack per image results
+	for idx := 0; idx < len(paths); idx++ {
+
+		if quiet {
+			continue
+		}
+
+		// get int8 output tensor for image at idx
+		_, err := batch.GetOutputInt(idx, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
+
+		if err != nil {
+			log.Printf("GetOutputInt[%d] error: %v\n", idx, err)
+			continue
+		}
+
+		log.Printf("File %s, inference time %dms\n", paths[idx], spent.Milliseconds())
+	}
+}

From 5002860fdd977e3e2265f20bb1375a539254c7cc Mon Sep 17 00:00:00 2001
From: swdee
Date: Mon, 30 Jun 2025 13:45:46 +1200
Subject: [PATCH 04/16] added API notes to batch readme

---
 example/batch/README.md | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/example/batch/README.md b/example/batch/README.md
index 51f99f5..2fc3d28 100644
--- a/example/batch/README.md
+++ b/example/batch/README.md
@@ -196,4 +196,35 @@ docker run --rm \
   -w /go/src/app \
   swdee/go-rknnlite:latest \
   go run ./example/batch/batch.go -p rk3588 -s 3
-``` \ No newline at end of file
+```
+
+
+## API
+
+A convenience `Batch` helper is provided to concatenate individual
+images into a single input tensor for the Model and then extract their results
+from the combined outputs.
+
+```
+// create a new batch processor
+batch := rt.NewBatch(batchSize, height, width, channels)
+defer batch.Close()
+
+
+for idx, img := range imgs {
+
+	// add image Mats to the batch at the given index
+	batch.AddAt(idx, img)
+
+	// OR you can add images incrementally without specifying an index
+	batch.Add(img)
+}
+
+// pass the concatenated Mat to the runtime for inference
+outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})
+
+// then get a single image result by index
+output, err := batch.GetOutputInt(4, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
+```
+
+See the full example code for more details.
\ No newline at end of file

From d32a9c3010abea996a3f731100d25fee4e5389cc Mon Sep 17 00:00:00 2001
From: swdee
Date: Wed, 2 Jul 2025 21:32:31 +1200
Subject: [PATCH 05/16] fix typo in batch readme

---
 example/batch/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/example/batch/README.md b/example/batch/README.md
index 2fc3d28..baf63e2 100644
--- a/example/batch/README.md
+++ b/example/batch/README.md
@@ -136,7 +136,7 @@ git clone --depth=1 https://github.com/swdee/go-rknnlite-data.git data
 
 Run the batch example on rk3588 or replace with your Platform model.
 ```
-cd example/bacth
+cd example/batch
 go run batch.go -s 3 -p rk3588
 ```
 
From 68af76e39b4d9b7cc9f06fda1f432683a057f9ad Mon Sep 17 00:00:00 2001
From: swdee
Date: Mon, 7 Jul 2025 15:28:06 +1200
Subject: [PATCH 06/16] adjust how NewBatch passed float/int output flag

---
 batch.go               | 6 +++---
 example/batch/batch.go | 3 ++-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/batch.go b/batch.go
index fe4f65f..29ac75e 100644
--- a/batch.go
+++ b/batch.go
@@ -30,12 +30,12 @@ type Batch struct {
 
 // NewBatch creates a batch of concatenated Mats for the given input tensor
 // and batch size
-func (r *Runtime) NewBatch(batchSize, height, width, channels int) *Batch {
+func NewBatch(batchSize, height, width, channels int, inputTypeFloat32 bool) *Batch {
 
 	// Choose output Mat type
 	var matType gocv.MatType
 
-	if r.inputTypeFloat32 {
+	if inputTypeFloat32 {
 		matType = gocv.MatTypeCV32F
 	} else {
 		matType = gocv.MatTypeCV8U
@@ -49,7 +49,7 @@ func (r *Runtime) NewBatch(batchSize, height, width, channels int) *Batch {
 		width:            width,
 		channels:         channels,
 		mat:              gocv.NewMatWithSizes(shape, matType),
-		inputTypeFloat32: r.inputTypeFloat32,
+		inputTypeFloat32: inputTypeFloat32,
 		matType:          matType,
 		matCnt:           0,
 		imgSize:          height * width * channels,
diff --git a/example/batch/batch.go b/example/batch/batch.go
index ef5d1c5..97d0c38 100644
--- a/example/batch/batch.go
+++ b/example/batch/batch.go
@@ -160,7 +160,8 @@ func main() {
 func processBatch(rt *rknnlite.Runtime, paths []string, quiet bool) {
 
 	// create batch
-	batch := rt.NewBatch(batchSize, height, width, channels)
+	batch := rknnlite.NewBatch(batchSize, height, width, channels,
+		rt.GetInputTypeFloat32())
 	defer batch.Close()
 
 	// for each image path, load & preprocess, then Add to batch

From 7e53a80fbb7d27f04a761d542d9d495b008e7e19 Mon Sep 17 00:00:00 2001
From: swdee
Date: Mon, 7 Jul 2025 15:28:29 +1200
Subject: [PATCH 07/16] added function to return pool size
---
 pool.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pool.go b/pool.go
index c415ecf..b46a591 100644
--- a/pool.go
+++ b/pool.go
@@ -109,6 +109,11 @@ func (p *Pool) SetWantFloat(val bool) {
 	}
 }
 
+// Size returns the Pool size
+func (p *Pool) Size() int {
+	return p.size
+}
+
 // getRuntimeCore takes an integer and returns the core mask value to use from
 // the coremask list
 func getRuntimeCore(i int, cores []CoreMask) CoreMask {

From efa55890bd2e9205672662ca1ea23ce45aa282bf Mon Sep 17 00:00:00 2001
From: swdee
Date: Mon, 7 Jul 2025 15:29:07 +1200
Subject: [PATCH 08/16] add function to return runtime inputtypefloat value

---
 runtime.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/runtime.go b/runtime.go
index a5a0dc6..4bb8b0e 100644
--- a/runtime.go
+++ b/runtime.go
@@ -241,6 +241,11 @@ func (r *Runtime) SetInputTypeFloat32(val bool) {
 	r.inputTypeFloat32 = val
 }
 
+// GetInputTypeFloat32 returns the input type if set as Float32 (true) or Int8 (false)
+func (r *Runtime) GetInputTypeFloat32() bool {
+	return r.inputTypeFloat32
+}
+
 // SDKVersion represents the C.rknn_sdk_version struct
 type SDKVersion struct {
 	DriverVersion string

From e7b2d0e7769717035e214626ecd286c4b515cabe Mon Sep 17 00:00:00 2001
From: swdee
Date: Mon, 7 Jul 2025 15:29:52 +1200
Subject: [PATCH 09/16] added BatchPool functions to have a pool of batches

---
 batchpool.go | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 batchpool.go

diff --git a/batchpool.go b/batchpool.go
new file mode 100644
index 0000000..a09fbe7
--- /dev/null
+++ b/batchpool.go
@@ -0,0 +1,75 @@
+package rknnlite
+
+import (
+	"sync"
+)
+
+// BatchPool is a pool of batches
+type BatchPool struct {
+	// pool of batches
+	batches chan *Batch
+	// size of pool
+	size  int
+	close sync.Once
+}
+
+// NewBatchPool returns a pool of Batches
+func NewBatchPool(size int, rt *Runtime) *BatchPool {
+
+	p := &BatchPool{
+		batches: make(chan *Batch, size),
+		size:    size,
+	}
+
+	batchSize := int(rt.InputAttrs()[0].Dims[0])
+	width := int(rt.InputAttrs()[0].Dims[1])
+	height := int(rt.InputAttrs()[0].Dims[2])
+	channels := int(rt.InputAttrs()[0].Dims[3])
+	inputType := rt.GetInputTypeFloat32()
+
+	// create batch pool to be the same size as the runtime pool
+	for i := 0; i < size; i++ {
+		batch := NewBatch(
+			batchSize,
+			height,
+			width,
+			channels,
+			inputType,
+		)
+
+		// attach to pool
+		p.Return(batch)
+	}
+
+	return p
+}
+
+// Get returns a batch from the pool
+func (p *BatchPool) Get() *Batch {
+	return <-p.batches
+}
+
+// Return a batch to the pool
+func (p *BatchPool) Return(batch *Batch) {
+
+	batch.Clear()
+
+	select {
+	case p.batches <- batch:
+	default:
+		// pool is full or closed
+	}
+}
+
+// Close the pool and all batches in it
+func (p *BatchPool) Close() {
+	p.close.Do(func() {
+		// close channel
+		close(p.batches)
+
+		// close all batches remaining in the channel
+		for next := range p.batches {
+			_ = next.Close()
+		}
+	})
+}

From 2f87fc87e68f6cabb9001a005944ab90a6a2e709 Mon Sep 17 00:00:00 2001
From: swdee
Date: Mon, 7 Jul 2025 15:30:41 +1200
Subject: [PATCH 10/16] added compiling of osnet models to toolkit

---
 toolkit/compile-models.sh | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/toolkit/compile-models.sh b/toolkit/compile-models.sh
index a5b81de..08867b9 100755
--- a/toolkit/compile-models.sh
+++ b/toolkit/compile-models.sh
@@ -31,6 +31,7 @@ MODELS=(
   "yolov8 convert-lpd.py /opt/lpd-yolov8/lpd-yolov8n.onnx i8 '' lpd-yolov8n"
lpd-yolov8n" "yolov8 convert.py /opt/go-rknnlite-build/yolonas-s.onnx i8 '' yolonas-s" "mobilenet mobilenet-rknn-batch.py ../model/mobilenetv2-12.onnx i8 --model mobilenetv2-batch8" + "osnet-market1501 build|onnx_to_rknn.py osnet_x1_0_market_256x128.onnx i8 '' osnet-market1501-batch8" ) # compile all entries (or just filter) for one platform @@ -76,6 +77,7 @@ compile_for_platform() { fi echo "-> building $outprefix for $platform" + local out="/opt/rkmodels/${platform}/${outprefix}-${platform}.rknn" if [[ "$script" == "rknn_convert" ]]; then # mobilenet_v1 special: use the CLI and then rename @@ -84,13 +86,23 @@ compile_for_platform() { -i "$model" \ -o "/opt/rkmodels/$platform/" mv "/opt/rkmodels/$platform/${outprefix}.rknn" \ - "/opt/rkmodels/$platform/${outprefix}-${platform}.rknn" + "$out" + continue + fi + + # build the go-rknnlite-build models + if [[ "$script" == build\|* ]]; then + # strip everything up to (and including) the first pipe to get script name + scriptName="${script#*|}" + # go into the go-rknnlite-build tree + pushd "/opt/go-rknnlite-build/${subdir}" >/dev/null + python "$scriptName" "$model" "$platform" "$dtype" "$out" + popd >/dev/null continue fi # the old examples pushd "/opt/rknn_model_zoo/examples/${subdir}/python/" >/dev/null - local out="/opt/rkmodels/${platform}/${outprefix}-${platform}.rknn" if [[ "$subdir" == "mobilenet" ]]; then python "$script" $extra "$model" \ From d18166968ff4391410733c9dd65252f897cb4c4c Mon Sep 17 00:00:00 2001 From: swdee Date: Mon, 7 Jul 2025 15:31:18 +1200 Subject: [PATCH 11/16] added ReID example --- README.md | 2 + example/reid/README.md | 188 ++++++++++++++ example/reid/reid.go | 524 +++++++++++++++++++++++++++++++++++++++ postprocess/reid/reid.go | 129 ++++++++++ 4 files changed, 843 insertions(+) create mode 100644 example/reid/README.md create mode 100644 example/reid/reid.go create mode 100644 postprocess/reid/reid.go diff --git a/README.md b/README.md index 489d7cb..a282817 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,8 @@ See the [example](example) directory. * [PPOCR Detect](example/ppocr#ppocr-detect) - Takes an image and detects areas of text. * [PPOCR Recognise](example/ppocr#ppocr-recognise) - Takes an area of text and performs OCR on it. * [PPOCR System](example/ppocr#ppocr-system) - Combines both Detect and Recognise. +* Tracking + * [Re-Identification Demo](example/reid) - Re-Identify (ReID) similar objects for tracking, uses batch processing. * Streaming * [HTTP Stream with ByteTrack Tracking](example/stream) - Demo that streams a video over HTTP with YOLO object detection and ByteTrack object tracking. * Slicing Aided Hyper Inference diff --git a/example/reid/README.md b/example/reid/README.md new file mode 100644 index 0000000..caeb16d --- /dev/null +++ b/example/reid/README.md @@ -0,0 +1,188 @@ + +# Re-Identification (ReID) + +## Overview + +Object trackers like ByteTrack can be used to track visible objects frame‐to‐frame, +but they rely on the assumption that an object's appearance and location change +smoothly over time. If a person goes behind a building or is briefly hidden +by another passerby, the tracker can lose that objects identity. When that same +person reemerges, the tracker often treats them as a new object, assigning a new ID. +This makes analyzing a persons complete path through a scene difficult +or makes counting unique objects much harder. 
+
+Re-Identification (ReID) models help solve this problem by using embedding features
+which encode an object into a fixed length vector that captures distinctive
+patterns, shapes, or other visual signatures. When an object disappears and
+then reappears you can compare the newly detected object's embedding against a list of
+past objects. If the similarity (using Cosine or Euclidean distance)
+exceeds a chosen threshold, you can confidently link the new detection back to the
+original track ID.
+
+
+## Datasets
+
+The [OSNet model](https://paperswithcode.com/paper/omni-scale-feature-learning-for-person-re) is
+lightweight and provides good accuracy for reidentification tasks; however,
+it must be trained using a dataset to identify specific object classes.
+
+This example uses a model trained on the [Market1501](https://paperswithcode.com/dataset/market-1501)
+dataset for reidentifying people.
+
+To support other object classifications such as Vehicles, Faces, or Animals, you
+will need to source and train these accordingly.
+
+
+## Occlusion Example
+
+In the [people walking video](https://github.com/swdee/go-rknnlite-data/raw/master/people-walking.mp4)
+a lady wearing a CK branded jacket starts
+at the beginning of the scene and becomes occluded by passersby. When she
+reappears, ByteTrack detects her as a new person.
+
+![CK Lady](https://github.com/swdee/go-rknnlite-data/raw/master/docimg/reid-ck-lady-movement.jpg)
+
+
+
+## Usage
+
+Make sure you have downloaded the data files first for the examples.
+You only need to do this once for all examples.
+
+```
+cd example/
+git clone --depth=1 https://github.com/swdee/go-rknnlite-data.git data
+```
+
+
+Command line Usage.
+```
+$ go run reid.go -h
+
+Usage of /tmp/go-build147978858/b001/exe/reid:
+  -d string
+    	Data file containing object co-ordinates (default "../data/reid-objects.dat")
+  -e float
+    	The Euclidean distance [0.0-1.0], a value less than this defines a match (default 0.51)
+  -i string
+    	Image file to run inference on (default "../data/reid-walking.jpg")
+  -m string
+    	RKNN compiled model file (default "../data/models/rk3588/osnet-market1501-batch8-rk3588.rknn")
+  -p string
+    	Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588] (default "rk3588")
+```
+
+Run the ReID example on rk3588 or replace with your Platform model.
+```
+cd example/reid/
+go run reid.go -p rk3588
+```
+
+
+This will result in the output of:
+```
+Driver Version: 0.9.6, API Version: 2.3.0 (c949ad889d@2024-11-07T11:35:33)
+Model Input Number: 1, Ouput Number: 1
+Input tensors:
+  index=0, name=input, n_dims=4, dims=[8, 256, 128, 3], n_elems=786432, size=786432, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=-14, scale=0.018658
+Output tensors:
+  index=0, name=output, n_dims=2, dims=[8, 512, 0, 0], n_elems=4096, size=4096, fmt=UNDEFINED, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.018782
+Comparing object 0 at (0,0,134,361)
+  Object 0 at (0,0,134,361) has euclidean distance: 0.000000 (same person)
+  Object 1 at (134,0,251,325) has euclidean distance: 0.423271 (same person)
+  Object 2 at (251,0,326,208) has euclidean distance: 0.465061 (same person)
+  Object 3 at (326,0,394,187) has euclidean distance: 0.445583 (same person)
+Comparing object 1 at (394,0,513,357)
+  Object 0 at (0,0,134,361) has euclidean distance: 0.781510 (different person)
+  Object 1 at (134,0,251,325) has euclidean distance: 0.801649 (different person)
+  Object 2 at (251,0,326,208) has euclidean distance: 0.680299 (different person)
+  Object 3 at (326,0,394,187) has euclidean distance: 0.686542 (different person)
+Comparing object 2 at (513,0,588,246)
+  Object 0 at (0,0,134,361) has euclidean distance: 0.860921 (different person)
+  Object 1 at (134,0,251,325) has euclidean distance: 0.873663 (different person)
+  Object 2 at (251,0,326,208) has euclidean distance: 0.870753 (different person)
+  Object 3 at (326,0,394,187) has euclidean distance: 0.820761 (different person)
+Comparing object 3 at (588,0,728,360)
+  Object 0 at (0,0,134,361) has euclidean distance: 0.762738 (different person)
+  Object 1 at (134,0,251,325) has euclidean distance: 0.800668 (different person)
+  Object 2 at (251,0,326,208) has euclidean distance: 0.763694 (different person)
+  Object 3 at (326,0,394,187) has euclidean distance: 0.769597 (different person)
+Model first run speed: batch preparation=3.900093ms, inference=47.935686ms, post processing=262.203µs, total time=52.097982ms
+done
+```
+
+### Docker
+
+To run the ReID example using the prebuilt docker image, make sure the data files have been downloaded first,
+then run.
+```
+# from project root directory
+
+docker run --rm \
+  --device /dev/dri:/dev/dri \
+  -v "$(pwd):/go/src/app" \
+  -v "$(pwd)/example/data:/go/src/data" \
+  -v "/usr/include/rknn_api.h:/usr/include/rknn_api.h" \
+  -v "/usr/lib/librknnrt.so:/usr/lib/librknnrt.so" \
+  -w /go/src/app \
+  swdee/go-rknnlite:latest \
+  go run ./example/reid/reid.go -p rk3588
+```
+
+### Interpreting Results
+
+The above example uses people detected with a YOLOv5 model and then cropped to
+create the sample input.
+
+![CK Lady](https://github.com/swdee/go-rknnlite-data/raw/master/reid-walking.jpg)
+
+Objects A1 to A4 represent the same person, and objects B1, C1, and D1 are other
+people from the same scene.
+
+The first set of comparisons:
+```
+Comparing object 0 [A1] at (0,0,134,361)
+  Object 0 [A1] at (0,0,134,361) has euclidean distance: 0.000000 (same person)
+  Object 1 [A2] at (134,0,251,325) has euclidean distance: 0.423271 (same person)
+  Object 2 [A3] at (251,0,326,208) has euclidean distance: 0.465061 (same person)
+  Object 3 [A4] at (326,0,394,187) has euclidean distance: 0.445583 (same person)
+```
+
+Object 0 is A1; when compared to itself it has a Euclidean distance of 0.0.
+Objects 1-3 are A2 to A4; each of these has a similar
+distance ranging from 0.42 to 0.46.
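+
+For reference, these numbers come from dequantizing each image's int8
+embedding using the output tensor's scale and zero point, L2-normalizing it,
+then taking the Euclidean distance between the two vectors. Below is a minimal
+sketch of that computation; the convenience functions actually used by this
+example live in the `postprocess/reid` package, so the helper names and
+signatures here are illustrative only.
+
+```
+// assumes: import "math"
+
+// dequantize an int8 embedding and L2-normalize it to unit length
+func dequantizeAndNormalize(buf []int8, scale float32, zp int32) []float32 {
+	out := make([]float32, len(buf))
+	var sum float32
+	for i, v := range buf {
+		f := (float32(v) - float32(zp)) * scale // affine dequantization
+		out[i] = f
+		sum += f * f
+	}
+	if norm := float32(math.Sqrt(float64(sum))); norm > 0 {
+		for i := range out {
+			out[i] /= norm
+		}
+	}
+	return out
+}
+
+// Euclidean (L2) distance between two equal-length embeddings
+func euclideanDistance(a, b []float32) float32 {
+	var sum float32
+	for i := range a {
+		d := a[i] - b[i]
+		sum += d * d
+	}
+	return float32(math.Sqrt(float64(sum)))
+}
+```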
+
+A Euclidean distance ranges from 0.0 (same object) to 1.0 (different object), so
+the lower the distance, the more similar the objects are. A threshold of `0.51`
+is used to define the maximum distance at which an object is considered
+the same. Your use case and datasets may require calibration of
+the ideal threshold.
+
+The remaining results compare the people B1, C1, and D1.
+```
+Comparing object 1 [B1] at (394,0,513,357)
+  Object 0 [A1] at (0,0,134,361) has euclidean distance: 0.781510 (different person)
+  Object 1 [A2] at (134,0,251,325) has euclidean distance: 0.801649 (different person)
+  Object 2 [A3] at (251,0,326,208) has euclidean distance: 0.680299 (different person)
+  Object 3 [A4] at (326,0,394,187) has euclidean distance: 0.686542 (different person)
+Comparing object 2 [C1] at (513,0,588,246)
+  Object 0 [A1] at (0,0,134,361) has euclidean distance: 0.860921 (different person)
+  Object 1 [A2] at (134,0,251,325) has euclidean distance: 0.873663 (different person)
+  Object 2 [A3] at (251,0,326,208) has euclidean distance: 0.870753 (different person)
+  Object 3 [A4] at (326,0,394,187) has euclidean distance: 0.820761 (different person)
+Comparing object 3 [D1] at (588,0,728,360)
+  Object 0 [A1] at (0,0,134,361) has euclidean distance: 0.762738 (different person)
+  Object 1 [A2] at (134,0,251,325) has euclidean distance: 0.800668 (different person)
+  Object 2 [A3] at (251,0,326,208) has euclidean distance: 0.763694 (different person)
+  Object 3 [A4] at (326,0,394,187) has euclidean distance: 0.769597 (different person)
+```
+
+All of these other people have a euclidean distance greater than 0.68, indicating
+they are different people.
+
+
+## Postprocessing
+
+[Convenience functions](../../postprocess/reid/reid.go)
+are provided for calculating the Euclidean Distance or Cosine Similarity
+depending on how the Model has been trained.
\ No newline at end of file
diff --git a/example/reid/reid.go b/example/reid/reid.go
new file mode 100644
index 0000000..dd6fc6b
--- /dev/null
+++ b/example/reid/reid.go
@@ -0,0 +1,524 @@
+package main
+
+import (
+	"bufio"
+	"flag"
+	"fmt"
+	"github.com/swdee/go-rknnlite"
+	"github.com/swdee/go-rknnlite/postprocess/reid"
+	"gocv.io/x/gocv"
+	"image"
+	"log"
+	"os"
+	"strconv"
+	"strings"
+	"time"
+)
+
+func main() {
+	// disable logging timestamps
+	log.SetFlags(0)
+
+	// read in cli flags
+	modelFile := flag.String("m", "../data/models/rk3588/osnet-market1501-batch8-rk3588.rknn", "RKNN compiled model file")
+	imgFile := flag.String("i", "../data/reid-walking.jpg", "Image file to run inference on")
+	objsFile := flag.String("d", "../data/reid-objects.dat", "Data file containing object co-ordinates")
+	rkPlatform := flag.String("p", "rk3588", "Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588]")
+	euDist := flag.Float64("e", 0.51, "The Euclidean distance [0.0-1.0], a value less than this defines a match")
+	flag.Parse()
+
+	err := rknnlite.SetCPUAffinityByPlatform(*rkPlatform, rknnlite.FastCores)
+
+	if err != nil {
+		log.Printf("Failed to set CPU Affinity: %v", err)
+	}
+
+	// check if user specified model file or if default is being used. if default
+	// then pick the default platform model to use.
+	if f := flag.Lookup("m"); f != nil && f.Value.String() == f.DefValue && *rkPlatform != "rk3588" {
+		*modelFile = strings.ReplaceAll(*modelFile, "rk3588", *rkPlatform)
+	}
+
+	// create rknn runtime instance
+	rt, err := rknnlite.NewRuntimeByPlatform(*rkPlatform, *modelFile)
+
+	if err != nil {
+		log.Fatal("Error initializing RKNN runtime: ", err)
+	}
+
+	// set runtime to leave output tensors as int8
+	rt.SetWantFloat(false)
+
+	// optional querying of model file tensors and SDK version for printing
+	// to stdout. not necessary for production inference code
+	err = rt.Query(os.Stdout)
+
+	if err != nil {
+		log.Fatal("Error querying runtime: ", err)
+	}
+
+	// load objects file
+	objs, err := ParseObjects(*objsFile)
+
+	if err != nil {
+		log.Fatal("Error parsing objects: ", err)
+	}
+
+	// load image
+	img := gocv.IMRead(*imgFile, gocv.IMReadColor)
+
+	if img.Empty() {
+		log.Fatal("Error reading image from: ", *imgFile)
+	}
+
+	// convert colorspace
+	srcImg := gocv.NewMat()
+	gocv.CvtColor(img, &srcImg, gocv.ColorBGRToRGB)
+
+	defer img.Close()
+	defer srcImg.Close()
+
+	start := time.Now()
+
+	// create a batch to process all images in the compare and dataset lists
+	// in a single forward pass
+	batch := rknnlite.NewBatch(
+		int(rt.InputAttrs()[0].Dims[0]),
+		int(rt.InputAttrs()[0].Dims[2]),
+		int(rt.InputAttrs()[0].Dims[1]),
+		int(rt.InputAttrs()[0].Dims[3]),
+		rt.GetInputTypeFloat32(),
+	)
+
+	// scale size is the size of the input tensor dimensions to scale the object to
+	scaleSize := image.Pt(int(rt.InputAttrs()[0].Dims[1]), int(rt.InputAttrs()[0].Dims[2]))
+
+	// add the compare images to the batch
+	for _, cmpObj := range objs.Compare {
+		err := AddObjectToBatch(batch, srcImg, cmpObj, scaleSize)
+
+		if err != nil {
+			log.Fatal("Error creating batch: ", err)
+		}
+	}
+
+	// add the dataset images to the batch
+	for _, dtObj := range objs.Dataset {
+		err := AddObjectToBatch(batch, srcImg, dtObj, scaleSize)
+
+		if err != nil {
+			log.Fatal("Error creating batch: ", err)
+		}
+	}
+
+	defer batch.Close()
+
+	endBatch := time.Now()
+
+	// run inference on the batch
+	outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})
+
+	endInference := time.Now()
+
+	if err != nil {
+		log.Fatal("Runtime inferencing failed with error: ", err)
+	}
+
+	// get total number of compare objects
+	totalCmp := len(objs.Compare)
+
+	// compare each object to those objects in the dataset for similarity
+	for i, cmpObj := range objs.Compare {
+		// get the compare object's output
+		cmpOutput, err := batch.GetOutputInt(i, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
+
+		if err != nil {
+			log.Fatal("Getting output tensor failed with error: ", err)
+		}
+
+		log.Printf("Comparing object %d at (%d,%d,%d,%d)\n", i,
+			cmpObj.X1, cmpObj.Y1, cmpObj.X2, cmpObj.Y2)
+
+		for j, dtObj := range objs.Dataset {
+			// get each object's outputs
+			nextOutput, err := batch.GetOutputInt(totalCmp+j, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
+
+			if err != nil {
+				log.Fatal("Getting output tensor failed with error: ", err)
+			}
+
+			dist := CompareObjects(
+				cmpOutput,
+				nextOutput,
+				outputs.OutputAttributes().Scales[0],
+				outputs.OutputAttributes().ZPs[0],
+			)
+
+			// check euclidean distance to determine match of same person or not
+			objRes := "different person"
+
+			if dist < float32(*euDist) {
+				objRes = "same person"
+			}
+
+			log.Printf("  Object %d at (%d,%d,%d,%d) has euclidean distance: %f (%s)\n",
+				j,
+				dtObj.X1, dtObj.Y1, dtObj.X2, dtObj.Y2,
+				dist, objRes)
+		}
+	}
+
+	endCompare := time.Now()
+
log.Printf("Model first run speed: batch preparation=%s, inference=%s, post processing=%s, total time=%s\n", + endBatch.Sub(start).String(), + endInference.Sub(endBatch).String(), + endCompare.Sub(endInference).String(), + endCompare.Sub(start).String(), + ) + + // free outputs allocated in C memory after you have finished post processing + err = outputs.Free() + + if err != nil { + log.Fatal("Error freeing Outputs: ", err) + } + + // close runtime and release resources + err = rt.Close() + + if err != nil { + log.Fatal("Error closing RKNN runtime: ", err) + } + + log.Println("done") + + /* + //CompareObject(rt, srcImg, cmpObj, objs.Dataset) + + //rgbImg := img.Clone() + + + + frameWidth := 67 + frameHeight := 177 + + roiRect1 := image.Rect(497, 195, 497+frameWidth, 195+frameHeight) + + // cklady + //roiRect1 := image.Rect(0, 0, 134, 361) + + roiImg1 := rgbImg.Region(roiRect1) + + cropImg1 := rgbImg.Clone() + scaleSize1 := image.Pt(int(rt.InputAttrs()[0].Dims[1]), int(rt.InputAttrs()[0].Dims[2])) + gocv.Resize(roiImg1, &cropImg1, scaleSize1, 0, 0, gocv.InterpolationArea) + + defer img.Close() + defer rgbImg.Close() + defer cropImg1.Close() + defer roiImg1.Close() + + gocv.IMWrite("/tmp/frame-master.jpg", cropImg1) + + batch := rt.NewBatch( + int(rt.InputAttrs()[0].Dims[0]), + int(rt.InputAttrs()[0].Dims[2]), + int(rt.InputAttrs()[0].Dims[1]), + int(rt.InputAttrs()[0].Dims[3]), + ) + err = batch.Add(cropImg1) + + if err != nil { + log.Fatal("Error creating batch: ", err) + } + defer batch.Close() + + // perform inference on image file + outputs, err := rt.Inference([]gocv.Mat{batch.Mat()}) + + if err != nil { + log.Fatal("Runtime inferencing failed with error: ", err) + } + + output, err := batch.GetOutputInt(0, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL)) + + if err != nil { + log.Fatal("Getting output tensor failed with error: ", err) + } + + fingerPrint := DequantizeAndL2Normalize( + output, + outputs.OutputAttributes().Scales[0], + outputs.OutputAttributes().ZPs[0], + ) + + // seed the EMA fingerprint to the master + emaFP := make([]float32, len(fingerPrint)) + copy(emaFP, fingerPrint) + const alpha = 0.9 // smoothing factor + + hash, err := FingerprintHash(fingerPrint) + + if err != nil { + log.Fatalf("hashing failed: %v", err) + } + + log.Println("object fingerprint:", hash) + + // free outputs allocated in C memory after you have finished post processing + err = outputs.Free() + + if err != nil { + log.Fatal("Error freeing Outputs: ", err) + } + + + // sample 2 images + + yOffsets := []int{1, 195, 388} + xOffsets := []int{497, 565, 633, 701, 769, 836, 904} + + images := [][]int{} + + for _, ny := range yOffsets { + for _, nx := range xOffsets { + images = append(images, []int{nx, ny}) + } + } + + // ck lady + + // images := [][]int{ + // {134, 0, 117, 325}, + // {251, 0, 75, 208}, + // {326, 0, 68, 187}, + // } + + + // Image 2 + for frame, next := range images { + + roiRect2 := image.Rect(next[0], next[1], next[0]+frameWidth, next[1]+frameHeight) + // ck lady + //roiRect2 := image.Rect(next[0], next[1], next[0]+next[2], next[1]+next[3]) + roiImg2 := rgbImg.Region(roiRect2) + + cropImg2 := rgbImg.Clone() + scaleSize2 := image.Pt(int(rt.InputAttrs()[0].Dims[1]), int(rt.InputAttrs()[0].Dims[2])) + gocv.Resize(roiImg2, &cropImg2, scaleSize2, 0, 0, gocv.InterpolationArea) + + defer cropImg2.Close() + defer roiImg2.Close() + + gocv.IMWrite(fmt.Sprintf("/tmp/frame-%d.jpg", frame), cropImg2) + + start := time.Now() + + batch.Clear() + err = batch.Add(cropImg2) + + if err != 
nil {
+			log.Fatal("Error creating batch: ", err)
+		}
+
+		outputs, err = rt.Inference([]gocv.Mat{batch.Mat()})
+
+		if err != nil {
+			log.Fatal("Runtime inferencing failed with error: ", err)
+		}
+
+		endInference := time.Now()
+
+		output, err := batch.GetOutputInt(0, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
+
+		if err != nil {
+			log.Fatal("Getting output tensor failed with error: ", err)
+		}
+
+		fingerPrint2 := DequantizeAndL2Normalize(
+			output,
+			outputs.OutputAttributes().Scales[0],
+			outputs.OutputAttributes().ZPs[0],
+		)
+
+		//	sim := CosineSimilarity(fingerPrint, fingerPrint2)
+		//	dist := CosineDistance(fingerPrint, fingerPrint2)
+		//	fmt.Printf("Frame %d, cosine similarity: %f, distance=%f\n", frame, sim, dist)
+
+		// compute Euclidean (L2) distance directly
+		dist := EuclideanDistance(fingerPrint, fingerPrint2)
+
+		// 3) compute vs EMA
+		emaDist := EuclideanDistance(emaFP, fingerPrint2)
+
+		endDetect := time.Now()
+
+		objRes := "different person"
+		if emaDist < 0.51 {
+			objRes = "same person"
+		}
+
+		fmt.Printf("Frame %d, euclidean distance: %f, ema=%f (%s)\n", frame, dist, emaDist, objRes)
+
+		log.Printf("  Inference=%s, detect=%s, total time=%s\n",
+			endInference.Sub(start).String(),
+			endDetect.Sub(endInference).String(),
+			endDetect.Sub(start).String(),
+		)
+
+		// free outputs allocated in C memory after you have finished post processing
+		err = outputs.Free()
+
+		if err != nil {
+			log.Fatal("Error freeing Outputs: ", err)
+		}
+
+		// 4) update the EMA fingerprint
+		if frame >= 7 && frame <= 13 {
+
+			// emaFP = α*emaFP + (1-α)*fp2
+			for i := range emaFP {
+				emaFP[i] = alpha*emaFP[i] + (1-alpha)*fingerPrint2[i]
+			}
+			// 5) re-normalize emaFP back to unit length
+			var sum float32
+			for _, v := range emaFP {
+				sum += v * v
+			}
+			norm := float32(math.Sqrt(float64(sum)))
+			if norm > 0 {
+				for i := range emaFP {
+					emaFP[i] /= norm
+				}
+			}
+		}
+
+	}
+
+	// close runtime and release resources
+	err = rt.Close()
+
+	if err != nil {
+		log.Fatal("Error closing RKNN runtime: ", err)
+	}
+
+	log.Println("done")
+	*/
+}
+
+// Box holds object bounding box coordinates (x1, y1, x2, y2)
+type Box struct {
+	X1, Y1, X2, Y2 int
+}
+
+// Objects is a struct to represent the compare and dataset objects parsed
+// from the objects data file
+type Objects struct {
+	Compare []Box
+	Dataset []Box
+}
+
+// ParseObjects reads the TOML-like objects data file and returns the two lists
+// of objects and their bounding box coordinates
+func ParseObjects(path string) (*Objects, error) {
+
+	f, err := os.Open(path)
+
+	if err != nil {
+		return nil, err
+	}
+
+	defer f.Close()
+
+	objs := &Objects{}
+	section := "" // either "compare" or "dataset"
+	scanner := bufio.NewScanner(f)
+
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+
+		// skip blank or comment
+		if line == "" || strings.HasPrefix(line, "#") {
+			continue
+		}
+
+		// section header
+		if strings.HasPrefix(line, "[") && strings.HasSuffix(line, "]") {
+			section = strings.ToLower(line[1 : len(line)-1])
+			continue
+		}
+
+		// data line, expect four ints separated by commas
+		fields := strings.Split(line, ",")
+
+		if len(fields) != 4 {
+			return nil, fmt.Errorf("invalid data line %q", line)
+		}
+
+		nums := make([]int, 4)
+
+		for i, fstr := range fields {
+			v, err := strconv.Atoi(strings.TrimSpace(fstr))
+
+			if err != nil {
+				return nil, fmt.Errorf("parsing %q: %w", fstr, err)
+			}
+
+			nums[i] = v
+		}
+
+		// define box
+		box := Box{nums[0], nums[1], nums[2], nums[3]}
+
+		switch section {
+
+		case "compare":
+			objs.Compare = append(objs.Compare, box)
+
+		case "dataset":
+			objs.Dataset = append(objs.Dataset, box)
+
+		default:
+			return nil, fmt.Errorf("line %q outside of a known section", line)
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		return nil, err
+	}
+
+	return objs, nil
+}
+
+// AddObjectToBatch adds the cropped object from the source image to the batch
+// for running inference on
+func AddObjectToBatch(batch *rknnlite.Batch, srcImg gocv.Mat, obj Box,
+	scaleSize image.Point) error {
+
+	// get the object's region of interest from the source Mat
+	objRect := image.Rect(obj.X1, obj.Y1, obj.X2, obj.Y2)
+	objRoi := srcImg.Region(objRect)
+
+	objImg := objRoi.Clone()
+	gocv.Resize(objRoi, &objImg, scaleSize, 0, 0, gocv.InterpolationArea)
+
+	defer objRoi.Close()
+	defer objImg.Close()
+
+	return batch.Add(objImg)
+}
+
+// CompareObjects compares the outputs of two objects
+func CompareObjects(objA []int8, objB []int8, scales float32,
+	ZPs int32) float32 {
+
+	// get the fingerprint of both objects
+	fpA := reid.DequantizeAndL2Normalize(objA, scales, ZPs)
+	fpB := reid.DequantizeAndL2Normalize(objB, scales, ZPs)
+
+	// compute Euclidean (L2) distance directly
+	return reid.EuclideanDistance(fpA, fpB)
+}
diff --git a/postprocess/reid/reid.go b/postprocess/reid/reid.go
new file mode 100644
index 0000000..3243af4
--- /dev/null
+++ b/postprocess/reid/reid.go
@@ -0,0 +1,129 @@
+package reid
+
+import (
+	"bytes"
+	"crypto/sha256"
+	"encoding/binary"
+	"encoding/hex"
+	"math"
+)
+
+// DequantizeAndL2Normalize converts a quantized int8 vector "q" into a float32 vector,
+// applies dequantization using the provided scale "s" and zero-point "z",
+// and then normalizes the result to unit length using L2 normalization.
+//
+// This is commonly used to convert quantized embedding vectors back to a
+// normalized float form for comparison or similarity calculations.
+//
+// If the resulting vector has zero magnitude, the function returns the
+// unnormalized dequantized vector.
+func DequantizeAndL2Normalize(q []int8, s float32, z int32) []float32 {
+
+	N := len(q)
+	x := make([]float32, N)
+
+	// dequantize
+	for i := 0; i < N; i++ {
+		x[i] = float32(int32(q[i])-z) * s
+	}
+
+	// compute L2 norm
+	var sumSquares float32
+
+	for _, v := range x {
+		sumSquares += v * v
+	}
+
+	norm := float32(math.Sqrt(float64(sumSquares)))
+
+	if norm == 0 {
+		// avoid /0
+		return x
+	}
+
+	// normalize
+	for i := 0; i < N; i++ {
+		x[i] /= norm
+	}
+
+	return x
+}
+
+// FingerprintHash takes an L2-normalized []float32 and returns
+// a hex-encoded SHA-256 hash of its binary representation.
+func FingerprintHash(feat []float32) (string, error) {
+
+	buf := new(bytes.Buffer)
+
+	// write each float32 in little-endian
+	for _, v := range feat {
+		if err := binary.Write(buf, binary.LittleEndian, v); err != nil {
+			return "", err
+		}
+	}
+
+	sum := sha256.Sum256(buf.Bytes())
+
+	return hex.EncodeToString(sum[:]), nil
+}
+
+// CosineSimilarity returns the cosine of the angle between vectors a and b.
+// Assumes len(a)==len(b). If you have already L2-normalized them,
+// this is just their dot-product.
+func CosineSimilarity(a, b []float32) float32 {
+
+	var dot float32
+
+	for i := range a {
+		dot += a[i] * b[i]
+	}
+
+	// If not already normalized, you'd divide by norms here.
+	return dot
+}
+
+// CosineDistance returns 1 – cosine similarity. For L2-normalized vectors
+// this lies in [0,2], and small values mean "very similar."
+func CosineDistance(a, b []float32) float32 {
+	return 1 - CosineSimilarity(a, b)
+}
+
+// EuclideanDistance returns the L2 distance between two vectors.
+// Lower means "more similar" when your features are L2-normalized.
+func EuclideanDistance(a, b []float32) float32 {
+	var sum float32
+
+	for i := range a {
+		d := a[i] - b[i]
+		sum += d * d
+	}
+
+	return float32(math.Sqrt(float64(sum)))
+}
+
+// NormalizeVec normalizes the input float32 slice to unit length and returns
+// a new slice. If the input vector has zero magnitude, it returns the original
+// slice unchanged.
+func NormalizeVec(v []float32) []float32 {
+
+	norm := float32(0.0)
+
+	for _, x := range v {
+		norm += x * x
+	}
+
+	if norm == 0 {
+		return v // avoid division by zero
+	}
+
+	norm = float32(math.Sqrt(float64(norm)))
+
+	out := make([]float32, len(v))
+
+	for i, x := range v {
+		out[i] = x / norm
+	}
+
+	return out
+}

From d9f9a6b4f061c0295213725696f08bae339b66fc Mon Sep 17 00:00:00 2001
From: swdee
Date: Mon, 7 Jul 2025 15:32:39 +1200
Subject: [PATCH 12/16] added ReID tracking to stream example

---
 example/stream/bytetrack.go |  64 +++++++++-
 tracker/bytetracker.go      |  43 ++++++-
 tracker/object.go           |   2 +
 tracker/reid.go             | 246 ++++++++++++++++++++++++++++++++++++
 tracker/strack.go           |  81 ++++++++++++
 5 files changed, 425 insertions(+), 11 deletions(-)
 create mode 100644 tracker/reid.go

diff --git a/example/stream/bytetrack.go b/example/stream/bytetrack.go
index bb00863..3428c7b 100644
--- a/example/stream/bytetrack.go
+++ b/example/stream/bytetrack.go
@@ -127,18 +127,27 @@ type Demo struct {
 	// renderFormat indicates which rendering type to use with instance
 	// segmentation, outline or mask
 	renderFormat string
+	// reidModelFile is the model file to use for ReID
+	reidModelFile string
+	// reid is a flag to indicate if reid is being used or not
+	reid bool
+	// reidPool of rknnlite runtimes to perform inference in parallel
+	reidPool *rknnlite.Pool
 }
 
 // NewDemo returns and instance of Demo, a streaming HTTP server showing
 // video with object detection
 func NewDemo(vidSrc *VideoSource, modelFile, labelFile string, poolSize int,
-	modelType string, renderFormat string, rkPlatform string) (*Demo, error) {
+	modelType string, renderFormat string, rkPlatform string,
+	reidModelFile string, useReid bool) (*Demo, error) {
 
 	var err error
 
 	d := &Demo{
-		vidSrc:    vidSrc,
-		limitObjs: make([]string, 0),
+		vidSrc:        vidSrc,
+		limitObjs:     make([]string, 0),
+		reidModelFile: reidModelFile,
+		reid:          useReid,
 	}
 
 	if vidSrc.Format == VideoFile {
@@ -220,6 +229,15 @@ func NewDemo(vidSrc *VideoSource, modelFile, labelFile string, poolSize int,
 		log.Printf("***WARNING*** %s only has 1 TOPS NPU, downgraded to %d FPS\n", rkPlatform, FPS)
 	}
 
+	if d.reid {
+		if strings.EqualFold(rkPlatform[:5], "rk356") {
+			log.Fatal("***WARNING*** ReID is unavailable for RK356x platforms as the 1 TOPS NPU is not powerful enough")
+		}
+		FPS = 4
+		FPSinterval = time.Duration(float64(time.Second) / float64(FPS))
+		log.Println("***WARNING*** ReID is experimental and requires a lot of NPU time, downgraded to 4 FPS")
+	}
+
 	// load in Model class names
 	d.labels, err = rknnlite.LoadLabels(labelFile)
 
@@ -227,6 +245,19 @@ func NewDemo(vidSrc *VideoSource, modelFile, labelFile string, poolSize int,
 		return nil, fmt.Errorf("Error loading model labels: %w", err)
 	}
 
+	// create pool for ReID
+	if d.reid {
+		d.reidPool, err = rknnlite.NewPool(poolSize, reidModelFile,
+			[]rknnlite.CoreMask{rknnlite.NPUCoreAuto})
+
+		if err != nil {
+			log.Fatalf("Error creating ReID RKNN pool: %v\n", err)
+		}
+
+		// set runtime to leave output tensors as int8
+		d.reidPool.SetWantFloat(false)
+	}
+
 	return d, nil
 }
 
@@ -360,6 +391,10 @@ func (d *Demo) Stream(w http.ResponseWriter, r *http.Request) {
 
 	// record of past object detections for tracking
 	byteTrack := tracker.NewBYTETracker(FPS, FPS*10, 0.5, 0.6, 0.8)
 
+	if d.reid {
+		byteTrack.UseReID(d.reidPool, tracker.Euclidean, 0.51)
+	}
+
 	// create a trails history
 	trail := tracker.NewTrail(90)
 
@@ -491,9 +526,18 @@ func (d *Demo) ProcessFrame(img gocv.Mat, retChan chan<- ResultFrame,
 
 	// track detected objects
 	timing.TrackerStart = time.Now()
-	trackObjs, err := byteTrack.Update(
-		postprocess.DetectionsToObjects(detectResults),
-	)
+	var trackObjs []*tracker.STrack
+
+	if d.reid {
+		trackObjs, err = byteTrack.UpdateWithFrame(
+			postprocess.DetectionsToObjects(detectResults),
+			resImg,
+		)
+	} else {
+		trackObjs, err = byteTrack.Update(
+			postprocess.DetectionsToObjects(detectResults),
+		)
+	}
 
 	timing.TrackerEnd = time.Now()
 
@@ -713,6 +757,8 @@ func main() {
 	renderFormat := flag.String("r", "outline", "The rendering format used for instance segmentation [outline|mask]")
 	codecFormat := flag.String("codec", "mjpg", "Web Camera codec The rendering format [mjpg|yuyv]")
 	rkPlatform := flag.String("p", "rk3588", "Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3582|rk3588]")
+	reidModelFile := flag.String("rm", "../data/models/rk3588/osnet-market1501-batch8-rk3588.rknn", "RKNN compiled OSNet/Re-Identification model file")
+	useReid := flag.Bool("reid", false, "Enable Re-Identification enhanced tracking")
 
 	// Initialize the custom camera resolution flag with a default value
 	cameraRes := &cameraResFlag{value: "1280x720@30"}
@@ -760,8 +806,12 @@ func main() {
 		*modelFile = strings.ReplaceAll(*modelFile, "rk3588", *rkPlatform)
 	}
 
+	if f := flag.Lookup("rm"); f != nil && f.Value.String() == f.DefValue && *rkPlatform != "rk3588" {
+		*reidModelFile = strings.ReplaceAll(*reidModelFile, "rk3588", *rkPlatform)
+	}
+
 	demo, err := NewDemo(vidSrc, *modelFile, *labelFile, *poolSize,
-		*modelType, *renderFormat, *rkPlatform)
+		*modelType, *renderFormat, *rkPlatform, *reidModelFile, *useReid)
 
 	if err != nil {
 		log.Fatalf("Error creating demo: %v", err)
diff --git a/tracker/bytetracker.go b/tracker/bytetracker.go
index b07ea4e..a5c206e 100644
--- a/tracker/bytetracker.go
+++ b/tracker/bytetracker.go
@@ -25,6 +25,10 @@ type BYTETracker struct {
 	lostStracks []*STrack
 	// List of removed objects
 	removedStracks []*STrack
+	// reid supported tracking
+	reid *reID
+	// useReid is a flag to indicate if ReID supported tracking is to be used
+	useReid bool
 }
 
 // NewBYTETracker initializes and returns a new BYTETracker
@@ -62,6 +66,10 @@ func (bt *BYTETracker) Update(objects []Object) ([]*STrack, error) {
 		strack := NewSTrack(NewRect(object.Rect.X(), object.Rect.Y(),
 			object.Rect.Width(), object.Rect.Height()), object.Prob, object.ID, object.Label)
 
+		if bt.useReid {
+			strack.WithFeature(object.Feature, 0.9, 30)
+		}
+
 		if object.Prob >= bt.trackThresh {
 			detStracks = append(detStracks, strack)
 		} else {
@@ -87,11 +95,18 @@ func (bt *BYTETracker) Update(objects []Object) ([]*STrack, error) {
 		strack.Predict()
 	}
 
-	// Step 2: First association, with IoU
+	// Step 2: First association, using IoU or feature distance matching
 	var currentTrackedStracks, remainTrackedStracks, remainDetStracks, refindStracks []*STrack
 
+	var costMatrix [][]float32
+
+	if bt.useReid {
+		costMatrix = bt.calcFeatureDistance(strackPool, detStracks)
+	} else {
+		costMatrix = bt.calcIouDistance(strackPool, detStracks)
+	}
+
 	matchesIdx, unmatchTrackIdx, unmatchDetectionIdx, err := bt.linearAssignment(
-		bt.calcIouDistance(strackPool, detStracks),
+		costMatrix,
 		len(strackPool), len(detStracks), bt.matchThresh,
 	)
@@ -126,7 +141,8 @@ func (bt *BYTETracker) Update(objects []Object) ([]*STrack, error) {
 		}
 	}
 
-	// Step 3: Second association, using low score dets
+	// Step 3: IoU fallback matching for unmatched tracks,
+	// using low score detections
 	var currentLostStracks []*STrack
 
 	matchesIdx, unmatchTrackIdx, unmatchDetectionIdx, err = bt.linearAssignment(
@@ -162,6 +178,7 @@ func (bt *BYTETracker) Update(objects []Object) ([]*STrack, error) {
 	}
 
 	// Step 4: Init new stracks
+	// Match non-activated tracks to the remaining unmatched detections (high confidence only)
 	var currentRemovedStracks []*STrack
 
 	matchesIdx, unmatchUnconfirmedIdx, unmatchDetectionIdx, err := bt.linearAssignment(
@@ -197,7 +214,7 @@ func (bt *BYTETracker) Update(objects []Object) ([]*STrack, error) {
 		currentTrackedStracks = append(currentTrackedStracks, track)
 	}
 
-	// Step 5: Update state
+	// Step 5: Update state - Time-based removal of old lost tracks
 	for _, lostStrack := range bt.lostStracks {
 		if bt.frameID-lostStrack.GetFrameID() > bt.maxTimeLost {
 			lostStrack.MarkAsRemoved()
@@ -508,3 +525,21 @@ func (bt *BYTETracker) execLapjv(cost [][]float32, extendCost bool,
 
 	return rowsol, colsol, opt, nil
 }
+
+// calcFeatureDistance calculates the embedded feature distance between
+// each pair of the specified tracks and detections
+func (bt *BYTETracker) calcFeatureDistance(tracks, detections []*STrack) [][]float32 {
+
+	cost := make([][]float32, len(tracks))
+
+	for i, tr := range tracks {
+
+		cost[i] = make([]float32, len(detections))
+
+		for j, det := range detections {
+			cost[i][j] = tr.BestMatchDistance(det.feature)
+		}
+	}
+
+	return cost
+}
diff --git a/tracker/object.go b/tracker/object.go
index 182c188..6ea6344 100644
--- a/tracker/object.go
+++ b/tracker/object.go
@@ -11,6 +11,8 @@ type Object struct {
 	// ID is a unique ID to give this object which can be used to match
 	// the input detection object and tracked object
 	ID int64
+	// Feature is a ReID embedding feature
+	Feature []float32
 }
 
 // NewObject is a constructor function for the Object struct
diff --git a/tracker/reid.go b/tracker/reid.go
new file mode 100644
index 0000000..3fb5035
--- /dev/null
+++ b/tracker/reid.go
@@ -0,0 +1,246 @@
+package tracker
+
+import (
+	"fmt"
+	"github.com/swdee/go-rknnlite"
+	"github.com/swdee/go-rknnlite/postprocess/reid"
+	"gocv.io/x/gocv"
+	"image"
+	"sync"
+)
+
+// DistanceMethod defines ReID distance calculation methods
+type DistanceMethod int
+
+const (
+	Euclidean DistanceMethod = 1
+	Cosine    DistanceMethod = 2
+)
+
+// reID struct holds all Re-Identification processing features
+type reID struct {
+	// pool is the rknnlite runtime pool to run inference on
+	pool *rknnlite.Pool
+	// dist is the distance method to apply to calculations to determine similarity
+	dist DistanceMethod
+	// threshold is the distance cutoff to determine similar or different objects
+	threshold float32
+	// batchSize stores the model input tensor batch size
+	batchSize int
+	width     int
+	height    int
+	channels  int
+	// batchPool holds a pool of batches
+	batchPool *rknnlite.BatchPool
+	// scaleSize is the size of the input tensor dimensions to scale the object to
+	scaleSize image.Point
+}
+
+// UseReID sets up Re-Identification processing on the BYTETracker instance
+func (bt *BYTETracker) UseReID(pool *rknnlite.Pool, dist DistanceMethod,
+	threshold float32) {
+
+	// query runtime and get tensor dimensions
+	rt := pool.Get()
+
+	batchSize := int(rt.InputAttrs()[0].Dims[0])
+	width := int(rt.InputAttrs()[0].Dims[1])
+	height := int(rt.InputAttrs()[0].Dims[2])
+	channels := int(rt.InputAttrs()[0].Dims[3])
+
+	bt.reid = &reID{
+		pool:      pool,
+		dist:      dist,
+		threshold: threshold,
+		batchSize: batchSize,
+		width:     width,
+		height:    height,
+		channels:  channels,
+		scaleSize: image.Pt(width, height),
+		batchPool: rknnlite.NewBatchPool(pool.Size(), rt),
+	}
+
+	pool.Return(rt)
+
+	bt.useReid = true
+}
+
+// UpdateWithFrame updates the tracker with new detections and passes the
+// image frame so ReID inference can be conducted
+func (bt *BYTETracker) UpdateWithFrame(objects []Object, frame gocv.Mat) ([]*STrack, error) {
+
+	// check if ReID is enabled and get embedding features for all objects
+	if bt.useReid {
+
+		bufFrame := frame.Clone()
+		defer bufFrame.Close()
+
+		features, err := bt.reid.processObjects(objects, bufFrame)
+
+		if err != nil {
+			return nil, fmt.Errorf("failed to process objects: %w", err)
+		}
+
+		for i := range objects {
+			objects[i].Feature = features[i]
+		}
+	}
+
+	// run track update
+	tracks, err := bt.Update(objects)
+
+	if err != nil {
+		return nil, fmt.Errorf("error updating objects: %w", err)
+	}
+
+	return tracks, nil
+}
+
+// Close frees memory from the reid instance
+func (r *reID) Close() {
+	r.batchPool.Close()
+}
+
+// processObjects takes the detected objects and runs inference on them to get
+// their embedded feature fingerprints. This function is intended to be called
+// from a goroutine.
+func (r *reID) processObjects(objects []Object, frame gocv.Mat) ([][]float32, error) {
+
+	var wg sync.WaitGroup
+	total := len(objects)
+
+	// collect per-object feature embeddings
+	allEmbeddings := make([][]float32, total)
+	errCh := make(chan error, (total+r.batchSize-1)/r.batchSize)
+
+	for offset := 0; offset < total; offset += r.batchSize {
+
+		end := offset + r.batchSize
+
+		if end > total {
+			end = total
+		}
+
+		batchObjs := objects[offset:end]
+
+		// capture range variables for closure
+		capOffset := offset
+		capCnt := end - offset
+
+		wg.Add(1)
+		batch := r.batchPool.Get()
+		rt := r.pool.Get()
+
+		go func(rt *rknnlite.Runtime, batch *rknnlite.Batch, bobjs []Object, off, cnt int) {
+			defer wg.Done()
+			fps, err := r.processBatch(rt, batch, bobjs, frame)
+			r.pool.Return(rt)
+			r.batchPool.Return(batch)
+
+			if err != nil {
+				errCh <- err
+				return
+			}
+
+			// copy this batch's fingerprints into their offset position in
+			// the combined results
+			for i := 0; i < cnt; i++ {
+				allEmbeddings[off+i] = fps[i]
+			}
+
+			errCh <- nil
+		}(rt, batch, batchObjs, capOffset, capCnt)
+	}
+
+	wg.Wait()
+	close(errCh)
+
+	// if any error, just bail
+	for e := range errCh {
+		if e != nil {
+			return nil, fmt.Errorf("ReID error:", e)
+		}
+	}
+
+	return allEmbeddings, nil
+}
+
+// processBatch adds the objects to a batch and runs inference on them
+func (r *reID) processBatch(rt *rknnlite.Runtime, batch *rknnlite.Batch,
+	bobjs []Object, frame gocv.Mat) ([][]float32, error) {
+
+	height := frame.Rows()
+	width := frame.Cols()
+
+	for _, obj := range bobjs {
+
+		// clamp and get bounding box coordinates
+		x1 := clamp(int(obj.Rect.TLX()), 0, width)
+		y1 := clamp(int(obj.Rect.TLY()), 0, height)
+		x2 := clamp(int(obj.Rect.BRX()), 0, width)
+		y2 := clamp(int(obj.Rect.BRY()), 0, height)
+
+		objRect := image.Rect(x1, y1, x2, y2)
+
+		// get the object's region of interest from the source Mat
+		objRoi := frame.Region(objRect)
+		objImg := gocv.NewMat()
+
+		// resize to input tensor size
+		gocv.Resize(objRoi, &objImg, r.scaleSize, 0, 0, gocv.InterpolationArea)
+
+		objRoi.Close()
+
+		err := batch.Add(objImg)
+		objImg.Close()
+
+		if err != nil {
+			return nil, fmt.Errorf("error adding image to batch: %w", err)
+		}
+	}
+
+	// run inference on the batch
+	outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})
+
+	if err != nil {
+		return nil, fmt.Errorf("inference failed: %v", err)
+	}
+
+	defer outputs.Free()
+
+	// unpack per-object results
+	fingerprints := make([][]float32, len(bobjs))
+
+	for idx := 0; idx < len(bobjs); idx++ {
+
+		output, err := batch.GetOutputInt(idx, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
+
+		if err != nil {
+			return nil, fmt.Errorf("error getting output %d: %v", idx, err)
+		}
+
+		// get object fingerprint
+		fingerprints[idx] = reid.DequantizeAndL2Normalize(
+			output,
+			outputs.OutputAttributes().Scales[0],
+			outputs.OutputAttributes().ZPs[0],
+		)
+	}
+
+	return fingerprints, nil
+}
+
+// clamp restricts the given value to be within the range min and max
+func clamp(val, min, max int) int {
+
+	if val > min {
+
+		if val < max {
+			return val // value is already within range
+		}
+
+		return max
+	}
+
+	return min
+}
diff --git a/tracker/strack.go b/tracker/strack.go
index e9959ce..e594a0d 100644
--- a/tracker/strack.go
+++ b/tracker/strack.go
@@ -2,6 +2,7 @@ package tracker
 
 import (
 	"fmt"
+	"github.com/swdee/go-rknnlite/postprocess/reid"
 	"gonum.org/v1/gonum/mat"
 )
@@ -47,6 +48,18 @@ type STrack struct {
 	detectionID int64
 	// label is the object label/class from yolo inference
 	label int
+	// feature embedding used for ReID
+	feature []float32
+	// smoothFeature is an EMA smoothed feature embedding used for ReID
+	smoothFeature []float32
+	// featureQueue is a history of features
+	featureQueue [][]float32
+	// maxQueueSize is the featureQueue maximum size, eg: 30
+	maxQueueSize int
+	// alpha is the value used in the EMA smoothing calculation
+	alpha float32
+	// hasFeature is a flag to indicate if WithFeature() has been set
+	hasFeature bool
 }
 
 // NewSTrack creates a new STrack
@@ -68,6 +81,15 @@ func NewSTrack(rect Rect, score float32, detectionID int64, label int) *STrack {
 	}
 }
 
+// WithFeature adds an object's embedded feature from ReID inference to the STrack
+func (s *STrack) WithFeature(feature []float32, alpha float32, qsize int) {
+	s.hasFeature = true
+	s.alpha = alpha
+	s.maxQueueSize = qsize
+	s.featureQueue = make([][]float32, 0, qsize)
+	s.UpdateFeatures(feature)
+}
+
 // GetRect returns the bounding box of the tracked object
 func (s *STrack) GetRect() *Rect {
 	return &s.rect
@@ -155,6 +177,8 @@ func (s *STrack) ReActivate(newTrack *STrack, frameID, newTrackID int) {
 
 	s.frameID = frameID
 	s.trackletLen = 0
+
+	s.UpdateFeatures(newTrack.feature)
 }
 
 // Predict predicts the next state of the track
@@ -185,6 +209,8 @@ func (s *STrack) Update(newTrack *STrack, frameID int) error {
 	s.frameID = frameID
 	s.trackletLen++
 
+	s.UpdateFeatures(newTrack.feature)
+
 	return nil
 }
@@ -205,3 +231,58 @@ func (s *STrack) updateRect() {
 	s.rect.SetX(s.mean[0] - s.rect.Width()/2)
 	s.rect.SetY(s.mean[1] - s.rect.Height()/2)
 }
+
+// UpdateFeatures updates an STrack's ReID embedded features
+func (s *STrack) UpdateFeatures(feat []float32) {
+
+	if !s.hasFeature {
+		return
+	}
+
+	normFeat := reid.NormalizeVec(feat)
+	s.feature = normFeat
+
+	if s.smoothFeature == nil {
+		s.smoothFeature = make([]float32, len(normFeat))
+		copy(s.smoothFeature, normFeat)
+
+	} else {
+		for i := range normFeat {
+			s.smoothFeature[i] = s.alpha*s.smoothFeature[i] + (1-s.alpha)*normFeat[i]
+		}
+		s.smoothFeature = reid.NormalizeVec(s.smoothFeature)
+	}
+
+	// Enqueue the feature
+	s.featureQueue = append(s.featureQueue, normFeat)
+
+	if len(s.featureQueue) > s.maxQueueSize {
+		s.featureQueue = s.featureQueue[1:]
+	}
+}
+
+// BestMatchDistance compares a new detection against all stored past features
+func (s *STrack) BestMatchDistance(detFeat []float32) float32 {
+
+	if !s.hasFeature {
+		// feature not set so return max distance
+		return 1.0
+	}
+
+	if len(s.featureQueue) == 0 {
+		return 1.0 // max distance
+	}
+
+	detNorm := reid.NormalizeVec(detFeat)
+	best := float32(1.0)
+
+	for _, f := range s.featureQueue {
+		d := reid.EuclideanDistance(f, detNorm)
+
+		if d < best {
+			best = d
+		}
+	}
+
+	return best
+}

From 74e0f57eafae235f82f5efb3f1c89add564e1a09 Mon Sep 17 00:00:00 2001
From: swdee
Date: Mon, 7 Jul 2025 15:37:21 +1200
Subject: [PATCH 13/16] added note to stream example about ReID

---
 example/stream/README.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/example/stream/README.md b/example/stream/README.md
index 31c7bd8..3bcc9c5 100644
--- a/example/stream/README.md
+++ b/example/stream/README.md
@@ -313,6 +313,19 @@ itself is not 100%. Whilst this demo shows a complete solution, you would stil
 need to do work to train a better model and testing for your own use case.
 
+## Re-Identification (ReID)
+
+Experimental ReID has been added which follows the implementation of the
+[FairMOT](https://github.com/FoundationVision/ByteTrack/tree/main/tutorials/fairmot) tracker,
+however it makes use of the OSNet model trained on the Market1501 dataset.
+
+Usage of ReID is expensive and typically takes around 200ms per frame to complete
+on the RK3588 NPU. There is little accuracy improvement over plain ByteTrack,
+which by itself adds little overhead to the YOLO object detection.
+
+We need to wait for Rockchip's next generation RK36xx SoC before this may be useful.
+
+
 ## Background
 
 The ByteTrack code is a Go conversion of the [C++ project](https://github.com/ifzhang/ByteTrack).
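As a guide to the API this patch series introduces, the ReID wiring performed by the stream example reduces to a few calls on the tracker. The sketch below is illustrative only: the pool size, model path, frame rate values and input image are placeholder assumptions, and the detections slice would normally be populated from YOLO results via `postprocess.DetectionsToObjects()`.

```
package main

import (
	"log"

	"github.com/swdee/go-rknnlite"
	"github.com/swdee/go-rknnlite/tracker"
	"gocv.io/x/gocv"
)

func main() {
	// pool of runtimes for the batch compiled OSNet ReID model
	// (pool size and model path are placeholders)
	reidPool, err := rknnlite.NewPool(3, "osnet-market1501-batch8-rk3588.rknn",
		[]rknnlite.CoreMask{rknnlite.NPUCoreAuto})

	if err != nil {
		log.Fatalf("Error creating ReID RKNN pool: %v", err)
	}

	// ReID post processing dequantizes the int8 output tensors itself
	reidPool.SetWantFloat(false)

	// tracker running at 25 FPS, keeping lost tracks for 10 seconds
	byteTrack := tracker.NewBYTETracker(25, 250, 0.5, 0.6, 0.8)

	// enable ReID matching using euclidean distance with the same 0.51
	// similar/different cutoff the stream example uses
	byteTrack.UseReID(reidPool, tracker.Euclidean, 0.51)

	frame := gocv.IMRead("frame.jpg", gocv.IMReadColor)
	defer frame.Close()

	// detections for the frame, normally populated from YOLO results
	var objects []tracker.Object

	// UpdateWithFrame crops each detection from the frame, runs batched
	// OSNet inference to obtain embeddings, then performs the usual
	// ByteTrack update using feature distance matching
	trackObjs, err := byteTrack.UpdateWithFrame(objects, frame)

	if err != nil {
		log.Fatal("Error updating tracker: ", err)
	}

	log.Printf("tracking %d objects\n", len(trackObjs))
}
```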
From 034a6972b929e192546ef828d2c10521a7bab065 Mon Sep 17 00:00:00 2001 From: swdee Date: Mon, 7 Jul 2025 15:43:40 +1200 Subject: [PATCH 14/16] updated go-rknnlite-build commit in toolkit docker file --- toolkit/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolkit/Dockerfile b/toolkit/Dockerfile index a20d4f1..1984a27 100644 --- a/toolkit/Dockerfile +++ b/toolkit/Dockerfile @@ -91,7 +91,7 @@ RUN pip install --no-cache-dir \ # download other onnx models RUN git clone --depth 1 https://github.com/swdee/go-rknnlite-build.git /opt/go-rknnlite-build && \ - git -C /opt/go-rknnlite-build fetch --depth 1 origin 15dd75d4bc23486931e860f8ddd5e505c4003aba && \ + git -C /opt/go-rknnlite-build fetch --depth 1 origin ce8b5ce1dc53b1c38324e7506374731ad21070c8 && \ git -C /opt/go-rknnlite-build checkout FETCH_HEAD # copy our modified mobilenet.py script into the rknn_model_zoo directory From 938677f3f4f4bff8ed702f8380d8c5b465bbfe91 Mon Sep 17 00:00:00 2001 From: swdee Date: Mon, 7 Jul 2025 16:18:17 +1200 Subject: [PATCH 15/16] fix batch benchmark code after refactoring NewBatch() --- batch_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/batch_test.go b/batch_test.go index e029b12..e8aaec8 100644 --- a/batch_test.go +++ b/batch_test.go @@ -134,7 +134,7 @@ func BenchmarkBatchSize(b *testing.B) { } // pre-allocate the batch container - batch := rt.NewBatch(tc.batchSize, height, width, channels) + batch := NewBatch(tc.batchSize, height, width, channels, rt.inputTypeFloat32) defer batch.Close() b.ResetTimer() @@ -175,7 +175,7 @@ func TestBatchAddAndOverflow(t *testing.T) { r := &Runtime{inputTypeFloat32: false} - batch := r.NewBatch(2, 2, 3, 1) + batch := NewBatch(2, 2, 3, 1, r.inputTypeFloat32) defer batch.Close() // create Mats with known data @@ -240,7 +240,7 @@ func TestBatchAddAtAndClear(t *testing.T) { r := &Runtime{inputTypeFloat32: false} - batch := r.NewBatch(3, 2, 2, 1) + batch := NewBatch(3, 2, 2, 1, r.inputTypeFloat32) defer batch.Close() m := gocv.NewMatWithSize(2, 2, gocv.MatTypeCV8U) @@ -281,7 +281,7 @@ func TestGetOutputIntAndF32(t *testing.T) { r := &Runtime{inputTypeFloat32: false} - batch := r.NewBatch(2, 2, 2, 1) + batch := NewBatch(2, 2, 2, 1, r.inputTypeFloat32) defer batch.Close() // Test GetOutputInt bounds From d5fca6e6a1c021360ce9dd3b3cffa8e742278480 Mon Sep 17 00:00:00 2001 From: swdee Date: Mon, 7 Jul 2025 16:19:48 +1200 Subject: [PATCH 16/16] fix go-vet issues on errorf --- tracker/reid.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tracker/reid.go b/tracker/reid.go index 3fb5035..e7dcd72 100644 --- a/tracker/reid.go +++ b/tracker/reid.go @@ -158,7 +158,7 @@ func (r *reID) processObjects(objects []Object, frame gocv.Mat) ([][]float32, er // if any error, just bail for e := range errCh { if e != nil { - return nil, fmt.Errorf("ReID error:", e) + return nil, fmt.Errorf("ReID error: %w", e) } }
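To make the relationship between the `postprocess/reid` helpers concrete, the closing sketch below runs them over two quantized embeddings; the embedding values, scale, and zero point are made up for illustration and would normally be read from the model's output tensor attributes.

```
package main

import (
	"fmt"

	"github.com/swdee/go-rknnlite/postprocess/reid"
)

func main() {
	// two quantized int8 embeddings as they would be read from the model's
	// output tensor (values are made up for illustration)
	qa := []int8{12, -3, 44, 7, -20, 15}
	qb := []int8{10, -1, 40, 9, -18, 14}

	// dequantize with the output tensor's scale and zero point, then
	// L2 normalize to unit length
	fa := reid.DequantizeAndL2Normalize(qa, 0.0235, -14)
	fb := reid.DequantizeAndL2Normalize(qb, 0.0235, -14)

	// for unit length vectors the two metrics are related by
	// euclidean^2 = 2 * (1 - cosine similarity), so either can be used
	// with a suitably chosen threshold
	fmt.Println("euclidean distance:", reid.EuclideanDistance(fa, fb))
	fmt.Println("cosine distance:", reid.CosineDistance(fa, fb))

	// hash a fingerprint for logging or de-duplication
	hash, err := reid.FingerprintHash(fa)

	if err != nil {
		fmt.Println("hashing failed:", err)
		return
	}

	fmt.Println("fingerprint:", hash)
}
```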