Merge pull request #43 from swdee/reid

Re-Identification and Batch processing
This commit is contained in:
swdee
2025-07-07 16:39:51 +12:00
committed by GitHub
20 changed files with 2380 additions and 18 deletions


@@ -75,6 +75,7 @@ See the [example](example) directory.
* Image Classification
* [MobileNet Demo](example/mobilenet)
* [Pooled Runtime Usage](example/pool)
* [Batch Input Usage](example/batch)
* Object Detection
* [YOLOv5 Demo](example/yolov5)
* [YOLOv8 Demo](example/yolov8)
@@ -97,6 +98,8 @@ See the [example](example) directory.
* [PPOCR Detect](example/ppocr#ppocr-detect) - Takes an image and detects areas of text.
* [PPOCR Recognise](example/ppocr#ppocr-recognise) - Takes an area of text and performs OCR on it.
* [PPOCR System](example/ppocr#ppocr-system) - Combines both Detect and Recognise.
* Tracking
* [Re-Identification Demo](example/reid) - Re-Identify (ReID) similar objects for tracking, uses batch processing.
* Streaming
* [HTTP Stream with ByteTrack Tracking](example/stream) - Demo that streams a video over HTTP with YOLO object detection and ByteTrack object tracking.
* Slicing Aided Hyper Inference
@@ -164,6 +167,24 @@ If you use `rknnlite.NewRuntimeByPlatform()` instead this will be automatically
set for you.
## Runtime Inference
Once a Runtime has been created, inference is performed by passing in the
input tensors.
```
rt.Inference([]gocv.Mat{})
```
The `Inference()` function takes a slice of gocv.Mats, where the number of
elements in the slice corresponds to the number of input tensors the
Model has. Most models have only a single input tensor, so typically a single
gocv.Mat is passed here.
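For example, a minimal single-input sketch (assuming the image has already been
resized to the model's input tensor dimensions):
```
img := gocv.IMRead("image.jpg", gocv.IMReadColor)
defer img.Close()

// run inference on the single input tensor
outputs, err := rt.Inference([]gocv.Mat{img})
if err != nil {
    log.Fatal("inference failed: ", err)
}
// ...post process results...

// free outputs allocated in C memory when finished
outputs.Free()
```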
To pass multiple images in a single `Inference()` call you need to use
[Batching](example/batch).
## CPU Affinity
The performance of the NPU is affected by which CPU cores your program runs on, so

batch.go (new file, 188 lines)

@@ -0,0 +1,188 @@
package rknnlite
import (
"fmt"
"gocv.io/x/gocv"
)
// Batch defines a struct used for concatenating a batch of gocv.Mat's
// together into a single gocv.Mat for use with image batching on
// a Model
type Batch struct {
mat gocv.Mat
// size of the batch
size int
// width is the input tensor size width
width int
// height is the input tensor size height
height int
// channels is the input tensor number of channels
channels int
// inputTypeFloat32 sets the runtime.inputTypeFloat32 value
inputTypeFloat32 bool
// matType is the Mat type images must be passed as
matType gocv.MatType
// matCnt is a counter for how many Mats have been added with Add()
matCnt int
// imgSize stores an images size made up from its elements
imgSize int
}
// NewBatch creates a batch of concatenated Mats for the given input tensor
// and batch size
func NewBatch(batchSize, height, width, channels int, inputTypeFloat32 bool) *Batch {
// Choose output Mat type
var matType gocv.MatType
if inputTypeFloat32 {
matType = gocv.MatTypeCV32F
} else {
matType = gocv.MatTypeCV8U
}
shape := []int{batchSize, height, width, channels}
return &Batch{
size: batchSize,
height: height,
width: width,
channels: channels,
mat: gocv.NewMatWithSizes(shape, matType),
inputTypeFloat32: inputTypeFloat32,
matType: matType,
matCnt: 0,
imgSize: height * width * channels,
}
}
// Add a Mat to the batch
func (b *Batch) Add(img gocv.Mat) error {
// check if batch is full
if b.matCnt >= b.size {
return fmt.Errorf("batch full")
}
res := b.addAt(b.matCnt, img)
if res != nil {
return res
}
// increment image counter
b.matCnt++
return nil
}
// AddAt adds a Mat to the batch at the specific index location
func (b *Batch) AddAt(idx int, img gocv.Mat) error {
if idx < 0 || idx >= b.size {
return fmt.Errorf("index %d out of range [0-%d)", idx, b.size)
}
return b.addAt(idx, img)
}
// addAt adds a Mat to the specified index location
func (b *Batch) addAt(idx int, img gocv.Mat) error {
// validate mat dimensions
if img.Rows() != b.height || img.Cols() != b.width ||
img.Channels() != b.channels {
return fmt.Errorf("image does not match batch shape")
}
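// clone non-contiguous Mats so the DataPtr* calls below expose a single
// continuous buffer we can copy from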
if !img.IsContinuous() {
img = img.Clone()
}
if b.inputTypeFloat32 {
// pointer of the batch mat
dstAll, err := b.mat.DataPtrFloat32()
if err != nil {
return fmt.Errorf("error accessing float32 batch memory: %w", err)
}
src, err := img.DataPtrFloat32()
if err != nil {
return fmt.Errorf("error getting float32 data from image: %w", err)
}
offset := idx * b.imgSize
copy(dstAll[offset:], src)
} else {
// pointer of the batch mat
dstAll, err := b.mat.DataPtrUint8()
if err != nil {
return fmt.Errorf("error accessing uint8 batch memory: %w", err)
}
src, err := img.DataPtrUint8()
if err != nil {
return fmt.Errorf("error getting uint8 data from image: %w", err)
}
offset := idx * b.imgSize
copy(dstAll[offset:], src)
}
return nil
}
// GetOutputInt returns the tensor output for the specified image number
// as an int8 output. idx starts counting from 0 to (batchSize-1)
func (b *Batch) GetOutputInt(idx int, outputs Output, size int) ([]int8, error) {
if idx < 0 || idx >= b.size {
return nil, fmt.Errorf("index %d out of range [0-%d)", idx, b.size)
}
offset := idx * size
if offset+size > int(outputs.Size) {
return nil, fmt.Errorf("offset %d out of range [%d,%d)", offset, outputs.Size, offset+size)
}
return outputs.BufInt[offset : offset+size], nil
}
// GetOutputF32 returns the tensor output for the specified image number
// as a float32 output. idx starts counting from 0 to (batchSize-1)
func (b *Batch) GetOutputF32(idx int, outputs Output, size int) ([]float32, error) {
if idx < 0 || idx >= b.size {
return nil, fmt.Errorf("index %d out of range [0-%d)", idx, b.size)
}
offset := idx * size
if offset+size > int(outputs.Size) {
return nil, fmt.Errorf("offset %d out of range [%d,%d)", offset, outputs.Size, offset+size)
}
return outputs.BufFloat[offset : offset+size], nil
}
// Mat returns the concatenated mat
func (b *Batch) Mat() gocv.Mat {
return b.mat
}
// Clear the batch so it can be reused again
func (b *Batch) Clear() {
// just reset the counter, we don't need to clear the underlying b.mat
// as it will be overwritten when Add() is called with new images
b.matCnt = 0
}
// Close the batch and free allocated memory
func (b *Batch) Close() error {
return b.mat.Close()
}

batch_test.go (new file, 329 lines)

@@ -0,0 +1,329 @@
package rknnlite
import (
"errors"
"flag"
"fmt"
"gocv.io/x/gocv"
"path/filepath"
"regexp"
"strconv"
"strings"
"testing"
"time"
)
var modelFiles = flag.String("m", "osnet_x1_0_market_256x128-rk3588-batch{1,4,8,16}.rknn",
"RKNN compiled model files in format <name>-batch{N1,N2,...,Nk}.rknn")
var rkPlatform = flag.String("p", "rk3588",
"Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588]")
// expandModelPattern takes a pattern like
//
// "/some/dir/osnet_x1_0_market_256x128-rk3588-batch{1,4,8,16}.rknn"
//
// and returns:
//
// []string{
// "/some/dir/osnet_x1_0_market_256x128-rk3588-batch1.rknn",
// "/some/dir/osnet_x1_0_market_256x128-rk3588-batch4.rknn",
// "/some/dir/osnet_x1_0_market_256x128-rk3588-batch8.rknn",
// "/some/dir/osnet_x1_0_market_256x128-rk3588-batch16.rknn",
// }
func expandModelPattern(pattern string) ([]modelBatches, error) {
// split off the directory and file
dir, file := filepath.Split(pattern)
// match exactly "<prefix>-batch{n1,n2,...}.rknn"
re := regexp.MustCompile(`^(.+)-batch\{([\d,]+)\}\.rknn$`)
m := re.FindStringSubmatch(file)
if m == nil {
return nil, errors.New("invalid pattern: must be name-batch{n1,n2,...}.rknn")
}
prefix := m[1] // e.g. "osnet_x1_0_market_256x128-rk3588"
numsCSV := m[2] // e.g. "1,4,8,16"
nums := strings.Split(numsCSV, ",")
out := make([]modelBatches, 0, len(nums))
for _, strNum := range nums {
num, err := strconv.Atoi(strNum)
if err != nil {
return nil, fmt.Errorf("invalid batch size %q: %w", strNum, err)
}
name := fmt.Sprintf("%s-batch%d.rknn", prefix, num)
out = append(out, modelBatches{
batchSize: num,
modelFile: filepath.Join(dir, name),
})
}
return out, nil
}
type modelBatches struct {
batchSize int
modelFile string
}
// BenchmarkBatchSize runs benchmarks against multiple models to work out per
// image inference time.
func BenchmarkBatchSize(b *testing.B) {
flag.Parse()
// from the modelFiles argument create a table of model files and corresponding
// batch sizes
cases, err := expandModelPattern(*modelFiles)
if err != nil {
b.Fatalf("Invalid modelFile syntax: %v", err)
}
const (
height = 256
width = 128
channels = 3
)
for _, tc := range cases {
tc := tc // capture
b.Run(fmt.Sprintf("Batch%02d", tc.batchSize), func(b *testing.B) {
// load the RKNN model for this batch size
err := SetCPUAffinityByPlatform(*rkPlatform, FastCores)
if err != nil {
b.Fatalf("Failed to set CPU Affinity: %v", err)
}
// if a platform other than the default rk3588 was specified, swap the
// platform suffix in the model filename
modelFile := tc.modelFile
if *rkPlatform != "rk3588" {
modelFile = strings.ReplaceAll(modelFile, "rk3588", *rkPlatform)
}
// create rknn runtime instance
rt, err := NewRuntimeByPlatform(*rkPlatform, modelFile)
if err != nil {
b.Fatalf("Error initializing RKNN runtime: %v", err)
}
defer rt.Close()
// set runtime to leave output tensors as int8
rt.SetWantFloat(false)
// prepare zero images
imgs := make([]gocv.Mat, tc.batchSize)
for i := range imgs {
m := gocv.Zeros(height, width, gocv.MatTypeCV8UC3)
defer m.Close()
imgs[i] = m
}
// pre-allocate the batch container
batch := NewBatch(tc.batchSize, height, width, channels, rt.inputTypeFloat32)
defer batch.Close()
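// reset the timer so setup cost is excluded; each iteration below measures
// filling the batch plus a single inference call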
b.ResetTimer()
var totalInf time.Duration
for i := 0; i < b.N; i++ {
batch.Clear()
start := time.Now()
for _, img := range imgs {
if err := batch.Add(img); err != nil {
b.Fatalf("Add() error: %v", err)
}
}
if _, err := rt.Inference([]gocv.Mat{batch.Mat()}); err != nil {
b.Fatalf("Inference() error: %v", err)
}
totalInf += time.Since(start)
}
b.StopTimer()
// milliseconds per batch
msBatch := float64(totalInf.Nanoseconds()) / 1e6 / float64(b.N)
b.ReportMetric(msBatch, "ms/batch")
// milliseconds per image
msImg := msBatch / float64(tc.batchSize)
b.ReportMetric(msImg, "ms/img")
})
}
}
func TestBatchAddAndOverflow(t *testing.T) {
r := &Runtime{inputTypeFloat32: false}
batch := NewBatch(2, 2, 3, 1, r.inputTypeFloat32)
defer batch.Close()
// create Mats with known data
m1 := gocv.NewMatWithSize(2, 3, gocv.MatTypeCV8U)
defer m1.Close()
buf1, _ := m1.DataPtrUint8()
for i := range buf1 {
buf1[i] = uint8(i + 1) // 1,2,3...6
}
m2 := gocv.NewMatWithSize(2, 3, gocv.MatTypeCV8U)
defer m2.Close()
buf2, _ := m2.DataPtrUint8()
for i := range buf2 {
buf2[i] = uint8((i + 1) * 10) // 10,20,...60
}
// Add two images
if err := batch.Add(m1); err != nil {
t.Fatalf("Add(m1) failed: %v", err)
}
if err := batch.Add(m2); err != nil {
t.Fatalf("Add(m2) failed: %v", err)
}
// Underlying batch mat should contain both
bMat := batch.Mat()
allData, err := bMat.DataPtrUint8()
if err != nil {
t.Fatalf("DataPtrUint8 on batch failed: %v", err)
}
// first 6 from buf1, next 6 from buf2
for i := 0; i < 6; i++ {
if allData[i] != buf1[i] {
t.Errorf("element %d = %d; want %d from img1", i, allData[i], buf1[i])
}
}
for i := 0; i < 6; i++ {
if allData[6+i] != buf2[i] {
t.Errorf("element %d = %d; want %d from img2", 6+i, allData[6+i], buf2[i])
}
}
// third Add should overflow
m3 := gocv.NewMatWithSize(2, 3, gocv.MatTypeCV8U)
defer m3.Close()
err3 := batch.Add(m3)
if err3 == nil {
t.Fatal("expected overflow error on third Add, got nil")
}
}
func TestBatchAddAtAndClear(t *testing.T) {
r := &Runtime{inputTypeFloat32: false}
batch := NewBatch(3, 2, 2, 1, r.inputTypeFloat32)
defer batch.Close()
m := gocv.NewMatWithSize(2, 2, gocv.MatTypeCV8U)
defer m.Close()
dat, _ := m.DataPtrUint8()
for i := range dat {
dat[i] = uint8(i + 5)
}
// AddAt index 1
if err := batch.AddAt(1, m); err != nil {
t.Fatalf("AddAt failed: %v", err)
}
// matCnt should still be zero
if batch.matCnt != 0 {
t.Errorf("matCnt = %d; want 0 after AddAt", batch.matCnt)
}
// Clear resets matCnt
batch.Clear()
if batch.matCnt != 0 {
t.Errorf("matCnt = %d; want 0 after Clear", batch.matCnt)
}
// Add at invalid index
err := batch.AddAt(5, m)
if err == nil {
t.Error("expected error for AddAt out of range, got nil")
}
}
func TestGetOutputIntAndF32(t *testing.T) {
r := &Runtime{inputTypeFloat32: false}
batch := NewBatch(2, 2, 2, 1, r.inputTypeFloat32)
defer batch.Close()
// Test GetOutputInt bounds
dOut := Output{BufInt: []int8{1, 2, 3, 4}, Size: 4}
if _, err := batch.GetOutputInt(-1, dOut, 2); err == nil {
t.Error("expected error for GetOutputInt idx<0")
}
if _, err := batch.GetOutputInt(2, dOut, 2); err == nil {
t.Error("expected error for GetOutputInt idx>=size")
}
// valid slice
slice, err := batch.GetOutputInt(1, dOut, 2)
if err != nil {
t.Errorf("GetOutputInt failed: %v", err)
}
if len(slice) != 2 {
t.Errorf("len(slice) = %d; want 2", len(slice))
}
// Test GetOutputF32 bounds
dOutF := Output{BufFloat: []float32{1, 2, 3, 4}, Size: 4}
if _, err := batch.GetOutputF32(-1, dOutF, 2); err == nil {
t.Error("expected error for GetOutputF32 idx<0")
}
if _, err := batch.GetOutputF32(2, dOutF, 2); err == nil {
t.Error("expected error for GetOutputF32 idx>=size")
}
sliceF, err := batch.GetOutputF32(0, dOutF, 2)
if err != nil {
t.Errorf("GetOutputF32 failed: %v", err)
}
if len(sliceF) != 2 {
t.Errorf("len(sliceF) = %d; want 2", len(sliceF))
}
}

batchpool.go (new file, 75 lines)

@@ -0,0 +1,75 @@
package rknnlite
import (
"sync"
)
// BatchPool is a pool of batches
type BatchPool struct {
// pool of batches
batches chan *Batch
// size of pool
size int
close sync.Once
}
// NewBatchPool returns a pool of Batches
func NewBatchPool(size int, rt *Runtime) *BatchPool {
p := &BatchPool{
batches: make(chan *Batch, size),
size: size,
}
batchSize := int(rt.InputAttrs()[0].Dims[0])
width := int(rt.InputAttrs()[0].Dims[1])
height := int(rt.InputAttrs()[0].Dims[2])
channels := int(rt.InputAttrs()[0].Dims[3])
inputType := rt.GetInputTypeFloat32()
// create batch pool to be the same size as the runtime pool
for i := 0; i < size; i++ {
batch := NewBatch(
batchSize,
height,
width,
channels,
inputType,
)
// attach to pool
p.Return(batch)
}
return p
}
// Get returns a batch from the pool, blocking until one is available
func (p *BatchPool) Get() *Batch {
return <-p.batches
}
// Return a batch to the pool
func (p *BatchPool) Return(batch *Batch) {
batch.Clear()
select {
case p.batches <- batch:
default:
// pool is full, discard the batch
}
}
// Close the pool and all batches in it
func (p *BatchPool) Close() {
p.close.Do(func() {
// close channel
close(p.batches)
// drain and close all batches
for next := range p.batches {
_ = next.Close()
}
})
}

example/batch/README.md (new file, 230 lines)

@@ -0,0 +1,230 @@
# Batch Models
## Overview
Typically computer vision inference models have a single input tensor in
the shape of `NHWC`, such as `[1,224,224,3]`. The rknn-toolkit2 allows you to
build the model with batched tensor inputs by setting the `rknn_batch_size` parameter
in your python conversion script.
```
rknn.build(do_quantization=do_quant, dataset=DATASET_PATH, rknn_batch_size=8)
```
This results in a .rknn model with modified input tensor dimensions of `[8,224,224,3]`.
When taking input from a video source frame by frame, batching has little use,
as you're only dealing with a single frame that needs to be processed as soon
as possible. However, batching can be useful if you have many images to
process at a single point in time, for example:
* Running YOLO object detection on a frame, then passing all detected objects
through a Re-Identification model in batches.
* Buffering video frames and, upon an external signal, triggering the processing
of those buffered frames as a batch.
## Batch Sizing
The NPUs in the RK356x, RK3576, and RK3588 platforms have different
amounts of SRAM and different NPU core counts, so finding the optimal batch
size for your Model is critical.
A benchmarking tool has been created to test different batch sizes of your own
RKNN Models. Use your python conversion script to compile the ONNX model to RKNN
with the various `rknn_batch_size` values you would like to test, and name those
RKNN Models using the format `<name>-batch{N1,N2,...,Nk}.rknn`. For example, to
test batch sizes of 1, 4, 8, and 16 of an OSNet model, create the
following files and place them in the directory `/tmp/models` on the host OS.
```
osnet-batch1.rknn
osnet-batch4.rknn
osnet-batch8.rknn
osnet-batch16.rknn
```
We can then pass all these Models to the benchmark using the `-m` argument in
the format `-m "/tmp/models/osnet-batch{1,4,8,16}.rknn"`.
To run the benchmark of your models on the rk3588, or substitute your
Platform model:
```
# from project root directory
go test -bench=BenchmarkBatchSize -benchtime=10s \
-args -p rk3588 -m "/tmp/models/osnet-batch{1,4,8,16}.rknn"
```
Similarly, using Docker, we can mount the `/tmp/models` directory and run:
```
# from project root directory
docker run --rm \
--device /dev/dri:/dev/dri \
-v "$(pwd):/go/src/app" \
-v "$(pwd)/example/data:/go/src/data" \
-v "/usr/include/rknn_api.h:/usr/include/rknn_api.h" \
-v "/usr/lib/librknnrt.so:/usr/lib/librknnrt.so" \
-v "/tmp/models/:/tmp/models/" \
-w /go/src/app \
swdee/go-rknnlite:latest \
go test -bench=BenchmarkBatchSize -benchtime=10s \
-args -p rk3588 -m "/tmp/models/osnet-batch{1,4,8,16}.rknn"
```
Running the above benchmark command outputs the following results.
#### rk3588
```
BenchmarkBatchSize/Batch01-8 1897 8806025 ns/op 8.806 ms/batch 8.806 ms/img
BenchmarkBatchSize/Batch04-8 885 21555109 ns/op 21.55 ms/batch 5.389 ms/img
BenchmarkBatchSize/Batch08-8 534 22335645 ns/op 22.34 ms/batch 2.792 ms/img
BenchmarkBatchSize/Batch16-8 303 40253162 ns/op 40.25 ms/batch 2.516 ms/img
```
#### rk3576
```
BenchmarkBatchSize/Batch01-8 1312 8987117 ns/op 8.985 ms/batch 8.985 ms/img
BenchmarkBatchSize/Batch04-8 640 18836090 ns/op 18.83 ms/batch 4.709 ms/img
BenchmarkBatchSize/Batch08-8 385 31702649 ns/op 31.70 ms/batch 3.963 ms/img
BenchmarkBatchSize/Batch16-8 194 63801596 ns/op 63.80 ms/batch 3.988 ms/img
```
#### rk3566
```
BenchmarkBatchSize/Batch01-4 661 18658568 ns/op 18.66 ms/batch 18.66 ms/img
BenchmarkBatchSize/Batch04-4 158 74716574 ns/op 74.71 ms/batch 18.68 ms/img
BenchmarkBatchSize/Batch08-4 70 155374027 ns/op 155.4 ms/batch 19.42 ms/img
BenchmarkBatchSize/Batch16-4 37 294969497 ns/op 295.0 ms/batch 18.44 ms/img
```
### Interpreting Benchmark Results
The `ms/batch` metric represents the number of milliseconds it took for the
whole batch inference to run and `ms/img` represents the average number of
milliseconds it took to run inference per image.
As can be seen in the rk3588 results, the ideal batch size is 8, as it gives
a low `2.792` ms/img inference time versus a total batch inference time of
`22.34ms`. The same applies to the rk3576.
The rk3566 has a single core NPU, and the results show there is no benefit
in running batching at all.
These results were for an OSNet Model; different Models may perform
differently, so you should run these benchmarks for your own application to
optimize accordingly.
## Usage
An example batch program is provided that combines inferencing with a Pool of
runtimes. Make sure you have downloaded the data files first for the examples.
You only need to do this once for all examples.
```
cd example/
git clone --depth=1 https://github.com/swdee/go-rknnlite-data.git data
```
Run the batch example on the rk3588, or substitute your Platform model.
```
cd example/batch
go run batch.go -s 3 -p rk3588
```
This will result in the output of:
```
Driver Version: 0.9.6, API Version: 2.3.0 (c949ad889d@2024-11-07T11:35:33)
Model Input Number: 1, Ouput Number: 1
Input tensors:
index=0, name=input, n_dims=4, dims=[8, 224, 224, 3], n_elems=1204224, size=1204224, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=-14, scale=0.018658
Output tensors:
index=0, name=output, n_dims=2, dims=[8, 1000, 0, 0], n_elems=8000, size=8000, fmt=UNDEFINED, type=INT8, qnt_type=AFFINE, zp=-55, scale=0.141923
Running...
File ../data/imagenet/n01514859_hen.JPEG, inference time 40ms
File ../data/imagenet/n01518878_ostrich.JPEG, inference time 40ms
File ../data/imagenet/n01530575_brambling.JPEG, inference time 40ms
File ../data/imagenet/n01531178_goldfinch.JPEG, inference time 40ms
...snip...
File ../data/imagenet/n13054560_bolete.JPEG, inference time 8ms
File ../data/imagenet/n13133613_ear.JPEG, inference time 8ms
File ../data/imagenet/n15075141_toilet_tissue.JPEG, inference time 8ms
Processed 1000 images in 2.098619346s, average inference per image is 2.10ms
```
See the help for command line parameters.
```
$ go run batch.go -h
Usage of /tmp/go-build1506342544/b001/exe/batch:
-d string
A directory of images to run inference on (default "../data/imagenet/")
-m string
RKNN compiled model file (default "../data/models/rk3588/mobilenetv2-batch8-rk3588.rknn")
-p string
Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588] (default "rk3588")
-q Run in quiet mode, don't display individual inference results
-r int
Repeat processing image directory the specified number of times, use this if you don't have enough images (default 1)
-s int
Size of RKNN runtime pool, choose 1, 2, 3, or multiples of 3 (default 1)
```
### Docker
To run the batch example using the prebuilt docker image, make sure the data files have been downloaded first,
then run.
```
# from project root directory
docker run --rm \
--device /dev/dri:/dev/dri \
-v "$(pwd):/go/src/app" \
-v "$(pwd)/example/data:/go/src/data" \
-v "/usr/include/rknn_api.h:/usr/include/rknn_api.h" \
-v "/usr/lib/librknnrt.so:/usr/lib/librknnrt.so" \
-w /go/src/app \
swdee/go-rknnlite:latest \
go run ./example/batch/batch.go -p rk3588 -s 3
```
## API
A convenience function `rknnlite.NewBatch()` is provided to concatenate individual
images into a single input tensor for the Model and then extract their results
from the combined outputs.
```
// create a new batch processor
batch := rknnlite.NewBatch(batchSize, height, width, channels, rt.GetInputTypeFloat32())
defer batch.Close()
for idx, img := range imgs {
// add images to the batch at the given index
batch.AddAt(idx, img)
// OR you can add images incrementally without specifying an index
batch.Add(img)
}
// pass the concatenated Mat to the runtime for inference
outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})
// then get a single image result by index
output, err := batch.GetOutputInt(4, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
```
See the full example code for more details.
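This commit also adds a `BatchPool` for reusing pre-allocated batches across
goroutines when combined with a runtime Pool. A minimal sketch, assuming a
batch-compiled model file (pool size and model name are illustrative):
```
// create a runtime pool and a matching pool of pre-allocated batches
pool, err := rknnlite.NewPool(3, "model-batch8.rknn",
    []rknnlite.CoreMask{rknnlite.NPUCoreAuto})
if err != nil {
    log.Fatal(err)
}
defer pool.Close()

rt := pool.Get()
batchPool := rknnlite.NewBatchPool(pool.Size(), rt)
defer batchPool.Close()
pool.Return(rt)

// in each worker goroutine
rt = pool.Get()
batch := batchPool.Get()
// ...Add() images and run rt.Inference([]gocv.Mat{batch.Mat()})...
batchPool.Return(batch) // Return() clears the batch for reuse
pool.Return(rt)
```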

example/batch/batch.go (new file, 222 lines)

@@ -0,0 +1,222 @@
package main
import (
"flag"
"github.com/swdee/go-rknnlite"
"gocv.io/x/gocv"
"image"
"log"
"os"
"path/filepath"
"strings"
"sync"
"time"
)
var (
// model input tensor dimensions, these values will be set
// when runtime queries the modelFile being loaded
height, width, channels, batchSize int
)
func main() {
// disable logging timestamps
log.SetFlags(0)
// read in cli flags
modelFile := flag.String("m", "../data/models/rk3588/mobilenetv2-batch8-rk3588.rknn", "RKNN compiled model file")
imgDir := flag.String("d", "../data/imagenet/", "A directory of images to run inference on")
poolSize := flag.Int("s", 1, "Size of RKNN runtime pool, choose 1, 2, 3, or multiples of 3")
repeat := flag.Int("r", 1, "Repeat processing image directory the specified number of times, use this if you don't have enough images")
quiet := flag.Bool("q", false, "Run in quiet mode, don't display individual inference results")
rkPlatform := flag.String("p", "rk3588", "Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588]")
flag.Parse()
// set cpu affinity to run on specific CPU cores
err := rknnlite.SetCPUAffinityByPlatform(*rkPlatform, rknnlite.FastCores)
if err != nil {
log.Printf("Failed to set CPU Affinity: %v\n", err)
}
// check dir exists
info, err := os.Stat(*imgDir)
if err != nil {
log.Fatalf("No such image directory %s, error: %v\n", *imgDir, err)
}
if !info.IsDir() {
log.Fatal("Image path is not a directory")
}
// check if user specified model file or if default is being used. if default
// then pick the default platform model to use.
if f := flag.Lookup("m"); f != nil && f.Value.String() == f.DefValue && *rkPlatform != "rk3588" {
*modelFile = strings.ReplaceAll(*modelFile, "rk3588", *rkPlatform)
}
// create new pool, we pass NPUCoreAuto as RKNN does not allow batch Models
// to be pinned to specific NPU cores
useCore := rknnlite.NPUCoreAuto
if strings.HasPrefix(strings.ToLower(*rkPlatform), "rk356") {
useCore = rknnlite.NPUSkipSetCore
}
pool, err := rknnlite.NewPool(*poolSize, *modelFile,
[]rknnlite.CoreMask{useCore})
if err != nil {
log.Fatalf("Error creating RKNN pool: %v\n", err)
}
// set runtime to leave output tensors as int8
pool.SetWantFloat(false)
// get a runtime and query the input tensor dimensions of the model
rt := pool.Get()
// optional querying of model file tensors and SDK version for printing
// to stdout. not necessary for production inference code
err = rt.Query(os.Stdout)
if err != nil {
log.Fatal("Error querying runtime: ", err)
}
batchSize = int(rt.InputAttrs()[0].Dims[0])
width = int(rt.InputAttrs()[0].Dims[1])
height = int(rt.InputAttrs()[0].Dims[2])
channels = int(rt.InputAttrs()[0].Dims[3])
pool.Return(rt)
// get list of all files in the directory
entries, err := os.ReadDir(*imgDir)
if err != nil {
log.Fatalf("Error reading image directory: %v\n", err)
}
var files []string
for _, e := range entries {
if e.IsDir() {
continue
}
files = append(files, filepath.Join(*imgDir, e.Name()))
}
log.Println("Running...")
// waitgroup used to wait for all go-routines to complete before closing
// the pool
var wg sync.WaitGroup
start := time.Now()
// repeat processing image set the specified number of times
for i := 0; i < *repeat; i++ {
// process image files in groups of batchSize
for offset := 0; offset < len(files); offset += batchSize {
end := offset + batchSize
if end > len(files) {
end = len(files)
}
subset := files[offset:end]
// pool.Get() blocks if no runtimes are available in the pool
rt := pool.Get()
wg.Add(1)
go func(rt *rknnlite.Runtime, batchPaths []string) {
defer wg.Done()
processBatch(rt, batchPaths, *quiet)
pool.Return(rt)
}(rt, subset)
}
}
wg.Wait()
// calculate average inference
numFiles := *repeat * len(files)
end := time.Since(start)
avg := (end.Seconds() / float64(numFiles)) * 1000
log.Printf("Processed %d images in %s, average inference per image is %.2fms\n",
numFiles, end.String(), avg)
pool.Close()
}
func processBatch(rt *rknnlite.Runtime, paths []string, quiet bool) {
// create batch
batch := rknnlite.NewBatch(batchSize, height, width, channels,
rt.GetInputTypeFloat32())
defer batch.Close()
// for each image path, load & preprocess, then Add to batch
for idx, file := range paths {
img := gocv.IMRead(file, gocv.IMReadColor)
if img.Empty() {
log.Printf("Error reading %s\n", file)
continue
}
defer img.Close()
// rgb + resize
rgbImg := gocv.NewMat()
gocv.CvtColor(img, &rgbImg, gocv.ColorBGRToRGB)
defer rgbImg.Close()
cropImg := gocv.NewMat()
gocv.Resize(rgbImg, &cropImg, image.Pt(width, height), 0, 0, gocv.InterpolationArea)
defer cropImg.Close()
if err := batch.AddAt(idx, cropImg); err != nil {
log.Printf("Batch.Add error: %v\n", err)
}
}
// run inference on the entire batch at once
start := time.Now()
outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})
spent := time.Since(start)
if err != nil {
log.Printf("Inference error: %v\n", err)
return
}
defer outputs.Free()
// unpack per image results
for idx := 0; idx < len(paths); idx++ {
if quiet {
continue
}
// get int8 output tensor for image at idx
_, err := batch.GetOutputInt(idx, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
if err != nil {
log.Printf("GetOutputInt[%d] error: %v\n", idx, err)
continue
}
log.Printf("File %s, inference time %dms\n", paths[idx], spent.Milliseconds())
}
}

example/reid/README.md (new file, 188 lines)

@@ -0,0 +1,188 @@
# Re-Identification (ReID)
## Overview
Object trackers like ByteTrack can be used to track visible objects frame to frame,
but they rely on the assumption that an object's appearance and location change
smoothly over time. If a person goes behind a building or is briefly hidden
by another passerby, the tracker can lose that object's identity. When that same
person re-emerges, the tracker often treats them as a new object, assigning a new ID.
This makes analyzing a person's complete path through a scene difficult
and makes counting unique objects much harder.
Re-Identification (ReID) models help solve this problem by using embedding features
which encode an object into a fixed-length vector that captures distinctive
patterns, shapes, or other visual signatures. When an object disappears and
then reappears, you can compare the newly detected object's embedding against a list of
past objects. If the distance (using Cosine or Euclidean distance) is within
a chosen threshold, you can confidently link the new detection back to the
original track ID.
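In code the comparison reduces to a distance check between embedding vectors.
A minimal sketch using this library's `postprocess/reid` helpers (the `0.51`
threshold is illustrative and should be calibrated per dataset):
```
// fpNew and fpOld are L2-normalized embedding vectors ([]float32)
// extracted from the model outputs
dist := reid.EuclideanDistance(fpNew, fpOld)

if dist < 0.51 {
    // treat the new detection as the same object and
    // re-link it to the original track ID
}
```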
## Datasets
The [OSNet model](https://paperswithcode.com/paper/omni-scale-feature-learning-for-person-re) is
lightweight and provides good accuracy for re-identification tasks; however,
it must be trained on a dataset for the specific object classes to be identified.
This example uses the [Market1501](https://paperswithcode.com/dataset/market-1501)
dataset trained for re-identifying people.
To support other object classifications such as Vehicles, Faces, or Animals, you
will need to source datasets and train models accordingly.
## Occlusion Example
In the [people walking video](https://github.com/swdee/go-rknnlite-data/raw/master/people-walking.mp4)
a lady wearing a CK-branded jacket appears
at the beginning of the scene and becomes occluded by passersby. When she
reappears, ByteTrack detects her as a new person.
![CK Lady](https://github.com/swdee/go-rknnlite-data/raw/master/docimg/reid-ck-lady-movement.jpg)
## Usage
Make sure you have downloaded the data files first for the examples.
You only need to do this once for all examples.
```
cd example/
git clone --depth=1 https://github.com/swdee/go-rknnlite-data.git data
```
Command line Usage.
```
$ go run reid.go -h
Usage of /tmp/go-build147978858/b001/exe/reid:
-d string
Data file containing object co-ordinates (default "../data/reid-objects.dat")
-e float
The Euclidean distance threshold [0.0-1.0], a value less than this defines a match (default 0.51)
-i string
Image file to run inference on (default "../data/reid-walking.jpg")
-m string
RKNN compiled model file (default "../data/models/rk3588/osnet-market1501-batch8-rk3588.rknn")
-p string
Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588] (default "rk3588")
```
Run the ReID example on the rk3588, or substitute your Platform model.
```
cd example/reid/
go run reid.go -p rk3588
```
This will result in the output of:
```
Driver Version: 0.9.6, API Version: 2.3.0 (c949ad889d@2024-11-07T11:35:33)
Model Input Number: 1, Ouput Number: 1
Input tensors:
index=0, name=input, n_dims=4, dims=[8, 256, 128, 3], n_elems=786432, size=786432, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=-14, scale=0.018658
Output tensors:
index=0, name=output, n_dims=2, dims=[8, 512, 0, 0], n_elems=4096, size=4096, fmt=UNDEFINED, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.018782
Comparing object 0 at (0,0,134,361)
Object 0 at (0,0,134,361) has euclidean distance: 0.000000 (same person)
Object 1 at (134,0,251,325) has euclidean distance: 0.423271 (same person)
Object 2 at (251,0,326,208) has euclidean distance: 0.465061 (same person)
Object 3 at (326,0,394,187) has euclidean distance: 0.445583 (same person)
Comparing object 1 at (394,0,513,357)
Object 0 at (0,0,134,361) has euclidean distance: 0.781510 (different person)
Object 1 at (134,0,251,325) has euclidean distance: 0.801649 (different person)
Object 2 at (251,0,326,208) has euclidean distance: 0.680299 (different person)
Object 3 at (326,0,394,187) has euclidean distance: 0.686542 (different person)
Comparing object 2 at (513,0,588,246)
Object 0 at (0,0,134,361) has euclidean distance: 0.860921 (different person)
Object 1 at (134,0,251,325) has euclidean distance: 0.873663 (different person)
Object 2 at (251,0,326,208) has euclidean distance: 0.870753 (different person)
Object 3 at (326,0,394,187) has euclidean distance: 0.820761 (different person)
Comparing object 3 at (588,0,728,360)
Object 0 at (0,0,134,361) has euclidean distance: 0.762738 (different person)
Object 1 at (134,0,251,325) has euclidean distance: 0.800668 (different person)
Object 2 at (251,0,326,208) has euclidean distance: 0.763694 (different person)
Object 3 at (326,0,394,187) has euclidean distance: 0.769597 (different person)
Model first run speed: batch preparation=3.900093ms, inference=47.935686ms, post processing=262.203µs, total time=52.097982ms
done
```
### Docker
To run the ReID example using the prebuilt docker image, make sure the data files have been downloaded first,
then run.
```
# from project root directory
docker run --rm \
--device /dev/dri:/dev/dri \
-v "$(pwd):/go/src/app" \
-v "$(pwd)/example/data:/go/src/data" \
-v "/usr/include/rknn_api.h:/usr/include/rknn_api.h" \
-v "/usr/lib/librknnrt.so:/usr/lib/librknnrt.so" \
-w /go/src/app \
swdee/go-rknnlite:latest \
go run ./example/reid/reid.go -p rk3588
```
### Interpreting Results
The above example uses people detected with a YOLOv5 model and then cropped
from the scene to create the sample input.
![CK Lady](https://github.com/swdee/go-rknnlite-data/raw/master/reid-walking.jpg)
Objects A1 to A4 represent the same person and objects B1, C1, and D1 are other
people from the same scene.
The first set of comparisons:
```
Comparing object 0 [A1] at (0,0,134,361)
Object 0 [A1] at (0,0,134,361) has euclidean distance: 0.000000 (same person)
Object 1 [A2] at (134,0,251,325) has euclidean distance: 0.423271 (same person)
Object 2 [A3] at (251,0,326,208) has euclidean distance: 0.465061 (same person)
Object 3 [A4] at (326,0,394,187) has euclidean distance: 0.445583 (same person)
```
Object 0 is A1; when compared to itself it has a euclidean distance of 0.0.
Objects 1-3 are A2 to A4; each of these has a similar
distance, ranging from 0.42 to 0.46.
For L2-normalized embeddings the euclidean distance ranges from 0.0 (identical)
to 2.0 (opposite), so the lower the distance the more similar the object is. A
threshold of `0.51` is used to define the maximum distance at which an object
is considered the same. Your use case and datasets may require calibration of
the ideal threshold.
The remaining results compare the people B1, C1, and D1.
```
Comparing object 1 [B1] at (394,0,513,357)
Object 0 [A1] at (0,0,134,361) has euclidean distance: 0.781510 (different person)
Object 1 [A2] at (134,0,251,325) has euclidean distance: 0.801649 (different person)
Object 2 [A3] at (251,0,326,208) has euclidean distance: 0.680299 (different person)
Object 3 [A4] at (326,0,394,187) has euclidean distance: 0.686542 (different person)
Comparing object 2 [C1] at (513,0,588,246)
Object 0 [A1] at (0,0,134,361) has euclidean distance: 0.860921 (different person)
Object 1 [A2] at (134,0,251,325) has euclidean distance: 0.873663 (different person)
Object 2 [A3] at (251,0,326,208) has euclidean distance: 0.870753 (different person)
Object 3 [A4] at (326,0,394,187) has euclidean distance: 0.820761 (different person)
Comparing object 3 [D1] at (588,0,728,360)
Object 0 [A1] at (0,0,134,361) has euclidean distance: 0.762738 (different person)
Object 1 [A2] at (134,0,251,325) has euclidean distance: 0.800668 (different person)
Object 2 [A3] at (251,0,326,208) has euclidean distance: 0.763694 (different person)
Object 3 [A4] at (326,0,394,187) has euclidean distance: 0.769597 (different person)
```
All of these other people have a euclidean distance greater than 0.68, indicating
they are different people.
## Postprocessing
[Convenience functions](https://github.com/swdee/go-rknnlite-data/raw/master/postprocess/reid.go)
are provided for calculating the Euclidean Distance or Cosine Similarity,
depending on how the Model has been trained.
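For example, with int8 model outputs the embedding is dequantized and
L2-normalized before comparison. A minimal sketch, where `outA`/`outB` come from
`batch.GetOutputInt()` and the scale and zero-point from
`outputs.OutputAttributes()`:
```
fpA := reid.DequantizeAndL2Normalize(outA, scale, zeroPoint)
fpB := reid.DequantizeAndL2Normalize(outB, scale, zeroPoint)

// pick the metric the Model was trained for
euDist := reid.EuclideanDistance(fpA, fpB)
cosDist := reid.CosineDistance(fpA, fpB)
```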

example/reid/reid.go (new file, 524 lines)

@@ -0,0 +1,524 @@
package main
import (
"bufio"
"flag"
"fmt"
"github.com/swdee/go-rknnlite"
"github.com/swdee/go-rknnlite/postprocess/reid"
"gocv.io/x/gocv"
"image"
"log"
"os"
"strconv"
"strings"
"time"
)
func main() {
// disable logging timestamps
log.SetFlags(0)
// read in cli flags
modelFile := flag.String("m", "../data/models/rk3588/osnet-market1501-batch8-rk3588.rknn", "RKNN compiled model file")
imgFile := flag.String("i", "../data/reid-walking.jpg", "Image file to run inference on")
objsFile := flag.String("d", "../data/reid-objects.dat", "Data file containing object co-ordinates")
rkPlatform := flag.String("p", "rk3588", "Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588]")
euDist := flag.Float64("e", 0.51, "The Euclidean distance threshold [0.0-1.0], a value less than this defines a match")
flag.Parse()
err := rknnlite.SetCPUAffinityByPlatform(*rkPlatform, rknnlite.FastCores)
if err != nil {
log.Printf("Failed to set CPU Affinity: %v", err)
}
// check if user specified model file or if default is being used. if default
// then pick the default platform model to use.
if f := flag.Lookup("m"); f != nil && f.Value.String() == f.DefValue && *rkPlatform != "rk3588" {
*modelFile = strings.ReplaceAll(*modelFile, "rk3588", *rkPlatform)
}
// create rknn runtime instance
rt, err := rknnlite.NewRuntimeByPlatform(*rkPlatform, *modelFile)
if err != nil {
log.Fatal("Error initializing RKNN runtime: ", err)
}
// set runtime to leave output tensors as int8
rt.SetWantFloat(false)
// optional querying of model file tensors and SDK version for printing
// to stdout. not necessary for production inference code
err = rt.Query(os.Stdout)
if err != nil {
log.Fatal("Error querying runtime: ", err)
}
// load objects file
objs, err := ParseObjects(*objsFile)
if err != nil {
log.Fatal("Error parsing objects: ", err)
}
// load image
img := gocv.IMRead(*imgFile, gocv.IMReadColor)
if img.Empty() {
log.Fatal("Error reading image from: ", *imgFile)
}
// convert colorspace
srcImg := gocv.NewMat()
gocv.CvtColor(img, &srcImg, gocv.ColorBGRToRGB)
defer img.Close()
defer srcImg.Close()
start := time.Now()
// create a batch to process all of the compare and dataset images
// in a single forward pass
batch := rknnlite.NewBatch(
int(rt.InputAttrs()[0].Dims[0]),
int(rt.InputAttrs()[0].Dims[2]),
int(rt.InputAttrs()[0].Dims[1]),
int(rt.InputAttrs()[0].Dims[3]),
rt.GetInputTypeFloat32(),
)
// scale size is the input tensor dimensions to scale the object to
scaleSize := image.Pt(int(rt.InputAttrs()[0].Dims[1]), int(rt.InputAttrs()[0].Dims[2]))
// add the compare images to the batch
for _, cmpObj := range objs.Compare {
err := AddObjectToBatch(batch, srcImg, cmpObj, scaleSize)
if err != nil {
log.Fatal("Error creating batch: ", err)
}
}
// add the dataset images to the batch
for _, dtObj := range objs.Dataset {
err := AddObjectToBatch(batch, srcImg, dtObj, scaleSize)
if err != nil {
log.Fatal("Error creating batch: ", err)
}
}
defer batch.Close()
endBatch := time.Now()
// run inference on the batch
outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})
endInference := time.Now()
if err != nil {
log.Fatal("Runtime inferencing failed with error: ", err)
}
// get total number of compare objects
totalCmp := len(objs.Compare)
// compare each object to those objects in the dataset for similarity
for i, cmpObj := range objs.Compare {
// get the compare object's output
cmpOutput, err := batch.GetOutputInt(i, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
if err != nil {
log.Fatal("Getting output tensor failed with error: ", err)
}
log.Printf("Comparing object %d at (%d,%d,%d,%d)\n", i,
cmpObj.X1, cmpObj.Y1, cmpObj.X2, cmpObj.Y2)
for j, dtObj := range objs.Dataset {
// get each object's outputs
nextOutput, err := batch.GetOutputInt(totalCmp+j, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
if err != nil {
log.Fatal("Getting output tensor failed with error: ", err)
}
dist := CompareObjects(
cmpOutput,
nextOutput,
outputs.OutputAttributes().Scales[0],
outputs.OutputAttributes().ZPs[0],
)
// check euclidean distance to determine if it is the same person or not
objRes := "different person"
if dist < float32(*euDist) {
objRes = "same person"
}
log.Printf(" Object %d at (%d,%d,%d,%d) has euclidean distance: %f (%s)\n",
j,
dtObj.X1, dtObj.Y1, dtObj.X2, dtObj.Y2,
dist, objRes)
}
}
endCompare := time.Now()
log.Printf("Model first run speed: batch preparation=%s, inference=%s, post processing=%s, total time=%s\n",
endBatch.Sub(start).String(),
endInference.Sub(endBatch).String(),
endCompare.Sub(endInference).String(),
endCompare.Sub(start).String(),
)
// free outputs allocated in C memory after you have finished post processing
err = outputs.Free()
if err != nil {
log.Fatal("Error freeing Outputs: ", err)
}
// close runtime and release resources
err = rt.Close()
if err != nil {
log.Fatal("Error closing RKNN runtime: ", err)
}
log.Println("done")
/*
//CompareObject(rt, srcImg, cmpObj, objs.Dataset)
//rgbImg := img.Clone()
frameWidth := 67
frameHeight := 177
roiRect1 := image.Rect(497, 195, 497+frameWidth, 195+frameHeight)
// cklady
//roiRect1 := image.Rect(0, 0, 134, 361)
roiImg1 := rgbImg.Region(roiRect1)
cropImg1 := rgbImg.Clone()
scaleSize1 := image.Pt(int(rt.InputAttrs()[0].Dims[1]), int(rt.InputAttrs()[0].Dims[2]))
gocv.Resize(roiImg1, &cropImg1, scaleSize1, 0, 0, gocv.InterpolationArea)
defer img.Close()
defer rgbImg.Close()
defer cropImg1.Close()
defer roiImg1.Close()
gocv.IMWrite("/tmp/frame-master.jpg", cropImg1)
batch := rt.NewBatch(
int(rt.InputAttrs()[0].Dims[0]),
int(rt.InputAttrs()[0].Dims[2]),
int(rt.InputAttrs()[0].Dims[1]),
int(rt.InputAttrs()[0].Dims[3]),
)
err = batch.Add(cropImg1)
if err != nil {
log.Fatal("Error creating batch: ", err)
}
defer batch.Close()
// perform inference on image file
outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})
if err != nil {
log.Fatal("Runtime inferencing failed with error: ", err)
}
output, err := batch.GetOutputInt(0, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
if err != nil {
log.Fatal("Getting output tensor failed with error: ", err)
}
fingerPrint := DequantizeAndL2Normalize(
output,
outputs.OutputAttributes().Scales[0],
outputs.OutputAttributes().ZPs[0],
)
// seed the EMA fingerprint to the master
emaFP := make([]float32, len(fingerPrint))
copy(emaFP, fingerPrint)
const alpha = 0.9 // smoothing factor
hash, err := FingerprintHash(fingerPrint)
if err != nil {
log.Fatalf("hashing failed: %v", err)
}
log.Println("object fingerprint:", hash)
// free outputs allocated in C memory after you have finished post processing
err = outputs.Free()
if err != nil {
log.Fatal("Error freeing Outputs: ", err)
}
// sample 2 images
yOffsets := []int{1, 195, 388}
xOffsets := []int{497, 565, 633, 701, 769, 836, 904}
images := [][]int{}
for _, ny := range yOffsets {
for _, nx := range xOffsets {
images = append(images, []int{nx, ny})
}
}
// ck lady
// images := [][]int{
// {134, 0, 117, 325},
// {251, 0, 75, 208},
// {326, 0, 68, 187},
// }
// Image 2
for frame, next := range images {
roiRect2 := image.Rect(next[0], next[1], next[0]+frameWidth, next[1]+frameHeight)
// ck lady
//roiRect2 := image.Rect(next[0], next[1], next[0]+next[2], next[1]+next[3])
roiImg2 := rgbImg.Region(roiRect2)
cropImg2 := rgbImg.Clone()
scaleSize2 := image.Pt(int(rt.InputAttrs()[0].Dims[1]), int(rt.InputAttrs()[0].Dims[2]))
gocv.Resize(roiImg2, &cropImg2, scaleSize2, 0, 0, gocv.InterpolationArea)
defer cropImg2.Close()
defer roiImg2.Close()
gocv.IMWrite(fmt.Sprintf("/tmp/frame-%d.jpg", frame), cropImg2)
start := time.Now()
batch.Clear()
err = batch.Add(cropImg2)
if err != nil {
log.Fatal("Error creating batch: ", err)
}
outputs, err = rt.Inference([]gocv.Mat{batch.Mat()})
if err != nil {
log.Fatal("Runtime inferencing failed with error: ", err)
}
endInference := time.Now()
output, err := batch.GetOutputInt(0, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
if err != nil {
log.Fatal("Getting output tensor failed with error: ", err)
}
fingerPrint2 := DequantizeAndL2Normalize(
output,
outputs.OutputAttributes().Scales[0],
outputs.OutputAttributes().ZPs[0],
)
// sim := CosineSimilarity(fingerPrint, fingerPrint2)
// dist := CosineDistance(fingerPrint, fingerPrint2)
// fmt.Printf("Frame %d, cosine similarity: %f, distance=%f\n", frame, sim, dist)
// compute Euclidean (L2) distance directly
dist := EuclideanDistance(fingerPrint, fingerPrint2)
// 3) compute vs EMA
emaDist := EuclideanDistance(emaFP, fingerPrint2)
endDetect := time.Now()
objRes := "different person"
if emaDist < 0.51 {
objRes = "same person"
}
fmt.Printf("Frame %d, euclidean distance: %f, ema=%f (%s)\n", frame, dist, emaDist, objRes)
log.Printf(" Inference=%s, detect=%s, total time=%s\n",
endInference.Sub(start).String(),
endDetect.Sub(endInference).String(),
endDetect.Sub(start).String(),
)
// free outputs allocated in C memory after you have finished post processing
err = outputs.Free()
if err != nil {
log.Fatal("Error freeing Outputs: ", err)
}
// 4) update the EMA fingerprint
if frame >= 7 && frame <= 13 {
// emaFP = α*emaFP + (1-α)*fp2
for i := range emaFP {
emaFP[i] = alpha*emaFP[i] + (1-alpha)*fingerPrint2[i]
}
// 5) renormalize emaFP back to unit length
var sum float32
for _, v := range emaFP {
sum += v * v
}
norm := float32(math.Sqrt(float64(sum)))
if norm > 0 {
for i := range emaFP {
emaFP[i] /= norm
}
}
}
}
// close runtime and release resources
err = rt.Close()
if err != nil {
log.Fatal("Error closing RKNN runtime: ", err)
}
log.Println("done")
*/
}
// Box holds object bounding box coordinates (x1, y1, x2, y2)
type Box struct {
X1, Y1, X2, Y2 int
}
// Objects is a struct to represent the compare and dataset objects parsed
// from the objects data file
type Objects struct {
Compare []Box
Dataset []Box
}
// ParseObjects reads the TOML-like objects data file and returns the two lists
// of objects and their bounding box coordinates
func ParseObjects(path string) (*Objects, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
objs := &Objects{}
section := "" // either "compare" or "dataset"
scanner := bufio.NewScanner(f)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
// skip blank or comment
if line == "" || strings.HasPrefix(line, "#") {
continue
}
// section header
if strings.HasPrefix(line, "[") && strings.HasSuffix(line, "]") {
section = strings.ToLower(line[1 : len(line)-1])
continue
}
// data line, expect four ints separated by commas
fields := strings.Split(line, ",")
if len(fields) != 4 {
return nil, fmt.Errorf("invalid data line %q", line)
}
nums := make([]int, 4)
for i, fstr := range fields {
v, err := strconv.Atoi(strings.TrimSpace(fstr))
if err != nil {
return nil, fmt.Errorf("parsing %q: %w", fstr, err)
}
nums[i] = v
}
// define box
box := Box{nums[0], nums[1], nums[2], nums[3]}
switch section {
case "compare":
objs.Compare = append(objs.Compare, box)
case "dataset":
objs.Dataset = append(objs.Dataset, box)
default:
return nil, fmt.Errorf("line %q outside of a known section", line)
}
}
if err := scanner.Err(); err != nil {
return nil, err
}
return objs, nil
}
// AddObjectToBatch adds the cropped object from the source image to the batch for
// running inference on
func AddObjectToBatch(batch *rknnlite.Batch, srcImg gocv.Mat, obj Box,
scaleSize image.Point) error {
// get the object's region of interest from the source Mat
objRect := image.Rect(obj.X1, obj.Y1, obj.X2, obj.Y2)
objRoi := srcImg.Region(objRect)
objImg := objRoi.Clone()
gocv.Resize(objRoi, &objImg, scaleSize, 0, 0, gocv.InterpolationArea)
defer objRoi.Close()
defer objImg.Close()
return batch.Add(objImg)
}
// CompareObjects compares the outputs of two objects
func CompareObjects(objA []int8, objB []int8, scales float32,
ZPs int32) float32 {
// get the fingerprint of both objects
fpA := reid.DequantizeAndL2Normalize(objA, scales, ZPs)
fpB := reid.DequantizeAndL2Normalize(objB, scales, ZPs)
// compute Euclidean (L2) distance directly
return reid.EuclideanDistance(fpA, fpB)
}


@@ -313,6 +313,19 @@ itself is not 100%. Whilst this demo shows a complete solution, you would still
need to do work to train a better model and test it for your own use case.
## Re-Identification (ReID)
Experimental ReID has been added which follows the implementation of the
[FairMOT](https://github.com/FoundationVision/ByteTrack/tree/main/tutorials/fairmot) tracker,
but makes use of the OSNet model trained on the Market1501 dataset.
ReID is expensive and typically takes around 200ms per frame to complete
on the RK3588 NPU. It offers little accuracy improvement over plain ByteTrack,
which adds negligible overhead to the YOLO object detection.
We need to wait for Rockchip's next-generation RK36xx SoC before this becomes practical.
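To experiment with it anyway, the stream example wires ReID into ByteTrack as
follows; a sketch based on that example (pool size, frame variable, and
threshold are illustrative):
```
// pool of runtimes loaded with the batch-compiled OSNet ReID model
reidPool, err := rknnlite.NewPool(3, "osnet-market1501-batch8-rk3588.rknn",
    []rknnlite.CoreMask{rknnlite.NPUCoreAuto})
if err != nil {
    log.Fatal(err)
}
reidPool.SetWantFloat(false)

// attach ReID to the tracker and pass the full frame with each update
byteTrack := tracker.NewBYTETracker(FPS, FPS*10, 0.5, 0.6, 0.8)
byteTrack.UseReID(reidPool, tracker.Euclidean, 0.51)
trackObjs, err := byteTrack.UpdateWithFrame(
    postprocess.DetectionsToObjects(detectResults), frame)
```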
## Background
The ByteTrack code is a Go conversion of the [C++ project](https://github.com/ifzhang/ByteTrack).


@@ -127,18 +127,27 @@ type Demo struct {
// renderFormat indicates which rendering type to use with instance
// segmentation, outline or mask
renderFormat string
// reidModelFile is the model to use ReID with
reidModelFile string
// reid is a flag to indicate if reid is being used or not
reid bool
// reidPool of rknnlite runtimes to perform inference in parallel
reidPool *rknnlite.Pool
}
// NewDemo returns an instance of Demo, a streaming HTTP server showing
// video with object detection
func NewDemo(vidSrc *VideoSource, modelFile, labelFile string, poolSize int,
modelType string, renderFormat string, rkPlatform string) (*Demo, error) {
modelType string, renderFormat string, rkPlatform string,
reidModelFile string, useReid bool) (*Demo, error) {
var err error
d := &Demo{
vidSrc: vidSrc,
limitObjs: make([]string, 0),
vidSrc: vidSrc,
limitObjs: make([]string, 0),
reidModelFile: reidModelFile,
reid: useReid,
}
if vidSrc.Format == VideoFile {
@@ -220,6 +229,15 @@ func NewDemo(vidSrc *VideoSource, modelFile, labelFile string, poolSize int,
log.Printf("***WARNING*** %s only has 1 TOPS NPU, downgraded to %d FPS\n", rkPlatform, FPS)
}
if d.reid {
if strings.HasPrefix(strings.ToLower(rkPlatform), "rk356") {
log.Fatal("***WARNING*** ReID is unavailable for RK356x platforms as the 1 TOPS NPU is not powerful enough")
}
FPS = 4
FPSinterval = time.Duration(float64(time.Second) / float64(FPS))
log.Println("***WARNING*** ReID is experimental and requires alot of NPU, downgraded to 4 FPS")
}
// load in Model class names
d.labels, err = rknnlite.LoadLabels(labelFile)
@@ -227,6 +245,19 @@ func NewDemo(vidSrc *VideoSource, modelFile, labelFile string, poolSize int,
return nil, fmt.Errorf("Error loading model labels: %w", err)
}
// create pool for ReID
if d.reid {
d.reidPool, err = rknnlite.NewPool(poolSize, reidModelFile,
[]rknnlite.CoreMask{rknnlite.NPUCoreAuto})
if err != nil {
log.Fatalf("Error creating ReID RKNN pool: %v\n", err)
}
// set runtime to leave output tensors as int8
d.reidPool.SetWantFloat(false)
}
return d, nil
}
@@ -360,6 +391,10 @@ func (d *Demo) Stream(w http.ResponseWriter, r *http.Request) {
// record of past object detections for tracking
byteTrack := tracker.NewBYTETracker(FPS, FPS*10, 0.5, 0.6, 0.8)
if d.reid {
byteTrack.UseReID(d.reidPool, tracker.Euclidean, 0.51)
}
// create a trails history
trail := tracker.NewTrail(90)
@@ -491,9 +526,18 @@ func (d *Demo) ProcessFrame(img gocv.Mat, retChan chan<- ResultFrame,
// track detected objects
timing.TrackerStart = time.Now()
trackObjs, err := byteTrack.Update(
postprocess.DetectionsToObjects(detectResults),
)
var trackObjs []*tracker.STrack
if d.reid {
trackObjs, err = byteTrack.UpdateWithFrame(
postprocess.DetectionsToObjects(detectResults),
resImg,
)
} else {
trackObjs, err = byteTrack.Update(
postprocess.DetectionsToObjects(detectResults),
)
}
timing.TrackerEnd = time.Now()
@@ -713,6 +757,8 @@ func main() {
renderFormat := flag.String("r", "outline", "The rendering format used for instance segmentation [outline|mask]")
codecFormat := flag.String("codec", "mjpg", "Web Camera codec The rendering format [mjpg|yuyv]")
rkPlatform := flag.String("p", "rk3588", "Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588]")
reidModelFile := flag.String("rm", "../data/models/rk3588/osnet-market1501-batch8-rk3588.rknn", "RKNN compiled OSNet/Re-Identification model file")
useReid := flag.Bool("reid", false, "Enable Re-Identification enhanced tracking")
// Initialize the custom camera resolution flag with a default value
cameraRes := &cameraResFlag{value: "1280x720@30"}
@@ -760,8 +806,12 @@ func main() {
*modelFile = strings.ReplaceAll(*modelFile, "rk3588", *rkPlatform)
}
if f := flag.Lookup("rm"); f != nil && f.Value.String() == f.DefValue && *rkPlatform != "rk3588" {
*reidModelFile = strings.ReplaceAll(*reidModelFile, "rk3588", *rkPlatform)
}
demo, err := NewDemo(vidSrc, *modelFile, *labelFile, *poolSize,
*modelType, *renderFormat, *rkPlatform)
*modelType, *renderFormat, *rkPlatform, *reidModelFile, *useReid)
if err != nil {
log.Fatalf("Error creating demo: %v", err)


@@ -60,7 +60,7 @@ func (r *Runtime) Inference(mats []gocv.Mat) (*Outputs, error) {
Index: uint32(idx),
Type: TensorFloat32,
// multiply by 4 for size of float32
Size: uint32(mat.Cols() * mat.Rows() * mat.Channels() * 4),
Size: uint32(len(data) * 4), // bytes = elements * 4
Fmt: TensorNHWC,
Buf: unsafe.Pointer(&data[0]),
PassThrough: false,
@@ -77,7 +77,7 @@ func (r *Runtime) Inference(mats []gocv.Mat) (*Outputs, error) {
inputs[idx] = Input{
Index: uint32(idx),
Type: TensorUint8,
Size: uint32(mat.Cols() * mat.Rows() * mat.Channels()),
Size: uint32(len(data)), // bytes = elements
Fmt: TensorNHWC,
Buf: unsafe.Pointer(&data[0]),
PassThrough: false,


@@ -109,6 +109,11 @@ func (p *Pool) SetWantFloat(val bool) {
}
}
// Size returns the Pool size
func (p *Pool) Size() int {
return p.size
}
// getRuntimeCore takes an integer and returns the core mask value to use from
// the coremask list
func getRuntimeCore(i int, cores []CoreMask) CoreMask {

postprocess/reid/reid.go (new file, 129 lines)

@@ -0,0 +1,129 @@
package reid
import (
"bytes"
"crypto/sha256"
"encoding/binary"
"encoding/hex"
"math"
)
// DequantizeAndL2Normalize converts a quantized int8 vector "q" into a float32 vector,
// applies dequantization using the provided scale "s" and zero-point "z",
// and then normalizes the result to unit length using L2 normalization.
//
// This is commonly used to convert quantized embedding vectors back to a
// normalized float form for comparison or similarity calculations.
//
// If the resulting vector has zero magnitude, the function returns the
// unnormalized dequantized vector.
func DequantizeAndL2Normalize(q []int8, s float32, z int32) []float32 {
N := len(q)
x := make([]float32, N)
// dequantize
for i := 0; i < N; i++ {
x[i] = float32(int32(q[i])-z) * s
}
// compute L2 norm
var sumSquares float32
for _, v := range x {
sumSquares += v * v
}
norm := float32(math.Sqrt(float64(sumSquares)))
if norm == 0 {
// avoid /0
return x
}
// normalize
for i := 0; i < N; i++ {
x[i] /= norm
}
return x
}
// FingerprintHash takes an L2-normalized []float32 and returns
// a hex-encoded SHA-256 hash of its binary representation.
func FingerprintHash(feat []float32) (string, error) {
buf := new(bytes.Buffer)
// write each float32 in little-endian
for _, v := range feat {
if err := binary.Write(buf, binary.LittleEndian, v); err != nil {
return "", err
}
}
sum := sha256.Sum256(buf.Bytes())
return hex.EncodeToString(sum[:]), nil
}
// CosineSimilarity returns the cosine of the angle between vectors a and b.
// Assumes len(a)==len(b). If you have already L2-normalized them,
// this is just their dot-product.
func CosineSimilarity(a, b []float32) float32 {
var dot float32
for i := range a {
dot += a[i] * b[i]
}
// If not already normalized, you'd divide by norms here.
return dot
}
// CosineDistance returns 1 - cosine similarity. For L2-normalized vectors
// this lies in [0,2], and small values mean "very similar".
func CosineDistance(a, b []float32) float32 {
return 1 - CosineSimilarity(a, b)
}
// EuclideanDistance returns the L2 distance between two vectors.
// Lower means "more similar" when your features are L2-normalized.
func EuclideanDistance(a, b []float32) float32 {
var sum float32
for i := range a {
d := a[i] - b[i]
sum += d * d
}
return float32(math.Sqrt(float64(sum)))
}
// NormalizeVec normalizes the input float32 slice to unit length and returns
// a new slice. If the input vector has zero magnitude, it returns the original
// slice unchanged.
func NormalizeVec(v []float32) []float32 {
norm := float32(0.0)
for _, x := range v {
norm += x * x
}
if norm == 0 {
return v // avoid division by zero
}
norm = float32(math.Sqrt(float64(norm)))
out := make([]float32, len(v))
for i, x := range v {
out[i] = x / norm
}
return out
}
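Taken together, the typical flow through these helpers is: dequantize the model's int8 embedding, then fingerprint or compare the normalized result. A minimal sketch; the raw values, scale and zero-point below are illustrative stand-ins for what the runtime's output attributes provide in practice:

```
package main

import (
	"fmt"

	"github.com/swdee/go-rknnlite/postprocess/reid"
)

func main() {
	// illustrative quantized embedding and quantization parameters
	raw := []int8{12, -3, 45, 7}
	scale, zeroPoint := float32(0.021), int32(-14)

	// convert to a unit-length float32 feature vector
	feat := reid.DequantizeAndL2Normalize(raw, scale, zeroPoint)

	// stable identifier for caching or de-duplication
	hash, err := reid.FingerprintHash(feat)
	if err != nil {
		panic(err)
	}
	fmt.Println("fingerprint:", hash)

	// compare against another normalized embedding
	other := reid.NormalizeVec([]float32{0.1, 0.2, 0.3, 0.4})
	fmt.Println("cosine distance:", reid.CosineDistance(feat, other))
	fmt.Println("euclidean distance:", reid.EuclideanDistance(feat, other))
}
```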

View File

@@ -241,6 +241,11 @@ func (r *Runtime) SetInputTypeFloat32(val bool) {
r.inputTypeFloat32 = val
}
// GetInputTypeFloat32 returns the input type if set as Float32 (true) or Int8 (false)
func (r *Runtime) GetInputTypeFloat32() bool {
return r.inputTypeFloat32
}
// SDKVersion represents the C.rknn_sdk_version struct
type SDKVersion struct {
DriverVersion string

View File

@@ -73,9 +73,6 @@ RUN git clone --depth 1 https://github.com/swdee/lpd-yolov8.git /opt/lpd-yolov8
cp /opt/rknn_model_zoo/examples/yolov8/python/convert.py /opt/rknn_model_zoo/examples/yolov8/python/convert-lpd.py && \
sed -i "s|^DATASET_PATH *= *['\"].*['\"]|DATASET_PATH = '/opt/lpd-yolov8/subset.txt'|" /opt/rknn_model_zoo/examples/yolov8/python/convert-lpd.py
# download other onnx models
RUN git clone --depth 1 https://github.com/swdee/go-rknnlite-build.git /opt/go-rknnlite-build
# Upgrade pip to the latest version
RUN pip install --upgrade pip
@@ -92,6 +89,15 @@ RUN pip install --no-cache-dir \
pyyaml \
"tensorflow<=2.16.0rc0"
# download other onnx models
RUN git clone --depth 1 https://github.com/swdee/go-rknnlite-build.git /opt/go-rknnlite-build && \
git -C /opt/go-rknnlite-build fetch --depth 1 origin ce8b5ce1dc53b1c38324e7506374731ad21070c8 && \
git -C /opt/go-rknnlite-build checkout FETCH_HEAD
# copy our modified mobilenet.py script into the rknn_model_zoo directory
RUN cp /opt/go-rknnlite-build/mobilenet-batch/mobilenet-rknn.py /opt/rknn_model_zoo/examples/mobilenet/python/mobilenet-rknn-batch.py
# By default do nothing
CMD ["bash"]

View File

@@ -30,6 +30,8 @@ MODELS=(
"mobilenet_v1 rknn_convert /opt/models/mobilenet_v1/model_config.yml '' '' mobilenet_v1"
"yolov8 convert-lpd.py /opt/lpd-yolov8/lpd-yolov8n.onnx i8 '' lpd-yolov8n"
"yolov8 convert.py /opt/go-rknnlite-build/yolonas-s.onnx i8 '' yolonas-s"
"mobilenet mobilenet-rknn-batch.py ../model/mobilenetv2-12.onnx i8 --model mobilenetv2-batch8"
"osnet-market1501 build|onnx_to_rknn.py osnet_x1_0_market_256x128.onnx i8 '' osnet-market1501-batch8"
)
# compile all entries (or just filter) for one platform
@@ -75,6 +77,7 @@ compile_for_platform() {
fi
echo "-> building $outprefix for $platform"
local out="/opt/rkmodels/${platform}/${outprefix}-${platform}.rknn"
if [[ "$script" == "rknn_convert" ]]; then
# mobilenet_v1 special: use the CLI and then rename
@@ -83,13 +86,23 @@ compile_for_platform() {
-i "$model" \
-o "/opt/rkmodels/$platform/"
mv "/opt/rkmodels/$platform/${outprefix}.rknn" \
"/opt/rkmodels/$platform/${outprefix}-${platform}.rknn"
"$out"
continue
fi
# build the go-rknnlite-build models
if [[ "$script" == build\|* ]]; then
# strip everything up to (and including) the first pipe to get script name
scriptName="${script#*|}"
# go into the go-rknnlite-build tree
pushd "/opt/go-rknnlite-build/${subdir}" >/dev/null
python "$scriptName" "$model" "$platform" "$dtype" "$out"
popd >/dev/null
continue
fi
# the old examples
pushd "/opt/rknn_model_zoo/examples/${subdir}/python/" >/dev/null
local out="/opt/rkmodels/${platform}/${outprefix}-${platform}.rknn"
if [[ "$subdir" == "mobilenet" ]]; then
python "$script" $extra "$model" \

View File

@@ -25,6 +25,10 @@ type BYTETracker struct {
lostStracks []*STrack
// List of removed objects
removedStracks []*STrack
// reid holds the ReID processing instance used for supported tracking
reid *reID
// useReid is a flag to indicate if ReID supported tracking is to be used
useReid bool
}
// NewBYTETracker initializes and returns a new BYTETracker
@@ -62,6 +66,10 @@ func (bt *BYTETracker) Update(objects []Object) ([]*STrack, error) {
strack := NewSTrack(NewRect(object.Rect.X(), object.Rect.Y(), object.Rect.Width(), object.Rect.Height()),
object.Prob, object.ID, object.Label)
if bt.useReid {
strack.WithFeature(object.Feature, 0.9, 30)
}
if object.Prob >= bt.trackThresh {
detStracks = append(detStracks, strack)
} else {
@@ -87,11 +95,18 @@ func (bt *BYTETracker) Update(objects []Object) ([]*STrack, error) {
strack.Predict()
}
// Step 2: First association, with IoU
// Step 2: First association, using IoU or feature distance matching
var currentTrackedStracks, remainTrackedStracks, remainDetStracks, refindStracks []*STrack
var costMatrix [][]float32
if bt.useReid {
costMatrix = bt.calcFeatureDistance(strackPool, detStracks)
} else {
costMatrix = bt.calcIouDistance(strackPool, detStracks)
}
matchesIdx, unmatchTrackIdx, unmatchDetectionIdx, err := bt.linearAssignment(
bt.calcIouDistance(strackPool, detStracks),
costMatrix,
len(strackPool), len(detStracks), bt.matchThresh,
)
@@ -126,7 +141,8 @@ func (bt *BYTETracker) Update(objects []Object) ([]*STrack, error) {
}
}
// Step 3: Second association, using low score dets
// Step 3: IoU fallback matching for unmatched tracks,
// using low-score detections
var currentLostStracks []*STrack
matchesIdx, unmatchTrackIdx, unmatchDetectionIdx, err = bt.linearAssignment(
@@ -162,6 +178,7 @@ func (bt *BYTETracker) Update(objects []Object) ([]*STrack, error) {
}
// Step 4: Init new stracks
// Match non-active to unmatched remainingDetStracks (high confidence only)
var currentRemovedStracks []*STrack
matchesIdx, unmatchUnconfirmedIdx, unmatchDetectionIdx, err := bt.linearAssignment(
@@ -197,7 +214,7 @@ func (bt *BYTETracker) Update(objects []Object) ([]*STrack, error) {
currentTrackedStracks = append(currentTrackedStracks, track)
}
// Step 5: Update state
// Step 5: Update state - Time-based removal of old lost tracks
for _, lostStrack := range bt.lostStracks {
if bt.frameID-lostStrack.GetFrameID() > bt.maxTimeLost {
lostStrack.MarkAsRemoved()
@@ -508,3 +525,21 @@ func (bt *BYTETracker) execLapjv(cost [][]float32, extendCost bool,
return rowsol, colsol, opt, nil
}
// calcFeatureDistance builds a cost matrix of embedded feature distances
// between the given tracks and detections
func (bt *BYTETracker) calcFeatureDistance(tracks, detections []*STrack) [][]float32 {
cost := make([][]float32, len(tracks))
for i, tr := range tracks {
cost[i] = make([]float32, len(detections))
for j, det := range detections {
cost[i][j] = tr.BestMatchDistance(det.feature)
}
}
return cost
}
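The matrix has one row per existing track and one column per detection, with each entry capped at 1.0 by BestMatchDistance. The same shape can be built directly from raw embeddings using the reid helpers; a standalone sketch:

```
// featureCostMatrix mirrors calcFeatureDistance but works on raw
// embeddings rather than STracks
func featureCostMatrix(trackFeats, detFeats [][]float32) [][]float32 {
	cost := make([][]float32, len(trackFeats))
	for i, tf := range trackFeats {
		cost[i] = make([]float32, len(detFeats))
		for j, df := range detFeats {
			// lower distance means more likely the same object
			cost[i][j] = reid.EuclideanDistance(
				reid.NormalizeVec(tf), reid.NormalizeVec(df))
		}
	}
	return cost
}
```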

View File

@@ -11,6 +11,8 @@ type Object struct {
// ID is a unique ID to give this object which can be used to match
// the input detection object and tracked object
ID int64
// Feature is a ReID embedding feature
Feature []float32
}
// NewObject is a constructor function for the Object struct
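When calling Update() directly instead of UpdateWithFrame(), the Feature field must be filled by the caller with the embedding from ReID inference. A sketch using a struct literal; the remaining fields (Rect, Prob, Label) are set as in the existing examples:

```
obj := tracker.Object{
	ID:      42,   // detection ID to match input and tracked objects
	Feature: feat, // ReID embedding, e.g. from reid.DequantizeAndL2Normalize
}
```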

246
tracker/reid.go Normal file
View File

@@ -0,0 +1,246 @@
package tracker
import (
"fmt"
"github.com/swdee/go-rknnlite"
"github.com/swdee/go-rknnlite/postprocess/reid"
"gocv.io/x/gocv"
"image"
"sync"
)
// DistanceMethod defines ReID distance calculation methods
type DistanceMethod int
const (
Euclidean DistanceMethod = 1
Cosine DistanceMethod = 2
)
// reID struct holds all ReIdentification processing features
type reID struct {
// pool is the rknnlite runtime pool to run inference on
pool *rknnlite.Pool
// dist is the distance method used to determine similarity
dist DistanceMethod
// threshold is the distance cutoff used to decide whether two objects
// are considered the same
threshold float32
// batchSize stores the model input tensor batch size
batchSize int
width int
height int
channels int
// batchPool holds a pool of batches
batchPool *rknnlite.BatchPool
// scaleSize is the input tensor size to scale the object to
scaleSize image.Point
}
// UseReID sets up Re-Identification processing on the BYTETracker instance
func (bt *BYTETracker) UseReID(pool *rknnlite.Pool, dist DistanceMethod,
threshold float32) {
// query runtime and get tensor dimensions
rt := pool.Get()
batchSize := int(rt.InputAttrs()[0].Dims[0])
width := int(rt.InputAttrs()[0].Dims[1])
height := int(rt.InputAttrs()[0].Dims[2])
channels := int(rt.InputAttrs()[0].Dims[3])
bt.reid = &reID{
pool: pool,
dist: dist,
threshold: threshold,
batchSize: batchSize,
width: width,
height: height,
channels: channels,
scaleSize: image.Pt(width, height),
batchPool: rknnlite.NewBatchPool(pool.Size(), rt),
}
pool.Return(rt)
bt.useReid = true
}
// UpdateWithFrame updates the tracker with new detections and passes the
// image frame so ReID inference can be conducted
func (bt *BYTETracker) UpdateWithFrame(objects []Object, frame gocv.Mat) ([]*STrack, error) {
// check if ReID is enabled and get embedding features for all objects
if bt.useReid {
bufFrame := frame.Clone()
defer bufFrame.Close()
features, err := bt.reid.processObjects(objects, bufFrame)
if err != nil {
return nil, fmt.Errorf("failed to process objects: %w", err)
}
for i := range objects {
objects[i].Feature = features[i]
}
}
// run track update
tracks, err := bt.Update(objects)
if err != nil {
return nil, fmt.Errorf("error updating objects: %w", err)
}
return tracks, nil
}
// Close frees memory from reid instance
func (r *reID) Close() {
r.batchPool.Close()
}
// processObjects takes the detected objects and runs inference on them to get
// their embedded feature fingerprints. Each batch is dispatched to its own
// goroutine and the call blocks until all batches have completed.
func (r *reID) processObjects(objects []Object, frame gocv.Mat) ([][]float32, error) {
var wg sync.WaitGroup
total := len(objects)
// collect per-object feature embeddings
allEmbeddings := make([][]float32, total)
errCh := make(chan error, (total+r.batchSize-1)/r.batchSize)
for offset := 0; offset < total; offset += r.batchSize {
end := offset + r.batchSize
if end > total {
end = total
}
batchObjs := objects[offset:end]
// capture range variables for closure
capOffset := offset
capCnt := end - offset
wg.Add(1)
batch := r.batchPool.Get()
rt := r.pool.Get()
go func(rt *rknnlite.Runtime, batch *rknnlite.Batch, bobjs []Object, off, cnt int) {
defer wg.Done()
fps, err := r.processBatch(rt, batch, bobjs, frame)
r.pool.Return(rt)
r.batchPool.Return(batch)
if err != nil {
errCh <- err
return
}
// copy this batch's fingerprints into the correct offset of the
// overall results
for i := 0; i < cnt; i++ {
allEmbeddings[off+i] = fps[i]
}
errCh <- nil
}(rt, batch, batchObjs, capOffset, capCnt)
}
wg.Wait()
close(errCh)
// if any error, just bail
for e := range errCh {
if e != nil {
return nil, fmt.Errorf("ReID error: %w", e)
}
}
return allEmbeddings, nil
}
// processBatch adds the objects to a batch and runs inference on them
func (r *reID) processBatch(rt *rknnlite.Runtime, batch *rknnlite.Batch,
bobjs []Object, frame gocv.Mat) ([][]float32, error) {
height := frame.Rows()
width := frame.Cols()
for _, obj := range bobjs {
// clamp and get bounding box coordinates
x1 := clamp(int(obj.Rect.TLX()), 0, width)
y1 := clamp(int(obj.Rect.TLY()), 0, height)
x2 := clamp(int(obj.Rect.BRX()), 0, width)
y2 := clamp(int(obj.Rect.BRY()), 0, height)
objRect := image.Rect(x1, y1, x2, y2)
// get the object's region of interest from the source Mat
objRoi := frame.Region(objRect)
objImg := gocv.NewMat()
// resize to input tensor size
gocv.Resize(objRoi, &objImg, r.scaleSize, 0, 0, gocv.InterpolationArea)
objRoi.Close()
err := batch.Add(objImg)
objImg.Close()
if err != nil {
return nil, fmt.Errorf("error adding image to batch: %w", err)
}
}
// run inference on the batch
outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})
if err != nil {
return nil, fmt.Errorf("inference failed: %v", err)
}
defer outputs.Free()
// unpack per object results
fingerprints := make([][]float32, len(bobjs))
for idx := 0; idx < len(bobjs); idx++ {
output, err := batch.GetOutputInt(idx, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
if err != nil {
return nil, fmt.Errorf("error getting output %d: %v", idx, err)
}
// get object fingerprint
fingerprints[idx] = reid.DequantizeAndL2Normalize(
output,
outputs.OutputAttributes().Scales[0],
outputs.OutputAttributes().ZPs[0],
)
}
return fingerprints, nil
}
// clamp restricts the value val to be within the range min and max
func clamp(val, min, max int) int {
if val > min {
if val < max {
return val
}
}
return max
}
return min
}
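End to end, enabling ReID on a tracker takes three steps: create a runtime pool for the ReID model, call UseReID, then feed frames through UpdateWithFrame instead of Update. A hedged sketch; pool and tracker construction follow the existing examples, and the 0.4 threshold, detect() helper and frames channel are placeholders:

```
// assumes: pool created for an osnet ReID model as in example/pool,
// and bt created as in the existing ByteTrack examples
bt.UseReID(pool, tracker.Euclidean, 0.4)

for frame := range frames { // gocv.Mat video frames
	// YOLO detections converted to tracker.Object values
	objects := detect(frame)

	tracks, err := bt.UpdateWithFrame(objects, frame)
	if err != nil {
		log.Fatal(err)
	}

	for _, t := range tracks {
		_ = t.GetRect() // draw or report each tracked object
	}
}
```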

View File

@@ -2,6 +2,7 @@ package tracker
import (
"fmt"
"github.com/swdee/go-rknnlite/postprocess/reid"
"gonum.org/v1/gonum/mat"
)
@@ -47,6 +48,18 @@ type STrack struct {
detectionID int64
// label is the object label/class from yolo inference
label int
// feature embedding used for ReID
feature []float32
// smoothFeature an EMA smoothed feature embedding used for ReID
smoothFeature []float32
// featureQueue is a history of features
featureQueue [][]float32
// maxQueueSize is the featureQueue maximum size, eg: 30
maxQueueSize int
// alpha is the value used in the EMA smoothing calculation
alpha float32
// hasFeature is a flag to indicate if WithFeature() has been set
hasFeature bool
}
// NewSTrack creates a new STrack
@@ -68,6 +81,15 @@ func NewSTrack(rect Rect, score float32, detectionID int64, label int) *STrack {
}
}
// WithFeature adds an object's embedded feature from ReID inference to the STrack
func (s *STrack) WithFeature(feature []float32, alpha float32, qsize int) {
s.hasFeature = true
s.alpha = alpha
s.maxQueueSize = qsize
s.featureQueue = make([][]float32, 0, qsize)
s.UpdateFeatures(feature)
}
// GetRect returns the bounding box of the tracked object
func (s *STrack) GetRect() *Rect {
return &s.rect
@@ -155,6 +177,8 @@ func (s *STrack) ReActivate(newTrack *STrack, frameID, newTrackID int) {
s.frameID = frameID
s.trackletLen = 0
s.UpdateFeatures(newTrack.feature)
}
// Predict predicts the next state of the track
@@ -185,6 +209,8 @@ func (s *STrack) Update(newTrack *STrack, frameID int) error {
s.frameID = frameID
s.trackletLen++
s.UpdateFeatures(newTrack.feature)
return nil
}
@@ -205,3 +231,58 @@ func (s *STrack) updateRect() {
s.rect.SetX(s.mean[0] - s.rect.Width()/2)
s.rect.SetY(s.mean[1] - s.rect.Height()/2)
}
// UpdateFeatures updates an STrack's ReID embedded features
func (s *STrack) UpdateFeatures(feat []float32) {
if !s.hasFeature {
return
}
normFeat := reid.NormalizeVec(feat)
s.feature = normFeat
if s.smoothFeature == nil {
s.smoothFeature = make([]float32, len(normFeat))
copy(s.smoothFeature, normFeat)
} else {
for i := range normFeat {
s.smoothFeature[i] = s.alpha*s.smoothFeature[i] + (1-s.alpha)*normFeat[i]
}
s.smoothFeature = reid.NormalizeVec(s.smoothFeature)
}
// Enqueue the feature
s.featureQueue = append(s.featureQueue, normFeat)
if len(s.featureQueue) > s.maxQueueSize {
s.featureQueue = s.featureQueue[1:]
}
}
// BestMatchDistance compares a new detection against all stored past features
// and returns the smallest Euclidean distance found
func (s *STrack) BestMatchDistance(detFeat []float32) float32 {
if !s.hasFeature {
// feature not set so return max distance
return 1.0
}
if len(s.featureQueue) == 0 {
return 1.0 // max distance
}
detNorm := reid.NormalizeVec(detFeat)
best := float32(1.0)
for _, f := range s.featureQueue {
d := reid.EuclideanDistance(f, detNorm)
if d < best {
best = d
}
}
return best
}
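For intuition on the EMA update in UpdateFeatures: with the alpha of 0.9 that Update() passes to WithFeature(), each new observation contributes only 10% to the smoothed embedding, so a track's appearance adapts gradually rather than jumping to the latest crop. One update step with illustrative numbers:

```
// one EMA step, mirroring UpdateFeatures
alpha := float32(0.9)
smooth := []float32{0.6, 0.8} // previous smoothed feature (unit length)
newF := []float32{1.0, 0.0}   // latest normalized observation

for i := range newF {
	smooth[i] = alpha*smooth[i] + (1-alpha)*newF[i]
}
// re-normalize to unit length, as UpdateFeatures does
smooth = reid.NormalizeVec(smooth)
// smooth is now ~[0.664, 0.747]: nudged toward newF, dominated by history
```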