mirror of https://github.com/swdee/go-rknnlite.git
synced 2025-12-24 10:30:56 +08:00
Merge pull request #43 from swdee/reid
Re-Identification and Batch processing
This commit is contained in:
21 README.md
@@ -75,6 +75,7 @@ See the [example](example) directory.
* Image Classification
  * [MobileNet Demo](example/mobilenet)
  * [Pooled Runtime Usage](example/pool)
  * [Batch Input Usage](example/batch)
* Object Detection
  * [YOLOv5 Demo](example/yolov5)
  * [YOLOv8 Demo](example/yolov8)
@@ -97,6 +98,8 @@ See the [example](example) directory.
  * [PPOCR Detect](example/ppocr#ppocr-detect) - Takes an image and detects areas of text.
  * [PPOCR Recognise](example/ppocr#ppocr-recognise) - Takes an area of text and performs OCR on it.
  * [PPOCR System](example/ppocr#ppocr-system) - Combines both Detect and Recognise.
* Tracking
  * [Re-Identification Demo](example/reid) - Re-Identify (ReID) similar objects for tracking, uses batch processing.
* Streaming
  * [HTTP Stream with ByteTrack Tracking](example/stream) - Demo that streams a video over HTTP with YOLO object detection and ByteTrack object tracking.
* Slicing Aided Hyper Inference
@@ -164,6 +167,24 @@ If you use `rknnlite.NewRuntimeByPlatform()` instead this will be automatically
set for you.


## Runtime Inference

Once a Runtime has been created, inference is performed by passing the input
tensors.

```
rt.Inference([]gocv.Mat{})
```

The `Inference()` function takes a slice of gocv.Mats, where the number of
elements in the slice corresponds to the total number of input tensors the
Model has. Most models have only a single input tensor, so only a single
gocv.Mat would be passed here.

If you want to pass multiple images in a single `Inference()` call, then you need
to use [Batching](example/batch).
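
Putting it together, a minimal end-to-end sketch (an editor's illustration, not part of this commit — `model.rknn` and `image.jpg` are placeholder file names, and the image is assumed to already match the model's input dimensions):

```
rt, err := rknnlite.NewRuntimeByPlatform("rk3588", "model.rknn")
if err != nil {
	log.Fatal(err)
}
defer rt.Close()

img := gocv.IMRead("image.jpg", gocv.IMReadColor)
defer img.Close()

// single input tensor, so a one-element slice
outputs, err := rt.Inference([]gocv.Mat{img})
if err != nil {
	log.Fatal(err)
}

// ... post process outputs ...

// free the C memory backing the output tensors when done
outputs.Free()
```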

## CPU Affinity

The performance of the NPU is affected by which CPU cores your program runs on, so
188 batch.go Normal file
@@ -0,0 +1,188 @@
package rknnlite

import (
	"fmt"
	"gocv.io/x/gocv"
)

// Batch defines a struct used for concatenating a batch of gocv.Mats
// together into a single gocv.Mat for use with image batching on
// a Model
type Batch struct {
	mat gocv.Mat
	// size of the batch
	size int
	// width is the input tensor size width
	width int
	// height is the input tensor size height
	height int
	// channels is the input tensor number of channels
	channels int
	// inputTypeFloat32 sets the runtime.inputTypeFloat32 value
	inputTypeFloat32 bool
	// matType is the Mat type images must be passed as
	matType gocv.MatType
	// matCnt is a counter for how many Mats have been added with Add()
	matCnt int
	// imgSize stores an image's size as its number of elements
	imgSize int
}

// NewBatch creates a batch of concatenated Mats for the given input tensor
// and batch size
func NewBatch(batchSize, height, width, channels int, inputTypeFloat32 bool) *Batch {

	// choose output Mat type
	var matType gocv.MatType

	if inputTypeFloat32 {
		matType = gocv.MatTypeCV32F
	} else {
		matType = gocv.MatTypeCV8U
	}

	shape := []int{batchSize, height, width, channels}

	return &Batch{
		size:             batchSize,
		height:           height,
		width:            width,
		channels:         channels,
		mat:              gocv.NewMatWithSizes(shape, matType),
		inputTypeFloat32: inputTypeFloat32,
		matType:          matType,
		matCnt:           0,
		imgSize:          height * width * channels,
	}
}

// Add a Mat to the batch
func (b *Batch) Add(img gocv.Mat) error {

	// check if batch is full
	if b.matCnt >= b.size {
		return fmt.Errorf("batch full")
	}

	err := b.addAt(b.matCnt, img)

	if err != nil {
		return err
	}

	// increment image counter
	b.matCnt++
	return nil
}

// AddAt adds a Mat to the batch at the specific index location
func (b *Batch) AddAt(idx int, img gocv.Mat) error {

	if idx < 0 || idx >= b.size {
		return fmt.Errorf("index %d out of range [0-%d)", idx, b.size)
	}

	return b.addAt(idx, img)
}

// addAt adds a Mat to the specified index location
func (b *Batch) addAt(idx int, img gocv.Mat) error {

	// validate mat dimensions
	if img.Rows() != b.height || img.Cols() != b.width ||
		img.Channels() != b.channels {
		return fmt.Errorf("image does not match batch shape")
	}

	if !img.IsContinuous() {
		img = img.Clone()
	}

	if b.inputTypeFloat32 {
		// pointer of the batch mat
		dstAll, err := b.mat.DataPtrFloat32()

		if err != nil {
			return fmt.Errorf("error accessing float32 batch memory: %w", err)
		}

		src, err := img.DataPtrFloat32()

		if err != nil {
			return fmt.Errorf("error getting float32 data from image: %w", err)
		}

		offset := idx * b.imgSize
		copy(dstAll[offset:], src)

	} else {
		// pointer of the batch mat
		dstAll, err := b.mat.DataPtrUint8()

		if err != nil {
			return fmt.Errorf("error accessing uint8 batch memory: %w", err)
		}

		src, err := img.DataPtrUint8()

		if err != nil {
			return fmt.Errorf("error getting uint8 data from image: %w", err)
		}

		offset := idx * b.imgSize
		copy(dstAll[offset:], src)
	}

	return nil
}

// GetOutputInt returns the tensor output for the specified image number
// as an int8 output. idx starts counting from 0 to (batch size - 1)
func (b *Batch) GetOutputInt(idx int, outputs Output, size int) ([]int8, error) {

	if idx < 0 || idx >= b.size {
		return nil, fmt.Errorf("index %d out of range [0-%d)", idx, b.size)
	}

	offset := idx * size

	if offset+size > int(outputs.Size) {
		return nil, fmt.Errorf("requested range [%d,%d) exceeds output size %d", offset, offset+size, outputs.Size)
	}

	return outputs.BufInt[offset : offset+size], nil
}

// GetOutputF32 returns the tensor output for the specified image number
// as a float32 output. idx starts counting from 0 to (batch size - 1)
func (b *Batch) GetOutputF32(idx int, outputs Output, size int) ([]float32, error) {

	if idx < 0 || idx >= b.size {
		return nil, fmt.Errorf("index %d out of range [0-%d)", idx, b.size)
	}

	offset := idx * size

	if offset+size > int(outputs.Size) {
		return nil, fmt.Errorf("requested range [%d,%d) exceeds output size %d", offset, offset+size, outputs.Size)
	}

	return outputs.BufFloat[offset : offset+size], nil
}

// Mat returns the concatenated mat
func (b *Batch) Mat() gocv.Mat {
	return b.mat
}

// Clear the batch so it can be reused again
func (b *Batch) Clear() {
	// just reset the counter, we don't need to clear the underlying b.mat
	// as it will be overwritten when Add() is called with new images
	b.matCnt = 0
}

// Close the batch and free allocated memory
func (b *Batch) Close() error {
	return b.mat.Close()
}
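
As a quick orientation (an editor's sketch, not part of this commit), the Batch type above is used by filling it and slicing the results back out per image — `rt` is assumed to be a Runtime loaded with a model compiled for a matching batch size, `imgs` a slice of preprocessed gocv.Mats, and `outSize` the per-image output element count:

```
batch := rknnlite.NewBatch(8, 224, 224, 3, rt.GetInputTypeFloat32())
defer batch.Close()

// Add() copies each image's pixels into the next slot of the batch Mat
for _, img := range imgs {
	if err := batch.Add(img); err != nil {
		log.Fatal(err)
	}
}

// the whole batch is passed as a single input tensor
outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})
if err != nil {
	log.Fatal(err)
}

// slice out the int8 results for the image at index 0
out, err := batch.GetOutputInt(0, outputs.Output[0], outSize)
```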
329 batch_test.go Normal file
@@ -0,0 +1,329 @@
package rknnlite

import (
	"errors"
	"flag"
	"fmt"
	"gocv.io/x/gocv"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"testing"
	"time"
)

var modelFiles = flag.String("m", "osnet_x1_0_market_256x128-rk3588-batch{1,4,8,16}.rknn",
	"RKNN compiled model files in format <name>-batch{N1,N2,...,Nk}.rknn")
var rkPlatform = flag.String("p", "rk3588",
	"Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588]")

// expandModelPattern takes a pattern like
//
//	"/some/dir/osnet_x1_0_market_256x128-rk3588-batch{1,4,8,16}.rknn"
//
// and returns a modelBatches entry per batch size with the expanded
// file names:
//
//	"/some/dir/osnet_x1_0_market_256x128-rk3588-batch1.rknn",
//	"/some/dir/osnet_x1_0_market_256x128-rk3588-batch4.rknn",
//	"/some/dir/osnet_x1_0_market_256x128-rk3588-batch8.rknn",
//	"/some/dir/osnet_x1_0_market_256x128-rk3588-batch16.rknn",
func expandModelPattern(pattern string) ([]modelBatches, error) {

	// split off the directory and file
	dir, file := filepath.Split(pattern)

	// match exactly "<prefix>-batch{n1,n2,...}.rknn"
	re := regexp.MustCompile(`^(.+)-batch\{([\d,]+)\}\.rknn$`)
	m := re.FindStringSubmatch(file)

	if m == nil {
		return nil, errors.New("invalid pattern: must be name-batch{n1,n2,...}.rknn")
	}

	prefix := m[1]  // e.g. "osnet_x1_0_market_256x128-rk3588"
	numsCSV := m[2] // e.g. "1,4,8,16"
	nums := strings.Split(numsCSV, ",")
	out := make([]modelBatches, 0, len(nums))

	for _, strNum := range nums {

		num, err := strconv.Atoi(strNum)

		if err != nil {
			return nil, fmt.Errorf("invalid batch size %q: %w", strNum, err)
		}

		name := fmt.Sprintf("%s-batch%d.rknn", prefix, num)

		out = append(out, modelBatches{
			batchSize: num,
			modelFile: filepath.Join(dir, name),
		})
	}

	return out, nil
}

type modelBatches struct {
	batchSize int
	modelFile string
}

// BenchmarkBatchSize runs benchmarks against multiple models to work out per
// image inference time.
func BenchmarkBatchSize(b *testing.B) {

	flag.Parse()

	// from the modelFiles argument create a table of model files and corresponding
	// batch sizes
	cases, err := expandModelPattern(*modelFiles)

	if err != nil {
		b.Fatalf("Invalid modelFile syntax: %v", err)
	}

	const (
		height   = 256
		width    = 128
		channels = 3
	)

	for _, tc := range cases {
		tc := tc // capture

		b.Run(fmt.Sprintf("Batch%02d", tc.batchSize), func(b *testing.B) {

			// set CPU affinity before loading the RKNN model for this batch size
			err := SetCPUAffinityByPlatform(*rkPlatform, FastCores)

			if err != nil {
				b.Fatalf("Failed to set CPU Affinity: %v", err)
			}

			// check if user specified model file or if default is being used. if default
			// then pick the default platform model to use.
			modelFile := tc.modelFile

			if *rkPlatform != "rk3588" {
				modelFile = strings.ReplaceAll(modelFile, "rk3588", *rkPlatform)
			}

			// create rknn runtime instance
			rt, err := NewRuntimeByPlatform(*rkPlatform, modelFile)

			if err != nil {
				b.Fatalf("Error initializing RKNN runtime: %v", err)
			}

			defer rt.Close()

			// set runtime to leave output tensors as int8
			rt.SetWantFloat(false)

			// prepare zero images
			imgs := make([]gocv.Mat, tc.batchSize)

			for i := range imgs {
				m := gocv.Zeros(height, width, gocv.MatTypeCV8UC3)
				defer m.Close()
				imgs[i] = m
			}

			// pre-allocate the batch container
			batch := NewBatch(tc.batchSize, height, width, channels, rt.inputTypeFloat32)
			defer batch.Close()

			b.ResetTimer()
			var totalInf time.Duration

			for i := 0; i < b.N; i++ {
				batch.Clear()
				start := time.Now()

				for _, img := range imgs {
					if err := batch.Add(img); err != nil {
						b.Fatalf("Add() error: %v", err)
					}
				}

				if _, err := rt.Inference([]gocv.Mat{batch.Mat()}); err != nil {
					b.Fatalf("Inference() error: %v", err)
				}

				totalInf += time.Since(start)
			}

			b.StopTimer()

			// milliseconds per batch
			msBatch := float64(totalInf.Nanoseconds()) / 1e6 / float64(b.N)
			b.ReportMetric(msBatch, "ms/batch")

			// milliseconds per image
			msImg := msBatch / float64(tc.batchSize)
			b.ReportMetric(msImg, "ms/img")
		})
	}
}

func TestBatchAddAndOverflow(t *testing.T) {

	r := &Runtime{inputTypeFloat32: false}

	batch := NewBatch(2, 2, 3, 1, r.inputTypeFloat32)
	defer batch.Close()

	// create Mats with known data
	m1 := gocv.NewMatWithSize(2, 3, gocv.MatTypeCV8U)
	defer m1.Close()

	buf1, _ := m1.DataPtrUint8()

	for i := range buf1 {
		buf1[i] = uint8(i + 1) // 1,2,3...6
	}

	m2 := gocv.NewMatWithSize(2, 3, gocv.MatTypeCV8U)
	defer m2.Close()

	buf2, _ := m2.DataPtrUint8()

	for i := range buf2 {
		buf2[i] = uint8((i + 1) * 10) // 10,20,...60
	}

	// Add two images
	if err := batch.Add(m1); err != nil {
		t.Fatalf("Add(m1) failed: %v", err)
	}

	if err := batch.Add(m2); err != nil {
		t.Fatalf("Add(m2) failed: %v", err)
	}

	// underlying batch mat should contain both
	bMat := batch.Mat()
	allData, err := bMat.DataPtrUint8()

	if err != nil {
		t.Fatalf("DataPtrUint8 on batch failed: %v", err)
	}

	// first 6 from buf1, next 6 from buf2
	for i := 0; i < 6; i++ {
		if allData[i] != buf1[i] {
			t.Errorf("element %d = %d; want %d from img1", i, allData[i], buf1[i])
		}
	}

	for i := 0; i < 6; i++ {
		if allData[6+i] != buf2[i] {
			t.Errorf("element %d = %d; want %d from img2", 6+i, allData[6+i], buf2[i])
		}
	}

	// third Add should overflow
	m3 := gocv.NewMatWithSize(2, 3, gocv.MatTypeCV8U)
	defer m3.Close()
	err3 := batch.Add(m3)

	if err3 == nil {
		t.Fatal("expected overflow error on third Add, got nil")
	}
}

func TestBatchAddAtAndClear(t *testing.T) {

	r := &Runtime{inputTypeFloat32: false}

	batch := NewBatch(3, 2, 2, 1, r.inputTypeFloat32)
	defer batch.Close()

	m := gocv.NewMatWithSize(2, 2, gocv.MatTypeCV8U)
	defer m.Close()

	dat, _ := m.DataPtrUint8()

	for i := range dat {
		dat[i] = uint8(i + 5)
	}

	// AddAt index 1
	if err := batch.AddAt(1, m); err != nil {
		t.Fatalf("AddAt failed: %v", err)
	}

	// matCnt should still be zero
	if batch.matCnt != 0 {
		t.Errorf("matCnt = %d; want 0 after AddAt", batch.matCnt)
	}

	// Clear resets matCnt
	batch.Clear()

	if batch.matCnt != 0 {
		t.Errorf("matCnt = %d; want 0 after Clear", batch.matCnt)
	}

	// Add at invalid index
	err := batch.AddAt(5, m)

	if err == nil {
		t.Error("expected error for AddAt out of range, got nil")
	}
}

func TestGetOutputIntAndF32(t *testing.T) {

	r := &Runtime{inputTypeFloat32: false}

	batch := NewBatch(2, 2, 2, 1, r.inputTypeFloat32)
	defer batch.Close()

	// test GetOutputInt bounds
	dOut := Output{BufInt: []int8{1, 2, 3, 4}, Size: 4}

	if _, err := batch.GetOutputInt(-1, dOut, 2); err == nil {
		t.Error("expected error for GetOutputInt idx<0")
	}

	if _, err := batch.GetOutputInt(2, dOut, 2); err == nil {
		t.Error("expected error for GetOutputInt idx>=size")
	}

	// valid slice
	slice, err := batch.GetOutputInt(1, dOut, 2)

	if err != nil {
		t.Errorf("GetOutputInt failed: %v", err)
	}

	if len(slice) != 2 {
		t.Errorf("len(slice) = %d; want 2", len(slice))
	}

	// test GetOutputF32 bounds
	dOutF := Output{BufFloat: []float32{1, 2, 3, 4}, Size: 4}

	if _, err := batch.GetOutputF32(-1, dOutF, 2); err == nil {
		t.Error("expected error for GetOutputF32 idx<0")
	}

	if _, err := batch.GetOutputF32(2, dOutF, 2); err == nil {
		t.Error("expected error for GetOutputF32 idx>=size")
	}

	sliceF, err := batch.GetOutputF32(0, dOutF, 2)

	if err != nil {
		t.Errorf("GetOutputF32 failed: %v", err)
	}

	if len(sliceF) != 2 {
		t.Errorf("len(sliceF) = %d; want 2", len(sliceF))
	}
}
75 batchpool.go Normal file
@@ -0,0 +1,75 @@
package rknnlite

import (
	"sync"
)

// BatchPool is a pool of batches
type BatchPool struct {
	// pool of batches
	batches chan *Batch
	// size of pool
	size  int
	close sync.Once
}

// NewBatchPool returns a pool of Batches
func NewBatchPool(size int, rt *Runtime) *BatchPool {

	p := &BatchPool{
		batches: make(chan *Batch, size),
		size:    size,
	}

	batchSize := int(rt.InputAttrs()[0].Dims[0])
	width := int(rt.InputAttrs()[0].Dims[1])
	height := int(rt.InputAttrs()[0].Dims[2])
	channels := int(rt.InputAttrs()[0].Dims[3])
	inputType := rt.GetInputTypeFloat32()

	// create batch pool to be the same size as the runtime pool
	for i := 0; i < size; i++ {
		batch := NewBatch(
			batchSize,
			height,
			width,
			channels,
			inputType,
		)

		// attach to pool
		p.Return(batch)
	}

	return p
}

// Get returns a batch from the pool
func (p *BatchPool) Get() *Batch {
	return <-p.batches
}

// Return a batch to the pool
func (p *BatchPool) Return(batch *Batch) {

	batch.Clear()

	select {
	case p.batches <- batch:
	default:
		// pool is full or closed
	}
}

// Close the pool and all batches in it
func (p *BatchPool) Close() {
	p.close.Do(func() {
		// close channel
		close(p.batches)

		// close all batches remaining in the pool
		for next := range p.batches {
			_ = next.Close()
		}
	})
}
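
A minimal usage sketch (an editor's illustration, not part of this commit — `rt` is assumed to be a Runtime loaded with the batch model):

```
pool := rknnlite.NewBatchPool(3, rt)
defer pool.Close()

// take a pre-allocated batch, fill it, and run inference
batch := pool.Get()
// ... batch.Add() images, then rt.Inference([]gocv.Mat{batch.Mat()}) ...

// Return() clears the batch and makes it available for reuse
pool.Return(batch)
```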
230 example/batch/README.md Normal file
@@ -0,0 +1,230 @@

# Batch Models

## Overview

Typically computer vision inference models have a single input tensor in
the shape of `NHWC` such as `[1,224,224,3]`. The rknn-toolkit2 allows you to
build the model with Batch tensor inputs by setting the `rknn_batch_size` parameter
in the following python conversion script.

```
rknn.build(do_quantization=do_quant, dataset=DATASET_PATH, rknn_batch_size=8)
```

This results in a .rknn model with modified tensor input dimensions of `[8,224,224,3]`.

When taking input from a video source frame-by-frame, batching is of little
use, as you're only dealing with a single frame that needs to be processed as
soon as possible. However, batching can be useful if you have many images to
process at a single point in time. Some examples of this could be:
* Running YOLO object detection on a frame, then passing all detected objects
through a ReIdentification model in batches (see the sketch after this list).
* Some applications will buffer video frames and, upon an external signal,
trigger the processing of those buffered frames as a batch.
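
A rough sketch of the first use case (an editor's illustration — `detections` and `frame` are hypothetical, and the dimensions are assumed to come from the model's input tensor):

```
batch := rknnlite.NewBatch(batchSize, height, width, channels, rt.GetInputTypeFloat32())
defer batch.Close()

// crop each detected object and queue it for one batched forward pass.
// Add() copies the pixel data, so the crops can be closed immediately.
for _, det := range detections {
	roi := frame.Region(det.Rect)
	crop := gocv.NewMat()
	gocv.Resize(roi, &crop, image.Pt(width, height), 0, 0, gocv.InterpolationArea)

	if err := batch.Add(crop); err != nil {
		log.Fatal(err)
	}

	roi.Close()
	crop.Close()
}

// a single inference call covers every detection in the batch
outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})
```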

## Batch Sizing

The NPUs in the different platforms RK356x, RK3576, and RK3588 have different
amounts of SRAM and different NPU core counts, so finding the optimal batch size
for your Model is critical.

A benchmarking tool has been created to test different batch sizes of your own
RKNN Models. Use your python conversion script to compile the ONNX model to RKNN
with the various `rknn_batch_size` values you would like to test. Name those RKNN
Models using this format `<name>-batch{N1,N2,...,Nk}.rknn`. For example, to
test batch sizes of 1, 4, 8, and 16 of an OSNet model, create the following
files and place them in the directory `/tmp/models` on the host OS.
```
osnet-batch1.rknn
osnet-batch4.rknn
osnet-batch8.rknn
osnet-batch16.rknn
```

We can then pass all these Models to the benchmark using the `-m` argument in
the format of `-m "/tmp/models/osnet-batch{1,4,8,16}.rknn"`.

To run the benchmark of your models, use rk3588 as below or replace with your
Platform model.
```
# from project root directory

go test -bench=BenchmarkBatchSize -benchtime=10s \
  -args -p rk3588 -m "/tmp/models/osnet-batch{1,4,8,16}.rknn"
```

Similarly using Docker we can mount the `/tmp/models` directory and run.
```
# from project root directory

docker run --rm \
  --device /dev/dri:/dev/dri \
  -v "$(pwd):/go/src/app" \
  -v "$(pwd)/example/data:/go/src/data" \
  -v "/usr/include/rknn_api.h:/usr/include/rknn_api.h" \
  -v "/usr/lib/librknnrt.so:/usr/lib/librknnrt.so" \
  -v "/tmp/models/:/tmp/models/" \
  -w /go/src/app \
  swdee/go-rknnlite:latest \
  go test -bench=BenchmarkBatchSize -benchtime=10s \
  -args -p rk3588 -m "/tmp/models/osnet-batch{1,4,8,16}.rknn"
```

Running the above benchmark command outputs the following results.

#### rk3588

```
BenchmarkBatchSize/Batch01-8    1897     8806025 ns/op     8.806 ms/batch     8.806 ms/img
BenchmarkBatchSize/Batch04-8     885    21555109 ns/op    21.55 ms/batch      5.389 ms/img
BenchmarkBatchSize/Batch08-8     534    22335645 ns/op    22.34 ms/batch      2.792 ms/img
BenchmarkBatchSize/Batch16-8     303    40253162 ns/op    40.25 ms/batch      2.516 ms/img
```

#### rk3576

```
BenchmarkBatchSize/Batch01-8    1312     8987117 ns/op     8.985 ms/batch     8.985 ms/img
BenchmarkBatchSize/Batch04-8     640    18836090 ns/op    18.83 ms/batch      4.709 ms/img
BenchmarkBatchSize/Batch08-8     385    31702649 ns/op    31.70 ms/batch      3.963 ms/img
BenchmarkBatchSize/Batch16-8     194    63801596 ns/op    63.80 ms/batch      3.988 ms/img
```

#### rk3566

```
BenchmarkBatchSize/Batch01-4     661    18658568 ns/op    18.66 ms/batch     18.66 ms/img
BenchmarkBatchSize/Batch04-4     158    74716574 ns/op    74.71 ms/batch     18.68 ms/img
BenchmarkBatchSize/Batch08-4      70   155374027 ns/op   155.4 ms/batch      19.42 ms/img
BenchmarkBatchSize/Batch16-4      37   294969497 ns/op   295.0 ms/batch      18.44 ms/img
```


### Interpreting Benchmark Results

The `ms/batch` metric represents the number of milliseconds it took for the
whole batch inference to run, and `ms/img` represents the average number of
milliseconds it took to run inference per image.
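
For example, taking the rk3588 Batch08 row:
```
ms/img = ms/batch / batch size = 22.34 ms / 8 = 2.79 ms
```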

As can be seen in the rk3588 results, the ideal batch size is 8 as it gives
a low `2.792` ms/img inference time versus a total batch inference time of
`22.34ms`. The same applies to the rk3576.

The rk3566 has a single core NPU; the results show there is no benefit
in running batching at all.

These results were for an OSNet Model. It's possible that different Models
perform differently, so you should run these benchmarks for your own
application to optimize accordingly.


## Usage

An example batch program is provided that combines inferencing on a Pool of
runtimes. Make sure you have downloaded the data files first for the examples;
you only need to do this once for all examples.

```
cd example/
git clone --depth=1 https://github.com/swdee/go-rknnlite-data.git data
```

Run the batch example on rk3588 or replace with your Platform model.
```
cd example/batch
go run batch.go -s 3 -p rk3588
```

This will result in the output of:
```
Driver Version: 0.9.6, API Version: 2.3.0 (c949ad889d@2024-11-07T11:35:33)
Model Input Number: 1, Ouput Number: 1
Input tensors:
  index=0, name=input, n_dims=4, dims=[8, 224, 224, 3], n_elems=1204224, size=1204224, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=-14, scale=0.018658
Output tensors:
  index=0, name=output, n_dims=2, dims=[8, 1000, 0, 0], n_elems=8000, size=8000, fmt=UNDEFINED, type=INT8, qnt_type=AFFINE, zp=-55, scale=0.141923
Running...
File ../data/imagenet/n01514859_hen.JPEG, inference time 40ms
File ../data/imagenet/n01518878_ostrich.JPEG, inference time 40ms
File ../data/imagenet/n01530575_brambling.JPEG, inference time 40ms
File ../data/imagenet/n01531178_goldfinch.JPEG, inference time 40ms
...snip...
File ../data/imagenet/n13054560_bolete.JPEG, inference time 8ms
File ../data/imagenet/n13133613_ear.JPEG, inference time 8ms
File ../data/imagenet/n15075141_toilet_tissue.JPEG, inference time 8ms
Processed 1000 images in 2.098619346s, average inference per image is 2.10ms
```

See the help for command line parameters.
```
$ go run batch.go -h

Usage of /tmp/go-build1506342544/b001/exe/batch:
  -d string
    	A directory of images to run inference on (default "../data/imagenet/")
  -m string
    	RKNN compiled model file (default "../data/models/rk3588/mobilenetv2-batch8-rk3588.rknn")
  -p string
    	Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588] (default "rk3588")
  -q	Run in quiet mode, don't display individual inference results
  -r int
    	Repeat processing image directory the specified number of times, use this if you don't have enough images (default 1)
  -s int
    	Size of RKNN runtime pool, choose 1, 2, 3, or multiples of 3 (default 1)
```


### Docker

To run the batch example using the prebuilt docker image, make sure the data files have been downloaded first,
then run.
```
# from project root directory

docker run --rm \
  --device /dev/dri:/dev/dri \
  -v "$(pwd):/go/src/app" \
  -v "$(pwd)/example/data:/go/src/data" \
  -v "/usr/include/rknn_api.h:/usr/include/rknn_api.h" \
  -v "/usr/lib/librknnrt.so:/usr/lib/librknnrt.so" \
  -w /go/src/app \
  swdee/go-rknnlite:latest \
  go run ./example/batch/batch.go -p rk3588 -s 3
```


## API

A convenience function `rknnlite.NewBatch()` is provided to concatenate individual
images into a single input tensor for the Model and then extract their results
from the combined outputs.

```
// create a new batch processor
batch := rknnlite.NewBatch(batchSize, height, width, channels, rt.GetInputTypeFloat32())
defer batch.Close()

for idx, img := range imgs {

	// add images to the batch at the given index
	batch.AddAt(idx, img)

	// OR you can add images incrementally without specifying an index
	batch.Add(img)
}

// pass the concatenated Mat to the runtime for inference
outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})

// then get a single image result by index
output, err := batch.GetOutputInt(4, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
```
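
When the runtime is left in float32 output mode (`SetWantFloat(true)`), the equivalent per-image accessor is `GetOutputF32()`, for example:
```
outputF32, err := batch.GetOutputF32(4, outputs.Output[0], size)
```
with `size` being the per-image element count as above.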

See the full example code for more details.
222 example/batch/batch.go Normal file
@@ -0,0 +1,222 @@
package main

import (
	"flag"
	"github.com/swdee/go-rknnlite"
	"gocv.io/x/gocv"
	"image"
	"log"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"
)

var (
	// model input tensor dimensions, these values will be set
	// when runtime queries the modelFile being loaded
	height, width, channels, batchSize int
)

func main() {
	// disable logging timestamps
	log.SetFlags(0)

	// read in cli flags
	modelFile := flag.String("m", "../data/models/rk3588/mobilenetv2-batch8-rk3588.rknn", "RKNN compiled model file")
	imgDir := flag.String("d", "../data/imagenet/", "A directory of images to run inference on")
	poolSize := flag.Int("s", 1, "Size of RKNN runtime pool, choose 1, 2, 3, or multiples of 3")
	repeat := flag.Int("r", 1, "Repeat processing image directory the specified number of times, use this if you don't have enough images")
	quiet := flag.Bool("q", false, "Run in quiet mode, don't display individual inference results")
	rkPlatform := flag.String("p", "rk3588", "Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588]")

	flag.Parse()

	// set cpu affinity to run on specific CPU cores
	err := rknnlite.SetCPUAffinityByPlatform(*rkPlatform, rknnlite.FastCores)

	if err != nil {
		log.Printf("Failed to set CPU Affinity: %v\n", err)
	}

	// check dir exists
	info, err := os.Stat(*imgDir)

	if err != nil {
		log.Fatalf("No such image directory %s, error: %v\n", *imgDir, err)
	}

	if !info.IsDir() {
		log.Fatal("Image path is not a directory")
	}

	// check if user specified model file or if default is being used. if default
	// then pick the default platform model to use.
	if f := flag.Lookup("m"); f != nil && f.Value.String() == f.DefValue && *rkPlatform != "rk3588" {
		*modelFile = strings.ReplaceAll(*modelFile, "rk3588", *rkPlatform)
	}

	// create new pool, we pass NPUCoreAuto as RKNN does not allow batch Models
	// to be pinned to specific NPU cores
	useCore := rknnlite.NPUCoreAuto

	if strings.HasPrefix(strings.ToLower(*rkPlatform), "rk356") {
		useCore = rknnlite.NPUSkipSetCore
	}

	pool, err := rknnlite.NewPool(*poolSize, *modelFile,
		[]rknnlite.CoreMask{useCore})

	if err != nil {
		log.Fatalf("Error creating RKNN pool: %v\n", err)
	}

	// set runtime to leave output tensors as int8
	pool.SetWantFloat(false)

	// get a runtime and query the input tensor dimensions of the model
	rt := pool.Get()

	// optional querying of model file tensors and SDK version for printing
	// to stdout. not necessary for production inference code
	err = rt.Query(os.Stdout)

	if err != nil {
		log.Fatal("Error querying runtime: ", err)
	}

	batchSize = int(rt.InputAttrs()[0].Dims[0])
	width = int(rt.InputAttrs()[0].Dims[1])
	height = int(rt.InputAttrs()[0].Dims[2])
	channels = int(rt.InputAttrs()[0].Dims[3])

	pool.Return(rt)

	// get list of all files in the directory
	entries, err := os.ReadDir(*imgDir)

	if err != nil {
		log.Fatalf("Error reading image directory: %v\n", err)
	}

	var files []string

	for _, e := range entries {
		if e.IsDir() {
			continue
		}

		files = append(files, filepath.Join(*imgDir, e.Name()))
	}

	log.Println("Running...")

	// waitgroup used to wait for all go-routines to complete before closing
	// the pool
	var wg sync.WaitGroup

	start := time.Now()

	// repeat processing image set the specified number of times
	for i := 0; i < *repeat; i++ {
		// process image files in groups of batchSize
		for offset := 0; offset < len(files); offset += batchSize {

			end := offset + batchSize

			if end > len(files) {
				end = len(files)
			}

			subset := files[offset:end]

			// pool.Get() blocks if no runtimes are available in the pool
			rt := pool.Get()
			wg.Add(1)

			go func(rt *rknnlite.Runtime, batchPaths []string) {
				defer wg.Done()
				processBatch(rt, batchPaths, *quiet)
				pool.Return(rt)
			}(rt, subset)
		}
	}

	wg.Wait()

	// calculate average inference
	numFiles := (*repeat * len(files))
	end := time.Since(start)
	avg := (end.Seconds() / float64(numFiles)) * 1000

	log.Printf("Processed %d images in %s, average inference per image is %.2fms\n",
		numFiles, end.String(), avg)

	pool.Close()
}

func processBatch(rt *rknnlite.Runtime, paths []string, quiet bool) {

	// create batch
	batch := rknnlite.NewBatch(batchSize, height, width, channels,
		rt.GetInputTypeFloat32())
	defer batch.Close()

	// for each image path, load & preprocess, then Add to batch
	for idx, file := range paths {

		img := gocv.IMRead(file, gocv.IMReadColor)

		if img.Empty() {
			log.Printf("Error reading %s\n", file)
			continue
		}

		defer img.Close()

		// rgb + resize
		rgbImg := gocv.NewMat()
		gocv.CvtColor(img, &rgbImg, gocv.ColorBGRToRGB)
		defer rgbImg.Close()

		cropImg := gocv.NewMat()
		gocv.Resize(rgbImg, &cropImg, image.Pt(width, height), 0, 0, gocv.InterpolationArea)
		defer cropImg.Close()

		if err := batch.AddAt(idx, cropImg); err != nil {
			log.Printf("Batch.AddAt error: %v\n", err)
		}
	}

	// run inference on the entire batch at once
	start := time.Now()
	outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})
	spent := time.Since(start)

	if err != nil {
		log.Printf("Inference error: %v\n", err)
		return
	}

	defer outputs.Free()

	// unpack per image results
	for idx := 0; idx < len(paths); idx++ {

		if quiet {
			continue
		}

		// get int8 output tensor for image at idx
		_, err := batch.GetOutputInt(idx, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))

		if err != nil {
			log.Printf("GetOutputInt[%d] error: %v\n", idx, err)
			continue
		}

		log.Printf("File %s, inference time %dms\n", paths[idx], spent.Milliseconds())
	}
}
188 example/reid/README.md Normal file
@@ -0,0 +1,188 @@

# Re-Identification (ReID)

## Overview

Object trackers like ByteTrack can be used to track visible objects frame-to-frame,
but they rely on the assumption that an object's appearance and location change
smoothly over time. If a person goes behind a building or is briefly hidden
by another passerby, the tracker can lose that object's identity. When that same
person reemerges, the tracker often treats them as a new object, assigning a new ID.
This makes analyzing a person's complete path through a scene difficult
and makes counting unique objects much harder.

Re-Identification (ReID) models help solve this problem by using embedding features
which encode an object into a fixed-length vector that captures distinctive
patterns, shapes, or other visual signatures. When an object disappears and
then reappears, you can compare the newly detected object's embedding against a list of
past objects. If the objects are similar enough (their Cosine or Euclidean
distance falls below a chosen threshold), you can confidently link the new
detection back to the original track ID.
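
In code, that re-linking step reduces to a vector comparison. A minimal sketch (an editor's illustration in plain Go; the repository's `postprocess/reid` package provides equivalent convenience functions):

```
// euclideanDistance between two embedding vectors of equal length
func euclideanDistance(a, b []float32) float32 {
	var sum float64
	for i := range a {
		d := float64(a[i] - b[i])
		sum += d * d
	}
	return float32(math.Sqrt(sum))
}

// link a new detection back to a past track when the distance falls
// below the chosen threshold (0.51 is used in this example)
if euclideanDistance(newEmbedding, pastEmbedding) < 0.51 {
	// same object: reassign the original track ID
}
```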

## Datasets

The [OSNet model](https://paperswithcode.com/paper/omni-scale-feature-learning-for-person-re) is
lightweight and provides good accuracy for reidentification tasks, however
it must be trained on a dataset to identify specific object classes.

This example uses the [Market1501](https://paperswithcode.com/dataset/market-1501)
dataset trained for reidentifying people.

To support other object classifications such as Vehicles, Faces, or Animals, you
will need to source datasets and train models accordingly.


## Occlusion Example

In the [people walking video](https://github.com/swdee/go-rknnlite-data/raw/master/people-walking.mp4)
a lady wearing a CK branded jacket appears at the beginning of the scene and
becomes occluded by passersby. When she reappears, ByteTrack detects her as a
new person.


## Usage

Make sure you have downloaded the data files first for the examples.
You only need to do this once for all examples.

```
cd example/
git clone --depth=1 https://github.com/swdee/go-rknnlite-data.git data
```

Command line Usage.
```
$ go run reid.go -h

Usage of /tmp/go-build147978858/b001/exe/reid:
  -d string
    	Data file containing object co-ordinates (default "../data/reid-objects.dat")
  -e float
    	The Euclidean distance [0.0-1.0], a value less than defines a match (default 0.51)
  -i string
    	Image file to run inference on (default "../data/reid-walking.jpg")
  -m string
    	RKNN compiled model file (default "../data/models/rk3588/osnet-market1501-batch8-rk3588.rknn")
  -p string
    	Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588] (default "rk3588")
```

Run the ReID example on rk3588 or replace with your Platform model.
```
cd example/reid/
go run reid.go -p rk3588
```

This will result in the output of:
```
Driver Version: 0.9.6, API Version: 2.3.0 (c949ad889d@2024-11-07T11:35:33)
Model Input Number: 1, Ouput Number: 1
Input tensors:
  index=0, name=input, n_dims=4, dims=[8, 256, 128, 3], n_elems=786432, size=786432, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=-14, scale=0.018658
Output tensors:
  index=0, name=output, n_dims=2, dims=[8, 512, 0, 0], n_elems=4096, size=4096, fmt=UNDEFINED, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.018782
Comparing object 0 at (0,0,134,361)
  Object 0 at (0,0,134,361) has euclidean distance: 0.000000 (same person)
  Object 1 at (134,0,251,325) has euclidean distance: 0.423271 (same person)
  Object 2 at (251,0,326,208) has euclidean distance: 0.465061 (same person)
  Object 3 at (326,0,394,187) has euclidean distance: 0.445583 (same person)
Comparing object 1 at (394,0,513,357)
  Object 0 at (0,0,134,361) has euclidean distance: 0.781510 (different person)
  Object 1 at (134,0,251,325) has euclidean distance: 0.801649 (different person)
  Object 2 at (251,0,326,208) has euclidean distance: 0.680299 (different person)
  Object 3 at (326,0,394,187) has euclidean distance: 0.686542 (different person)
Comparing object 2 at (513,0,588,246)
  Object 0 at (0,0,134,361) has euclidean distance: 0.860921 (different person)
  Object 1 at (134,0,251,325) has euclidean distance: 0.873663 (different person)
  Object 2 at (251,0,326,208) has euclidean distance: 0.870753 (different person)
  Object 3 at (326,0,394,187) has euclidean distance: 0.820761 (different person)
Comparing object 3 at (588,0,728,360)
  Object 0 at (0,0,134,361) has euclidean distance: 0.762738 (different person)
  Object 1 at (134,0,251,325) has euclidean distance: 0.800668 (different person)
  Object 2 at (251,0,326,208) has euclidean distance: 0.763694 (different person)
  Object 3 at (326,0,394,187) has euclidean distance: 0.769597 (different person)
Model first run speed: batch preparation=3.900093ms, inference=47.935686ms, post processing=262.203µs, total time=52.097982ms
done
```

### Docker

To run the ReID example using the prebuilt docker image, make sure the data files have been downloaded first,
then run.
```
# from project root directory

docker run --rm \
  --device /dev/dri:/dev/dri \
  -v "$(pwd):/go/src/app" \
  -v "$(pwd)/example/data:/go/src/data" \
  -v "/usr/include/rknn_api.h:/usr/include/rknn_api.h" \
  -v "/usr/lib/librknnrt.so:/usr/lib/librknnrt.so" \
  -w /go/src/app \
  swdee/go-rknnlite:latest \
  go run ./example/reid/reid.go -p rk3588
```

### Interpreting Results

The above example uses people detected with a YOLOv5 model and then cropped to
create the sample input.

Objects A1 to A4 represent the same person, and objects B1, C1, and D1 are other
people from the same scene.

The first set of comparisons:
```
Comparing object 0 [A1] at (0,0,134,361)
  Object 0 [A1] at (0,0,134,361) has euclidean distance: 0.000000 (same person)
  Object 1 [A2] at (134,0,251,325) has euclidean distance: 0.423271 (same person)
  Object 2 [A3] at (251,0,326,208) has euclidean distance: 0.465061 (same person)
  Object 3 [A4] at (326,0,394,187) has euclidean distance: 0.445583 (same person)
```

Object 0 is A1; when compared to itself it has a euclidean distance of 0.0.
Objects 1-3 are A2 to A4, and each of these has a similar distance ranging
from 0.42 to 0.46.

The euclidean distance ranges from 0.0 (same object) to 1.0 (different object), so
the lower the distance the more similar the objects are. A threshold of `0.51`
is used to define the maximum distance allowed for two objects to be considered
the same. Your use case and datasets may require calibration of
the ideal threshold.

The remaining results compare the people B1, C1, and D1.
```
Comparing object 1 [B1] at (394,0,513,357)
  Object 0 [A1] at (0,0,134,361) has euclidean distance: 0.781510 (different person)
  Object 1 [A2] at (134,0,251,325) has euclidean distance: 0.801649 (different person)
  Object 2 [A3] at (251,0,326,208) has euclidean distance: 0.680299 (different person)
  Object 3 [A4] at (326,0,394,187) has euclidean distance: 0.686542 (different person)
Comparing object 2 [C1] at (513,0,588,246)
  Object 0 [A1] at (0,0,134,361) has euclidean distance: 0.860921 (different person)
  Object 1 [A2] at (134,0,251,325) has euclidean distance: 0.873663 (different person)
  Object 2 [A3] at (251,0,326,208) has euclidean distance: 0.870753 (different person)
  Object 3 [A4] at (326,0,394,187) has euclidean distance: 0.820761 (different person)
Comparing object 3 [D1] at (588,0,728,360)
  Object 0 [A1] at (0,0,134,361) has euclidean distance: 0.762738 (different person)
  Object 1 [A2] at (134,0,251,325) has euclidean distance: 0.800668 (different person)
  Object 2 [A3] at (251,0,326,208) has euclidean distance: 0.763694 (different person)
  Object 3 [A4] at (326,0,394,187) has euclidean distance: 0.769597 (different person)
```

All of these other people have a euclidean distance greater than 0.68, indicating
they are different people.


## Postprocessing

[Convenience functions](https://github.com/swdee/go-rknnlite-data/raw/master/postprocess/reid.go)
are provided for calculating the Euclidean Distance or Cosine Similarity,
depending on how the Model has been trained.
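
For reference, cosine similarity over two L2-normalized embeddings reduces to a dot product. A minimal sketch (an editor's illustration, not the package's exact API):

```
// cosineSimilarity assumes a and b are already L2-normalized,
// so the dot product alone gives the similarity in [-1, 1]
func cosineSimilarity(a, b []float32) float32 {
	var dot float32
	for i := range a {
		dot += a[i] * b[i]
	}
	return dot
}
```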
524 example/reid/reid.go Normal file
@@ -0,0 +1,524 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"flag"
|
||||
"fmt"
|
||||
"github.com/swdee/go-rknnlite"
|
||||
"github.com/swdee/go-rknnlite/postprocess/reid"
|
||||
"gocv.io/x/gocv"
|
||||
"image"
|
||||
"log"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// disable logging timestamps
|
||||
log.SetFlags(0)
|
||||
|
||||
// read in cli flags
|
||||
modelFile := flag.String("m", "../data/models/rk3588/osnet-market1501-batch8-rk3588.rknn", "RKNN compiled model file")
|
||||
imgFile := flag.String("i", "../data/reid-walking.jpg", "Image file to run inference on")
|
||||
objsFile := flag.String("d", "../data/reid-objects.dat", "Data file containing object co-ordinates")
|
||||
rkPlatform := flag.String("p", "rk3588", "Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3582|rk3588]")
|
||||
euDist := flag.Float64("e", 0.51, "The Euclidean distance [0.0-1.0], a value less than defines a match")
|
||||
flag.Parse()
|
||||
|
||||
err := rknnlite.SetCPUAffinityByPlatform(*rkPlatform, rknnlite.FastCores)
|
||||
|
||||
if err != nil {
|
||||
log.Printf("Failed to set CPU Affinity: %v", err)
|
||||
}
|
||||
|
||||
// check if user specified model file or if default is being used. if default
|
||||
// then pick the default platform model to use.
|
||||
if f := flag.Lookup("m"); f != nil && f.Value.String() == f.DefValue && *rkPlatform != "rk3588" {
|
||||
*modelFile = strings.ReplaceAll(*modelFile, "rk3588", *rkPlatform)
|
||||
}
|
||||
|
||||
// create rknn runtime instance
|
||||
rt, err := rknnlite.NewRuntimeByPlatform(*rkPlatform, *modelFile)
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error initializing RKNN runtime: ", err)
|
||||
}
|
||||
|
||||
// set runtime to leave output tensors as int8
|
||||
rt.SetWantFloat(false)
|
||||
|
||||
// optional querying of model file tensors and SDK version for printing
|
||||
// to stdout. not necessary for production inference code
|
||||
err = rt.Query(os.Stdout)
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error querying runtime: ", err)
|
||||
}
|
||||
|
||||
// load objects file
|
||||
objs, err := ParseObjects(*objsFile)
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error parsing objects: ", err)
|
||||
}
|
||||
|
||||
// load image
|
||||
img := gocv.IMRead(*imgFile, gocv.IMReadColor)
|
||||
|
||||
if img.Empty() {
|
||||
log.Fatal("Error reading image from: ", *imgFile)
|
||||
}
|
||||
|
||||
// convert colorspace
|
||||
srcImg := gocv.NewMat()
|
||||
gocv.CvtColor(img, &srcImg, gocv.ColorBGRToRGB)
|
||||
|
||||
defer img.Close()
|
||||
defer srcImg.Close()
|
||||
|
||||
start := time.Now()
|
||||
|
||||
// create a batch to process all images in the compare and dataset's
|
||||
// in a single forward pass
|
||||
batch := rknnlite.NewBatch(
|
||||
int(rt.InputAttrs()[0].Dims[0]),
|
||||
int(rt.InputAttrs()[0].Dims[2]),
|
||||
int(rt.InputAttrs()[0].Dims[1]),
|
||||
int(rt.InputAttrs()[0].Dims[3]),
|
||||
rt.GetInputTypeFloat32(),
|
||||
)
|
||||
|
||||
// scale size is the size of the input tensor dimensions to scale the object too
|
||||
scaleSize := image.Pt(int(rt.InputAttrs()[0].Dims[1]), int(rt.InputAttrs()[0].Dims[2]))
|
||||
|
||||
// add the compare images to the batch
|
||||
for _, cmpObj := range objs.Compare {
|
||||
err := AddObjectToBatch(batch, srcImg, cmpObj, scaleSize)
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error creating batch: ", err)
|
||||
}
|
||||
}
|
||||
|
||||
// add the dataset images to the batch
|
||||
for _, dtObj := range objs.Dataset {
|
||||
err := AddObjectToBatch(batch, srcImg, dtObj, scaleSize)
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error creating batch: ", err)
|
||||
}
|
||||
}
|
||||
|
||||
defer batch.Close()
|
||||
|
||||
endBatch := time.Now()
|
||||
|
||||
// run inference on the batch
|
||||
outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})
|
||||
|
||||
endInference := time.Now()
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Runtime inferencing failed with error: ", err)
|
||||
}
|
||||
|
||||
// get total number of compare objects
|
||||
totalCmp := len(objs.Compare)
|
||||
|
||||
// compare each object to those objects in the dataset for similarity
|
||||
for i, cmpObj := range objs.Compare {
|
||||
// get the compare objects output
|
||||
cmpOutput, err := batch.GetOutputInt(i, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Getting output tensor failed with error: ", err)
|
||||
}
|
||||
|
||||
log.Printf("Comparing object %d at (%d,%d,%d,%d)\n", i,
|
||||
cmpObj.X1, cmpObj.Y1, cmpObj.X2, cmpObj.Y2)
|
||||
|
||||
for j, dtObj := range objs.Dataset {
|
||||
// get each objects outputs
|
||||
nextOutput, err := batch.GetOutputInt(totalCmp+j, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Getting output tensor failed with error: ", err)
|
||||
}
|
||||
|
||||
dist := CompareObjects(
|
||||
cmpOutput,
|
||||
nextOutput,
|
||||
outputs.OutputAttributes().Scales[0],
|
||||
outputs.OutputAttributes().ZPs[0],
|
||||
)
|
||||
|
||||
// check euclidean distance to determine match of same person or not
|
||||
objRes := "different person"
|
||||
|
||||
if dist < float32(*euDist) {
|
||||
objRes = "same person"
|
||||
}
|
||||
|
||||
log.Printf(" Object %d at (%d,%d,%d,%d) has euclidean distance: %f (%s)\n",
|
||||
j,
|
||||
dtObj.X1, dtObj.Y1, dtObj.X2, dtObj.Y2,
|
||||
dist, objRes)
|
||||
}
|
||||
}
|
||||
|
||||
endCompare := time.Now()
|
||||
|
||||
log.Printf("Model first run speed: batch preparation=%s, inference=%s, post processing=%s, total time=%s\n",
|
||||
endBatch.Sub(start).String(),
|
||||
endInference.Sub(endBatch).String(),
|
||||
endCompare.Sub(endInference).String(),
|
||||
endCompare.Sub(start).String(),
|
||||
)
|
||||
|
||||
// free outputs allocated in C memory after you have finished post processing
|
||||
err = outputs.Free()
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error freeing Outputs: ", err)
|
||||
}
|
||||
|
||||
// close runtime and release resources
|
||||
err = rt.Close()
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error closing RKNN runtime: ", err)
|
||||
}
|
||||
|
||||
log.Println("done")
|
||||
|
||||
/*
|
||||
//CompareObject(rt, srcImg, cmpObj, objs.Dataset)
|
||||
|
||||
//rgbImg := img.Clone()
|
||||
|
||||
|
||||
|
||||
frameWidth := 67
|
||||
frameHeight := 177
|
||||
|
||||
roiRect1 := image.Rect(497, 195, 497+frameWidth, 195+frameHeight)
|
||||
|
||||
// cklady
|
||||
//roiRect1 := image.Rect(0, 0, 134, 361)
|
||||
|
||||
roiImg1 := rgbImg.Region(roiRect1)
|
||||
|
||||
cropImg1 := rgbImg.Clone()
|
||||
scaleSize1 := image.Pt(int(rt.InputAttrs()[0].Dims[1]), int(rt.InputAttrs()[0].Dims[2]))
|
||||
gocv.Resize(roiImg1, &cropImg1, scaleSize1, 0, 0, gocv.InterpolationArea)
|
||||
|
||||
defer img.Close()
|
||||
defer rgbImg.Close()
|
||||
defer cropImg1.Close()
|
||||
defer roiImg1.Close()
|
||||
|
||||
gocv.IMWrite("/tmp/frame-master.jpg", cropImg1)
|
||||
|
||||
batch := rt.NewBatch(
|
||||
int(rt.InputAttrs()[0].Dims[0]),
|
||||
int(rt.InputAttrs()[0].Dims[2]),
|
||||
int(rt.InputAttrs()[0].Dims[1]),
|
||||
int(rt.InputAttrs()[0].Dims[3]),
|
||||
)
|
||||
err = batch.Add(cropImg1)
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error creating batch: ", err)
|
||||
}
|
||||
defer batch.Close()
|
||||
|
||||
// perform inference on image file
|
||||
outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Runtime inferencing failed with error: ", err)
|
||||
}
|
||||
|
||||
output, err := batch.GetOutputInt(0, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Getting output tensor failed with error: ", err)
|
||||
}
|
||||
|
||||
fingerPrint := DequantizeAndL2Normalize(
|
||||
output,
|
||||
outputs.OutputAttributes().Scales[0],
|
||||
outputs.OutputAttributes().ZPs[0],
|
||||
)
|
||||
|
||||
// seed the EMA fingerprint to the master
|
||||
emaFP := make([]float32, len(fingerPrint))
|
||||
copy(emaFP, fingerPrint)
|
||||
const alpha = 0.9 // smoothing factor
|
||||
|
||||
hash, err := FingerprintHash(fingerPrint)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("hashing failed: %v", err)
|
||||
}
|
||||
|
||||
log.Println("object fingerprint:", hash)
|
||||
|
||||
// free outputs allocated in C memory after you have finished post processing
|
||||
err = outputs.Free()
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error freeing Outputs: ", err)
|
||||
}
|
||||
|
||||
|
||||
// sample 2 images
|
||||
|
||||
yOffsets := []int{1, 195, 388}
|
||||
xOffsets := []int{497, 565, 633, 701, 769, 836, 904}
|
||||
|
||||
images := [][]int{}
|
||||
|
||||
for _, ny := range yOffsets {
|
||||
for _, nx := range xOffsets {
|
||||
images = append(images, []int{nx, ny})
|
||||
}
|
||||
}
|
||||
|
||||
// ck lady
|
||||
|
||||
// images := [][]int{
|
||||
// {134, 0, 117, 325},
|
||||
// {251, 0, 75, 208},
|
||||
// {326, 0, 68, 187},
|
||||
// }
|
||||
|
||||
|
||||
// Image 2
|
||||
for frame, next := range images {
|
||||
|
||||
roiRect2 := image.Rect(next[0], next[1], next[0]+frameWidth, next[1]+frameHeight)
|
||||
// ck lady
|
||||
//roiRect2 := image.Rect(next[0], next[1], next[0]+next[2], next[1]+next[3])
|
||||
roiImg2 := rgbImg.Region(roiRect2)
|
||||
|
||||
cropImg2 := rgbImg.Clone()
|
||||
scaleSize2 := image.Pt(int(rt.InputAttrs()[0].Dims[1]), int(rt.InputAttrs()[0].Dims[2]))
|
||||
gocv.Resize(roiImg2, &cropImg2, scaleSize2, 0, 0, gocv.InterpolationArea)
|
||||
|
||||
defer cropImg2.Close()
|
||||
defer roiImg2.Close()
|
||||
|
||||
gocv.IMWrite(fmt.Sprintf("/tmp/frame-%d.jpg", frame), cropImg2)
|
||||
|
||||
start := time.Now()
|
||||
|
||||
batch.Clear()
|
||||
err = batch.Add(cropImg2)
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error creating batch: ", err)
|
||||
}
|
||||
|
||||
outputs, err = rt.Inference([]gocv.Mat{batch.Mat()})
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Runtime inferencing failed with error: ", err)
|
||||
}
|
||||
|
||||
endInference := time.Now()
|
||||
|
||||
output, err := batch.GetOutputInt(0, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Getting output tensor failed with error: ", err)
|
||||
}
|
||||
|
||||
fingerPrint2 := DequantizeAndL2Normalize(
|
||||
output,
|
||||
outputs.OutputAttributes().Scales[0],
|
||||
outputs.OutputAttributes().ZPs[0],
|
||||
)
|
||||
|
||||
|
||||
// sim := CosineSimilarity(fingerPrint, fingerPrint2)
|
||||
// dist := CosineDistance(fingerPrint, fingerPrint2)
|
||||
// fmt.Printf("Frame %d, cosine similarity: %f, distance=%f\n", frame, sim, dist)
|
||||
|
||||
|
||||
// compute Euclidean (L2) distance directly
|
||||
dist := EuclideanDistance(fingerPrint, fingerPrint2)
|
||||
|
||||
// 3) compute vs EMA
|
||||
emaDist := EuclideanDistance(emaFP, fingerPrint2)
|
||||
|
||||
endDetect := time.Now()
|
||||
|
||||
objRes := "different person"
|
||||
if emaDist < 0.51 {
|
||||
objRes = "same person"
|
||||
}
|
||||
|
||||
fmt.Printf("Frame %d, euclidean distance: %f, ema=%f (%s)\n", frame, dist, emaDist, objRes)
|
||||
|
||||
log.Printf(" Inference=%s, detect=%s, total time=%s\n",
|
||||
endInference.Sub(start).String(),
|
||||
endDetect.Sub(endInference).String(),
|
||||
endDetect.Sub(start).String(),
|
||||
)
|
||||
|
||||
// free outputs allocated in C memory after you have finished post processing
|
||||
err = outputs.Free()
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error freeing Outputs: ", err)
|
||||
}
|
||||
|
||||
// 4) update the EMA fingerprint
|
||||
if frame >= 7 && frame <= 13 {
|
||||
|
||||
// emaFP = α*emaFP + (1-α)*fp2
|
||||
for i := range emaFP {
|
||||
emaFP[i] = alpha*emaFP[i] + (1-alpha)*fingerPrint2[i]
|
||||
}
|
||||
// 5) re‐normalize emaFP back to unit length
|
||||
var sum float32
|
||||
for _, v := range emaFP {
|
||||
sum += v * v
|
||||
}
|
||||
norm := float32(math.Sqrt(float64(sum)))
|
||||
if norm > 0 {
|
||||
for i := range emaFP {
|
||||
emaFP[i] /= norm
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// close runtime and release resources
|
||||
err = rt.Close()
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error closing RKNN runtime: ", err)
|
||||
}
|
||||
|
||||
log.Println("done")
|
||||
*/
|
||||
}

// Box holds object bounding box coordinates (x1, y1, x2, y2)
type Box struct {
    X1, Y1, X2, Y2 int
}

// Objects is a struct to represent the compare and dataset objects parsed
// from the objects data file
type Objects struct {
    Compare []Box
    Dataset []Box
}

// ParseObjects reads the TOML-like objects data file and returns the two
// lists of objects and their bounding box coordinates
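// Example file layout (illustrative values; the parser expects [compare]
// and [dataset] section headers, one box of four comma-separated ints per
// line, and "#" comment lines):
//
//	[compare]
//	134, 0, 251, 325
//
//	[dataset]
//	497, 1, 614, 195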
func ParseObjects(path string) (*Objects, error) {

    f, err := os.Open(path)

    if err != nil {
        return nil, err
    }

    defer f.Close()

    objs := &Objects{}
    section := "" // either "compare" or "dataset"
    scanner := bufio.NewScanner(f)

    for scanner.Scan() {
        line := strings.TrimSpace(scanner.Text())

        // skip blank or comment
        if line == "" || strings.HasPrefix(line, "#") {
            continue
        }

        // section header
        if strings.HasPrefix(line, "[") && strings.HasSuffix(line, "]") {
            section = strings.ToLower(line[1 : len(line)-1])
            continue
        }

        // data line, expect four ints separated by commas
        fields := strings.Split(line, ",")

        if len(fields) != 4 {
            return nil, fmt.Errorf("invalid data line %q", line)
        }

        nums := make([]int, 4)

        for i, fstr := range fields {
            v, err := strconv.Atoi(strings.TrimSpace(fstr))

            if err != nil {
                return nil, fmt.Errorf("parsing %q: %w", fstr, err)
            }

            nums[i] = v
        }

        // define box
        box := Box{nums[0], nums[1], nums[2], nums[3]}

        switch section {

        case "compare":
            objs.Compare = append(objs.Compare, box)

        case "dataset":
            objs.Dataset = append(objs.Dataset, box)

        default:
            return nil, fmt.Errorf("line %q outside of a known section", line)
        }
    }

    if err := scanner.Err(); err != nil {
        return nil, err
    }

    return objs, nil
}

// AddObjectToBatch adds the cropped object from the source image to the batch
// for running inference on
func AddObjectToBatch(batch *rknnlite.Batch, srcImg gocv.Mat, obj Box,
    scaleSize image.Point) error {

    // get the object's region of interest from the source Mat
    objRect := image.Rect(obj.X1, obj.Y1, obj.X2, obj.Y2)
    objRoi := srcImg.Region(objRect)

    objImg := objRoi.Clone()
    gocv.Resize(objRoi, &objImg, scaleSize, 0, 0, gocv.InterpolationArea)

    defer objRoi.Close()
    defer objImg.Close()

    return batch.Add(objImg)
}

// CompareObjects compares the output embeddings of two objects
func CompareObjects(objA []int8, objB []int8, scales float32,
    ZPs int32) float32 {

    // get the fingerprint of both objects
    fpA := reid.DequantizeAndL2Normalize(objA, scales, ZPs)
    fpB := reid.DequantizeAndL2Normalize(objB, scales, ZPs)

    // compute Euclidean (L2) distance directly
    return reid.EuclideanDistance(fpA, fpB)
}
@@ -313,6 +313,19 @@ itself is not 100%. Whilst this demo shows a complete solution, you would still
need to do further work training a better model and testing it for your own use case.


## Re-Identification (ReID)

Experimental ReID has been added which follows the implementation of the
[FairMOT](https://github.com/FoundationVision/ByteTrack/tree/main/tutorials/fairmot) tracker,
however it makes use of the OSNet model trained on the Market1501 dataset.

ReID is expensive, typically taking around 200ms per frame to complete on the
RK3588 NPU, and gives little accuracy improvement over plain ByteTrack, which
adds little overhead to the YOLO object detection.

We need to wait for Rockchip's next generation RK36xx SoC before this becomes practical.
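
If you wish to experiment with it, ReID is enabled on the ByteTrack tracker by
creating a pool of runtimes for the OSNet model and passing each video frame in
with the tracker update. A minimal sketch based on the stream demo follows; the
pool size, `fps` value, and model path are illustrative and error handling is
omitted:

```
// pool of runtimes for the batch-8 OSNet model
reidPool, err := rknnlite.NewPool(2, "osnet-market1501-batch8-rk3588.rknn",
	[]rknnlite.CoreMask{rknnlite.NPUCoreAuto})

// ReID post processing works on the raw int8 output tensors
reidPool.SetWantFloat(false)

byteTrack := tracker.NewBYTETracker(fps, fps*10, 0.5, 0.6, 0.8)
byteTrack.UseReID(reidPool, tracker.Euclidean, 0.51)

// pass the frame with each update so object crops can be batched
// through the OSNet model
trackObjs, err := byteTrack.UpdateWithFrame(
	postprocess.DetectionsToObjects(detectResults), frame,
)
```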

## Background

The ByteTrack code is a Go conversion of the [C++ project](https://github.com/ifzhang/ByteTrack).

@@ -127,18 +127,27 @@ type Demo struct {
    // renderFormat indicates which rendering type to use with instance
    // segmentation, outline or mask
    renderFormat string
    // reidModelFile is the model to use ReID with
    reidModelFile string
    // reid is a flag to indicate if reid is being used or not
    reid bool
    // reidPool of rknnlite runtimes to perform inference in parallel
    reidPool *rknnlite.Pool
}

// NewDemo returns an instance of Demo, a streaming HTTP server showing
// video with object detection
func NewDemo(vidSrc *VideoSource, modelFile, labelFile string, poolSize int,
    modelType string, renderFormat string, rkPlatform string,
    reidModelFile string, useReid bool) (*Demo, error) {

    var err error

    d := &Demo{
        vidSrc:        vidSrc,
        limitObjs:     make([]string, 0),
        reidModelFile: reidModelFile,
        reid:          useReid,
    }

    if vidSrc.Format == VideoFile {
@@ -220,6 +229,15 @@ func NewDemo(vidSrc *VideoSource, modelFile, labelFile string, poolSize int,
        log.Printf("***WARNING*** %s only has 1 TOPS NPU, downgraded to %d FPS\n", rkPlatform, FPS)
    }

    if d.reid {
        if strings.EqualFold(rkPlatform[:5], "rk356") {
            log.Fatal("***WARNING*** ReID is unavailable for RK356x platforms as the 1 TOPS NPU is not powerful enough")
        }
        FPS = 4
        FPSinterval = time.Duration(float64(time.Second) / float64(FPS))
        log.Println("***WARNING*** ReID is experimental and requires a lot of NPU time, downgraded to 4 FPS")
    }

    // load in Model class names
    d.labels, err = rknnlite.LoadLabels(labelFile)

@@ -227,6 +245,19 @@ func NewDemo(vidSrc *VideoSource, modelFile, labelFile string, poolSize int,
        return nil, fmt.Errorf("Error loading model labels: %w", err)
    }

    // create pool for ReID
    if d.reid {
        d.reidPool, err = rknnlite.NewPool(poolSize, reidModelFile,
            []rknnlite.CoreMask{rknnlite.NPUCoreAuto})

        if err != nil {
            log.Fatalf("Error creating ReID RKNN pool: %v\n", err)
        }

        // set runtime to leave output tensors as int8
        d.reidPool.SetWantFloat(false)
    }

    return d, nil
}

@@ -360,6 +391,10 @@ func (d *Demo) Stream(w http.ResponseWriter, r *http.Request) {
    // record of past object detections for tracking
    byteTrack := tracker.NewBYTETracker(FPS, FPS*10, 0.5, 0.6, 0.8)

    if d.reid {
        byteTrack.UseReID(d.reidPool, tracker.Euclidean, 0.51)
    }

    // create a trails history
    trail := tracker.NewTrail(90)

@@ -491,9 +526,18 @@ func (d *Demo) ProcessFrame(img gocv.Mat, retChan chan<- ResultFrame,
    // track detected objects
    timing.TrackerStart = time.Now()

    var trackObjs []*tracker.STrack

    if d.reid {
        trackObjs, err = byteTrack.UpdateWithFrame(
            postprocess.DetectionsToObjects(detectResults),
            resImg,
        )
    } else {
        trackObjs, err = byteTrack.Update(
            postprocess.DetectionsToObjects(detectResults),
        )
    }

    timing.TrackerEnd = time.Now()

@@ -713,6 +757,8 @@ func main() {
    renderFormat := flag.String("r", "outline", "The rendering format used for instance segmentation [outline|mask]")
    codecFormat := flag.String("codec", "mjpg", "Web Camera codec format [mjpg|yuyv]")
    rkPlatform := flag.String("p", "rk3588", "Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588]")
    reidModelFile := flag.String("rm", "../data/models/rk3588/osnet-market1501-batch8-rk3588.rknn", "RKNN compiled OSNet/Re-Identification model file")
    useReid := flag.Bool("reid", false, "Enable Re-Identification enhanced tracking")

    // Initialize the custom camera resolution flag with a default value
    cameraRes := &cameraResFlag{value: "1280x720@30"}
@@ -760,8 +806,12 @@ func main() {
        *modelFile = strings.ReplaceAll(*modelFile, "rk3588", *rkPlatform)
    }

    if f := flag.Lookup("rm"); f != nil && f.Value.String() == f.DefValue && *rkPlatform != "rk3588" {
        *reidModelFile = strings.ReplaceAll(*reidModelFile, "rk3588", *rkPlatform)
    }

    demo, err := NewDemo(vidSrc, *modelFile, *labelFile, *poolSize,
        *modelType, *renderFormat, *rkPlatform, *reidModelFile, *useReid)

    if err != nil {
        log.Fatalf("Error creating demo: %v", err)

@@ -60,7 +60,7 @@ func (r *Runtime) Inference(mats []gocv.Mat) (*Outputs, error) {
        Index: uint32(idx),
        Type:  TensorFloat32,
        // multiply by 4 for size of float32
        Size:        uint32(len(data) * 4), // bytes = elements * 4
        Fmt:         TensorNHWC,
        Buf:         unsafe.Pointer(&data[0]),
        PassThrough: false,
@@ -77,7 +77,7 @@ func (r *Runtime) Inference(mats []gocv.Mat) (*Outputs, error) {
    inputs[idx] = Input{
        Index:       uint32(idx),
        Type:        TensorUint8,
        Size:        uint32(len(data)), // bytes = elements
        Fmt:         TensorNHWC,
        Buf:         unsafe.Pointer(&data[0]),
        PassThrough: false,

5
pool.go
@@ -109,6 +109,11 @@ func (p *Pool) SetWantFloat(val bool) {
    }
}

// Size returns the Pool size
func (p *Pool) Size() int {
    return p.size
}

// getRuntimeCore takes an integer and returns the core mask value to use from
// the coremask list
func getRuntimeCore(i int, cores []CoreMask) CoreMask {

129
postprocess/reid/reid.go
Normal file
@@ -0,0 +1,129 @@
package reid

import (
    "bytes"
    "crypto/sha256"
    "encoding/binary"
    "encoding/hex"
    "math"
)

// DequantizeAndL2Normalize converts a quantized int8 vector "q" into a float32 vector,
// applies dequantization using the provided scale "s" and zero-point "z",
// and then normalizes the result to unit length using L2 normalization.
//
// This is commonly used to convert quantized embedding vectors back to a
// normalized float form for comparison or similarity calculations.
//
// If the resulting vector has zero magnitude, the function returns the
// unnormalized dequantized vector.
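//
// For example (illustrative values), with s = 0.02 and z = -5 a quantized
// value q = 10 dequantizes to (10 - (-5)) * 0.02 = 0.3 before normalization.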
func DequantizeAndL2Normalize(q []int8, s float32, z int32) []float32 {

    N := len(q)
    x := make([]float32, N)

    // dequantize
    for i := 0; i < N; i++ {
        x[i] = float32(int32(q[i])-z) * s
    }

    // compute L2 norm
    var sumSquares float32

    for _, v := range x {
        sumSquares += v * v
    }

    norm := float32(math.Sqrt(float64(sumSquares)))

    if norm == 0 {
        // avoid /0
        return x
    }

    // normalize
    for i := 0; i < N; i++ {
        x[i] /= norm
    }

    return x
}

// FingerprintHash takes an L2-normalized []float32 and returns
// a hex-encoded SHA-256 hash of its binary representation.
func FingerprintHash(feat []float32) (string, error) {

    buf := new(bytes.Buffer)

    // write each float32 in little-endian
    for _, v := range feat {
        if err := binary.Write(buf, binary.LittleEndian, v); err != nil {
            return "", err
        }
    }

    sum := sha256.Sum256(buf.Bytes())

    return hex.EncodeToString(sum[:]), nil
}

// CosineSimilarity returns the cosine of the angle between vectors a and b.
// Assumes len(a) == len(b). If you have already L2-normalized them,
// this is just their dot product.
func CosineSimilarity(a, b []float32) float32 {

    var dot float32

    for i := range a {
        dot += a[i] * b[i]
    }

    // If not already normalized, you'd divide by the norms here.
    return dot
}

// CosineDistance returns 1 - cosine similarity, which is a proper distance
// metric in [0,2] for L2-normalized vectors; small values mean "very similar".
func CosineDistance(a, b []float32) float32 {
    return 1 - CosineSimilarity(a, b)
}

// EuclideanDistance returns the L2 distance between two vectors.
// Lower means "more similar" when your features are L2-normalized.
func EuclideanDistance(a, b []float32) float32 {
    var sum float32

    for i := range a {
        d := a[i] - b[i]
        sum += d * d
    }

    return float32(math.Sqrt(float64(sum)))
}

// NormalizeVec normalizes the input float32 slice to unit length and returns
// a new slice. If the input vector has zero magnitude, it returns the original
// slice unchanged.
func NormalizeVec(v []float32) []float32 {

    norm := float32(0.0)

    for _, x := range v {
        norm += x * x
    }

    if norm == 0 {
        return v // avoid division by zero
    }

    norm = float32(math.Sqrt(float64(norm)))

    out := make([]float32, len(v))

    for i, x := range v {
        out[i] = x / norm
    }

    return out
}

@@ -241,6 +241,11 @@ func (r *Runtime) SetInputTypeFloat32(val bool) {
    r.inputTypeFloat32 = val
}

// GetInputTypeFloat32 returns true if the input type is set as Float32,
// or false for Int8
func (r *Runtime) GetInputTypeFloat32() bool {
    return r.inputTypeFloat32
}

// SDKVersion represents the C.rknn_sdk_version struct
type SDKVersion struct {
    DriverVersion string

@@ -73,9 +73,6 @@ RUN git clone --depth 1 https://github.com/swdee/lpd-yolov8.git /opt/lpd-yolov8
    cp /opt/rknn_model_zoo/examples/yolov8/python/convert.py /opt/rknn_model_zoo/examples/yolov8/python/convert-lpd.py && \
    sed -i "s|^DATASET_PATH *= *['\"].*['\"]|DATASET_PATH = '/opt/lpd-yolov8/subset.txt'|" /opt/rknn_model_zoo/examples/yolov8/python/convert-lpd.py

# Upgrade pip to the latest version
RUN pip install --upgrade pip

@@ -92,6 +89,15 @@ RUN pip install --no-cache-dir \
    pyyaml \
    "tensorflow<=2.16.0rc0"

# download other onnx models
RUN git clone --depth 1 https://github.com/swdee/go-rknnlite-build.git /opt/go-rknnlite-build && \
    git -C /opt/go-rknnlite-build fetch --depth 1 origin ce8b5ce1dc53b1c38324e7506374731ad21070c8 && \
    git -C /opt/go-rknnlite-build checkout FETCH_HEAD

# copy our modified mobilenet.py script into the rknn_model_zoo directory
RUN cp /opt/go-rknnlite-build/mobilenet-batch/mobilenet-rknn.py /opt/rknn_model_zoo/examples/mobilenet/python/mobilenet-rknn-batch.py


# By default do nothing
CMD ["bash"]

@@ -30,6 +30,8 @@ MODELS=(
    "mobilenet_v1 rknn_convert /opt/models/mobilenet_v1/model_config.yml '' '' mobilenet_v1"
    "yolov8 convert-lpd.py /opt/lpd-yolov8/lpd-yolov8n.onnx i8 '' lpd-yolov8n"
    "yolov8 convert.py /opt/go-rknnlite-build/yolonas-s.onnx i8 '' yolonas-s"
    "mobilenet mobilenet-rknn-batch.py ../model/mobilenetv2-12.onnx i8 --model mobilenetv2-batch8"
    "osnet-market1501 build|onnx_to_rknn.py osnet_x1_0_market_256x128.onnx i8 '' osnet-market1501-batch8"
)

# compile all entries (or just filter) for one platform
@@ -75,6 +77,7 @@ compile_for_platform() {
    fi

    echo "-> building $outprefix for $platform"
    local out="/opt/rkmodels/${platform}/${outprefix}-${platform}.rknn"

    if [[ "$script" == "rknn_convert" ]]; then
        # mobilenet_v1 special: use the CLI and then rename
@@ -83,13 +86,23 @@ compile_for_platform() {
            -i "$model" \
            -o "/opt/rkmodels/$platform/"
        mv "/opt/rkmodels/$platform/${outprefix}.rknn" \
            "$out"
        continue
    fi

    # build the go-rknnlite-build models
    if [[ "$script" == build\|* ]]; then
        # strip everything up to (and including) the first pipe to get the script name
        scriptName="${script#*|}"
        # go into the go-rknnlite-build tree
        pushd "/opt/go-rknnlite-build/${subdir}" >/dev/null
        python "$scriptName" "$model" "$platform" "$dtype" "$out"
        popd >/dev/null
        continue
    fi

    # the old examples
    pushd "/opt/rknn_model_zoo/examples/${subdir}/python/" >/dev/null

    if [[ "$subdir" == "mobilenet" ]]; then
        python "$script" $extra "$model" \

@@ -25,6 +25,10 @@ type BYTETracker struct {
    lostStracks []*STrack
    // List of removed objects
    removedStracks []*STrack
    // reid holds the ReID supported tracking state
    reid *reID
    // useReid is a flag to indicate if ReID supported tracking is to be used
    useReid bool
}

// NewBYTETracker initializes and returns a new BYTETracker
@@ -62,6 +66,10 @@ func (bt *BYTETracker) Update(objects []Object) ([]*STrack, error) {
        strack := NewSTrack(NewRect(object.Rect.X(), object.Rect.Y(), object.Rect.Width(), object.Rect.Height()),
            object.Prob, object.ID, object.Label)

        if bt.useReid {
            strack.WithFeature(object.Feature, 0.9, 30)
        }

        if object.Prob >= bt.trackThresh {
            detStracks = append(detStracks, strack)
        } else {
@@ -87,11 +95,18 @@ func (bt *BYTETracker) Update(objects []Object) ([]*STrack, error) {
        strack.Predict()
    }

    // Step 2: First association, using IoU or feature distance matching
    var currentTrackedStracks, remainTrackedStracks, remainDetStracks, refindStracks []*STrack
    var costMatrix [][]float32

    if bt.useReid {
        costMatrix = bt.calcFeatureDistance(strackPool, detStracks)
    } else {
        costMatrix = bt.calcIouDistance(strackPool, detStracks)
    }

    matchesIdx, unmatchTrackIdx, unmatchDetectionIdx, err := bt.linearAssignment(
        costMatrix,
        len(strackPool), len(detStracks), bt.matchThresh,
    )

@@ -126,7 +141,8 @@ func (bt *BYTETracker) Update(objects []Object) ([]*STrack, error) {
        }
    }

    // Step 3: IoU fallback matching for unmatched tracks,
    // using low score IOU detections
    var currentLostStracks []*STrack

    matchesIdx, unmatchTrackIdx, unmatchDetectionIdx, err = bt.linearAssignment(
@@ -162,6 +178,7 @@ func (bt *BYTETracker) Update(objects []Object) ([]*STrack, error) {
    }

    // Step 4: Init new stracks
    // Match non-active to unmatched remainingDetStracks (high confidence only)
    var currentRemovedStracks []*STrack

    matchesIdx, unmatchUnconfirmedIdx, unmatchDetectionIdx, err := bt.linearAssignment(
@@ -197,7 +214,7 @@ func (bt *BYTETracker) Update(objects []Object) ([]*STrack, error) {
        currentTrackedStracks = append(currentTrackedStracks, track)
    }

    // Step 5: Update state - Time-based removal of old lost tracks
    for _, lostStrack := range bt.lostStracks {
        if bt.frameID-lostStrack.GetFrameID() > bt.maxTimeLost {
            lostStrack.MarkAsRemoved()
@@ -508,3 +525,21 @@ func (bt *BYTETracker) execLapjv(cost [][]float32, extendCost bool,

    return rowsol, colsol, opt, nil
}

// calcFeatureDistance calculates the distance between two embedded features
// of the specified STracks
func (bt *BYTETracker) calcFeatureDistance(tracks, detections []*STrack) [][]float32 {

    cost := make([][]float32, len(tracks))
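    // cost[i][j] holds the distance between track i and detection j; lower
    // values mean more similar embeddings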

    for i, tr := range tracks {

        cost[i] = make([]float32, len(detections))

        for j, det := range detections {
            cost[i][j] = tr.BestMatchDistance(det.feature)
        }
    }

    return cost
}

@@ -11,6 +11,8 @@ type Object struct {
    // ID is a unique ID to give this object which can be used to match
    // the input detection object and tracked object
    ID int64
    // Feature is a ReID embedding feature
    Feature []float32
}

// NewObject is a constructor function for the Object struct

246
tracker/reid.go
Normal file
@@ -0,0 +1,246 @@
package tracker

import (
    "fmt"
    "github.com/swdee/go-rknnlite"
    "github.com/swdee/go-rknnlite/postprocess/reid"
    "gocv.io/x/gocv"
    "image"
    "sync"
)

// DistanceMethod defines ReID distance calculation methods
type DistanceMethod int

const (
    Euclidean DistanceMethod = 1
    Cosine    DistanceMethod = 2
)

// reID struct holds all Re-Identification processing features
type reID struct {
    // pool is the rknnlite runtime pool to run inference on
    pool *rknnlite.Pool
    // dist is the distance method to apply to calculations to determine similarity
    dist DistanceMethod
    // threshold is the distance cutoff to determine similar or different objects
    threshold float32
    // batchSize stores the model input tensor batch size
    batchSize int
    width     int
    height    int
    channels  int
    // batchPool holds a pool of batches
    batchPool *rknnlite.BatchPool
    // scaleSize is the size of the input tensor dimensions to scale the object to
    scaleSize image.Point
}

// UseReID sets up Re-Identification processing on the BYTETracker instance
func (bt *BYTETracker) UseReID(pool *rknnlite.Pool, dist DistanceMethod,
    threshold float32) {

    // query runtime and get tensor dimensions
    rt := pool.Get()

    batchSize := int(rt.InputAttrs()[0].Dims[0])
    width := int(rt.InputAttrs()[0].Dims[1])
    height := int(rt.InputAttrs()[0].Dims[2])
    channels := int(rt.InputAttrs()[0].Dims[3])

    bt.reid = &reID{
        pool:      pool,
        dist:      dist,
        threshold: threshold,
        batchSize: batchSize,
        width:     width,
        height:    height,
        channels:  channels,
        scaleSize: image.Pt(width, height),
        batchPool: rknnlite.NewBatchPool(pool.Size(), rt),
    }

    pool.Return(rt)

    bt.useReid = true
}

// UpdateWithFrame updates the tracker with new detections and passes the
// image frame so ReID inference can be conducted
func (bt *BYTETracker) UpdateWithFrame(objects []Object, frame gocv.Mat) ([]*STrack, error) {

    // check if ReID is enabled and get embedding features for all objects
    if bt.useReid {

        bufFrame := frame.Clone()
        defer bufFrame.Close()

        features, err := bt.reid.processObjects(objects, bufFrame)

        if err != nil {
            return nil, fmt.Errorf("failed to process objects: %w", err)
        }

        for i := range objects {
            objects[i].Feature = features[i]
        }
    }

    // run track update
    tracks, err := bt.Update(objects)

    if err != nil {
        return nil, fmt.Errorf("error updating objects: %w", err)
    }

    return tracks, nil
}

// Close frees memory from the reid instance
func (r *reID) Close() {
    r.batchPool.Close()
}

// processObjects takes the detected objects and runs inference on them to get
// their embedded feature fingerprint. Function should be called from a
// Goroutine.
func (r *reID) processObjects(objects []Object, frame gocv.Mat) ([][]float32, error) {

    var wg sync.WaitGroup
    total := len(objects)

    // collect per-object feature embeddings
    allEmbeddings := make([][]float32, total)
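    // buffer one error slot per batch, sized by ceiling division of the
    // object count over the batch size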
    errCh := make(chan error, (total+r.batchSize-1)/r.batchSize)

    for offset := 0; offset < total; offset += r.batchSize {

        end := offset + r.batchSize

        if end > total {
            end = total
        }

        batchObjs := objects[offset:end]

        // capture range variables for closure
        capOffset := offset
        capCnt := end - offset

        wg.Add(1)
        batch := r.batchPool.Get()
        rt := r.pool.Get()

        go func(rt *rknnlite.Runtime, batch *rknnlite.Batch, bobjs []Object, off, cnt int) {
            defer wg.Done()
            fps, err := r.processBatch(rt, batch, bobjs, frame)
            r.pool.Return(rt)
            r.batchPool.Return(batch)

            if err != nil {
                errCh <- err
                return
            }

            // copy this batch's fingerprints into their offsets of the
            // overall results
            for i := 0; i < cnt; i++ {
                allEmbeddings[off+i] = fps[i]
            }

            errCh <- nil
        }(rt, batch, batchObjs, capOffset, capCnt)
    }

    wg.Wait()
    close(errCh)

    // if any error, just bail
    for e := range errCh {
        if e != nil {
            return nil, fmt.Errorf("ReID error: %w", e)
        }
    }

    return allEmbeddings, nil
}

// processBatch adds the objects to a batch and runs inference on them
func (r *reID) processBatch(rt *rknnlite.Runtime, batch *rknnlite.Batch,
    bobjs []Object, frame gocv.Mat) ([][]float32, error) {

    height := frame.Rows()
    width := frame.Cols()

    for _, obj := range bobjs {

        // clamp and get bounding box coordinates
        x1 := clamp(int(obj.Rect.TLX()), 0, width)
        y1 := clamp(int(obj.Rect.TLY()), 0, height)
        x2 := clamp(int(obj.Rect.BRX()), 0, width)
        y2 := clamp(int(obj.Rect.BRY()), 0, height)

        objRect := image.Rect(x1, y1, x2, y2)

        // get the object's region of interest from the source Mat
        objRoi := frame.Region(objRect)
        objImg := gocv.NewMat()

        // resize to input tensor size
        gocv.Resize(objRoi, &objImg, r.scaleSize, 0, 0, gocv.InterpolationArea)

        objRoi.Close()

        err := batch.Add(objImg)
        objImg.Close()

        if err != nil {
            return nil, fmt.Errorf("error adding image to batch: %w", err)
        }
    }

    // run inference on the batch
    outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})

    if err != nil {
        return nil, fmt.Errorf("inference failed: %v", err)
    }

    defer outputs.Free()

    // unpack per-object results
    fingerprints := make([][]float32, len(bobjs))

    for idx := 0; idx < len(bobjs); idx++ {

        output, err := batch.GetOutputInt(idx, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))

        if err != nil {
            return nil, fmt.Errorf("error getting output %d: %v", idx, err)
        }

        // get object fingerprint
        fingerprints[idx] = reid.DequantizeAndL2Normalize(
            output,
            outputs.OutputAttributes().Scales[0],
            outputs.OutputAttributes().ZPs[0],
        )
    }

    return fingerprints, nil
}

// clamp restricts the value val to be within the range min and max
func clamp(val, min, max int) int {

    if val > min {

        if val < max {
            return val // value already within range
        }

        return max
    }

    return min
}
@@ -2,6 +2,7 @@ package tracker

import (
    "fmt"
    "github.com/swdee/go-rknnlite/postprocess/reid"
    "gonum.org/v1/gonum/mat"
)

@@ -47,6 +48,18 @@ type STrack struct {
    detectionID int64
    // label is the object label/class from yolo inference
    label int
    // feature embedding used for ReID
    feature []float32
    // smoothFeature is an EMA smoothed feature embedding used for ReID
    smoothFeature []float32
    // featureQueue is a history of features
    featureQueue [][]float32
    // maxQueueSize is the featureQueue maximum size, e.g. 30
    maxQueueSize int
    // alpha value used in the EMA smoothing calculation
    alpha float32
    // hasFeature is a flag to indicate if WithFeature() has been set
    hasFeature bool
}

// NewSTrack creates a new STrack
@@ -68,6 +81,15 @@ func NewSTrack(rect Rect, score float32, detectionID int64, label int) *STrack {
    }
}

// WithFeature adds an object's embedded feature from ReID inference to the STrack
func (s *STrack) WithFeature(feature []float32, alpha float32, qsize int) {
    s.hasFeature = true
    s.alpha = alpha
    s.maxQueueSize = qsize
    s.featureQueue = make([][]float32, 0, qsize)
    s.UpdateFeatures(feature)
}

// GetRect returns the bounding box of the tracked object
func (s *STrack) GetRect() *Rect {
    return &s.rect
@@ -155,6 +177,8 @@ func (s *STrack) ReActivate(newTrack *STrack, frameID, newTrackID int) {

    s.frameID = frameID
    s.trackletLen = 0

    s.UpdateFeatures(newTrack.feature)
}

// Predict predicts the next state of the track
@@ -185,6 +209,8 @@ func (s *STrack) Update(newTrack *STrack, frameID int) error {
    s.frameID = frameID
    s.trackletLen++

    s.UpdateFeatures(newTrack.feature)

    return nil
}

@@ -205,3 +231,58 @@ func (s *STrack) updateRect() {
    s.rect.SetX(s.mean[0] - s.rect.Width()/2)
    s.rect.SetY(s.mean[1] - s.rect.Height()/2)
}

// UpdateFeatures updates an STrack's ReID embedded features
func (s *STrack) UpdateFeatures(feat []float32) {

    if !s.hasFeature {
        return
    }

    normFeat := reid.NormalizeVec(feat)
    s.feature = normFeat

    if s.smoothFeature == nil {
        s.smoothFeature = make([]float32, len(normFeat))
        copy(s.smoothFeature, normFeat)

    } else {
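        // EMA smoothing: smooth[i] = alpha*smooth[i] + (1-alpha)*feat[i],
        // weighting the track's history against the new detection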
        for i := range normFeat {
            s.smoothFeature[i] = s.alpha*s.smoothFeature[i] + (1-s.alpha)*normFeat[i]
        }
        s.smoothFeature = reid.NormalizeVec(s.smoothFeature)
    }

    // Enqueue the feature
    s.featureQueue = append(s.featureQueue, normFeat)

    if len(s.featureQueue) > s.maxQueueSize {
        s.featureQueue = s.featureQueue[1:]
    }
}

// BestMatchDistance compares a new detection against all stored past features
func (s *STrack) BestMatchDistance(detFeat []float32) float32 {

    if !s.hasFeature {
        // feature not set so return max distance
        return 1.0
    }

    if len(s.featureQueue) == 0 {
        return 1.0 // max distance
    }

    detNorm := reid.NormalizeVec(detFeat)
    best := float32(1.0)

    for _, f := range s.featureQueue {
        d := reid.EuclideanDistance(f, detNorm)

        if d < best {
            best = d
        }
    }

    return best
}