mirror of https://github.com/swdee/go-rknnlite.git
synced 2025-09-27 03:35:56 +08:00

added ReID example
@@ -98,6 +98,8 @@ See the [example](example) directory.
* [PPOCR Detect](example/ppocr#ppocr-detect) - Takes an image and detects areas of text.
* [PPOCR Recognise](example/ppocr#ppocr-recognise) - Takes an area of text and performs OCR on it.
* [PPOCR System](example/ppocr#ppocr-system) - Combines both Detect and Recognise.
* Tracking
  * [Re-Identification Demo](example/reid) - Re-Identify (ReID) similar objects for tracking, uses batch processing.
* Streaming
  * [HTTP Stream with ByteTrack Tracking](example/stream) - Demo that streams a video over HTTP with YOLO object detection and ByteTrack object tracking.
* Slicing Aided Hyper Inference
188 example/reid/README.md Normal file
@@ -0,0 +1,188 @@
# Re-Identification (ReID)

## Overview

Object trackers like ByteTrack can be used to track visible objects frame-to-frame,
but they rely on the assumption that an object's appearance and location change
smoothly over time. If a person goes behind a building or is briefly hidden
by another passerby, the tracker can lose that object's identity. When that same
person reemerges, the tracker often treats them as a new object and assigns a new ID.
This makes analyzing a person's complete path through a scene difficult
and makes counting unique objects much harder.

Re-Identification (ReID) models help solve this problem by using embedding features
which encode an object into a fixed-length vector that captures distinctive
patterns, shapes, or other visual signatures. When an object disappears and
then reappears, you can compare the newly detected object's embedding against a list of
past objects. If the embeddings are similar enough, that is, the Cosine or Euclidean
distance falls below a chosen threshold, you can confidently link the new detection
back to the original track ID.
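To make the linking step concrete, here is a minimal, hypothetical sketch of
re-attaching a new detection to the closest stored track. The `Track` type and
`matchTrack` helper are illustrative only, not part of this library; the `0.51`
threshold matches the default used by this example.

```
package main

import (
	"fmt"
	"math"
)

// Track pairs a tracker ID with the last embedding stored for that object.
// Illustrative only, not a go-rknnlite type.
type Track struct {
	ID        int
	Embedding []float32
}

// euclidean returns the L2 distance between two equal-length vectors.
func euclidean(a, b []float32) float32 {
	var sum float32
	for i := range a {
		d := a[i] - b[i]
		sum += d * d
	}
	return float32(math.Sqrt(float64(sum)))
}

// matchTrack links a detection's embedding back to the closest past track,
// provided the distance falls under the match threshold (e.g. 0.51).
func matchTrack(emb []float32, past []Track, threshold float32) (int, bool) {
	bestID, bestDist := -1, threshold
	for _, t := range past {
		if d := euclidean(emb, t.Embedding); d < bestDist {
			bestID, bestDist = t.ID, d
		}
	}
	return bestID, bestID != -1
}

func main() {
	past := []Track{{ID: 7, Embedding: []float32{0.6, 0.8}}}

	if id, ok := matchTrack([]float32{0.59, 0.81}, past, 0.51); ok {
		fmt.Printf("re-identified as track %d\n", id)
	}
}
```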
## Datasets

The [OSNet model](https://paperswithcode.com/paper/omni-scale-feature-learning-for-person-re) is
lightweight and provides good accuracy for re-identification tasks, however
it must be trained on a dataset to identify specific object classes.

This example uses the [Market1501](https://paperswithcode.com/dataset/market-1501)
dataset trained for re-identifying people.

To support other object classifications such as Vehicles, Faces, or Animals, you
will need to source datasets and train models accordingly.


## Occlusion Example

In the [people walking video](https://github.com/swdee/go-rknnlite-data/raw/master/people-walking.mp4)
a lady wearing a CK-branded jacket appears
at the beginning of the scene and becomes occluded by passersby. When she reappears, ByteTrack
detects her as a new person.


## Usage

Make sure you have downloaded the data files first for the examples.
You only need to do this once for all examples.

```
cd example/
git clone --depth=1 https://github.com/swdee/go-rknnlite-data.git data
```

Command line usage:
```
$ go run reid.go -h

Usage of /tmp/go-build147978858/b001/exe/reid:
  -d string
        Data file containing object co-ordinates (default "../data/reid-objects.dat")
  -e float
        The Euclidean distance [0.0-1.0], a value less than this defines a match (default 0.51)
  -i string
        Image file to run inference on (default "../data/reid-walking.jpg")
  -m string
        RKNN compiled model file (default "../data/models/rk3588/osnet-market1501-batch8-rk3588.rknn")
  -p string
        Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588] (default "rk3588")
```
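The objects data file passed via `-d` uses a simple TOML-like layout, parsed by
`ParseObjects` in `reid.go`: a `[compare]` section and a `[dataset]` section, each
holding one `x1,y1,x2,y2` bounding box per line, with `#` comment lines ignored.
A sketch of the format using the boxes from this example's output (the shipped
`reid-objects.dat` may differ):

```
# objects to compare against the dataset
[compare]
0,0,134,361
394,0,513,357
513,0,588,246
588,0,728,360

# dataset of known objects
[dataset]
0,0,134,361
134,0,251,325
251,0,326,208
326,0,394,187
```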

Run the ReID example on rk3588, or replace with your platform model:
```
cd example/reid/
go run reid.go -p rk3588
```

This will result in the following output:
```
Driver Version: 0.9.6, API Version: 2.3.0 (c949ad889d@2024-11-07T11:35:33)
Model Input Number: 1, Ouput Number: 1
Input tensors:
  index=0, name=input, n_dims=4, dims=[8, 256, 128, 3], n_elems=786432, size=786432, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=-14, scale=0.018658
Output tensors:
  index=0, name=output, n_dims=2, dims=[8, 512, 0, 0], n_elems=4096, size=4096, fmt=UNDEFINED, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.018782
Comparing object 0 at (0,0,134,361)
 Object 0 at (0,0,134,361) has euclidean distance: 0.000000 (same person)
 Object 1 at (134,0,251,325) has euclidean distance: 0.423271 (same person)
 Object 2 at (251,0,326,208) has euclidean distance: 0.465061 (same person)
 Object 3 at (326,0,394,187) has euclidean distance: 0.445583 (same person)
Comparing object 1 at (394,0,513,357)
 Object 0 at (0,0,134,361) has euclidean distance: 0.781510 (different person)
 Object 1 at (134,0,251,325) has euclidean distance: 0.801649 (different person)
 Object 2 at (251,0,326,208) has euclidean distance: 0.680299 (different person)
 Object 3 at (326,0,394,187) has euclidean distance: 0.686542 (different person)
Comparing object 2 at (513,0,588,246)
 Object 0 at (0,0,134,361) has euclidean distance: 0.860921 (different person)
 Object 1 at (134,0,251,325) has euclidean distance: 0.873663 (different person)
 Object 2 at (251,0,326,208) has euclidean distance: 0.870753 (different person)
 Object 3 at (326,0,394,187) has euclidean distance: 0.820761 (different person)
Comparing object 3 at (588,0,728,360)
 Object 0 at (0,0,134,361) has euclidean distance: 0.762738 (different person)
 Object 1 at (134,0,251,325) has euclidean distance: 0.800668 (different person)
 Object 2 at (251,0,326,208) has euclidean distance: 0.763694 (different person)
 Object 3 at (326,0,394,187) has euclidean distance: 0.769597 (different person)
Model first run speed: batch preparation=3.900093ms, inference=47.935686ms, post processing=262.203µs, total time=52.097982ms
done
```

### Docker

To run the ReID example using the prebuilt docker image, make sure the data files
have been downloaded first, then run:
```
# from project root directory

docker run --rm \
  --device /dev/dri:/dev/dri \
  -v "$(pwd):/go/src/app" \
  -v "$(pwd)/example/data:/go/src/data" \
  -v "/usr/include/rknn_api.h:/usr/include/rknn_api.h" \
  -v "/usr/lib/librknnrt.so:/usr/lib/librknnrt.so" \
  -w /go/src/app \
  swdee/go-rknnlite:latest \
  go run ./example/reid/reid.go -p rk3588
```

### Interpreting Results

The above example uses people detected with a YOLOv5 model and then cropped to
create the sample input.



Objects A1 to A4 represent the same person, and objects B1, C1, and D1 are other
people from the same scene.

The first set of comparisons:
```
Comparing object 0 [A1] at (0,0,134,361)
 Object 0 [A1] at (0,0,134,361) has euclidean distance: 0.000000 (same person)
 Object 1 [A2] at (134,0,251,325) has euclidean distance: 0.423271 (same person)
 Object 2 [A3] at (251,0,326,208) has euclidean distance: 0.465061 (same person)
 Object 3 [A4] at (326,0,394,187) has euclidean distance: 0.445583 (same person)
```

Object 0 is A1; when compared to itself it has a Euclidean distance of 0.0.
Objects 1-3 are A2 to A4, and each of these has a similar distance,
ranging from 0.42 to 0.46.

The Euclidean distance ranges from 0.0 (same object) to 1.0 (different object), so
the lower the distance, the more similar the objects are. A threshold of `0.51`
defines the maximum distance at which two objects are considered the same.
Your use case and datasets may require calibrating the ideal threshold.
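Since the embeddings are dequantized and L2-normalized before comparison (see
`DequantizeAndL2Normalize` in `postprocess/reid/reid.go` below), Euclidean
distance and cosine similarity are directly related for unit vectors:

```
||a - b||^2 = 2 * (1 - cos(a, b))
```

so the default `0.51` distance threshold is equivalent to requiring a cosine
similarity of roughly `1 - 0.51^2/2 ≈ 0.87`. This is why either metric can be
used, depending on how the model was trained.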
The remaining results compare the people B1, C1, and D1.
```
Comparing object 1 [B1] at (394,0,513,357)
 Object 0 [A1] at (0,0,134,361) has euclidean distance: 0.781510 (different person)
 Object 1 [A2] at (134,0,251,325) has euclidean distance: 0.801649 (different person)
 Object 2 [A3] at (251,0,326,208) has euclidean distance: 0.680299 (different person)
 Object 3 [A4] at (326,0,394,187) has euclidean distance: 0.686542 (different person)
Comparing object 2 [C1] at (513,0,588,246)
 Object 0 [A1] at (0,0,134,361) has euclidean distance: 0.860921 (different person)
 Object 1 [A2] at (134,0,251,325) has euclidean distance: 0.873663 (different person)
 Object 2 [A3] at (251,0,326,208) has euclidean distance: 0.870753 (different person)
 Object 3 [A4] at (326,0,394,187) has euclidean distance: 0.820761 (different person)
Comparing object 3 [D1] at (588,0,728,360)
 Object 0 [A1] at (0,0,134,361) has euclidean distance: 0.762738 (different person)
 Object 1 [A2] at (134,0,251,325) has euclidean distance: 0.800668 (different person)
 Object 2 [A3] at (251,0,326,208) has euclidean distance: 0.763694 (different person)
 Object 3 [A4] at (326,0,394,187) has euclidean distance: 0.769597 (different person)
```

All of these other people have a Euclidean distance greater than 0.68, indicating
they are different people.


## Postprocessing

[Convenience functions](https://github.com/swdee/go-rknnlite-data/raw/master/postprocess/reid.go)
are provided for calculating the Euclidean Distance or Cosine Similarity,
depending on how the Model has been trained.
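As a minimal sketch of how these functions fit together (the raw vectors below
are short stand-ins; real embeddings are the 512-value INT8 outputs read via
`batch.GetOutputInt`, with `zp=-128` and `scale=0.018782` taken from the output
tensor attributes shown above):

```
package main

import (
	"fmt"

	"github.com/swdee/go-rknnlite/postprocess/reid"
)

func main() {
	// stand-in quantized embeddings for two detected objects
	rawA := []int8{-100, -64, -8, 120}
	rawB := []int8{-96, -60, -12, 116}

	// dequantize with the model's zero-point and scale, then L2-normalize
	embA := reid.DequantizeAndL2Normalize(rawA, 0.018782, -128)
	embB := reid.DequantizeAndL2Normalize(rawB, 0.018782, -128)

	// lower distance means more similar; compare against the -e threshold
	fmt.Printf("euclidean distance: %f\n", reid.EuclideanDistance(embA, embB))
}
```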
524 example/reid/reid.go Normal file
@@ -0,0 +1,524 @@
package main

import (
	"bufio"
	"flag"
	"fmt"
	"github.com/swdee/go-rknnlite"
	"github.com/swdee/go-rknnlite/postprocess/reid"
	"gocv.io/x/gocv"
	"image"
	"log"
	"os"
	"strconv"
	"strings"
	"time"
)

func main() {
	// disable logging timestamps
	log.SetFlags(0)

	// read in cli flags
	modelFile := flag.String("m", "../data/models/rk3588/osnet-market1501-batch8-rk3588.rknn", "RKNN compiled model file")
	imgFile := flag.String("i", "../data/reid-walking.jpg", "Image file to run inference on")
	objsFile := flag.String("d", "../data/reid-objects.dat", "Data file containing object co-ordinates")
	rkPlatform := flag.String("p", "rk3588", "Rockchip CPU Model number [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588]")
	euDist := flag.Float64("e", 0.51, "The Euclidean distance [0.0-1.0], a value less than this defines a match")
	flag.Parse()

	err := rknnlite.SetCPUAffinityByPlatform(*rkPlatform, rknnlite.FastCores)

	if err != nil {
		log.Printf("Failed to set CPU Affinity: %v", err)
	}

	// check if the user specified a model file or if the default is being used.
	// if default then pick the default platform model to use.
	if f := flag.Lookup("m"); f != nil && f.Value.String() == f.DefValue && *rkPlatform != "rk3588" {
		*modelFile = strings.ReplaceAll(*modelFile, "rk3588", *rkPlatform)
	}

	// create rknn runtime instance
	rt, err := rknnlite.NewRuntimeByPlatform(*rkPlatform, *modelFile)

	if err != nil {
		log.Fatal("Error initializing RKNN runtime: ", err)
	}

	// set runtime to leave output tensors as int8
	rt.SetWantFloat(false)

	// optional querying of model file tensors and SDK version for printing
	// to stdout. not necessary for production inference code
	err = rt.Query(os.Stdout)

	if err != nil {
		log.Fatal("Error querying runtime: ", err)
	}

	// load objects file
	objs, err := ParseObjects(*objsFile)

	if err != nil {
		log.Fatal("Error parsing objects: ", err)
	}

	// load image
	img := gocv.IMRead(*imgFile, gocv.IMReadColor)

	if img.Empty() {
		log.Fatal("Error reading image from: ", *imgFile)
	}

	// convert colorspace
	srcImg := gocv.NewMat()
	gocv.CvtColor(img, &srcImg, gocv.ColorBGRToRGB)

	defer img.Close()
	defer srcImg.Close()

	start := time.Now()

	// create a batch to process all of the compare and dataset images
	// in a single forward pass
	batch := rknnlite.NewBatch(
		int(rt.InputAttrs()[0].Dims[0]),
		int(rt.InputAttrs()[0].Dims[2]),
		int(rt.InputAttrs()[0].Dims[1]),
		int(rt.InputAttrs()[0].Dims[3]),
		rt.GetInputTypeFloat32(),
	)

	// scale size is the size of the input tensor dimensions to scale the object to
	scaleSize := image.Pt(int(rt.InputAttrs()[0].Dims[1]), int(rt.InputAttrs()[0].Dims[2]))

	// add the compare images to the batch
	for _, cmpObj := range objs.Compare {
		err := AddObjectToBatch(batch, srcImg, cmpObj, scaleSize)

		if err != nil {
			log.Fatal("Error creating batch: ", err)
		}
	}

	// add the dataset images to the batch
	for _, dtObj := range objs.Dataset {
		err := AddObjectToBatch(batch, srcImg, dtObj, scaleSize)

		if err != nil {
			log.Fatal("Error creating batch: ", err)
		}
	}

	defer batch.Close()

	endBatch := time.Now()

	// run inference on the batch
	outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})

	endInference := time.Now()

	if err != nil {
		log.Fatal("Runtime inferencing failed with error: ", err)
	}

	// get total number of compare objects
	totalCmp := len(objs.Compare)

	// compare each object to those objects in the dataset for similarity
	for i, cmpObj := range objs.Compare {
		// get the compare object's output
		cmpOutput, err := batch.GetOutputInt(i, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))

		if err != nil {
			log.Fatal("Getting output tensor failed with error: ", err)
		}

		log.Printf("Comparing object %d at (%d,%d,%d,%d)\n", i,
			cmpObj.X1, cmpObj.Y1, cmpObj.X2, cmpObj.Y2)

		for j, dtObj := range objs.Dataset {
			// get each dataset object's output, dataset slots follow the
			// compare objects in the batch
			nextOutput, err := batch.GetOutputInt(totalCmp+j, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))

			if err != nil {
				log.Fatal("Getting output tensor failed with error: ", err)
			}

			dist := CompareObjects(
				cmpOutput,
				nextOutput,
				outputs.OutputAttributes().Scales[0],
				outputs.OutputAttributes().ZPs[0],
			)

			// check euclidean distance to determine whether it is the
			// same person or not
			objRes := "different person"

			if dist < float32(*euDist) {
				objRes = "same person"
			}

			log.Printf(" Object %d at (%d,%d,%d,%d) has euclidean distance: %f (%s)\n",
				j,
				dtObj.X1, dtObj.Y1, dtObj.X2, dtObj.Y2,
				dist, objRes)
		}
	}

	endCompare := time.Now()

	log.Printf("Model first run speed: batch preparation=%s, inference=%s, post processing=%s, total time=%s\n",
		endBatch.Sub(start).String(),
		endInference.Sub(endBatch).String(),
		endCompare.Sub(endInference).String(),
		endCompare.Sub(start).String(),
	)

	// free outputs allocated in C memory after you have finished post processing
	err = outputs.Free()

	if err != nil {
		log.Fatal("Error freeing Outputs: ", err)
	}

	// close runtime and release resources
	err = rt.Close()

	if err != nil {
		log.Fatal("Error closing RKNN runtime: ", err)
	}

	log.Println("done")
	/*
		//CompareObject(rt, srcImg, cmpObj, objs.Dataset)

		//rgbImg := img.Clone()

		frameWidth := 67
		frameHeight := 177

		roiRect1 := image.Rect(497, 195, 497+frameWidth, 195+frameHeight)

		// cklady
		//roiRect1 := image.Rect(0, 0, 134, 361)

		roiImg1 := rgbImg.Region(roiRect1)

		cropImg1 := rgbImg.Clone()
		scaleSize1 := image.Pt(int(rt.InputAttrs()[0].Dims[1]), int(rt.InputAttrs()[0].Dims[2]))
		gocv.Resize(roiImg1, &cropImg1, scaleSize1, 0, 0, gocv.InterpolationArea)

		defer img.Close()
		defer rgbImg.Close()
		defer cropImg1.Close()
		defer roiImg1.Close()

		gocv.IMWrite("/tmp/frame-master.jpg", cropImg1)

		batch := rt.NewBatch(
			int(rt.InputAttrs()[0].Dims[0]),
			int(rt.InputAttrs()[0].Dims[2]),
			int(rt.InputAttrs()[0].Dims[1]),
			int(rt.InputAttrs()[0].Dims[3]),
		)
		err = batch.Add(cropImg1)

		if err != nil {
			log.Fatal("Error creating batch: ", err)
		}
		defer batch.Close()

		// perform inference on image file
		outputs, err := rt.Inference([]gocv.Mat{batch.Mat()})

		if err != nil {
			log.Fatal("Runtime inferencing failed with error: ", err)
		}

		output, err := batch.GetOutputInt(0, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))

		if err != nil {
			log.Fatal("Getting output tensor failed with error: ", err)
		}

		fingerPrint := DequantizeAndL2Normalize(
			output,
			outputs.OutputAttributes().Scales[0],
			outputs.OutputAttributes().ZPs[0],
		)

		// seed the EMA fingerprint to the master
		emaFP := make([]float32, len(fingerPrint))
		copy(emaFP, fingerPrint)
		const alpha = 0.9 // smoothing factor

		hash, err := FingerprintHash(fingerPrint)

		if err != nil {
			log.Fatalf("hashing failed: %v", err)
		}

		log.Println("object fingerprint:", hash)

		// free outputs allocated in C memory after you have finished post processing
		err = outputs.Free()

		if err != nil {
			log.Fatal("Error freeing Outputs: ", err)
		}

		// sample 2 images

		yOffsets := []int{1, 195, 388}
		xOffsets := []int{497, 565, 633, 701, 769, 836, 904}

		images := [][]int{}

		for _, ny := range yOffsets {
			for _, nx := range xOffsets {
				images = append(images, []int{nx, ny})
			}
		}

		// ck lady

		// images := [][]int{
		//	{134, 0, 117, 325},
		//	{251, 0, 75, 208},
		//	{326, 0, 68, 187},
		// }

		// Image 2
		for frame, next := range images {

			roiRect2 := image.Rect(next[0], next[1], next[0]+frameWidth, next[1]+frameHeight)
			// ck lady
			//roiRect2 := image.Rect(next[0], next[1], next[0]+next[2], next[1]+next[3])
			roiImg2 := rgbImg.Region(roiRect2)

			cropImg2 := rgbImg.Clone()
			scaleSize2 := image.Pt(int(rt.InputAttrs()[0].Dims[1]), int(rt.InputAttrs()[0].Dims[2]))
			gocv.Resize(roiImg2, &cropImg2, scaleSize2, 0, 0, gocv.InterpolationArea)

			defer cropImg2.Close()
			defer roiImg2.Close()

			gocv.IMWrite(fmt.Sprintf("/tmp/frame-%d.jpg", frame), cropImg2)

			start := time.Now()

			batch.Clear()
			err = batch.Add(cropImg2)

			if err != nil {
				log.Fatal("Error creating batch: ", err)
			}

			outputs, err = rt.Inference([]gocv.Mat{batch.Mat()})

			if err != nil {
				log.Fatal("Runtime inferencing failed with error: ", err)
			}

			endInference := time.Now()

			output, err := batch.GetOutputInt(0, outputs.Output[0], int(outputs.OutputAttributes().DimForDFL))

			if err != nil {
				log.Fatal("Getting output tensor failed with error: ", err)
			}

			fingerPrint2 := DequantizeAndL2Normalize(
				output,
				outputs.OutputAttributes().Scales[0],
				outputs.OutputAttributes().ZPs[0],
			)

			// sim := CosineSimilarity(fingerPrint, fingerPrint2)
			// dist := CosineDistance(fingerPrint, fingerPrint2)
			// fmt.Printf("Frame %d, cosine similarity: %f, distance=%f\n", frame, sim, dist)

			// compute Euclidean (L2) distance directly
			dist := EuclideanDistance(fingerPrint, fingerPrint2)

			// 3) compute vs EMA
			emaDist := EuclideanDistance(emaFP, fingerPrint2)

			endDetect := time.Now()

			objRes := "different person"
			if emaDist < 0.51 {
				objRes = "same person"
			}

			fmt.Printf("Frame %d, euclidean distance: %f, ema=%f (%s)\n", frame, dist, emaDist, objRes)

			log.Printf(" Inference=%s, detect=%s, total time=%s\n",
				endInference.Sub(start).String(),
				endDetect.Sub(endInference).String(),
				endDetect.Sub(start).String(),
			)

			// free outputs allocated in C memory after you have finished post processing
			err = outputs.Free()

			if err != nil {
				log.Fatal("Error freeing Outputs: ", err)
			}

			// 4) update the EMA fingerprint
			if frame >= 7 && frame <= 13 {

				// emaFP = α*emaFP + (1-α)*fp2
				for i := range emaFP {
					emaFP[i] = alpha*emaFP[i] + (1-alpha)*fingerPrint2[i]
				}
				// 5) re-normalize emaFP back to unit length
				var sum float32
				for _, v := range emaFP {
					sum += v * v
				}
				norm := float32(math.Sqrt(float64(sum)))
				if norm > 0 {
					for i := range emaFP {
						emaFP[i] /= norm
					}
				}
			}

		}

		// close runtime and release resources
		err = rt.Close()

		if err != nil {
			log.Fatal("Error closing RKNN runtime: ", err)
		}

		log.Println("done")
	*/
}

// Box holds object bounding box coordinates (x1, y1, x2, y2)
type Box struct {
	X1, Y1, X2, Y2 int
}

// Objects is a struct to represent the compare and dataset objects parsed
// from the objects data file
type Objects struct {
	Compare []Box
	Dataset []Box
}

// ParseObjects reads the TOML-like objects data file and returns the two
// lists of objects and their bounding box coordinates
func ParseObjects(path string) (*Objects, error) {

	f, err := os.Open(path)

	if err != nil {
		return nil, err
	}

	defer f.Close()

	objs := &Objects{}
	section := "" // either "compare" or "dataset"
	scanner := bufio.NewScanner(f)

	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())

		// skip blank or comment lines
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}

		// section header
		if strings.HasPrefix(line, "[") && strings.HasSuffix(line, "]") {
			section = strings.ToLower(line[1 : len(line)-1])
			continue
		}

		// data line, expect four ints separated by commas
		fields := strings.Split(line, ",")

		if len(fields) != 4 {
			return nil, fmt.Errorf("invalid data line %q", line)
		}

		nums := make([]int, 4)

		for i, fstr := range fields {
			v, err := strconv.Atoi(strings.TrimSpace(fstr))

			if err != nil {
				return nil, fmt.Errorf("parsing %q: %w", fstr, err)
			}

			nums[i] = v
		}

		// define box
		box := Box{nums[0], nums[1], nums[2], nums[3]}

		switch section {

		case "compare":
			objs.Compare = append(objs.Compare, box)

		case "dataset":
			objs.Dataset = append(objs.Dataset, box)

		default:
			return nil, fmt.Errorf("line %q outside of a known section", line)
		}
	}

	if err := scanner.Err(); err != nil {
		return nil, err
	}

	return objs, nil
}

// AddObjectToBatch crops the object from the source image, resizes it to the
// model's input size, and adds it to the batch for running inference on
func AddObjectToBatch(batch *rknnlite.Batch, srcImg gocv.Mat, obj Box,
	scaleSize image.Point) error {

	// get the object's region of interest from the source Mat
	objRect := image.Rect(obj.X1, obj.Y1, obj.X2, obj.Y2)
	objRoi := srcImg.Region(objRect)

	objImg := objRoi.Clone()
	gocv.Resize(objRoi, &objImg, scaleSize, 0, 0, gocv.InterpolationArea)

	defer objRoi.Close()
	defer objImg.Close()

	return batch.Add(objImg)
}

// CompareObjects dequantizes and L2-normalizes the output embeddings of two
// objects and returns the Euclidean distance between them
func CompareObjects(objA []int8, objB []int8, scales float32,
	ZPs int32) float32 {

	// get the fingerprint of both objects
	fpA := reid.DequantizeAndL2Normalize(objA, scales, ZPs)
	fpB := reid.DequantizeAndL2Normalize(objB, scales, ZPs)

	// compute Euclidean (L2) distance directly
	return reid.EuclideanDistance(fpA, fpB)
}
129 postprocess/reid/reid.go Normal file
@@ -0,0 +1,129 @@
package reid

import (
	"bytes"
	"crypto/sha256"
	"encoding/binary"
	"encoding/hex"
	"math"
)

// DequantizeAndL2Normalize converts a quantized int8 vector "q" into a float32 vector,
// applies dequantization using the provided scale "s" and zero-point "z",
// and then normalizes the result to unit length using L2 normalization.
//
// This is commonly used to convert quantized embedding vectors back to a
// normalized float form for comparison or similarity calculations.
//
// If the resulting vector has zero magnitude, the function returns the
// unnormalized dequantized vector.
func DequantizeAndL2Normalize(q []int8, s float32, z int32) []float32 {

	N := len(q)
	x := make([]float32, N)

	// dequantize
	for i := 0; i < N; i++ {
		x[i] = float32(int32(q[i])-z) * s
	}

	// compute L2 norm
	var sumSquares float32

	for _, v := range x {
		sumSquares += v * v
	}

	norm := float32(math.Sqrt(float64(sumSquares)))

	if norm == 0 {
		// avoid division by zero
		return x
	}

	// normalize
	for i := 0; i < N; i++ {
		x[i] /= norm
	}

	return x
}

// FingerprintHash takes an L2-normalized []float32 and returns
// a hex-encoded SHA-256 hash of its binary representation.
func FingerprintHash(feat []float32) (string, error) {

	buf := new(bytes.Buffer)

	// write each float32 in little-endian
	for _, v := range feat {
		if err := binary.Write(buf, binary.LittleEndian, v); err != nil {
			return "", err
		}
	}

	sum := sha256.Sum256(buf.Bytes())

	return hex.EncodeToString(sum[:]), nil
}

// CosineSimilarity returns the cosine of the angle between vectors a and b.
// Assumes len(a) == len(b). If you have already L2-normalized them,
// this is just their dot product.
func CosineSimilarity(a, b []float32) float32 {

	var dot float32

	for i := range a {
		dot += a[i] * b[i]
	}

	// if not already normalized, you would divide by the norms here
	return dot
}

// CosineDistance returns 1 - cosine similarity. For L2-normalized vectors
// this lies in [0,2], and small values mean "very similar".
func CosineDistance(a, b []float32) float32 {
	return 1 - CosineSimilarity(a, b)
}

// EuclideanDistance returns the L2 distance between two vectors.
// Lower means "more similar" when your features are L2-normalized.
func EuclideanDistance(a, b []float32) float32 {
	var sum float32

	for i := range a {
		d := a[i] - b[i]
		sum += d * d
	}

	return float32(math.Sqrt(float64(sum)))
}

// NormalizeVec normalizes the input float32 slice to unit length and returns
// a new slice. If the input vector has zero magnitude, it returns the original
// slice unchanged.
func NormalizeVec(v []float32) []float32 {

	norm := float32(0.0)

	for _, x := range v {
		norm += x * x
	}

	if norm == 0 {
		return v // avoid division by zero
	}

	norm = float32(math.Sqrt(float64(norm)))

	out := make([]float32, len(v))

	for i, x := range v {
		out[i] = x / norm
	}

	return out
}
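The `FingerprintHash` helper is not exercised by the example above; a hedged
sketch of its intended use is hashing a normalized embedding into a stable
identifier, for example for caching or logging (values below are illustrative):

```
package main

import (
	"fmt"

	"github.com/swdee/go-rknnlite/postprocess/reid"
)

func main() {
	// illustrative embedding, normalized to unit length first
	fp := reid.NormalizeVec([]float32{0.1, 0.4, 0.9})

	// hex-encoded SHA-256 of the vector's little-endian bytes
	hash, err := reid.FingerprintHash(fp)
	if err != nil {
		panic(err)
	}

	fmt.Println("fingerprint:", hash)
}
```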