Files
go-rknnlite/postprocess/common.go
2025-04-20 15:16:58 +12:00

313 lines
7.2 KiB
Go

package postprocess
import (
"gocv.io/x/gocv"
"image"
"math"
"sync"
)
// deqntAffineToF32 dequantizes an affine-quantized int8 value: the zero point
// zp is subtracted from qnt and the difference is multiplied by scale.
func deqntAffineToF32(qnt int8, zp int32, scale float32) float32 {
	centered := float32(qnt) - float32(zp)
	return centered * scale
}
// qntF32ToAffine quantizes a float32 into the affine int8 domain. The value is
// divided by scale, offset by the zero point zp, and then limited to the int8
// range [-128, 127] via clip (which also drops the fractional part).
func qntF32ToAffine(f32 float32, zp int32, scale float32) int8 {
	scaled := f32 / scale
	shifted := scaled + float32(zp)
	return int8(clip(shifted, -128, 127))
}
// sigmoid returns the logistic function 1 / (1 + e^-x) of x. The exponential
// is evaluated in float64 and narrowed to float32 before the division.
func sigmoid(x float32) float32 {
	expNeg := float32(math.Exp(float64(-x)))
	return 1 / (1 + expNeg)
}
// unsigmoid is the inverse of sigmoid (the logit): it returns -ln(1/x - 1).
// The ratio 1/x - 1 is formed in float32 and the logarithm taken in float64.
func unsigmoid(x float32) float32 {
	ratio := 1/x - 1
	logRatio := math.Log(float64(ratio))
	return -float32(logRatio)
}
// softmax replaces the first size elements of input, in place, with their
// softmax probabilities.
//
// size is limited to len(input); if the resulting count is zero or negative
// the function returns without touching input. The maximum element is
// subtracted before exponentiation so that large inputs do not overflow.
func softmax(input []float32, size int) {
	if size > len(input) {
		size = len(input)
	}
	if size <= 0 {
		return
	}
	vals := input[:size]

	// Find the maximum value to subtract for numerical stability
	maxVal := vals[0]
	for _, v := range vals[1:] {
		if v > maxVal {
			maxVal = v
		}
	}

	// Store each exponential in place while accumulating the sum, so that
	// every math.Exp is evaluated only once
	var sumExp float32
	for i, v := range vals {
		e := float32(math.Exp(float64(v - maxVal)))
		vals[i] = e
		sumExp += e
	}

	// Normalize the values to form a probability distribution
	for i := range vals {
		vals[i] /= sumExp
	}
}
// clip limits val to the closed range [min, max] and returns the result
// converted to int, which drops any fractional part.
func clip(val, min, max float32) int {
	bounded := val
	switch {
	case val <= min:
		bounded = min
	case val >= max:
		bounded = max
	}
	return int(bounded)
}
// clamp limits val to the range [min, max], where the bounds are given as
// unsigned integers, and returns the result as a float32. A val that is not
// greater than min (including NaN) yields min.
func clamp(val float32, min, max uint32) float32 {
	lo, hi := float32(min), float32(max)
	if val > lo && val < hi {
		// strictly inside the range: pass through unchanged
		return val
	}
	if val > lo {
		return hi
	}
	return lo
}
// quickSortIndiceInverse sorts input[left..right] (inclusive) in place into
// descending order using quick sort, applying every move to indices as well
// so that indices keeps tracking where each value originally came from.
//
// The return value is the final position of the pivot chosen for this call,
// or left when the range holds fewer than two elements.
func quickSortIndiceInverse(input []float32, left int, right int, indices []int) int {
	lo, hi := left, right
	if left >= right {
		return lo
	}

	// the first element is the pivot; its slot becomes the initial hole
	pivotIndex := indices[left]
	pivot := input[left]

	for lo < hi {
		// scan from the right for a value larger than the pivot and move
		// it into the hole on the left
		for lo < hi && input[hi] <= pivot {
			hi--
		}
		input[lo], indices[lo] = input[hi], indices[hi]

		// scan from the left for a value smaller than the pivot and move
		// it into the hole on the right
		for lo < hi && input[lo] >= pivot {
			lo++
		}
		input[hi], indices[hi] = input[lo], indices[lo]
	}

	// lo == hi is the pivot's final position
	input[lo], indices[lo] = pivot, pivotIndex

	quickSortIndiceInverse(input, left, lo-1, indices)
	quickSortIndiceInverse(input, lo+1, right, indices)
	return lo
}
// nms performs Non-Maximum Suppression (NMS) for a single class.
//
// order holds detection indices (or -1 for entries that are already
// suppressed) and is walked front to back, so earlier entries take priority.
// For every surviving entry of class filterId, any later entry of the same
// class whose IoU with it exceeds threshold is suppressed by writing -1 into
// its slot in order.
//
// outputLocations stores one box per detection as x, y, width, height with
// a stride of pos values. classIds is looked up with the detection index
// taken from order, i.e. the same index that addresses outputLocations,
// rather than with the position inside order.
// NOTE(review): this assumes classIds is laid out per detection like
// outputLocations; confirm against the callers.
func nms(validCount int, outputLocations []float32, classIds, order []int,
	filterId int, threshold float32, pos int) {

	for i := 0; i < validCount; i++ {
		n := order[i]
		if n == -1 || classIds[n] != filterId {
			continue
		}

		// the reference box does not change while scanning candidates
		xmin0 := outputLocations[n*pos+0]
		ymin0 := outputLocations[n*pos+1]
		xmax0 := xmin0 + outputLocations[n*pos+2]
		ymax0 := ymin0 + outputLocations[n*pos+3]

		for j := i + 1; j < validCount; j++ {
			m := order[j]
			// only candidates of the class being filtered may be suppressed
			if m == -1 || classIds[m] != filterId {
				continue
			}

			xmin1 := outputLocations[m*pos+0]
			ymin1 := outputLocations[m*pos+1]
			xmax1 := xmin1 + outputLocations[m*pos+2]
			ymax1 := ymin1 + outputLocations[m*pos+3]

			iou := calculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1)
			if iou > threshold {
				order[j] = -1
			}
		}
	}
}
// calculateOverlap returns the Intersection over Union (IoU) of two boxes
// given by their min/max corner coordinates. Widths and heights are measured
// inclusively (max - min + 1). The result is 0 when the union is not positive.
func calculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1,
	xmax1, ymax1 float32) float32 {

	// extent of the overlapping region along each axis, computed in float64;
	// a negative extent means the boxes do not overlap on that axis
	overlapW := math.Min(float64(xmax0), float64(xmax1)) - math.Max(float64(xmin0), float64(xmin1)) + 1.0
	overlapH := math.Min(float64(ymax0), float64(ymax1)) - math.Max(float64(ymin0), float64(ymin1)) + 1.0
	intersection := math.Max(0.0, overlapW) * math.Max(0.0, overlapH)

	// inclusive pixel dimensions of each box
	width0, height0 := xmax0-xmin0+1, ymax0-ymin0+1
	width1, height1 := xmax1-xmin1+1, ymax1-ymin1+1
	area0 := width0 * height0
	area1 := width1 * height1

	union := area0 + area1 - float32(intersection)
	if union <= 0 {
		return 0.0
	}

	return float32(intersection) / union
}
// computeDFL decodes the four box values from a Distribution Focal Loss (DFL)
// style output. tensor is read as four consecutive groups of dflLen logits;
// for each group a softmax is taken and the returned value is the expectation
// of the bin index under that distribution, sum(i * softmax(group)[i]).
//
// The largest logit of each group is subtracted before exponentiation so that
// large-magnitude logits cannot overflow or underflow into NaN. A dflLen of
// zero or less yields four zeros. tensor must hold at least 4*dflLen values.
func computeDFL(tensor []float32, dflLen int) []float32 {
	box := make([]float32, 4)
	if dflLen <= 0 {
		return box
	}

	// scratch buffer shared by all four groups
	expT := make([]float32, dflLen)

	for b := range box {
		bins := tensor[b*dflLen : (b+1)*dflLen]

		maxVal := bins[0]
		for _, v := range bins[1:] {
			if v > maxVal {
				maxVal = v
			}
		}

		var expSum float32
		for i, v := range bins {
			expT[i] = float32(math.Exp(float64(v - maxVal)))
			expSum += expT[i]
		}

		var accSum float32
		for i, e := range expT {
			accSum += e / expSum * float32(i)
		}
		box[b] = accSum
	}

	return box
}
// idGenerator produces incrementing int64 ID numbers. The embedded mutex
// guards the counter.
type idGenerator struct {
	id int64
	sync.Mutex
}

// NewIDGenerator returns a generator whose counter starts at zero, so the
// first call to GetNext returns 1.
func NewIDGenerator() *idGenerator {
	return new(idGenerator)
}

// GetNext advances the counter by one while holding the lock and returns the
// new value.
func (g *idGenerator) GetNext() int64 {
	g.Lock()
	g.id++
	next := g.id
	g.Unlock()
	return next
}
// resizeByOpenCVUint8 resizes boxesNum single-channel uint8 images using GoCV.
//
// The source images are stored back to back in inputImage, each occupying
// inputWidth*inputHeight bytes. Each one is resized with linear interpolation
// to targetWidth x targetHeight and copied into outputImage at offset
// b*targetWidth*targetHeight.
//
// A box is skipped when its source range lies beyond inputImage, when its
// destination offset lies beyond outputImage, or when the source Mat cannot
// be created or is empty. Every source Mat that was created is closed before
// moving on to the next box.
func resizeByOpenCVUint8(inputImage []uint8, inputWidth, inputHeight, boxesNum int,
	outputImage []uint8, targetWidth, targetHeight int) {

	srcSize := inputWidth * inputHeight
	dstSize := targetWidth * targetHeight

	// destination Mat is reused for every box
	dstImage := gocv.NewMat()
	defer dstImage.Close()

	for b := 0; b < boxesNum; b++ {
		startIdx := b * srcSize
		endIdx := startIdx + srcSize
		if endIdx > len(inputImage) {
			// index out of range, skipping this box
			continue
		}

		outIdx := b * dstSize
		if outIdx > len(outputImage) {
			// no room in the output slice for this box, skipping
			continue
		}

		// create a new Mat from the input image slice for each box
		srcImage, err := gocv.NewMatFromBytes(inputHeight, inputWidth,
			gocv.MatTypeCV8U, inputImage[startIdx:endIdx])
		if err != nil {
			continue
		}

		if srcImage.Empty() {
			// source image matrix is empty, release it and skip
			srcImage.Close()
			continue
		}

		// resize image
		gocv.Resize(srcImage, &dstImage, image.Point{X: targetWidth, Y: targetHeight},
			0, 0, gocv.InterpolationLinear)

		// copy resized image data back to the output slice
		copy(outputImage[outIdx:], dstImage.ToBytes())

		srcImage.Close()
	}
}
// boxReverse maps a box coordinate back onto the original image: the padding
// pad is removed from pos and the remainder is divided by scale, with the
// result truncated to an int.
func boxReverse(pos int, pad int, scale float32) int {
	unpadded := float32(pos - pad)
	return int(unpadded / scale)
}
// segReverse converts a segment mask at model input size back to the original
// image size, writing the result into segMaskReal.
//
// When there is no padding and the model and original dimensions match,
// segMask is copied to segMaskReal unchanged. Otherwise the pixels lying
// inside the yPad/xPad border are gathered row by row into croppedSeg (up to
// its length), and croppedSeg is then resized from croppedWidth x
// croppedHeight to oriInWidth x oriInHeight via resizeByOpenCVUint8.
func segReverse(segMask, croppedSeg, segMaskReal []uint8,
	modelInHeight, modelInWidth, croppedHeight, croppedWidth,
	oriInHeight, oriInWidth, yPad, xPad int) {

	noPadding := yPad == 0 && xPad == 0
	sameSize := oriInHeight == modelInHeight && oriInWidth == modelInWidth
	if noPadding && sameSize {
		copy(segMaskReal, segMask)
		return
	}

	written := 0
	for row := 0; row < modelInHeight; row++ {
		if row < yPad || row >= modelInHeight-yPad {
			// row lies in the top or bottom padding
			continue
		}
		for col := 0; col < modelInWidth; col++ {
			if col < xPad || col >= modelInWidth-xPad {
				// column lies in the left or right padding
				continue
			}
			if written >= len(croppedSeg) {
				// cropped buffer is full
				continue
			}
			croppedSeg[written] = segMask[row*modelInWidth+col]
			written++
		}
	}

	resizeByOpenCVUint8(croppedSeg, croppedWidth, croppedHeight, 1,
		segMaskReal, oriInWidth, oriInHeight)
}