go-rknnlite/postprocess/common.go

package postprocess

import (
	"gocv.io/x/gocv"
	"image"
	"math"
	"sync"
)

// deqntAffineToF32 converts a quantized int8 value back to a float32 using
// the provided zero point and scale
func deqntAffineToF32(qnt int8, zp int32, scale float32) float32 {
	return (float32(qnt) - float32(zp)) * scale
}

// qntF32ToAffine converts a float32 value to an int8 using quantization
// parameters: zero point and scale
func qntF32ToAffine(f32 float32, zp int32, scale float32) int8 {

	dstVal := (f32 / scale) + float32(zp)
	res := clip(dstVal, -128, 127)

	return int8(res)
}

// sigmoid perform sigmoid function on given float
func sigmoid(x float32) float32 {
	return 1 / (1 + float32(math.Exp(float64(-x))))
}

// unsigmoid function (reverse of sigmoid)
func unsigmoid(x float32) float32 {
	return -float32(math.Log(float64(1/x - 1)))
}

// softmax function for []float32 input
func softmax(input []float32, size int) {
	// Find the maximum value to subtract for numerical stability
	maxVal := input[0]
	for i := 1; i < size; i++ {
		if input[i] > maxVal {
			maxVal = input[i]
		}
	}

	// Calculate the sum of exponentials
	var sumExp float32 = 0.0
	for i := 0; i < size; i++ {
		sumExp += float32(math.Exp(float64(input[i] - maxVal)))
	}

	// Normalize the values to form a probability distribution
	for i := 0; i < size; i++ {
		input[i] = float32(math.Exp(float64(input[i]-maxVal))) / sumExp
	}
}

// clip restricts the value x to be within the range min and max and converts
// the result to int
func clip(val, min, max float32) int {

	if val <= min {
		return int(min)
	}

	if val >= max {
		return int(max)
	}

	return int(val)
}

// clamp restricts the value x to be within the range min and max and converts
// the result to float32
func clamp(val float32, min, max uint32) float32 {

	if val > float32(min) {

		if val < float32(max) {
			return val // casting the float to int after the comparison
		}

		return float32(max)
	}

	return float32(min)
}

// quickSortIndiceInverse is a quick sort algorithm that sorts the objProbs
// vector and synchronously updates the indices vector to track the reordering
// of elements
func quickSortIndiceInverse(input []float32, left int, right int, indices []int) int {

	var key float32
	var keyIndex int

	low := left
	high := right

	if left < right {
		keyIndex = indices[left]
		key = input[left]

		for low < high {
			for low < high && input[high] <= key {
				high--
			}

			input[low] = input[high]
			indices[low] = indices[high]

			for low < high && input[low] >= key {
				low++
			}

			input[high] = input[low]
			indices[high] = indices[low]
		}

		input[low] = key
		indices[low] = keyIndex

		quickSortIndiceInverse(input, left, low-1, indices)
		quickSortIndiceInverse(input, low+1, right, indices)
	}

	return low
}

// nms implements a Non-Maximum Suppression (NMS) algorithm
func nms(validCount int, outputLocations []float32, classIds, order []int,
	filterId int, threshold float32, pos int) {

	for i := 0; i < validCount; i++ {

		if order[i] == -1 || classIds[i] != filterId {
			continue
		}

		n := order[i]

		for j := i + 1; j < validCount; j++ {
			m := order[j]

			if m == -1 || classIds[i] != filterId {
				continue
			}

			xmin0 := outputLocations[n*pos+0]
			ymin0 := outputLocations[n*pos+1]
			xmax0 := xmin0 + outputLocations[n*pos+2]
			ymax0 := ymin0 + outputLocations[n*pos+3]

			xmin1 := outputLocations[m*pos+0]
			ymin1 := outputLocations[m*pos+1]
			xmax1 := xmin1 + outputLocations[m*pos+2]
			ymax1 := ymin1 + outputLocations[m*pos+3]

			iou := calculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1)

			if iou > threshold {
				order[j] = -1
			}
		}
	}
}

// calculateOverlap works out the Intersection of Union (IoU) value of two
// boxes dimensions
func calculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1,
	xmax1, ymax1 float32) float32 {

	w := math.Max(0.0, math.Min(float64(xmax0), float64(xmax1))-math.Max(float64(xmin0), float64(xmin1))+1.0)
	h := math.Max(0.0, math.Min(float64(ymax0), float64(ymax1))-math.Max(float64(ymin0), float64(ymin1))+1.0)
	intersection := w * h

	// Calculate the area of both rectangles with added 1.0 for inclusive pixel calculation
	area0 := (xmax0 - xmin0 + 1) * (ymax0 - ymin0 + 1)
	area1 := (xmax1 - xmin1 + 1) * (ymax1 - ymin1 + 1)

	// Calculate union
	union := area0 + area1 - float32(intersection)

	if union <= 0 {
		return 0.0
	}

	// Return Intersection of Union (IoU)
	return float32(intersection) / union
}

// computeDFL calculates the Distribution Focal Loss (DFL)
func computeDFL(tensor []float32, dflLen int) []float32 {

	box := make([]float32, 4)

	for b := 0; b < 4; b++ {

		expT := make([]float32, dflLen)
		expSum := float32(0)
		accSum := float32(0)

		for i := 0; i < dflLen; i++ {
			expT[i] = float32(math.Exp(float64(tensor[i+b*dflLen])))
			expSum += expT[i]
		}

		for i := 0; i < dflLen; i++ {
			accSum += expT[i] / expSum * float32(i)
		}

		box[b] = accSum
	}

	return box
}

// idGenerator is a struct to hold a counter for generating the next incremental
// ID number
type idGenerator struct {
	id int64
	sync.Mutex
}

func NewIDGenerator() *idGenerator {
	return &idGenerator{}
}

// Getnext next incremental number
func (id *idGenerator) GetNext() int64 {
	id.Lock()
	defer id.Unlock()
	id.id++
	return id.id
}

// resizeByOpenCVUint8 takes image data in uint8 format and resizes it using GoCV
func resizeByOpenCVUint8(inputImage []uint8, inputWidth, inputHeight, boxesNum int,
	outputImage []uint8, targetWidth, targetHeight int) {

	dstImage := gocv.NewMat()
	defer dstImage.Close()

	for b := 0; b < boxesNum; b++ {

		startIdx := b * inputWidth * inputHeight
		endIdx := (b + 1) * inputWidth * inputHeight

		if endIdx > len(inputImage) {
			// index out of range, skipping this box
			continue
		}

		// create a new Mat from the input image slice for each box
		srcImage, err := gocv.NewMatFromBytes(inputHeight, inputWidth,
			gocv.MatTypeCV8U, inputImage[startIdx:endIdx])

		if err != nil {
			continue
		}

		if srcImage.Empty() {
			// source image matrix is empty, skipping
			continue
		}

		// resize image
		gocv.Resize(srcImage, &dstImage, image.Point{X: targetWidth, Y: targetHeight},
			0, 0, gocv.InterpolationLinear)

		// copy resized image data back to the output slice
		copy(outputImage[b*targetWidth*targetHeight:], dstImage.ToBytes())

		srcImage.Close()
	}
}

// boxReverse scales detection box back to box for use on original image dimensions
func boxReverse(pos int, pad int, scale float32) int {
	return int(float32(pos-pad) / scale)
}

// segReverse scales the segment mask back to the size of the original image dimensions
func segReverse(segMask, croppedSeg, segMaskReal []uint8,
	modelInHeight, modelInWidth, croppedHeight, croppedWidth,
	oriInHeight, oriInWidth, yPad, xPad int) {

	if yPad == 0 && xPad == 0 && oriInHeight == modelInHeight && oriInWidth == modelInWidth {
		copy(segMaskReal, segMask)
		return
	}

	croppedIndex := 0

	for i := 0; i < modelInHeight; i++ {
		for j := 0; j < modelInWidth; j++ {

			if i >= yPad && i < modelInHeight-yPad && j >= xPad && j < modelInWidth-xPad {

				segIndex := i*modelInWidth + j

				if croppedIndex < len(croppedSeg) {
					croppedSeg[croppedIndex] = segMask[segIndex]
					croppedIndex++
				}
			}
		}
	}

	resizeByOpenCVUint8(croppedSeg, croppedWidth, croppedHeight, 1,
		segMaskReal, oriInWidth, oriInHeight)
}