mirror of
https://github.com/swdee/go-rknnlite.git
synced 2025-11-01 11:12:41 +08:00
313 lines
7.2 KiB
Go
313 lines
7.2 KiB
Go
package postprocess
|
|
|
|
import (
|
|
"gocv.io/x/gocv"
|
|
"image"
|
|
"math"
|
|
"sync"
|
|
)
|
|
|
|
// deqntAffineToF32 dequantizes an affine-quantized int8 sample: the zero
// point zp is subtracted from qnt and the difference is multiplied by scale.
func deqntAffineToF32(qnt int8, zp int32, scale float32) float32 {
	shifted := float32(qnt) - float32(zp)
	return shifted * scale
}
|
|
|
|
// qntF32ToAffine converts a float32 value to an int8 using quantization
|
|
// parameters: zero point and scale
|
|
func qntF32ToAffine(f32 float32, zp int32, scale float32) int8 {
|
|
|
|
dstVal := (f32 / scale) + float32(zp)
|
|
res := clip(dstVal, -128, 127)
|
|
|
|
return int8(res)
|
|
}
|
|
|
|
// sigmoid evaluates the logistic function 1 / (1 + e^-x). The exponential is
// computed in float64 and narrowed back to float32.
func sigmoid(x float32) float32 {
	decay := float32(math.Exp(float64(-x)))
	return 1 / (1 + decay)
}
|
|
|
|
// unsigmoid is the inverse of sigmoid: for y = sigmoid(x) it recovers x as
// -ln(1/y - 1). The ratio is formed in float32 and the logarithm is taken in
// float64.
func unsigmoid(x float32) float32 {
	ratio := 1/x - 1
	return -float32(math.Log(float64(ratio)))
}
|
|
|
|
// softmax replaces the first size elements of input, in place, with their
// softmax: each value becomes exp(v-max) divided by the sum of exp(v-max) over
// those elements, so the results are positive and add up to one. Elements
// beyond size are left untouched.
//
// size is limited to len(input), and the call is a no-op when there is nothing
// to normalise, so an empty slice or a non-positive size is safe.
func softmax(input []float32, size int) {
	if size > len(input) {
		size = len(input)
	}
	if size <= 0 {
		return
	}
	vals := input[:size]

	// Subtract the largest value before exponentiating so math.Exp never sees
	// a positive argument and cannot overflow.
	maxVal := vals[0]
	for _, v := range vals[1:] {
		if v > maxVal {
			maxVal = v
		}
	}

	// Exponentiate each element once, storing it in place while summing.
	var sumExp float32
	for i, v := range vals {
		e := float32(math.Exp(float64(v - maxVal)))
		vals[i] = e
		sumExp += e
	}

	// Normalize the values to form a probability distribution.
	for i := range vals {
		vals[i] /= sumExp
	}
}
|
|
|
|
// clip limits val to the closed interval [min, max] and returns the result as
// an int. A value strictly inside the interval is truncated toward zero by
// the int conversion.
func clip(val, min, max float32) int {
	bounded := val

	switch {
	case val <= min:
		bounded = min
	case val >= max:
		bounded = max
	}

	return int(bounded)
}
|
|
|
|
// clamp limits val to the range bounded by min and max and returns the result
// as a float32. A value that is not greater than min (NaN included) yields
// min, a value that is not less than max yields max, and anything in between
// is returned unchanged.
func clamp(val float32, min, max uint32) float32 {
	lower, upper := float32(min), float32(max)

	switch {
	case !(val > lower):
		// written as a negation so that NaN also falls back to the lower bound
		return lower
	case val < upper:
		return val
	default:
		return upper
	}
}
|
|
|
|
// quickSortIndiceInverse sorts input[left..right] (both bounds inclusive) into
// descending order with a recursive quicksort, applying every move to indices
// as well so that indices[k] keeps identifying the element now held in
// input[k]. The first element of the range is used as the pivot.
//
// The return value is the final position of that pivot, or left when the
// range holds fewer than two elements.
func quickSortIndiceInverse(input []float32, left int, right int, indices []int) int {

	// nothing to do for an empty or single element range
	if left >= right {
		return left
	}

	// lift the pivot out, leaving a hole at lo that the scans below fill
	pivotIdx := indices[left]
	pivot := input[left]

	lo, hi := left, right

	for lo < hi {
		// scan down from the right for a value larger than the pivot
		for lo < hi && input[hi] <= pivot {
			hi--
		}

		input[lo] = input[hi]
		indices[lo] = indices[hi]

		// scan up from the left for a value smaller than the pivot
		for lo < hi && input[lo] >= pivot {
			lo++
		}

		input[hi] = input[lo]
		indices[hi] = indices[lo]
	}

	// lo == hi marks the hole where the pivot belongs
	input[lo] = pivot
	indices[lo] = pivotIdx

	// sort the partitions on either side of the pivot
	quickSortIndiceInverse(input, left, lo-1, indices)
	quickSortIndiceInverse(input, lo+1, right, indices)

	return lo
}
|
|
|
|
// nms implements a Non-Maximum Suppression (NMS) algorithm
|
|
func nms(validCount int, outputLocations []float32, classIds, order []int,
|
|
filterId int, threshold float32, pos int) {
|
|
|
|
for i := 0; i < validCount; i++ {
|
|
|
|
if order[i] == -1 || classIds[i] != filterId {
|
|
continue
|
|
}
|
|
|
|
n := order[i]
|
|
|
|
for j := i + 1; j < validCount; j++ {
|
|
m := order[j]
|
|
|
|
if m == -1 || classIds[i] != filterId {
|
|
continue
|
|
}
|
|
|
|
xmin0 := outputLocations[n*pos+0]
|
|
ymin0 := outputLocations[n*pos+1]
|
|
xmax0 := xmin0 + outputLocations[n*pos+2]
|
|
ymax0 := ymin0 + outputLocations[n*pos+3]
|
|
|
|
xmin1 := outputLocations[m*pos+0]
|
|
ymin1 := outputLocations[m*pos+1]
|
|
xmax1 := xmin1 + outputLocations[m*pos+2]
|
|
ymax1 := ymin1 + outputLocations[m*pos+3]
|
|
|
|
iou := calculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1)
|
|
|
|
if iou > threshold {
|
|
order[j] = -1
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// calculateOverlap returns the Intersection over Union (IoU) of two axis
// aligned boxes given by their min/max corners. Extents are measured
// inclusively, so one is added to every width and height. When the union area
// is not positive the result is 0.
func calculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1,
	xmax1, ymax1 float32) float32 {

	// span is the inclusive length shared by two 1-D ranges, never negative
	span := func(lo0, hi0, lo1, hi1 float32) float64 {
		return math.Max(0.0, math.Min(float64(hi0), float64(hi1))-math.Max(float64(lo0), float64(lo1))+1.0)
	}

	overlapW := span(xmin0, xmax0, xmin1, xmax1)
	overlapH := span(ymin0, ymax0, ymin1, ymax1)
	shared := overlapW * overlapH

	// area of each box, again counting both edge pixels
	area0 := (xmax0 - xmin0 + 1) * (ymax0 - ymin0 + 1)
	area1 := (xmax1 - xmin1 + 1) * (ymax1 - ymin1 + 1)

	// the shared region is counted in both areas, so remove it once
	union := area0 + area1 - float32(shared)

	if union > 0 {
		return float32(shared) / union
	}

	return 0.0
}
|
|
|
|
// computeDFL decodes the four box sides from a Distribution Focal Loss (DFL)
// output. tensor holds four consecutive groups of dflLen logits; for each
// group the softmax of the logits is taken and the expected bin index,
// sum(i * p[i]), becomes that side's value.
//
// tensor must contain at least 4*dflLen values. A non-positive dflLen yields
// an all-zero box.
func computeDFL(tensor []float32, dflLen int) []float32 {

	box := make([]float32, 4)

	if dflLen <= 0 {
		return box
	}

	// scratch space for the exponentials, shared by all four sides
	expT := make([]float32, dflLen)

	for b := range box {

		base := b * dflLen

		// Shift by the largest logit before exponentiating, as softmax does,
		// so a large logit cannot overflow float32 and poison the result.
		maxVal := tensor[base]
		for i := 1; i < dflLen; i++ {
			if tensor[base+i] > maxVal {
				maxVal = tensor[base+i]
			}
		}

		expSum := float32(0)

		for i := 0; i < dflLen; i++ {
			expT[i] = float32(math.Exp(float64(tensor[base+i] - maxVal)))
			expSum += expT[i]
		}

		// expected bin index under the softmax distribution
		accSum := float32(0)

		for i, e := range expT {
			accSum += e / expSum * float32(i)
		}

		box[b] = accSum
	}

	return box
}
|
|
|
|
// idGenerator hands out sequential ID numbers. The embedded mutex guards id
// so that GetNext can be called on one generator from several goroutines.
type idGenerator struct {
	// id is the most recently issued number; zero until GetNext is first called
	id int64
	sync.Mutex
}

// NewIDGenerator returns a generator whose first issued ID will be 1.
func NewIDGenerator() *idGenerator {
	return new(idGenerator)
}

// GetNext increments the counter while holding the lock and returns the new
// value.
func (g *idGenerator) GetNext() int64 {
	g.Lock()
	g.id++
	next := g.id
	g.Unlock()

	return next
}
|
|
|
|
// resizeByOpenCVUint8 takes image data in uint8 format and resizes it using GoCV
|
|
func resizeByOpenCVUint8(inputImage []uint8, inputWidth, inputHeight, boxesNum int,
|
|
outputImage []uint8, targetWidth, targetHeight int) {
|
|
|
|
dstImage := gocv.NewMat()
|
|
defer dstImage.Close()
|
|
|
|
for b := 0; b < boxesNum; b++ {
|
|
|
|
startIdx := b * inputWidth * inputHeight
|
|
endIdx := (b + 1) * inputWidth * inputHeight
|
|
|
|
if endIdx > len(inputImage) {
|
|
// index out of range, skipping this box
|
|
continue
|
|
}
|
|
|
|
// create a new Mat from the input image slice for each box
|
|
srcImage, err := gocv.NewMatFromBytes(inputHeight, inputWidth,
|
|
gocv.MatTypeCV8U, inputImage[startIdx:endIdx])
|
|
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
if srcImage.Empty() {
|
|
// source image matrix is empty, skipping
|
|
continue
|
|
}
|
|
|
|
// resize image
|
|
gocv.Resize(srcImage, &dstImage, image.Point{X: targetWidth, Y: targetHeight},
|
|
0, 0, gocv.InterpolationLinear)
|
|
|
|
// copy resized image data back to the output slice
|
|
copy(outputImage[b*targetWidth*targetHeight:], dstImage.ToBytes())
|
|
|
|
srcImage.Close()
|
|
}
|
|
}
|
|
|
|
// boxReverse converts a detection box coordinate back to the original image
// dimensions: the padding offset pad is removed from pos and the remainder is
// divided by scale. The fractional part of the result is discarded.
func boxReverse(pos int, pad int, scale float32) int {
	unpadded := float32(pos - pad)
	return int(unpadded / scale)
}
|
|
|
|
// segReverse scales the segment mask back to the size of the original image dimensions
|
|
func segReverse(segMask, croppedSeg, segMaskReal []uint8,
|
|
modelInHeight, modelInWidth, croppedHeight, croppedWidth,
|
|
oriInHeight, oriInWidth, yPad, xPad int) {
|
|
|
|
if yPad == 0 && xPad == 0 && oriInHeight == modelInHeight && oriInWidth == modelInWidth {
|
|
copy(segMaskReal, segMask)
|
|
return
|
|
}
|
|
|
|
croppedIndex := 0
|
|
|
|
for i := 0; i < modelInHeight; i++ {
|
|
for j := 0; j < modelInWidth; j++ {
|
|
|
|
if i >= yPad && i < modelInHeight-yPad && j >= xPad && j < modelInWidth-xPad {
|
|
|
|
segIndex := i*modelInWidth + j
|
|
|
|
if croppedIndex < len(croppedSeg) {
|
|
croppedSeg[croppedIndex] = segMask[segIndex]
|
|
croppedIndex++
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
resizeByOpenCVUint8(croppedSeg, croppedWidth, croppedHeight, 1,
|
|
segMaskReal, oriInWidth, oriInHeight)
|
|
}
|