Files
pigo/core/pigo.go
2022-11-24 11:13:59 +02:00

309 lines
9.3 KiB
Go

package pigo
import (
"encoding/binary"
"math"
"sort"
"sync"
"unsafe"
)
// CascadeParams contains the basic parameters to run the analyzer function over the defined image.
// MinSize: represents the minimum size of the face.
// MaxSize: represents the maximum size of the face.
// ShiftFactor: determines to what percentage to move the detection window over its size.
// ScaleFactor: defines in percentage the resize value of the detection window when moving to a higher scale.
type CascadeParams struct {
ImageParams
MinSize int
MaxSize int
ShiftFactor float64
ScaleFactor float64
}
// ImageParams is a struct for image related settings.
// Pixels: contains the grayscale converted image pixel data.
// Rows: the number of image rows.
// Cols: the number of image columns.
// Dim: the image dimension.
type ImageParams struct {
Pixels []uint8
Rows int
Cols int
Dim int
}
// Pigo struct defines the basic binary tree components.
type Pigo struct {
treeCodes []int8
treePred []float32
treeThreshold []float32
treeDepth uint32
treeNum uint32
}
// NewPigo initializes the Pigo constructor method.
func NewPigo() *Pigo {
return &Pigo{}
}
// Unpack unpack the binary face classification file.
func (pg *Pigo) Unpack(packet []byte) (*Pigo, error) {
var (
treeDepth uint32
treeNum uint32
treeCodes []int8
treePred []float32
treeThreshold []float32
)
// We skip the first 8 bytes of the cascade file.
pos := 8
// Obtain the depth of each tree from the binary data.
treeDepth = binary.LittleEndian.Uint32(packet[pos:])
pos += 4
// Get the number of cascade trees as 32-bit unsigned integer.
treeNum = binary.LittleEndian.Uint32(packet[pos:])
// To avoid constant memory allocation on each append we predefine the slice capacity.
treeThreshold = make([]float32, 0, treeNum)
treeCodes = make([]int8, 0, 119808)
treePred = make([]float32, 0, 29952)
pos += 4
for t := 0; t < int(treeNum); t++ {
// Obtain the tree codes of each tree nodes.
treeCodes = append(treeCodes, []int8{0, 0, 0, 0}...)
code := packet[pos : pos+int(4*pow(2, int(treeDepth))-4)]
// Convert unsigned bytecodes to signed ones.
signedCode := *(*[]int8)(unsafe.Pointer(&code))
treeCodes = append(treeCodes, signedCode...)
pos += int(4*pow(2, int(treeDepth)) - 4)
// Read prediction from tree's leaf nodes.
for i := 0; i < int(pow(2, int(treeDepth))); i++ {
u32pred := binary.LittleEndian.Uint32(packet[pos:])
// Convert uint32 to float32
f32pred := *(*float32)(unsafe.Pointer(&u32pred))
treePred = append(treePred, f32pred)
pos += 4
}
u32thr := binary.LittleEndian.Uint32(packet[pos:])
// Convert uint32 to float32
f32thr := *(*float32)(unsafe.Pointer(&u32thr))
treeThreshold = append(treeThreshold, f32thr)
pos += 4
}
return &Pigo{
treeCodes,
treePred,
treeThreshold,
treeDepth,
treeNum,
}, nil
}
// classifyRegion constructs the classification function based on the parsed binary data.
func (pg *Pigo) classifyRegion(r, c, s, treeDepth int, pixels []uint8, dim int) float32 {
var (
root int
out float32
)
r = r * 256
c = c * 256
if pg.treeNum > 0 {
for i := 0; i < int(pg.treeNum); i++ {
idx := 1
for j := 0; j < int(pg.treeDepth); j++ {
x1 := ((r+int(pg.treeCodes[root+4*idx+0])*s)>>8)*dim + ((c + int(pg.treeCodes[root+4*idx+1])*s) >> 8)
x2 := ((r+int(pg.treeCodes[root+4*idx+2])*s)>>8)*dim + ((c + int(pg.treeCodes[root+4*idx+3])*s) >> 8)
bintest := func(px1, px2 uint8) int {
if px1 <= px2 {
return 1
}
return 0
}
idx = 2*idx + bintest(pixels[x1], pixels[x2])
}
out += pg.treePred[treeDepth*i+idx-treeDepth]
if out <= pg.treeThreshold[i] {
return -1.0
}
root += 4 * treeDepth
}
return out - pg.treeThreshold[pg.treeNum-1]
}
return 0.0
}
// classifyRotatedRegion applies the face classification function over a rotated image based on the parsed binary data.
func (pg *Pigo) classifyRotatedRegion(r, c, s, treeDepth int, a float64, nrows, ncols int, pixels []uint8, dim int) float32 {
var (
root int
out float32
)
qCosTable := []int{256, 251, 236, 212, 181, 142, 97, 49, 0, -49, -97, -142, -181, -212, -236, -251, -256, -251, -236, -212, -181, -142, -97, -49, 0, 49, 97, 142, 181, 212, 236, 251, 256}
qSinTable := []int{0, 49, 97, 142, 181, 212, 236, 251, 256, 251, 236, 212, 181, 142, 97, 49, 0, -49, -97, -142, -181, -212, -236, -251, -256, -251, -236, -212, -181, -142, -97, -49, 0}
qsin := s * qSinTable[int(32.0*a)] //s*(256.0*math.Sin(2*math.Pi*a))
qcos := s * qCosTable[int(32.0*a)] //s*(256.0*math.Cos(2*math.Pi*a))
if pg.treeNum > 0 {
for i := 0; i < int(pg.treeNum); i++ {
var idx = 1
for j := 0; j < int(pg.treeDepth); j++ {
r1 := abs(min(nrows-1, max(0, 65536*r+qcos*int(pg.treeCodes[root+4*idx+0])-qsin*int(pg.treeCodes[root+4*idx+1]))>>16))
c1 := abs(min(nrows-1, max(0, 65536*c+qsin*int(pg.treeCodes[root+4*idx+0])+qcos*int(pg.treeCodes[root+4*idx+1]))>>16))
r2 := abs(min(nrows-1, max(0, 65536*r+qcos*int(pg.treeCodes[root+4*idx+2])-qsin*int(pg.treeCodes[root+4*idx+3]))>>16))
c2 := abs(min(nrows-1, max(0, 65536*c+qsin*int(pg.treeCodes[root+4*idx+2])+qcos*int(pg.treeCodes[root+4*idx+3]))>>16))
bintest := func(px1, px2 uint8) int {
if px1 <= px2 {
return 1
}
return 0
}
idx = 2*idx + bintest(pixels[r1*dim+c1], pixels[r2*dim+c2])
}
out += pg.treePred[treeDepth*i+idx-treeDepth]
if out <= pg.treeThreshold[i] {
return -1.0
}
root += 4 * treeDepth
}
return out - pg.treeThreshold[pg.treeNum-1]
}
return 0.0
}
// Detection struct contains the detection results composed of
// the row, column, scale factor and the detection score.
type Detection struct {
Row int
Col int
Scale int
Q float32
}
// We are using sync.Pool to avoid memory allocation on the heap
// in order to keep the GC overhead as small as possible.
var detpool = sync.Pool{
New: func() interface{} {
return &Detection{}
},
}
// RunCascade analyze the grayscale converted image pixel data and run the classification function over the detection window.
// It will return a slice containing the detection row, column, it's center and the detection score (in case this is greater than 0.0).
func (pg *Pigo) RunCascade(cp CascadeParams, angle float64) []Detection {
var (
detections []Detection
pixels = cp.Pixels
treeDepth = int(pow(2, int(pg.treeDepth)))
q float32
)
scale := cp.MinSize
det := detpool.Get().(*Detection)
defer detpool.Put(det)
// Run the classification function over the detection window
// and check if the false positive rate is above a certain value.
for scale <= cp.MaxSize {
step := int(math.Max(cp.ShiftFactor*float64(scale), 1))
offset := (scale/2 + 1)
for row := offset; row <= cp.Rows-offset; row += step {
for col := offset; col <= cp.Cols-offset; col += step {
if angle > 0.0 {
if angle > 1.0 {
angle = 1.0
}
q = pg.classifyRotatedRegion(row, col, scale, treeDepth, angle, cp.Rows, cp.Cols, pixels, cp.Dim)
} else {
q = pg.classifyRegion(row, col, scale, treeDepth, pixels, cp.Dim)
}
det.Row = row
det.Col = col
det.Scale = scale
det.Q = q
if q > 0.0 {
detections = append(detections, *det)
}
}
}
// We need to avoid running into an infinite loop because of float to int conversion
// in cases when scaleFactor == 1.1 and minSize == 9 as example.
// When the scale is 9, the factor would come up with 9.9, which again becomes 9 because of the int() conversion.
// This approach gives the same speed without having an impact on the detection score.
scale = int(float64(scale) + math.Max(2, (float64(scale)*cp.ScaleFactor)-float64(scale)))
}
return detections
}
// ClusterDetections returns the intersection over union of multiple clusters.
// We need to make this comparison to filter out multiple face detection regions.
func (pg *Pigo) ClusterDetections(detections []Detection, iouThreshold float64) []Detection {
// Sort detections by their score
sort.Slice(detections, func(i, j int) bool {
return detections[i].Q < detections[j].Q
})
calcIoU := func(det1, det2 Detection) float64 {
// Unpack the position and size of each detection.
r1, c1, s1 := float64(det1.Row), float64(det1.Col), float64(det1.Scale)
r2, c2, s2 := float64(det2.Row), float64(det2.Col), float64(det2.Scale)
overRow := math.Max(0, math.Min(r1+s1/2, r2+s2/2)-math.Max(r1-s1/2, r2-s2/2))
overCol := math.Max(0, math.Min(c1+s1/2, c2+s2/2)-math.Max(c1-s1/2, c2-s2/2))
// Return intersection over union.
return overRow * overCol / (s1*s1 + s2*s2 - overRow*overCol)
}
assignments := make([]bool, len(detections))
clusters := []Detection{}
for i := 0; i < len(detections); i++ {
// Compare the intersection over union only for two different clusters.
// Skip the comparison in case there already exists a cluster A in the bucket.
if !assignments[i] {
var (
r, c, s, n int
q float32
)
for j := 0; j < len(detections); j++ {
// Check if the comparison result is above a certain threshold.
// In this case we union the detections.
if calcIoU(detections[i], detections[j]) > iouThreshold {
assignments[j] = true
r += detections[j].Row
c += detections[j].Col
s += detections[j].Scale
q += detections[j].Q
n++
}
}
if n > 0 {
clusters = append(clusters, Detection{r / n, c / n, s / n, q})
}
}
}
return clusters
}