feat: added yoloface

2025-09-26 13:11:27 +08:00 · 2024-07-03 13:13:00 +00:00
parent 5d6050f78e
commit 4e245c8fad
8 changed files with 328 additions and 0 deletions
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@ module github.com/dev6699/face
 go 1.22.4

 require (
+	gocv.io/x/gocv v0.37.0
 	google.golang.org/grpc v1.64.0
 	google.golang.org/protobuf v1.34.2
 )
--- a/go.sum
+++ b/go.sum
@@ -1,5 +1,7 @@
 github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+gocv.io/x/gocv v0.37.0 h1:sISHvnApErjoJodz1Dxb8UAkFdITOB3vXGslbVu6Knk=
+gocv.io/x/gocv v0.37.0/go.mod h1:lmS802zoQmnNvXETpmGriBqWrENPei2GxYx5KUxJsMA=
 golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=
 golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
 golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
--- a/model/yoloface/README.md
+++ b/model/yoloface/README.md
@@ -0,0 +1,41 @@
+## YOLOFace: face detection with 5-point face landmarks
+
+<img src="output.jpg">
+
+---
+Model description
+([get the model](https://github.com/facefusion/facefusion-assets/releases/download/models/yoloface_8n.onnx)):
+```
+{
+  "name": "yoloface",
+  "versions": [
+    "1"
+  ],
+  "platform": "onnxruntime_onnx",
+  "inputs": [
+    {
+      "name": "images",
+      "datatype": "FP32",
+      "shape": [
+        1,
+        3,
+        640,
+        640
+      ]
+    }
+  ],
+  "outputs": [
+    {
+      "name": "output0",
+      "datatype": "FP32",
+      "shape": [
+        1,
+        20,
+        8400
+      ]
+    }
+  ]
+}
+```
+
+
--- a/model/yoloface/output.jpg
+++ b/model/yoloface/output.jpg
--- a/model/yoloface/post.go
+++ b/model/yoloface/post.go
@@ -0,0 +1,155 @@
+package yoloface
+
+import (
+	"fmt"
+	"math"
+	"sort"
+
+	"github.com/dev6699/face/model"
+	"gocv.io/x/gocv"
+)
+
+// Detection is a single face candidate produced by PostProcess.
+type Detection struct {
+	// BoundingBox holds the box corners after scaling by the ratios stored
+	// on the model.
+	BoundingBox model.BoundingBox
+
+	// FaceLandmark5 holds the five landmark points decoded for this face,
+	// scaled the same way as BoundingBox.
+	FaceLandmark5 []gocv.Point2f
+
+	// Confidence is the score read from the model output for this candidate.
+	Confidence float32
+}
+
+// PostProcess decodes the first raw output tensor ("output0") into face
+// detections.
+//
+// The tensor is read as 20 channel-major rows of 8400 candidates each:
+// rows 0-3 are the box centre x, centre y, width and height, row 4 is the
+// score, and rows 5-19 are five landmarks stored as triples of which only the
+// first two values (x, y) are used. Coordinates are multiplied by
+// m.ratioWidth / m.ratioHeight, candidates scoring below m.faceDetectorScore
+// are dropped, overlapping boxes are removed by applyNMS with m.iouThreshold,
+// and the survivors are returned sorted by descending confidence.
+//
+// An error is returned when no output tensor is supplied, when the bytes
+// cannot be converted to float32 values, or when the tensor holds fewer than
+// 20*8400 values.
+func (m *Model) PostProcess(rawOutputContents [][]byte) (*Output, error) {
+	const (
+		outputCount   = 8400 // candidates per channel row
+		channelCount  = 20   // 4 box rows + 1 score row + 15 landmark rows
+		landmarkCount = 5    // landmark points per candidate
+	)
+
+	// Guard against an empty response instead of panicking on index 0.
+	if len(rawOutputContents) == 0 {
+		return nil, fmt.Errorf("yoloface: got %d output tensors, want at least 1", len(rawOutputContents))
+	}
+
+	rawDetections, err := model.BytesToFloat32Slice(rawOutputContents[0])
+	if err != nil {
+		return nil, err
+	}
+
+	// Every slice expression below assumes the full 20x8400 layout is present.
+	if want := channelCount * outputCount; len(rawDetections) < want {
+		return nil, fmt.Errorf("yoloface: output tensor has %d values, want at least %d", len(rawDetections), want)
+	}
+
+	ratioWidth := m.ratioWidth
+	ratioHeight := m.ratioHeight
+
+	boundingBoxRaw := rawDetections[:4*outputCount]
+	scoreRaw := rawDetections[4*outputCount : 5*outputCount]
+	faceLandmark5Raw := rawDetections[5*outputCount:]
+
+	var detections []Detection
+	for i := 0; i < outputCount; i++ {
+		score := scoreRaw[i]
+		if score < m.faceDetectorScore {
+			continue
+		}
+
+		centerX := boundingBoxRaw[i]
+		centerY := boundingBoxRaw[i+outputCount]
+		boxWidth := boundingBoxRaw[i+outputCount*2]
+		boxHeight := boundingBoxRaw[i+outputCount*3]
+
+		// Each landmark occupies three consecutive rows; the third row of
+		// every triple is not used.
+		faceLandmark5 := make([]gocv.Point2f, 0, landmarkCount)
+		for p := 0; p < landmarkCount; p++ {
+			xIdx := 3*p*outputCount + i
+			yIdx := xIdx + outputCount
+			faceLandmark5 = append(faceLandmark5, gocv.Point2f{
+				X: faceLandmark5Raw[xIdx] * ratioWidth,
+				Y: faceLandmark5Raw[yIdx] * ratioHeight,
+			})
+		}
+
+		detections = append(detections, Detection{
+			// Convert centre/size to corners, then scale.
+			BoundingBox: model.BoundingBox{
+				X1: float64(centerX-boxWidth/2) * float64(ratioWidth),
+				Y1: float64(centerY-boxHeight/2) * float64(ratioHeight),
+				X2: float64(centerX+boxWidth/2) * float64(ratioWidth),
+				Y2: float64(centerY+boxHeight/2) * float64(ratioHeight),
+			},
+			FaceLandmark5: faceLandmark5,
+			Confidence:    score,
+		})
+	}
+
+	keepIndices := applyNMS(detections, m.iouThreshold)
+	keepDetections := make([]Detection, len(keepIndices))
+	for i, idx := range keepIndices {
+		keepDetections[i] = detections[idx]
+	}
+
+	sort.Slice(keepDetections, func(i, j int) bool {
+		return keepDetections[i].Confidence > keepDetections[j].Confidence
+	})
+
+	return &Output{
+		Detections: keepDetections,
+	}, nil
+}
+
+// applyNMS performs greedy non-maximum suppression to eliminate duplicate
+// detections and returns the indices (into detections) of the boxes to keep,
+// ordered by descending confidence.
+//
+// Candidates are visited from highest to lowest confidence so that a group of
+// overlapping boxes is always represented by its best-scoring member. A
+// candidate is dropped when its intersection-over-union with an already kept
+// box is greater than iouThreshold. Widths and heights are measured with +1
+// added to each side difference. The result is nil when detections is empty.
+func applyNMS(detections []Detection, iouThreshold float64) []int {
+	indices := make([]int, len(detections))
+	areas := make([]float64, len(detections))
+	for i := range detections {
+		box := detections[i].BoundingBox
+		indices[i] = i
+		areas[i] = (box.X2 - box.X1 + 1) * (box.Y2 - box.Y1 + 1)
+	}
+
+	// Visit best-scoring candidates first; the stable sort keeps the input
+	// order for equal scores so the result is deterministic.
+	sort.SliceStable(indices, func(a, b int) bool {
+		return detections[indices[a]].Confidence > detections[indices[b]].Confidence
+	})
+
+	var keepIndices []int
+	for len(indices) > 0 {
+		index := indices[0]
+		keepIndices = append(keepIndices, index)
+		kept := detections[index].BoundingBox
+
+		// Filter the remaining candidates in place: the write position always
+		// trails the read position, so no unread entry is overwritten.
+		remainIndices := indices[:0]
+		for _, i := range indices[1:] {
+			other := detections[i].BoundingBox
+
+			xx1 := math.Max(kept.X1, other.X1)
+			yy1 := math.Max(kept.Y1, other.Y1)
+			xx2 := math.Min(kept.X2, other.X2)
+			yy2 := math.Min(kept.Y2, other.Y2)
+
+			width := math.Max(0, xx2-xx1+1)
+			height := math.Max(0, yy2-yy1+1)
+			intersection := width * height
+			union := areas[index] + areas[i] - intersection
+			iou := intersection / union
+
+			if iou <= iouThreshold {
+				remainIndices = append(remainIndices, i)
+			}
+		}
+		indices = remainIndices
+	}
+
+	return keepIndices
+}
--- a/model/yoloface/pre.go
+++ b/model/yoloface/pre.go
@@ -0,0 +1,81 @@
+package yoloface
+
+import (
+	"image"
+	"math"
+
+	"github.com/dev6699/face/protobuf"
+	"gocv.io/x/gocv"
+)
+
+// PreProcess turns the input image into the single FP32 tensor sent to the
+// model.
+//
+// A clone of the image is shrunk (when needed) to fit a 640x640 detector
+// frame and converted by prepareDetectFrame. The original-to-resized size
+// ratios are stored on m as ratioWidth and ratioHeight. The returned error is
+// always nil.
+func (m *Model) PreProcess(i *Input) ([]*protobuf.InferTensorContents, error) {
+	detectorSize := Resolution{Width: 640, Height: 640}
+	srcWidth, srcHeight := i.Img.Cols(), i.Img.Rows()
+
+	// Work on a clone so the caller's Mat is never resized in place.
+	frame, scaledWidth, scaledHeight := resizeFrameResolution(i.Img.Clone(), detectorSize)
+	defer frame.Close()
+
+	m.ratioHeight = float32(srcHeight) / float32(scaledHeight)
+	m.ratioWidth = float32(srcWidth) / float32(scaledWidth)
+
+	return []*protobuf.InferTensorContents{
+		{Fp32Contents: prepareDetectFrame(frame, detectorSize)},
+	}, nil
+}
+
+// Resolution is a width/height pair; this package uses it for the detector
+// frame size.
+type Resolution struct {
+	Width, Height uint
+}
+
+// resizeFrameResolution resizes visionFrame so that its resolution is capped
+// at maxResolution while keeping the aspect ratio.
+//
+// A frame that already fits is returned untouched with its current size. A
+// larger frame is resized in place by the smaller of the two axis scale
+// factors. The scaled width and height are clamped to at least 1 so that a
+// very elongated frame never produces a zero-sized target, which would be an
+// invalid resize size and would make callers divide by zero when deriving
+// ratios. The returned Mat is visionFrame itself; the two uints are the
+// resulting width and height.
+func resizeFrameResolution(visionFrame gocv.Mat, maxResolution Resolution) (gocv.Mat, uint, uint) {
+	width := visionFrame.Cols()
+	height := visionFrame.Rows()
+
+	maxHeight := int(maxResolution.Height)
+	maxWidth := int(maxResolution.Width)
+
+	if height <= maxHeight && width <= maxWidth {
+		return visionFrame, uint(width), uint(height)
+	}
+
+	scale := math.Min(float64(maxHeight)/float64(height), float64(maxWidth)/float64(width))
+	// int() truncates toward zero, so guard against a dimension collapsing to 0.
+	newWidth := max(1, int(float64(width)*scale))
+	newHeight := max(1, int(float64(height)*scale))
+
+	gocv.Resize(visionFrame, &visionFrame, image.Point{X: newWidth, Y: newHeight}, 0, 0, gocv.InterpolationDefault)
+	return visionFrame, uint(newWidth), uint(newHeight)
+}
+
+// prepareDetectFrame copies visionFrame into the top-left corner of a
+// detector-sized 8-bit, 3-channel frame and returns it as planar float32
+// data.
+//
+// The result has 3*height*width values: all values of channel 0, then
+// channel 1, then channel 2, each in row-major order and normalised as
+// (value - 127.5) / 128. visionFrame must not be larger than
+// faceDetectorSize, because it is copied into a region of its own size.
+func prepareDetectFrame(visionFrame gocv.Mat, faceDetectorSize Resolution) []float32 {
+	faceDetectorWidth := int(faceDetectorSize.Width)
+	faceDetectorHeight := int(faceDetectorSize.Height)
+	planeSize := faceDetectorHeight * faceDetectorWidth
+
+	detectVisionFrame := gocv.NewMatWithSize(faceDetectorHeight, faceDetectorWidth, gocv.MatTypeCV8UC3)
+	defer detectVisionFrame.Close()
+
+	roi := detectVisionFrame.Region(image.Rect(0, 0, visionFrame.Cols(), visionFrame.Rows()))
+	defer roi.Close()
+	visionFrame.CopyTo(&roi)
+
+	// Read the whole frame once instead of making one GetVecbAt call per
+	// pixel. NOTE(review): relies on ToBytes returning the interleaved
+	// 3-bytes-per-pixel, row-major data of this freshly allocated Mat.
+	pixels := detectVisionFrame.ToBytes()
+
+	output := make([]float32, 3*planeSize)
+	for idx := 0; idx < planeSize; idx++ {
+		pixel := pixels[3*idx : 3*idx+3]
+
+		output[idx] = (float32(pixel[0]) - 127.5) / 128.0
+		output[planeSize+idx] = (float32(pixel[1]) - 127.5) / 128.0
+		output[2*planeSize+idx] = (float32(pixel[2]) - 127.5) / 128.0
+	}
+
+	return output
+}
--- a/model/yoloface/yoloface.go
+++ b/model/yoloface/yoloface.go
@@ -0,0 +1,46 @@
+package yoloface
+
+import (
+	"github.com/dev6699/face/model"
+	"gocv.io/x/gocv"
+)
+
+// Model is the yoloface detector: it holds the detection settings plus the
+// scale ratios shared between PreProcess and PostProcess.
+type Model struct {
+	// Settings supplied to New.
+	faceDetectorScore float32
+	iouThreshold      float64
+
+	// Original-to-resized size ratios, written by PreProcess and read by
+	// PostProcess.
+	ratioWidth  float32
+	ratioHeight float32
+}
+
+// Input is the argument to PreProcess.
+type Input struct {
+	// Img is the image to run detection on. PreProcess works on a clone of it.
+	Img gocv.Mat
+}
+
+// Output is the result of PostProcess.
+type Output struct {
+	// Detections are the faces kept after non-maximum suppression, sorted by
+	// descending confidence.
+	Detections []Detection
+}
+
+// ModelT is model.Model instantiated with this package's Input and Output.
+type ModelT = model.Model[*Input, *Output]
+
+// Compile-time check that *Model satisfies ModelT.
+var _ ModelT = (*Model)(nil)
+
+// NewFactory returns a function that builds a fresh Model with the given
+// score and IoU thresholds on every call.
+func NewFactory(faceDetectorScore float32, iouThreshold float64) func() ModelT {
+	build := func() ModelT {
+		return New(faceDetectorScore, iouThreshold)
+	}
+	return build
+}
+
+// New creates a Model that drops candidates scoring below faceDetectorScore
+// and suppresses overlapping boxes using iouThreshold.
+func New(faceDetectorScore float32, iouThreshold float64) *Model {
+	m := new(Model)
+	m.faceDetectorScore = faceDetectorScore
+	m.iouThreshold = iouThreshold
+	return m
+}
+
+func (m *Model) ModelName() string {
+	return "yoloface"
+}
+
+func (m *Model) ModelVersion() string {
+	return "1"
+}
--- a/model_repository/yoloface/config.pbtxt
+++ b/model_repository/yoloface/config.pbtxt
@@ -0,0 +1,2 @@
+name: "yoloface"
+platform: "onnxruntime_onnx"