diff --git a/go.mod b/go.mod
index dbf5ec4..17d15c9 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@ module github.com/dev6699/face
 go 1.22.4
 
 require (
+	gocv.io/x/gocv v0.37.0
 	google.golang.org/grpc v1.64.0
 	google.golang.org/protobuf v1.34.2
 )
diff --git a/go.sum b/go.sum
index 9240e22..b3448b7 100644
--- a/go.sum
+++ b/go.sum
@@ -1,5 +1,7 @@
 github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+gocv.io/x/gocv v0.37.0 h1:sISHvnApErjoJodz1Dxb8UAkFdITOB3vXGslbVu6Knk=
+gocv.io/x/gocv v0.37.0/go.mod h1:lmS802zoQmnNvXETpmGriBqWrENPei2GxYx5KUxJsMA=
 golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=
 golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
 golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
diff --git a/model/yoloface/README.md b/model/yoloface/README.md
new file mode 100644
index 0000000..fe660a8
--- /dev/null
+++ b/model/yoloface/README.md
@@ -0,0 +1,41 @@
+## Yoloface with face landmark5 detection
+
+
+
+---
+Model description
+[Get model](https://github.com/facefusion/facefusion-assets/releases/download/models/yoloface_8n.onnx)
+```
+{
+  "name": "yoloface",
+  "versions": [
+    "1"
+  ],
+  "platform": "onnxruntime_onnx",
+  "inputs": [
+    {
+      "name": "images",
+      "datatype": "FP32",
+      "shape": [
+        1,
+        3,
+        640,
+        640
+      ]
+    }
+  ],
+  "outputs": [
+    {
+      "name": "output0",
+      "datatype": "FP32",
+      "shape": [
+        1,
+        20,
+        8400
+      ]
+    }
+  ]
+}
+```
+
+
diff --git a/model/yoloface/output.jpg b/model/yoloface/output.jpg
new file mode 100644
index 0000000..7797d86
Binary files /dev/null and b/model/yoloface/output.jpg differ
diff --git a/model/yoloface/post.go b/model/yoloface/post.go
new file mode 100644
index 0000000..40ac2bd
--- /dev/null
+++ b/model/yoloface/post.go
@@ -0,0 +1,169 @@
+package yoloface
+
+import (
+	"errors"
+	"fmt"
+	"math"
+	"sort"
+
+	"github.com/dev6699/face/model"
+	"gocv.io/x/gocv"
+)
+
+const (
+	// outputChannels is the number of values the model emits per candidate:
+	// 4 box values (cx, cy, w, h), 1 score and 5 landmarks of 3 values each.
+	outputChannels = 20
+	// landmarkCount is the number of facial landmarks per detection.
+	landmarkCount = 5
+	// landmarkStride is the number of raw values stored per landmark; only the
+	// first two (x, y) are used.
+	landmarkStride = 3
+)
+
+// Detection is a single face candidate in source image coordinates.
+type Detection struct {
+	// BoundingBox is the face box as corner coordinates.
+	BoundingBox model.BoundingBox
+	// FaceLandmark5 holds the five landmark points of the face.
+	FaceLandmark5 []gocv.Point2f
+	// Confidence is the score reported by the model for this candidate.
+	Confidence float32
+}
+
+// PostProcess decodes the raw "output0" tensor into face detections.
+//
+// The tensor is read as outputChannels planes of equal length holding one
+// value per candidate. Candidates scoring below the configured score threshold
+// are dropped; the rest are scaled by the ratios recorded in PreProcess and
+// de-duplicated with non-maximum suppression. Detections are returned in
+// descending confidence order.
+//
+// It returns an error when no output is supplied, when the bytes cannot be
+// decoded, or when the decoded length is not a positive multiple of
+// outputChannels.
+func (m *Model) PostProcess(rawOutputContents [][]byte) (*Output, error) {
+	if len(rawOutputContents) == 0 {
+		return nil, errors.New("yoloface: missing output tensor")
+	}
+
+	rawDetections, err := model.BytesToFloat32Slice(rawOutputContents[0])
+	if err != nil {
+		return nil, err
+	}
+	if len(rawDetections) == 0 || len(rawDetections)%outputChannels != 0 {
+		return nil, fmt.Errorf("yoloface: unexpected output length %d, want a positive multiple of %d",
+			len(rawDetections), outputChannels)
+	}
+
+	// The documented shape is [1, 20, 8400]. Deriving the candidate count from
+	// the data keeps every index below in bounds for any accepted tensor.
+	outputCount := len(rawDetections) / outputChannels
+	boundingBoxRaw := rawDetections[:4*outputCount]
+	scoreRaw := rawDetections[4*outputCount : 5*outputCount]
+	faceLandmark5Raw := rawDetections[5*outputCount:]
+
+	ratioWidth := m.ratioWidth
+	ratioHeight := m.ratioHeight
+
+	var detections []Detection
+	for i, score := range scoreRaw {
+		if score < m.faceDetectorScore {
+			continue
+		}
+
+		// Boxes are stored as center plus size; convert to corners.
+		centerX := boundingBoxRaw[i]
+		centerY := boundingBoxRaw[i+outputCount]
+		halfWidth := boundingBoxRaw[i+2*outputCount] / 2
+		halfHeight := boundingBoxRaw[i+3*outputCount] / 2
+
+		faceLandmark5 := make([]gocv.Point2f, 0, landmarkCount)
+		for k := 0; k < landmarkCount; k++ {
+			// Planes 3k and 3k+1 hold x and y; plane 3k+2 is not used.
+			base := k * landmarkStride * outputCount
+			faceLandmark5 = append(faceLandmark5, gocv.Point2f{
+				X: faceLandmark5Raw[base+i] * ratioWidth,
+				Y: faceLandmark5Raw[base+outputCount+i] * ratioHeight,
+			})
+		}
+
+		detections = append(detections, Detection{
+			BoundingBox: model.BoundingBox{
+				X1: float64(centerX-halfWidth) * float64(ratioWidth),
+				Y1: float64(centerY-halfHeight) * float64(ratioHeight),
+				X2: float64(centerX+halfWidth) * float64(ratioWidth),
+				Y2: float64(centerY+halfHeight) * float64(ratioHeight),
+			},
+			FaceLandmark5: faceLandmark5,
+			Confidence:    score,
+		})
+	}
+
+	keepIndices := applyNMS(detections, m.iouThreshold)
+	keepDetections := make([]Detection, 0, len(keepIndices))
+	for _, idx := range keepIndices {
+		keepDetections = append(keepDetections, detections[idx])
+	}
+
+	// Sort explicitly so the returned order is guaranteed here rather than
+	// relying on the order in which applyNMS emits indices.
+	sort.SliceStable(keepDetections, func(i, j int) bool {
+		return keepDetections[i].Confidence > keepDetections[j].Confidence
+	})
+
+	return &Output{
+		Detections: keepDetections,
+	}, nil
+}
+
+// applyNMS performs greedy non-maximum suppression and returns the indices of
+// the detections that survive.
+//
+// Candidates are visited from highest to lowest confidence. Each visited
+// candidate is kept, and every remaining candidate whose IoU with it is
+// greater than iouThreshold is discarded.
+func applyNMS(detections []Detection, iouThreshold float64) []int {
+	indices := make([]int, len(detections))
+	areas := make([]float64, len(detections))
+	for i := range detections {
+		box := detections[i].BoundingBox
+		indices[i] = i
+		areas[i] = (box.X2 - box.X1 + 1) * (box.Y2 - box.Y1 + 1)
+	}
+
+	// Greedy suppression must start from the strongest candidate, otherwise a
+	// weak box can eliminate a stronger overlapping one. The stable sort keeps
+	// the input order among equal scores.
+	sort.SliceStable(indices, func(a, b int) bool {
+		return detections[indices[a]].Confidence > detections[indices[b]].Confidence
+	})
+
+	var keepIndices []int
+	for len(indices) > 0 {
+		index := indices[0]
+		keepIndices = append(keepIndices, index)
+		kept := detections[index].BoundingBox
+
+		var remainIndices []int
+		for _, i := range indices[1:] {
+			box := detections[i].BoundingBox
+			xx1 := math.Max(kept.X1, box.X1)
+			yy1 := math.Max(kept.Y1, box.Y1)
+			xx2 := math.Min(kept.X2, box.X2)
+			yy2 := math.Min(kept.Y2, box.Y2)
+
+			width := math.Max(0, xx2-xx1+1)
+			height := math.Max(0, yy2-yy1+1)
+			intersection := width * height
+			union := areas[index] + areas[i] - intersection
+
+			if intersection/union <= iouThreshold {
+				remainIndices = append(remainIndices, i)
+			}
+		}
+		indices = remainIndices
+	}
+
+	return keepIndices
+}
diff --git a/model/yoloface/pre.go b/model/yoloface/pre.go
new file mode 100644
index 0000000..6bb3089
--- /dev/null
+++ b/model/yoloface/pre.go
@@ -0,0 +1,115 @@
+package yoloface
+
+import (
+	"errors"
+	"image"
+	"math"
+
+	"github.com/dev6699/face/protobuf"
+	"gocv.io/x/gocv"
+)
+
+// PreProcess converts the input image into the FP32 tensor contents sent to
+// the model.
+//
+// The image is shrunk to fit within 640x640 when it is larger, copied into the
+// top-left corner of a 640x640 canvas and normalized. The original-to-resized
+// ratios are stored on m so PostProcess can scale results back.
+//
+// It returns an error when i is nil, when the image is empty, or when the
+// image is not of type CV8UC3.
+func (m *Model) PreProcess(i *Input) ([]*protobuf.InferTensorContents, error) {
+	if i == nil {
+		return nil, errors.New("yoloface: nil input")
+	}
+
+	img := i.Img
+	if img.Empty() {
+		return nil, errors.New("yoloface: empty input image")
+	}
+	// prepareDetectFrame copies into a CV8UC3 canvas and reads three bytes per
+	// pixel, so any other layout cannot be converted correctly.
+	if img.Type() != gocv.MatTypeCV8UC3 {
+		return nil, errors.New("yoloface: input image must be of type CV8UC3")
+	}
+
+	width := img.Cols()
+	height := img.Rows()
+
+	faceDetectorSize := Resolution{Width: 640, Height: 640}
+	// Pass a clone because resizeFrameResolution resizes its argument in place.
+	resizedVisionFrame, newWidth, newHeight := resizeFrameResolution(img.Clone(), faceDetectorSize)
+	defer resizedVisionFrame.Close()
+
+	m.ratioHeight = float32(height) / float32(newHeight)
+	m.ratioWidth = float32(width) / float32(newWidth)
+
+	contents := &protobuf.InferTensorContents{
+		Fp32Contents: prepareDetectFrame(resizedVisionFrame, faceDetectorSize),
+	}
+	return []*protobuf.InferTensorContents{contents}, nil
+}
+
+// Resolution is a frame size in pixels.
+type Resolution struct {
+	Width  uint
+	Height uint
+}
+
+// resizeFrameResolution shrinks visionFrame in place, preserving its aspect
+// ratio, so that it fits within maxResolution. Frames that already fit are
+// returned untouched. It returns the frame with its resulting width and height.
+func resizeFrameResolution(visionFrame gocv.Mat, maxResolution Resolution) (gocv.Mat, uint, uint) {
+	width := visionFrame.Cols()
+	height := visionFrame.Rows()
+
+	maxHeight := int(maxResolution.Height)
+	maxWidth := int(maxResolution.Width)
+
+	if height <= maxHeight && width <= maxWidth {
+		return visionFrame, uint(width), uint(height)
+	}
+
+	scale := math.Min(float64(maxHeight)/float64(height), float64(maxWidth)/float64(width))
+	// Truncation can yield 0 for very elongated frames; keep at least one pixel
+	// per side so the resize target stays valid.
+	newWidth := max(1, int(float64(width)*scale))
+	newHeight := max(1, int(float64(height)*scale))
+
+	gocv.Resize(visionFrame, &visionFrame, image.Point{X: newWidth, Y: newHeight}, 0, 0, gocv.InterpolationDefault)
+	return visionFrame, uint(newWidth), uint(newHeight)
+}
+
+// prepareDetectFrame places visionFrame in the top-left corner of a canvas of
+// faceDetectorSize and returns the canvas as normalized planar float32 data.
+//
+// The result holds three consecutive planes, one per channel in the order the
+// Mat stores them, with every byte mapped through (v - 127.5) / 128.
+// visionFrame must not be larger than faceDetectorSize.
+func prepareDetectFrame(visionFrame gocv.Mat, faceDetectorSize Resolution) []float32 {
+	faceDetectorWidth := int(faceDetectorSize.Width)
+	faceDetectorHeight := int(faceDetectorSize.Height)
+	planeSize := faceDetectorHeight * faceDetectorWidth
+
+	detectVisionFrame := gocv.NewMatWithSize(faceDetectorHeight, faceDetectorWidth, gocv.MatTypeCV8UC3)
+	defer detectVisionFrame.Close()
+
+	roi := detectVisionFrame.Region(image.Rect(0, 0, visionFrame.Cols(), visionFrame.Rows()))
+	defer roi.Close()
+	visionFrame.CopyTo(&roi)
+
+	output := make([]float32, 3*planeSize)
+	idx := 0
+	for y := 0; y < faceDetectorHeight; y++ {
+		for x := 0; x < faceDetectorWidth; x++ {
+			pixel := detectVisionFrame.GetVecbAt(y, x)
+
+			output[idx] = (float32(pixel[0]) - 127.5) / 128.0
+			output[planeSize+idx] = (float32(pixel[1]) - 127.5) / 128.0
+			output[2*planeSize+idx] = (float32(pixel[2]) - 127.5) / 128.0
+			idx++
+		}
+	}
+
+	return output
+}
diff --git a/model/yoloface/yoloface.go b/model/yoloface/yoloface.go
new file mode 100644
index 0000000..1bf6e0a
--- /dev/null
+++ b/model/yoloface/yoloface.go
@@ -0,0 +1,58 @@
+package yoloface
+
+import (
+	"github.com/dev6699/face/model"
+	"gocv.io/x/gocv"
+)
+
+// Model is the yoloface face detector. PreProcess stores the image scaling
+// ratios on the Model and PostProcess reads them back, so one Model must not
+// be shared by overlapping requests.
+type Model struct {
+	faceDetectorScore float32
+	iouThreshold      float64
+	ratioHeight       float32
+	ratioWidth        float32
+}
+
+// Input is the image to run face detection on.
+type Input struct {
+	Img gocv.Mat
+}
+
+// Output holds the detections, ordered by descending confidence.
+type Output struct {
+	Detections []Detection
+}
+
+// ModelT is the generic model interface instantiated for yoloface.
+type ModelT = model.Model[*Input, *Output]
+
+var _ ModelT = (*Model)(nil)
+
+// NewFactory returns a function that creates a new Model with the given
+// thresholds on every call.
+func NewFactory(faceDetectorScore float32, iouThreshold float64) func() ModelT {
+	return func() ModelT {
+		return New(faceDetectorScore, iouThreshold)
+	}
+}
+
+// New creates a Model that drops candidates scoring below faceDetectorScore
+// and suppresses overlapping boxes whose IoU exceeds iouThreshold.
+func New(faceDetectorScore float32, iouThreshold float64) *Model {
+	return &Model{
+		faceDetectorScore: faceDetectorScore,
+		iouThreshold:      iouThreshold,
+	}
+}
+
+// ModelName returns the name of the model: "yoloface".
+func (m *Model) ModelName() string {
+	return "yoloface"
+}
+
+// ModelVersion returns the model version: "1".
+func (m *Model) ModelVersion() string {
+	return "1"
+}
diff --git a/model_repository/yoloface/config.pbtxt b/model_repository/yoloface/config.pbtxt
new file mode 100644
index 0000000..094b869
--- /dev/null
+++ b/model_repository/yoloface/config.pbtxt
@@ -0,0 +1,2 @@
+name: "yoloface"
+platform: "onnxruntime_onnx"
\ No newline at end of file