diff --git a/cascade/lps/lp312 b/cascade/lps/lp312 new file mode 100644 index 0000000..af33fa7 Binary files /dev/null and b/cascade/lps/lp312 differ diff --git a/cascade/lps/lp38 b/cascade/lps/lp38 new file mode 100644 index 0000000..ac843d3 Binary files /dev/null and b/cascade/lps/lp38 differ diff --git a/cascade/lps/lp42 b/cascade/lps/lp42 new file mode 100644 index 0000000..25aef58 Binary files /dev/null and b/cascade/lps/lp42 differ diff --git a/cascade/lps/lp44 b/cascade/lps/lp44 new file mode 100644 index 0000000..281e441 Binary files /dev/null and b/cascade/lps/lp44 differ diff --git a/cascade/lps/lp46 b/cascade/lps/lp46 new file mode 100644 index 0000000..4d0ae7d Binary files /dev/null and b/cascade/lps/lp46 differ diff --git a/cascade/lps/lp81 b/cascade/lps/lp81 new file mode 100644 index 0000000..f6acf1a Binary files /dev/null and b/cascade/lps/lp81 differ diff --git a/cascade/lps/lp82 b/cascade/lps/lp82 new file mode 100644 index 0000000..5210cf7 Binary files /dev/null and b/cascade/lps/lp82 differ diff --git a/cascade/lps/lp84 b/cascade/lps/lp84 new file mode 100644 index 0000000..b22eb3b Binary files /dev/null and b/cascade/lps/lp84 differ diff --git a/cascade/lps/lp93 b/cascade/lps/lp93 new file mode 100644 index 0000000..b131380 Binary files /dev/null and b/cascade/lps/lp93 differ diff --git a/cmd/pigo/main.go b/cmd/pigo/main.go index 533b699..cd9d339 100644 --- a/cmd/pigo/main.go +++ b/cmd/pigo/main.go @@ -35,9 +35,15 @@ var Version string var ( dc *gg.Context - plc *pigo.PuplocCascade - imgParams *pigo.ImageParams fd *faceDetector + plc *pigo.PuplocCascade + flpcs map[string][]*pigo.FlpCascade + imgParams *pigo.ImageParams +) + +var ( + eyeCascades = []string{"lp46", "lp44", "lp42", "lp38", "lp312"} + mouthCascade = []string{"lp93", "lp84", "lp82", "lp81"} ) // faceDetector struct contains Pigo face detector general settings. 
@@ -50,8 +56,10 @@ type faceDetector struct { shiftFactor float64 scaleFactor float64 iouThreshold float64 - doPuploc bool + puploc bool puplocCascade string + flploc bool + flplocDir string markDetEyes bool } @@ -72,10 +80,12 @@ func main() { scaleFactor = flag.Float64("scale", 1.1, "Scale detection window by percentage") angle = flag.Float64("angle", 0.0, "0.0 is 0 radians and 1.0 is 2*pi radians") iouThreshold = flag.Float64("iou", 0.2, "Intersection over union (IoU) threshold") - circleMarker = flag.Bool("circle", false, "Use circle as detection marker") - doPuploc = flag.Bool("pl", false, "Pupils/eyes localization") + isCircle = flag.Bool("circle", false, "Use circle as detection marker") + puploc = flag.Bool("pl", false, "Pupils/eyes localization") puplocCascade = flag.String("plc", "", "Pupil localization cascade file") - markDetEyes = flag.Bool("rect", true, "Mark detected eyes") + markEyes = flag.Bool("mark", true, "Mark detected eyes") + flploc = flag.Bool("flp", false, "Use facial landmark points localization") + flplocDir = flag.String("flpdir", "", "The facial landmark points base directory") outputAsJSON = flag.Bool("json", false, "Output face box coordinates into a json file") ) @@ -89,10 +99,14 @@ func main() { log.Fatal("Usage: pigo -in input.jpg -out out.png -cf cascade/facefinder") } - if *doPuploc && len(*puplocCascade) == 0 { + if *puploc && len(*puplocCascade) == 0 { log.Fatal("Missing the cascade binary file for pupils localization") } + if *flploc && len(*flplocDir) == 0 { + //log.Fatal("Please specify the base directory of the facial landmark points binary files") + } + fileTypes := []string{".jpg", ".jpeg", ".png"} ext := filepath.Ext(*destination) @@ -118,16 +132,18 @@ func main() { shiftFactor: *shiftFactor, scaleFactor: *scaleFactor, iouThreshold: *iouThreshold, - doPuploc: *doPuploc, + puploc: *puploc, puplocCascade: *puplocCascade, - markDetEyes: *markDetEyes, + flploc: *flploc, + flplocDir: *flplocDir, + markDetEyes: *markEyes, } 
faces, err := fd.detectFaces(*source) if err != nil { log.Fatalf("Detection error: %v", err) } - _, rects, err := fd.drawFaces(faces, *circleMarker) + _, rects, err := fd.drawFaces(faces, *isCircle) if err != nil { log.Fatalf("Error creating the image output: %s", err) @@ -188,8 +204,8 @@ func (fd *faceDetector) detectFaces(source string) ([]pigo.Detection, error) { return nil, err } - if fd.doPuploc { - pl := pigo.PuplocCascade{} + if fd.puploc { + pl := pigo.NewPuplocCascade() cascade, err := ioutil.ReadFile(fd.puplocCascade) if err != nil { @@ -199,6 +215,13 @@ func (fd *faceDetector) detectFaces(source string) ([]pigo.Detection, error) { if err != nil { return nil, err } + + if fd.flploc { + flpcs, err = pl.ReadCascadeDir(fd.flplocDir) + if err != nil { + return nil, err + } + } } // Run the classifier over the obtained leaf nodes and return the detection results. @@ -248,7 +271,7 @@ func (fd *faceDetector) drawFaces(faces []pigo.Detection, isCircle bool) ([]byte dc.SetStrokeStyle(gg.NewSolidPattern(color.RGBA{R: 255, G: 0, B: 0, A: 255})) dc.Stroke() - if fd.doPuploc && face.Scale > 50 { + if fd.puploc && face.Scale > 50 { rect := image.Rect( face.Col-face.Scale/2, face.Row-face.Scale/2, @@ -263,16 +286,18 @@ func (fd *faceDetector) drawFaces(faces []pigo.Detection, isCircle bool) ([]byte puploc = &pigo.Puploc{ Row: face.Row - int(0.075*float32(face.Scale)), Col: face.Col - int(0.175*float32(face.Scale)), - Scale: float32(face.Scale) * 0.25, + Scale: float32(face.Scale) * 0.15, Perturbs: perturb, } - det := plc.RunDetector(*puploc, *imgParams, fd.angle) - if det.Row > 0 && det.Col > 0 { + leftEye := plc.RunDetector(*puploc, *imgParams, fd.angle, false) + if leftEye.Row > 0 && leftEye.Col > 0 { if fd.angle > 0 { drawDetections(ctx, - float64(cols/2-(face.Col-det.Col)), - float64(rows/2-(face.Row-det.Row)), - float64(det.Scale), + float64(cols/2-(face.Col-leftEye.Col)), + float64(rows/2-(face.Row-leftEye.Row)), + float64(leftEye.Scale), + color.RGBA{R: 255, G: 
0, B: 0, A: 255}, + fd.markDetEyes, ) angle := (fd.angle * 180) / math.Pi rotated := imaging.Rotate(faceZone, 2*angle, color.Transparent) @@ -281,9 +306,11 @@ func (fd *faceDetector) drawFaces(faces []pigo.Detection, isCircle bool) ([]byte dc.DrawImage(final, face.Col-face.Scale/2, face.Row-face.Scale/2) } else { drawDetections(dc, - float64(det.Col), - float64(det.Row), - float64(det.Scale), + float64(leftEye.Col), + float64(leftEye.Row), + float64(leftEye.Scale), + color.RGBA{R: 255, G: 0, B: 0, A: 255}, + fd.markDetEyes, ) } } @@ -292,17 +319,19 @@ func (fd *faceDetector) drawFaces(faces []pigo.Detection, isCircle bool) ([]byte puploc = &pigo.Puploc{ Row: face.Row - int(0.075*float32(face.Scale)), Col: face.Col + int(0.185*float32(face.Scale)), - Scale: float32(face.Scale) * 0.25, + Scale: float32(face.Scale) * 0.15, Perturbs: perturb, } - det = plc.RunDetector(*puploc, *imgParams, fd.angle) - if det.Row > 0 && det.Col > 0 { + rightEye := plc.RunDetector(*puploc, *imgParams, fd.angle, false) + if rightEye.Row > 0 && rightEye.Col > 0 { if fd.angle > 0 { drawDetections(ctx, - float64(cols/2-(face.Col-det.Col)), - float64(rows/2-(face.Row-det.Row)), - float64(det.Scale), + float64(cols/2-(face.Col-rightEye.Col)), + float64(rows/2-(face.Row-rightEye.Row)), + float64(rightEye.Scale), + color.RGBA{R: 255, G: 0, B: 0, A: 255}, + fd.markDetEyes, ) // convert radians to angle angle := (fd.angle * 180) / math.Pi @@ -312,9 +341,63 @@ func (fd *faceDetector) drawFaces(faces []pigo.Detection, isCircle bool) ([]byte dc.DrawImage(final, face.Col-face.Scale/2, face.Row-face.Scale/2) } else { drawDetections(dc, - float64(det.Col), - float64(det.Row), - float64(det.Scale), + float64(rightEye.Col), + float64(rightEye.Row), + float64(rightEye.Scale), + color.RGBA{R: 255, G: 0, B: 0, A: 255}, + fd.markDetEyes, + ) + } + } + if fd.flploc { + for _, eye := range eyeCascades { + for _, flpc := range flpcs[eye] { + flp := flpc.FindLandmarkPoints(leftEye, rightEye, *imgParams, perturb, 
"left") + if flp.Row > 0 && flp.Col > 0 { + drawDetections(dc, + float64(flp.Col), + float64(flp.Row), + float64(flp.Scale*0.15), + color.RGBA{R: 0, G: 0, B: 255, A: 255}, + false, + ) + } + + flp = flpc.FindLandmarkPoints(leftEye, rightEye, *imgParams, perturb, "right") + if flp.Row > 0 && flp.Col > 0 { + drawDetections(dc, + float64(flp.Col), + float64(flp.Row), + float64(flp.Scale*0.15), + color.RGBA{R: 0, G: 0, B: 255, A: 255}, + false, + ) + } + } + } + + for _, mouth := range mouthCascade { + for _, flpc := range flpcs[mouth] { + flp := flpc.FindLandmarkPoints(leftEye, rightEye, *imgParams, perturb, "left") + if flp.Row > 0 && flp.Col > 0 { + drawDetections(dc, + float64(flp.Col), + float64(flp.Row), + float64(flp.Scale*0.15), + color.RGBA{R: 0, G: 0, B: 255, A: 255}, + false, + ) + } + } + } + flp := flpcs["lp84"][0].FindLandmarkPoints(leftEye, rightEye, *imgParams, perturb, "right") + if flp.Row > 0 && flp.Col > 0 { + drawDetections(dc, + float64(flp.Col), + float64(flp.Row), + float64(flp.Scale*0.15), + color.RGBA{R: 0, G: 0, B: 255, A: 255}, + false, ) } } @@ -370,10 +453,10 @@ func (s *spinner) stop() { s.stopChan <- struct{}{} } -// inSlice check if a slice contains the string value. -func inSlice(ext string, types []string) bool { - for _, t := range types { - if t == ext { +// inSlice checks if the item exists in the slice. 
+func inSlice(item string, slice []string) bool {
+	for _, it := range slice {
+		if it == item {
 			return true
 		}
 	}
@@ -381,12 +464,12 @@ func inSlice(ext string, types []string) bool {
 }
 
 // drawDetections helper function to draw the detection marks
-func drawDetections(ctx *gg.Context, x, y, r float64) {
+func drawDetections(ctx *gg.Context, x, y, r float64, c color.RGBA, markDet bool) {
 	ctx.DrawArc(x, y, r*0.5, 0, 2*math.Pi)
-	ctx.SetFillStyle(gg.NewSolidPattern(color.RGBA{R: 255, G: 0, B: 0, A: 255}))
+	ctx.SetFillStyle(gg.NewSolidPattern(c))
 	ctx.Fill()
 
-	if fd.markDetEyes {
+	if markDet {
 		ctx.DrawRectangle(x-(r*1.5), y-(r*1.5), r*3, r*3)
 		ctx.SetLineWidth(2.0)
 		ctx.SetStrokeStyle(gg.NewSolidPattern(color.RGBA{R: 255, G: 255, B: 0, A: 255}))
diff --git a/core/flploc.go b/core/flploc.go
new file mode 100644
index 0000000..e60a796
--- /dev/null
+++ b/core/flploc.go
@@ -0,0 +1,78 @@
+package pigo
+
+import (
+	"errors"
+	"io/ioutil"
+	"math"
+	"path/filepath"
+)
+
+// FlpCascade holds the binary representation of a facial landmark points cascade file.
+// ReadCascadeDir also records cascades that failed to unpack: for those the embedded
+// PuplocCascade may be nil and the embedded error holds the failure.
+type FlpCascade struct {
+	*PuplocCascade
+	error
+}
+
+// UnpackFlp unpacks the facial landmark points cascade file.
+// It reads the file located at cf and returns the result of UnpackCascade
+// applied to its content, or the read error when the file cannot be read.
+func (plc *PuplocCascade) UnpackFlp(cf string) (*PuplocCascade, error) {
+	flpc, err := ioutil.ReadFile(cf)
+	if err != nil {
+		return nil, err
+	}
+	return plc.UnpackCascade(flpc)
+}
+
+// FindLandmarkPoints detects the facial landmark points based on the pupil localization results.
+// The detector starts from the midpoint between the two eyes, shifted by 0.25 (rows) and
+// 0.15 (columns) of the distance between them, with a scale of three times that distance.
+// When position is "right" the detector runs with flipV enabled; any other value runs it
+// with flipV disabled. Both leftEye and rightEye must be non-nil.
+func (plc *PuplocCascade) FindLandmarkPoints(leftEye, rightEye *Puploc, img ImageParams, perturb int, position string) *Puploc {
+	dRow := leftEye.Row - rightEye.Row
+	dCol := leftEye.Col - rightEye.Col
+	dist := math.Sqrt(float64(dRow*dRow + dCol*dCol))
+
+	row := float64(leftEye.Row+rightEye.Row)/2.0 + 0.25*dist
+	col := float64(leftEye.Col+rightEye.Col)/2.0 + 0.15*dist
+	scale := 3.0 * dist
+
+	flploc := Puploc{
+		Row:      int(row),
+		Col:      int(col),
+		Scale:    float32(scale),
+		Perturbs: perturb,
+	}
+	return plc.RunDetector(flploc, img, 0.0, position == "right")
+}
+
+// ReadCascadeDir reads the facial landmark points cascade files from the provided directory.
+// The returned map is keyed by file name. A file that fails to unpack is still recorded,
+// with the unpack error stored in its FlpCascade entry. An error is returned when the
+// directory cannot be read or is empty.
+func (plc *PuplocCascade) ReadCascadeDir(path string) (map[string][]*FlpCascade, error) {
+	cascades, err := ioutil.ReadDir(path)
+	// Check the read error before the length: a missing or unreadable
+	// directory must not be reported as an empty one.
+	if err != nil {
+		return nil, err
+	}
+	if len(cascades) == 0 {
+		return nil, errors.New("the provided directory is empty")
+	}
+
+	flpcs := make(map[string][]*FlpCascade, len(cascades))
+	for _, cascade := range cascades {
+		name := cascade.Name()
+		cf, err := filepath.Abs(filepath.Join(path, name))
+		if err != nil {
+			return nil, err
+		}
+		flpc, err := plc.UnpackFlp(cf)
+		flpcs[name] = append(flpcs[name], &FlpCascade{PuplocCascade: flpc, error: err})
+	}
+	return flpcs, nil
+}
diff --git a/core/pigo.go b/core/pigo.go
index d707501..a6354e5 100644
--- a/core/pigo.go
+++ b/core/pigo.go
@@ -42,7 +42,7 @@ type Pigo struct {
 	treeThreshold []float32
 }
 
-// NewPigo instantiate a new pigo struct.
+// NewPigo returns a pointer to a new, zero-valued Pigo.
 func NewPigo() *Pigo {
 	return &Pigo{}
 }
diff --git a/core/puploc.go b/core/puploc.go
index f62bd1b..b9b8261 100644
--- a/core/puploc.go
+++ b/core/puploc.go
@@ -29,6 +29,11 @@ type PuplocCascade struct {
 	treePreds []float32
 }
 
+// NewPuplocCascade returns a pointer to a new, zero-valued PuplocCascade.
+func NewPuplocCascade() *PuplocCascade { + return &PuplocCascade{} +} + // UnpackCascade unpacks the pupil localization cascade file. func (plc *PuplocCascade) UnpackCascade(packet []byte) (*PuplocCascade, error) { var ( @@ -127,7 +132,9 @@ func (plc *PuplocCascade) UnpackCascade(packet []byte) (*PuplocCascade, error) { } // classifyRegion applies the face classification function over an image. -func (plc *PuplocCascade) classifyRegion(r, c, s float32, nrows, ncols int, pixels []uint8, dim int) []float32 { +func (plc *PuplocCascade) classifyRegion(r, c, s float32, nrows, ncols int, pixels []uint8, dim int, flipV bool) []float32 { + var c1, c2 int + root := 0 treeDepth := int(math.Pow(2, float64(plc.treeDepth))) @@ -138,10 +145,17 @@ func (plc *PuplocCascade) classifyRegion(r, c, s float32, nrows, ncols int, pixe idx := 0 for k := 0; k < int(plc.treeDepth); k++ { r1 := min(nrows-1, max(0, (256*int(r)+int(plc.treeCodes[root+4*idx+0])*int(round(float64(s))))>>8)) - c1 := min(ncols-1, max(0, (256*int(c)+int(plc.treeCodes[root+4*idx+1])*int(round(float64(s))))>>8)) r2 := min(nrows-1, max(0, (256*int(r)+int(plc.treeCodes[root+4*idx+2])*int(round(float64(s))))>>8)) - c2 := min(ncols-1, max(0, (256*int(c)+int(plc.treeCodes[root+4*idx+3])*int(round(float64(s))))>>8)) + // flipV means that we wish to flip the column coordinates sign in the tree nodes. + // This is required when we are running the facial landmark detector over the right side of the detected eyes. 
+ if flipV { + c1 = min(ncols-1, max(0, (256*int(c)+int(-plc.treeCodes[root+4*idx+1])*int(round(float64(s))))>>8)) + c2 = min(ncols-1, max(0, (256*int(c)+int(-plc.treeCodes[root+4*idx+3])*int(round(float64(s))))>>8)) + } else { + c1 = min(ncols-1, max(0, (256*int(c)+int(plc.treeCodes[root+4*idx+1])*int(round(float64(s))))>>8)) + c2 = min(ncols-1, max(0, (256*int(c)+int(plc.treeCodes[root+4*idx+3])*int(round(float64(s))))>>8)) + } bintest := func(p1, p2 uint8) uint8 { if p1 > p2 { return 1 @@ -153,8 +167,11 @@ func (plc *PuplocCascade) classifyRegion(r, c, s float32, nrows, ncols int, pixe lutIdx := 2 * (int(plc.trees)*treeDepth*i + treeDepth*j + idx - (treeDepth - 1)) dr += plc.treePreds[lutIdx+0] - dc += plc.treePreds[lutIdx+1] - + if flipV { + dc += -plc.treePreds[lutIdx+1] + } else { + dc += plc.treePreds[lutIdx+1] + } root += 4*treeDepth - 4 } @@ -166,7 +183,9 @@ func (plc *PuplocCascade) classifyRegion(r, c, s float32, nrows, ncols int, pixe } // classifyRotatedRegion applies the face classification function over a rotated image. 
-func (plc *PuplocCascade) classifyRotatedRegion(r, c, s float32, a float64, nrows, ncols int, pixels []uint8, dim int) []float32 { +func (plc *PuplocCascade) classifyRotatedRegion(r, c, s float32, a float64, nrows, ncols int, pixels []uint8, dim int, flipV bool) []float32 { + var row1, col1, row2, col2 int + root := 0 treeDepth := int(math.Pow(2, float64(plc.treeDepth))) @@ -182,11 +201,23 @@ func (plc *PuplocCascade) classifyRotatedRegion(r, c, s float32, a float64, nrow for j := 0; j < int(plc.trees); j++ { idx := 0 for k := 0; k < int(plc.treeDepth); k++ { - r1 := min(nrows-1, max(0, 65536*int(r)+int(qcos)*int(plc.treeCodes[root+4*idx+0])-int(qsin)*int(plc.treeCodes[root+4*idx+1]))>>16) - c1 := min(ncols-1, max(0, 65536*int(c)+int(qsin)*int(plc.treeCodes[root+4*idx+0])+int(qcos)*int(plc.treeCodes[root+4*idx+1]))>>16) + row1 = int(plc.treeCodes[root+4*idx+0]) + row2 = int(plc.treeCodes[root+4*idx+2]) - r2 := min(nrows-1, max(0, 65536*int(r)+int(qcos)*int(plc.treeCodes[root+4*idx+2])-int(qsin)*int(plc.treeCodes[root+4*idx+3]))>>16) - c2 := min(ncols-1, max(0, 65536*int(c)+int(qsin)*int(plc.treeCodes[root+4*idx+2])+int(qcos)*int(plc.treeCodes[root+4*idx+3]))>>16) + // flipV means that we wish to flip the column coordinates sign in the tree nodes. + // This is required when we are running the facial landmark detector over the right side of the detected eyes. 
+ if flipV { + col1 = int(-plc.treeCodes[root+4*idx+1]) + col2 = int(-plc.treeCodes[root+4*idx+3]) + } else { + col1 = int(plc.treeCodes[root+4*idx+1]) + col2 = int(plc.treeCodes[root+4*idx+3]) + } + + r1 := min(nrows-1, max(0, 65536*int(r)+int(qcos)*row1-int(qsin)*col1)>>16) + c1 := min(ncols-1, max(0, 65536*int(c)+int(qsin)*row1+int(qcos)*col1)>>16) + r2 := min(nrows-1, max(0, 65536*int(r)+int(qcos)*row2-int(qsin)*col2)>>16) + c2 := min(ncols-1, max(0, 65536*int(c)+int(qsin)*row2+int(qcos)*col2)>>16) bintest := func(px1, px2 uint8) int { if px1 <= px2 { @@ -199,8 +230,11 @@ func (plc *PuplocCascade) classifyRotatedRegion(r, c, s float32, a float64, nrow lutIdx := 2 * (int(plc.trees)*treeDepth*i + treeDepth*j + idx - (treeDepth - 1)) dr += plc.treePreds[lutIdx+0] - dc += plc.treePreds[lutIdx+1] - + if flipV { + dc += -plc.treePreds[lutIdx+1] + } else { + dc += plc.treePreds[lutIdx+1] + } root += 4*treeDepth - 4 } @@ -212,22 +246,22 @@ func (plc *PuplocCascade) classifyRotatedRegion(r, c, s float32, a float64, nrow } // RunDetector runs the pupil localization function. 
-func (plc *PuplocCascade) RunDetector(pl Puploc, img ImageParams, angle float64) *Puploc { +func (plc *PuplocCascade) RunDetector(pl Puploc, img ImageParams, angle float64, flipV bool) *Puploc { rows, cols, scale := []float32{}, []float32{}, []float32{} res := []float32{} for i := 0; i < pl.Perturbs; i++ { row := float32(pl.Row) + float32(pl.Scale)*0.15*(0.5-rand.Float32()) col := float32(pl.Col) + float32(pl.Scale)*0.15*(0.5-rand.Float32()) - sc := float32(pl.Scale) * (0.25 + rand.Float32()) + sc := float32(pl.Scale) * (0.925 + 0.15*rand.Float32()) if angle > 0.0 { if angle > 1.0 { angle = 1.0 } - res = plc.classifyRotatedRegion(row, col, sc, angle, img.Rows, img.Cols, img.Pixels, img.Dim) + res = plc.classifyRotatedRegion(row, col, sc, angle, img.Rows, img.Cols, img.Pixels, img.Dim, flipV) } else { - res = plc.classifyRegion(row, col, sc, img.Rows, img.Cols, img.Pixels, img.Dim) + res = plc.classifyRegion(row, col, sc, img.Rows, img.Cols, img.Pixels, img.Dim, flipV) } rows = append(rows, res[0])