mirror of
https://github.com/esimov/pigo.git
synced 2025-10-06 16:46:52 +08:00
Talk detection example
This commit is contained in:
@@ -12,6 +12,10 @@ import (
|
|||||||
pigo "github.com/esimov/pigo/core"
|
pigo "github.com/esimov/pigo/core"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type point struct {
|
||||||
|
x, y int
|
||||||
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
cascade []byte
|
cascade []byte
|
||||||
puplocCascade []byte
|
puplocCascade []byte
|
||||||
@@ -21,10 +25,6 @@ var (
|
|||||||
err error
|
err error
|
||||||
)
|
)
|
||||||
|
|
||||||
type point struct {
|
|
||||||
x, y int
|
|
||||||
}
|
|
||||||
|
|
||||||
func main() {}
|
func main() {}
|
||||||
|
|
||||||
//export FindFaces
|
//export FindFaces
|
||||||
|
18
examples/talk_detector/README.MD
Normal file
18
examples/talk_detector/README.MD
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
## Talk detection demo
|
||||||
|
|
||||||
|
This demo shows how Pigo's facial landmark point detection can be used to determine whether a person is talking. The technique is useful in a variety of fields, for example to check whether a person is communicating or not.
|
||||||
|
|
||||||
|
### Requirements
|
||||||
|
* OpenCV2
|
||||||
|
* Python2
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
```bash
|
||||||
|
$ python2 talkdet.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Keys:
|
||||||
|
<kbd>w</kbd> - Show/hide detected faces (default On)<br/>
|
||||||
|
<kbd>e</kbd> - Show/hide detected pupils (default On)<br/>
|
||||||
|
<kbd>r</kbd> - Show/hide facial landmark points (default On)<br/>
|
||||||
|
<kbd>q</kbd> - Quit
|
198
examples/talk_detector/talkdet.go
Normal file
198
examples/talk_detector/talkdet.go
Normal file
@@ -0,0 +1,198 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import "C"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"log"
|
||||||
|
"math"
|
||||||
|
"runtime"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
pigo "github.com/esimov/pigo/core"
|
||||||
|
)
|
||||||
|
|
||||||
|
type point struct {
|
||||||
|
x, y int
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
cascade []byte
|
||||||
|
puplocCascade []byte
|
||||||
|
faceClassifier *pigo.Pigo
|
||||||
|
puplocClassifier *pigo.PuplocCascade
|
||||||
|
flpcs map[string][]*pigo.FlpCascade
|
||||||
|
imgParams *pigo.ImageParams
|
||||||
|
err error
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
eyeCascades = []string{"lp46", "lp44", "lp42", "lp38", "lp312"}
|
||||||
|
mouthCascade = []string{"lp93", "lp84", "lp82", "lp81"}
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {}
|
||||||
|
|
||||||
|
//export FindFaces
|
||||||
|
func FindFaces(pixels []uint8) uintptr {
|
||||||
|
pointCh := make(chan uintptr)
|
||||||
|
|
||||||
|
results := clusterDetection(pixels, 480, 640)
|
||||||
|
dets := make([][]int, len(results))
|
||||||
|
|
||||||
|
for i := 0; i < len(results); i++ {
|
||||||
|
dets[i] = append(dets[i], results[i].Row, results[i].Col, results[i].Scale, int(results[i].Q), 0)
|
||||||
|
// left eye
|
||||||
|
puploc := &pigo.Puploc{
|
||||||
|
Row: results[i].Row - int(0.085*float32(results[i].Scale)),
|
||||||
|
Col: results[i].Col - int(0.185*float32(results[i].Scale)),
|
||||||
|
Scale: float32(results[i].Scale) * 0.4,
|
||||||
|
Perturbs: 63,
|
||||||
|
}
|
||||||
|
leftEye := puplocClassifier.RunDetector(*puploc, *imgParams, 0.0, false)
|
||||||
|
if leftEye.Row > 0 && leftEye.Col > 0 {
|
||||||
|
dets[i] = append(dets[i], leftEye.Row, leftEye.Col, int(leftEye.Scale), int(results[i].Q), 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// right eye
|
||||||
|
puploc = &pigo.Puploc{
|
||||||
|
Row: results[i].Row - int(0.085*float32(results[i].Scale)),
|
||||||
|
Col: results[i].Col + int(0.185*float32(results[i].Scale)),
|
||||||
|
Scale: float32(results[i].Scale) * 0.4,
|
||||||
|
Perturbs: 63,
|
||||||
|
}
|
||||||
|
|
||||||
|
rightEye := puplocClassifier.RunDetector(*puploc, *imgParams, 0.0, false)
|
||||||
|
if rightEye.Row > 0 && rightEye.Col > 0 {
|
||||||
|
dets[i] = append(dets[i], rightEye.Row, rightEye.Col, int(rightEye.Scale), int(results[i].Q), 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Traverse all the eye cascades and run the detector on each of them.
|
||||||
|
for _, eye := range eyeCascades {
|
||||||
|
for _, flpc := range flpcs[eye] {
|
||||||
|
flp := flpc.FindLandmarkPoints(leftEye, rightEye, *imgParams, puploc.Perturbs, false)
|
||||||
|
if flp.Row > 0 && flp.Col > 0 {
|
||||||
|
dets[i] = append(dets[i], flp.Row, flp.Col, int(flp.Scale), int(results[i].Q), 2)
|
||||||
|
}
|
||||||
|
|
||||||
|
flp = flpc.FindLandmarkPoints(leftEye, rightEye, *imgParams, puploc.Perturbs, true)
|
||||||
|
if flp.Row > 0 && flp.Col > 0 {
|
||||||
|
dets[i] = append(dets[i], flp.Row, flp.Col, int(flp.Scale), int(results[i].Q), 2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mouthPoints := []int{}
|
||||||
|
// Traverse all the mouth cascades and run the detector on each of them.
|
||||||
|
for _, mouth := range mouthCascade {
|
||||||
|
for _, flpc := range flpcs[mouth] {
|
||||||
|
flp := flpc.FindLandmarkPoints(leftEye, rightEye, *imgParams, puploc.Perturbs, false)
|
||||||
|
if flp.Row > 0 && flp.Col > 0 {
|
||||||
|
mouthPoints = append(mouthPoints, flp.Row, flp.Col)
|
||||||
|
dets[i] = append(dets[i], flp.Row, flp.Col, int(flp.Scale), int(results[i].Q), 2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
flp := flpcs["lp84"][0].FindLandmarkPoints(leftEye, rightEye, *imgParams, puploc.Perturbs, true)
|
||||||
|
if flp.Row > 0 && flp.Col > 0 {
|
||||||
|
mouthPoints = append(mouthPoints, flp.Row, flp.Col)
|
||||||
|
dets[i] = append(dets[i], flp.Row, flp.Col, int(flp.Scale), int(results[i].Q), 2)
|
||||||
|
}
|
||||||
|
fmt.Println(mouthPoints)
|
||||||
|
p1 := &point{x: mouthPoints[2], y: mouthPoints[3]}
|
||||||
|
p2 := &point{x: mouthPoints[len(mouthPoints)-2], y: mouthPoints[len(mouthPoints)-1]}
|
||||||
|
p3 := &point{x: mouthPoints[4], y: mouthPoints[5]}
|
||||||
|
p4 := &point{x: mouthPoints[len(mouthPoints)-4], y: mouthPoints[len(mouthPoints)-3]}
|
||||||
|
|
||||||
|
dist1 := math.Sqrt(math.Pow(float64(p2.y-p1.y), 2) + math.Pow(float64(p2.x-p1.x), 2))
|
||||||
|
dist2 := math.Sqrt(math.Pow(float64(p4.y-p3.y), 2) + math.Pow(float64(p4.x-p3.x), 2))
|
||||||
|
|
||||||
|
ar := math.Round((dist1 / dist2) * 0.2)
|
||||||
|
fmt.Println(ar)
|
||||||
|
}
|
||||||
|
|
||||||
|
coords := make([]int, 0, len(dets))
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
// Since in Go we cannot transfer a 2d array trough an array pointer
|
||||||
|
// we have to transform it into 1d array.
|
||||||
|
for _, v := range dets {
|
||||||
|
coords = append(coords, v...)
|
||||||
|
}
|
||||||
|
// Include as a first slice element the number of detected faces.
|
||||||
|
// We need to transfer this value in order to define the Python array buffer length.
|
||||||
|
coords = append([]int{len(dets), 0, 0, 0, 0}, coords...)
|
||||||
|
|
||||||
|
// Convert the slice into an array pointer.
|
||||||
|
s := *(*[]uint8)(unsafe.Pointer(&coords))
|
||||||
|
p := uintptr(unsafe.Pointer(&s[0]))
|
||||||
|
|
||||||
|
// Ensure `det` is not freed up by GC prematurely.
|
||||||
|
runtime.KeepAlive(coords)
|
||||||
|
|
||||||
|
// return the pointer address
|
||||||
|
pointCh <- p
|
||||||
|
}()
|
||||||
|
return <-pointCh
|
||||||
|
}
|
||||||
|
|
||||||
|
// clusterDetection runs Pigo face detector core methods
|
||||||
|
// and returns a cluster with the detected faces coordinates.
|
||||||
|
func clusterDetection(pixels []uint8, rows, cols int) []pigo.Detection {
|
||||||
|
imgParams = &pigo.ImageParams{
|
||||||
|
Pixels: pixels,
|
||||||
|
Rows: rows,
|
||||||
|
Cols: cols,
|
||||||
|
Dim: cols,
|
||||||
|
}
|
||||||
|
cParams := pigo.CascadeParams{
|
||||||
|
MinSize: 60,
|
||||||
|
MaxSize: 600,
|
||||||
|
ShiftFactor: 0.1,
|
||||||
|
ScaleFactor: 1.1,
|
||||||
|
ImageParams: *imgParams,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure that the face detection classifier is loaded only once.
|
||||||
|
if len(cascade) == 0 {
|
||||||
|
cascade, err = ioutil.ReadFile("../../cascade/facefinder")
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Error reading the cascade file: %v", err)
|
||||||
|
}
|
||||||
|
p := pigo.NewPigo()
|
||||||
|
|
||||||
|
// Unpack the binary file. This will return the number of cascade trees,
|
||||||
|
// the tree depth, the threshold and the prediction from tree's leaf nodes.
|
||||||
|
faceClassifier, err = p.Unpack(cascade)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Error unpacking the cascade file: %s", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure that we load the pupil localization cascade only once
|
||||||
|
if len(puplocCascade) == 0 {
|
||||||
|
puplocCascade, err := ioutil.ReadFile("../../cascade/puploc")
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Error reading the puploc cascade file: %s", err)
|
||||||
|
}
|
||||||
|
puplocClassifier, err = puplocClassifier.UnpackCascade(puplocCascade)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Error unpacking the puploc cascade file: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
flpcs, err = puplocClassifier.ReadCascadeDir("../../cascade/lps")
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Error unpacking the facial landmark detection cascades: %s", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the classifier over the obtained leaf nodes and return the detection results.
|
||||||
|
// The result contains quadruplets representing the row, column, scale and detection score.
|
||||||
|
dets := faceClassifier.RunCascade(cParams, 0.0)
|
||||||
|
|
||||||
|
// Calculate the intersection over union (IoU) of two clusters.
|
||||||
|
dets = faceClassifier.ClusterDetections(dets, 0.0)
|
||||||
|
|
||||||
|
return dets
|
||||||
|
}
|
76
examples/talk_detector/talkdet.h
Normal file
76
examples/talk_detector/talkdet.h
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
/* Code generated by cmd/cgo; DO NOT EDIT. */
|
||||||
|
|
||||||
|
/* package command-line-arguments */
|
||||||
|
|
||||||
|
|
||||||
|
#line 1 "cgo-builtin-export-prolog"
|
||||||
|
|
||||||
|
#include <stddef.h> /* for ptrdiff_t below */
|
||||||
|
|
||||||
|
#ifndef GO_CGO_EXPORT_PROLOGUE_H
|
||||||
|
#define GO_CGO_EXPORT_PROLOGUE_H
|
||||||
|
|
||||||
|
#ifndef GO_CGO_GOSTRING_TYPEDEF
|
||||||
|
typedef struct { const char *p; ptrdiff_t n; } _GoString_;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Start of preamble from import "C" comments. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* End of preamble from import "C" comments. */
|
||||||
|
|
||||||
|
|
||||||
|
/* Start of boilerplate cgo prologue. */
|
||||||
|
#line 1 "cgo-gcc-export-header-prolog"
|
||||||
|
|
||||||
|
#ifndef GO_CGO_PROLOGUE_H
|
||||||
|
#define GO_CGO_PROLOGUE_H
|
||||||
|
|
||||||
|
typedef signed char GoInt8;
|
||||||
|
typedef unsigned char GoUint8;
|
||||||
|
typedef short GoInt16;
|
||||||
|
typedef unsigned short GoUint16;
|
||||||
|
typedef int GoInt32;
|
||||||
|
typedef unsigned int GoUint32;
|
||||||
|
typedef long long GoInt64;
|
||||||
|
typedef unsigned long long GoUint64;
|
||||||
|
typedef GoInt64 GoInt;
|
||||||
|
typedef GoUint64 GoUint;
|
||||||
|
typedef __SIZE_TYPE__ GoUintptr;
|
||||||
|
typedef float GoFloat32;
|
||||||
|
typedef double GoFloat64;
|
||||||
|
typedef float _Complex GoComplex64;
|
||||||
|
typedef double _Complex GoComplex128;
|
||||||
|
|
||||||
|
/*
|
||||||
|
static assertion to make sure the file is being used on architecture
|
||||||
|
at least with matching size of GoInt.
|
||||||
|
*/
|
||||||
|
typedef char _check_for_64_bit_pointer_matching_GoInt[sizeof(void*)==64/8 ? 1:-1];
|
||||||
|
|
||||||
|
#ifndef GO_CGO_GOSTRING_TYPEDEF
|
||||||
|
typedef _GoString_ GoString;
|
||||||
|
#endif
|
||||||
|
typedef void *GoMap;
|
||||||
|
typedef void *GoChan;
|
||||||
|
typedef struct { void *t; void *v; } GoInterface;
|
||||||
|
typedef struct { void *data; GoInt len; GoInt cap; } GoSlice;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* End of boilerplate cgo prologue. */
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
extern GoUintptr FindFaces(GoSlice p0);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
103
examples/talk_detector/talkdet.py
Normal file
103
examples/talk_detector/talkdet.py
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
from ctypes import *
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
import numpy as np
|
||||||
|
import os
|
||||||
|
import cv2
|
||||||
|
import time
|
||||||
|
|
||||||
|
os.system('go build -o talkdet.so -buildmode=c-shared talkdet.go')
|
||||||
|
pigo = cdll.LoadLibrary('./talkdet.so')
|
||||||
|
os.system('rm talkdet.so')
|
||||||
|
|
||||||
|
MAX_NDETS = 2024
|
||||||
|
ARRAY_DIM = 5
|
||||||
|
|
||||||
|
# define class GoPixelSlice to map to:
|
||||||
|
# C type struct { void *data; GoInt len; GoInt cap; }
|
||||||
|
class GoPixelSlice(Structure):
    """ctypes layout of the Go slice header passed to FindFaces.

    Field order follows the cgo GoSlice struct
    { void *data; GoInt len; GoInt cap; }.
    """

    _fields_ = [
        ("pixels", POINTER(c_ubyte)),  # start of the pixel buffer
        ("len", c_longlong),           # number of elements in use
        ("cap", c_longlong),           # capacity of the buffer
    ]
|
||||||
|
|
||||||
|
# Obtain the camera pixels and transfer them to Go through ctypes.
|
||||||
|
def process_frame(pixs):
    """Send one frame to the Go detector and return its detections.

    pixs is a flat sequence of pixel values (0-255). It is copied into a
    ctypes byte array, wrapped in a GoPixelSlice and handed to the exported
    Go function FindFaces.

    Returns a list of ARRAY_DIM-long rows (row, col, scale, score, kind),
    or None when FindFaces returned a null pointer.
    """
    pixels = cast((c_ubyte * len(pixs))(*pixs), POINTER(c_ubyte))

    # Describe the Go signature: FindFaces(GoSlice) -> pointer.
    faces = GoPixelSlice(pixels, len(pixs), len(pixs))
    pigo.FindFaces.argtypes = [GoPixelSlice]
    pigo.FindFaces.restype = c_void_p

    # Call the exported FindFaces function from Go.
    ndets = pigo.FindFaces(faces)
    data_pointer = cast(ndets, POINTER((c_longlong * ARRAY_DIM) * MAX_NDETS))
    if not data_pointer:
        return None

    buffarr = ((c_longlong * ARRAY_DIM) * MAX_NDETS).from_address(addressof(data_pointer.contents))
    res = np.ndarray(buffer=buffarr, dtype=c_longlong, shape=(MAX_NDETS, ARRAY_DIM,))

    # The first value of the buffer array represents the number of detected faces.
    dets_len = res[0][0]
    # Drop the header row. np.delete returns a new array, so the rows
    # returned below no longer alias the buffer owned by Go.
    res = np.delete(res, 0, 0)

    # We have to multiply the detection length with the total
    # detection points (face, pupils and facial landmark points), in total 18.
    return list(res.reshape(-1, ARRAY_DIM))[0:dets_len*18]
|
||||||
|
|
||||||
|
# initialize the camera
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

# Changing the camera resolution introduces a short delay in the camera initialization.
# For this reason we should delay the object detection process by a few milliseconds.
time.sleep(0.4)

# Display toggles, switched at runtime with the w / e / r keys.
showFaceDet = True
showPupil = True
showLandmarkPoints = True

while True:
    ret, frame = cap.read()
    # A failed read yields no frame; stop cleanly instead of crashing on
    # the indexing below and leaving the camera unreleased.
    if not ret or frame is None:
        print("Unable to read a frame from the camera")
        break

    # Only a single colour channel (index 1) is sent to the detector.
    pixs = np.ascontiguousarray(frame[:, :, 1].reshape((frame.shape[0], frame.shape[1])))
    pixs = pixs.flatten()

    # Verify if camera is initialized by checking if pixel array is not empty.
    if np.any(pixs):
        dets = process_frame(pixs)  # pixs needs to be numpy.uint8 array

        if dets is not None:
            # Each row is (row, col, scale, score, kind); draw only rows
            # whose score exceeds 50.
            for det in dets:
                if det[3] > 50:
                    if det[4] == 0:  # 0 == face;
                        if showFaceDet:
                            cv2.rectangle(frame,
                                (int(det[1])-int(det[2]/2), int(det[0])-int(det[2]/2)),
                                (int(det[1])+int(det[2]/2), int(det[0])+int(det[2]/2)),
                                (0, 0, 255), 2
                            )
                    elif det[4] == 1:  # 1 == pupil;
                        if showPupil:
                            cv2.circle(frame, (int(det[1]), int(det[0])), 4, (0, 0, 255), -1, 8, 0)
                    elif det[4] == 2:  # 2 == facial landmark;
                        if showLandmarkPoints:
                            cv2.circle(frame, (int(det[1]), int(det[0])), 4, (0, 255, 0), -1, 8, 0)

    cv2.imshow('', frame)

    key = cv2.waitKey(1) & 0xFF
    if key == ord('q'):
        break
    elif key == ord('w'):
        showFaceDet = not showFaceDet
    elif key == ord('e'):
        showPupil = not showPupil
    elif key == ord('r'):
        showLandmarkPoints = not showLandmarkPoints

cap.release()
cv2.destroyAllWindows()
|
Reference in New Issue
Block a user