Talk detection example

2025-10-06 16:46:52 +08:00 · 2019-10-29 14:50:04 +02:00
parent d75028199f
commit 2a30e9e8de
5 changed files with 399 additions and 4 deletions
--- a/examples/puploc_masquerade/puploc.go
+++ b/examples/puploc_masquerade/puploc.go
@@ -12,6 +12,10 @@ import (
 	pigo "github.com/esimov/pigo/core"
 )
 type point struct {
 	x, y int
 }
 var (
 	cascade          []byte
 	puplocCascade    []byte
@@ -21,10 +25,6 @@ var (
 	err              error
 )
 type point struct {
 	x, y int
 }
 func main() {}
 //export FindFaces
--- a/examples/talk_detector/README.MD
+++ b/examples/talk_detector/README.MD
@@ -0,0 +1,18 @@
 ## Talk detection demo
 This demo shows how Pigo's facial landmark point detection can be used to detect whether a person is talking. The technique is useful in a variety of fields, for example to check whether a person is communicating.
 ### Requirements
 * OpenCV2
 * Python2
 ### Usage
 ```bash
 $ python2 talkdet.py
 ```
 ### Keys:
 <kbd>w</kbd> - Show/hide detected faces (default On)<br/>
 <kbd>e</kbd> - Show/hide detected pupils (default On)<br/>
 <kbd>r</kbd> - Show/hide facial landmark points (default On)<br/>
 <kbd>q</kbd> - Quit
--- a/examples/talk_detector/talkdet.go
+++ b/examples/talk_detector/talkdet.go
@@ -0,0 +1,198 @@
 package main
 import "C"
 import (
 	"fmt"
 	"io/ioutil"
 	"log"
 	"math"
 	"runtime"
 	"unsafe"
 	pigo "github.com/esimov/pigo/core"
 )
 // point is a pair of integer image coordinates.
 type point struct {
 	x int
 	y int
 }
 // Package-level detector state shared between FindFaces and clusterDetection.
 var (
 	// cascade holds the raw face detection cascade file; clusterDetection
 	// loads it only while this slice is empty.
 	cascade          []byte
 	// puplocCascade is the guard clusterDetection checks before loading the
 	// pupil localization and facial landmark cascades.
 	puplocCascade    []byte
 	// faceClassifier is the unpacked face detection cascade.
 	faceClassifier   *pigo.Pigo
 	// puplocClassifier is the unpacked pupil localization cascade.
 	puplocClassifier *pigo.PuplocCascade
 	// flpcs maps a landmark cascade name (for example "lp84") to the
 	// cascades read from the landmark cascade directory.
 	flpcs            map[string][]*pigo.FlpCascade
 	// imgParams describes the frame currently being processed; it is
 	// replaced on every clusterDetection call.
 	imgParams        *pigo.ImageParams
 	// err is the shared error variable used while loading the cascades.
 	err              error
 )
 // Names of the landmark cascades FindFaces runs, used as keys into flpcs.
 var (
 	// eyeCascades lists the cascades run in the eye landmark loop.
 	eyeCascades  = []string{"lp46", "lp44", "lp42", "lp38", "lp312"}
 	// mouthCascade lists the cascades whose points feed the mouth measurement.
 	mouthCascade = []string{"lp93", "lp84", "lp82", "lp81"}
 )
 // main is intentionally empty: the package is built with -buildmode=c-shared
 // and used only through its exported functions.
 func main() {}
 //export FindFaces
 func FindFaces(pixels []uint8) uintptr {
 	pointCh := make(chan uintptr)
 	results := clusterDetection(pixels, 480, 640)
 	dets := make([][]int, len(results))
 	for i := 0; i < len(results); i++ {
 		dets[i] = append(dets[i], results[i].Row, results[i].Col, results[i].Scale, int(results[i].Q), 0)
 		// left eye
 		puploc := &pigo.Puploc{
 			Row:      results[i].Row - int(0.085*float32(results[i].Scale)),
 			Col:      results[i].Col - int(0.185*float32(results[i].Scale)),
 			Scale:    float32(results[i].Scale) * 0.4,
 			Perturbs: 63,
 		}
 		leftEye := puplocClassifier.RunDetector(*puploc, *imgParams, 0.0, false)
 		if leftEye.Row > 0 && leftEye.Col > 0 {
 			dets[i] = append(dets[i], leftEye.Row, leftEye.Col, int(leftEye.Scale), int(results[i].Q), 1)
 		}
 		// right eye
 		puploc = &pigo.Puploc{
 			Row:      results[i].Row - int(0.085*float32(results[i].Scale)),
 			Col:      results[i].Col + int(0.185*float32(results[i].Scale)),
 			Scale:    float32(results[i].Scale) * 0.4,
 			Perturbs: 63,
 		}
 		rightEye := puplocClassifier.RunDetector(*puploc, *imgParams, 0.0, false)
 		if rightEye.Row > 0 && rightEye.Col > 0 {
 			dets[i] = append(dets[i], rightEye.Row, rightEye.Col, int(rightEye.Scale), int(results[i].Q), 1)
 		}
 		// Traverse all the eye cascades and run the detector on each of them.
 		for _, eye := range eyeCascades {
 			for _, flpc := range flpcs[eye] {
 				flp := flpc.FindLandmarkPoints(leftEye, rightEye, *imgParams, puploc.Perturbs, false)
 				if flp.Row > 0 && flp.Col > 0 {
 					dets[i] = append(dets[i], flp.Row, flp.Col, int(flp.Scale), int(results[i].Q), 2)
 				}
 				flp = flpc.FindLandmarkPoints(leftEye, rightEye, *imgParams, puploc.Perturbs, true)
 				if flp.Row > 0 && flp.Col > 0 {
 					dets[i] = append(dets[i], flp.Row, flp.Col, int(flp.Scale), int(results[i].Q), 2)
 				}
 			}
 		}
 		mouthPoints := []int{}
 		// Traverse all the mouth cascades and run the detector on each of them.
 		for _, mouth := range mouthCascade {
 			for _, flpc := range flpcs[mouth] {
 				flp := flpc.FindLandmarkPoints(leftEye, rightEye, *imgParams, puploc.Perturbs, false)
 				if flp.Row > 0 && flp.Col > 0 {
 					mouthPoints = append(mouthPoints, flp.Row, flp.Col)
 					dets[i] = append(dets[i], flp.Row, flp.Col, int(flp.Scale), int(results[i].Q), 2)
 				}
 			}
 		}
 		flp := flpcs["lp84"][0].FindLandmarkPoints(leftEye, rightEye, *imgParams, puploc.Perturbs, true)
 		if flp.Row > 0 && flp.Col > 0 {
 			mouthPoints = append(mouthPoints, flp.Row, flp.Col)
 			dets[i] = append(dets[i], flp.Row, flp.Col, int(flp.Scale), int(results[i].Q), 2)
 		}
 		fmt.Println(mouthPoints)
 		p1 := &point{x: mouthPoints[2], y: mouthPoints[3]}
 		p2 := &point{x: mouthPoints[len(mouthPoints)-2], y: mouthPoints[len(mouthPoints)-1]}
 		p3 := &point{x: mouthPoints[4], y: mouthPoints[5]}
 		p4 := &point{x: mouthPoints[len(mouthPoints)-4], y: mouthPoints[len(mouthPoints)-3]}
 		dist1 := math.Sqrt(math.Pow(float64(p2.y-p1.y), 2) + math.Pow(float64(p2.x-p1.x), 2))
 		dist2 := math.Sqrt(math.Pow(float64(p4.y-p3.y), 2) + math.Pow(float64(p4.x-p3.x), 2))
 		ar := math.Round((dist1 / dist2) * 0.2)
 		fmt.Println(ar)
 	}
 	coords := make([]int, 0, len(dets))
 	go func() {
 		// Since in Go we cannot transfer a 2d array trough an array pointer
 		// we have to transform it into 1d array.
 		for _, v := range dets {
 			coords = append(coords, v...)
 		}
 		// Include as a first slice element the number of detected faces.
 		// We need to transfer this value in order to define the Python array buffer length.
 		coords = append([]int{len(dets), 0, 0, 0, 0}, coords...)
 		// Convert the slice into an array pointer.
 		s := *(*[]uint8)(unsafe.Pointer(&coords))
 		p := uintptr(unsafe.Pointer(&s[0]))
 		// Ensure `det` is not freed up by GC prematurely.
 		runtime.KeepAlive(coords)
 		// return the pointer address
 		pointCh <- p
 	}()
 	return <-pointCh
 }
 // clusterDetection runs the Pigo face detector over one frame and returns
 // the clustered face detections.
 //
 // pixels holds the frame; rows and cols describe its dimensions. The frame
 // description is also stored in the package-level imgParams so that FindFaces
 // can reuse it for pupil and landmark detection.
 //
 // On first use the face, pupil localization and facial landmark cascades are
 // read from paths relative to the working directory. Any failure while
 // loading them terminates the process through log.Fatalf.
 func clusterDetection(pixels []uint8, rows, cols int) []pigo.Detection {
 	imgParams = &pigo.ImageParams{
 		Pixels: pixels,
 		Rows:   rows,
 		Cols:   cols,
 		Dim:    cols,
 	}
 	cParams := pigo.CascadeParams{
 		MinSize:     60,
 		MaxSize:     600,
 		ShiftFactor: 0.1,
 		ScaleFactor: 1.1,
 		ImageParams: *imgParams,
 	}
 	// Ensure that the face detection classifier is loaded only once.
 	if len(cascade) == 0 {
 		cascade, err = ioutil.ReadFile("../../cascade/facefinder")
 		if err != nil {
 			log.Fatalf("Error reading the cascade file: %v", err)
 		}
 		p := pigo.NewPigo()
 		// Unpack the binary file. This will return the number of cascade trees,
 		// the tree depth, the threshold and the prediction from tree's leaf nodes.
 		faceClassifier, err = p.Unpack(cascade)
 		if err != nil {
 			log.Fatalf("Error unpacking the cascade file: %s", err)
 		}
 	}
 	// Ensure that we load the pupil localization cascade only once.
 	if len(puplocCascade) == 0 {
 		// Plain assignment, not :=, so the package-level puplocCascade is
 		// populated and this guard is false on subsequent calls. A short
 		// variable declaration here would shadow it and reload every
 		// cascade on each frame.
 		puplocCascade, err = ioutil.ReadFile("../../cascade/puploc")
 		if err != nil {
 			log.Fatalf("Error reading the puploc cascade file: %s", err)
 		}
 		puplocClassifier, err = puplocClassifier.UnpackCascade(puplocCascade)
 		if err != nil {
 			log.Fatalf("Error unpacking the puploc cascade file: %s", err)
 		}
 		flpcs, err = puplocClassifier.ReadCascadeDir("../../cascade/lps")
 		if err != nil {
 			log.Fatalf("Error unpacking the facial landmark detection cascades: %s", err)
 		}
 	}
 	// Run the classifier over the obtained leaf nodes and return the detection results.
 	// The result contains quadruplets representing the row, column, scale and detection score.
 	dets := faceClassifier.RunCascade(cParams, 0.0)
 	// Calculate the intersection over union (IoU) of two clusters.
 	dets = faceClassifier.ClusterDetections(dets, 0.0)
 	return dets
 }
--- a/examples/talk_detector/talkdet.h
+++ b/examples/talk_detector/talkdet.h
@@ -0,0 +1,76 @@
 /* Code generated by cmd/cgo; DO NOT EDIT. */
 /* package command-line-arguments */
 #line 1 "cgo-builtin-export-prolog"
 #include <stddef.h> /* for ptrdiff_t below */
 #ifndef GO_CGO_EXPORT_PROLOGUE_H
 #define GO_CGO_EXPORT_PROLOGUE_H
 #ifndef GO_CGO_GOSTRING_TYPEDEF
 typedef struct { const char *p; ptrdiff_t n; } _GoString_;
 #endif
 #endif
 /* Start of preamble from import "C" comments.  */
 /* End of preamble from import "C" comments.  */
 /* Start of boilerplate cgo prologue.  */
 #line 1 "cgo-gcc-export-header-prolog"
 #ifndef GO_CGO_PROLOGUE_H
 #define GO_CGO_PROLOGUE_H
 typedef signed char GoInt8;
 typedef unsigned char GoUint8;
 typedef short GoInt16;
 typedef unsigned short GoUint16;
 typedef int GoInt32;
 typedef unsigned int GoUint32;
 typedef long long GoInt64;
 typedef unsigned long long GoUint64;
 typedef GoInt64 GoInt;
 typedef GoUint64 GoUint;
 typedef __SIZE_TYPE__ GoUintptr;
 typedef float GoFloat32;
 typedef double GoFloat64;
 typedef float _Complex GoComplex64;
 typedef double _Complex GoComplex128;
 /*
  static assertion to make sure the file is being used on architecture
  at least with matching size of GoInt.
 */
 typedef char _check_for_64_bit_pointer_matching_GoInt[sizeof(void*)==64/8 ? 1:-1];
 #ifndef GO_CGO_GOSTRING_TYPEDEF
 typedef _GoString_ GoString;
 #endif
 typedef void *GoMap;
 typedef void *GoChan;
 typedef struct { void *t; void *v; } GoInterface;
 typedef struct { void *data; GoInt len; GoInt cap; } GoSlice;
 #endif
 /* End of boilerplate cgo prologue.  */
 #ifdef __cplusplus
 extern "C" {
 #endif
 extern GoUintptr FindFaces(GoSlice p0);
 #ifdef __cplusplus
 }
 #endif
--- a/examples/talk_detector/talkdet.py
+++ b/examples/talk_detector/talkdet.py
@@ -0,0 +1,103 @@
 from ctypes import *
 import subprocess
 import numpy as np
 import os
 import cv2
 import time
 # Build the Go detector as a C shared library, load it through ctypes and
 # then delete the file from disk once it has been loaded.
 os.system('go build -o talkdet.so -buildmode=c-shared talkdet.go')
 pigo = cdll.LoadLibrary('./talkdet.so')
 os.system('rm talkdet.so')
 # Upper bound on the number of rows read from the buffer returned by FindFaces.
 MAX_NDETS = 2024
 # Number of integers in one row of that buffer.
 ARRAY_DIM = 5
 # ctypes mirror of the Go slice header, i.e. the C type
 # struct { void *data; GoInt len; GoInt cap; }
 class GoPixelSlice(Structure):
 	"""Pixel buffer handed by value to the exported Go function."""
 	_fields_ = [
 		("pixels", POINTER(c_ubyte)),
 		("len", c_longlong),
 		("cap", c_longlong),
 	]
 # Obtain the camera pixels and transfer them to Go through ctypes.
 def process_frame(pixs):
 	"""Run the Go detector on one frame and return its detections.

 	pixs is a flat array of uint8 pixel values. Returns a list of rows of
 	ARRAY_DIM integers, or None when Go returns a null pointer.
 	"""
 	# Share the pixel buffer with Go instead of unpacking every element into
 	# a new ctypes array. The local reference keeps it alive during the call.
 	pixs = np.ascontiguousarray(pixs, dtype=np.uint8)
 	pixels = pixs.ctypes.data_as(POINTER(c_ubyte))
 	# call FindFaces
 	faces = GoPixelSlice(pixels, len(pixs), len(pixs))
 	pigo.FindFaces.argtypes = [GoPixelSlice]
 	pigo.FindFaces.restype = c_void_p
 	# Call the exported FindFaces function from Go.
 	ndets = pigo.FindFaces(faces)
 	# c_void_p yields None (or 0) for a null pointer.
 	if not ndets:
 		return None
 	row_type = c_longlong * ARRAY_DIM
 	# The first value of the first row is the number of detected faces.
 	header = row_type.from_address(ndets)
 	dets_len = int(header[0])
 	# We have to multiply the detection length with the total detection
 	# points (face, pupils and facial landmark points), in total 18.
 	# Only that many rows are mapped, never more than the buffer bound.
 	# NOTE(review): Go omits points it did not detect, so trailing rows may
 	# not hold data written for this frame -- confirm against FindFaces.
 	count = max(0, min(dets_len * 18, MAX_NDETS - 1))
 	if count == 0:
 		return []
 	# Skip the header row and view the detection rows that follow it.
 	buffarr = (row_type * count).from_address(ndets + sizeof(row_type))
 	res = np.ndarray(buffer=buffarr, dtype=c_longlong, shape=(count, ARRAY_DIM,))
 	# Copy out of Go-owned memory before returning.
 	return list(res.copy())
 # initialize the camera
 cap = cv2.VideoCapture(0)
 cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
 cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
 # Changing the camera resolution introduces a short delay in the camera initialization.
 # For this reason we delay the object detection process by a few hundred milliseconds.
 time.sleep(0.4)
 # Display toggles, switched with the w / e / r keys below.
 showFaceDet = True
 showPupil = True
 showLandmarkPoints = True
 while(True):
 	ret, frame = cap.read()
 	# A failed grab returns no frame; indexing it would raise and skip the
 	# cleanup below, so leave the loop instead.
 	if not ret or frame is None:
 		break
 	# Use a single colour channel as the grayscale input and flatten it into
 	# the contiguous 1d array expected by process_frame.
 	pixs = frame[:, :, 1].flatten()
 	# Verify if camera is initialized by checking if pixel array is not empty.
 	if np.any(pixs):
 		dets = process_frame(pixs) # pixs needs to be numpy.uint8 array
 		if dets is not None:
 			# Each row is (row, col, scale, score, kind).
 			for det in dets:
 				if det[3] > 50:
 					if det[4] == 0: # 0 == face;
 						if showFaceDet:
 							cv2.rectangle(frame,
 								(int(det[1])-int(det[2]/2), int(det[0])-int(det[2]/2)),
 								(int(det[1])+int(det[2]/2), int(det[0])+int(det[2]/2)),
 								(0, 0, 255), 2
 							)
 					elif det[4] == 1: # 1 == pupil;
 						if showPupil:
 							cv2.circle(frame, (int(det[1]), int(det[0])), 4, (0, 0, 255), -1, 8, 0)
 					elif det[4] == 2: # 2 == facial landmark;
 						if showLandmarkPoints:
 							cv2.circle(frame, (int(det[1]), int(det[0])), 4, (0, 255, 0), -1, 8, 0)
 	cv2.imshow('', frame)
 	key = cv2.waitKey(1) & 0xFF
 	if key == ord('q'):
 		break
 	elif key == ord('w'):
 		showFaceDet = not showFaceDet
 	elif key == ord('e'):
 		showPupil = not showPupil
 	elif key == ord('r'):
 		showLandmarkPoints = not showLandmarkPoints
 cap.release()
 cv2.destroyAllWindows()