mirror of
https://github.com/esimov/pigo.git
synced 2025-10-24 16:30:31 +08:00
146 lines
5.2 KiB
Python
146 lines
5.2 KiB
Python
from ctypes import *
|
|
|
|
import numpy as np
|
|
import os
|
|
import cv2
|
|
|
|
# Rebuild the Go detector as a C shared library before loading it.
# os.system returns the command's exit status; a non-zero value means the
# build did not succeed. The load below is still attempted (a previously built
# talkdet.so may exist), but the failure is reported instead of being ignored.
build_status = os.system('go build -o talkdet.so -buildmode=c-shared talkdet.go')
if build_status != 0:
    print('warning: building talkdet.so failed (exit status %s); '
          'trying to load an existing library' % build_status)

# Handle to the shared library; its exported functions are called via ctypes.
pigo = cdll.LoadLibrary('./talkdet.so')
|
|
|
|
# Row count of the ctypes array type used to view the buffer returned by
# FindFaces (also sizes the fallback array allocated in process_frame).
MAX_NDETS = 2024
# Values per detection row; the main loop unpacks them as
# row, col, scale, q, det_type, mouth_ar.
ARRAY_DIM = 6
# Number of rows actually read back from the returned buffer on every frame.
NUM_LP = 95

# Closed-mouth detections tolerated in a row before the "talking" state is
# switched off again.
MOUTH_AR_CONSEC_FRAMES = 5
|
|
|
|
def verify_alpha_channel(frame):
    """Return ``frame`` as a 4-channel (BGRA) image.

    The channel count of an image array is ``frame.shape[2]``; a frame that
    already carries four channels is returned untouched, anything else is
    converted.

    Args:
        frame: image array, either 2-D (single channel) or 3-D
            ``(height, width, channels)``.

    Returns:
        ``frame`` itself when it already has four channels, otherwise a new
        BGRA array produced by ``cv2.cvtColor``.
    """
    if frame.ndim == 3 and frame.shape[2] == 4:
        return frame  # alpha channel already present, nothing to convert
    if frame.ndim == 2 or frame.shape[2] == 1:
        # Single-channel input needs the gray conversion code; BGR2BGRA
        # rejects anything that is not 3-channel.
        return cv2.cvtColor(frame, cv2.COLOR_GRAY2BGRA)
    return cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
|
|
|
|
def alpha_blend(frame_1, frame_2, mask):
    """Mix two frames using ``mask`` as per-pixel weights.

    ``mask`` is divided by 255 to obtain the weight of ``frame_2``; the
    complementary weight is applied to ``frame_1``. The weighted sum is passed
    through ``cv2.convertScaleAbs`` before being returned.
    """
    weight = mask / 255.0
    mixed = frame_1 * (1 - weight) + frame_2 * weight
    return cv2.convertScaleAbs(mixed)
|
|
|
|
def apply_circle_focus_blur(frame, x, y, dim, blur):
    """Blur ``frame`` everywhere except a soft-edged disc centred on (x, y).

    Args:
        frame: image to process; it is brought to four channels first.
        x, y: centre of the sharp disc, in pixels.
        dim: size the disc radius is derived from (radius = ``dim / 1.5``).
        blur: side length of the square box-filter kernel.

    Returns:
        The blended image converted back to three channels (BGR).
    """
    frame = verify_alpha_channel(frame)
    height, width, _ = frame.shape

    # Filled white disc on a black canvas marks the region to keep sharp.
    mask = np.zeros((height, width, 4), dtype='uint8')
    cv2.circle(mask, (int(x), int(y)), int(dim / 1.5),
               (255, 255, 255), -1, cv2.LINE_AA)

    # The third positional parameter of cv2.blur is ``dst``, not the border
    # mode, so the border mode has to be passed by keyword.
    ksize = (blur, blur)
    mask = cv2.blur(mask, ksize, borderType=cv2.BORDER_DEFAULT)
    blured = cv2.blur(frame, ksize, borderType=cv2.BORDER_DEFAULT)

    # The mask is inverted so that the blurred frame is weighted outside the
    # disc and the original frame inside it.
    blended = alpha_blend(frame, blured, 255 - mask)
    return cv2.cvtColor(blended, cv2.COLOR_BGRA2BGR)
|
|
|
|
class GoPixelSlice(Structure):
    """ctypes structure used to pass a pixel buffer to the Go library.

    Maps to the C type ``struct { void *data; GoInt len; GoInt cap; }``.
    """

    _fields_ = [
        ("pixels", POINTER(c_ubyte)),
        ("len", c_longlong),
        ("cap", c_longlong),
    ]
|
|
|
|
# Obtain the camera pixels and transfer them to Go through Ctypes
def process_frame(pixs):
    """Run the Go detector on one frame and return its detection rows.

    Args:
        pixs: flat sequence of 8-bit pixel values (the caller passes a
            flattened numpy.uint8 array).

    Returns:
        A list of rows, each holding ARRAY_DIM integers (the caller unpacks
        them as row, col, scale, q, det_type, mouth_ar). An empty list is
        returned when the library hands back a null pointer, so the caller's
        unpacking loop simply has nothing to iterate over.
    """
    # Hand numpy's own buffer to Go instead of copying it element by element
    # into a fresh ctypes array on every frame. ``buf`` stays referenced for
    # the duration of the call, which keeps the memory alive.
    buf = np.ascontiguousarray(pixs, dtype=np.uint8).ravel()
    pixels = buf.ctypes.data_as(POINTER(c_ubyte))
    faces = GoPixelSlice(pixels, buf.size, buf.size)

    pigo.FindFaces.argtypes = [GoPixelSlice]
    pigo.FindFaces.restype = c_void_p

    # Call the exported FindFaces function from Go.
    ndets = pigo.FindFaces(faces)
    result_type = (c_longlong * ARRAY_DIM) * MAX_NDETS
    data_pointer = cast(ndets, POINTER(result_type))
    if not data_pointer:
        return []

    # View the first NUM_LP rows of the returned buffer as a 2-D array.
    res = np.ndarray(buffer=data_pointer.contents, dtype=c_longlong,
                     shape=(NUM_LP, ARRAY_DIM,))

    # The first value of the buffer array represents the buffer length.
    dets_len = res[0][0]
    # Drop that header row; np.delete returns a new array, so the rows handed
    # back no longer alias the buffer owned by the library.
    res = np.delete(res, 0, 0)

    # The detection length is multiplied by the number of rows emitted per
    # face (face, pupils and facial landmark points).
    # NOTE(review): the original comment said "in total 18" while the code
    # multiplies by 19 - confirm the per-face row count against talkdet.go.
    rows_per_face = 19
    return list(res.reshape(-1, ARRAY_DIM))[0:dets_len * rows_per_face]
|
|
|
|
# initialize the camera
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

# Overlay toggles, flipped at runtime with the 'w', 'e' and 'r' keys.
showFaceDet = False
showPupil = True
showLandmarkPoints = True

# counter: closed-mouth detections seen since the mouth was last open.
counter = 0
talking = False

try:
    while True:
        ret, frame = cap.read()
        # A failed read gives no usable frame; stop instead of crashing on
        # the channel indexing below.
        if not ret or frame is None:
            print('could not read a frame from the camera')
            break

        # Channel 1 of the captured frame is used as the single-channel
        # input for the detector.
        pixs = np.ascontiguousarray(
            frame[:, :, 1].reshape((frame.shape[0], frame.shape[1])))
        pixs = pixs.flatten()

        # Verify if camera is initialized by checking if pixel array is not empty.
        if np.any(pixs):
            dets = process_frame(pixs)  # pixs needs to be numpy.uint8 array

            if dets is not None:
                face_col, face_row, face_dim = 0, 0, 0
                # We know that the detected faces are taking place in the
                # first positions of the multidimensional array.
                for row, col, scale, q, det_type, mouth_ar in dets:
                    if q > 50:
                        if det_type == 0:  # 0 == face;
                            face_col, face_row, face_dim = col, row, scale
                            if showFaceDet:
                                cv2.rectangle(frame,
                                              (int(col)-int(scale/2), int(row)-int(scale/2)),
                                              (int(col)+int(scale/2), int(row)+int(scale/2)),
                                              (0, 0, 255), 2
                                              )
                        elif det_type == 1:  # 1 == pupil;
                            if showPupil:
                                cv2.circle(frame, (int(col), int(row)), 4, (0, 0, 255), -1, 8, 0)
                        elif det_type == 2:  # 2 == facial landmark;
                            if showLandmarkPoints:
                                cv2.circle(frame, (int(col), int(row)), 4, (0, 255, 0), -1, 8, 0)
                        elif det_type == 3:
                            if mouth_ar == 1:  # mouth is open
                                talking = True
                                counter = 0
                            else:  # mouth is closed
                                if counter < MOUTH_AR_CONSEC_FRAMES:
                                    counter += 1
                                else:
                                    talking = False
                                    counter = 0

                # NOTE(review): indentation was lost in this copy of the file;
                # this step is assumed to run once per frame, after all
                # detections have been processed - confirm against upstream.
                if talking and counter < MOUTH_AR_CONSEC_FRAMES:
                    frame = apply_circle_focus_blur(frame, face_col, face_row, face_dim, 25)
                    cv2.putText(frame, "Person talking...", (10, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 1)

        cv2.imshow('Talk detection demo', frame)

        # 'q' quits; 'w', 'e' and 'r' toggle the individual overlays.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
        elif key == ord('w'):
            showFaceDet = not showFaceDet
        elif key == ord('e'):
            showPupil = not showPupil
        elif key == ord('r'):
            showLandmarkPoints = not showLandmarkPoints
finally:
    # Release the camera and close the window even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()