diff --git a/examples/talk_detector/talkdet.py b/examples/talk_detector/talkdet.py index 4343c1a..365eecf 100644 --- a/examples/talk_detector/talkdet.py +++ b/examples/talk_detector/talkdet.py @@ -15,39 +15,65 @@ ARRAY_DIM = 6 MOUTH_AR_THRESH = 0.2 +def verify_alpha_channel(frame): + try: + frame.shape[3] # 4th position + except IndexError: + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA) + return frame + +def alpha_blend(frame_1, frame_2, mask): + alpha = mask/255.0 + blended = cv2.convertScaleAbs(frame_1*(1-alpha) + frame_2*alpha) + return blended + +def apply_circle_focus_blur(frame, x, y): + frame = verify_alpha_channel(frame) + height, width, _ = frame.shape + mask = np.zeros((height, width, 4), dtype='uint8') + cv2.circle(mask, (int(x), int(y)), int(x/2), + (255, 255, 255), -1, cv2.LINE_AA) + mask = cv2.GaussianBlur(mask, (41, 41), cv2.BORDER_DEFAULT) + blured = cv2.GaussianBlur(frame, (41, 41), cv2.BORDER_DEFAULT) + blended = alpha_blend(frame, blured, 255-mask) + frame = cv2.cvtColor(blended, cv2.COLOR_BGRA2BGR) + return frame + # define class GoPixelSlice to map to: # C type struct { void *data; GoInt len; GoInt cap; } class GoPixelSlice(Structure): - _fields_ = [ - ("pixels", POINTER(c_ubyte)), ("len", c_longlong), ("cap", c_longlong), - ] + _fields_ = [ + ("pixels", POINTER(c_ubyte)), ("len", c_longlong), ("cap", c_longlong), + ] -# Obtain the camera pixels and transfer them to Go trough Ctypes. +# Obtain the camera pixels and transfer them to Go trough Ctypes def process_frame(pixs): - dets = np.zeros(ARRAY_DIM * MAX_NDETS, dtype=np.float32) - pixels = cast((c_ubyte * len(pixs))(*pixs), POINTER(c_ubyte)) + dets = np.zeros(ARRAY_DIM * MAX_NDETS, dtype=np.float32) + pixels = cast((c_ubyte * len(pixs))(*pixs), POINTER(c_ubyte)) - # call FindFaces - faces = GoPixelSlice(pixels, len(pixs), len(pixs)) - pigo.FindFaces.argtypes = [GoPixelSlice] - pigo.FindFaces.restype = c_void_p + # call FindFaces + faces = GoPixelSlice(pixels, len(pixs), len(pixs)) + pigo.FindFaces.argtypes = [GoPixelSlice] + pigo.FindFaces.restype = c_void_p - # Call the exported FindFaces function from Go. - ndets = pigo.FindFaces(faces) - data_pointer = cast(ndets, POINTER((c_longlong * ARRAY_DIM) * MAX_NDETS)) + # Call the exported FindFaces function from Go. + ndets = pigo.FindFaces(faces) + data_pointer = cast(ndets, POINTER((c_longlong * ARRAY_DIM) * MAX_NDETS)) - if data_pointer : - buffarr = ((c_longlong * ARRAY_DIM) * MAX_NDETS).from_address(addressof(data_pointer.contents)) - res = np.ndarray(buffer=buffarr, dtype=c_longlong, shape=(MAX_NDETS, ARRAY_DIM,)) + if data_pointer: + buffarr = ((c_longlong * ARRAY_DIM) * + MAX_NDETS).from_address(addressof(data_pointer.contents)) + res = np.ndarray(buffer=buffarr, dtype=c_longlong, + shape=(MAX_NDETS, ARRAY_DIM,)) - # The first value of the buffer aray represents the buffer length. - dets_len = res[0][0] - res = np.delete(res, 0, 0) # delete the first element from the array + # The first value of the buffer aray represents the buffer length. + dets_len = res[0][0] + res = np.delete(res, 0, 0) # delete the first element from the array - # We have to multiply the detection length with the total - # detection points(face, pupils and facial lendmark points), in total 18 - dets = list(res.reshape(-1, ARRAY_DIM))[0:dets_len*19] - return dets + # We have to multiply the detection length with the total + # detection points(face, pupils and facial lendmark points), in total 18 + dets = list(res.reshape(-1, ARRAY_DIM))[0:dets_len*19] + return dets # initialize the camera cap = cv2.VideoCapture(0) @@ -58,48 +84,53 @@ cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480) # For this reason we should delay the object detection process with a few milliseconds. time.sleep(0.4) -showFaceDet = True +showFaceDet = False showPupil = True showLandmarkPoints = True while(True): - ret, frame = cap.read() - pixs = np.ascontiguousarray(frame[:, :, 1].reshape((frame.shape[0], frame.shape[1]))) - pixs = pixs.flatten() + ret, frame = cap.read() + pixs = np.ascontiguousarray( + frame[:, :, 1].reshape((frame.shape[0], frame.shape[1]))) + pixs = pixs.flatten() - # Verify if camera is intialized by checking if pixel array is not empty. - if np.any(pixs): - dets = process_frame(pixs) # pixs needs to be numpy.uint8 array + # Verify if camera is intialized by checking if pixel array is not empty. + if np.any(pixs): + dets = process_frame(pixs) # pixs needs to be numpy.uint8 array - if dets is not None: - # We know that the detected faces are taking place in the first positions of the multidimensional array. - for row, col, scale, q, det_type, mouth_ar in dets: - if q > 50: - if det_type == 0: # 0 == face; - if showFaceDet: - cv2.rectangle(frame, (col-scale/2, row-scale/2), (col+scale/2, row+scale/2), (0, 0, 255), 2) - elif det_type == 1: # 1 == pupil; - if showPupil: - cv2.circle(frame, (int(col), int(row)), 4, (0, 0, 255), -1, 8, 0) - elif det_type == 2: # 2 == facial landmark; - if showLandmarkPoints: - cv2.circle(frame, (int(col), int(row)), 4, (0, 255, 0), -1, 8, 0) - elif det_type == 3: - if mouth_ar < MOUTH_AR_THRESH: # mouth is open - cv2.putText(frame, "TALKING!", (10, 30), - cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) + if dets is not None: + # We know that the detected faces are taking place in the first positions of the multidimensional array. + for row, col, scale, q, det_type, mouth_ar in dets: + if q > 50: + if det_type == 0: # 0 == face; + if showFaceDet: + cv2.rectangle( + frame, (col-scale/2, row-scale/2), (col+scale/2, row+scale/2), (0, 0, 255), 2) + elif det_type == 1: # 1 == pupil; + if showPupil: + cv2.circle(frame, (int(col), int(row)), + 4, (0, 0, 255), -1, 8, 0) + elif det_type == 2: # 2 == facial landmark; + if showLandmarkPoints: + cv2.circle(frame, (int(col), int(row)), + 4, (0, 255, 0), -1, 8, 0) + elif det_type == 3: + if mouth_ar < MOUTH_AR_THRESH: # mouth is open + frame = apply_circle_focus_blur(frame, col, row) + cv2.putText(frame, "TALKING!", (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) - cv2.imshow('', frame) + cv2.imshow('', frame) - key = cv2.waitKey(1) - if key & 0xFF == ord('q'): - break - elif key & 0xFF == ord('w'): - showFaceDet = not showFaceDet - elif key & 0xFF == ord('e'): - showPupil = not showPupil - elif key & 0xFF == ord('r'): - showLandmarkPoints = not showLandmarkPoints + key = cv2.waitKey(1) + if key & 0xFF == ord('q'): + break + elif key & 0xFF == ord('w'): + showFaceDet = not showFaceDet + elif key & 0xFF == ord('e'): + showPupil = not showPupil + elif key & 0xFF == ord('r'): + showLandmarkPoints = not showLandmarkPoints cap.release() cv2.destroyAllWindows() \ No newline at end of file