diff --git a/examples/talk_detector/talkdet.py b/examples/talk_detector/talkdet.py
index a515753..4343c1a 100644
--- a/examples/talk_detector/talkdet.py
+++ b/examples/talk_detector/talkdet.py
@@ -13,6 +13,8 @@ os.system('rm talkdet.so')
 MAX_NDETS = 2024
 ARRAY_DIM = 6
 
+MOUTH_AR_THRESH = 0.2  # mouth aspect ratio values below this are treated as an open mouth
+
 # define class GoPixelSlice to map to:
 # C type struct { void *data; GoInt len; GoInt cap; }
 class GoPixelSlice(Structure):
@@ -24,16 +26,16 @@ class GoPixelSlice(Structure):
 def process_frame(pixs):
     dets = np.zeros(ARRAY_DIM * MAX_NDETS, dtype=np.float32)
     pixels = cast((c_ubyte * len(pixs))(*pixs), POINTER(c_ubyte))
-    
+
     # call FindFaces
     faces = GoPixelSlice(pixels, len(pixs), len(pixs))
     pigo.FindFaces.argtypes = [GoPixelSlice]
     pigo.FindFaces.restype = c_void_p
 
-    # Call the exported FindFaces function from Go. 
+    # Call the exported FindFaces function from Go.
     ndets = pigo.FindFaces(faces)
     data_pointer = cast(ndets, POINTER((c_longlong * ARRAY_DIM) * MAX_NDETS))
-    
+
     if data_pointer :
         buffarr = ((c_longlong * ARRAY_DIM) * MAX_NDETS).from_address(addressof(data_pointer.contents))
         res = np.ndarray(buffer=buffarr, dtype=c_longlong, shape=(MAX_NDETS, ARRAY_DIM,))
@@ -42,7 +44,7 @@ def process_frame(pixs):
         dets_len = res[0][0]
         res = np.delete(res, 0, 0) # delete the first element from the array
 
-        # We have to multiply the detection length with the total 
+        # We have to multiply the detection length by the total
         # detection points(face, pupils and facial lendmark points), in total 18
         dets = list(res.reshape(-1, ARRAY_DIM))[0:dets_len*19]
     return dets
@@ -52,7 +54,7 @@ cap = cv2.VideoCapture(0)
 cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
 cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
 
-# Changing the camera resolution introduce a short delay in the camera initialization. 
+# Changing the camera resolution introduces a short delay in the camera initialization.
 # For this reason we should delay the object detection process with a few milliseconds.
 time.sleep(0.4)
 
@@ -71,23 +73,21 @@ while(True):
     if dets is not None:
         # We know that the detected faces are taking place in the first positions of the multidimensional array.
-        for det in dets:
-            if det[3] > 50:
-                if det[4] == 0: # 0 == face;
+        for row, col, scale, q, det_type, mouth_ar in dets:
+            if q > 50:
+                if det_type == 0: # 0 == face;
                     if showFaceDet:
-                        cv2.rectangle(frame,
-                            (int(det[1])-int(det[2]/2), int(det[0])-int(det[2]/2)),
-                            (int(det[1])+int(det[2]/2), int(det[0])+int(det[2]/2)),
-                            (0, 0, 255), 2
-                        )
-                elif det[4] == 1: # 1 == pupil;
+                        cv2.rectangle(frame, (int(col-scale/2), int(row-scale/2)), (int(col+scale/2), int(row+scale/2)), (0, 0, 255), 2)
+                elif det_type == 1: # 1 == pupil;
                     if showPupil:
-                        cv2.circle(frame, (int(det[1]), int(det[0])), 4, (0, 0, 255), -1, 8, 0)
-                elif det[4] == 2: # 2 == facial landmark;
+                        cv2.circle(frame, (int(col), int(row)), 4, (0, 0, 255), -1, 8, 0)
+                elif det_type == 2: # 2 == facial landmark;
                     if showLandmarkPoints:
-                        cv2.circle(frame, (int(det[1]), int(det[0])), 4, (0, 255, 0), -1, 8, 0)
-                elif det[4] == 3:
-                    print(det[5])
+                        cv2.circle(frame, (int(col), int(row)), 4, (0, 255, 0), -1, 8, 0)
+                elif det_type == 3: # 3 == mouth aspect ratio;
+                    if mouth_ar < MOUTH_AR_THRESH: # mouth is open
+                        cv2.putText(frame, "TALKING!", (10, 30),
+                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
 
     cv2.imshow('', frame)
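
For reference, a minimal sketch of how the new mouth-aspect-ratio check could be factored out of the drawing loop. It assumes, as in the patch, that each row returned by process_frame() has the layout (row, col, scale, q, det_type, mouth_ar) and that rows with det_type == 3 carry the mouth aspect ratio computed on the Go side; the helper name is_talking and the quality_min parameter are illustrative, not part of the example.

MOUTH_AR_THRESH = 0.2  # same threshold value the patch introduces

def is_talking(dets, quality_min=50):
    # Scan the rows produced by process_frame(); det_type 3 carries the mouth
    # aspect ratio in the last slot, and a value below the threshold is
    # treated as an open mouth, exactly as in the drawing loop above.
    for row, col, scale, q, det_type, mouth_ar in dets:
        if q > quality_min and det_type == 3 and mouth_ar < MOUTH_AR_THRESH:
            return True
    return False

# Possible use inside the capture loop:
#   if dets is not None and is_talking(dets):
#       cv2.putText(frame, "TALKING!", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)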