Implemented the Python part

2025-10-06 08:36:54 +08:00 · 2019-10-30 15:05:35 +02:00
parent 0239a7f5a2
commit f61761eb9b
1 changed files with 19 additions and 19 deletions
--- a/examples/talk_detector/talkdet.py
+++ b/examples/talk_detector/talkdet.py
@@ -13,6 +13,8 @@ os.system('rm talkdet.so')
 MAX_NDETS = 2024
 ARRAY_DIM = 6

+MOUTH_AR_THRESH = 0.2
+
 # define class GoPixelSlice to map to:
 # C type struct { void *data; GoInt len; GoInt cap; }
 class GoPixelSlice(Structure):
@@ -24,16 +26,16 @@ class GoPixelSlice(Structure):
 def process_frame(pixs):
 	dets = np.zeros(ARRAY_DIM * MAX_NDETS, dtype=np.float32)
 	pixels = cast((c_ubyte * len(pixs))(*pixs), POINTER(c_ubyte))
-	
+
 	# call FindFaces
 	faces = GoPixelSlice(pixels, len(pixs), len(pixs))
 	pigo.FindFaces.argtypes = [GoPixelSlice]
 	pigo.FindFaces.restype = c_void_p

-	# Call the exported FindFaces function from Go. 
+	# Call the exported FindFaces function from Go.
 	ndets = pigo.FindFaces(faces)
 	data_pointer = cast(ndets, POINTER((c_longlong * ARRAY_DIM) * MAX_NDETS))
-	
+
 	if data_pointer :
 		buffarr = ((c_longlong * ARRAY_DIM) * MAX_NDETS).from_address(addressof(data_pointer.contents))
 		res = np.ndarray(buffer=buffarr, dtype=c_longlong, shape=(MAX_NDETS, ARRAY_DIM,))
@@ -42,7 +44,7 @@ def process_frame(pixs):
 		dets_len = res[0][0]
 		res = np.delete(res, 0, 0) # delete the first element from the array

-		# We have to multiply the detection length with the total 
+		# We have to multiply the detection length by the total number of
 		# detection points (face, pupils and facial landmark points), in total 18
 		dets = list(res.reshape(-1, ARRAY_DIM))[0:dets_len*19]
 		return dets
@@ -52,7 +54,7 @@ cap = cv2.VideoCapture(0)
 cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
 cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

-# Changing the camera resolution introduce a short delay in the camera initialization. 
+# Changing the camera resolution introduces a short delay in the camera initialization.
 # For this reason we should delay the object detection process by a few milliseconds.
 time.sleep(0.4)

@@ -71,23 +73,21 @@ while(True):

 		if dets is not None:
 			# We know that the detected faces occupy the first positions of the multidimensional array.
-			for det in dets:
-				if det[3] > 50:
-					if det[4] == 0: # 0 == face;
+			for row, col, scale, q, det_type, mouth_ar in dets:
+				if q > 50:
+					if det_type == 0: # 0 == face;
 						if showFaceDet:
-							cv2.rectangle(frame, 
-								(int(det[1])-int(det[2]/2), int(det[0])-int(det[2]/2)), 
-								(int(det[1])+int(det[2]/2), int(det[0])+int(det[2]/2)), 
-								(0, 0, 255), 2
-							)
-					elif det[4] == 1: # 1 == pupil;
+							cv2.rectangle(frame, (col-scale/2, row-scale/2), (col+scale/2, row+scale/2), (0, 0, 255), 2)
+					elif det_type == 1: # 1 == pupil;
 						if showPupil:
-							cv2.circle(frame, (int(det[1]), int(det[0])), 4, (0, 0, 255), -1, 8, 0)						
-					elif det[4] == 2: # 2 == facial landmark;
+							cv2.circle(frame, (int(col), int(row)), 4, (0, 0, 255), -1, 8, 0)
+					elif det_type == 2: # 2 == facial landmark;
 						if showLandmarkPoints:
-							cv2.circle(frame, (int(det[1]), int(det[0])), 4, (0, 255, 0), -1, 8, 0)
-					elif det[4] == 3:
-						print(det[5])
+							cv2.circle(frame, (int(col), int(row)), 4, (0, 255, 0), -1, 8, 0)
+					elif det_type == 3:
+						if mouth_ar < MOUTH_AR_THRESH: # mouth is open
+							cv2.putText(frame, "TALKING!", (10, 30),
+								cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

 	cv2.imshow('', frame)