increase queue size and add ability to take every nth frame

add back queue full message
add location masking for detected objects
2025-12-24 11:51:39 +08:00 · 2019-04-19 08:23:07 -05:00 · 2019-04-19 06:37:29 -05:00 · 2019-04-14 11:58:33 -05:00 · 2019-04-14 11:28:50 -05:00
4 changed files with 116 additions and 46 deletions
--- a/config/back-mask.bmp
+++ b/config/back-mask.bmp
--- a/config/config.yml
+++ b/config/config.yml
@@ -17,33 +17,26 @@ cameras:
      - size: 350
        x_offset: 0
        y_offset: 300
-        min_person_area: 5000
      - size: 400
        x_offset: 350
        y_offset: 250
-        min_person_area: 2000
      - size: 400
        x_offset: 750
        y_offset: 250
-        min_person_area: 2000
-  back2:
-    rtsp:
-      user: viewer
-      host: 10.0.10.10
-      port: 554
-      # values that begin with a "$" will be replaced with environment variable
-      password: $RTSP_PASSWORD
-      path: /cam/realmonitor?channel=1&subtype=2
-    regions:
-      - size: 350
-        x_offset: 0
-        y_offset: 300
-        min_person_area: 5000
-      - size: 400
-        x_offset: 350
-        y_offset: 250
-        min_person_area: 2000
-      - size: 400
-        x_offset: 750
-        y_offset: 250
-        min_person_area: 2000
+    mask: back-mask.bmp
+    known_sizes:
+      - y: 300
+        min: 700
+        max: 1800
+      - y: 400
+        min: 3000
+        max: 7200
+      - y: 500
+        min: 8500
+        max: 20400
+      - y: 600
+        min: 10000
+        max: 50000
+      - y: 700
+        min: 10000
+        max: 125000
--- a/detect_objects.py
+++ b/detect_objects.py
@@ -36,12 +36,12 @@ def main():
    client.loop_start()
    
    # Queue for prepped frames, max size set to (number of cameras * 10)
-    max_queue_size = len(CONFIG['cameras'].items())*5
+    max_queue_size = len(CONFIG['cameras'].items())*10
    prepped_frame_queue = queue.Queue(max_queue_size)

    cameras = {}
    for name, config in CONFIG['cameras'].items():
-        cameras[name] = Camera(name, config, prepped_frame_queue, client, MQTT_TOPIC_PREFIX)
+        cameras[name] = Camera(name, config, prepped_frame_queue, client, MQTT_TOPIC_PREFIX, DEBUG)

    prepped_queue_processor = PreppedQueueProcessor(
        cameras,
--- a/frigate/video.py
+++ b/frigate/video.py
@@ -5,6 +5,7 @@ import cv2
 import threading
 import ctypes
 import multiprocessing as mp
+import numpy as np
 from object_detection.utils import visualization_utils as vis_util
 from . util import tonumpyarray
 from . object_detection import FramePrepper
@@ -12,7 +13,7 @@ from . objects import ObjectCleaner, BestPersonFrame
 from . mqtt import MqttObjectPublisher

 # fetch the frames as fast as possible and store current frame in a shared memory array
-def fetch_frames(shared_arr, shared_frame_time, frame_lock, frame_ready, frame_shape, rtsp_url):
+def fetch_frames(shared_arr, shared_frame_time, frame_lock, frame_ready, frame_shape, rtsp_url, take_frame=1):
    # convert shared memory array into numpy and shape into image array
    arr = tonumpyarray(shared_arr).reshape(frame_shape)

@@ -23,6 +24,7 @@ def fetch_frames(shared_arr, shared_frame_time, frame_lock, frame_ready, frame_s
    video.set(cv2.CAP_PROP_BUFFERSIZE,1)

    bad_frame_counter = 0
+    frame_num = 0
    while True:
        # check if the video stream is still open, and reopen if needed
        if not video.isOpened():
@@ -35,6 +37,9 @@ def fetch_frames(shared_arr, shared_frame_time, frame_lock, frame_ready, frame_s
        # snapshot the time the frame was grabbed
        frame_time = datetime.datetime.now()
        if ret:
+            frame_num += 1
+            if (frame_num % take_frame) != 0:
+                continue
            # go ahead and decode the current frame
            ret, frame = video.retrieve()
            if ret:
@@ -108,17 +113,70 @@ def get_rtsp_url(rtsp_config):
        rtsp_config['password'], rtsp_config['host'], rtsp_config['port'],
        rtsp_config['path'])

+def compute_sizes(frame_shape, known_sizes, mask):
+    """Estimate the [min, max] person area for every pixel of a frame.
+
+    Sizes start at 0 on row 0, are linearly interpolated row by row between
+    the entries of known_sizes (dicts with 'y', 'min' and 'max' keys) and stay
+    constant below the last known row. Entries repeating an already used y
+    (or a leading y of 0) are ignored. The mask argument is accepted but not
+    applied yet. Returns a uint32 array of shape (height, width, 2).
+    """
+    estimated_sizes = np.zeros((frame_shape[0], frame_shape[1], 2), np.uint32)
+
+    # keep the first entry for each distinct y, in ascending order, so the
+    # step computation never divides by zero and never runs out of entries
+    positions = []
+    previous_y = 0
+    for position in sorted(known_sizes, key=lambda s: s['y']):
+        if position['y'] != previous_y:
+            positions.append(position)
+            previous_y = position['y']
+    remaining = iter(positions)
+
+    def step_sizes(start, end):
+        # per-row change in (min, max) size; flat once no position is left
+        if end is None:
+            return 0, 0
+        y_change = end['y'] - start['y']
+        return ((end['min'] - start['min']) / y_change,
+                (end['max'] - start['max']) / y_change)
+
+    last_position = {'y': 0, 'min': 0, 'max': 0}
+    next_position = next(remaining, None)
+    min_step_size, max_step_size = step_sizes(last_position, next_position)
+    min_current_size = 0
+    max_current_size = 0
+
+    for y_position in range(frame_shape[0]):
+        # fill the row with the estimated size
+        estimated_sizes[y_position, :] = [min_current_size, max_current_size]
+
+        # when a known row is reached, switch to the slope of the next segment
+        if next_position is not None and y_position == next_position['y']:
+            last_position = next_position
+            next_position = next(remaining, None)
+            min_step_size, max_step_size = step_sizes(last_position, next_position)
+
+        min_current_size += min_step_size
+        max_current_size += max_step_size
+
+    # NOTE: mask is accepted for interface compatibility but is not applied yet
+    return estimated_sizes
+
 class Camera:
-    def __init__(self, name, config, prepped_frame_queue, mqtt_client, mqtt_prefix):
+    def __init__(self, name, config, prepped_frame_queue, mqtt_client, mqtt_prefix, debug=False):
        self.name = name
        self.config = config
        self.detected_objects = []
        self.recent_frames = {}
        self.rtsp_url = get_rtsp_url(self.config['rtsp'])
+        self.take_frame = self.config.get('take_frame', 1)
        self.regions = self.config['regions']
        self.frame_shape = get_frame_shape(self.rtsp_url)
        self.mqtt_client = mqtt_client
        self.mqtt_topic_prefix = '{}/{}'.format(mqtt_prefix, self.name)
+        self.debug = debug

        # compute the flattened array length from the shape of the frame
        flat_array_length = self.frame_shape[0] * self.frame_shape[1] * self.frame_shape[2]
@@ -138,7 +196,8 @@ class Camera:

        # create the process to capture frames from the RTSP stream and store in a shared array
        self.capture_process = mp.Process(target=fetch_frames, args=(self.shared_frame_array, 
-            self.shared_frame_time, self.frame_lock, self.frame_ready, self.frame_shape, self.rtsp_url))
+            self.shared_frame_time, self.frame_lock, self.frame_ready, self.frame_shape,
+            self.rtsp_url, self.take_frame))
        self.capture_process.daemon = True

        # for each region, create a separate thread to resize the region and prep for detection
@@ -170,6 +229,20 @@ class Camera:
        # start a thread to publish object scores (currently only person)
        mqtt_publisher = MqttObjectPublisher(self.mqtt_client, self.mqtt_topic_prefix, self.objects_parsed, self.detected_objects)
        mqtt_publisher.start()
+
+        # load in the mask for person detection
+        if 'mask' in self.config:
+            self.mask = cv2.imread("/config/{}".format(self.config['mask']), cv2.IMREAD_GRAYSCALE)
+        else:
+            self.mask = np.zeros((self.frame_shape[0], self.frame_shape[1], 1), np.uint8)
+            self.mask[:] = 255
+
+        # pre-compute estimated person size for every pixel in the image
+        if 'known_sizes' in self.config:
+            self.calculated_person_sizes = compute_sizes((self.frame_shape[0], self.frame_shape[1]), 
+                self.config['known_sizes'], None)
+        else:
+            self.calculated_person_sizes = None
    
    def start(self):
        self.capture_process.start()
@@ -188,23 +261,27 @@ class Camera:
            return

        for obj in objects:
-            if obj['name'] == 'person':
-                person_area = (obj['xmax']-obj['xmin'])*(obj['ymax']-obj['ymin'])
-                # find the matching region
-                region = None
-                for r in self.regions:
-                    if (
-                            obj['xmin'] >= r['x_offset'] and
-                            obj['ymin'] >= r['y_offset'] and
-                            obj['xmax'] <= r['x_offset']+r['size'] and
-                            obj['ymax'] <= r['y_offset']+r['size']
-                        ): 
-                        region = r
-                        break
-                
-                # if the min person area is larger than the
-                # detected person, don't add it to detected objects
-                if region and region['min_person_area'] > person_area:
+            if self.debug:
+                # print out the detected objects, scores and locations
+                print(self.name, obj['name'], obj['score'], obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'])
+            
+            location = (int(obj['ymax']), int((obj['xmax']-obj['xmin'])/2))
+
+            # if the object is in a masked location, continue
+            if self.mask[location[0]][location[1]] == [0]:
+                continue
+
+            if self.calculated_person_sizes is not None and obj['name'] == 'person':
+                person_size_range = self.calculated_person_sizes[location[0]][location[1]]
+
+                # if the person isn't on the ground, continue
+                if(person_size_range[0] == 0 and person_size_range[1] == 0):
+                    continue
+
+                person_size = (obj['xmax']-obj['xmin'])*(obj['ymax']-obj['ymin'])
+
+                # if the person is outside the estimated min/max size range for that location, continue
+                if person_size < person_size_range[0] or person_size > person_size_range[1]:
                    continue

            self.detected_objects.append(obj)
Author	SHA1	Message	Date
blakeblackshear	a7d68a4998	increase queue size and add ability to take every nth frame	2019-04-19 08:23:07 -05:00
blakeblackshear	03e46efcdd	add back queue full message	2019-04-19 06:37:29 -05:00
blakeblackshear	27e39edd65	add location masking for detected objects	2019-04-14 11:58:33 -05:00
blakeblackshear	4f829e818e	implement person filtering with min/max by y position	2019-04-14 11:28:50 -05:00