diff --git a/docs/docs/frigate/hardware.md b/docs/docs/frigate/hardware.md index ea387625f..aa3e4044e 100644 --- a/docs/docs/frigate/hardware.md +++ b/docs/docs/frigate/hardware.md @@ -180,12 +180,12 @@ Inference speeds will vary greatly depending on the GPU and the model used. ✅ - Accelerated with CUDA Graphs ❌ - Not accelerated with CUDA Graphs -| Name | ✅ YOLOv9 Inference Time | ✅ RF-DETR Inference Time | ❌ YOLO-NAS Inference Time -| --------------- | ------------------------ | ------------------------- | -------------------------- | -| RTX 3050 | t-320: 8 ms s-320: 10 ms | Nano-320: ~ 12 ms | 320: ~ 10 ms 640: ~ 16 ms | -| RTX 3070 | t-320: 6 ms s-320: 8 ms | Nano-320: ~ 9 ms | 320: ~ 8 ms 640: ~ 14 ms | -| RTX A4000 | | | 320: ~ 15 ms | -| Tesla P40 | | | 320: ~ 105 ms | +| Name | ✅ YOLOv9 Inference Time | ✅ RF-DETR Inference Time | ❌ YOLO-NAS Inference Time | +| --------------- | ------------------------------------- | ------------------------- | --------------------------- | +| RTX 3050 | t-320: 8 ms s-320: 10 ms s-640: 28 ms | Nano-320: ~ 12 ms | 320: ~ 10 ms 640: ~ 16 ms | +| RTX 3070 | t-320: 6 ms s-320: 8 ms s-640: 25 ms | Nano-320: ~ 9 ms | 320: ~ 8 ms 640: ~ 14 ms | +| RTX A4000 | | | 320: ~ 15 ms | +| Tesla P40 | | | 320: ~ 105 ms | ### Apple Silicon @@ -197,10 +197,11 @@ Apple Silicon can not run within a container, so a ZMQ proxy is utilized to comm ::: -| Name | YOLOv9 Inference Time | -| --------- | ---------------------- | -| M3 Pro | t-320: 6 ms s-320: 8ms | -| M1 | s-320: 9ms | +| Name | YOLOv9 Inference Time | +| --------- | ------------------------------------ | +| M4 | s-320: 10 ms | +| M3 Pro | t-320: 6 ms s-320: 8 ms s-640: 20 ms | +| M1 | s-320: 9ms | ### ROCm - AMD GPU @@ -234,7 +235,7 @@ The MX3 is a pipelined architecture, where the maximum frames per second support | YOLOv9s | 640 | ~ 41 ms | ~ 110 | | YOLOX-Small | 640 | ~ 16 ms | ~ 263 | | SSDlite MobileNet v2 | 320 | ~ 5 ms | ~ 1056 | - + Inference speeds may vary depending on the 
host platform. The above data was measured on an **Intel 13700 CPU**. Platforms like Raspberry Pi, Orange Pi, and other ARM-based SBCs have different levels of processing capability, which may limit total FPS. ### Nvidia Jetson diff --git a/frigate/detectors/plugins/hailo8l.py b/frigate/detectors/plugins/hailo8l.py index aa856dd80..cafc809c9 100755 --- a/frigate/detectors/plugins/hailo8l.py +++ b/frigate/detectors/plugins/hailo8l.py @@ -33,10 +33,6 @@ def preprocess_tensor(image: np.ndarray, model_w: int, model_h: int) -> np.ndarr image = image[0] h, w = image.shape[:2] - - if (w, h) == (320, 320) and (model_w, model_h) == (640, 640): - return cv2.resize(image, (model_w, model_h), interpolation=cv2.INTER_LINEAR) - scale = min(model_w / w, model_h / h) new_w, new_h = int(w * scale), int(h * scale) resized_image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_CUBIC) diff --git a/frigate/util/object.py b/frigate/util/object.py index d9a8c2f71..905745da6 100644 --- a/frigate/util/object.py +++ b/frigate/util/object.py @@ -269,7 +269,20 @@ def is_object_filtered(obj, objects_to_track, object_filters): def get_min_region_size(model_config: ModelConfig) -> int: """Get the min region size.""" - return max(model_config.height, model_config.width) + largest_dimension = max(model_config.height, model_config.width) + + if largest_dimension > 320: + # We originally tested allowing any model to have a region down to half of the model size + # but this led to many false positives. In this case we specifically target larger models + # which can benefit from a smaller region in some cases to detect smaller objects. + half = int(largest_dimension / 2) + + if half % 4 == 0: + return half + + return int((half + 3) / 4) * 4 + + return largest_dimension def create_tensor_input(frame, model_config: ModelConfig, region):