Improve 640x640 model detection of small objects (#20190)

* Allow larger models to have smaller regions
* remove unnecessary hailo resize
* Update benchmark
* Fix table
* Update nvidia specs
Nicolas Mowen
2025-09-23 14:49:54 -06:00
committed by GitHub
parent 2f99a17e64
commit 7e2f5a3017
3 changed files with 26 additions and 16 deletions


@@ -180,10 +180,10 @@ Inference speeds will vary greatly depending on the GPU and the model used.

 ✅ - Accelerated with CUDA Graphs
 ❌ - Not accelerated with CUDA Graphs

-| Name            | ✅ YOLOv9 Inference Time | ✅ RF-DETR Inference Time | ❌ YOLO-NAS Inference Time
-| --------------- | ------------------------ | ------------------------- | -------------------------- |
-| RTX 3050        | t-320: 8 ms s-320: 10 ms | Nano-320: ~ 12 ms         | 320: ~ 10 ms 640: ~ 16 ms  |
-| RTX 3070        | t-320: 6 ms s-320: 8 ms  | Nano-320: ~ 9 ms          | 320: ~ 8 ms 640: ~ 14 ms   |
+| Name            | ✅ YOLOv9 Inference Time               | ✅ RF-DETR Inference Time | ❌ YOLO-NAS Inference Time  |
+| --------------- | ------------------------------------- | ------------------------- | --------------------------- |
+| RTX 3050        | t-320: 8 ms s-320: 10 ms s-640: 28 ms | Nano-320: ~ 12 ms         | 320: ~ 10 ms 640: ~ 16 ms   |
+| RTX 3070        | t-320: 6 ms s-320: 8 ms s-640: 25 ms  | Nano-320: ~ 9 ms          | 320: ~ 8 ms 640: ~ 14 ms    |
 | RTX A4000       |                                       |                           | 320: ~ 15 ms                |
 | Tesla P40       |                                       |                           | 320: ~ 105 ms               |
@@ -198,8 +198,9 @@ Apple Silicon can not run within a container, so a ZMQ proxy is utilized to comm

 :::

 | Name      | YOLOv9 Inference Time |
-| --------- | ---------------------- |
-| M3 Pro    | t-320: 6 ms s-320: 8ms |
+| --------- | ------------------------------------- |
+| M4        | s-320: 10 ms                          |
+| M3 Pro    | t-320: 6 ms s-320: 8 ms s-640: 20 ms  |
 | M1        | s-320: 9ms                            |

 ### ROCm - AMD GPU


@@ -33,10 +33,6 @@ def preprocess_tensor(image: np.ndarray, model_w: int, model_h: int) -> np.ndarray:
     image = image[0]

     h, w = image.shape[:2]

-    if (w, h) == (320, 320) and (model_w, model_h) == (640, 640):
-        return cv2.resize(image, (model_w, model_h), interpolation=cv2.INTER_LINEAR)
-
     scale = min(model_w / w, model_h / h)
     new_w, new_h = int(w * scale), int(h * scale)
     resized_image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
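
With the Hailo-specific stretch removed, 320x320 inputs to a 640x640 model now take the same aspect-preserving letterbox path as everything else. Below is a minimal standalone sketch of that path for illustration; the name letterbox_sketch and the zero-padding step are assumptions here, and the real preprocess_tensor includes layout and dtype handling not shown in this hunk.

import cv2
import numpy as np


def letterbox_sketch(image: np.ndarray, model_w: int, model_h: int) -> np.ndarray:
    # Scale to fit inside (model_w, model_h) while preserving aspect ratio.
    h, w = image.shape[:2]
    scale = min(model_w / w, model_h / h)
    new_w, new_h = int(w * scale), int(h * scale)
    resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_CUBIC)

    # Pad the remainder with black so the output is exactly model_h x model_w.
    return cv2.copyMakeBorder(
        resized,
        0,
        model_h - new_h,
        0,
        model_w - new_w,
        cv2.BORDER_CONSTANT,
        value=(0, 0, 0),
    )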


@@ -269,7 +269,20 @@ def is_object_filtered(obj, objects_to_track, object_filters):
 def get_min_region_size(model_config: ModelConfig) -> int:
     """Get the min region size."""
-    return max(model_config.height, model_config.width)
+    largest_dimension = max(model_config.height, model_config.width)
+
+    if largest_dimension > 320:
+        # We originally tested allowing any model to have a region down to half of the
+        # model size, but this led to many false positives. Here we specifically target
+        # larger models, which can benefit from a smaller region in some cases to detect
+        # smaller objects.
+        half = int(largest_dimension / 2)
+
+        if half % 4 == 0:
+            return half
+
+        # Otherwise round half up to the nearest multiple of 4.
+        return int((half + 3) / 4) * 4
+
+    return largest_dimension
+
+
 def create_tensor_input(frame, model_config: ModelConfig, region):
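
To see what the new floor evaluates to in practice, here is a small standalone sketch with worked examples; min_region_size is a hypothetical helper that takes plain ints in place of ModelConfig.

def min_region_size(width: int, height: int) -> int:
    # Standalone restatement of get_min_region_size above, for illustration only.
    largest = max(height, width)
    if largest > 320:
        half = int(largest / 2)
        if half % 4 == 0:
            return half
        return int((half + 3) / 4) * 4
    return largest


print(min_region_size(320, 320))  # 320: models at or below 320 keep the full model size
print(min_region_size(640, 640))  # 320: half of 640, already a multiple of 4
print(min_region_size(416, 416))  # 208: half of 416, already a multiple of 4
print(min_region_size(420, 420))  # 212: half is 210, rounded up to a multiple of 4

The returned size is never smaller than half the largest model dimension and is always a multiple of 4.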