Cleanup detection (#17785)

* Fix yolov9 NMS * Improve batched yolo NMS * Consolidate grids and strides calculation * Use existing variable * Remove * Ensure init is called
2025-10-30 11:46:21 +08:00 · 2025-04-18 10:26:34 -06:00
parent 14a32a6472
commit 68382d89b4
5 changed files with 116 additions and 121 deletions
--- a/frigate/detectors/detection_api.py
+++ b/frigate/detectors/detection_api.py
@@ -16,7 +16,7 @@ class DetectionApi(ABC):
    @abstractmethod
    def __init__(self, detector_config: BaseDetectorConfig):
        self.detector_config = detector_config
-        self.thresh = 0.5
+        self.thresh = 0.4
        self.height = detector_config.model.height
        self.width = detector_config.model.width

@@ -24,58 +24,21 @@ class DetectionApi(ABC):
    def detect_raw(self, tensor_input):
        pass

-    def post_process_yolonas(self, output):
-        """
-        @param output: output of inference
-        expected shape: [np.array(1, N, 4), np.array(1, N, 80)]
-        where N depends on the input size e.g. N=2100 for 320x320 images
+    def calculate_grids_strides(self) -> None:
+        grids = []
+        expanded_strides = []

-        @return: best results: np.array(20, 6) where each row is
-        in this order (class_id, score, y1/height, x1/width, y2/height, x2/width)
-        """
+        # decode and orient predictions
+        strides = [8, 16, 32]
+        hsizes = [self.height // stride for stride in strides]
+        wsizes = [self.width // stride for stride in strides]

-        N = output[0].shape[1]
+        for hsize, wsize, stride in zip(hsizes, wsizes, strides):
+            xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
+            grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
+            grids.append(grid)
+            shape = grid.shape[:2]
+            expanded_strides.append(np.full((*shape, 1), stride))

-        boxes = output[0].reshape(N, 4)
-        scores = output[1].reshape(N, 80)
-
-        class_ids = np.argmax(scores, axis=1)
-        scores = scores[np.arange(N), class_ids]
-
-        args_best = np.argwhere(scores > self.thresh)[:, 0]
-
-        num_matches = len(args_best)
-        if num_matches == 0:
-            return np.zeros((20, 6), np.float32)
-        elif num_matches > 20:
-            args_best20 = np.argpartition(scores[args_best], -20)[-20:]
-            args_best = args_best[args_best20]
-
-        boxes = boxes[args_best]
-        class_ids = class_ids[args_best]
-        scores = scores[args_best]
-
-        boxes = np.transpose(
-            np.vstack(
-                (
-                    boxes[:, 1] / self.height,
-                    boxes[:, 0] / self.width,
-                    boxes[:, 3] / self.height,
-                    boxes[:, 2] / self.width,
-                )
-            )
-        )
-
-        results = np.hstack(
-            (class_ids[..., np.newaxis], scores[..., np.newaxis], boxes)
-        )
-
-        return np.resize(results, (20, 6))
-
-    def post_process(self, output):
-        if self.detector_config.model.model_type == ModelTypeEnum.yolonas:
-            return self.post_process_yolonas(output)
-        else:
-            raise ValueError(
-                f'Model type "{self.detector_config.model.model_type}" is currently not supported.'
-            )
+        self.grids = np.concatenate(grids, 1)
+        self.expanded_strides = np.concatenate(expanded_strides, 1)