From 1be84d6833e7502a4bf136add48ba55ec0cfcd48 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Thu, 21 Aug 2025 05:30:14 -0600 Subject: [PATCH] Add automatic RKNN conversion and support for semantic search model (#19676) * Create RKNN model runner and and use for jina v1 clip * Formatting * Handle model type inference * Properly provide input to RKNN * Adjust rknn conversion * Update docs * Formatting * Fix path handling * Handle inputs * Cleanup * Change normalization for better accuracy * Clarify supported models * Remove testing --- .../hardware_acceleration_enrichments.md | 7 +- docs/docs/configuration/semantic_search.md | 4 +- frigate/embeddings/onnx/runner.py | 162 +++++++++++++++++- frigate/util/rknn_converter.py | 83 +++++++-- 4 files changed, 233 insertions(+), 23 deletions(-) diff --git a/docs/docs/configuration/hardware_acceleration_enrichments.md b/docs/docs/configuration/hardware_acceleration_enrichments.md index 1f894d345..552dbd2e7 100644 --- a/docs/docs/configuration/hardware_acceleration_enrichments.md +++ b/docs/docs/configuration/hardware_acceleration_enrichments.md @@ -5,11 +5,11 @@ title: Enrichments # Enrichments -Some of Frigate's enrichments can use a discrete GPU for accelerated processing. +Some of Frigate's enrichments can use a discrete GPU / NPU for accelerated processing. ## Requirements -Object detection and enrichments (like Semantic Search, Face Recognition, and License Plate Recognition) are independent features. To use a GPU for object detection, see the [Object Detectors](/configuration/object_detectors.md) documentation. If you want to use your GPU for any supported enrichments, you must choose the appropriate Frigate Docker image for your GPU and configure the enrichment according to its specific documentation. +Object detection and enrichments (like Semantic Search, Face Recognition, and License Plate Recognition) are independent features. To use a GPU / NPU for object detection, see the [Object Detectors](/configuration/object_detectors.md) documentation. If you want to use your GPU for any supported enrichments, you must choose the appropriate Frigate Docker image for your GPU / NPU and configure the enrichment according to its specific documentation. - **AMD** @@ -23,6 +23,9 @@ Object detection and enrichments (like Semantic Search, Face Recognition, and Li - Nvidia GPUs will automatically be detected and used for enrichments in the `-tensorrt` Frigate image. - Jetson devices will automatically be detected and used for enrichments in the `-tensorrt-jp6` Frigate image. +- **RockChip** + - RockChip NPU will automatically be detected and used for semantic search (v1 only) in the `-rk` Frigate image. + Utilizing a GPU for enrichments does not require you to use the same GPU for object detection. For example, you can run the `tensorrt` Docker image for enrichments and still use other dedicated hardware like a Coral or Hailo for object detection. However, one combination that is not supported is TensorRT for object detection and OpenVINO for enrichments. :::note diff --git a/docs/docs/configuration/semantic_search.md b/docs/docs/configuration/semantic_search.md index e04df3a56..558088646 100644 --- a/docs/docs/configuration/semantic_search.md +++ b/docs/docs/configuration/semantic_search.md @@ -78,7 +78,7 @@ Switching between V1 and V2 requires reindexing your embeddings. The embeddings ### GPU Acceleration -The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware, when available. 
This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation. +The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU / NPU hardware, when available. This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation. ```yaml semantic_search: @@ -90,7 +90,7 @@ semantic_search: :::info -If the correct build is used for your GPU and the `large` model is configured, then the GPU will be detected and used automatically. +If the correct build is used for your GPU / NPU and the `large` model is configured, then the GPU / NPU will be detected and used automatically. Specify the `device` option to target a specific GPU in a multi-GPU system (see [onnxruntime's provider options](https://onnxruntime.ai/docs/execution-providers/)). If you do not specify a device, the first available GPU will be used. diff --git a/frigate/embeddings/onnx/runner.py b/frigate/embeddings/onnx/runner.py index c34c97a8d..3a2acc7f6 100644 --- a/frigate/embeddings/onnx/runner.py +++ b/frigate/embeddings/onnx/runner.py @@ -4,10 +4,12 @@ import logging import os.path from typing import Any +import numpy as np import onnxruntime as ort from frigate.const import MODEL_CACHE_DIR from frigate.util.model import get_ort_providers +from frigate.util.rknn_converter import auto_convert_model, is_rknn_compatible try: import openvino as ov @@ -25,7 +27,33 @@ class ONNXModelRunner: self.model_path = model_path self.ort: ort.InferenceSession = None self.ov: ov.Core = None - providers, options = get_ort_providers(device == "CPU", device, requires_fp16) + self.rknn = None + self.type = "ort" + + try: + if device != "CPU" and is_rknn_compatible(model_path): + # Try to auto-convert to RKNN format + rknn_path = auto_convert_model(model_path) + if rknn_path: + try: + self.rknn = RKNNModelRunner(rknn_path, device) + self.type = "rknn" + logger.info(f"Using RKNN model: {rknn_path}") + return + except Exception as e: + logger.debug( + f"Failed to load RKNN model, falling back to ONNX: {e}" + ) + self.rknn = None + except ImportError: + pass + + # Fall back to standard ONNX providers + providers, options = get_ort_providers( + device == "CPU", + device, + requires_fp16, + ) self.interpreter = None if "OpenVINOExecutionProvider" in providers: @@ -55,7 +83,9 @@ class ONNXModelRunner: ) def get_input_names(self) -> list[str]: - if self.type == "ov": + if self.type == "rknn": + return self.rknn.get_input_names() + elif self.type == "ov": input_names = [] for input in self.interpreter.inputs: @@ -67,7 +97,9 @@ class ONNXModelRunner: def get_input_width(self): """Get the input width of the model regardless of backend.""" - if self.type == "ort": + if self.type == "rknn": + return self.rknn.get_input_width() + elif self.type == "ort": return self.ort.get_inputs()[0].shape[3] elif self.type == "ov": input_info = self.interpreter.inputs @@ -90,8 +122,10 @@ class ONNXModelRunner: return -1 return -1 - def run(self, input: dict[str, Any]) -> Any: - if self.type == "ov": + def run(self, input: dict[str, Any]) -> Any | None: + if self.type == "rknn": + return self.rknn.run(input) + elif self.type == "ov": infer_request = self.interpreter.create_infer_request() try: @@ -107,3 +141,121 @@ class ONNXModelRunner: return outputs elif self.type == "ort": return self.ort.run(None, input) + + +class RKNNModelRunner: + """Run RKNN models for embeddings.""" + + def __init__(self, model_path: str, device: str = "AUTO", model_type: str 
= None): + self.model_path = model_path + self.device = device + self.model_type = model_type + self.rknn = None + self._load_model() + + def _load_model(self): + """Load the RKNN model.""" + try: + from rknnlite.api import RKNNLite + + self.rknn = RKNNLite(verbose=False) + + if self.rknn.load_rknn(self.model_path) != 0: + logger.error(f"Failed to load RKNN model: {self.model_path}") + raise RuntimeError("Failed to load RKNN model") + + if self.rknn.init_runtime() != 0: + logger.error("Failed to initialize RKNN runtime") + raise RuntimeError("Failed to initialize RKNN runtime") + + logger.info(f"Successfully loaded RKNN model: {self.model_path}") + + except ImportError: + logger.error("RKNN Lite not available") + raise ImportError("RKNN Lite not available") + except Exception as e: + logger.error(f"Error loading RKNN model: {e}") + raise + + def get_input_names(self) -> list[str]: + """Get input names for the model.""" + # For CLIP models, we need to determine the model type from the path + model_name = os.path.basename(self.model_path).lower() + + if "vision" in model_name: + return ["pixel_values"] + else: + # Default fallback - try to infer from model type + if self.model_type and "jina-clip" in self.model_type: + if "vision" in self.model_type: + return ["pixel_values"] + + # Generic fallback + return ["input"] + + def get_input_width(self) -> int: + """Get the input width of the model.""" + # For CLIP vision models, this is typically 224 + model_name = os.path.basename(self.model_path).lower() + if "vision" in model_name: + return 224 # CLIP V1 uses 224x224 + return -1 + + def run(self, inputs: dict[str, Any]) -> Any: + """Run inference with the RKNN model.""" + if not self.rknn: + raise RuntimeError("RKNN model not loaded") + + try: + input_names = self.get_input_names() + rknn_inputs = [] + + for name in input_names: + if name in inputs: + if name == "pixel_values": + # RKNN expects NHWC format, but ONNX typically provides NCHW + # Transpose from [batch, channels, height, width] to [batch, height, width, channels] + pixel_data = inputs[name] + if len(pixel_data.shape) == 4 and pixel_data.shape[1] == 3: + # Transpose from NCHW to NHWC + pixel_data = np.transpose(pixel_data, (0, 2, 3, 1)) + rknn_inputs.append(pixel_data) + else: + rknn_inputs.append(inputs[name]) + else: + logger.warning(f"Input '{name}' not found in inputs, using default") + + if name == "pixel_values": + batch_size = 1 + if inputs: + for val in inputs.values(): + if hasattr(val, "shape") and len(val.shape) > 0: + batch_size = val.shape[0] + break + # Create default in NHWC format as expected by RKNN + rknn_inputs.append( + np.zeros((batch_size, 224, 224, 3), dtype=np.float32) + ) + else: + batch_size = 1 + if inputs: + for val in inputs.values(): + if hasattr(val, "shape") and len(val.shape) > 0: + batch_size = val.shape[0] + break + rknn_inputs.append(np.zeros((batch_size, 1), dtype=np.float32)) + + outputs = self.rknn.inference(inputs=rknn_inputs) + return outputs + + except Exception as e: + logger.error(f"Error during RKNN inference: {e}") + raise + + def __del__(self): + """Cleanup when the runner is destroyed.""" + if self.rknn: + try: + self.rknn.release() + except Exception: + pass diff --git a/frigate/util/rknn_converter.py b/frigate/util/rknn_converter.py index a6e70c3cb..e42547320 100644 --- a/frigate/util/rknn_converter.py +++ b/frigate/util/rknn_converter.py @@ -27,9 +27,50 @@ MODEL_TYPE_CONFIGS = { "std_values": [[255, 255, 255]], "target_platform": None, # Will be set dynamically }, + 
"jina-clip-v1-vision": { + "mean_values": [[0.48145466 * 255, 0.4578275 * 255, 0.40821073 * 255]], + "std_values": [[0.26862954 * 255, 0.26130258 * 255, 0.27577711 * 255]], + "target_platform": None, # Will be set dynamically + }, } +def get_rknn_model_type(model_path: str) -> str | None: + if all(keyword in str(model_path) for keyword in ["jina-clip-v1", "vision"]): + return "jina-clip-v1-vision" + + model_name = os.path.basename(str(model_path)).lower() + + if any(keyword in model_name for keyword in ["yolo", "yolox", "yolonas"]): + return model_name + + return None + + +def is_rknn_compatible(model_path: str, model_type: str | None = None) -> bool: + """ + Check if a model is compatible with RKNN conversion. + + Args: + model_path: Path to the model file + model_type: Type of the model (if known) + + Returns: + True if the model is RKNN-compatible, False otherwise + """ + soc = get_soc_type() + if soc is None: + return False + + if not model_type: + model_type = get_rknn_model_type(model_path) + + if model_type and model_type in MODEL_TYPE_CONFIGS: + return True + + return False + + def ensure_torch_dependencies() -> bool: """Dynamically install torch dependencies if not available.""" try: @@ -67,13 +108,12 @@ def ensure_torch_dependencies() -> bool: def ensure_rknn_toolkit() -> bool: """Ensure RKNN toolkit is available.""" try: - import rknn # type: ignore # noqa: F401 from rknn.api import RKNN # type: ignore # noqa: F401 logger.debug("RKNN toolkit is already available") return True - except ImportError: - logger.error("RKNN toolkit not found. Please ensure it's installed.") + except ImportError as e: + logger.error(f"RKNN toolkit not found. Please ensure it's installed. {e}") return False @@ -109,11 +149,11 @@ def convert_onnx_to_rknn( True if conversion successful, False otherwise """ if not ensure_torch_dependencies(): - logger.error("PyTorch dependencies not available") + logger.debug("PyTorch dependencies not available") return False if not ensure_rknn_toolkit(): - logger.error("RKNN toolkit not available") + logger.debug("RKNN toolkit not available") return False # Get SoC type if not provided @@ -125,7 +165,7 @@ def convert_onnx_to_rknn( # Get model config for the specified type if model_type not in MODEL_TYPE_CONFIGS: - logger.error(f"Unsupported model type: {model_type}") + logger.debug(f"Unsupported model type: {model_type}") return False config = MODEL_TYPE_CONFIGS[model_type].copy() @@ -138,7 +178,16 @@ def convert_onnx_to_rknn( rknn = RKNN(verbose=True) rknn.config(**config) - if rknn.load_onnx(model=onnx_path) != 0: + if model_type == "jina-clip-v1-vision": + load_output = rknn.load_onnx( + model=onnx_path, + inputs=["pixel_values"], + input_size_list=[[1, 3, 224, 224]], + ) + else: + load_output = rknn.load_onnx(model=onnx_path) + + if load_output != 0: logger.error("Failed to load ONNX model") return False @@ -265,7 +314,7 @@ def is_lock_stale(lock_file_path: Path, max_age: int = 600) -> bool: def wait_for_conversion_completion( - rknn_path: Path, lock_file_path: Path, timeout: int = 300 + model_type: str, rknn_path: Path, lock_file_path: Path, timeout: int = 300 ) -> bool: """ Wait for another process to complete the conversion. 
@@ -307,7 +356,7 @@ def wait_for_conversion_completion( # Check if RKNN file appeared while waiting if rknn_path.exists(): logger.info(f"RKNN model appeared while waiting: {rknn_path}") - return str(rknn_path) + return True # Convert ONNX to RKNN logger.info( @@ -320,12 +369,12 @@ def wait_for_conversion_completion( if onnx_path.exists(): if convert_onnx_to_rknn( - str(onnx_path), str(rknn_path), "yolo-generic", False + str(onnx_path), str(rknn_path), model_type, False ): - return str(rknn_path) + return True logger.error("Failed to convert model after stale lock cleanup") - return None + return False finally: release_conversion_lock(lock_file_path) @@ -338,7 +387,7 @@ def wait_for_conversion_completion( def auto_convert_model( - model_path: str, model_type: str, quantization: bool = False + model_path: str, model_type: str | None = None, quantization: bool = False ) -> Optional[str]: """ Automatically convert a model to RKNN format if needed. @@ -377,6 +426,9 @@ def auto_convert_model( logger.info(f"Converting {model_path} to RKNN format...") rknn_path.parent.mkdir(parents=True, exist_ok=True) + if not model_type: + model_type = get_rknn_model_type(base_path) + if convert_onnx_to_rknn( str(base_path), str(rknn_path), model_type, quantization ): @@ -392,7 +444,10 @@ def auto_convert_model( f"Another process is converting {model_path}, waiting for completion..." ) - if wait_for_conversion_completion(rknn_path, lock_file_path): + if not model_type: + model_type = get_rknn_model_type(base_path) + + if wait_for_conversion_completion(model_type, rknn_path, lock_file_path): return str(rknn_path) else: logger.error(f"Timeout waiting for conversion of {model_path}")
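
For reference, below is a minimal sketch (not part of the patch) of how the conversion helper and runner added above might be exercised directly. The ONNX path is a placeholder for illustration only, and the snippet assumes it runs on a Rockchip board with the RKNN Lite runtime installed; in Frigate itself this path is taken automatically by `ONNXModelRunner` whenever the configured device is not `CPU`, so callers do not need to do this by hand.

```python
# Minimal sketch, not part of the diff: exercise auto_convert_model() and
# RKNNModelRunner as defined above. The model path below is hypothetical.
import numpy as np

from frigate.embeddings.onnx.runner import RKNNModelRunner
from frigate.util.rknn_converter import auto_convert_model, is_rknn_compatible

onnx_path = "/config/model_cache/jina-clip-v1/vision_model.onnx"  # placeholder path

if is_rknn_compatible(onnx_path):
    # Converts the ONNX model once (guarded by a lock file) and returns the
    # path to the cached .rknn file, or None if conversion did not succeed.
    rknn_path = auto_convert_model(onnx_path)
    if rknn_path:
        runner = RKNNModelRunner(rknn_path)
        # Inputs are passed ONNX-style (NCHW); the runner transposes
        # pixel_values to NHWC before calling RKNNLite.inference().
        pixel_values = np.zeros((1, 3, 224, 224), dtype=np.float32)
        outputs = runner.run({"pixel_values": pixel_values})
```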