mirror of
https://github.com/swdee/go-rknnlite.git
synced 2025-12-24 10:30:56 +08:00
Merge pull request #45 from swdee/midas
Added Midas depth estimation demo
This commit is contained in:
@@ -104,7 +104,8 @@ See the [example](example) directory.
|
||||
* [HTTP Stream with ByteTrack Tracking](example/stream) - Demo that streams a video over HTTP with YOLO object detection and ByteTrack object tracking.
|
||||
* Slicing Aided Hyper Inference
|
||||
* [SAHI YOLO Demo](example/sahi) - YOLO Object detection using SAHI on a 4k image with Pooled inferencing.
|
||||
|
||||
* Depth Estimation
|
||||
* [MiDaS v3.1 Demo](example/midas)
|
||||
|
||||
|
||||
## Converting Inference Models
|
||||
|
||||
92
example/midas/README.md
Normal file
92
example/midas/README.md
Normal file
@@ -0,0 +1,92 @@
|
||||
# MiDaS Depth Estimation Example
|
||||
|
||||
## Overview
|
||||
|
||||
This example uses the [MiDaS v3.1 depth estimation](https://github.com/isl-org/MiDaS/) model
|
||||
for computing depth in a single image.
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
Make sure you have downloaded the data files first for the examples.
|
||||
You only need to do this once for all examples.
|
||||
|
||||
```
|
||||
cd example/
|
||||
git clone --depth=1 https://github.com/swdee/go-rknnlite-data.git data
|
||||
```
|
||||
|
||||

|
||||
|
||||
Run the MiDaS example on the above bedroom scene on rk3588 or replace with your Platform model.
|
||||
```
|
||||
cd example/midas
|
||||
go run midas.go -p rk3588
|
||||
```
|
||||
|
||||
This will result in the output of:
|
||||
```
|
||||
Driver Version: 0.9.6, API Version: 2.3.0 (c949ad889d@2024-11-07T11:35:33)
|
||||
Model Input Number: 1, Output Number: 1
|
||||
Input tensors:
|
||||
index=0, name=input, n_dims=4, dims=[1, 256, 256, 3], n_elems=196608, size=196608, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=0, scale=0.007843
|
||||
Output tensors:
|
||||
index=0, name=depth, n_dims=4, dims=[1, 1, 256, 256], n_elems=65536, size=65536, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=19.864582
|
||||
Model first run speed: inference=577.442314ms, post processing=1.180646ms, rendering=3.137693ms, total time=581.760653ms
|
||||
Saved depth map result to ../data/bedroom-out.jpg
|
||||
Benchmark time=12.167970519s, count=20, average total time=608.398525ms
|
||||
done
|
||||
```
|
||||
|
||||
The saved JPG image with depth estimation map.
|
||||
|
||||

|
||||
|
||||
|
||||
See the help for command line parameters.
|
||||
```
|
||||
$ go run midas.go -h
|
||||
|
||||
Usage of /tmp/go-build2937772053/b001/exe/midas:
|
||||
-i string
|
||||
Image file to run depth estimation on (default "../data/bedroom.jpg")
|
||||
-m string
|
||||
RKNN compiled depth model file (default "../data/models/rk3588/dpt_swin2_tiny_256-rk3588.rknn")
|
||||
-o string
|
||||
Output JPG file (depth visualization) (default "../data/bedroom-out.jpg")
|
||||
-p string
|
||||
Rockchip platform [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588] (default "rk3588")
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
### Docker
|
||||
|
||||
To run the MiDaS example using the prebuilt docker image, make sure the data files have been downloaded first,
|
||||
then run.
|
||||
```
|
||||
# from project root directory
|
||||
|
||||
docker run --rm \
|
||||
--device /dev/dri:/dev/dri \
|
||||
-v "$(pwd):/go/src/app" \
|
||||
-v "$(pwd)/example/data:/go/src/data" \
|
||||
-v "/usr/include/rknn_api.h:/usr/include/rknn_api.h" \
|
||||
-v "/usr/lib/librknnrt.so:/usr/lib/librknnrt.so" \
|
||||
-w /go/src/app \
|
||||
swdee/go-rknnlite:latest \
|
||||
go run ./example/midas/midas.go -p rk3588
|
||||
```
|
||||
|
||||
|
||||
## Benchmarks
|
||||
|
||||
The following table shows a comparison of the benchmark results across the three distinct platforms.
|
||||
|
||||
| Platform | Execution Time | Average Inference Time Per Image |
|
||||
|----------|----------------|----------------------------------|
|
||||
| rk3588 | 12.16s | 608.39ms |
|
||||
| rk3576 | 16.85s | 842.97ms |
|
||||
| rk3566 | 37.49s | 1.87s |
|
||||
|
||||
189
example/midas/midas.go
Normal file
189
example/midas/midas.go
Normal file
@@ -0,0 +1,189 @@
|
||||
/*
|
||||
Example code showing how to perform depth estimation using a MiDaS model.
|
||||
*/
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"image"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/swdee/go-rknnlite"
|
||||
"github.com/swdee/go-rknnlite/postprocess"
|
||||
"gocv.io/x/gocv"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// disable logging timestamps
|
||||
log.SetFlags(0)
|
||||
|
||||
// read in cli flags
|
||||
modelFile := flag.String("m", "../data/models/rk3588/dpt_swin2_tiny_256-rk3588.rknn", "RKNN compiled depth model file")
|
||||
imgFile := flag.String("i", "../data/bedroom.jpg", "Image file to run depth estimation on")
|
||||
saveFile := flag.String("o", "../data/bedroom-out.jpg", "Output JPG file (depth visualization)")
|
||||
rkPlatform := flag.String("p", "rk3588", "Rockchip platform [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588]")
|
||||
|
||||
flag.Parse()
|
||||
|
||||
err := rknnlite.SetCPUAffinityByPlatform(*rkPlatform, rknnlite.FastCores)
|
||||
|
||||
if err != nil {
|
||||
log.Printf("Failed to set CPU affinity: %v\n", err)
|
||||
}
|
||||
|
||||
// check if user specified model file or if default is being used. if default
|
||||
// then pick the default platform model to use.
|
||||
if f := flag.Lookup("m"); f != nil && f.Value.String() == f.DefValue && *rkPlatform != "rk3588" {
|
||||
*modelFile = strings.ReplaceAll(*modelFile, "rk3588", *rkPlatform)
|
||||
}
|
||||
|
||||
// create rknn runtime instance
|
||||
rt, err := rknnlite.NewRuntimeByPlatform(*rkPlatform, *modelFile)
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error initializing RKNN runtime: ", err)
|
||||
}
|
||||
|
||||
// We want float32 outputs for easy depth visualization
|
||||
rt.SetWantFloat(true)
|
||||
|
||||
// optional querying of model file tensors and SDK version for printing
|
||||
// to stdout. not necessary for production inference code
|
||||
err = rt.Query(os.Stdout)
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error querying runtime: ", err)
|
||||
}
|
||||
|
||||
// create midas post processor
|
||||
midasProcessor := postprocess.NewMiDaS(postprocess.MiDaSDefaultParams())
|
||||
|
||||
// load image
|
||||
img := gocv.IMRead(*imgFile, gocv.IMReadColor)
|
||||
|
||||
if img.Empty() {
|
||||
log.Fatal("Error reading image from: ", *imgFile)
|
||||
}
|
||||
|
||||
// convert colorspace and resize image to input tensor size
|
||||
rgbImg := gocv.NewMat()
|
||||
gocv.CvtColor(img, &rgbImg, gocv.ColorBGRToRGB)
|
||||
|
||||
cropImg := rgbImg.Clone()
|
||||
scaleSize := image.Pt(int(rt.InputAttrs()[0].Dims[2]), int(rt.InputAttrs()[0].Dims[1]))
|
||||
gocv.Resize(rgbImg, &cropImg, scaleSize, 0, 0, gocv.InterpolationArea)
|
||||
|
||||
defer img.Close()
|
||||
defer rgbImg.Close()
|
||||
defer cropImg.Close()
|
||||
|
||||
start := time.Now()
|
||||
|
||||
// perform inference on image file
|
||||
outputs, err := rt.Inference([]gocv.Mat{cropImg})
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Runtime inferencing failed with error: ", err)
|
||||
}
|
||||
|
||||
endInference := time.Now()
|
||||
|
||||
// post process and create depth map
|
||||
depthMap := gocv.NewMat()
|
||||
defer depthMap.Close()
|
||||
err = midasProcessor.CreateDepthMap(outputs, depthMap)
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error creating depth map: ", err)
|
||||
}
|
||||
|
||||
endCreateMap := time.Now()
|
||||
|
||||
// resize the color map back to the original input image size
|
||||
resizedMap := gocv.NewMat()
|
||||
defer resizedMap.Close()
|
||||
gocv.Resize(depthMap, &resizedMap, image.Pt(img.Cols(), img.Rows()), 0, 0, gocv.InterpolationCubic)
|
||||
|
||||
endRendering := time.Now()
|
||||
|
||||
log.Printf("Model first run speed: inference=%s, post processing=%s, rendering=%s, total time=%s\n",
|
||||
endInference.Sub(start).String(),
|
||||
endCreateMap.Sub(endInference).String(),
|
||||
endRendering.Sub(endCreateMap).String(),
|
||||
endRendering.Sub(start).String(),
|
||||
)
|
||||
|
||||
// Save the result
|
||||
if ok := gocv.IMWrite(*saveFile, resizedMap); !ok {
|
||||
log.Fatal("Failed to save the image")
|
||||
}
|
||||
|
||||
log.Printf("Saved depth map result to %s\n", *saveFile)
|
||||
|
||||
// free outputs allocated in C memory after you have finished post processing
|
||||
err = outputs.Free()
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error freeing Outputs: ", err)
|
||||
}
|
||||
|
||||
// optional code. run benchmark to get average time
|
||||
runBenchmark(rt, midasProcessor, []gocv.Mat{cropImg}, img)
|
||||
|
||||
// close runtime and release resources
|
||||
err = rt.Close()
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error closing RKNN runtime: ", err)
|
||||
}
|
||||
|
||||
log.Println("done")
|
||||
}
|
||||
|
||||
func runBenchmark(rt *rknnlite.Runtime, midasProcessor *postprocess.MiDaS,
|
||||
mats []gocv.Mat, srcImg gocv.Mat) {
|
||||
|
||||
count := 20
|
||||
start := time.Now()
|
||||
|
||||
depthMap := gocv.NewMat()
|
||||
defer depthMap.Close()
|
||||
resizedMap := gocv.NewMat()
|
||||
defer resizedMap.Close()
|
||||
|
||||
for i := 0; i < count; i++ {
|
||||
// perform inference on image file
|
||||
outputs, err := rt.Inference(mats)
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Runtime inferencing failed with error: ", err)
|
||||
}
|
||||
|
||||
// post process
|
||||
err = midasProcessor.CreateDepthMap(outputs, depthMap)
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error creating depth map: ", err)
|
||||
}
|
||||
|
||||
// resize the color map back to the original input image size
|
||||
gocv.Resize(depthMap, &resizedMap, image.Pt(srcImg.Cols(), srcImg.Rows()), 0, 0, gocv.InterpolationCubic)
|
||||
|
||||
err = outputs.Free()
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("Error freeing Outputs: ", err)
|
||||
}
|
||||
}
|
||||
|
||||
end := time.Now()
|
||||
total := end.Sub(start)
|
||||
avg := total / time.Duration(count)
|
||||
|
||||
log.Printf("Benchmark time=%s, count=%d, average total time=%s\n",
|
||||
total.String(), count, avg.String(),
|
||||
)
|
||||
}
|
||||
170
postprocess/midas.go
Normal file
170
postprocess/midas.go
Normal file
@@ -0,0 +1,170 @@
|
||||
package postprocess
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/swdee/go-rknnlite"
|
||||
"gocv.io/x/gocv"
|
||||
)
|
||||
|
||||
// MiDaS defines the struct for a MiDaS depth estimation inference post processing
|
||||
type MiDaS struct {
|
||||
// Params are the depth map configuration parameters
|
||||
Params MiDaSParams
|
||||
}
|
||||
|
||||
// GrayscaleMap is used to not apply coloring to output depthmap, but to leave as grayscale
|
||||
const GrayscaleMap = gocv.ColormapTypes(9999)
|
||||
|
||||
type MiDaSParams struct {
|
||||
// Invert the depth map
|
||||
Invert bool
|
||||
// Colormap to apply to depth map, if you want it left as grayscale then
|
||||
// pass postprocess.GrayscaleMap
|
||||
Colormap gocv.ColormapTypes
|
||||
}
|
||||
|
||||
// MiDaSDefaultParams sets output depth map to non-inverting and use Hot color scheme
|
||||
func MiDaSDefaultParams() MiDaSParams {
|
||||
return MiDaSParams{
|
||||
Invert: false,
|
||||
Colormap: gocv.ColormapHot,
|
||||
}
|
||||
}
|
||||
|
||||
// NewMiDaS returns and instance of the MiDaS post processor
|
||||
func NewMiDaS(p MiDaSParams) *MiDaS {
|
||||
return &MiDaS{
|
||||
Params: p,
|
||||
}
|
||||
}
|
||||
|
||||
// CreateDepthMap converts the tensor output data into a depth estimation map image
|
||||
func (m *MiDaS) CreateDepthMap(outputs *rknnlite.Outputs, depthMat gocv.Mat) error {
|
||||
|
||||
// output tensor is in NCHW format
|
||||
// get output tensor width/height
|
||||
outH := int(outputs.OutputAttributes().DimHeights[0])
|
||||
outW := int(outputs.OutputAttributes().DimWidths[0])
|
||||
|
||||
// Convert float depth to uint8 visualization
|
||||
depthU8 := m.depthToU8(outputs.Output[0].BufFloat, outH, outW)
|
||||
|
||||
// Make a Mat from bytes
|
||||
u8Mat, err := gocv.NewMatFromBytes(outH, outW, gocv.MatTypeCV8U, depthU8)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to create depth mat: %v", err)
|
||||
}
|
||||
|
||||
defer u8Mat.Close()
|
||||
|
||||
if m.Params.Colormap == GrayscaleMap {
|
||||
// no coloring
|
||||
u8Mat.CopyTo(&depthMat)
|
||||
|
||||
} else {
|
||||
// apply colormap
|
||||
gocv.ApplyColorMap(u8Mat, &depthMat, m.Params.Colormap)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// depthToU8 converts a float32 depth map into an 8-bit visualization image.
|
||||
//
|
||||
// MiDaS outputs “relative depth” values that are not bounded to [0,1] and
|
||||
// can vary per image. To visualize, we normalize the depth values to [0,255]
|
||||
// using the min/max over the whole output map.
|
||||
//
|
||||
// Output layout is row-major grayscale: out[y*w + x]
|
||||
func (m *MiDaS) depthToU8(depth []float32, h, w int) []byte {
|
||||
|
||||
total := h * w
|
||||
out := make([]byte, total)
|
||||
|
||||
// First pass: find min/max depth ignoring NaN/Inf values
|
||||
minV := float32(math.Inf(1))
|
||||
maxV := float32(math.Inf(-1))
|
||||
|
||||
for y := 0; y < h; y++ {
|
||||
for x := 0; x < w; x++ {
|
||||
// Read the depth value at (y,x) from the model output buffer
|
||||
v := m.getDepthAt(depth, y, x, h, w)
|
||||
|
||||
// Skip invalid floating-point values so they don't poison min/max
|
||||
if !m.isFinite32(v) {
|
||||
continue
|
||||
}
|
||||
|
||||
if v < minV {
|
||||
minV = v
|
||||
}
|
||||
|
||||
if v > maxV {
|
||||
maxV = v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Guard against all-invalid outputs or a constant output (max==min)
|
||||
den := maxV - minV
|
||||
if !m.isFinite32(minV) || !m.isFinite32(maxV) || den <= 0 {
|
||||
// Fallback: return all zeros (black image)
|
||||
return out
|
||||
}
|
||||
|
||||
// Second pass: normalize each pixel to [0,1], optionally invert, clamp, then scale to [0,255]
|
||||
for y := 0; y < h; y++ {
|
||||
for x := 0; x < w; x++ {
|
||||
v := m.getDepthAt(depth, y, x, h, w)
|
||||
|
||||
// If this pixel is invalid, pin it to minV so it becomes black after normalization
|
||||
if !m.isFinite32(v) {
|
||||
v = minV
|
||||
}
|
||||
|
||||
// Normalize to 0..1 based on the image's min/max range
|
||||
n := (v - minV) / den
|
||||
|
||||
// Optional inversion for visualization (swap near/far appearance)
|
||||
if m.Params.Invert {
|
||||
n = 1.0 - n
|
||||
}
|
||||
|
||||
// Clamp to [0,1] to avoid overflow/underflow due to outliers or rounding
|
||||
if n < 0 {
|
||||
n = 0
|
||||
}
|
||||
if n > 1 {
|
||||
n = 1
|
||||
}
|
||||
|
||||
// Convert to uint8 grayscale
|
||||
out[y*w+x] = byte(n * 255.0)
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// getDepthAt returns the depth value at pixel coordinate (y,x) from the raw output buffer.
|
||||
// This function assumes the output tensor is laid out as NCHW
|
||||
func (m *MiDaS) getDepthAt(buf []float32, y, x, h, w int) float32 {
|
||||
|
||||
// index = ((n*C + ch)*H + y)*W + x ; n=0, ch=0
|
||||
idx := (0*h+y)*w + x
|
||||
if idx >= 0 && idx < len(buf) {
|
||||
return buf[idx]
|
||||
}
|
||||
|
||||
// Out-of-range access should never happen if h/w match the tensor dimensions
|
||||
// Returning 0 is a safe fallback to avoid panics
|
||||
return 0
|
||||
}
|
||||
|
||||
// isFinite32 returns True if v is neither NaN nor +/-Inf
|
||||
func (m *MiDaS) isFinite32(v float32) bool {
|
||||
return !math.IsNaN(float64(v)) && !math.IsInf(float64(v), 0)
|
||||
}
|
||||
@@ -8,6 +8,8 @@ RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
wget \
|
||||
git \
|
||||
git-lfs \
|
||||
ca-certificates \
|
||||
libgl1-mesa-glx && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
@@ -89,10 +91,15 @@ RUN pip install --no-cache-dir \
|
||||
pyyaml \
|
||||
"tensorflow<=2.16.0rc0"
|
||||
|
||||
# fixes for python package conflicts, we are forcing specific versions.
|
||||
RUN pip install --no-cache-dir \
|
||||
"ml-dtypes==0.5.4" "onnx==1.18.0" "onnxruntime==1.18.1"
|
||||
|
||||
# download other onnx models
|
||||
RUN git clone --depth 1 https://github.com/swdee/go-rknnlite-build.git /opt/go-rknnlite-build && \
|
||||
git -C /opt/go-rknnlite-build fetch --depth 1 origin ce8b5ce1dc53b1c38324e7506374731ad21070c8 && \
|
||||
git -C /opt/go-rknnlite-build checkout FETCH_HEAD
|
||||
git -C /opt/go-rknnlite-build fetch --depth 1 origin 8380b2265b4becf2dda4c7ff52b9d57f8c718f09 && \
|
||||
git -C /opt/go-rknnlite-build checkout FETCH_HEAD && \
|
||||
git -C /opt/go-rknnlite-build lfs pull
|
||||
|
||||
# copy our modified mobilenet.py script into the rknn_model_zoo directory
|
||||
RUN cp /opt/go-rknnlite-build/mobilenet-batch/mobilenet-rknn.py /opt/rknn_model_zoo/examples/mobilenet/python/mobilenet-rknn-batch.py
|
||||
|
||||
@@ -32,6 +32,7 @@ MODELS=(
|
||||
"yolov8 convert.py /opt/go-rknnlite-build/yolonas-s.onnx i8 '' yolonas-s"
|
||||
"mobilenet mobilenet-rknn-batch.py ../model/mobilenetv2-12.onnx i8 --model mobilenetv2-batch8"
|
||||
"osnet-market1501 build|onnx_to_rknn.py osnet_x1_0_market_256x128.onnx i8 '' osnet-market1501-batch8"
|
||||
"midas build|onnx_to_rknn.py dpt_swin2_tiny_256.onnx i8 '' dpt_swin2_tiny_256"
|
||||
)
|
||||
|
||||
# compile all entries (or just filter) for one platform
|
||||
|
||||
Reference in New Issue
Block a user