mirror of
https://github.com/swdee/go-rknnlite.git
synced 2025-12-24 10:30:56 +08:00
added midas depth estimation example
This commit is contained in:
92
example/midas/README.md
Normal file
92
example/midas/README.md
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
# MiDaS Depth Estimation Example
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This example uses the [MiDaS v3.1 depth estimation](https://github.com/isl-org/MiDaS/)
|
||||||
|
for computing depth in a single image.
|
||||||
|
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
Make sure you have downloaded the data files first for the examples.
|
||||||
|
You only need to do this once for all examples.
|
||||||
|
|
||||||
|
```
|
||||||
|
cd example/
|
||||||
|
git clone --depth=1 https://github.com/swdee/go-rknnlite-data.git data
|
||||||
|
```
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Run the MiDaS example on the above bedroom scene on an rk3588, or replace `rk3588` with your platform model.
|
||||||
|
```
|
||||||
|
cd example/midas
|
||||||
|
go run midas.go -p rk3588
|
||||||
|
```
|
||||||
|
|
||||||
|
This will result in the output of:
|
||||||
|
```
|
||||||
|
Driver Version: 0.9.6, API Version: 2.3.0 (c949ad889d@2024-11-07T11:35:33)
|
||||||
|
Model Input Number: 1, Output Number: 1
|
||||||
|
Input tensors:
|
||||||
|
index=0, name=input, n_dims=4, dims=[1, 256, 256, 3], n_elems=196608, size=196608, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=0, scale=0.007843
|
||||||
|
Output tensors:
|
||||||
|
index=0, name=depth, n_dims=4, dims=[1, 1, 256, 256], n_elems=65536, size=65536, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=19.864582
|
||||||
|
Model first run speed: inference=577.442314ms, post processing=1.180646ms, rendering=3.137693ms, total time=581.760653ms
|
||||||
|
Saved depth map result to ../data/bedroom-out.jpg
|
||||||
|
Benchmark time=12.167970519s, count=20, average total time=608.398525ms
|
||||||
|
done
|
||||||
|
```
|
||||||
|
|
||||||
|
The saved JPG image shows the depth estimation map.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
|
||||||
|
See the help for command line parameters.
|
||||||
|
```
|
||||||
|
$ go run midas.go -h
|
||||||
|
|
||||||
|
Usage of /tmp/go-build2937772053/b001/exe/midas:
|
||||||
|
-i string
|
||||||
|
Image file to run depth estimation on (default "../data/bedroom.jpg")
|
||||||
|
-m string
|
||||||
|
RKNN compiled depth model file (default "../data/models/rk3588/dpt_swin2_tiny_256-rk3588.rknn")
|
||||||
|
-o string
|
||||||
|
Output JPG file (depth visualization) (default "../data/bedroom-out.jpg")
|
||||||
|
-p string
|
||||||
|
Rockchip platform [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588] (default "rk3588")
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Docker
|
||||||
|
|
||||||
|
To run the MiDaS example using the prebuilt docker image, make sure the data files have been downloaded first,
|
||||||
|
then run.
|
||||||
|
```
|
||||||
|
# from project root directory
|
||||||
|
|
||||||
|
docker run --rm \
|
||||||
|
--device /dev/dri:/dev/dri \
|
||||||
|
-v "$(pwd):/go/src/app" \
|
||||||
|
-v "$(pwd)/example/data:/go/src/data" \
|
||||||
|
-v "/usr/include/rknn_api.h:/usr/include/rknn_api.h" \
|
||||||
|
-v "/usr/lib/librknnrt.so:/usr/lib/librknnrt.so" \
|
||||||
|
-w /go/src/app \
|
||||||
|
swdee/go-rknnlite:latest \
|
||||||
|
go run ./example/midas/midas.go -p rk3588
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Benchmarks
|
||||||
|
|
||||||
|
The following table shows a comparison of the benchmark results across the three distinct platforms.
|
||||||
|
|
||||||
|
| Platform | Execution Time | Average Inference Time Per Image |
|
||||||
|
|----------|----------------|----------------------------------|
|
||||||
|
| rk3588 | 12.16s | 608.39ms |
|
||||||
|
| rk3576 | 16.85s | 842.97ms |
|
||||||
|
| rk3566 | 37.49s | 1.87s |
|
||||||
|
|
||||||
189
example/midas/midas.go
Normal file
189
example/midas/midas.go
Normal file
@@ -0,0 +1,189 @@
|
|||||||
|
/*
|
||||||
|
Example code showing how to perform depth estimation using a MiDaS model.
|
||||||
|
*/
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"image"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/swdee/go-rknnlite"
|
||||||
|
"github.com/swdee/go-rknnlite/postprocess"
|
||||||
|
"gocv.io/x/gocv"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
// disable logging timestamps
|
||||||
|
log.SetFlags(0)
|
||||||
|
|
||||||
|
// read in cli flags
|
||||||
|
modelFile := flag.String("m", "../data/models/rk3588/dpt_swin2_tiny_256-rk3588.rknn", "RKNN compiled depth model file")
|
||||||
|
imgFile := flag.String("i", "../data/bedroom.jpg", "Image file to run depth estimation on")
|
||||||
|
saveFile := flag.String("o", "../data/bedroom-out.jpg", "Output JPG file (depth visualization)")
|
||||||
|
rkPlatform := flag.String("p", "rk3588", "Rockchip platform [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588]")
|
||||||
|
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
err := rknnlite.SetCPUAffinityByPlatform(*rkPlatform, rknnlite.FastCores)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("Failed to set CPU affinity: %v\n", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// check if user specified model file or if default is being used. if default
|
||||||
|
// then pick the default platform model to use.
|
||||||
|
if f := flag.Lookup("m"); f != nil && f.Value.String() == f.DefValue && *rkPlatform != "rk3588" {
|
||||||
|
*modelFile = strings.ReplaceAll(*modelFile, "rk3588", *rkPlatform)
|
||||||
|
}
|
||||||
|
|
||||||
|
// create rknn runtime instance
|
||||||
|
rt, err := rknnlite.NewRuntimeByPlatform(*rkPlatform, *modelFile)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal("Error initializing RKNN runtime: ", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// We want float32 outputs for easy depth visualization
|
||||||
|
rt.SetWantFloat(true)
|
||||||
|
|
||||||
|
// optional querying of model file tensors and SDK version for printing
|
||||||
|
// to stdout. not necessary for production inference code
|
||||||
|
err = rt.Query(os.Stdout)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal("Error querying runtime: ", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// create midas post processor
|
||||||
|
midasProcessor := postprocess.NewMiDaS(postprocess.MiDaSDefaultParams())
|
||||||
|
|
||||||
|
// load image
|
||||||
|
img := gocv.IMRead(*imgFile, gocv.IMReadColor)
|
||||||
|
|
||||||
|
if img.Empty() {
|
||||||
|
log.Fatal("Error reading image from: ", *imgFile)
|
||||||
|
}
|
||||||
|
|
||||||
|
// convert colorspace and resize image to input tensor size
|
||||||
|
rgbImg := gocv.NewMat()
|
||||||
|
gocv.CvtColor(img, &rgbImg, gocv.ColorBGRToRGB)
|
||||||
|
|
||||||
|
cropImg := rgbImg.Clone()
|
||||||
|
scaleSize := image.Pt(int(rt.InputAttrs()[0].Dims[2]), int(rt.InputAttrs()[0].Dims[1]))
|
||||||
|
gocv.Resize(rgbImg, &cropImg, scaleSize, 0, 0, gocv.InterpolationArea)
|
||||||
|
|
||||||
|
defer img.Close()
|
||||||
|
defer rgbImg.Close()
|
||||||
|
defer cropImg.Close()
|
||||||
|
|
||||||
|
start := time.Now()
|
||||||
|
|
||||||
|
// perform inference on image file
|
||||||
|
outputs, err := rt.Inference([]gocv.Mat{cropImg})
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal("Runtime inferencing failed with error: ", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
endInference := time.Now()
|
||||||
|
|
||||||
|
// post process and create depth map
|
||||||
|
depthMap := gocv.NewMat()
|
||||||
|
defer depthMap.Close()
|
||||||
|
err = midasProcessor.CreateDepthMap(outputs, depthMap)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal("Error creating depth map: ", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
endCreateMap := time.Now()
|
||||||
|
|
||||||
|
// resize the color map back to the original input image size
|
||||||
|
resizedMap := gocv.NewMat()
|
||||||
|
defer resizedMap.Close()
|
||||||
|
gocv.Resize(depthMap, &resizedMap, image.Pt(img.Cols(), img.Rows()), 0, 0, gocv.InterpolationCubic)
|
||||||
|
|
||||||
|
endRendering := time.Now()
|
||||||
|
|
||||||
|
log.Printf("Model first run speed: inference=%s, post processing=%s, rendering=%s, total time=%s\n",
|
||||||
|
endInference.Sub(start).String(),
|
||||||
|
endCreateMap.Sub(endInference).String(),
|
||||||
|
endRendering.Sub(endCreateMap).String(),
|
||||||
|
endRendering.Sub(start).String(),
|
||||||
|
)
|
||||||
|
|
||||||
|
// Save the result
|
||||||
|
if ok := gocv.IMWrite(*saveFile, resizedMap); !ok {
|
||||||
|
log.Fatal("Failed to save the image")
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("Saved depth map result to %s\n", *saveFile)
|
||||||
|
|
||||||
|
// free outputs allocated in C memory after you have finished post processing
|
||||||
|
err = outputs.Free()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal("Error freeing Outputs: ", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// optional code. run benchmark to get average time
|
||||||
|
runBenchmark(rt, midasProcessor, []gocv.Mat{cropImg}, img)
|
||||||
|
|
||||||
|
// close runtime and release resources
|
||||||
|
err = rt.Close()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal("Error closing RKNN runtime: ", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Println("done")
|
||||||
|
}
|
||||||
|
|
||||||
|
func runBenchmark(rt *rknnlite.Runtime, midasProcessor *postprocess.MiDaS,
|
||||||
|
mats []gocv.Mat, srcImg gocv.Mat) {
|
||||||
|
|
||||||
|
count := 20
|
||||||
|
start := time.Now()
|
||||||
|
|
||||||
|
depthMap := gocv.NewMat()
|
||||||
|
defer depthMap.Close()
|
||||||
|
resizedMap := gocv.NewMat()
|
||||||
|
defer resizedMap.Close()
|
||||||
|
|
||||||
|
for i := 0; i < count; i++ {
|
||||||
|
// perform inference on image file
|
||||||
|
outputs, err := rt.Inference(mats)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal("Runtime inferencing failed with error: ", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// post process
|
||||||
|
err = midasProcessor.CreateDepthMap(outputs, depthMap)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal("Error creating depth map: ", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// resize the color map back to the original input image size
|
||||||
|
gocv.Resize(depthMap, &resizedMap, image.Pt(srcImg.Cols(), srcImg.Rows()), 0, 0, gocv.InterpolationCubic)
|
||||||
|
|
||||||
|
err = outputs.Free()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal("Error freeing Outputs: ", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
end := time.Now()
|
||||||
|
total := end.Sub(start)
|
||||||
|
avg := total / time.Duration(count)
|
||||||
|
|
||||||
|
log.Printf("Benchmark time=%s, count=%d, average total time=%s\n",
|
||||||
|
total.String(), count, avg.String(),
|
||||||
|
)
|
||||||
|
}
|
||||||
170
postprocess/midas.go
Normal file
170
postprocess/midas.go
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
package postprocess
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/swdee/go-rknnlite"
|
||||||
|
"gocv.io/x/gocv"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MiDaS is the post processor for MiDaS depth estimation inference results.
// It turns the model's output tensor into a depth map image according to
// Params.
type MiDaS struct {
	// Params are the depth map configuration parameters
	Params MiDaSParams
}
|
||||||
|
|
||||||
|
// GrayscaleMap is a sentinel Colormap value that tells the post processor not
// to apply any coloring to the output depth map, leaving it as grayscale.
// NOTE(review): 9999 is presumably chosen so it cannot collide with a real
// gocv colormap value - confirm against the gocv.ColormapTypes constants.
const GrayscaleMap = gocv.ColormapTypes(9999)
|
||||||
|
|
||||||
|
// MiDaSParams holds the configuration used when rendering a depth map.
type MiDaSParams struct {
	// Invert the depth map, so that normalized values are flipped (1 - n)
	// before being scaled to 8-bit
	Invert bool
	// Colormap to apply to depth map, if you want it left as grayscale then
	// pass postprocess.GrayscaleMap
	Colormap gocv.ColormapTypes
}
|
||||||
|
|
||||||
|
// MiDaSDefaultParams sets output depth map to non-inverting and use Hot color scheme
|
||||||
|
func MiDaSDefaultParams() MiDaSParams {
|
||||||
|
return MiDaSParams{
|
||||||
|
Invert: false,
|
||||||
|
Colormap: gocv.ColormapHot,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewMiDaS returns and instance of the MiDaS post processor
|
||||||
|
func NewMiDaS(p MiDaSParams) *MiDaS {
|
||||||
|
return &MiDaS{
|
||||||
|
Params: p,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateDepthMap converts the tensor output data into a depth estimation map image
|
||||||
|
func (m *MiDaS) CreateDepthMap(outputs *rknnlite.Outputs, depthMat gocv.Mat) error {
|
||||||
|
|
||||||
|
// output tensor is in NCHW format
|
||||||
|
// get output tensor width/height
|
||||||
|
outH := int(outputs.OutputAttributes().DimHeights[0])
|
||||||
|
outW := int(outputs.OutputAttributes().DimWidths[0])
|
||||||
|
|
||||||
|
// Convert float depth to uint8 visualization
|
||||||
|
depthU8 := m.depthToU8(outputs.Output[0].BufFloat, outH, outW)
|
||||||
|
|
||||||
|
// Make a Mat from bytes
|
||||||
|
u8Mat, err := gocv.NewMatFromBytes(outH, outW, gocv.MatTypeCV8U, depthU8)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Failed to create depth mat: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
defer u8Mat.Close()
|
||||||
|
|
||||||
|
if m.Params.Colormap == GrayscaleMap {
|
||||||
|
// no coloring
|
||||||
|
u8Mat.CopyTo(&depthMat)
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// apply colormap
|
||||||
|
gocv.ApplyColorMap(u8Mat, &depthMat, m.Params.Colormap)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// depthToU8 converts a float32 depth map into an 8-bit visualization image.
|
||||||
|
//
|
||||||
|
// MiDaS outputs “relative depth” values that are not bounded to [0,1] and
|
||||||
|
// can vary per image. To visualize, we normalize the depth values to [0,255]
|
||||||
|
// using the min/max over the whole output map.
|
||||||
|
//
|
||||||
|
// Output layout is row-major grayscale: out[y*w + x]
|
||||||
|
func (m *MiDaS) depthToU8(depth []float32, h, w int) []byte {
|
||||||
|
|
||||||
|
total := h * w
|
||||||
|
out := make([]byte, total)
|
||||||
|
|
||||||
|
// First pass: find min/max depth ignoring NaN/Inf values
|
||||||
|
minV := float32(math.Inf(1))
|
||||||
|
maxV := float32(math.Inf(-1))
|
||||||
|
|
||||||
|
for y := 0; y < h; y++ {
|
||||||
|
for x := 0; x < w; x++ {
|
||||||
|
// Read the depth value at (y,x) from the model output buffer
|
||||||
|
v := m.getDepthAt(depth, y, x, h, w)
|
||||||
|
|
||||||
|
// Skip invalid floating-point values so they don't poison min/max
|
||||||
|
if !m.isFinite32(v) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if v < minV {
|
||||||
|
minV = v
|
||||||
|
}
|
||||||
|
|
||||||
|
if v > maxV {
|
||||||
|
maxV = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Guard against all-invalid outputs or a constant output (max==min)
|
||||||
|
den := maxV - minV
|
||||||
|
if !m.isFinite32(minV) || !m.isFinite32(maxV) || den <= 0 {
|
||||||
|
// Fallback: return all zeros (black image)
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second pass: normalize each pixel to [0,1], optionally invert, clamp, then scale to [0,255]
|
||||||
|
for y := 0; y < h; y++ {
|
||||||
|
for x := 0; x < w; x++ {
|
||||||
|
v := m.getDepthAt(depth, y, x, h, w)
|
||||||
|
|
||||||
|
// If this pixel is invalid, pin it to minV so it becomes black after normalization
|
||||||
|
if !m.isFinite32(v) {
|
||||||
|
v = minV
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalize to 0..1 based on the image's min/max range
|
||||||
|
n := (v - minV) / den
|
||||||
|
|
||||||
|
// Optional inversion for visualization (swap near/far appearance)
|
||||||
|
if m.Params.Invert {
|
||||||
|
n = 1.0 - n
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clamp to [0,1] to avoid overflow/underflow due to outliers or rounding
|
||||||
|
if n < 0 {
|
||||||
|
n = 0
|
||||||
|
}
|
||||||
|
if n > 1 {
|
||||||
|
n = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert to uint8 grayscale
|
||||||
|
out[y*w+x] = byte(n * 255.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// getDepthAt returns the depth value at pixel coordinate (y,x) from the raw output buffer.
|
||||||
|
// This function assumes the output tensor is laid out as NCHW
|
||||||
|
func (m *MiDaS) getDepthAt(buf []float32, y, x, h, w int) float32 {
|
||||||
|
|
||||||
|
// index = ((n*C + ch)*H + y)*W + x ; n=0, ch=0
|
||||||
|
idx := (0*h+y)*w + x
|
||||||
|
if idx >= 0 && idx < len(buf) {
|
||||||
|
return buf[idx]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Out-of-range access should never happen if h/w match the tensor dimensions
|
||||||
|
// Returning 0 is a safe fallback to avoid panics
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// isFinite32 returns True if v is neither NaN nor +/-Inf
|
||||||
|
func (m *MiDaS) isFinite32(v float32) bool {
|
||||||
|
return !math.IsNaN(float64(v)) && !math.IsInf(float64(v), 0)
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user