From 23d156bae2aa1939daee7a6ecb4e60a6f9ed98e3 Mon Sep 17 00:00:00 2001
From: swdee
Date: Fri, 19 Dec 2025 22:54:34 +1300
Subject: [PATCH] added midas depth estimation example

---
 example/midas/README.md |  92 +++++++++++++++++++
 example/midas/midas.go  | 189 ++++++++++++++++++++++++++++++++++++++++
 postprocess/midas.go    | 170 ++++++++++++++++++++++++++++++++++++
 3 files changed, 451 insertions(+)
 create mode 100644 example/midas/README.md
 create mode 100644 example/midas/midas.go
 create mode 100644 postprocess/midas.go

diff --git a/example/midas/README.md b/example/midas/README.md
new file mode 100644
index 0000000..71fe2cc
--- /dev/null
+++ b/example/midas/README.md
@@ -0,0 +1,92 @@
+# MiDaS Depth Estimation Example
+
+## Overview
+
+This example uses the [MiDaS v3.1 depth estimation](https://github.com/isl-org/MiDaS/)
+model to compute depth from a single image.
+
+
+## Usage
+
+Make sure you have downloaded the data files first. You only need to do this
+once for all examples.
+
+```
+cd example/
+git clone --depth=1 https://github.com/swdee/go-rknnlite-data.git data
+```
+
+![bedroom.jpg](https://github.com/swdee/go-rknnlite-data/raw/master/bedroom.jpg)
+
+Run the MiDaS example on the above bedroom scene on rk3588, or replace with your platform model.
+```
+cd example/midas
+go run midas.go -p rk3588
+```
+
+This will produce the following output:
+```
+Driver Version: 0.9.6, API Version: 2.3.0 (c949ad889d@2024-11-07T11:35:33)
+Model Input Number: 1, Output Number: 1
+Input tensors:
+  index=0, name=input, n_dims=4, dims=[1, 256, 256, 3], n_elems=196608, size=196608, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=0, scale=0.007843
+Output tensors:
+  index=0, name=depth, n_dims=4, dims=[1, 1, 256, 256], n_elems=65536, size=65536, fmt=NCHW, type=INT8, qnt_type=AFFINE, zp=-128, scale=19.864582
+Model first run speed: inference=577.442314ms, post processing=1.180646ms, rendering=3.137693ms, total time=581.760653ms
+Saved depth map result to ../data/bedroom-out.jpg
+Benchmark time=12.167970519s, count=20, average total time=608.398525ms
+done
+```
+
+The saved JPG image with the depth estimation map:
+
+![midas-bedroom-out.jpg](https://github.com/swdee/go-rknnlite-data/raw/master/docimg/midas-bedroom-out.jpg)
+
+
+See the help for command line parameters.
+```
+$ go run midas.go -h
+
+Usage of /tmp/go-build2937772053/b001/exe/midas:
+  -i string
+    	Image file to run depth estimation on (default "../data/bedroom.jpg")
+  -m string
+    	RKNN compiled depth model file (default "../data/models/rk3588/dpt_swin2_tiny_256-rk3588.rknn")
+  -o string
+    	Output JPG file (depth visualization) (default "../data/bedroom-out.jpg")
+  -p string
+    	Rockchip platform [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588] (default "rk3588")
+```
+
+
+### Docker
+
+To run the MiDaS example using the prebuilt docker image, make sure the data files have been downloaded first,
+then run:
+```
+# from project root directory
+
+docker run --rm \
+  --device /dev/dri:/dev/dri \
+  -v "$(pwd):/go/src/app" \
+  -v "$(pwd)/example/data:/go/src/data" \
+  -v "/usr/include/rknn_api.h:/usr/include/rknn_api.h" \
+  -v "/usr/lib/librknnrt.so:/usr/lib/librknnrt.so" \
+  -w /go/src/app \
+  swdee/go-rknnlite:latest \
+  go run ./example/midas/midas.go -p rk3588
+```
+
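+### Post Processor Options
+
+The example always renders the depth map with the Hot colormap and there is no command
+line flag to change it. If you use the post processor in your own code, the `MiDaSParams`
+defined in `postprocess/midas.go` let you pick a different colormap, leave the map as
+grayscale, or invert it. A minimal sketch (not a complete program; `outputs` comes from
+`rt.Inference()` as shown in `midas.go`) producing a grayscale, inverted depth map:
+
+```
+// create the post processor with custom parameters instead of MiDaSDefaultParams()
+midasProcessor := postprocess.NewMiDaS(postprocess.MiDaSParams{
+	// leave the depth map as grayscale rather than applying a colormap
+	Colormap: postprocess.GrayscaleMap,
+	// swap the near/far appearance of the visualization
+	Invert: true,
+})
+
+depthMap := gocv.NewMat()
+defer depthMap.Close()
+
+// convert the model output tensor into the depth map image
+if err := midasProcessor.CreateDepthMap(outputs, depthMap); err != nil {
+	log.Fatal("Error creating depth map: ", err)
+}
+```
+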
+
+## Benchmarks
+
+The following table compares the benchmark results across three platforms.
+
+| Platform | Execution Time | Average Inference Time Per Image |
+|----------|----------------|----------------------------------|
+| rk3588   | 12.16s         | 608.39ms                         |
+| rk3576   | 16.85s         | 842.97ms                         |
+| rk3566   | 37.49s         | 1.87s                            |
+
diff --git a/example/midas/midas.go b/example/midas/midas.go
new file mode 100644
index 0000000..f92a5a8
--- /dev/null
+++ b/example/midas/midas.go
@@ -0,0 +1,189 @@
+/*
+Example code showing how to perform depth estimation using a MiDaS model.
+*/
+package main
+
+import (
+	"flag"
+	"image"
+	"log"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/swdee/go-rknnlite"
+	"github.com/swdee/go-rknnlite/postprocess"
+	"gocv.io/x/gocv"
+)
+
+func main() {
+	// disable logging timestamps
+	log.SetFlags(0)
+
+	// read in cli flags
+	modelFile := flag.String("m", "../data/models/rk3588/dpt_swin2_tiny_256-rk3588.rknn", "RKNN compiled depth model file")
+	imgFile := flag.String("i", "../data/bedroom.jpg", "Image file to run depth estimation on")
+	saveFile := flag.String("o", "../data/bedroom-out.jpg", "Output JPG file (depth visualization)")
+	rkPlatform := flag.String("p", "rk3588", "Rockchip platform [rk3562|rk3566|rk3568|rk3576|rk3582|rk3588]")
+
+	flag.Parse()
+
+	err := rknnlite.SetCPUAffinityByPlatform(*rkPlatform, rknnlite.FastCores)
+
+	if err != nil {
+		log.Printf("Failed to set CPU affinity: %v\n", err)
+	}
+
+	// check if user specified model file or if default is being used. if default
+	// then pick the default platform model to use.
+	if f := flag.Lookup("m"); f != nil && f.Value.String() == f.DefValue && *rkPlatform != "rk3588" {
+		*modelFile = strings.ReplaceAll(*modelFile, "rk3588", *rkPlatform)
+	}
+
+	// create rknn runtime instance
+	rt, err := rknnlite.NewRuntimeByPlatform(*rkPlatform, *modelFile)
+
+	if err != nil {
+		log.Fatal("Error initializing RKNN runtime: ", err)
+	}
+
+	// We want float32 outputs for easy depth visualization
+	rt.SetWantFloat(true)
+
+	// optional querying of model file tensors and SDK version for printing
+	// to stdout, not necessary for production inference code
+	err = rt.Query(os.Stdout)
+
+	if err != nil {
+		log.Fatal("Error querying runtime: ", err)
+	}
+
+	// create midas post processor
+	midasProcessor := postprocess.NewMiDaS(postprocess.MiDaSDefaultParams())
+
+	// load image
+	img := gocv.IMRead(*imgFile, gocv.IMReadColor)
+
+	if img.Empty() {
+		log.Fatal("Error reading image from: ", *imgFile)
+	}
+
+	// convert colorspace and resize image to input tensor size
+	rgbImg := gocv.NewMat()
+	gocv.CvtColor(img, &rgbImg, gocv.ColorBGRToRGB)
+
+	cropImg := rgbImg.Clone()
+	scaleSize := image.Pt(int(rt.InputAttrs()[0].Dims[2]), int(rt.InputAttrs()[0].Dims[1]))
+	gocv.Resize(rgbImg, &cropImg, scaleSize, 0, 0, gocv.InterpolationArea)
+
+	defer img.Close()
+	defer rgbImg.Close()
+	defer cropImg.Close()
+
+	start := time.Now()
+
+	// perform inference on image file
+	outputs, err := rt.Inference([]gocv.Mat{cropImg})
+
+	if err != nil {
+		log.Fatal("Runtime inferencing failed with error: ", err)
+	}
+
+	endInference := time.Now()
+
+	// post process and create depth map
+	depthMap := gocv.NewMat()
+	defer depthMap.Close()
+	err = midasProcessor.CreateDepthMap(outputs, depthMap)
+
+	if err != nil {
+		log.Fatal("Error creating depth map: ", err)
+	}
+
+	endCreateMap := time.Now()
+
+	// resize the color map back to the original input image size
+	resizedMap := gocv.NewMat()
+	defer resizedMap.Close()
+	gocv.Resize(depthMap, &resizedMap, image.Pt(img.Cols(), img.Rows()), 0, 0, gocv.InterpolationCubic)
+
+	endRendering := time.Now()
+
+	log.Printf("Model first run speed: inference=%s, post processing=%s, rendering=%s, total time=%s\n",
+		endInference.Sub(start).String(),
+		endCreateMap.Sub(endInference).String(),
+		endRendering.Sub(endCreateMap).String(),
+		endRendering.Sub(start).String(),
+	)
+
+	// Save the result
+	if ok := gocv.IMWrite(*saveFile, resizedMap); !ok {
+		log.Fatal("Failed to save the image")
+	}
+
+	log.Printf("Saved depth map result to %s\n", *saveFile)
+
+	// free outputs allocated in C memory after you have finished post processing
+	err = outputs.Free()
+
+	if err != nil {
+		log.Fatal("Error freeing Outputs: ", err)
+	}
+
+	// optional code, run benchmark to get average time
+	runBenchmark(rt, midasProcessor, []gocv.Mat{cropImg}, img)
+
+	// close runtime and release resources
+	err = rt.Close()
+
+	if err != nil {
+		log.Fatal("Error closing RKNN runtime: ", err)
+	}
+
+	log.Println("done")
+}
+
+func runBenchmark(rt *rknnlite.Runtime, midasProcessor *postprocess.MiDaS,
+	mats []gocv.Mat, srcImg gocv.Mat) {
+
+	count := 20
+	start := time.Now()
+
+	depthMap := gocv.NewMat()
+	defer depthMap.Close()
+	resizedMap := gocv.NewMat()
+	defer resizedMap.Close()
+
+	for i := 0; i < count; i++ {
+		// perform inference on image file
+		outputs, err := rt.Inference(mats)
+
+		if err != nil {
+			log.Fatal("Runtime inferencing failed with error: ", err)
+		}
+
+		// post process
+		err = midasProcessor.CreateDepthMap(outputs, depthMap)
+
+		if err != nil {
+			log.Fatal("Error creating depth map: ", err)
+		}
+
+		// resize the color map back to the original input image size
+		gocv.Resize(depthMap, &resizedMap, image.Pt(srcImg.Cols(), srcImg.Rows()), 0, 0, gocv.InterpolationCubic)
+
+		err = outputs.Free()
+
+		if err != nil {
+			log.Fatal("Error freeing Outputs: ", err)
+		}
+	}
+
+	end := time.Now()
+	total := end.Sub(start)
+	avg := total / time.Duration(count)
+
+	log.Printf("Benchmark time=%s, count=%d, average total time=%s\n",
+		total.String(), count, avg.String(),
+	)
+}
diff --git a/postprocess/midas.go b/postprocess/midas.go
new file mode 100644
index 0000000..b30bf29
--- /dev/null
+++ b/postprocess/midas.go
@@ -0,0 +1,170 @@
+package postprocess
+
+import (
+	"fmt"
+	"math"
+
+	"github.com/swdee/go-rknnlite"
+	"gocv.io/x/gocv"
+)
+
+// MiDaS defines the struct for MiDaS depth estimation inference post processing
+type MiDaS struct {
+	// Params are the depth map configuration parameters
+	Params MiDaSParams
+}
+
+// GrayscaleMap indicates that no colormap is applied to the output depth map, leaving it as grayscale
+const GrayscaleMap = gocv.ColormapTypes(9999)
+
+type MiDaSParams struct {
+	// Invert the depth map
+	Invert bool
+	// Colormap to apply to the depth map, if you want it left as grayscale
+	// then pass postprocess.GrayscaleMap
+	Colormap gocv.ColormapTypes
+}
+
+// MiDaSDefaultParams returns the default parameters: a non-inverted depth map using the Hot color scheme
+func MiDaSDefaultParams() MiDaSParams {
+	return MiDaSParams{
+		Invert:   false,
+		Colormap: gocv.ColormapHot,
+	}
+}
+
+// NewMiDaS returns an instance of the MiDaS post processor
+func NewMiDaS(p MiDaSParams) *MiDaS {
+	return &MiDaS{
+		Params: p,
+	}
+}
+
+// CreateDepthMap converts the tensor output data into a depth estimation map image
+func (m *MiDaS) CreateDepthMap(outputs *rknnlite.Outputs, depthMat gocv.Mat) error {
+
+	// output tensor is in NCHW format
+	// get output tensor width/height
+	outH := int(outputs.OutputAttributes().DimHeights[0])
+	outW := int(outputs.OutputAttributes().DimWidths[0])
+
+	// Convert float depth to uint8 visualization
+	depthU8 := m.depthToU8(outputs.Output[0].BufFloat, outH, outW)
+
+	// Make a Mat from bytes
+	u8Mat, err := gocv.NewMatFromBytes(outH, outW, gocv.MatTypeCV8U, depthU8)
+
+	if err != nil {
+		return fmt.Errorf("failed to create depth mat: %v", err)
+	}
+
+	defer u8Mat.Close()
+
+	if m.Params.Colormap == GrayscaleMap {
+		// no coloring
+		u8Mat.CopyTo(&depthMat)
+
+	} else {
+		// apply colormap
+		gocv.ApplyColorMap(u8Mat, &depthMat, m.Params.Colormap)
+	}
+
+	return nil
+}
+
+// depthToU8 converts a float32 depth map into an 8-bit visualization image.
+// +// MiDaS outputs “relative depth” values that are not bounded to [0,1] and +// can vary per image. To visualize, we normalize the depth values to [0,255] +// using the min/max over the whole output map. +// +// Output layout is row-major grayscale: out[y*w + x] +func (m *MiDaS) depthToU8(depth []float32, h, w int) []byte { + + total := h * w + out := make([]byte, total) + + // First pass: find min/max depth ignoring NaN/Inf values + minV := float32(math.Inf(1)) + maxV := float32(math.Inf(-1)) + + for y := 0; y < h; y++ { + for x := 0; x < w; x++ { + // Read the depth value at (y,x) from the model output buffer + v := m.getDepthAt(depth, y, x, h, w) + + // Skip invalid floating-point values so they don't poison min/max + if !m.isFinite32(v) { + continue + } + + if v < minV { + minV = v + } + + if v > maxV { + maxV = v + } + } + } + + // Guard against all-invalid outputs or a constant output (max==min) + den := maxV - minV + if !m.isFinite32(minV) || !m.isFinite32(maxV) || den <= 0 { + // Fallback: return all zeros (black image) + return out + } + + // Second pass: normalize each pixel to [0,1], optionally invert, clamp, then scale to [0,255] + for y := 0; y < h; y++ { + for x := 0; x < w; x++ { + v := m.getDepthAt(depth, y, x, h, w) + + // If this pixel is invalid, pin it to minV so it becomes black after normalization + if !m.isFinite32(v) { + v = minV + } + + // Normalize to 0..1 based on the image's min/max range + n := (v - minV) / den + + // Optional inversion for visualization (swap near/far appearance) + if m.Params.Invert { + n = 1.0 - n + } + + // Clamp to [0,1] to avoid overflow/underflow due to outliers or rounding + if n < 0 { + n = 0 + } + if n > 1 { + n = 1 + } + + // Convert to uint8 grayscale + out[y*w+x] = byte(n * 255.0) + } + } + + return out +} + +// getDepthAt returns the depth value at pixel coordinate (y,x) from the raw output buffer. +// This function assumes the output tensor is laid out as NCHW +func (m *MiDaS) getDepthAt(buf []float32, y, x, h, w int) float32 { + + // index = ((n*C + ch)*H + y)*W + x ; n=0, ch=0 + idx := (0*h+y)*w + x + if idx >= 0 && idx < len(buf) { + return buf[idx] + } + + // Out-of-range access should never happen if h/w match the tensor dimensions + // Returning 0 is a safe fallback to avoid panics + return 0 +} + +// isFinite32 returns True if v is neither NaN nor +/-Inf +func (m *MiDaS) isFinite32(v float32) bool { + return !math.IsNaN(float64(v)) && !math.IsInf(float64(v), 0) +}