feat(hand): add mediapipe hand 3d pose detecter

2025-09-26 17:51:13 +08:00 · 2021-12-13 16:39:50 +08:00
parent 429e30db91
commit 7eab96fa85
25 changed files with 1628 additions and 334 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -70,3 +70,5 @@ _testmain.go
 test
 .vim
 dist/
+
+libtorch/
--- a/README.md
+++ b/README.md
@@ -53,6 +53,8 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM
    - nanodet [Google Drive](https://drive.google.com/drive/folders/1ywH7r_clqqA_BAOFSzA92Q0lxJtWlN3z?usp=sharing)
  - pose (for hand pose estimation)
    - handnet [Google Drive](https://drive.google.com/drive/folders/1DsCGmiVaZobbMWRp5Oec8GbIpeg7CsNR?usp=sharing)
+  - pose3d (for 3d handpose detection)
+    - mediapipe [Google Drive](https://drive.google.com/drive/folders/1LsqIGB55dusZJqmP1uhnQUnNE2tLzifp?usp=sharing)
 - styletransfer
  - animegan2 [Google Drive](https://drive.google.com/drive/folders/1K6ZScENPHVbxupHkwl5WcpG8PPECtD8e?usp=sharing)
 - tracker
--- a/go/common/geometry.go
+++ b/go/common/geometry.go
@@ -90,6 +90,9 @@ func NewCPoint2fVector() *C.Point2fVector {

 // GoPoint2fVector convert C.Point2fVector to []Point
 func GoPoint2fVector(cVector *C.Point2fVector, w float64, h float64) []Point {
+	if cVector == nil {
+		return nil
+	}
 	l := int(cVector.length)
 	ret := make([]Point, 0, l)
 	ptr := unsafe.Pointer(cVector.points)
@@ -105,3 +108,52 @@ func FreeCPoint2fVector(c *C.Point2fVector) {
 	C.FreePoint2fVector(c)
 	C.free(unsafe.Pointer(c))
 }
+
+// Point3d represents a 3d point
+type Point3d struct {
+	X float64
+	Y float64
+	Z float64
+}
+
+// Pt3d returns a New Point3d
+func Pt3d(x, y, z float64) Point3d {
+	return Point3d{x, y, z}
+}
+
+var ZP3d = Point3d{}
+
+// GoPoint3d converts C.Point3d to Point3d
+func GoPoint3d(c *C.Point3d) Point3d {
+	return Pt3d(
+		float64(c.x),
+		float64(c.y),
+		float64(c.z),
+	)
+}
+
+// NewCPoint3dVector returns a malloc'ed C.Point3dVector pointer, release it with FreeCPoint3dVector
+func NewCPoint3dVector() *C.Point3dVector {
+	return (*C.Point3dVector)(C.malloc(C.sizeof_Point3dVector))
+}
+
+// GoPoint3dVector convert C.Point3dVector to []Point3d
+func GoPoint3dVector(cVector *C.Point3dVector) []Point3d {
+	if cVector == nil {
+		return nil
+	}
+	l := int(cVector.length)
+	ret := make([]Point3d, 0, l)
+	ptr := unsafe.Pointer(cVector.points)
+	for i := 0; i < l; i++ {
+		cPoint3d := (*C.Point3d)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_Point3d*C.int(i))))
+		ret = append(ret, GoPoint3d(cPoint3d))
+	}
+	return ret
+}
+
+// FreeCPoint3dVector release C.Point3dVector memory
+func FreeCPoint3dVector(c *C.Point3dVector) {
+	C.FreePoint3dVector(c)
+	C.free(unsafe.Pointer(c))
+}
--- a/go/common/objectinfo.go
+++ b/go/common/objectinfo.go
@@ -20,6 +20,8 @@ type ObjectInfo struct {
 	Rect Rectangle
 	// Points keypoints
 	Keypoints []Keypoint
+	// Name represents the object's display name
+	Name string
 }

 // GoObjectInfo convert C.ObjectInfo to go type
--- a/go/common/palmobject.go
+++ b/go/common/palmobject.go
@@ -0,0 +1,56 @@
+package common
+
+/*
+#include <stdlib.h>
+#include <stdbool.h>
+#include "openvision/common/common.h"
+#include "openvision/hand/pose3d.h"
+*/
+import "C"
+import (
+	"unsafe"
+)
+
+// PalmObject represents a palm detection result: score, rotation, rect, landmarks and 2d/3d skeleton
+type PalmObject struct {
+	Score      float64
+	Rotation   float64
+	Rect       []Point
+	Landmarks  []Point
+	Skeleton   []Point
+	Skeleton3d []Point3d
+}
+
+// NewCPalmObjectVector returns *C.PalmObjectVector
+func NewCPalmObjectVector() *C.PalmObjectVector {
+	return (*C.PalmObjectVector)(C.malloc(C.sizeof_PalmObjectVector))
+}
+
+// FreeCPalmObjectVector release *C.PalmObjectVector memory
+func FreeCPalmObjectVector(p *C.PalmObjectVector) {
+	C.FreePalmObjectVector(p)
+	C.free(unsafe.Pointer(p))
+}
+
+// GoPalmObject convert C.PalmObject to Go type
+func GoPalmObject(cObj *C.PalmObject, w float64, h float64) PalmObject {
+	return PalmObject{
+		Score:      float64(cObj.score),
+		Rotation:   float64(cObj.rotation),
+		Rect:       GoPoint2fVector(cObj.rect, w, h),
+		Landmarks:  GoPoint2fVector(cObj.landmarks, w, h),
+		Skeleton:   GoPoint2fVector(cObj.skeleton, w, h),
+		Skeleton3d: GoPoint3dVector(cObj.skeleton3d),
+	}
+}
+
+func GoPalmObjectVector(c *C.PalmObjectVector, w float64, h float64) []PalmObject {
+	l := int(c.length)
+	ret := make([]PalmObject, 0, l)
+	ptr := unsafe.Pointer(c.items)
+	for i := 0; i < l; i++ {
+		cObj := (*C.PalmObject)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_PalmObject*C.int(i))))
+		ret = append(ret, GoPalmObject(cObj, w, h))
+	}
+	return ret
+}
--- a/go/examples/hand/main.go
+++ b/go/examples/hand/main.go
@@ -15,6 +15,7 @@ import (
 	"github.com/bububa/openvision/go/hand/detecter"
 	handdrawer "github.com/bububa/openvision/go/hand/drawer"
 	"github.com/bububa/openvision/go/hand/pose"
+	"github.com/bububa/openvision/go/hand/pose3d"
 )

 func main() {
@@ -27,17 +28,19 @@ func main() {
 	cpuCores := common.GetBigCPUCount()
 	common.SetOMPThreads(cpuCores)
 	log.Printf("CPU big cores:%d\n", cpuCores)
-	estimator := handpose(modelPath)
-	defer estimator.Destroy()
-	common.SetEstimatorThreads(estimator, cpuCores)
-	for idx, d := range []detecter.Detecter{
-		yolox(modelPath),
-		nanodet(modelPath),
-	} {
-		defer d.Destroy()
-		common.SetEstimatorThreads(d, cpuCores)
-		detect(d, estimator, imgPath, "hand1.jpg", idx)
-	}
+	// estimator := handpose(modelPath)
+	// defer estimator.Destroy()
+	// common.SetEstimatorThreads(estimator, cpuCores)
+	// for idx, d := range []detecter.Detecter{
+	// 	yolox(modelPath),
+	// 	nanodet(modelPath),
+	// } {
+	// 	defer d.Destroy()
+	// 	common.SetEstimatorThreads(d, cpuCores)
+	// 	detect(d, estimator, imgPath, "hand2.jpg", idx)
+	// }
+	d3d := mediapipe(modelPath)
+	detect3d(d3d, imgPath, "hand1.jpg")
 }

 func yolox(modelPath string) detecter.Detecter {
@@ -67,6 +70,16 @@ func handpose(modelPath string) pose.Estimator {
 	return d
 }

+func mediapipe(modelPath string) *pose3d.Mediapipe {
+	palmPath := filepath.Join(modelPath, "mediapipe/palm/full")
+	handPath := filepath.Join(modelPath, "mediapipe/hand/full")
+	d := pose3d.NewMediapipe()
+	if err := d.LoadModel(palmPath, handPath); err != nil {
+		log.Fatalln(err)
+	}
+	return d
+}
+
 func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename string, idx int) {
 	inPath := filepath.Join(imgPath, filename)
 	imgSrc, err := loadImage(inPath)
@@ -104,6 +117,36 @@ func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename stri
 	if err := saveImage(out, outPath); err != nil {
 		log.Fatalln(err)
 	}
+}
+
+func detect3d(d *pose3d.Mediapipe, imgPath string, filename string) {
+	inPath := filepath.Join(imgPath, filename)
+	imgSrc, err := loadImage(inPath)
+	if err != nil {
+		log.Fatalln("load image failed,", err)
+	}
+	img := common.NewImage(imgSrc)
+	rois, err := d.Detect(img)
+	if err != nil {
+		log.Fatalln(err)
+	}
+	log.Printf("%+v\n", rois)
+	drawer := handdrawer.New()
+	outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("pose3d-hand-%s", filename))
+	out := drawer.DrawPalm(img, rois)
+
+	if err := saveImage(out, outPath); err != nil {
+		log.Fatalln(err)
+	}
+
+	for idx, roi := range rois {
+		outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("pose3d-palm3d-%d-%s", idx, filename))
+		out := drawer.DrawPalm3D(roi, 400, "#442519")
+
+		if err := saveImage(out, outPath); err != nil {
+			log.Fatalln(err)
+		}
+	}

 }

--- a/go/face/tracker/cgo.go
+++ b/go/face/tracker/cgo.go
@@ -1,6 +1,6 @@
 // +build !vulkan

-package eye
+package tracker

 /*
 #cgo CXXFLAGS:   --std=c++11 -fopenmp
--- a/go/hand/drawer/const.go
+++ b/go/hand/drawer/const.go
@@ -7,71 +7,16 @@ import (
 const (
 	// DefaultBorderColor default drawer border color
 	DefaultBorderColor = common.Green
+	// DefaultKeypointColor default drawer keypoint color
+	DefaultKeypointColor = common.Pink
 	// DefaultBorderStrokeWidth default drawer border stroke width
 	DefaultBorderStrokeWidth = 3
 	// DefaultKeypointRadius default drawer keypoint radius
 	DefaultKeypointRadius = 3
 	// DefaultKeypointStrokeWidth default drawer keypoint stroke width
 	DefaultKeypointStrokeWidth = 1
-)
-
-// CocoPart coco part define
-type CocoPart = int
-
-const (
-	// CocoPartNose nose
-	CocoPartNose CocoPart = iota
-	// CocoPartLEye left eye
-	CocoPartLEye
-	// CocoPartREye right eye
-	CocoPartREye
-	// CocoPartLEar left ear
-	CocoPartLEar
-	// CocoPartREar right ear
-	CocoPartREar
-	// CocoPartLShoulder left sholder
-	CocoPartLShoulder
-	// CocoPartRShoulder right sholder
-	CocoPartRShoulder
-	// CocoPartLElbow left elbow
-	CocoPartLElbow
-	// CocoPartRElbow right elbow
-	CocoPartRElbow
-	// CocoPartLWrist left wrist
-	CocoPartLWrist
-	// CocoPartRWrist right wrist
-	CocoPartRWrist
-	// CocoPartLHip left hip
-	CocoPartLHip
-	// CocoPartRHip right hip
-	CocoPartRHip
-	// CocoPartLKnee left knee
-	CocoPartLKnee
-	// CocoPartRKnee right knee
-	CocoPartRKnee
-	// CocoPartRAnkle right ankle
-	CocoPartRAnkle
-	// CocoPartLAnkle left ankle
-	CocoPartLAnkle
-	// CocoPartNeck neck
-	CocoPartNeck
-	// CocoPartBackground background
-	CocoPartBackground
-)
-
-var (
-	// CocoPair represents joints pair
-	CocoPair = [16][2]CocoPart{
-		{0, 1}, {1, 3}, {0, 2}, {2, 4}, {5, 6}, {5, 7}, {7, 9}, {6, 8}, {8, 10}, {5, 11}, {6, 12}, {11, 12}, {11, 13}, {12, 14}, {13, 15}, {14, 16},
-	}
-	// CocoColors represents color for coco parts
-	CocoColors = [17]string{
-		"#ff0000", "#ff5500", "#ffaa00", "#ffff00",
-		"#aaff00", "#55ff00", "#00ff00", "#00ff55", "#00ffaa",
-		"#00ffff", "#00aaff", "#0055ff",
-		"#0000ff", "#aa00ff", "#ff00ff",
-		"#ff00aa", "#ff0055",
-	}
+	// DefaultLabelColor default label color
+	DefaultLabelColor = common.White
 )

 var (
--- a/go/hand/drawer/drawer.go
+++ b/go/hand/drawer/drawer.go
@@ -2,8 +2,10 @@ package drawer

 import (
 	"image"
+	"image/color"

 	"github.com/llgcode/draw2d/draw2dimg"
+	"github.com/llgcode/draw2d/draw2dkit"

 	"github.com/bububa/openvision/go/common"
 )
@@ -18,6 +20,12 @@ type Drawer struct {
 	KeypointStrokeWidth float64
 	// KeypointRadius represents keypoints circle radius
 	KeypointRadius float64
+	// KeypointColor represents keypoint color
+	KeypointColor string
+	// LabelColor represents label color
+	LabelColor string
+	// Font represents the font used to draw labels
+	Font *common.Font
 }

 // New returns a new Drawer
@@ -27,6 +35,8 @@ func New(options ...Option) *Drawer {
 		BorderStrokeWidth:   DefaultBorderStrokeWidth,
 		KeypointStrokeWidth: DefaultKeypointStrokeWidth,
 		KeypointRadius:      DefaultKeypointRadius,
+		KeypointColor:       DefaultKeypointColor,
+		LabelColor:          DefaultLabelColor,
 	}
 	for _, opt := range options {
 		opt.apply(d)
@@ -42,15 +52,15 @@ func (d *Drawer) Draw(img image.Image, rois []common.ObjectInfo, drawBorder bool
 	gc := draw2dimg.NewGraphicContext(out)
 	gc.DrawImage(img)
 	for _, roi := range rois {
+		rect := common.Rect(
+			roi.Rect.X*imgW,
+			roi.Rect.Y*imgH,
+			roi.Rect.Width*imgW,
+			roi.Rect.Height*imgH,
+		)
+		borderColor := d.BorderColor
 		if drawBorder {
 			// draw rect
-			rect := common.Rect(
-				roi.Rect.X*imgW,
-				roi.Rect.Y*imgH,
-				roi.Rect.Width*imgW,
-				roi.Rect.Height*imgH,
-			)
-			borderColor := d.BorderColor
 			common.DrawRectangle(gc, rect, borderColor, "", d.BorderStrokeWidth)
 		}
 		l := len(roi.Keypoints)
@@ -95,6 +105,115 @@ func (d *Drawer) Draw(img image.Image, rois []common.ObjectInfo, drawBorder bool
 			poseColor := PoseColors[colorIdx]
 			common.DrawCircle(gc, common.Pt(pt.Point.X*imgW, pt.Point.Y*imgH), d.KeypointRadius, poseColor, "", d.KeypointStrokeWidth)
 		}
+		// draw name
+		if roi.Name != "" {
+			common.DrawLabelInWidth(gc, d.Font, roi.Name, common.Pt(rect.X, rect.MaxY()), d.LabelColor, borderColor, rect.Width)
+		}
+	}
+	return out
+}
+
+// DrawPalm draw PalmObject
+func (d *Drawer) DrawPalm(img image.Image, rois []common.PalmObject) image.Image {
+	imgW := float64(img.Bounds().Dx())
+	imgH := float64(img.Bounds().Dy())
+	out := image.NewRGBA(img.Bounds())
+	gc := draw2dimg.NewGraphicContext(out)
+	gc.DrawImage(img)
+	for _, roi := range rois {
+		gc.SetLineWidth(d.BorderStrokeWidth)
+		gc.SetStrokeColor(common.ColorFromHex(d.BorderColor))
+		gc.BeginPath()
+		for idx, pt := range roi.Rect {
+			gc.MoveTo(pt.X*imgW, pt.Y*imgH)
+			if idx == len(roi.Rect)-1 {
+				gc.LineTo(roi.Rect[0].X*imgW, roi.Rect[0].Y*imgH)
+			} else {
+				gc.LineTo(roi.Rect[idx+1].X*imgW, roi.Rect[idx+1].Y*imgH)
+			}
+		}
+		gc.Close()
+		gc.Stroke()
+
+		l := len(roi.Skeleton)
+		if l == 0 {
+			continue
+		}
+		// draw skeleton
+		for idx := range roi.Skeleton[:l-1] {
+			var (
+				p0        common.Point
+				p1        common.Point
+				poseColor = PoseColors[idx/4]
+			)
+			gc.SetStrokeColor(common.ColorFromHex(poseColor))
+			if idx == 5 || idx == 9 || idx == 13 || idx == 17 {
+				p0 = roi.Skeleton[0]
+				p1 = roi.Skeleton[idx]
+				gc.BeginPath()
+				gc.MoveTo(p0.X*imgW, p0.Y*imgH)
+				gc.LineTo(p1.X*imgW, p1.Y*imgH)
+				gc.Close()
+				gc.Stroke()
+			} else if idx == 4 || idx == 8 || idx == 12 || idx == 16 {
+				continue
+			}
+			p0 = roi.Skeleton[idx]
+			p1 = roi.Skeleton[idx+1]
+			gc.BeginPath()
+			gc.MoveTo(p0.X*imgW, p0.Y*imgH)
+			gc.LineTo(p1.X*imgW, p1.Y*imgH)
+			gc.Close()
+			gc.Stroke()
+		}
+		for _, pt := range roi.Landmarks {
+			common.DrawCircle(gc, common.Pt(pt.X*imgW, pt.Y*imgH), d.KeypointRadius, d.KeypointColor, "", d.KeypointStrokeWidth)
+		}
+	}
+	return out
+}
+
+// DrawPalm3D draw 3d PalmObject
+func (d *Drawer) DrawPalm3D(roi common.PalmObject, size float64, bg string) image.Image {
+	out := image.NewRGBA(image.Rect(0, 0, int(size), int(size)))
+	gc := draw2dimg.NewGraphicContext(out)
+	l := len(roi.Skeleton3d)
+	if l == 0 {
+		return out
+	}
+	if bg != "" {
+		bgColor := common.ColorFromHex(bg)
+		gc.SetFillColor(bgColor)
+		draw2dkit.Rectangle(gc, 0, 0, size, size)
+		gc.Fill()
+		gc.SetFillColor(color.Transparent)
+	}
+	// draw skeleton3d
+	for idx := range roi.Skeleton3d[:l-1] {
+		var (
+			p0        common.Point3d
+			p1        common.Point3d
+			poseColor = PoseColors[idx/4]
+		)
+		gc.SetStrokeColor(common.ColorFromHex(poseColor))
+		if idx == 5 || idx == 9 || idx == 13 || idx == 17 {
+			p0 = roi.Skeleton3d[0]
+			p1 = roi.Skeleton3d[idx]
+			gc.BeginPath()
+			gc.MoveTo(p0.X*size, p0.Y*size)
+			gc.LineTo(p1.X*size, p1.Y*size)
+			gc.Close()
+			gc.Stroke()
+		} else if idx == 4 || idx == 8 || idx == 12 || idx == 16 {
+			continue
+		}
+		p0 = roi.Skeleton3d[idx]
+		p1 = roi.Skeleton3d[idx+1]
+		gc.BeginPath()
+		gc.MoveTo(p0.X*size, p0.Y*size)
+		gc.LineTo(p1.X*size, p1.Y*size)
+		gc.Close()
+		gc.Stroke()
 	}
 	return out
 }
--- a/go/hand/drawer/option.go
+++ b/go/hand/drawer/option.go
@@ -1,5 +1,9 @@
 package drawer

+import (
+	"github.com/bububa/openvision/go/common"
+)
+
 // Option represents Drawer option interface
 type Option interface {
 	apply(*Drawer)
@@ -38,3 +42,17 @@ func WithKeypointStrokeWidth(w float64) Option {
 		d.KeypointStrokeWidth = w
 	})
 }
+
+// WithKeypointColor set Drawer KeypointColor
+func WithKeypointColor(color string) Option {
+	return optionFunc(func(d *Drawer) {
+		d.KeypointColor = color
+	})
+}
+
+// WithFont set Drawer Font
+func WithFont(font *common.Font) Option {
+	return optionFunc(func(d *Drawer) {
+		d.Font = font
+	})
+}
--- a/go/hand/pose3d/cgo.go
+++ b/go/hand/pose3d/cgo.go
@@ -0,0 +1,11 @@
+// +build !vulkan
+
+package pose3d
+
+/*
+#cgo CXXFLAGS:   --std=c++11 -fopenmp
+#cgo CPPFLAGS:   -I ${SRCDIR}/../../../include -I /usr/local/include
+#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision
+#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib
+*/
+import "C"
--- a/go/hand/pose3d/cgo_vulkan.go
+++ b/go/hand/pose3d/cgo_vulkan.go
@@ -0,0 +1,11 @@
+// +build vulkan
+
+package pose3d
+
+/*
+#cgo CXXFLAGS:   --std=c++11 -fopenmp
+#cgo CPPFLAGS:   -I ${SRCDIR}/../../../include -I /usr/local/include
+#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision -lglslang -lvulkan -lSPIRV -lOGLCompiler -lMachineIndependent -lGenericCodeGen -lOSDependent
+#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib
+*/
+import "C"
--- a/go/hand/pose3d/doc.go
+++ b/go/hand/pose3d/doc.go
@@ -0,0 +1,2 @@
+// Package pose3d hand 3d pose estimator
+package pose3d
--- a/go/hand/pose3d/mediapipe.go
+++ b/go/hand/pose3d/mediapipe.go
@@ -0,0 +1,62 @@
+package pose3d
+
+/*
+#include <stdlib.h>
+#include <stdbool.h>
+#include "openvision/common/common.h"
+#include "openvision/hand/pose3d.h"
+*/
+import "C"
+import (
+	"unsafe"
+
+	openvision "github.com/bububa/openvision/go"
+	"github.com/bububa/openvision/go/common"
+)
+
+// Mediapipe represents mediapipe hand 3d pose estimator
+type Mediapipe struct {
+	d C.IHandPose3DEstimator
+}
+
+func NewMediapipe() *Mediapipe {
+	return &Mediapipe{
+		d: C.new_mediapipe_hand(),
+	}
+}
+
+func (m *Mediapipe) Destroy() {
+	C.destroy_mediapipe_hand(m.d)
+}
+
+// LoadModel loads the models located at palmPath and handPath, returns an error on non-zero C return code
+func (m *Mediapipe) LoadModel(palmPath string, handPath string) error {
+	cPalm := C.CString(palmPath)
+	defer C.free(unsafe.Pointer(cPalm))
+	cHand := C.CString(handPath)
+	defer C.free(unsafe.Pointer(cHand))
+	retCode := C.mediapipe_hand_load_model(m.d, cPalm, cHand)
+	if retCode != 0 {
+		return openvision.LoadModelError(int(retCode))
+	}
+	return nil
+}
+
+// Detect detect hand 3d pose
+func (m *Mediapipe) Detect(img *common.Image) ([]common.PalmObject, error) {
+	imgWidth := img.WidthF64()
+	imgHeight := img.HeightF64()
+	data := img.Bytes()
+	cObjs := common.NewCPalmObjectVector()
+	defer common.FreeCPalmObjectVector(cObjs)
+	errCode := C.mediapipe_hand_detect(
+		m.d,
+		(*C.uchar)(unsafe.Pointer(&data[0])),
+		C.int(imgWidth), C.int(imgHeight),
+		(*C.PalmObjectVector)(unsafe.Pointer(cObjs)),
+	)
+	if errCode != 0 {
+		return nil, openvision.DetectHandError(int(errCode))
+	}
+	return common.GoPalmObjectVector(cObjs, imgWidth, imgHeight), nil
+}
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -71,6 +71,7 @@ target_include_directories(openvision
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand>
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter>
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/pose>
+        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/pose3d>

        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose>
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose/detecter>
@@ -109,6 +110,7 @@ file(COPY
 file(COPY
    ${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter.h
    ${CMAKE_CURRENT_SOURCE_DIR}/hand/pose.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/hand/pose3d.h
    DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/hand
 )

--- a/src/common/common.cpp
+++ b/src/common/common.cpp
@@ -58,6 +58,13 @@ void FreePoint2fVector(Point2fVector *p) {
  }
 }

+void FreePoint3dVector(Point3dVector *p) {
+  if (p->points != NULL) {
+    free(p->points);
+    p->points = NULL;
+  }
+}
+
 void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val) {
  if (p->points == NULL || i >= p->length) {
    return;
--- a/src/common/common.h
+++ b/src/common/common.h
@@ -11,123 +11,134 @@ typedef ov::Size Size;
 typedef ov::Size2f Size2f;
 typedef ov::Point Point;
 typedef ov::Point2f Point2f;
+typedef ov::Point3d Point3d;
 typedef ov::Rect Rect;
 typedef ov::Keypoint Keypoint;
 #else

 // Wrapper for an individual cv::cvSize
 typedef struct Size {
-    int width;
-    int height;
+  int width;
+  int height;
 } Size;
 //
 // Wrapper for an individual cv::cvSize2f
 typedef struct Size2f {
-    int width;
-    int height;
+  int width;
+  int height;
 } Size2f;

 // Wrapper for an individual cv::cvPoint
 typedef struct Point {
-    int x;
-    int y;
+  int x;
+  int y;
 } Point;

 // Wrapper for an individual cv::Point2f
 typedef struct Point2f {
-    float x;
-    float y;
+  float x;
+  float y;
 } Point2f;

+typedef struct Point3d {
+  float x;
+  float y;
+  float z;
+} Point3d;

 // Wrapper for an individual cv::Rect
 typedef struct Rect {
-    int x;
-    int y;
-    int width;
-    int height;
+  int x;
+  int y;
+  int width;
+  int height;
 } Rect;

-
 typedef struct Keypoint {
-    Point2f p;
-    float score;
-    int id;
+  Point2f p;
+  float score;
+  int id;
 } Keypoint;

-
 #endif

-typedef void* IEstimator;
+typedef void *IEstimator;

 int get_gpu_count();
 int create_gpu_instance();
 void destroy_gpu_instance();
-int get_big_cpu_count(); 
+int get_big_cpu_count();
 void set_omp_num_threads(int n);
-int load_model(IEstimator e, const char* root_path);
+int load_model(IEstimator e, const char *root_path);
 void destroy_estimator(IEstimator e);
 void set_num_threads(IEstimator e, int n);
 void set_light_mode(IEstimator e, bool mode);

 typedef struct Point2fVector {
-    Point2f* points;
-    int length;
+  Point2f *points;
+  int length;
 } Point2fVector;

 void FreePoint2fVector(Point2fVector *p);
-void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f* val);
+void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val);
+
+typedef struct Point3dVector {
+  Point3d *points;
+  int length;
+} Point3dVector;
+
+void FreePoint3dVector(Point3dVector *p);

 typedef struct RectVector {
-    Rect* rects;
-    int length;
+  Rect *rects;
+  int length;
 } RectVector;

 void FreeRectVector(RectVector *p);

 typedef struct FloatVector {
-    float* values;
-    int length;
+  float *values;
+  int length;
 } FloatVector;

 void FreeFloatVector(FloatVector *p);

 typedef struct Bytes {
-    unsigned char* values;
-    int length;
+  unsigned char *values;
+  int length;
 } Bytes;

 void FreeBytes(Bytes *p);

 typedef struct KeypointVector {
-    Keypoint* points;
-    int length;
+  Keypoint *points;
+  int length;
 } KeypointVector;

 void FreeKeypointVector(KeypointVector *p);
-void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint* val);
+void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint *val);

 typedef struct ImageC {
-    unsigned char* data;
-    int width;
-    int height;
-    int channels;
+  unsigned char *data;
+  int width;
+  int height;
+  int channels;
 } Image;

-void FreeImage(Image* p);
+void FreeImage(Image *p);

 typedef struct ObjectInfoC {
-    Rect rect;
-    float score;
-    int label;
-    KeypointVector* pts;
+  Rect rect;
+  float score;
+  int label;
+  KeypointVector *pts;
 } ObjectInfo;

 void FreeObjectInfo(ObjectInfo *p);

 typedef struct ObjectInfoVector {
-    ObjectInfo* items;
-    int length;
+  ObjectInfo *items;
+  int length;
 } ObjectInfoVector;

 void FreeObjectInfoVector(ObjectInfoVector *p);
--- a/src/common/common.hpp
+++ b/src/common/common.hpp
@@ -76,6 +76,13 @@ struct Point2f {
  };
 };

+struct Point3d {
+  float x;
+  float y;
+  float z;
+  Point3d(float _x = 0, float _y = 0, float _z = 0) : x(_x), y(_y), z(_z) {}
+};
+
 // Wrapper for an individual cv::Rect
 struct Rect {
  int x;
--- a/src/hand/detecter/nanodet/nanodet.cpp
+++ b/src/hand/detecter/nanodet/nanodet.cpp
@@ -1,6 +1,6 @@
 #include "nanodet.hpp"
-#include <string>
 #include <float.h>
+#include <string>

 #ifdef OV_VULKAN
 #include "gpu.h"
@@ -8,227 +8,219 @@

 namespace ovhand {

-static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Mat& dis_pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<ov::ObjectInfo>& objects)
-{
+static void generate_nanodet_proposals(const ncnn::Mat &cls_pred,
+                                       const ncnn::Mat &dis_pred, int stride,
+                                       const ncnn::Mat &in_pad,
+                                       float prob_threshold,
+                                       std::vector<ov::ObjectInfo> &objects) {

-    const int num_grid = cls_pred.h;
+  const int num_grid = cls_pred.h;

-    int num_grid_x;
-    int num_grid_y;
-    if (in_pad.w > in_pad.h)
-    {
-        num_grid_x = in_pad.w / stride;
-        num_grid_y = num_grid / num_grid_x;
-    }
-    else
-    {
-        num_grid_y = in_pad.h / stride;
-        num_grid_x = num_grid / num_grid_y;
-    }
+  int num_grid_x;
+  int num_grid_y;
+  if (in_pad.w > in_pad.h) {
+    num_grid_x = in_pad.w / stride;
+    num_grid_y = num_grid / num_grid_x;
+  } else {
+    num_grid_y = in_pad.h / stride;
+    num_grid_x = num_grid / num_grid_y;
+  }

-    const int num_class = cls_pred.w;
-    const int reg_max_1 = dis_pred.w / 4;
-    //__android_log_print(ANDROID_LOG_WARN, "ncnn","cls_pred h %d, w %d",cls_pred.h,cls_pred.w);
-    //__android_log_print(ANDROID_LOG_WARN, "ncnn","%d,%d,%d,%d",num_grid_x,num_grid_y,num_class,reg_max_1);
-    for (int i = 0; i < num_grid_y; i++)
-    {
-        for (int j = 0; j < num_grid_x; j++)
-        {
-            const int idx = i * num_grid_x + j;
+  const int num_class = cls_pred.w;
+  const int reg_max_1 = dis_pred.w / 4;

-            const float* scores = cls_pred.row(idx);
+  for (int i = 0; i < num_grid_y; i++) {
+    for (int j = 0; j < num_grid_x; j++) {
+      const int idx = i * num_grid_x + j;

-            // find label with max score
-            int label = -1;
-            float score = -FLT_MAX;
-            for (int k = 0; k < num_class; k++)
-            {
-                if (scores[k] > score)
-                {
-                    label = k;
-                    score = scores[k];
-                }
-            }
+      const float *scores = cls_pred.row(idx);

-            if (score >= prob_threshold)
-            {
-                ncnn::Mat bbox_pred(reg_max_1, 4, (void*)dis_pred.row(idx));
-                {
-                    ncnn::Layer* softmax = ncnn::create_layer("Softmax");
-
-                    ncnn::ParamDict pd;
-                    pd.set(0, 1); // axis
-                    pd.set(1, 1);
-                    softmax->load_param(pd);
-
-                    ncnn::Option opt;
-                    // opt.num_threads = 1;
-                    opt.use_packing_layout = false;
-
-                    softmax->create_pipeline(opt);
-
-                    softmax->forward_inplace(bbox_pred, opt);
-
-                    softmax->destroy_pipeline(opt);
-
-                    delete softmax;
-                }
-
-                float pred_ltrb[4];
-                for (int k = 0; k < 4; k++)
-                {
-                    float dis = 0.f;
-                    const float* dis_after_sm = bbox_pred.row(k);
-                    for (int l = 0; l < reg_max_1; l++)
-                    {
-                        dis += l * dis_after_sm[l];
-                    }
-
-                    pred_ltrb[k] = dis * stride;
-                }
-
-                float pb_cx = (j + 0.5f) * stride;
-                float pb_cy = (i + 0.5f) * stride;
-
-                float x0 = pb_cx - pred_ltrb[0];
-                float y0 = pb_cy - pred_ltrb[1];
-                float x1 = pb_cx + pred_ltrb[2];
-                float y1 = pb_cy + pred_ltrb[3];
-
-                ov::ObjectInfo obj;
-                obj.rect.x = x0;
-                obj.rect.y = y0;
-                obj.rect.width = x1 - x0;
-                obj.rect.height = y1 - y0;
-                obj.label = label;
-                obj.score= score;
-
-                objects.push_back(obj);
-            }
+      // find label with max score
+      int label = -1;
+      float score = -FLT_MAX;
+      for (int k = 0; k < num_class; k++) {
+        if (scores[k] > score) {
+          label = k;
+          score = scores[k];
        }
-    }
-}
+      }

-int Nanodet::Detect(const unsigned char* rgbdata,
-    int img_width, int img_height, 
-    std::vector<ov::ObjectInfo>& rois) {
-	if (!initialized_) {
-		return 10000;
-	}
-	if (rgbdata == 0){
-		return 10001;
-    }
-
-    int w = img_width;
-    int h = img_height;
-    float scale = 1.f;
-    if (w > h) {
-        scale = (float)target_size / w;
-        w = target_size;
-        h = h * scale;
-    } else {
-        scale = (float)target_size / h;
-        h = target_size;
-        w = w * scale;
-    }
-
-    ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, w, h);
-
-    // pad to target_size rectangle
-    float wpad = 320-w;//(w + 31) / 32 * 32 - w;
-    float hpad = 320-h;//(h + 31) / 32 * 32 - h;
-    ncnn::Mat in_pad;
-    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
-
-    in_pad.substract_mean_normalize(mean_vals, norm_vals);
-
-    ncnn::Extractor ex = net_->create_extractor();
-    ex.set_light_mode(light_mode_);
-    ex.set_num_threads(num_threads);
-    ex.input("input.1", in_pad);
-
-    std::vector<ov::ObjectInfo> proposals;
-    // stride 8
-    {
-        ncnn::Mat cls_pred;
-        ncnn::Mat dis_pred;
-        ex.extract("cls_pred_stride_8", cls_pred);
-        ex.extract("dis_pred_stride_8", dis_pred);
-
-        std::vector<ov::ObjectInfo> objects8;
-        generate_nanodet_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold, objects8);
-
-        proposals.insert(proposals.end(), objects8.begin(), objects8.end());
-    }
-
-    // stride 16
-    {
-        ncnn::Mat cls_pred;
-        ncnn::Mat dis_pred;
-        ex.extract("cls_pred_stride_16", cls_pred);
-        ex.extract("dis_pred_stride_16", dis_pred);
-
-        std::vector<ov::ObjectInfo> objects16;
-        generate_nanodet_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold, objects16);
-
-        proposals.insert(proposals.end(), objects16.begin(), objects16.end());
-    }
-
-    // stride 32
-    {
-        ncnn::Mat cls_pred;
-        ncnn::Mat dis_pred;
-        ex.extract("cls_pred_stride_32", cls_pred);
-        ex.extract("dis_pred_stride_32", dis_pred);
-
-        std::vector<ov::ObjectInfo> objects32;
-        generate_nanodet_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold, objects32);
-
-        proposals.insert(proposals.end(), objects32.begin(), objects32.end());
-    }
-
-    // sort all proposals by score from highest to lowest
-    qsort_descent_inplace(proposals);
-
-    // apply nms with nms_threshold
-    std::vector<int> picked;
-    nms_sorted_bboxes(proposals, picked, nms_threshold);
-
-    int count = picked.size();
-    rois.resize(count);
-
-    for (int i = 0; i < count; i++)
-    {
-        ov::ObjectInfo roi = proposals[picked[i]];
-
-        // adjust offset to original unpadded
-        float x0 = (roi.rect.x - (wpad / 2)) / scale;
-        float y0 = (roi.rect.y - (hpad / 2)) / scale;
-        float x1 = (roi.rect.x + roi.rect.width - (wpad / 2)) / scale;
-        float y1 = (roi.rect.y + roi.rect.height - (hpad / 2)) / scale;
-
-        // clip
-        x0 = std::max(std::min(x0, (float)(img_width - 1)), 0.f);
-        y0 = std::max(std::min(y0, (float)(img_height - 1)), 0.f);
-        x1 = std::max(std::min(x1, (float)(img_width - 1)), 0.f);
-        y1 = std::max(std::min(y1, (float)(img_height - 1)), 0.f);
-
-        roi.rect.x = x0;
-        roi.rect.y = y0;
-        roi.rect.width = x1 - x0;
-        roi.rect.height = y1 - y0;
-
-        rois[i] = roi;
-    }
-    // sort objects by area
-    struct
-    {
-        bool operator()(const ov::ObjectInfo& a, const ov::ObjectInfo& b) const
+      if (score >= prob_threshold) {
+        ncnn::Mat bbox_pred(reg_max_1, 4, (void *)dis_pred.row(idx));
        {
-            return a.rect.area() > b.rect.area();
+          ncnn::Layer *softmax = ncnn::create_layer("Softmax");
+
+          ncnn::ParamDict pd;
+          pd.set(0, 1); // axis
+          pd.set(1, 1);
+          softmax->load_param(pd);
+
+          ncnn::Option opt;
+          opt.num_threads = 1;
+          opt.use_packing_layout = false;
+
+          softmax->create_pipeline(opt);
+
+          softmax->forward_inplace(bbox_pred, opt);
+
+          softmax->destroy_pipeline(opt);
+
+          delete softmax;
        }
-    } objects_area_greater;
-    std::sort(rois.begin(), rois.end(), objects_area_greater);
-    return 0;
+
+        float pred_ltrb[4];
+        for (int k = 0; k < 4; k++) {
+          float dis = 0.f;
+          const float *dis_after_sm = bbox_pred.row(k);
+          for (int l = 0; l < reg_max_1; l++) {
+            dis += l * dis_after_sm[l];
+          }
+          pred_ltrb[k] = dis * stride;
+        }
+
+        float pb_cx = (j + 0.5f) * stride;
+        float pb_cy = (i + 0.5f) * stride;
+
+        float x0 = pb_cx - pred_ltrb[0];
+        float y0 = pb_cy - pred_ltrb[1];
+        float x1 = pb_cx + pred_ltrb[2];
+        float y1 = pb_cy + pred_ltrb[3];
+
+        ov::ObjectInfo obj;
+        obj.rect.x = x0;
+        obj.rect.y = y0;
+        obj.rect.width = x1 - x0;
+        obj.rect.height = y1 - y0;
+        obj.label = label;
+        obj.score = score;
+
+        objects.push_back(obj);
+      }
+    }
+  }
 }
+
+// Detect runs the NanoDet network on a packed RGB image.
+//
+// rgbdata:    RGB pixels, img_width x img_height.
+// rois:       resized and filled with the detections that survive NMS, in
+//             original image coordinates, clipped to the image and sorted by
+//             descending area.
+// Returns 0 on success, 10000 when the detector is not initialized and 10001
+// when rgbdata is null.
+int Nanodet::Detect(const unsigned char *rgbdata, int img_width, int img_height,
+                    std::vector<ov::ObjectInfo> &rois) {
+  if (!initialized_) {
+    return 10000;
+  }
+  if (rgbdata == 0) {
+    return 10001;
+  }
+
+  // Resize so that the longer side becomes target_size, keeping aspect ratio.
+  int w = img_width;
+  int h = img_height;
+  float scale = 1.f;
+  if (w > h) {
+    scale = (float)target_size / w;
+    w = target_size;
+    h = h * scale;
+  } else {
+    scale = (float)target_size / h;
+    h = target_size;
+    w = w * scale;
+  }
+
+  ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB,
+                                               img_width, img_height, w, h);
+
+  // pad to a 320x320 square
+  // NOTE(review): 320 is hard-coded; presumably equal to target_size - confirm.
+  // The padding is kept as int: copy_make_border takes integer borders, so a
+  // float padding with an odd value would lose half a pixel on both sides
+  // (319 instead of 320) and would not match the offset removed below.
+  int wpad = 320 - w; //(w + 31) / 32 * 32 - w;
+  int hpad = 320 - h; //(h + 31) / 32 * 32 - h;
+  ncnn::Mat in_pad;
+  ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2,
+                         wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
+
+  in_pad.substract_mean_normalize(mean_vals, norm_vals);
+
+  ncnn::Extractor ex = net_->create_extractor();
+  ex.set_light_mode(light_mode_);
+  ex.set_num_threads(num_threads);
+  ex.input("input.1", in_pad);
+
+  // Collect proposals from the three output levels of the network.
+  static const int kStrides[3] = {8, 16, 32};
+  static const char *const kClsBlobs[3] = {
+      "cls_pred_stride_8", "cls_pred_stride_16", "cls_pred_stride_32"};
+  static const char *const kDisBlobs[3] = {
+      "dis_pred_stride_8", "dis_pred_stride_16", "dis_pred_stride_32"};
+
+  std::vector<ov::ObjectInfo> proposals;
+  for (int level = 0; level < 3; level++) {
+    ncnn::Mat cls_pred;
+    ncnn::Mat dis_pred;
+    ex.extract(kClsBlobs[level], cls_pred);
+    ex.extract(kDisBlobs[level], dis_pred);
+
+    std::vector<ov::ObjectInfo> level_objects;
+    generate_nanodet_proposals(cls_pred, dis_pred, kStrides[level], in_pad,
+                               prob_threshold, level_objects);
+
+    proposals.insert(proposals.end(), level_objects.begin(),
+                     level_objects.end());
+  }
+
+  // sort all proposals by score from highest to lowest
+  qsort_descent_inplace(proposals);
+
+  // apply nms with nms_threshold
+  std::vector<int> picked;
+  nms_sorted_bboxes(proposals, picked, nms_threshold);
+
+  int count = picked.size();
+  rois.resize(count);
+
+  for (int i = 0; i < count; i++) {
+    ov::ObjectInfo roi = proposals[picked[i]];
+
+    // adjust offset to original unpadded (wpad / 2 and hpad / 2 are the
+    // integer left/top borders that were actually added above)
+    float x0 = (roi.rect.x - (wpad / 2)) / scale;
+    float y0 = (roi.rect.y - (hpad / 2)) / scale;
+    float x1 = (roi.rect.x + roi.rect.width - (wpad / 2)) / scale;
+    float y1 = (roi.rect.y + roi.rect.height - (hpad / 2)) / scale;
+
+    // clip
+    x0 = std::max(std::min(x0, (float)(img_width - 1)), 0.f);
+    y0 = std::max(std::min(y0, (float)(img_height - 1)), 0.f);
+    x1 = std::max(std::min(x1, (float)(img_width - 1)), 0.f);
+    y1 = std::max(std::min(y1, (float)(img_height - 1)), 0.f);
+
+    roi.rect.x = x0;
+    roi.rect.y = y0;
+    roi.rect.width = x1 - x0;
+    roi.rect.height = y1 - y0;
+
+    rois[i] = roi;
+  }
+  // sort objects by area
+  struct {
+    bool operator()(const ov::ObjectInfo &a, const ov::ObjectInfo &b) const {
+      return a.rect.area() > b.rect.area();
+    }
+  } objects_area_greater;
+  std::sort(rois.begin(), rois.end(), objects_area_greater);
+  return 0;
 }
+} // namespace ovhand
--- a/src/hand/pose3d.h
+++ b/src/hand/pose3d.h
@@ -0,0 +1,37 @@
+#ifndef _HAND_POSE3D_C_H_
+#define _HAND_POSE3D_C_H_
+
+#include "../common/common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+// PalmObject is the C-visible description of one detected hand.
+// The point vectors are heap-allocated by mediapipe_hand_detect and released
+// by FreePalmObject.
+typedef struct PalmObject {
+  // Detection score of the palm.
+  float score;
+  // Rotation of the hand region (presumably radians - confirm with the
+  // detector implementation).
+  float rotation;
+  // Four corner points of the hand region.
+  Point2fVector *rect;
+  // Seven palm landmarks.
+  Point2fVector *landmarks;
+  // 2D skeleton points; NULL when no skeleton was produced.
+  Point2fVector *skeleton;
+  // 3D skeleton points; NULL when no skeleton was produced.
+  Point3dVector *skeleton3d;
+} PalmObject;
+
+// PalmObjectVector is a C array of PalmObject together with its length.
+typedef struct PalmObjectVector {
+  // Heap-allocated array of `length` entries; NULL when length is 0.
+  PalmObject *items;
+  // Number of entries in items.
+  int length;
+} PalmObjectVector;
+
+// Releases the point vectors referenced by obj (not obj itself).
+void FreePalmObject(PalmObject *obj);
+// Releases every item of vec and the items array (not vec itself).
+void FreePalmObjectVector(PalmObjectVector *vec);
+
+// Opaque handle to a 3D hand pose estimator.
+typedef void *IHandPose3DEstimator;
+// Creates a MediaPipe hand estimator; release it with destroy_mediapipe_hand.
+IHandPose3DEstimator new_mediapipe_hand();
+// Destroys an estimator created by new_mediapipe_hand.
+void destroy_mediapipe_hand(IHandPose3DEstimator d);
+// Loads the palm detection model from palm_path and the hand landmark model
+// from hand_path. Returns 0 on success, non-zero on failure.
+int mediapipe_hand_load_model(IHandPose3DEstimator d, const char *palm_path,
+                              const char *hand_path);
+// Detects hands in an RGB image of img_width x img_height pixels and stores
+// the results in vec. Returns 0 on success, non-zero on failure.
+int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata,
+                          int img_width, int img_height, PalmObjectVector *vec);
+#ifdef __cplusplus
+}
+#endif
+
+#endif // !_HAND_POSE3D_C_H_
--- a/src/hand/pose3d/estimator.cpp
+++ b/src/hand/pose3d/estimator.cpp
@@ -0,0 +1,106 @@
+#include "../pose3d.h"
+#include "mediapipe/mediapipe.hpp"
+#include <iostream>
+
+// FreePalmObject releases every point vector owned by obj and resets the
+// corresponding pointers to NULL. obj itself is not freed. Passing NULL is a
+// no-op.
+//
+// Each vector struct is allocated with malloc by mediapipe_hand_detect, so it
+// has to be freed here in addition to its point buffer.
+// NOTE(review): this relies on FreePoint2fVector / FreePoint3dVector releasing
+// only the point buffer and not the struct itself - confirm in common.
+void FreePalmObject(PalmObject *obj) {
+  if (obj == NULL) {
+    return;
+  }
+  if (obj->rect != NULL) {
+    FreePoint2fVector(obj->rect);
+    free(obj->rect);
+    obj->rect = NULL;
+  }
+  if (obj->skeleton != NULL) {
+    FreePoint2fVector(obj->skeleton);
+    free(obj->skeleton);
+    obj->skeleton = NULL;
+  }
+  if (obj->skeleton3d != NULL) {
+    FreePoint3dVector(obj->skeleton3d);
+    free(obj->skeleton3d);
+    obj->skeleton3d = NULL;
+  }
+  if (obj->landmarks != NULL) {
+    FreePoint2fVector(obj->landmarks);
+    free(obj->landmarks);
+    obj->landmarks = NULL;
+  }
+}
+
+// FreePalmObjectVector releases every PalmObject stored in vec and then the
+// items array itself. vec is left with items == NULL; vec is not freed.
+void FreePalmObjectVector(PalmObjectVector *vec) {
+  PalmObject *items = vec->items;
+  if (items == NULL) {
+    return;
+  }
+  const int count = vec->length;
+  for (int idx = 0; idx < count; ++idx) {
+    FreePalmObject(items + idx);
+  }
+  free(items);
+  vec->items = NULL;
+}
+
+// Allocates a MediapipeHand estimator and returns it as an opaque handle.
+IHandPose3DEstimator new_mediapipe_hand() {
+  ovhand3d::MediapipeHand *estimator = new ovhand3d::MediapipeHand();
+  return estimator;
+}
+
+// Deletes the MediapipeHand estimator behind the opaque handle d.
+void destroy_mediapipe_hand(IHandPose3DEstimator d) {
+  ovhand3d::MediapipeHand *estimator =
+      static_cast<ovhand3d::MediapipeHand *>(d);
+  delete estimator;
+}
+
+// Forwards to MediapipeHand::LoadModel on the estimator behind d and returns
+// its result code.
+int mediapipe_hand_load_model(IHandPose3DEstimator d, const char *palm_path,
+                              const char *hand_path) {
+  ovhand3d::MediapipeHand *estimator =
+      static_cast<ovhand3d::MediapipeHand *>(d);
+  const int status = estimator->LoadModel(palm_path, hand_path);
+  return status;
+}
+
+// mediapipe_hand_detect runs MediapipeHand::Detect and converts the result
+// into the C structures declared in pose3d.h.
+//
+// On success (return value 0) objects->length is the number of hands and
+// objects->items is a malloc'd array (NULL when no hand was found). Every item
+// owns its rect (4 points), landmarks (7 points) and, when a skeleton was
+// produced, skeleton / skeleton3d vectors; otherwise those two are NULL.
+// Release the result with FreePalmObjectVector.
+// When Detect fails its error code is returned and objects is left untouched.
+int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata,
+                          int img_width, int img_height,
+                          PalmObjectVector *objects) {
+  // Sizes of the fixed arrays hand_pos[] and landmarks[] in
+  // ovhand3d::PalmObject.
+  const int kRectPoints = 4;
+  const int kPalmLandmarks = 7;
+
+  std::vector<ovhand3d::PalmObject> objs;
+
+  int ret = static_cast<ovhand3d::MediapipeHand *>(d)->Detect(
+      rgbdata, img_width, img_height, objs);
+  if (ret != 0) {
+    return ret;
+  }
+  const size_t total_objs = objs.size();
+  objects->length = total_objs;
+  if (total_objs == 0) {
+    objects->items = NULL;
+    return 0;
+  }
+  objects->items = (PalmObject *)malloc(total_objs * sizeof(PalmObject));
+  for (size_t i = 0; i < total_objs; ++i) {
+    ovhand3d::PalmObject &src = objs[i];
+    PalmObject &dst = objects->items[i];
+
+    dst.score = src.score;
+    dst.rotation = src.rotation;
+
+    dst.rect = (Point2fVector *)malloc(sizeof(Point2fVector));
+    dst.rect->length = kRectPoints;
+    dst.rect->points = (Point2f *)malloc(kRectPoints * sizeof(Point2f));
+    for (int j = 0; j < kRectPoints; ++j) {
+      dst.rect->points[j] = src.hand_pos[j];
+    }
+
+    // The buffer must hold all seven landmarks; allocating room for only four
+    // points here overflowed the heap block in the copy loop below.
+    dst.landmarks = (Point2fVector *)malloc(sizeof(Point2fVector));
+    dst.landmarks->length = kPalmLandmarks;
+    dst.landmarks->points =
+        (Point2f *)malloc(kPalmLandmarks * sizeof(Point2f));
+    for (int j = 0; j < kPalmLandmarks; ++j) {
+      dst.landmarks->points[j] = src.landmarks[j];
+    }
+
+    const size_t total_skeleton = src.skeleton.size();
+    if (total_skeleton == 0) {
+      dst.skeleton = NULL;
+      dst.skeleton3d = NULL;
+      continue;
+    }
+    dst.skeleton = (Point2fVector *)malloc(sizeof(Point2fVector));
+    dst.skeleton->length = total_skeleton;
+    dst.skeleton->points = (Point2f *)malloc(total_skeleton * sizeof(Point2f));
+    dst.skeleton3d = (Point3dVector *)malloc(sizeof(Point3dVector));
+    dst.skeleton3d->length = total_skeleton;
+    dst.skeleton3d->points =
+        (Point3d *)malloc(total_skeleton * sizeof(Point3d));
+    for (size_t j = 0; j < total_skeleton; ++j) {
+      dst.skeleton->points[j].x = src.skeleton[j].x;
+      dst.skeleton->points[j].y = src.skeleton[j].y;
+      dst.skeleton3d->points[j].x = src.skeleton3d[j].x;
+      dst.skeleton3d->points[j].y = src.skeleton3d[j].y;
+      dst.skeleton3d->points[j].z = src.skeleton3d[j].z;
+    }
+  }
+  return 0;
+}
--- a/src/hand/pose3d/mediapipe/mediapipe.cpp
+++ b/src/hand/pose3d/mediapipe/mediapipe.cpp
@@ -0,0 +1,534 @@
+#include "mediapipe.hpp"
+#include "mat.h"
+#include <math.h>
+
+namespace ovhand3d {
+
+// Returns the anchor scale for layer stride_index out of num_strides layers:
+// a linear interpolation between min_scale (first layer) and max_scale (last
+// layer). With a single layer the midpoint of the two scales is returned.
+static float calculate_scale(float min_scale, float max_scale, int stride_index,
+                             int num_strides) {
+  if (num_strides == 1) {
+    return (min_scale + max_scale) * 0.5f;
+  }
+  const double offset =
+      (max_scale - min_scale) * 1.0 * stride_index / (num_strides - 1.0f);
+  return min_scale + offset;
+}
+
+// generate_anchors appends SSD-style anchors for every layer described by
+// anchor_params to anchors.
+//
+// Consecutive layers that share the same stride are merged into one group.
+// Each layer of a group contributes one anchor per configured aspect ratio
+// plus one extra anchor with aspect ratio 1. For every cell of the group's
+// feature map (input size divided by the stride, rounded up) that many anchors
+// are emitted, centred on the cell (normalised to [0, 1]) and with a fixed
+// width and height of 1.
+static void generate_anchors(std::vector<Anchor> &anchors,
+                             const AnchorsParams &anchor_params) {
+  const int num_strides = (int)anchor_params.strides.size();
+  // layer_id is advanced at the end of the body to the first layer of the next
+  // stride group.
+  for (int layer_id = 0; layer_id < num_strides;) {
+    std::vector<float> anchor_height;
+    std::vector<float> anchor_width;
+    std::vector<float> aspect_ratios;
+    std::vector<float> scales;
+
+    int last_same_stride_layer = layer_id;
+    while (last_same_stride_layer < num_strides &&
+           anchor_params.strides[last_same_stride_layer] ==
+               anchor_params.strides[layer_id]) {
+      const float scale =
+          calculate_scale(anchor_params.min_scale, anchor_params.max_scale,
+                          last_same_stride_layer, num_strides);
+      {
+        for (int aspect_ratio_id = 0;
+             aspect_ratio_id < (int)anchor_params.aspect_ratios.size();
+             aspect_ratio_id++) {
+          aspect_ratios.push_back(anchor_params.aspect_ratios[aspect_ratio_id]);
+          scales.push_back(scale);
+        }
+
+        // Extra anchor whose scale is the geometric mean of this layer's scale
+        // and the next one (1.0 after the last layer).
+        const float scale_next =
+            last_same_stride_layer == num_strides - 1
+                ? 1.0f
+                : calculate_scale(anchor_params.min_scale,
+                                  anchor_params.max_scale,
+                                  last_same_stride_layer + 1, num_strides);
+        scales.push_back(sqrt(scale * scale_next));
+        aspect_ratios.push_back(1.0);
+      }
+      last_same_stride_layer++;
+    }
+
+    // The sizes are computed but only their count is used below, because every
+    // emitted anchor gets a fixed size of 1.
+    for (int i = 0; i < (int)aspect_ratios.size(); ++i) {
+      const float ratio_sqrts = sqrt(aspect_ratios[i]);
+      anchor_height.push_back(scales[i] / ratio_sqrts);
+      anchor_width.push_back(scales[i] * ratio_sqrts);
+    }
+
+    const int stride = anchor_params.strides[layer_id];
+    const int feature_map_height =
+        ceil(1.0f * anchor_params.input_size_height / stride);
+    const int feature_map_width =
+        ceil(1.0f * anchor_params.input_size_width / stride);
+
+    for (int y = 0; y < feature_map_height; ++y) {
+      for (int x = 0; x < feature_map_width; ++x) {
+        for (int anchor_id = 0; anchor_id < (int)anchor_height.size();
+             ++anchor_id) {
+          const float x_center =
+              (x + anchor_params.anchor_offset_x) * 1.0f / feature_map_width;
+          const float y_center =
+              (y + anchor_params.anchor_offset_y) * 1.0f / feature_map_height;
+
+          Anchor new_anchor;
+          new_anchor.x_center = x_center;
+          new_anchor.y_center = y_center;
+
+          new_anchor.w = 1.0f;
+          new_anchor.h = 1.0f;
+
+          anchors.push_back(new_anchor);
+        }
+      }
+    }
+    layer_id = last_same_stride_layer;
+  }
+}
+
+// create_ssd_anchors appends the anchors of the palm detector to anchors for
+// a network input of input_w x input_h pixels.
+//
+// The layout is fixed: four layers with strides 8, 16, 16, 16, scales from
+// 0.1484375 to 0.75, a single aspect ratio of 1 and anchors centred in each
+// cell (offset 0.5).
+static void create_ssd_anchors(int input_w, int input_h,
+                               std::vector<Anchor> &anchors) {
+  AnchorsParams anchor_options;
+  anchor_options.num_layers = 4;
+  anchor_options.min_scale = 0.1484375;
+  anchor_options.max_scale = 0.75;
+  // Honour the requested input size instead of a hard-coded 192 so the anchors
+  // always match the size the caller feeds to the network.
+  anchor_options.input_size_height = input_h;
+  anchor_options.input_size_width = input_w;
+  anchor_options.anchor_offset_x = 0.5f;
+  anchor_options.anchor_offset_y = 0.5f;
+  anchor_options.strides.push_back(8);
+  anchor_options.strides.push_back(16);
+  anchor_options.strides.push_back(16);
+  anchor_options.strides.push_back(16);
+  anchor_options.aspect_ratios.push_back(1.0);
+  generate_anchors(anchors, anchor_options);
+}
+
+// decode_bounds turns the raw palm detector output into DetectRegion entries.
+//
+// scores_ptr holds one raw score per anchor (a sigmoid is applied here) and
+// bboxes_ptr holds 18 floats per anchor: centre offset (x, y), size (w, h) and
+// seven landmark offsets (x, y). Every anchor whose score is strictly above
+// score_thresh is appended to region_list with its box and landmarks
+// normalised by input_img_w / input_img_h. Always returns 0.
+static int decode_bounds(std::list<DetectRegion> &region_list,
+                         float score_thresh, int input_img_w, int input_img_h,
+                         float *scores_ptr, float *bboxes_ptr,
+                         std::vector<Anchor> &anchors) {
+  const size_t kValuesPerAnchor = 18;
+  const int kLandmarkCount = 7;
+
+  DetectRegion region;
+  for (size_t idx = 0; idx < anchors.size(); ++idx) {
+    const Anchor &anchor = anchors[idx];
+    const float score = ov::sigmoid(scores_ptr[idx]);
+    if (!(score > score_thresh)) {
+      continue;
+    }
+
+    const float *raw = bboxes_ptr + idx * kValuesPerAnchor;
+
+    const float cx = raw[0] / input_img_w + anchor.x_center;
+    const float cy = raw[1] / input_img_h + anchor.y_center;
+    const float half_w = raw[2] / input_img_w * 0.5f;
+    const float half_h = raw[3] / input_img_h * 0.5f;
+
+    region.score = score;
+    region.topleft.x = cx - half_w;
+    region.topleft.y = cy - half_h;
+    region.btmright.x = cx + half_w;
+    region.btmright.y = cy + half_h;
+
+    for (int k = 0; k < kLandmarkCount; ++k) {
+      float lx = raw[4 + 2 * k];
+      float ly = raw[4 + 2 * k + 1];
+      // Offsets are relative to the anchor centre in input pixels; convert to
+      // absolute pixels, then normalise.
+      lx = (lx + anchor.x_center * input_img_w) / (float)input_img_w;
+      ly = (ly + anchor.y_center * input_img_h) / (float)input_img_h;
+
+      region.landmarks[k].x = lx;
+      region.landmarks[k].y = ly;
+    }
+
+    region_list.push_back(region);
+  }
+  return 0;
+}
+
+// Returns the intersection-over-union of the boxes of region0 and region1.
+// The corner order of each box does not matter. Returns 0 when either box has
+// no area.
+static float calc_intersection_over_union(DetectRegion &region0,
+                                          DetectRegion &region1) {
+  // Normalise both boxes to (left, top, right, bottom).
+  const float left0 = std::min<float>(region0.topleft.x, region0.btmright.x);
+  const float top0 = std::min<float>(region0.topleft.y, region0.btmright.y);
+  const float right0 = std::max<float>(region0.topleft.x, region0.btmright.x);
+  const float bottom0 = std::max<float>(region0.topleft.y, region0.btmright.y);
+
+  const float left1 = std::min<float>(region1.topleft.x, region1.btmright.x);
+  const float top1 = std::min<float>(region1.topleft.y, region1.btmright.y);
+  const float right1 = std::max<float>(region1.topleft.x, region1.btmright.x);
+  const float bottom1 = std::max<float>(region1.topleft.y, region1.btmright.y);
+
+  const float area0 = (bottom0 - top0) * (right0 - left0);
+  const float area1 = (bottom1 - top1) * (right1 - left1);
+  if (area0 <= 0 || area1 <= 0) {
+    return 0.0f;
+  }
+
+  const float overlap_h =
+      std::max(std::min(bottom0, bottom1) - std::max(top0, top1), 0.0f);
+  const float overlap_w =
+      std::max(std::min(right0, right1) - std::max(left0, left1), 0.0f);
+  const float intersect_area = overlap_h * overlap_w;
+
+  return intersect_area / (area0 + area1 - intersect_area);
+}
+
+// non_max_suppression sorts region_list by descending score (in place) and
+// appends to region_nms_list every region whose IoU with all regions already
+// in region_nms_list is below iou_thresh. Selection stops once
+// region_nms_list holds five regions. Always returns 0.
+static int non_max_suppression(std::list<DetectRegion> &region_list,
+                               std::list<DetectRegion> &region_nms_list,
+                               float iou_thresh) {
+  const size_t kMaxKept = 5;
+
+  region_list.sort([](DetectRegion &lhs, DetectRegion &rhs) {
+    return lhs.score > rhs.score;
+  });
+
+  for (auto cand = region_list.begin(); cand != region_list.end(); ++cand) {
+    DetectRegion candidate = *cand;
+
+    // Compare against the kept regions, most recently added first.
+    bool suppressed = false;
+    for (auto kept = region_nms_list.rbegin();
+         kept != region_nms_list.rend() && !suppressed; ++kept) {
+      DetectRegion kept_region = *kept;
+      suppressed =
+          calc_intersection_over_union(candidate, kept_region) >= iou_thresh;
+    }
+    if (suppressed) {
+      continue;
+    }
+
+    region_nms_list.push_back(candidate);
+    if (region_nms_list.size() >= kMaxKept) {
+      break;
+    }
+  }
+  return 0;
+}
+
+// Wraps angle (radians) into the interval [-pi, pi) by removing whole turns.
+static float normalize_radians(float angle) {
+  const double full_turn = 2 * M_PI;
+  const double turns = floor((angle + M_PI) / full_turn);
+  return angle - full_turn * turns;
+}
+
+// compute_rotation stores in region.rotation the angle that turns the vector
+// from landmark 0 to landmark 2 onto the "up" direction (pi / 2, with the
+// image y axis flipped), wrapped by normalize_radians.
+static void compute_rotation(DetectRegion &region) {
+  const float dx = region.landmarks[2].x - region.landmarks[0].x;
+  const float dy = region.landmarks[2].y - region.landmarks[0].y;
+
+  const float target_angle = M_PI * 0.5f;
+  const float rotation = target_angle - atan2(-dy, dx);
+
+  region.rotation = normalize_radians(rotation);
+}
+
+// Rotates vec in place around the origin by rotation radians.
+void rot_vec(ov::Point2f &vec, float rotation) {
+  const auto cos_r = cos(rotation);
+  const auto sin_r = sin(rotation);
+  const float old_x = vec.x;
+  const float old_y = vec.y;
+  vec.x = old_x * cos_r - old_y * sin_r;
+  vec.y = old_x * sin_r + old_y * cos_r;
+}
+
+// compute_detect_to_roi derives the hand region of interest from a palm
+// detection and stores it in palm.
+//
+// The palm box centre is shifted by half the box height along the rotated
+// "up" direction, the box is made square using its longer side and enlarged
+// by a factor of 2.6. palm receives the centre, size, the four rotated corner
+// points (hand_pos), the seven palm landmarks, the score and the rotation.
+// All coordinates stay in the same (normalised) space as region.
+// target_size is currently unused.
+void compute_detect_to_roi(DetectRegion &region, const int &target_size,
+                           PalmObject &palm) {
+  float width = region.btmright.x - region.topleft.x;
+  float height = region.btmright.y - region.topleft.y;
+  float palm_cx = region.topleft.x + width * 0.5f;
+  float palm_cy = region.topleft.y + height * 0.5f;
+
+  float hand_cx;
+  float hand_cy;
+  float rotation = region.rotation;
+  // Shift of the hand centre relative to the palm centre, in box units.
+  float shift_x = 0.0f;
+  float shift_y = -0.5f;
+
+  if (rotation == 0.0f) {
+    hand_cx = palm_cx + (width * shift_x);
+    hand_cy = palm_cy + (height * shift_y);
+  } else {
+    float dx =
+        (width * shift_x) * cos(rotation) - (height * shift_y) * sin(rotation);
+    float dy =
+        (width * shift_x) * sin(rotation) + (height * shift_y) * cos(rotation);
+    hand_cx = palm_cx + dx;
+    hand_cy = palm_cy + dy;
+  }
+
+  float long_side = std::max(width, height);
+  width = long_side;
+  height = long_side;
+  float hand_w = width * 2.6f;
+  float hand_h = height * 2.6f;
+
+  palm.hand_cx = hand_cx;
+  palm.hand_cy = hand_cy;
+  palm.hand_w = hand_w;
+  palm.hand_h = hand_h;
+
+  float dx = hand_w * 0.5f;
+  float dy = hand_h * 0.5f;
+
+  // Corners relative to the centre: top-left, top-right, bottom-right,
+  // bottom-left.
+  palm.hand_pos[0].x = -dx;
+  palm.hand_pos[0].y = -dy;
+  palm.hand_pos[1].x = +dx;
+  palm.hand_pos[1].y = -dy;
+  palm.hand_pos[2].x = +dx;
+  palm.hand_pos[2].y = +dy;
+  palm.hand_pos[3].x = -dx;
+  palm.hand_pos[3].y = +dy;
+
+  for (int i = 0; i < 4; i++) {
+    rot_vec(palm.hand_pos[i], rotation);
+    palm.hand_pos[i].x += hand_cx;
+    palm.hand_pos[i].y += hand_cy;
+  }
+
+  for (int i = 0; i < 7; i++) {
+    palm.landmarks[i] = region.landmarks[i];
+  }
+
+  palm.score = region.score;
+  // The rotation is part of the result handed to callers; without this
+  // assignment the field stayed uninitialized.
+  palm.rotation = rotation;
+}
+
+// pack_detect_result computes the rotation of every region in region_list
+// (updating the region in place), converts it into a PalmObject appended to
+// palmlist and appends the updated region to detect_results.
+static void pack_detect_result(std::vector<DetectRegion> &detect_results,
+                               std::list<DetectRegion> &region_list,
+                               const int &target_size,
+                               std::vector<PalmObject> &palmlist) {
+  for (auto it = region_list.begin(); it != region_list.end(); ++it) {
+    DetectRegion &current = *it;
+    compute_rotation(current);
+
+    PalmObject hand;
+    compute_detect_to_roi(current, target_size, hand);
+
+    palmlist.push_back(hand);
+    detect_results.push_back(current);
+  }
+}
+
+// Creates the palm and hand networks and wires each of them to its own blob
+// and workspace allocators. No model is loaded yet; call LoadModel before
+// Detect.
+MediapipeHand::MediapipeHand() : ov::EstimatorBase() {
+  initialized_ = false;
+
+  // Palm detection network.
+  palm_blob_allocator_.set_size_compare_ratio(0.f);
+  palm_workspace_allocator_.set_size_compare_ratio(0.f);
+  palm_net_ = new ncnn::Net();
+  palm_net_->opt.blob_allocator = &palm_blob_allocator_;
+  palm_net_->opt.workspace_allocator = &palm_workspace_allocator_;
+  palm_net_->opt.lightmode = light_mode_;
+
+  // Hand landmark network.
+  hand_blob_allocator_.set_size_compare_ratio(0.f);
+  hand_workspace_allocator_.set_size_compare_ratio(0.f);
+  hand_net_ = new ncnn::Net();
+  hand_net_->opt.blob_allocator = &hand_blob_allocator_;
+  hand_net_->opt.workspace_allocator = &hand_workspace_allocator_;
+  hand_net_->opt.lightmode = light_mode_;
+
+  // Only override the ncnn default when a thread count has been configured.
+  if (num_threads > 0) {
+    palm_net_->opt.num_threads = num_threads;
+    hand_net_->opt.num_threads = num_threads;
+  }
+#ifdef OV_VULKAN
+  palm_net_->opt.use_vulkan_compute = true;
+  hand_net_->opt.use_vulkan_compute = true;
+#endif // OV_VULKAN
+}
+
+// Releases both networks and then the allocators they were using.
+// The networks are allocated with new in the constructor, so they have to be
+// deleted here; they are destroyed before the allocators are cleared because
+// their blobs live in those allocators.
+MediapipeHand::~MediapipeHand() {
+  if (palm_net_) {
+    palm_net_->clear();
+    delete palm_net_;
+    palm_net_ = NULL;
+  }
+  if (hand_net_) {
+    hand_net_->clear();
+    delete hand_net_;
+    hand_net_ = NULL;
+  }
+  palm_workspace_allocator_.clear();
+  palm_blob_allocator_.clear();
+  hand_workspace_allocator_.clear();
+  hand_blob_allocator_.clear();
+}
+
+// Sets the thread count on the base estimator and on both networks.
+void MediapipeHand::set_num_threads(int n) {
+  EstimatorBase::set_num_threads(n);
+  ncnn::Net *nets[2] = {palm_net_, hand_net_};
+  for (int k = 0; k < 2; ++k) {
+    if (nets[k]) {
+      nets[k]->opt.num_threads = n;
+    }
+  }
+}
+
+// Remembers the light mode setting and applies it to both networks.
+void MediapipeHand::set_light_mode(bool mode) {
+  light_mode_ = mode;
+  ncnn::Net *nets[2] = {palm_net_, hand_net_};
+  for (int k = 0; k < 2; ++k) {
+    if (nets[k]) {
+      nets[k]->opt.lightmode = mode;
+    }
+  }
+}
+
+// LoadModel loads the palm detector from "<palm_path>/param" and
+// "<palm_path>/bin" and the hand landmark network from "<hand_path>/param"
+// and "<hand_path>/bin", then regenerates the detector anchors.
+//
+// Returns 0 on success and 10000 when a path is NULL or any file fails to
+// load; in that case the estimator is left uninitialized.
+int MediapipeHand::LoadModel(const char *palm_path, const char *hand_path) {
+  // A failed (re)load must not leave the estimator marked as usable.
+  initialized_ = false;
+  if (palm_path == NULL || hand_path == NULL) {
+    // Constructing std::string from NULL is undefined behaviour.
+    return 10000;
+  }
+  std::string palm_param_file = std::string(palm_path) + "/param";
+  std::string palm_bin_file = std::string(palm_path) + "/bin";
+  std::string hand_param_file = std::string(hand_path) + "/param";
+  std::string hand_bin_file = std::string(hand_path) + "/bin";
+  // ncnn reports success with 0; treat every other value as a failure.
+  if (palm_net_->load_param(palm_param_file.c_str()) != 0 ||
+      palm_net_->load_model(palm_bin_file.c_str()) != 0) {
+    return 10000;
+  }
+  if (hand_net_->load_param(hand_param_file.c_str()) != 0 ||
+      hand_net_->load_model(hand_bin_file.c_str()) != 0) {
+    return 10000;
+  }
+
+  anchors.clear();
+  create_ssd_anchors(target_size, target_size, anchors);
+  initialized_ = true;
+
+  return 0;
+}
+
+// Detect finds hands in a packed RGB image and estimates their skeletons.
+//
+// The image is resized (longer side = target_size) and zero-padded to a
+// target_size square for the palm detector. Each palm that survives score
+// filtering and NMS is turned into a rotated hand region, mapped back to
+// original image coordinates, cropped to 224x224 with an affine warp and fed
+// to the landmark network.
+//
+// objects is cleared and filled with one PalmObject per detected hand.
+// Returns 0 on success, 10000 when no model is loaded and 10001 when rgbdata
+// is null.
+int MediapipeHand::Detect(const unsigned char *rgbdata, int img_width,
+                          int img_height, std::vector<PalmObject> &objects) {
+  if (!initialized_) {
+    return 10000;
+  }
+  if (rgbdata == 0) {
+    return 10001;
+  }
+  int w = img_width;
+  int h = img_height;
+  float scale = 1.f;
+  if (w > h) {
+    scale = (float)target_size / w;
+    w = target_size;
+    h = h * scale;
+  } else {
+    scale = (float)target_size / h;
+    h = target_size;
+    w = w * scale;
+  }
+
+  ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB,
+                                               img_width, img_height, w, h);
+
+  int wpad = target_size - w;
+  int hpad = target_size - h;
+  ncnn::Mat in_pad;
+  ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2,
+                         wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
+  const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
+  in_pad.substract_mean_normalize(0, norm_vals);
+
+  ncnn::Extractor ex = palm_net_->create_extractor();
+  ncnn::Mat cls, reg;
+  ex.input("input", in_pad);
+  ex.extract("cls", cls);
+  ex.extract("reg", reg);
+
+  float *scores = (float *)cls.data;
+  float *bboxes = (float *)reg.data;
+
+  std::list<DetectRegion> region_list, region_nms_list;
+  std::vector<DetectRegion> detect_results;
+
+  decode_bounds(region_list, prob_threshold, target_size, target_size, scores,
+                bboxes, anchors);
+  non_max_suppression(region_list, region_nms_list, nms_threshold);
+  objects.clear();
+  pack_detect_result(detect_results, region_nms_list, target_size, objects);
+
+  // Offset of the resized image inside the padded square. These are the
+  // integer borders that copy_make_border actually added on the left / top.
+  const float pad_left = (float)(wpad / 2);
+  const float pad_top = (float)(hpad / 2);
+
+  // Input of the landmark network: a 224x224 RGB crop. The buffer is reused
+  // for every hand; warpaffine_bilinear_c3 rewrites it completely.
+  const int kHandInputSize = 224;
+  const float side = (float)kHandInputSize;
+  const float dstPts[8] = {
+      0, 0, side, 0, side, side, 0, side,
+  };
+  std::vector<unsigned char> crop(kHandInputSize * kHandInputSize * 3);
+
+  for (size_t i = 0; i < objects.size(); i++) {
+    PalmObject &obj = objects[i];
+
+    // Normalised padded-square coordinates -> original image pixels.
+    for (int k = 0; k < 4; k++) {
+      obj.hand_pos[k].x = (obj.hand_pos[k].x * target_size - pad_left) / scale;
+      obj.hand_pos[k].y = (obj.hand_pos[k].y * target_size - pad_top) / scale;
+    }
+    for (int j = 0; j < 7; j++) {
+      obj.landmarks[j].x =
+          (obj.landmarks[j].x * target_size - pad_left) / scale;
+      obj.landmarks[j].y = (obj.landmarks[j].y * target_size - pad_top) / scale;
+    }
+
+    const float srcPts[8] = {
+        obj.hand_pos[0].x, obj.hand_pos[0].y, obj.hand_pos[1].x,
+        obj.hand_pos[1].y, obj.hand_pos[2].x, obj.hand_pos[2].y,
+        obj.hand_pos[3].x, obj.hand_pos[3].y,
+    };
+
+    // tm maps crop coordinates to original image coordinates; it is used both
+    // for the warp and to project the landmarks back onto the image.
+    float tm[6];
+    ncnn::get_affine_transform(dstPts, srcPts, 4, tm);
+
+    ncnn::warpaffine_bilinear_c3(rgbdata, img_width, img_height, crop.data(),
+                                 kHandInputSize, kHandInputSize, tm);
+
+    ncnn::Mat trans_image = ncnn::Mat::from_pixels(
+        crop.data(), ncnn::Mat::PIXEL_RGB, kHandInputSize, kHandInputSize);
+
+    // The landmark score is not part of the result.
+    GetLandmarks(trans_image, tm, obj.skeleton, obj.skeleton3d);
+  }
+  return 0;
+}
+
+// GetLandmarks runs the hand landmark network on a cropped hand image.
+//
+// in:         pixel data of the crop; it is scaled by 1/255 before inference.
+// tm:         2x3 affine matrix (row major) applied to each predicted (x, y)
+//             to obtain the 2D skeleton point.
+// skeleton:   receives 21 appended 2D points (tm applied).
+// skeleton3d: receives 21 appended 3D points with x and y divided by 224 and
+//             z as produced by the network.
+// Returns the first value of the "score" output, or 0 without touching the
+// output vectors when the network did not deliver the expected outputs.
+float MediapipeHand::GetLandmarks(ncnn::Mat in, float tm[6],
+                                  std::vector<ov::Point2f> &skeleton,
+                                  std::vector<ov::Point3d> &skeleton3d) {
+  const int kNumJoints = 21;
+
+  const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
+  in.substract_mean_normalize(NULL, norm_vals);
+  ncnn::Mat points, score;
+  int points_ret = 0;
+  int score_ret = 0;
+  {
+    ncnn::Extractor ex = hand_net_->create_extractor();
+    ex.input("input", in);
+    points_ret = ex.extract("points", points);
+    score_ret = ex.extract("score", score);
+  }
+  // Do not read from blobs that were not produced or are too small.
+  if (points_ret != 0 || score_ret != 0 || points.empty() || score.empty() ||
+      points.total() < (size_t)(kNumJoints * 3)) {
+    return 0.f;
+  }
+
+  float *points_data = (float *)points.data;
+  float *score_data = (float *)score.data;
+  skeleton.reserve(skeleton.size() + kNumJoints);
+  skeleton3d.reserve(skeleton3d.size() + kNumJoints);
+  for (int i = 0; i < kNumJoints; i++) {
+    ov::Point3d pt3d;
+    pt3d.x = points_data[i * 3];
+    pt3d.y = points_data[i * 3 + 1];
+    pt3d.z = points_data[i * 3 + 2];
+
+    ov::Point2f pt;
+    pt.x = pt3d.x * tm[0] + pt3d.y * tm[1] + tm[2];
+    pt.y = pt3d.x * tm[3] + pt3d.y * tm[4] + tm[5];
+
+    skeleton.push_back(pt);
+
+    // 224 is the side length of the network input.
+    pt3d.x /= 224.f;
+    pt3d.y /= 224.f;
+    skeleton3d.push_back(pt3d);
+  }
+  return score_data[0];
+}
+
+} // namespace ovhand3d
--- a/src/hand/pose3d/mediapipe/mediapipe.hpp
+++ b/src/hand/pose3d/mediapipe/mediapipe.hpp
@@ -0,0 +1,87 @@
+#ifndef _HAND_POSE3D_MEDIAPIPE_H_
+#define _HAND_POSE3D_MEDIAPIPE_H_
+
+#include "../../../common/common.hpp"
+#include <net.h>
+
+namespace ovhand3d {
+
+// PalmObject describes one detected hand: the palm detection, the derived
+// hand region and the estimated skeleton.
+struct PalmObject {
+  // Palm detection score.
+  float score;
+  // Seven palm landmarks.
+  ov::Point2f landmarks[7];
+  // Rotation of the hand region.
+  float rotation;
+
+  // Centre of the hand region.
+  float hand_cx;
+  float hand_cy;
+  // Width and height of the hand region.
+  float hand_w;
+  float hand_h;
+  // Four corner points of the rotated hand region.
+  ov::Point2f hand_pos[4];
+
+  // 2D skeleton points; empty when no skeleton was produced.
+  std::vector<ov::Point2f> skeleton;
+  // 3D skeleton points, parallel to skeleton.
+  std::vector<ov::Point3d> skeleton3d;
+};
+
+// DetectRegion is one raw palm detection as decoded from the detector output.
+struct DetectRegion {
+  // Detection score.
+  float score;
+  // Top-left and bottom-right corners of the palm box.
+  ov::Point2f topleft;
+  ov::Point2f btmright;
+  // Seven palm landmarks.
+  ov::Point2f landmarks[7];
+
+  // Rotation of the palm.
+  float rotation;
+  // NOTE(review): the roi_* fields are presumably the derived region of
+  // interest; confirm that they are still used anywhere.
+  ov::Point2f roi_center;
+  ov::Point2f roi_size;
+  ov::Point2f roi_coord[4];
+};
+
+// Anchor is one SSD anchor box: centre position plus width and height.
+struct Anchor {
+  float x_center, y_center, w, h;
+};
+
+// AnchorsParams collects the settings used to generate SSD anchors.
+struct AnchorsParams {
+  // Size of the network input in pixels.
+  int input_size_width;
+  int input_size_height;
+
+  // Anchor scale of the first and of the last layer.
+  float min_scale;
+  float max_scale;
+
+  // Position of the anchor centre inside a feature map cell.
+  float anchor_offset_x;
+  float anchor_offset_y;
+
+  // Number of layers.
+  int num_layers;
+  // NOTE(review): presumably explicit feature map sizes per layer; confirm
+  // whether anything fills or reads them.
+  std::vector<int> feature_map_width;
+  std::vector<int> feature_map_height;
+  // Stride of every layer.
+  std::vector<int> strides;
+  // Aspect ratios generated for every layer.
+  std::vector<float> aspect_ratios;
+};
+
+// MediapipeHand detects hands and estimates their 2D / 3D skeletons with two
+// ncnn networks: a palm detector and a hand landmark network.
+class MediapipeHand : public ov::EstimatorBase {
+public:
+  MediapipeHand();
+  ~MediapipeHand();
+  // Loads both networks; palm_model and hand_model are directories that
+  // contain the files "param" and "bin". Returns 0 on success.
+  int LoadModel(const char *palm_model, const char *hand_model);
+  // Detects hands in a packed RGB image of img_width x img_height pixels and
+  // stores them in objects. Returns 0 on success.
+  int Detect(const unsigned char *rgbdata, int img_width, int img_height,
+             std::vector<PalmObject> &objects);
+  // Runs the landmark network on a cropped hand image, appends the predicted
+  // points to skeleton / skeleton3d and returns the network score. tm is the
+  // 2x3 affine matrix applied to the predicted 2D points.
+  float GetLandmarks(ncnn::Mat in, float tm[6],
+                     std::vector<ov::Point2f> &skeleton,
+                     std::vector<ov::Point3d> &skeleton3d);
+  void set_light_mode(bool mode);
+  void set_num_threads(int n);
+
+private:
+  // Networks, each with its own blob / workspace allocators.
+  ncnn::Net *palm_net_ = NULL;
+  ncnn::Net *hand_net_ = NULL;
+  ncnn::PoolAllocator palm_workspace_allocator_;
+  ncnn::UnlockedPoolAllocator palm_blob_allocator_;
+  ncnn::PoolAllocator hand_workspace_allocator_;
+  ncnn::UnlockedPoolAllocator hand_blob_allocator_;
+  // Set once both models have been loaded.
+  bool initialized_ = false;
+  bool light_mode_ = true;
+  // Anchors of the palm detector.
+  std::vector<Anchor> anchors;
+  // Minimum palm score and IoU threshold used for NMS.
+  float prob_threshold = 0.55f;
+  float nms_threshold = 0.3f;
+  // Side length of the square palm detector input.
+  const int target_size = 192;
+  const float mean_vals[3] = {0.f, 0.f, 0.f};
+  const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
+};
+} // namespace ovhand3d
+#endif // !_HAND_POSE3D_MEDIAPIPE_H_
--- a/src/pose/estimator/pptinypose/pptinypose.bak
+++ b/src/pose/estimator/pptinypose/pptinypose.bak
@@ -0,0 +1,161 @@
+#include "pptinypose.hpp"
+#include <string>
+
+#ifdef OV_VULKAN
+#include "gpu.h"
+#endif // OV_VULKAN
+
+namespace ovpose {
+// argmax finds, for every channel of bottom_blob, the largest element and
+// where it sits inside that channel.
+//
+// bottom_blob: input blob; each channel is searched independently.
+// top_blob:    re-created with width equal to the channel count; entry i
+//              receives (as a float) the flat index of the maximum within
+//              channel i.
+// prob:        the maximum value of each channel is appended in channel order.
+//
+// When several elements share the maximum value the largest index wins.
+// Returns 0 on success, or -100 if the output blob could not be allocated.
+static int argmax(const ncnn::Mat &bottom_blob, ncnn::Mat &top_blob,
+                  std::vector<float> &prob) {
+  const int channels = bottom_blob.c;
+
+  top_blob.create(channels, 1, 1, 4u);
+  if (channels > 0 && top_blob.empty()) {
+    // Allocation failed; writing through the output pointer would crash.
+    return -100;
+  }
+  float *outptr = top_blob;
+  prob.reserve(prob.size() + (channels > 0 ? channels : 0));
+
+  for (int i = 0; i < channels; i++) {
+    // Go through the channel view so the scan starts at the channel's real
+    // base address rather than assuming channels are packed back to back in
+    // the flat buffer.
+    const ncnn::Mat channel = bottom_blob.channel(i);
+    const float *values = channel;
+    const int count = (int)channel.total();
+
+    if (count <= 0) {
+      // Nothing to scan: keep top_blob and prob aligned with the channels.
+      outptr[i] = 0.f;
+      prob.push_back(0.f);
+      continue;
+    }
+
+    // One linear pass is enough to find a maximum; no sort is required.
+    int best_index = 0;
+    float best_value = values[0];
+    for (int j = 1; j < count; j++) {
+      // ">=" keeps the last of several equal maxima.
+      if (values[j] >= best_value) {
+        best_value = values[j];
+        best_index = j;
+      }
+    }
+
+    outptr[i] = best_index;
+    prob.push_back(best_value);
+  }
+
+  return 0;
+}
+
+// dark_parse refines the coordinate estimate of one keypoint channel by adding
+// a sub-pixel offset computed from the log-heatmap around (px, py).
+//
+// heatmap: blob whose channel `ch` is the heatmap to sample.
+// dim:     dimension list; dim[2] is used as the row count when reshaping and
+//          dim[3] as the row stride when indexing.
+// coords:  coords[ch * 2] and coords[ch * 2 + 1] are updated in place.
+// px, py:  sampling location (column, row) inside the heatmap.
+// ch:      channel / keypoint index.
+//
+// NOTE(review): as written this block does not compile — `heatmap_mat` is
+// declared twice in the same scope (first as cv::Mat, then as ncnn::Mat) and
+// `heatmap_ch`, a std::vector<float>, is later assigned from a float*. It
+// looks like an unfinished move from OpenCV to ncnn; decide which path is
+// intended before reviving this file.
+// NOTE(review): the samples below read up to two rows/columns away from
+// (px, py) with no bounds check, so a peak within two cells of the heatmap
+// border indexes outside the channel.
+static void dark_parse(const ncnn::Mat &heatmap, std::vector<int> &dim,
+                       std::vector<float> &coords, int px, int py, int ch) {
+  /*DARK postprocessing, Zhang et al. Distribution-Aware Coordinate
+  Representation for Human Pose Estimation (CVPR 2020).
+  1) offset = - Hessian.inv() * derivative
+  2) dx = (heatmap[x+1] - heatmap[x-1])/2.
+  3) dxx = (dx[x+1] - dx[x-1])/2.
+  4) derivative = Mat([dx, dy])
+  5) Hessian = Mat([[dxx, dxy], [dxy, dyy]])
+  */
+
+  // OpenCV path: copy the channel into a vector, smooth it with a 3x3
+  // Gaussian blur and flatten it back to a single row.
+  float *heatmap_data = (float *)heatmap.channel(ch).data;
+  std::vector<float> heatmap_ch;
+  heatmap_ch.insert(heatmap_ch.begin(), heatmap_data,
+                    heatmap_data + heatmap.channel(ch).total());
+  cv::Mat heatmap_mat = cv::Mat(heatmap_ch).reshape(0, dim[2]);
+  heatmap_mat.convertTo(heatmap_mat, CV_32FC1);
+  cv::GaussianBlur(heatmap_mat, heatmap_mat, cv::Size(3, 3), 0, 0);
+  heatmap_mat = heatmap_mat.reshape(1, 1);
+  heatmap_ch = std::vector<float>(heatmap_mat.reshape(1, 1));
+
+  // ncnn path: reshapes the channel without any blur. Conflicts with the
+  // declarations above (see the NOTE(review) at the top of the function).
+  ncnn::Mat heatmap_mat = heatmap.channel(ch).reshape(dim[2]);
+  heatmap_mat = heatmap_mat.reshape(1);
+  heatmap_ch = (float *)heatmap_mat.data;
+
+  // Values are clamped to epsilon before taking the logarithm so log() never
+  // sees zero or a negative number.
+  float epsilon = 1e-10;
+  // sample heatmap to get values in around target location
+  float xy = log(fmax(heatmap_ch[py * dim[3] + px], epsilon));
+  float xr = log(fmax(heatmap_ch[py * dim[3] + px + 1], epsilon));
+  float xl = log(fmax(heatmap_ch[py * dim[3] + px - 1], epsilon));
+
+  float xr2 = log(fmax(heatmap_ch[py * dim[3] + px + 2], epsilon));
+  float xl2 = log(fmax(heatmap_ch[py * dim[3] + px - 2], epsilon));
+  float yu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px], epsilon));
+  float yd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px], epsilon));
+  float yu2 = log(fmax(heatmap_ch[(py + 2) * dim[3] + px], epsilon));
+  float yd2 = log(fmax(heatmap_ch[(py - 2) * dim[3] + px], epsilon));
+  float xryu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px + 1], epsilon));
+  float xryd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px + 1], epsilon));
+  float xlyu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px - 1], epsilon));
+  float xlyd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px - 1], epsilon));
+
+  // compute dx/dy and dxx/dyy with sampled values
+  // (central differences on the log-heatmap samples taken above)
+  float dx = 0.5 * (xr - xl);
+  float dy = 0.5 * (yu - yd);
+  float dxx = 0.25 * (xr2 - 2 * xy + xl2);
+  float dxy = 0.25 * (xryu - xryd - xlyu + xlyd);
+  float dyy = 0.25 * (yu2 - 2 * xy + yd2);
+
+  // finally get offset by derivative and Hessian, which combined by dx/dy and
+  // dxx/dyy
+  // The offset is applied only when the Hessian determinant is non-zero, i.e.
+  // when the 2x2 matrix can be inverted.
+  if (dxx * dyy - dxy * dxy != 0) {
+    float M[2][2] = {dxx, dxy, dxy, dyy};
+    float D[2] = {dx, dy};
+    cv::Mat hassian(2, 2, CV_32F, M);
+    cv::Mat derivative(2, 1, CV_32F, D);
+    cv::Mat offset = -hassian.inv() * derivative;
+    coords[ch * 2] += offset.at<float>(0, 0);
+    coords[ch * 2 + 1] += offset.at<float>(1, 0);
+  }
+}
+
+// get_final_preds converts per-channel argmax indices into (x, y) heatmap
+// coordinates and refines each of them with dark_parse.
+//
+// heatmap:    blob with one heatmap per channel.
+// argmax_out: entry i holds the flat index of the peak inside channel i.
+//
+// Returns a vector of 2 * heatmap.c floats laid out as x0, y0, x1, y1, ...
+// When the heatmap has no width the vector is returned zero-filled.
+static std::vector<float> get_final_preds(const ncnn::Mat &heatmap,
+                                          const ncnn::Mat &argmax_out) {
+  std::vector<float> coords((size_t)heatmap.c * 2);
+  if (heatmap.w <= 0) {
+    // Avoid dividing by zero below.
+    return coords;
+  }
+
+  // The dimensions are the same for every channel, so build them once.
+  std::vector<int> dim({1, heatmap.c, heatmap.h, heatmap.w});
+
+  for (int i = 0; i < heatmap.c; i++) {
+    const int idx = (int)argmax_out[i];
+    // Split the flat index into column and row. The row must come from an
+    // integer division: a fractional quotient would be rounded up to the next
+    // row for peaks in the right half of a row.
+    const int px = idx % heatmap.w;
+    const int py = idx / heatmap.w;
+
+    coords[i * 2] = px;
+    coords[i * 2 + 1] = py;
+
+    dark_parse(heatmap, dim, coords, px, py, i);
+  }
+
+  return coords;
+}
+// Picks the network input resolution from target_size: 128 selects a 96x128
+// (width x height) input, any other value selects 192x256.
+PPTinyPoseEstimator::PPTinyPoseEstimator(int target_size) : Estimator() {
+  if (target_size == 128) {
+    target_width_ = 96;
+    target_height_ = 128;
+  } else {
+    // The larger PP-TinyPose input is 256x192, so the width is 192.
+    target_width_ = 192;
+    target_height_ = 256;
+  }
+}
+
+// ExtractKeypoints runs the pose network on the part of the image covered by
+// rect and reports one keypoint per output heatmap channel.
+//
+// rgbdata:    image buffer, read as ncnn::Mat::PIXEL_RGB.
+// img_width,
+// img_height: size of the image in rgbdata.
+// rect:       region of the image to crop and resize to the network input.
+// keypoints:  cleared, then filled with the detected keypoints; positions are
+//             mapped from heatmap cells back into image coordinates using
+//             rect's origin and size.
+//
+// Returns 0 on success, 10000 when the estimator is not initialized, and
+// 10001 when rgbdata is null.
+// NOTE(review): the results of ex.input / ex.extract are not checked; a failed
+// extraction yields an empty keypoint list and a return value of 0.
+int PPTinyPoseEstimator::ExtractKeypoints(
+    const unsigned char *rgbdata, int img_width, int img_height,
+    const ov::Rect &rect, std::vector<ov::Keypoint> *keypoints) {
+  if (!initialized_) {
+    return 10000;
+  }
+  if (rgbdata == 0) {
+    return 10001;
+  }
+  keypoints->clear();
+
+  // Crop rect out of the image, resize it to the network input and normalise.
+  ncnn::Mat in = ncnn::Mat::from_pixels_roi_resize(
+      rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, rect.x, rect.y,
+      rect.width, rect.height, target_width_, target_height_);
+  in.substract_mean_normalize(meanVals, normVals);
+
+  ncnn::Extractor ex = net_->create_extractor();
+  ex.set_light_mode(light_mode_);
+  ex.set_num_threads(num_threads);
+  ex.input("image", in);
+  ncnn::Mat out;
+  ex.extract("save_infer_model/scale_0.tmp_1", out);
+
+  // Peak location and peak value for every heatmap channel.
+  ncnn::Mat argmax_out;
+  std::vector<float> probs;
+  argmax(out, argmax_out, probs);
+  std::vector<float> coords = get_final_preds(out, argmax_out);
+
+  const size_t count = coords.size() / 2;
+  keypoints->reserve(count);
+  for (size_t i = 0; i < count; i++) {
+    ov::Keypoint keypoint;
+    // Scale heatmap coordinates to the crop size, then shift by the crop
+    // origin to land in image coordinates.
+    keypoint.p =
+        ov::Point(coords[i * 2] * rect.width / (float)out.w + rect.x,
+                  coords[i * 2 + 1] * rect.height / (float)out.h + rect.y);
+    keypoint.score = probs[i];
+    keypoints->push_back(keypoint);
+  }
+
+  return 0;
+}
+
+} // namespace ovpose
--- a/src/pose/estimator/pptinypose/pptinypose.hpp
+++ b/src/pose/estimator/pptinypose/pptinypose.hpp
@@ -0,0 +1,25 @@
+#ifndef _POSE_PPTINYPOSE_ESTIMATOR_H_
+#define _POSE_PPTINYPOSE_ESTIMATOR_H_
+
+#include "../estimator.hpp"
+#include "net.h"
+#include <vector>
+
+namespace ovpose {
+// PPTinyPoseEstimator is an Estimator that extracts keypoints from a
+// rectangular region of an image.
+class PPTinyPoseEstimator : public Estimator {
+public:
+  // target_size selects the network input resolution.
+  PPTinyPoseEstimator(int target_size);
+  // Extracts keypoints for the region `rect` of the image in `rgbdata`
+  // (img_width x img_height) and stores them in `keypoints`.
+  // Returns an int status code.
+  int ExtractKeypoints(const unsigned char *rgbdata, int img_width,
+                       int img_height, const ov::Rect &rect,
+                       std::vector<ov::Keypoint> *keypoints);
+
+private:
+  // Network input size; defaults to 96x128 (width x height).
+  int target_width_ = 96;
+  int target_height_ = 128;
+  // Per-channel mean and normalisation factors.
+  // NOTE(review): presumably applied to the input image before inference —
+  // confirm against the implementation file.
+  const float meanVals[3] = {123.675f, 116.28f, 103.53f};
+  const float normVals[3] = {0.01712475f, 0.0175f, 0.01742919f};
+};
+
+} // namespace ovpose
+
+#endif // !_POSE_PPTINYPOSE_ESTIMATOR_H_