feat(hand): add mediapipe hand 3d pose detecter

This commit is contained in:
Syd Xu
2021-12-13 16:39:50 +08:00
parent 429e30db91
commit 7eab96fa85
25 changed files with 1628 additions and 334 deletions

2
.gitignore vendored
View File

@@ -70,3 +70,5 @@ _testmain.go
test test
.vim .vim
dist/ dist/
libtorch/

View File

@@ -53,6 +53,8 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM
- nanodet [Google Drive](https://drive.google.com/drive/folders/1ywH7r_clqqA_BAOFSzA92Q0lxJtWlN3z?usp=sharing) - nanodet [Google Drive](https://drive.google.com/drive/folders/1ywH7r_clqqA_BAOFSzA92Q0lxJtWlN3z?usp=sharing)
- pose (for hand pose estimation) - pose (for hand pose estimation)
- handnet [Google Drive](https://drive.google.com/drive/folders/1DsCGmiVaZobbMWRp5Oec8GbIpeg7CsNR?usp=sharing) - handnet [Google Drive](https://drive.google.com/drive/folders/1DsCGmiVaZobbMWRp5Oec8GbIpeg7CsNR?usp=sharing)
- pose3d (for 3D hand pose detection)
- mediapipe [Google Drive](https://drive.google.com/drive/folders/1LsqIGB55dusZJqmP1uhnQUnNE2tLzifp?usp=sharing)
- styletransfer - styletransfer
- animegan2 [Google Drive](https://drive.google.com/drive/folders/1K6ZScENPHVbxupHkwl5WcpG8PPECtD8e?usp=sharing) - animegan2 [Google Drive](https://drive.google.com/drive/folders/1K6ZScENPHVbxupHkwl5WcpG8PPECtD8e?usp=sharing)
- tracker - tracker

View File

@@ -90,6 +90,9 @@ func NewCPoint2fVector() *C.Point2fVector {
// GoPoint2fVector convert C.Point2fVector to []Point // GoPoint2fVector convert C.Point2fVector to []Point
func GoPoint2fVector(cVector *C.Point2fVector, w float64, h float64) []Point { func GoPoint2fVector(cVector *C.Point2fVector, w float64, h float64) []Point {
if cVector == nil {
return nil
}
l := int(cVector.length) l := int(cVector.length)
ret := make([]Point, 0, l) ret := make([]Point, 0, l)
ptr := unsafe.Pointer(cVector.points) ptr := unsafe.Pointer(cVector.points)
@@ -105,3 +108,52 @@ func FreeCPoint2fVector(c *C.Point2fVector) {
C.FreePoint2fVector(c) C.FreePoint2fVector(c)
C.free(unsafe.Pointer(c)) C.free(unsafe.Pointer(c))
} }
// Point3d is a point in 3D space with float64 coordinates.
type Point3d struct {
	X, Y, Z float64
}

// Pt3d constructs a Point3d from the given x, y and z coordinates.
func Pt3d(x, y, z float64) Point3d {
	return Point3d{X: x, Y: y, Z: z}
}

// ZP3d is the zero Point3d (the origin).
var ZP3d = Point3d{}
// GoPoint3d converts a C.Point3d to its Go Point3d equivalent,
// widening the C float fields to float64. The C struct is only read,
// never retained.
func GoPoint3d(c *C.Point3d) Point3d {
return Pt3d(
float64(c.x),
float64(c.y),
float64(c.z),
)
}
// NewCPoint3dVector returns a C.malloc'ed C.Point3dVector pointer.
// The caller owns the allocation and must release it with
// FreeCPoint3dVector once the C side has filled it.
func NewCPoint3dVector() *C.Point3dVector {
	// Allocate the vector struct itself (pointer + length), not a single
	// Point3d: sizeof_Point3d is smaller than sizeof_Point3dVector, so the
	// previous allocation let the C side write past the end of the block.
	return (*C.Point3dVector)(C.malloc(C.sizeof_Point3dVector))
}
// GoPoint3dVector converts a C.Point3dVector into a []Point3d.
// Returns nil when cVector is nil. Every coordinate is copied out, so the
// C-side memory may be freed after this call returns.
func GoPoint3dVector(cVector *C.Point3dVector) []Point3d {
if cVector == nil {
return nil
}
l := int(cVector.length)
ret := make([]Point3d, 0, l)
ptr := unsafe.Pointer(cVector.points)
for i := 0; i < l; i++ {
// Index the C array manually: base pointer + i * sizeof(Point3d).
cPoint3d := (*C.Point3d)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_Point3d*C.int(i))))
ret = append(ret, GoPoint3d(cPoint3d))
}
return ret
}
// FreeCPoint3dVector releases a C.Point3dVector: first the C-owned points
// array (via C.FreePoint3dVector), then the vector struct itself, which
// was allocated with C.malloc in NewCPoint3dVector.
func FreeCPoint3dVector(c *C.Point3dVector) {
C.FreePoint3dVector(c)
C.free(unsafe.Pointer(c))
}

View File

@@ -20,6 +20,8 @@ type ObjectInfo struct {
Rect Rectangle Rect Rectangle
// Points keypoints // Points keypoints
Keypoints []Keypoint Keypoints []Keypoint
// Name
Name string
} }
// GoObjectInfo convert C.ObjectInfo to go type // GoObjectInfo convert C.ObjectInfo to go type

56
go/common/palmobject.go Normal file
View File

@@ -0,0 +1,56 @@
package common
/*
#include <stdlib.h>
#include <stdbool.h>
#include "openvision/common/common.h"
#include "openvision/hand/pose3d.h"
*/
import "C"
import (
"unsafe"
)
// PalmObject represents one detected palm/hand and its keypoints as
// produced by the mediapipe hand pipeline.
type PalmObject struct {
// Score is the detection confidence score.
Score float64
// Rotation is the estimated palm rotation angle
// (presumably radians — TODO confirm against the C implementation).
Rotation float64
// Rect holds the corner points of the (rotated) palm region.
Rect []Point
// Landmarks holds the palm-detector keypoints.
Landmarks []Point
// Skeleton holds the 2D hand skeleton joints.
Skeleton []Point
// Skeleton3d holds the corresponding 3D skeleton joints.
Skeleton3d []Point3d
}
// NewCPalmObjectVector returns a C.malloc'ed *C.PalmObjectVector for the
// C side to fill. The caller owns it and must release it with
// FreeCPalmObjectVector.
func NewCPalmObjectVector() *C.PalmObjectVector {
return (*C.PalmObjectVector)(C.malloc(C.sizeof_PalmObjectVector))
}
// FreeCPalmObjectVector releases a C.PalmObjectVector: the C-owned items
// (via C.FreePalmObjectVector) first, then the vector struct allocated by
// NewCPalmObjectVector.
func FreeCPalmObjectVector(p *C.PalmObjectVector) {
C.FreePalmObjectVector(p)
C.free(unsafe.Pointer(p))
}
// GoPalmObject converts one C.PalmObject into its Go representation.
// w and h are the source image width and height, forwarded to
// GoPoint2fVector for coordinate scaling of the 2D point sets; the 3D
// skeleton is converted unscaled.
func GoPalmObject(cObj *C.PalmObject, w float64, h float64) PalmObject {
return PalmObject{
Score: float64(cObj.score),
Rotation: float64(cObj.rotation),
Rect: GoPoint2fVector(cObj.rect, w, h),
Landmarks: GoPoint2fVector(cObj.landmarks, w, h),
Skeleton: GoPoint2fVector(cObj.skeleton, w, h),
Skeleton3d: GoPoint3dVector(cObj.skeleton3d),
}
}
// GoPalmObjectVector converts a C.PalmObjectVector into a []PalmObject.
// w and h are the source image dimensions, forwarded to GoPalmObject for
// coordinate scaling. Returns nil when c is nil, matching the behavior of
// GoPoint2fVector and GoPoint3dVector.
func GoPalmObjectVector(c *C.PalmObjectVector, w float64, h float64) []PalmObject {
	// Guard nil input like the sibling converters; previously this
	// dereferenced c.length unconditionally.
	if c == nil {
		return nil
	}
	l := int(c.length)
	ret := make([]PalmObject, 0, l)
	ptr := unsafe.Pointer(c.items)
	for i := 0; i < l; i++ {
		// Index the C array manually: base pointer + i * sizeof(PalmObject).
		cObj := (*C.PalmObject)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_PalmObject*C.int(i))))
		ret = append(ret, GoPalmObject(cObj, w, h))
	}
	return ret
}

View File

@@ -15,6 +15,7 @@ import (
"github.com/bububa/openvision/go/hand/detecter" "github.com/bububa/openvision/go/hand/detecter"
handdrawer "github.com/bububa/openvision/go/hand/drawer" handdrawer "github.com/bububa/openvision/go/hand/drawer"
"github.com/bububa/openvision/go/hand/pose" "github.com/bububa/openvision/go/hand/pose"
"github.com/bububa/openvision/go/hand/pose3d"
) )
func main() { func main() {
@@ -27,17 +28,19 @@ func main() {
cpuCores := common.GetBigCPUCount() cpuCores := common.GetBigCPUCount()
common.SetOMPThreads(cpuCores) common.SetOMPThreads(cpuCores)
log.Printf("CPU big cores:%d\n", cpuCores) log.Printf("CPU big cores:%d\n", cpuCores)
estimator := handpose(modelPath) // estimator := handpose(modelPath)
defer estimator.Destroy() // defer estimator.Destroy()
common.SetEstimatorThreads(estimator, cpuCores) // common.SetEstimatorThreads(estimator, cpuCores)
for idx, d := range []detecter.Detecter{ // for idx, d := range []detecter.Detecter{
yolox(modelPath), // yolox(modelPath),
nanodet(modelPath), // nanodet(modelPath),
} { // } {
defer d.Destroy() // defer d.Destroy()
common.SetEstimatorThreads(d, cpuCores) // common.SetEstimatorThreads(d, cpuCores)
detect(d, estimator, imgPath, "hand1.jpg", idx) // detect(d, estimator, imgPath, "hand2.jpg", idx)
} // }
d3d := mediapipe(modelPath)
detect3d(d3d, imgPath, "hand1.jpg")
} }
func yolox(modelPath string) detecter.Detecter { func yolox(modelPath string) detecter.Detecter {
@@ -67,6 +70,16 @@ func handpose(modelPath string) pose.Estimator {
return d return d
} }
// mediapipe constructs a MediaPipe 3D hand pose estimator and loads the
// palm-detection and hand-landmark models from under modelPath.
// On load failure the program exits via log.Fatalln.
func mediapipe(modelPath string) *pose3d.Mediapipe {
	mp := pose3d.NewMediapipe()
	palm := filepath.Join(modelPath, "mediapipe/palm/full")
	hand := filepath.Join(modelPath, "mediapipe/hand/full")
	if err := mp.LoadModel(palm, hand); err != nil {
		log.Fatalln(err)
	}
	return mp
}
func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename string, idx int) { func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename string, idx int) {
inPath := filepath.Join(imgPath, filename) inPath := filepath.Join(imgPath, filename)
imgSrc, err := loadImage(inPath) imgSrc, err := loadImage(inPath)
@@ -104,6 +117,36 @@ func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename stri
if err := saveImage(out, outPath); err != nil { if err := saveImage(out, outPath); err != nil {
log.Fatalln(err) log.Fatalln(err)
} }
}
func detect3d(d *pose3d.Mediapipe, imgPath string, filename string) {
inPath := filepath.Join(imgPath, filename)
imgSrc, err := loadImage(inPath)
if err != nil {
log.Fatalln("load image failed,", err)
}
img := common.NewImage(imgSrc)
rois, err := d.Detect(img)
if err != nil {
log.Fatalln(err)
}
log.Printf("%+v\n", rois)
drawer := handdrawer.New()
outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("pose3d-hand-%s", filename))
out := drawer.DrawPalm(img, rois)
if err := saveImage(out, outPath); err != nil {
log.Fatalln(err)
}
for idx, roi := range rois {
outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("pose3d-palm3d-%d-%s", idx, filename))
out := drawer.DrawPalm3D(roi, 400, "#442519")
if err := saveImage(out, outPath); err != nil {
log.Fatalln(err)
}
}
} }

View File

@@ -1,6 +1,6 @@
// +build !vulkan // +build !vulkan
package eye package tracker
/* /*
#cgo CXXFLAGS: --std=c++11 -fopenmp #cgo CXXFLAGS: --std=c++11 -fopenmp

View File

@@ -7,71 +7,16 @@ import (
const ( const (
// DefaultBorderColor default drawer border color // DefaultBorderColor default drawer border color
DefaultBorderColor = common.Green DefaultBorderColor = common.Green
// DefaultKeypointColor default drawer keypoint color
DefaultKeypointColor = common.Pink
// DefaultBorderStrokeWidth default drawer border stroke width // DefaultBorderStrokeWidth default drawer border stroke width
DefaultBorderStrokeWidth = 3 DefaultBorderStrokeWidth = 3
// DefaultKeypointRadius default drawer keypoint radius // DefaultKeypointRadius default drawer keypoint radius
DefaultKeypointRadius = 3 DefaultKeypointRadius = 3
// DefaultKeypointStrokeWidth default drawer keypoint stroke width // DefaultKeypointStrokeWidth default drawer keypoint stroke width
DefaultKeypointStrokeWidth = 1 DefaultKeypointStrokeWidth = 1
) // DefaultLabelColor default label color
DefaultLabelColor = common.White
// CocoPart coco part define
type CocoPart = int
const (
// CocoPartNose nose
CocoPartNose CocoPart = iota
// CocoPartLEye left eye
CocoPartLEye
// CocoPartREye right eye
CocoPartREye
// CocoPartLEar left ear
CocoPartLEar
// CocoPartREar right ear
CocoPartREar
// CocoPartLShoulder left sholder
CocoPartLShoulder
// CocoPartRShoulder right sholder
CocoPartRShoulder
// CocoPartLElbow left elbow
CocoPartLElbow
// CocoPartRElbow right elbow
CocoPartRElbow
// CocoPartLWrist left wrist
CocoPartLWrist
// CocoPartRWrist right wrist
CocoPartRWrist
// CocoPartLHip left hip
CocoPartLHip
// CocoPartRHip right hip
CocoPartRHip
// CocoPartLKnee left knee
CocoPartLKnee
// CocoPartRKnee right knee
CocoPartRKnee
// CocoPartRAnkle right ankle
CocoPartRAnkle
// CocoPartLAnkle left ankle
CocoPartLAnkle
// CocoPartNeck neck
CocoPartNeck
// CocoPartBackground background
CocoPartBackground
)
var (
// CocoPair represents joints pair
CocoPair = [16][2]CocoPart{
{0, 1}, {1, 3}, {0, 2}, {2, 4}, {5, 6}, {5, 7}, {7, 9}, {6, 8}, {8, 10}, {5, 11}, {6, 12}, {11, 12}, {11, 13}, {12, 14}, {13, 15}, {14, 16},
}
// CocoColors represents color for coco parts
CocoColors = [17]string{
"#ff0000", "#ff5500", "#ffaa00", "#ffff00",
"#aaff00", "#55ff00", "#00ff00", "#00ff55", "#00ffaa",
"#00ffff", "#00aaff", "#0055ff",
"#0000ff", "#aa00ff", "#ff00ff",
"#ff00aa", "#ff0055",
}
) )
var ( var (

View File

@@ -2,8 +2,10 @@ package drawer
import ( import (
"image" "image"
"image/color"
"github.com/llgcode/draw2d/draw2dimg" "github.com/llgcode/draw2d/draw2dimg"
"github.com/llgcode/draw2d/draw2dkit"
"github.com/bububa/openvision/go/common" "github.com/bububa/openvision/go/common"
) )
@@ -18,6 +20,12 @@ type Drawer struct {
KeypointStrokeWidth float64 KeypointStrokeWidth float64
// KeypointRadius represents keypoints circle radius // KeypointRadius represents keypoints circle radius
KeypointRadius float64 KeypointRadius float64
// KeypointColor represents keypoint color
KeypointColor string
// LabelColor string
LabelColor string
// Font
Font *common.Font
} }
// New returns a new Drawer // New returns a new Drawer
@@ -27,6 +35,8 @@ func New(options ...Option) *Drawer {
BorderStrokeWidth: DefaultBorderStrokeWidth, BorderStrokeWidth: DefaultBorderStrokeWidth,
KeypointStrokeWidth: DefaultKeypointStrokeWidth, KeypointStrokeWidth: DefaultKeypointStrokeWidth,
KeypointRadius: DefaultKeypointRadius, KeypointRadius: DefaultKeypointRadius,
KeypointColor: DefaultKeypointColor,
LabelColor: DefaultLabelColor,
} }
for _, opt := range options { for _, opt := range options {
opt.apply(d) opt.apply(d)
@@ -42,15 +52,15 @@ func (d *Drawer) Draw(img image.Image, rois []common.ObjectInfo, drawBorder bool
gc := draw2dimg.NewGraphicContext(out) gc := draw2dimg.NewGraphicContext(out)
gc.DrawImage(img) gc.DrawImage(img)
for _, roi := range rois { for _, roi := range rois {
rect := common.Rect(
roi.Rect.X*imgW,
roi.Rect.Y*imgH,
roi.Rect.Width*imgW,
roi.Rect.Height*imgH,
)
borderColor := d.BorderColor
if drawBorder { if drawBorder {
// draw rect // draw rect
rect := common.Rect(
roi.Rect.X*imgW,
roi.Rect.Y*imgH,
roi.Rect.Width*imgW,
roi.Rect.Height*imgH,
)
borderColor := d.BorderColor
common.DrawRectangle(gc, rect, borderColor, "", d.BorderStrokeWidth) common.DrawRectangle(gc, rect, borderColor, "", d.BorderStrokeWidth)
} }
l := len(roi.Keypoints) l := len(roi.Keypoints)
@@ -95,6 +105,115 @@ func (d *Drawer) Draw(img image.Image, rois []common.ObjectInfo, drawBorder bool
poseColor := PoseColors[colorIdx] poseColor := PoseColors[colorIdx]
common.DrawCircle(gc, common.Pt(pt.Point.X*imgW, pt.Point.Y*imgH), d.KeypointRadius, poseColor, "", d.KeypointStrokeWidth) common.DrawCircle(gc, common.Pt(pt.Point.X*imgW, pt.Point.Y*imgH), d.KeypointRadius, poseColor, "", d.KeypointStrokeWidth)
} }
// draw name
if roi.Name != "" {
common.DrawLabelInWidth(gc, d.Font, roi.Name, common.Pt(rect.X, rect.MaxY()), d.LabelColor, borderColor, rect.Width)
}
}
return out
}
// DrawPalm draw PalmObject
func (d *Drawer) DrawPalm(img image.Image, rois []common.PalmObject) image.Image {
imgW := float64(img.Bounds().Dx())
imgH := float64(img.Bounds().Dy())
out := image.NewRGBA(img.Bounds())
gc := draw2dimg.NewGraphicContext(out)
gc.DrawImage(img)
for _, roi := range rois {
gc.SetLineWidth(d.BorderStrokeWidth)
gc.SetStrokeColor(common.ColorFromHex(d.BorderColor))
gc.BeginPath()
for idx, pt := range roi.Rect {
gc.MoveTo(pt.X*imgW, pt.Y*imgH)
if idx == len(roi.Rect)-1 {
gc.LineTo(roi.Rect[0].X*imgW, roi.Rect[0].Y*imgH)
} else {
gc.LineTo(roi.Rect[idx+1].X*imgW, roi.Rect[idx+1].Y*imgH)
}
}
gc.Close()
gc.Stroke()
l := len(roi.Skeleton)
if l == 0 {
continue
}
// draw skeleton
for idx := range roi.Skeleton[:l-1] {
var (
p0 common.Point
p1 common.Point
poseColor = PoseColors[idx/4]
)
gc.SetStrokeColor(common.ColorFromHex(poseColor))
if idx == 5 || idx == 9 || idx == 13 || idx == 17 {
p0 = roi.Skeleton[0]
p1 = roi.Skeleton[idx]
gc.BeginPath()
gc.MoveTo(p0.X*imgW, p0.Y*imgH)
gc.LineTo(p1.X*imgW, p1.Y*imgH)
gc.Close()
gc.Stroke()
} else if idx == 4 || idx == 8 || idx == 12 || idx == 16 {
continue
}
p0 = roi.Skeleton[idx]
p1 = roi.Skeleton[idx+1]
gc.BeginPath()
gc.MoveTo(p0.X*imgW, p0.Y*imgH)
gc.LineTo(p1.X*imgW, p1.Y*imgH)
gc.Close()
gc.Stroke()
}
for _, pt := range roi.Landmarks {
common.DrawCircle(gc, common.Pt(pt.X*imgW, pt.Y*imgH), d.KeypointRadius, d.KeypointColor, "", d.KeypointStrokeWidth)
}
}
return out
}
// DrawPalm3D draw 3d PalmObject
func (d *Drawer) DrawPalm3D(roi common.PalmObject, size float64, bg string) image.Image {
out := image.NewRGBA(image.Rect(0, 0, int(size), int(size)))
gc := draw2dimg.NewGraphicContext(out)
l := len(roi.Skeleton3d)
if l == 0 {
return out
}
if bg != "" {
bgColor := common.ColorFromHex(bg)
gc.SetFillColor(bgColor)
draw2dkit.Rectangle(gc, 0, 0, size, size)
gc.Fill()
gc.SetFillColor(color.Transparent)
}
// draw skeleton3d
for idx := range roi.Skeleton3d[:l-1] {
var (
p0 common.Point3d
p1 common.Point3d
poseColor = PoseColors[idx/4]
)
gc.SetStrokeColor(common.ColorFromHex(poseColor))
if idx == 5 || idx == 9 || idx == 13 || idx == 17 {
p0 = roi.Skeleton3d[0]
p1 = roi.Skeleton3d[idx]
gc.BeginPath()
gc.MoveTo(p0.X*size, p0.Y*size)
gc.LineTo(p1.X*size, p1.Y*size)
gc.Close()
gc.Stroke()
} else if idx == 4 || idx == 8 || idx == 12 || idx == 16 {
continue
}
p0 = roi.Skeleton3d[idx]
p1 = roi.Skeleton3d[idx+1]
gc.BeginPath()
gc.MoveTo(p0.X*size, p0.Y*size)
gc.LineTo(p1.X*size, p1.Y*size)
gc.Close()
gc.Stroke()
} }
return out return out
} }

View File

@@ -1,5 +1,9 @@
package drawer package drawer
import (
"github.com/bububa/openvision/go/common"
)
// Option represents Drawer option interface // Option represents Drawer option interface
type Option interface { type Option interface {
apply(*Drawer) apply(*Drawer)
@@ -38,3 +42,17 @@ func WithKeypointStrokeWidth(w float64) Option {
d.KeypointStrokeWidth = w d.KeypointStrokeWidth = w
}) })
} }
// WithKeypointColor returns an Option that overrides the Drawer's
// keypoint color.
func WithKeypointColor(color string) Option {
	return optionFunc(func(d *Drawer) { d.KeypointColor = color })
}
// WithFont returns an Option that sets the font the Drawer uses for
// label rendering.
func WithFont(font *common.Font) Option {
	return optionFunc(func(d *Drawer) { d.Font = font })
}

11
go/hand/pose3d/cgo.go Normal file
View File

@@ -0,0 +1,11 @@
// +build !vulkan
package pose3d
/*
#cgo CXXFLAGS: --std=c++11 -fopenmp
#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include
#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision
#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib
*/
import "C"

View File

@@ -0,0 +1,11 @@
// +build vulkan
package pose3d
/*
#cgo CXXFLAGS: --std=c++11 -fopenmp
#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include
#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision -lglslang -lvulkan -lSPIRV -lOGLCompiler -lMachineIndependent -lGenericCodeGen -lOSDependent
#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib
*/
import "C"

2
go/hand/pose3d/doc.go Normal file
View File

@@ -0,0 +1,2 @@
// Package pose3d implements a MediaPipe-based 3D hand pose estimator.
package pose3d

View File

@@ -0,0 +1,62 @@
package pose3d
/*
#include <stdlib.h>
#include <stdbool.h>
#include "openvision/common/common.h"
#include "openvision/hand/pose3d.h"
*/
import "C"
import (
"unsafe"
openvision "github.com/bububa/openvision/go"
"github.com/bububa/openvision/go/common"
)
// Mediapipe wraps the C IHandPose3DEstimator handle for MediaPipe-style
// 3D hand pose estimation (palm detection + hand landmark model).
type Mediapipe struct {
// d is the underlying C estimator handle, created by NewMediapipe and
// released by Destroy.
d C.IHandPose3DEstimator
}
// NewMediapipe allocates a new C-side mediapipe hand estimator.
// The caller must call Destroy to release the C resources.
func NewMediapipe() *Mediapipe {
return &Mediapipe{
d: C.new_mediapipe_hand(),
}
}
// Destroy releases the underlying C estimator. The Mediapipe must not be
// used after Destroy returns.
func (m *Mediapipe) Destroy() {
C.destroy_mediapipe_hand(m.d)
}
// LoadModel loads the palm-detection model from palmPath and the hand
// landmark model from handPath. On failure it returns a LoadModelError
// wrapping the non-zero C return code.
func (m *Mediapipe) LoadModel(palmPath string, handPath string) error {
cPalm := C.CString(palmPath)
defer C.free(unsafe.Pointer(cPalm))
cHand := C.CString(handPath)
defer C.free(unsafe.Pointer(cHand))
retCode := C.mediapipe_hand_load_model(m.d, cPalm, cHand)
if retCode != 0 {
return openvision.LoadModelError(int(retCode))
}
return nil
}
// Detect runs palm detection plus 3D hand landmark estimation on img.
// It returns one PalmObject per detected hand, or a DetectHandError
// wrapping the non-zero C return code on failure.
func (m *Mediapipe) Detect(img *common.Image) ([]common.PalmObject, error) {
	imgWidth := img.WidthF64()
	imgHeight := img.HeightF64()
	data := img.Bytes()
	// Guard empty pixel data: &data[0] would panic on an empty slice.
	// 10001 is the C convention for null/invalid image data.
	if len(data) == 0 {
		return nil, openvision.DetectHandError(10001)
	}
	cObjs := common.NewCPalmObjectVector()
	defer common.FreeCPalmObjectVector(cObjs)
	errCode := C.mediapipe_hand_detect(
		m.d,
		(*C.uchar)(unsafe.Pointer(&data[0])),
		C.int(imgWidth), C.int(imgHeight),
		(*C.PalmObjectVector)(unsafe.Pointer(cObjs)),
	)
	if errCode != 0 {
		return nil, openvision.DetectHandError(int(errCode))
	}
	return common.GoPalmObjectVector(cObjs, imgWidth, imgHeight), nil
}

View File

@@ -71,6 +71,7 @@ target_include_directories(openvision
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/pose> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/pose>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/pose3d>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose/detecter> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose/detecter>
@@ -109,6 +110,7 @@ file(COPY
file(COPY file(COPY
${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter.h ${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter.h
${CMAKE_CURRENT_SOURCE_DIR}/hand/pose.h ${CMAKE_CURRENT_SOURCE_DIR}/hand/pose.h
${CMAKE_CURRENT_SOURCE_DIR}/hand/pose3d.h
DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/hand DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/hand
) )

View File

@@ -58,6 +58,13 @@ void FreePoint2fVector(Point2fVector *p) {
} }
} }
// FreePoint3dVector releases the points array owned by a Point3dVector and
// nulls the pointer so a repeated call is harmless. The vector struct
// itself is NOT freed here; the caller owns it (the Go wrapper frees it
// separately with free()).
void FreePoint3dVector(Point3dVector *p) {
if (p->points != NULL) {
free(p->points);
p->points = NULL;
}
}
void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val) { void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val) {
if (p->points == NULL || i >= p->length) { if (p->points == NULL || i >= p->length) {
return; return;

View File

@@ -11,123 +11,134 @@ typedef ov::Size Size;
typedef ov::Size2f Size2f; typedef ov::Size2f Size2f;
typedef ov::Point Point; typedef ov::Point Point;
typedef ov::Point2f Point2f; typedef ov::Point2f Point2f;
typedef ov::Point3d Point3d;
typedef ov::Rect Rect; typedef ov::Rect Rect;
typedef ov::Keypoint Keypoint; typedef ov::Keypoint Keypoint;
#else #else
// Wrapper for an individual cv::cvSize // Wrapper for an individual cv::cvSize
typedef struct Size { typedef struct Size {
int width; int width;
int height; int height;
} Size; } Size;
// //
// Wrapper for an individual cv::cvSize2f // Wrapper for an individual cv::cvSize2f
typedef struct Size2f { typedef struct Size2f {
int width; int width;
int height; int height;
} Size2f; } Size2f;
// Wrapper for an individual cv::cvPoint // Wrapper for an individual cv::cvPoint
typedef struct Point { typedef struct Point {
int x; int x;
int y; int y;
} Point; } Point;
// Wrapper for an individual cv::Point2f // Wrapper for an individual cv::Point2f
typedef struct Point2f { typedef struct Point2f {
float x; float x;
float y; float y;
} Point2f; } Point2f;
typedef struct Point3d {
float x;
float y;
float z;
} Point3d;
// Wrapper for an individual cv::Rect // Wrapper for an individual cv::Rect
typedef struct Rect { typedef struct Rect {
int x; int x;
int y; int y;
int width; int width;
int height; int height;
} Rect; } Rect;
typedef struct Keypoint { typedef struct Keypoint {
Point2f p; Point2f p;
float score; float score;
int id; int id;
} Keypoint; } Keypoint;
#endif #endif
typedef void* IEstimator; typedef void *IEstimator;
int get_gpu_count(); int get_gpu_count();
int create_gpu_instance(); int create_gpu_instance();
void destroy_gpu_instance(); void destroy_gpu_instance();
int get_big_cpu_count(); int get_big_cpu_count();
void set_omp_num_threads(int n); void set_omp_num_threads(int n);
int load_model(IEstimator e, const char* root_path); int load_model(IEstimator e, const char *root_path);
void destroy_estimator(IEstimator e); void destroy_estimator(IEstimator e);
void set_num_threads(IEstimator e, int n); void set_num_threads(IEstimator e, int n);
void set_light_mode(IEstimator e, bool mode); void set_light_mode(IEstimator e, bool mode);
typedef struct Point2fVector { typedef struct Point2fVector {
Point2f* points; Point2f *points;
int length; int length;
} Point2fVector; } Point2fVector;
void FreePoint2fVector(Point2fVector *p); void FreePoint2fVector(Point2fVector *p);
void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f* val); void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val);
typedef struct Point3dVector {
Point3d *points;
int length;
} Point3dVector;
void FreePoint3dVector(Point3dVector *p);
typedef struct RectVector { typedef struct RectVector {
Rect* rects; Rect *rects;
int length; int length;
} RectVector; } RectVector;
void FreeRectVector(RectVector *p); void FreeRectVector(RectVector *p);
typedef struct FloatVector { typedef struct FloatVector {
float* values; float *values;
int length; int length;
} FloatVector; } FloatVector;
void FreeFloatVector(FloatVector *p); void FreeFloatVector(FloatVector *p);
typedef struct Bytes { typedef struct Bytes {
unsigned char* values; unsigned char *values;
int length; int length;
} Bytes; } Bytes;
void FreeBytes(Bytes *p); void FreeBytes(Bytes *p);
typedef struct KeypointVector { typedef struct KeypointVector {
Keypoint* points; Keypoint *points;
int length; int length;
} KeypointVector; } KeypointVector;
void FreeKeypointVector(KeypointVector *p); void FreeKeypointVector(KeypointVector *p);
void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint* val); void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint *val);
typedef struct ImageC { typedef struct ImageC {
unsigned char* data; unsigned char *data;
int width; int width;
int height; int height;
int channels; int channels;
} Image; } Image;
void FreeImage(Image* p); void FreeImage(Image *p);
typedef struct ObjectInfoC { typedef struct ObjectInfoC {
Rect rect; Rect rect;
float score; float score;
int label; int label;
KeypointVector* pts; KeypointVector *pts;
} ObjectInfo; } ObjectInfo;
void FreeObjectInfo(ObjectInfo *p); void FreeObjectInfo(ObjectInfo *p);
typedef struct ObjectInfoVector { typedef struct ObjectInfoVector {
ObjectInfo* items; ObjectInfo *items;
int length; int length;
} ObjectInfoVector; } ObjectInfoVector;
void FreeObjectInfoVector(ObjectInfoVector *p); void FreeObjectInfoVector(ObjectInfoVector *p);

View File

@@ -76,6 +76,13 @@ struct Point2f {
}; };
}; };
struct Point3d {
float x;
float y;
float z;
Point3d(float _x = 0, float _y = 0, float _z = 0) : x(_x), y(_y), z(_z) {}
};
// Wrapper for an individual cv::Rect // Wrapper for an individual cv::Rect
struct Rect { struct Rect {
int x; int x;

View File

@@ -1,6 +1,6 @@
#include "nanodet.hpp" #include "nanodet.hpp"
#include <string>
#include <float.h> #include <float.h>
#include <string>
#ifdef OV_VULKAN #ifdef OV_VULKAN
#include "gpu.h" #include "gpu.h"
@@ -8,227 +8,219 @@
namespace ovhand { namespace ovhand {
static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Mat& dis_pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<ov::ObjectInfo>& objects) static void generate_nanodet_proposals(const ncnn::Mat &cls_pred,
{ const ncnn::Mat &dis_pred, int stride,
const ncnn::Mat &in_pad,
float prob_threshold,
std::vector<ov::ObjectInfo> &objects) {
const int num_grid = cls_pred.h; const int num_grid = cls_pred.h;
int num_grid_x; int num_grid_x;
int num_grid_y; int num_grid_y;
if (in_pad.w > in_pad.h) if (in_pad.w > in_pad.h) {
{ num_grid_x = in_pad.w / stride;
num_grid_x = in_pad.w / stride; num_grid_y = num_grid / num_grid_x;
num_grid_y = num_grid / num_grid_x; } else {
} num_grid_y = in_pad.h / stride;
else num_grid_x = num_grid / num_grid_y;
{ }
num_grid_y = in_pad.h / stride;
num_grid_x = num_grid / num_grid_y;
}
const int num_class = cls_pred.w; const int num_class = cls_pred.w;
const int reg_max_1 = dis_pred.w / 4; const int reg_max_1 = dis_pred.w / 4;
//__android_log_print(ANDROID_LOG_WARN, "ncnn","cls_pred h %d, w %d",cls_pred.h,cls_pred.w);
//__android_log_print(ANDROID_LOG_WARN, "ncnn","%d,%d,%d,%d",num_grid_x,num_grid_y,num_class,reg_max_1);
for (int i = 0; i < num_grid_y; i++)
{
for (int j = 0; j < num_grid_x; j++)
{
const int idx = i * num_grid_x + j;
const float* scores = cls_pred.row(idx); for (int i = 0; i < num_grid_y; i++) {
for (int j = 0; j < num_grid_x; j++) {
const int idx = i * num_grid_x + j;
// find label with max score const float *scores = cls_pred.row(idx);
int label = -1;
float score = -FLT_MAX;
for (int k = 0; k < num_class; k++)
{
if (scores[k] > score)
{
label = k;
score = scores[k];
}
}
if (score >= prob_threshold) // find label with max score
{ int label = -1;
ncnn::Mat bbox_pred(reg_max_1, 4, (void*)dis_pred.row(idx)); float score = -FLT_MAX;
{ for (int k = 0; k < num_class; k++) {
ncnn::Layer* softmax = ncnn::create_layer("Softmax"); if (scores[k] > score) {
label = k;
ncnn::ParamDict pd; score = scores[k];
pd.set(0, 1); // axis
pd.set(1, 1);
softmax->load_param(pd);
ncnn::Option opt;
// opt.num_threads = 1;
opt.use_packing_layout = false;
softmax->create_pipeline(opt);
softmax->forward_inplace(bbox_pred, opt);
softmax->destroy_pipeline(opt);
delete softmax;
}
float pred_ltrb[4];
for (int k = 0; k < 4; k++)
{
float dis = 0.f;
const float* dis_after_sm = bbox_pred.row(k);
for (int l = 0; l < reg_max_1; l++)
{
dis += l * dis_after_sm[l];
}
pred_ltrb[k] = dis * stride;
}
float pb_cx = (j + 0.5f) * stride;
float pb_cy = (i + 0.5f) * stride;
float x0 = pb_cx - pred_ltrb[0];
float y0 = pb_cy - pred_ltrb[1];
float x1 = pb_cx + pred_ltrb[2];
float y1 = pb_cy + pred_ltrb[3];
ov::ObjectInfo obj;
obj.rect.x = x0;
obj.rect.y = y0;
obj.rect.width = x1 - x0;
obj.rect.height = y1 - y0;
obj.label = label;
obj.score= score;
objects.push_back(obj);
}
} }
} }
}
int Nanodet::Detect(const unsigned char* rgbdata, if (score >= prob_threshold) {
int img_width, int img_height, ncnn::Mat bbox_pred(reg_max_1, 4, (void *)dis_pred.row(idx));
std::vector<ov::ObjectInfo>& rois) {
if (!initialized_) {
return 10000;
}
if (rgbdata == 0){
return 10001;
}
int w = img_width;
int h = img_height;
float scale = 1.f;
if (w > h) {
scale = (float)target_size / w;
w = target_size;
h = h * scale;
} else {
scale = (float)target_size / h;
h = target_size;
w = w * scale;
}
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, w, h);
// pad to target_size rectangle
float wpad = 320-w;//(w + 31) / 32 * 32 - w;
float hpad = 320-h;//(h + 31) / 32 * 32 - h;
ncnn::Mat in_pad;
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
in_pad.substract_mean_normalize(mean_vals, norm_vals);
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("input.1", in_pad);
std::vector<ov::ObjectInfo> proposals;
// stride 8
{
ncnn::Mat cls_pred;
ncnn::Mat dis_pred;
ex.extract("cls_pred_stride_8", cls_pred);
ex.extract("dis_pred_stride_8", dis_pred);
std::vector<ov::ObjectInfo> objects8;
generate_nanodet_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold, objects8);
proposals.insert(proposals.end(), objects8.begin(), objects8.end());
}
// stride 16
{
ncnn::Mat cls_pred;
ncnn::Mat dis_pred;
ex.extract("cls_pred_stride_16", cls_pred);
ex.extract("dis_pred_stride_16", dis_pred);
std::vector<ov::ObjectInfo> objects16;
generate_nanodet_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold, objects16);
proposals.insert(proposals.end(), objects16.begin(), objects16.end());
}
// stride 32
{
ncnn::Mat cls_pred;
ncnn::Mat dis_pred;
ex.extract("cls_pred_stride_32", cls_pred);
ex.extract("dis_pred_stride_32", dis_pred);
std::vector<ov::ObjectInfo> objects32;
generate_nanodet_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold, objects32);
proposals.insert(proposals.end(), objects32.begin(), objects32.end());
}
// sort all proposals by score from highest to lowest
qsort_descent_inplace(proposals);
// apply nms with nms_threshold
std::vector<int> picked;
nms_sorted_bboxes(proposals, picked, nms_threshold);
int count = picked.size();
rois.resize(count);
for (int i = 0; i < count; i++)
{
ov::ObjectInfo roi = proposals[picked[i]];
// adjust offset to original unpadded
float x0 = (roi.rect.x - (wpad / 2)) / scale;
float y0 = (roi.rect.y - (hpad / 2)) / scale;
float x1 = (roi.rect.x + roi.rect.width - (wpad / 2)) / scale;
float y1 = (roi.rect.y + roi.rect.height - (hpad / 2)) / scale;
// clip
x0 = std::max(std::min(x0, (float)(img_width - 1)), 0.f);
y0 = std::max(std::min(y0, (float)(img_height - 1)), 0.f);
x1 = std::max(std::min(x1, (float)(img_width - 1)), 0.f);
y1 = std::max(std::min(y1, (float)(img_height - 1)), 0.f);
roi.rect.x = x0;
roi.rect.y = y0;
roi.rect.width = x1 - x0;
roi.rect.height = y1 - y0;
rois[i] = roi;
}
// sort objects by area
struct
{
bool operator()(const ov::ObjectInfo& a, const ov::ObjectInfo& b) const
{ {
return a.rect.area() > b.rect.area(); ncnn::Layer *softmax = ncnn::create_layer("Softmax");
ncnn::ParamDict pd;
pd.set(0, 1); // axis
pd.set(1, 1);
softmax->load_param(pd);
ncnn::Option opt;
opt.num_threads = 1;
opt.use_packing_layout = false;
softmax->create_pipeline(opt);
softmax->forward_inplace(bbox_pred, opt);
softmax->destroy_pipeline(opt);
delete softmax;
} }
} objects_area_greater;
std::sort(rois.begin(), rois.end(), objects_area_greater); float pred_ltrb[4];
return 0; for (int k = 0; k < 4; k++) {
float dis = 0.f;
const float *dis_after_sm = bbox_pred.row(k);
for (int l = 0; l < reg_max_1; l++) {
dis += l * dis_after_sm[l];
}
pred_ltrb[k] = dis * stride;
}
float pb_cx = (j + 0.5f) * stride;
float pb_cy = (i + 0.5f) * stride;
float x0 = pb_cx - pred_ltrb[0];
float y0 = pb_cy - pred_ltrb[1];
float x1 = pb_cx + pred_ltrb[2];
float y1 = pb_cy + pred_ltrb[3];
ov::ObjectInfo obj;
obj.rect.x = x0;
obj.rect.y = y0;
obj.rect.width = x1 - x0;
obj.rect.height = y1 - y0;
obj.label = label;
obj.score = score;
objects.push_back(obj);
}
}
}
} }
// Run NanoDet object detection on a raw RGB buffer.
// Results are written to `rois` in original-image pixel coordinates and
// sorted by box area, largest first.
// Returns 0 on success, 10000 if no model is loaded, 10001 on null input.
int Nanodet::Detect(const unsigned char *rgbdata, int img_width, int img_height,
std::vector<ov::ObjectInfo> &rois) {
if (!initialized_) {
return 10000;
}
if (rgbdata == 0) {
return 10001;
}
// Resize so the longer side equals target_size, preserving aspect ratio.
int w = img_width;
int h = img_height;
float scale = 1.f;
if (w > h) {
scale = (float)target_size / w;
w = target_size;
h = h * scale;
} else {
scale = (float)target_size / h;
h = target_size;
w = w * scale;
}
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB,
img_width, img_height, w, h);
// pad to target_size rectangle
// NOTE(review): padding is hard-coded to 320 — presumably target_size == 320;
// confirm, otherwise the wpad/hpad un-padding below disagrees with the border.
float wpad = 320 - w; //(w + 31) / 32 * 32 - w;
float hpad = 320 - h; //(h + 31) / 32 * 32 - h;
ncnn::Mat in_pad;
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2,
wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
in_pad.substract_mean_normalize(mean_vals, norm_vals);
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("input.1", in_pad);
// Gather proposals from the three detection heads (strides 8/16/32).
std::vector<ov::ObjectInfo> proposals;
// stride 8
{
ncnn::Mat cls_pred;
ncnn::Mat dis_pred;
ex.extract("cls_pred_stride_8", cls_pred);
ex.extract("dis_pred_stride_8", dis_pred);
std::vector<ov::ObjectInfo> objects8;
generate_nanodet_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold,
objects8);
proposals.insert(proposals.end(), objects8.begin(), objects8.end());
}
// stride 16
{
ncnn::Mat cls_pred;
ncnn::Mat dis_pred;
ex.extract("cls_pred_stride_16", cls_pred);
ex.extract("dis_pred_stride_16", dis_pred);
std::vector<ov::ObjectInfo> objects16;
generate_nanodet_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold,
objects16);
proposals.insert(proposals.end(), objects16.begin(), objects16.end());
}
// stride 32
{
ncnn::Mat cls_pred;
ncnn::Mat dis_pred;
ex.extract("cls_pred_stride_32", cls_pred);
ex.extract("dis_pred_stride_32", dis_pred);
std::vector<ov::ObjectInfo> objects32;
generate_nanodet_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold,
objects32);
proposals.insert(proposals.end(), objects32.begin(), objects32.end());
}
// sort all proposals by score from highest to lowest
qsort_descent_inplace(proposals);
// apply nms with nms_threshold
std::vector<int> picked;
nms_sorted_bboxes(proposals, picked, nms_threshold);
int count = picked.size();
rois.resize(count);
for (int i = 0; i < count; i++) {
ov::ObjectInfo roi = proposals[picked[i]];
// adjust offset to original unpadded
float x0 = (roi.rect.x - (wpad / 2)) / scale;
float y0 = (roi.rect.y - (hpad / 2)) / scale;
float x1 = (roi.rect.x + roi.rect.width - (wpad / 2)) / scale;
float y1 = (roi.rect.y + roi.rect.height - (hpad / 2)) / scale;
// clip
x0 = std::max(std::min(x0, (float)(img_width - 1)), 0.f);
y0 = std::max(std::min(y0, (float)(img_height - 1)), 0.f);
x1 = std::max(std::min(x1, (float)(img_width - 1)), 0.f);
y1 = std::max(std::min(y1, (float)(img_height - 1)), 0.f);
roi.rect.x = x0;
roi.rect.y = y0;
roi.rect.width = x1 - x0;
roi.rect.height = y1 - y0;
rois[i] = roi;
}
// sort objects by area
struct {
bool operator()(const ov::ObjectInfo &a, const ov::ObjectInfo &b) const {
return a.rect.area() > b.rect.area();
}
} objects_area_greater;
std::sort(rois.begin(), rois.end(), objects_area_greater);
return 0;
} }
} // namespace ovhand

37
src/hand/pose3d.h Normal file
View File

@@ -0,0 +1,37 @@
#ifndef _HAND_POSE3D_C_H_
#define _HAND_POSE3D_C_H_
#include "../common/common.h"
#ifdef __cplusplus
extern "C" {
#endif
/* One detected hand: detection score, in-plane rotation (radians), the
 * 4-corner rotated ROI (rect), 7 palm landmarks, and the 2D/3D skeleton
 * produced by the landmark model. All pointer members are heap-allocated;
 * release with FreePalmObject. skeleton/skeleton3d may be NULL. */
typedef struct PalmObject {
float score;
float rotation;
Point2fVector *rect;
Point2fVector *landmarks;
Point2fVector *skeleton;
Point3dVector *skeleton3d;
} PalmObject;
/* A malloc'd array of PalmObject; release with FreePalmObjectVector. */
typedef struct PalmObjectVector {
PalmObject *items;
int length;
} PalmObjectVector;
/* Free all members of obj (NULL-safe per member); does not free obj itself. */
void FreePalmObject(PalmObject *obj);
/* Free every item and the items array; does not free vec itself. */
void FreePalmObjectVector(PalmObjectVector *vec);
/* Opaque handle around ovhand3d::MediapipeHand. */
typedef void *IHandPose3DEstimator;
IHandPose3DEstimator new_mediapipe_hand();
void destroy_mediapipe_hand(IHandPose3DEstimator d);
/* Load palm-detector and hand-landmark models; returns 0 on success. */
int mediapipe_hand_load_model(IHandPose3DEstimator d, const char *palm_path,
const char *hand_path);
/* Detect hands on a raw RGB buffer; fills vec (caller frees). 0 on success. */
int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata,
int img_width, int img_height, PalmObjectVector *vec);
#ifdef __cplusplus
}
#endif
#endif // !_HAND_POSE3D_C_H_

View File

@@ -0,0 +1,106 @@
#include "../pose3d.h"
#include "mediapipe/mediapipe.hpp"
#include <iostream>
// Release every heap-allocated member of a PalmObject and null the pointers.
// Each member is checked individually, so partially-filled objects are safe.
void FreePalmObject(PalmObject *obj) {
  if (obj->rect) {
    FreePoint2fVector(obj->rect);
    obj->rect = NULL;
  }
  if (obj->landmarks) {
    FreePoint2fVector(obj->landmarks);
    obj->landmarks = NULL;
  }
  if (obj->skeleton) {
    FreePoint2fVector(obj->skeleton);
    obj->skeleton = NULL;
  }
  if (obj->skeleton3d) {
    FreePoint3dVector(obj->skeleton3d);
    obj->skeleton3d = NULL;
  }
}
// Free each PalmObject in the vector, then the items array itself.
// A vector whose items pointer is already NULL is a no-op.
void FreePalmObjectVector(PalmObjectVector *vec) {
  if (vec->items == NULL) {
    return;
  }
  for (int i = 0; i < vec->length; ++i) {
    FreePalmObject(vec->items + i);
  }
  free(vec->items);
  vec->items = NULL;
}
// Allocate a MediaPipe hand estimator behind the opaque C handle.
IHandPose3DEstimator new_mediapipe_hand() {
  return new ovhand3d::MediapipeHand();
}

// Destroy an estimator previously created by new_mediapipe_hand.
void destroy_mediapipe_hand(IHandPose3DEstimator d) {
  delete static_cast<ovhand3d::MediapipeHand *>(d);
}

// Load the palm-detection and hand-landmark models from their directories.
// Returns the estimator's own status code (0 on success).
int mediapipe_hand_load_model(IHandPose3DEstimator d, const char *palm_path,
                              const char *hand_path) {
  ovhand3d::MediapipeHand *hand = static_cast<ovhand3d::MediapipeHand *>(d);
  return hand->LoadModel(palm_path, hand_path);
}
// Detect palms and 3D hand skeletons in a raw RGB buffer and marshal the
// C++ results into C structures. The caller owns `objects` afterwards and
// releases it with FreePalmObjectVector.
// Returns 0 on success or the estimator's error code (10000/10001).
int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata,
                          int img_width, int img_height,
                          PalmObjectVector *objects) {
  std::vector<ovhand3d::PalmObject> objs;
  int ret = static_cast<ovhand3d::MediapipeHand *>(d)->Detect(
      rgbdata, img_width, img_height, objs);
  if (ret != 0) {
    return ret;
  }
  const size_t total_objs = objs.size();
  objects->length = total_objs;
  if (total_objs == 0) {
    objects->items = NULL;
    return 0;
  }
  objects->items = (PalmObject *)malloc(total_objs * sizeof(PalmObject));
  for (size_t i = 0; i < total_objs; ++i) {
    objects->items[i].score = objs[i].score;
    objects->items[i].rotation = objs[i].rotation;
    // 4 corners of the rotated hand ROI.
    objects->items[i].rect = (Point2fVector *)malloc(sizeof(Point2fVector));
    objects->items[i].rect->length = 4;
    objects->items[i].rect->points = (Point2f *)malloc(4 * sizeof(Point2f));
    for (size_t j = 0; j < 4; ++j) {
      objects->items[i].rect->points[j] = objs[i].hand_pos[j];
    }
    // 7 palm landmarks.
    // BUGFIX: the buffer must hold 7 points — the previous
    // malloc(4 * sizeof(Point2f)) overflowed on the copy loop below.
    objects->items[i].landmarks =
        (Point2fVector *)malloc(sizeof(Point2fVector));
    objects->items[i].landmarks->length = 7;
    objects->items[i].landmarks->points =
        (Point2f *)malloc(7 * sizeof(Point2f));
    for (size_t j = 0; j < 7; ++j) {
      objects->items[i].landmarks->points[j] = objs[i].landmarks[j];
    }
    // Skeleton may be empty when the landmark stage produced nothing;
    // leave the pointers NULL so FreePalmObject can skip them.
    const size_t total_skeleton = objs[i].skeleton.size();
    if (total_skeleton == 0) {
      objects->items[i].skeleton = NULL;
      objects->items[i].skeleton3d = NULL;
      continue;
    }
    objects->items[i].skeleton = (Point2fVector *)malloc(sizeof(Point2fVector));
    objects->items[i].skeleton->length = total_skeleton;
    objects->items[i].skeleton->points =
        (Point2f *)malloc(total_skeleton * sizeof(Point2f));
    objects->items[i].skeleton3d =
        (Point3dVector *)malloc(sizeof(Point3dVector));
    objects->items[i].skeleton3d->length = total_skeleton;
    objects->items[i].skeleton3d->points =
        (Point3d *)malloc(total_skeleton * sizeof(Point3d));
    for (size_t j = 0; j < total_skeleton; ++j) {
      objects->items[i].skeleton->points[j].x = objs[i].skeleton[j].x;
      objects->items[i].skeleton->points[j].y = objs[i].skeleton[j].y;
      objects->items[i].skeleton3d->points[j].x = objs[i].skeleton3d[j].x;
      objects->items[i].skeleton3d->points[j].y = objs[i].skeleton3d[j].y;
      objects->items[i].skeleton3d->points[j].z = objs[i].skeleton3d[j].z;
    }
  }
  return 0;
}

View File

@@ -0,0 +1,534 @@
#include "mediapipe.hpp"
#include "mat.h"
#include <math.h>
namespace ovhand3d {
// Linearly interpolate an anchor scale for one stride level.
// A single stride level gets the midpoint of [min_scale, max_scale].
static float calculate_scale(float min_scale, float max_scale, int stride_index,
                             int num_strides) {
  if (num_strides == 1) {
    return (min_scale + max_scale) * 0.5f;
  }
  double t = 1.0 * stride_index / (num_strides - 1.0f);
  return min_scale + (max_scale - min_scale) * t;
}
// Build the SSD anchor grid, MediaPipe-style: consecutive layers sharing the
// same stride are fused into one set of scales/aspect ratios, and anchor
// width/height are fixed to 1.0 (fixed-anchor-size behaviour), so only the
// normalized centers vary across the feature map.
// FIX: removed a dead outer `int layer_id = 0;` that was shadowed by the
// loop variable of the original for-loop.
static void generate_anchors(std::vector<Anchor> &anchors,
                             const AnchorsParams &anchor_params) {
  int layer_id = 0;
  while (layer_id < (int)anchor_params.strides.size()) {
    std::vector<float> anchor_height;
    std::vector<float> anchor_width;
    std::vector<float> aspect_ratios;
    std::vector<float> scales;
    // Fuse all consecutive layers that use the same stride.
    int last_same_stride_layer = layer_id;
    while (last_same_stride_layer < (int)anchor_params.strides.size() &&
           anchor_params.strides[last_same_stride_layer] ==
               anchor_params.strides[layer_id]) {
      const float scale =
          calculate_scale(anchor_params.min_scale, anchor_params.max_scale,
                          last_same_stride_layer, anchor_params.strides.size());
      for (int aspect_ratio_id = 0;
           aspect_ratio_id < (int)anchor_params.aspect_ratios.size();
           aspect_ratio_id++) {
        aspect_ratios.push_back(anchor_params.aspect_ratios[aspect_ratio_id]);
        scales.push_back(scale);
      }
      // Extra ratio-1 anchor with the geometric mean of this scale and the
      // next level's scale (1.0 for the last level).
      const float scale_next =
          last_same_stride_layer == (int)anchor_params.strides.size() - 1
              ? 1.0f
              : calculate_scale(anchor_params.min_scale,
                                anchor_params.max_scale,
                                last_same_stride_layer + 1,
                                anchor_params.strides.size());
      scales.push_back(sqrt(scale * scale_next));
      aspect_ratios.push_back(1.0);
      last_same_stride_layer++;
    }
    for (int i = 0; i < (int)aspect_ratios.size(); ++i) {
      const float ratio_sqrts = sqrt(aspect_ratios[i]);
      anchor_height.push_back(scales[i] / ratio_sqrts);
      anchor_width.push_back(scales[i] * ratio_sqrts);
    }
    const int stride = anchor_params.strides[layer_id];
    int feature_map_height =
        ceil(1.0f * anchor_params.input_size_height / stride);
    int feature_map_width =
        ceil(1.0f * anchor_params.input_size_width / stride);
    for (int y = 0; y < feature_map_height; ++y) {
      for (int x = 0; x < feature_map_width; ++x) {
        for (int anchor_id = 0; anchor_id < (int)anchor_height.size();
             ++anchor_id) {
          const float x_center =
              (x + anchor_params.anchor_offset_x) * 1.0f / feature_map_width;
          const float y_center =
              (y + anchor_params.anchor_offset_y) * 1.0f / feature_map_height;
          Anchor new_anchor;
          new_anchor.x_center = x_center;
          new_anchor.y_center = y_center;
          // Fixed anchor size: the decoder ignores per-anchor w/h.
          new_anchor.w = 1.0f;
          new_anchor.h = 1.0f;
          anchors.push_back(new_anchor);
        }
      }
    }
    layer_id = last_same_stride_layer;
  }
}
// Configure and generate the palm-detector SSD anchors.
// FIX/generalization: the input size was hard-coded to 192x192 while the
// input_w/input_h parameters were ignored; the parameters are now used.
// The only call site passes (192, 192), so behavior there is unchanged.
static void create_ssd_anchors(int input_w, int input_h,
                               std::vector<Anchor> &anchors) {
  AnchorsParams anchor_options;
  anchor_options.num_layers = 4;
  anchor_options.min_scale = 0.1484375;
  anchor_options.max_scale = 0.75;
  anchor_options.input_size_height = input_h;
  anchor_options.input_size_width = input_w;
  anchor_options.anchor_offset_x = 0.5f;
  anchor_options.anchor_offset_y = 0.5f;
  // Four heads: stride 8 once, stride 16 three times (fused by generator).
  anchor_options.strides.push_back(8);
  anchor_options.strides.push_back(16);
  anchor_options.strides.push_back(16);
  anchor_options.strides.push_back(16);
  anchor_options.aspect_ratios.push_back(1.0);
  generate_anchors(anchors, anchor_options);
}
// Decode raw palm-detector outputs into candidate regions.
// Each anchor has 18 regression values: 4 box values (cx, cy, w, h offsets
// in input pixels) followed by 7 (x, y) landmark offsets. Scores pass
// through a sigmoid; anchors below score_thresh are dropped. All decoded
// coordinates are normalized to [0, 1] of the input image.
static int decode_bounds(std::list<DetectRegion> &region_list,
float score_thresh, int input_img_w, int input_img_h,
float *scores_ptr, float *bboxes_ptr,
std::vector<Anchor> &anchors) {
// `region` is reused across iterations; every field is overwritten before
// each push_back.
DetectRegion region;
int i = 0;
for (auto &anchor : anchors) {
float score = ov::sigmoid(scores_ptr[i]);
if (score > score_thresh) {
float *p = bboxes_ptr + (i * 18);
// Box center relative to the anchor center, normalized.
float cx = p[0] / input_img_w + anchor.x_center;
float cy = p[1] / input_img_h + anchor.y_center;
float w = p[2] / input_img_w;
float h = p[3] / input_img_h;
ov::Point2f topleft, btmright;
topleft.x = cx - w * 0.5f;
topleft.y = cy - h * 0.5f;
btmright.x = cx + w * 0.5f;
btmright.y = cy + h * 0.5f;
region.score = score;
region.topleft = topleft;
region.btmright = btmright;
// 7 palm landmarks: pixel offsets relative to the anchor center,
// normalized back to [0, 1].
for (int j = 0; j < 7; j++) {
float lx = p[4 + (2 * j) + 0];
float ly = p[4 + (2 * j) + 1];
lx += anchor.x_center * input_img_w;
ly += anchor.y_center * input_img_h;
lx /= (float)input_img_w;
ly /= (float)input_img_h;
region.landmarks[j].x = lx;
region.landmarks[j].y = ly;
}
region_list.push_back(region);
}
i++;
}
return 0;
}
// Intersection-over-union of two detection regions. Corner order is
// normalized first, so flipped boxes are handled; returns 0 when either box
// has non-positive area.
static float calc_intersection_over_union(DetectRegion &region0,
                                          DetectRegion &region1) {
  const float ax0 = std::min(region0.topleft.x, region0.btmright.x);
  const float ay0 = std::min(region0.topleft.y, region0.btmright.y);
  const float ax1 = std::max(region0.topleft.x, region0.btmright.x);
  const float ay1 = std::max(region0.topleft.y, region0.btmright.y);
  const float bx0 = std::min(region1.topleft.x, region1.btmright.x);
  const float by0 = std::min(region1.topleft.y, region1.btmright.y);
  const float bx1 = std::max(region1.topleft.x, region1.btmright.x);
  const float by1 = std::max(region1.topleft.y, region1.btmright.y);
  const float area0 = (ay1 - ay0) * (ax1 - ax0);
  const float area1 = (by1 - by0) * (bx1 - bx0);
  if (area0 <= 0 || area1 <= 0)
    return 0.0f;
  const float iw = std::max(std::min(ax1, bx1) - std::max(ax0, bx0), 0.0f);
  const float ih = std::max(std::min(ay1, by1) - std::max(ay0, by0), 0.0f);
  const float intersect_area = iw * ih;
  return intersect_area / (area0 + area1 - intersect_area);
}
static int non_max_suppression(std::list<DetectRegion> &region_list,
std::list<DetectRegion> &region_nms_list,
float iou_thresh) {
region_list.sort([](DetectRegion &v1, DetectRegion &v2) {
return v1.score > v2.score ? true : false;
});
for (auto itr = region_list.begin(); itr != region_list.end(); itr++) {
DetectRegion region_candidate = *itr;
int ignore_candidate = false;
for (auto itr_nms = region_nms_list.rbegin();
itr_nms != region_nms_list.rend(); itr_nms++) {
DetectRegion region_nms = *itr_nms;
float iou = calc_intersection_over_union(region_candidate, region_nms);
if (iou >= iou_thresh) {
ignore_candidate = true;
break;
}
}
if (!ignore_candidate) {
region_nms_list.push_back(region_candidate);
if (region_nms_list.size() >= 5)
break;
}
}
return 0;
}
// Wrap an angle into the half-open interval [-pi, pi).
static float normalize_radians(float angle) {
  const double two_pi = 2 * M_PI;
  return angle - two_pi * floor((angle + M_PI) / two_pi);
}
// Compute the in-plane rotation that makes the landmark0 -> landmark2
// direction point straight up, normalized to [-pi, pi).
// (Landmarks 0/2 are presumably wrist and middle-finger base — per the
// MediaPipe palm model; confirm against the model spec.)
static void compute_rotation(DetectRegion &region) {
  const float dx = region.landmarks[2].x - region.landmarks[0].x;
  const float dy = region.landmarks[2].y - region.landmarks[0].y;
  const float target_angle = M_PI * 0.5f;
  region.rotation = normalize_radians(target_angle - atan2(-dy, dx));
}
// Rotate a 2-D vector in place by `rotation` radians (counter-clockwise in
// a y-down image frame follows the standard rotation matrix used here).
void rot_vec(ov::Point2f &vec, float rotation) {
  const float c = cos(rotation);
  const float s = sin(rotation);
  const float x = vec.x;
  const float y = vec.y;
  vec.x = x * c - y * s;
  vec.y = x * s + y * c;
}
// Convert a detected palm region into a square, rotated hand ROI.
// The ROI center is shifted half a palm-height "up" along the rotated palm
// axis, the side is the longer palm dimension scaled by 2.6, and the four
// rotated corners are stored in palm.hand_pos (normalized coordinates).
// NOTE(review): the target_size parameter is currently unused here.
void compute_detect_to_roi(DetectRegion &region, const int &target_size,
PalmObject &palm) {
float width = region.btmright.x - region.topleft.x;
float height = region.btmright.y - region.topleft.y;
float palm_cx = region.topleft.x + width * 0.5f;
float palm_cy = region.topleft.y + height * 0.5f;
float hand_cx;
float hand_cy;
float rotation = region.rotation;
// Shift the ROI center by (0, -0.5) palm sizes, rotated with the palm.
float shift_x = 0.0f;
float shift_y = -0.5f;
if (rotation == 0.0f) {
hand_cx = palm_cx + (width * shift_x);
hand_cy = palm_cy + (height * shift_y);
} else {
float dx =
(width * shift_x) * cos(rotation) - (height * shift_y) * sin(rotation);
float dy =
(width * shift_x) * sin(rotation) + (height * shift_y) * cos(rotation);
hand_cx = palm_cx + dx;
hand_cy = palm_cy + dy;
}
// Square ROI: take the longer side, then enlarge by 2.6x to cover fingers.
float long_side = std::max(width, height);
width = long_side;
height = long_side;
float hand_w = width * 2.6f;
float hand_h = height * 2.6f;
palm.hand_cx = hand_cx;
palm.hand_cy = hand_cy;
palm.hand_w = hand_w;
palm.hand_h = hand_h;
// Corners relative to the center, then rotated and translated into place.
float dx = hand_w * 0.5f;
float dy = hand_h * 0.5f;
palm.hand_pos[0].x = -dx;
palm.hand_pos[0].y = -dy;
palm.hand_pos[1].x = +dx;
palm.hand_pos[1].y = -dy;
palm.hand_pos[2].x = +dx;
palm.hand_pos[2].y = +dy;
palm.hand_pos[3].x = -dx;
palm.hand_pos[3].y = +dy;
for (int i = 0; i < 4; i++) {
rot_vec(palm.hand_pos[i], rotation);
palm.hand_pos[i].x += hand_cx;
palm.hand_pos[i].y += hand_cy;
}
// Carry the raw palm landmarks and detection score along.
for (int i = 0; i < 7; i++) {
palm.landmarks[i] = region.landmarks[i];
}
palm.score = region.score;
}
static void pack_detect_result(std::vector<DetectRegion> &detect_results,
std::list<DetectRegion> &region_list,
const int &target_size,
std::vector<PalmObject> &palmlist) {
for (auto &region : region_list) {
compute_rotation(region);
PalmObject palm;
compute_detect_to_roi(region, target_size, palm);
palmlist.push_back(palm);
detect_results.push_back(region);
}
}
// Construct the two ncnn networks (palm detector + hand landmark model) with
// dedicated blob/workspace allocators. Models are loaded later by LoadModel;
// initialized_ stays false until then.
MediapipeHand::MediapipeHand() : ov::EstimatorBase() {
// Size-compare ratio 0 lets the pool allocators reuse any buffer regardless
// of size mismatch.
palm_blob_allocator_.set_size_compare_ratio(0.f);
palm_workspace_allocator_.set_size_compare_ratio(0.f);
hand_blob_allocator_.set_size_compare_ratio(0.f);
hand_workspace_allocator_.set_size_compare_ratio(0.f);
palm_net_ = new ncnn::Net();
hand_net_ = new ncnn::Net();
initialized_ = false;
if (num_threads > 0) {
palm_net_->opt.num_threads = num_threads;
hand_net_->opt.num_threads = num_threads;
}
palm_net_->opt.blob_allocator = &palm_blob_allocator_;
palm_net_->opt.workspace_allocator = &palm_workspace_allocator_;
palm_net_->opt.lightmode = light_mode_;
hand_net_->opt.blob_allocator = &hand_blob_allocator_;
hand_net_->opt.workspace_allocator = &hand_workspace_allocator_;
hand_net_->opt.lightmode = light_mode_;
#ifdef OV_VULKAN
palm_net_->opt.use_vulkan_compute = true;
hand_net_->opt.use_vulkan_compute = true;
#endif // OV_VULKAN
}
// Tear down both networks and their allocator pools.
// BUGFIX: the nets are allocated with `new` in the constructor but were only
// clear()ed here, leaking the ncnn::Net objects — delete them as well.
// Nets are destroyed before the member allocators they reference are cleared.
MediapipeHand::~MediapipeHand() {
  if (palm_net_) {
    palm_net_->clear();
    delete palm_net_;
    palm_net_ = NULL;
  }
  if (hand_net_) {
    hand_net_->clear();
    delete hand_net_;
    hand_net_ = NULL;
  }
  palm_workspace_allocator_.clear();
  palm_blob_allocator_.clear();
  hand_workspace_allocator_.clear();
  hand_blob_allocator_.clear();
}
void MediapipeHand::set_num_threads(int n) {
EstimatorBase::set_num_threads(n);
if (palm_net_) {
palm_net_->opt.num_threads = n;
}
if (hand_net_) {
hand_net_->opt.num_threads = n;
}
}
void MediapipeHand::set_light_mode(bool mode) {
if (palm_net_) {
palm_net_->opt.lightmode = mode;
}
if (hand_net_) {
hand_net_->opt.lightmode = mode;
}
light_mode_ = mode;
}
// Load the ncnn param/bin pair from each model directory ("<dir>/param" and
// "<dir>/bin"), then rebuild the SSD anchors for the detector input size.
// Returns 0 on success, 10000 if any file fails to load.
int MediapipeHand::LoadModel(const char *palm_path, const char *hand_path) {
  const std::string palm_dir(palm_path);
  const std::string hand_dir(hand_path);
  if (palm_net_->load_param((palm_dir + "/param").c_str()) == -1 ||
      palm_net_->load_model((palm_dir + "/bin").c_str()) == -1) {
    return 10000;
  }
  if (hand_net_->load_param((hand_dir + "/param").c_str()) == -1 ||
      hand_net_->load_model((hand_dir + "/bin").c_str()) == -1) {
    return 10000;
  }
  initialized_ = true;
  anchors.clear();
  create_ssd_anchors(target_size, target_size, anchors);
  return 0;
}
int MediapipeHand::Detect(const unsigned char *rgbdata, int img_width,
int img_height, std::vector<PalmObject> &objects) {
if (!initialized_) {
return 10000;
}
if (rgbdata == 0) {
return 10001;
}
int w = img_width;
int h = img_height;
float scale = 1.f;
if (w > h) {
scale = (float)target_size / w;
w = target_size;
h = h * scale;
} else {
scale = (float)target_size / h;
h = target_size;
w = w * scale;
}
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB,
img_width, img_height, w, h);
int wpad = target_size - w;
int hpad = target_size - h;
ncnn::Mat in_pad;
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2,
wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
in_pad.substract_mean_normalize(0, norm_vals);
ncnn::Extractor ex = palm_net_->create_extractor();
ncnn::Mat cls, reg;
ex.input("input", in_pad);
ex.extract("cls", cls);
ex.extract("reg", reg);
float *scores = (float *)cls.data;
float *bboxes = (float *)reg.data;
std::list<DetectRegion> region_list, region_nms_list;
std::vector<DetectRegion> detect_results;
decode_bounds(region_list, prob_threshold, target_size, target_size, scores,
bboxes, anchors);
non_max_suppression(region_list, region_nms_list, nms_threshold);
objects.clear();
pack_detect_result(detect_results, region_nms_list, target_size, objects);
for (int i = 0; i < objects.size(); i++) {
objects[i].hand_pos[0].x =
(objects[i].hand_pos[0].x * target_size - ((float)wpad / 2)) / scale;
objects[i].hand_pos[0].y =
(objects[i].hand_pos[0].y * target_size - ((float)hpad / 2)) / scale;
objects[i].hand_pos[1].x =
(objects[i].hand_pos[1].x * target_size - ((float)wpad / 2)) / scale;
objects[i].hand_pos[1].y =
(objects[i].hand_pos[1].y * target_size - ((float)hpad / 2)) / scale;
objects[i].hand_pos[2].x =
(objects[i].hand_pos[2].x * target_size - ((float)wpad / 2)) / scale;
objects[i].hand_pos[2].y =
(objects[i].hand_pos[2].y * target_size - ((float)hpad / 2)) / scale;
objects[i].hand_pos[3].x =
(objects[i].hand_pos[3].x * target_size - ((float)wpad / 2)) / scale;
objects[i].hand_pos[3].y =
(objects[i].hand_pos[3].y * target_size - ((float)hpad / 2)) / scale;
for (int j = 0; j < 7; j++) {
objects[i].landmarks[j].x =
(objects[i].landmarks[j].x * target_size - ((float)wpad / 2)) / scale;
objects[i].landmarks[j].y =
(objects[i].landmarks[j].y * target_size - ((float)hpad / 2)) / scale;
}
const float srcPts[8] = {
objects[i].hand_pos[0].x, objects[i].hand_pos[0].y,
objects[i].hand_pos[1].x, objects[i].hand_pos[1].y,
objects[i].hand_pos[2].x, objects[i].hand_pos[2].y,
objects[i].hand_pos[3].x, objects[i].hand_pos[3].y,
};
const float dstPts[8] = {
0, 0, 224, 0, 224, 224, 0, 224,
};
float tm[6];
unsigned char *trans_mat =
(unsigned char *)malloc(224 * 224 * 3 * sizeof(unsigned char));
ncnn::get_affine_transform(dstPts, srcPts, 4, tm);
ncnn::warpaffine_bilinear_c3(rgbdata, img_width, img_height, trans_mat, 224,
224, tm);
ncnn::Mat trans_image =
ncnn::Mat::from_pixels(trans_mat, ncnn::Mat::PIXEL_RGB, 224, 224);
float score = GetLandmarks(trans_image, tm, objects[i].skeleton,
objects[i].skeleton3d);
free(trans_mat);
}
return 0;
}
// Run the hand-landmark net on a 224x224 RGB crop.
// `tm` is the affine transform from crop coordinates back to the original
// image: 2-D skeleton points are mapped through it, while skeleton3d keeps
// crop-space coordinates normalized by the 224-pixel crop size (z is the
// model's raw relative depth — units per model spec, not verified here).
// Returns the first value of the "score" output (presumably hand-presence
// confidence — confirm against the model).
float MediapipeHand::GetLandmarks(ncnn::Mat in, float tm[6],
std::vector<ov::Point2f> &skeleton,
std::vector<ov::Point3d> &skeleton3d) {
const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
in.substract_mean_normalize(NULL, norm_vals);
ncnn::Mat points, score;
{
ncnn::Extractor ex = hand_net_->create_extractor();
ex.input("input", in);
ex.extract("points", points);
ex.extract("score", score);
}
float *points_data = (float *)points.data;
float *score_data = (float *)score.data;
// 21 landmarks, 3 floats each (x, y, z) in crop pixel space.
for (int i = 0; i < 21; i++) {
ov::Point3d pt3d;
pt3d.x = points_data[i * 3];
pt3d.y = points_data[i * 3 + 1];
pt3d.z = points_data[i * 3 + 2];
ov::Point2f pt;
// Map the 2-D projection back into original-image coordinates.
pt.x = pt3d.x * tm[0] + pt3d.y * tm[1] + tm[2];
pt.y = pt3d.x * tm[3] + pt3d.y * tm[4] + tm[5];
skeleton.push_back(pt);
pt3d.x /= 224.f;
pt3d.y /= 224.f;
skeleton3d.push_back(pt3d);
}
return score_data[0];
}
} // namespace ovhand3d

View File

@@ -0,0 +1,87 @@
#ifndef _HAND_POSE3D_MEDIAPIPE_H_
#define _HAND_POSE3D_MEDIAPIPE_H_
#include "../../../common/common.hpp"
#include <net.h>
namespace ovhand3d {
// One detected hand: palm score, 7 palm landmarks, in-plane rotation, the
// rotated square ROI (center/size plus its 4 corners), and the skeleton
// filled in by the landmark stage (21 points; skeleton3d keeps relative
// depth from the model).
struct PalmObject {
  float score;
  ov::Point2f landmarks[7];
  float rotation;
  float hand_cx;
  float hand_cy;
  float hand_w;
  float hand_h;
  ov::Point2f hand_pos[4];
  std::vector<ov::Point2f> skeleton;
  std::vector<ov::Point3d> skeleton3d;
};
// Raw palm-detector output, before conversion into a PalmObject ROI.
struct DetectRegion {
  float score;
  ov::Point2f topleft;
  ov::Point2f btmright;
  ov::Point2f landmarks[7];
  float rotation;
  ov::Point2f roi_center;
  ov::Point2f roi_size;
  ov::Point2f roi_coord[4];
};
// SSD anchor: normalized center plus (fixed) width/height.
struct Anchor {
  float x_center, y_center, w, h;
};
// Configuration for MediaPipe-style SSD anchor generation.
struct AnchorsParams {
  int input_size_width;
  int input_size_height;
  float min_scale;
  float max_scale;
  float anchor_offset_x;
  float anchor_offset_y;
  int num_layers;
  std::vector<int> feature_map_width;
  std::vector<int> feature_map_height;
  std::vector<int> strides;
  std::vector<float> aspect_ratios;
};
// Two-stage MediaPipe hand pipeline on ncnn: palm detection followed by a
// 21-point hand landmark model on each detected ROI.
class MediapipeHand : public ov::EstimatorBase {
public:
  MediapipeHand();
  ~MediapipeHand();
  // Load ncnn models from directories containing "param" and "bin" files.
  int LoadModel(const char *palm_model, const char *hand_model);
  // Detect hands on a raw RGB buffer; fills `objects`.
  // (FIX: parameter name typo img_heidht corrected to img_height; the
  // definition already used img_height, so this is declaration-only.)
  int Detect(const unsigned char *rgbdata, int img_width, int img_height,
             std::vector<PalmObject> &objects);
  // Run the landmark net on a 224x224 crop; `tm` maps crop coordinates back
  // into the original image. Returns the model's score output.
  float GetLandmarks(ncnn::Mat in, float tm[6],
                     std::vector<ov::Point2f> &skeleton,
                     std::vector<ov::Point3d> &skeleton3d);
  void set_light_mode(bool mode);
  void set_num_threads(int n);

private:
  ncnn::Net *palm_net_ = NULL;
  ncnn::Net *hand_net_ = NULL;
  ncnn::PoolAllocator palm_workspace_allocator_;
  ncnn::UnlockedPoolAllocator palm_blob_allocator_;
  ncnn::PoolAllocator hand_workspace_allocator_;
  ncnn::UnlockedPoolAllocator hand_blob_allocator_;
  bool initialized_ = false;
  bool light_mode_ = true;
  std::vector<Anchor> anchors;
  float prob_threshold = 0.55f;
  float nms_threshold = 0.3f;
  // Palm detector input resolution.
  const int target_size = 192;
  const float mean_vals[3] = {0.f, 0.f, 0.f};
  const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
};
} // namespace ovhand3d
#endif // !_HAND_POSE3D_MEDIAPIPE_H_

View File

@@ -0,0 +1,161 @@
#include "pptinypose.hpp"
#include <string>
#ifdef OV_VULKAN
#include "gpu.h"
#endif // OV_VULKAN
namespace ovpose {
// Per-channel argmax over a heatmap blob.
// top_blob receives one float per channel: the index (within that channel)
// of the channel's maximum value. `prob` receives the maximum value itself.
// NOTE(review): partial_sort is called with middle == last, which sorts the
// entire per-channel subrange — std::max_element over each channel would be
// cheaper; behavior is the same.
static int argmax(const ncnn::Mat &bottom_blob, ncnn::Mat &top_blob,
std::vector<float> &prob) {
int size = bottom_blob.total();
const float *ptr = bottom_blob;
// Pair every value with its flat index so sorting keeps the positions.
std::vector<std::pair<float, int>> vec;
vec.resize(size);
for (int i = 0; i < size; i++) {
vec[i] = std::make_pair(ptr[i], i);
}
top_blob.create(bottom_blob.c, 1, 1, 4u);
float *outptr = top_blob;
for (size_t i = 0; i < bottom_blob.c; i++) {
int size0 = bottom_blob.channel(i).total();
std::partial_sort(vec.begin() + size0 * i, vec.begin() + size0 * (i + 1),
vec.begin() + size0 * (i + 1),
std::greater<std::pair<float, int>>());
// Convert the flat index back to a channel-local index.
outptr[i] = vec[size0 * i].second - size0 * i;
prob.push_back(vec[size0 * i].first);
}
return 0;
}
// DARK sub-pixel refinement of one keypoint (channel `ch`), adjusting
// coords[ch*2] / coords[ch*2+1] in place around the integer peak (px, py).
// dim = {1, C, H, W} of the heatmap.
static void dark_parse(const ncnn::Mat &heatmap, std::vector<int> &dim,
                       std::vector<float> &coords, int px, int py, int ch) {
  /*DARK postpocessing, Zhang et al. Distribution-Aware Coordinate
  Representation for Human Pose Estimation (CVPR 2020).
  1) offset = - hassian.inv() * derivative
  2) dx = (heatmap[x+1] - heatmap[x-1])/2.
  3) dxx = (dx[x+1] - dx[x-1])/2.
  4) derivative = Mat([dx, dy])
  5) hassian = Mat([[dxx, dxy], [dxy, dyy]])
  */
  // FIX: the sampling stencil below reaches +/-2 pixels around (px, py);
  // skip refinement near the border to avoid out-of-bounds reads.
  if (px < 2 || px > dim[3] - 3 || py < 2 || py > dim[2] - 3) {
    return;
  }
  // Flatten the channel and smooth it with a 3x3 Gaussian before sampling
  // log-probabilities, as the DARK paper prescribes.
  // FIX: removed a conflicting duplicate declaration of `heatmap_mat` as
  // ncnn::Mat and an invalid `heatmap_ch = (float *)...` assignment (merged
  // leftovers that could not compile); the OpenCV path is kept since the
  // Hessian math below also uses cv::Mat.
  float *heatmap_data = (float *)heatmap.channel(ch).data;
  std::vector<float> heatmap_ch;
  heatmap_ch.insert(heatmap_ch.begin(), heatmap_data,
                    heatmap_data + heatmap.channel(ch).total());
  cv::Mat heatmap_mat = cv::Mat(heatmap_ch).reshape(0, dim[2]);
  heatmap_mat.convertTo(heatmap_mat, CV_32FC1);
  cv::GaussianBlur(heatmap_mat, heatmap_mat, cv::Size(3, 3), 0, 0);
  heatmap_mat = heatmap_mat.reshape(1, 1);
  heatmap_ch = std::vector<float>(heatmap_mat.reshape(1, 1));
  float epsilon = 1e-10;
  // sample heatmap to get values in around target location
  float xy = log(fmax(heatmap_ch[py * dim[3] + px], epsilon));
  float xr = log(fmax(heatmap_ch[py * dim[3] + px + 1], epsilon));
  float xl = log(fmax(heatmap_ch[py * dim[3] + px - 1], epsilon));
  float xr2 = log(fmax(heatmap_ch[py * dim[3] + px + 2], epsilon));
  float xl2 = log(fmax(heatmap_ch[py * dim[3] + px - 2], epsilon));
  float yu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px], epsilon));
  float yd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px], epsilon));
  float yu2 = log(fmax(heatmap_ch[(py + 2) * dim[3] + px], epsilon));
  float yd2 = log(fmax(heatmap_ch[(py - 2) * dim[3] + px], epsilon));
  float xryu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px + 1], epsilon));
  float xryd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px + 1], epsilon));
  float xlyu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px - 1], epsilon));
  float xlyd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px - 1], epsilon));
  // compute dx/dy and dxx/dyy with sampled values
  float dx = 0.5 * (xr - xl);
  float dy = 0.5 * (yu - yd);
  float dxx = 0.25 * (xr2 - 2 * xy + xl2);
  float dxy = 0.25 * (xryu - xryd - xlyu + xlyd);
  float dyy = 0.25 * (yu2 - 2 * xy + yd2);
  // finally get offset by derivative and hassian, which combined by dx/dy and
  // dxx/dyy
  if (dxx * dyy - dxy * dxy != 0) {
    float M[2][2] = {dxx, dxy, dxy, dyy};
    float D[2] = {dx, dy};
    cv::Mat hassian(2, 2, CV_32F, M);
    cv::Mat derivative(2, 1, CV_32F, D);
    cv::Mat offset = -hassian.inv() * derivative;
    coords[ch * 2] += offset.at<float>(0, 0);
    coords[ch * 2 + 1] += offset.at<float>(1, 0);
  }
}
// Convert per-channel argmax indices into (x, y) heatmap coordinates and
// refine each with DARK sub-pixel post-processing.
// Returns a flat vector [x0, y0, x1, y1, ...] of length heatmap.c * 2.
static std::vector<float> get_final_preds(const ncnn::Mat &heatmap,
                                          const ncnn::Mat &argmax_out) {
  std::vector<float> coords((size_t)heatmap.c * 2);
  for (int i = 0; i < heatmap.c; i++) {
    int idx = argmax_out[i];
    coords[i * 2] = idx % heatmap.w;
    // BUGFIX: the row index is the integer quotient of idx / width. The
    // previous (float)idx / heatmap.w kept the fractional column part,
    // shifting y by up to nearly one full row.
    coords[i * 2 + 1] = (float)(idx / heatmap.w);
    int px = int(coords[i * 2] + 0.5);
    int py = int(coords[i * 2 + 1] + 0.5);
    std::vector<int> dim({1, heatmap.c, heatmap.h, heatmap.w});
    dark_parse(heatmap, dim, coords, px, py, i);
  }
  return coords;
}
// Select the network input resolution from the requested target size.
// PP-TinyPose ships two variants: 128x96 and 256x192 (height x width).
PPTinyPoseEstimator::PPTinyPoseEstimator(int target_size) : Estimator() {
  if (target_size == 128) {
    target_width_ = 96;
    target_height_ = 128;
  } else {
    // BUGFIX: the large variant is 192 px wide (was mistyped as 196),
    // matching the official PP-TinyPose 256x192 model input.
    target_width_ = 192;
    target_height_ = 256;
  }
}
// Extract pose keypoints inside `rect` of a raw RGB buffer.
// Keypoints are returned in original-image coordinates with the per-channel
// heatmap maximum as their score.
// Returns 0 on success, 10000 if no model is loaded, 10001 on null input.
int PPTinyPoseEstimator::ExtractKeypoints(
    const unsigned char *rgbdata, int img_width, int img_height,
    const ov::Rect &rect, std::vector<ov::Keypoint> *keypoints) {
  if (!initialized_) {
    return 10000;
  }
  if (rgbdata == 0) {
    return 10001;
  }
  keypoints->clear();
  // Crop the ROI and resize to the network input resolution.
  ncnn::Mat in = ncnn::Mat::from_pixels_roi_resize(
      rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, rect.x, rect.y,
      rect.width, rect.height, target_width_, target_height_);
  in.substract_mean_normalize(meanVals, normVals);
  ncnn::Extractor ex = net_->create_extractor();
  ex.set_light_mode(light_mode_);
  ex.set_num_threads(num_threads);
  ex.input("image", in);
  ncnn::Mat out;
  ex.extract("save_infer_model/scale_0.tmp_1", out);
  ncnn::Mat argmax_out;
  std::vector<float> probs;
  argmax(out, argmax_out, probs);
  std::vector<float> coords = get_final_preds(out, argmax_out);
  for (size_t i = 0; i < coords.size() / 2; i++) {
    // Use ov::Keypoint to match the signature (was ov::KeyPoint).
    ov::Keypoint keypoint;
    // Scale heatmap coordinates back to the ROI, then offset into the image.
    // BUGFIX: use rect.height for the y scale — `rect.h` is not a field
    // (the crop above uses rect.height).
    keypoint.p =
        ov::Point(coords[i * 2] * rect.width / (float)out.w + rect.x,
                  coords[i * 2 + 1] * rect.height / (float)out.h + rect.y);
    keypoint.score = probs[i];
    keypoints->push_back(keypoint);
  }
  return 0;
}
} // namespace ovpose

View File

@@ -0,0 +1,25 @@
#ifndef _POSE_PPTINYPOSE_ESTIMATOR_H_
#define _POSE_PPTINYPOSE_ESTIMATOR_H_
#include "../estimator.hpp"
#include "net.h"
#include <vector>
namespace ovpose {
// PP-TinyPose keypoint estimator running on ncnn.
class PPTinyPoseEstimator : public Estimator {
public:
// target_size selects the model variant: 128 -> 96x128 input, anything else
// selects the larger input resolution.
PPTinyPoseEstimator(int target_size);
// Extract keypoints inside `rect` of a raw RGB buffer; coordinates are
// returned in original-image space. Returns 0 on success.
int ExtractKeypoints(const unsigned char *rgbdata, int img_width,
int img_height, const ov::Rect &rect,
std::vector<ov::Keypoint> *keypoints);
private:
// Network input resolution (width x height), set by the constructor.
int target_width_ = 96;
int target_height_ = 128;
// ImageNet RGB mean/std normalization constants.
const float meanVals[3] = {123.675f, 116.28f, 103.53f};
const float normVals[3] = {0.01712475f, 0.0175f, 0.01742919f};
};
} // namespace ovpose
#endif // !_POSE_PPTINYPOSE_ESTIMATOR_H_