From 7eab96fa85a1f045250e84ef7ecb1b64ec2159ef Mon Sep 17 00:00:00 2001 From: Syd Xu Date: Mon, 13 Dec 2021 16:39:50 +0800 Subject: [PATCH] feat(hand): add mediapipe hand 3d pose detecter --- .gitignore | 2 + README.md | 2 + go/common/geometry.go | 52 ++ go/common/objectinfo.go | 2 + go/common/palmobject.go | 56 ++ go/examples/hand/main.go | 65 ++- go/face/tracker/cgo.go | 2 +- go/hand/drawer/const.go | 63 +-- go/hand/drawer/drawer.go | 133 ++++- go/hand/drawer/option.go | 18 + go/hand/pose3d/cgo.go | 11 + go/hand/pose3d/cgo_vulkan.go | 11 + go/hand/pose3d/doc.go | 2 + go/hand/pose3d/mediapipe.go | 62 +++ src/CMakeLists.txt | 2 + src/common/common.cpp | 7 + src/common/common.h | 97 ++-- src/common/common.hpp | 7 + src/hand/detecter/nanodet/nanodet.cpp | 418 +++++++-------- src/hand/pose3d.h | 37 ++ src/hand/pose3d/estimator.cpp | 106 ++++ src/hand/pose3d/mediapipe/mediapipe.cpp | 534 +++++++++++++++++++ src/hand/pose3d/mediapipe/mediapipe.hpp | 87 +++ src/pose/estimator/pptinypose/pptinypose.bak | 161 ++++++ src/pose/estimator/pptinypose/pptinypose.hpp | 25 + 25 files changed, 1628 insertions(+), 334 deletions(-) create mode 100644 go/common/palmobject.go create mode 100644 go/hand/pose3d/cgo.go create mode 100644 go/hand/pose3d/cgo_vulkan.go create mode 100644 go/hand/pose3d/doc.go create mode 100644 go/hand/pose3d/mediapipe.go create mode 100644 src/hand/pose3d.h create mode 100644 src/hand/pose3d/estimator.cpp create mode 100644 src/hand/pose3d/mediapipe/mediapipe.cpp create mode 100644 src/hand/pose3d/mediapipe/mediapipe.hpp create mode 100644 src/pose/estimator/pptinypose/pptinypose.bak create mode 100644 src/pose/estimator/pptinypose/pptinypose.hpp diff --git a/.gitignore b/.gitignore index 049c503..5ad6ac4 100644 --- a/.gitignore +++ b/.gitignore @@ -70,3 +70,5 @@ _testmain.go test .vim dist/ + +libtorch/ diff --git a/README.md b/README.md index 7bdd285..8f7c9bc 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,8 @@ cmake .. 
# optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM - nanodet [Google Drive](https://drive.google.com/drive/folders/1ywH7r_clqqA_BAOFSzA92Q0lxJtWlN3z?usp=sharing) - pose (for hand pose estimation) - handnet [Google Drive](https://drive.google.com/drive/folders/1DsCGmiVaZobbMWRp5Oec8GbIpeg7CsNR?usp=sharing) + - pose3d (for 3d handpose detection) + - mediapipe [Google Drive](https://drive.google.com/drive/folders/1LsqIGB55dusZJqmP1uhnQUnNE2tLzifp?usp=sharing) - styletransfer - animegan2 [Google Drive](https://drive.google.com/drive/folders/1K6ZScENPHVbxupHkwl5WcpG8PPECtD8e?usp=sharing) - tracker diff --git a/go/common/geometry.go b/go/common/geometry.go index c803e4a..bcdf88c 100644 --- a/go/common/geometry.go +++ b/go/common/geometry.go @@ -90,6 +90,9 @@ func NewCPoint2fVector() *C.Point2fVector { // GoPoint2fVector convert C.Point2fVector to []Point func GoPoint2fVector(cVector *C.Point2fVector, w float64, h float64) []Point { + if cVector == nil { + return nil + } l := int(cVector.length) ret := make([]Point, 0, l) ptr := unsafe.Pointer(cVector.points) @@ -105,3 +108,52 @@ func FreeCPoint2fVector(c *C.Point2fVector) { C.FreePoint2fVector(c) C.free(unsafe.Pointer(c)) } + +// Point3d represents a 3dPoint +type Point3d struct { + X float64 + Y float64 + Z float64 +} + +// Pt3d returns a New Point3d +func Pt3d(x, y, z float64) Point3d { + return Point3d{x, y, z} +} + +var ZP3d = Point3d{} + +// GoPoint3d conver C.Point3d to Point3d +func GoPoint3d(c *C.Point3d) Point3d { + return Pt3d( + float64(c.x), + float64(c.y), + float64(c.z), + ) +} + +// NewCPoint3dVector retruns C.Point3dVector pointer +func NewCPoint3dVector() *C.Point3dVector { + return (*C.Point3dVector)(C.malloc(C.sizeof_Point3d)) +} + +// GoPoint3dVector convert C.Point3dVector to []Point3d +func GoPoint3dVector(cVector *C.Point3dVector) []Point3d { + if cVector == nil { + return nil + } + l := int(cVector.length) + ret := make([]Point3d, 0, l) + ptr := unsafe.Pointer(cVector.points) 
+ for i := 0; i < l; i++ { + cPoint3d := (*C.Point3d)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_Point3d*C.int(i)))) + ret = append(ret, GoPoint3d(cPoint3d)) + } + return ret +} + +// FreeCPoint3dVector release C.Point3dVector memory +func FreeCPoint3dVector(c *C.Point3dVector) { + C.FreePoint3dVector(c) + C.free(unsafe.Pointer(c)) +} diff --git a/go/common/objectinfo.go b/go/common/objectinfo.go index 67ae10a..9d32019 100644 --- a/go/common/objectinfo.go +++ b/go/common/objectinfo.go @@ -20,6 +20,8 @@ type ObjectInfo struct { Rect Rectangle // Points keypoints Keypoints []Keypoint + // Name + Name string } // GoObjectInfo convert C.ObjectInfo to go type diff --git a/go/common/palmobject.go b/go/common/palmobject.go new file mode 100644 index 0000000..c6c1e07 --- /dev/null +++ b/go/common/palmobject.go @@ -0,0 +1,56 @@ +package common + +/* +#include +#include +#include "openvision/common/common.h" +#include "openvision/hand/pose3d.h" +*/ +import "C" +import ( + "unsafe" +) + +// PalmObject +type PalmObject struct { + Score float64 + Rotation float64 + Rect []Point + Landmarks []Point + Skeleton []Point + Skeleton3d []Point3d +} + +// NewCPalmObjectVector returns *C.PalmObjectVector +func NewCPalmObjectVector() *C.PalmObjectVector { + return (*C.PalmObjectVector)(C.malloc(C.sizeof_PalmObjectVector)) +} + +// FreeCPalmObjectVector release *C.PalmObjectVector memory +func FreeCPalmObjectVector(p *C.PalmObjectVector) { + C.FreePalmObjectVector(p) + C.free(unsafe.Pointer(p)) +} + +// GoPalmObject convert C.PalmObject to Go type +func GoPalmObject(cObj *C.PalmObject, w float64, h float64) PalmObject { + return PalmObject{ + Score: float64(cObj.score), + Rotation: float64(cObj.rotation), + Rect: GoPoint2fVector(cObj.rect, w, h), + Landmarks: GoPoint2fVector(cObj.landmarks, w, h), + Skeleton: GoPoint2fVector(cObj.skeleton, w, h), + Skeleton3d: GoPoint3dVector(cObj.skeleton3d), + } +} + +func GoPalmObjectVector(c *C.PalmObjectVector, w float64, h float64) []PalmObject 
{ + l := int(c.length) + ret := make([]PalmObject, 0, l) + ptr := unsafe.Pointer(c.items) + for i := 0; i < l; i++ { + cObj := (*C.PalmObject)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_PalmObject*C.int(i)))) + ret = append(ret, GoPalmObject(cObj, w, h)) + } + return ret +} diff --git a/go/examples/hand/main.go b/go/examples/hand/main.go index 0461264..836b9df 100644 --- a/go/examples/hand/main.go +++ b/go/examples/hand/main.go @@ -15,6 +15,7 @@ import ( "github.com/bububa/openvision/go/hand/detecter" handdrawer "github.com/bububa/openvision/go/hand/drawer" "github.com/bububa/openvision/go/hand/pose" + "github.com/bububa/openvision/go/hand/pose3d" ) func main() { @@ -27,17 +28,19 @@ func main() { cpuCores := common.GetBigCPUCount() common.SetOMPThreads(cpuCores) log.Printf("CPU big cores:%d\n", cpuCores) - estimator := handpose(modelPath) - defer estimator.Destroy() - common.SetEstimatorThreads(estimator, cpuCores) - for idx, d := range []detecter.Detecter{ - yolox(modelPath), - nanodet(modelPath), - } { - defer d.Destroy() - common.SetEstimatorThreads(d, cpuCores) - detect(d, estimator, imgPath, "hand1.jpg", idx) - } + // estimator := handpose(modelPath) + // defer estimator.Destroy() + // common.SetEstimatorThreads(estimator, cpuCores) + // for idx, d := range []detecter.Detecter{ + // yolox(modelPath), + // nanodet(modelPath), + // } { + // defer d.Destroy() + // common.SetEstimatorThreads(d, cpuCores) + // detect(d, estimator, imgPath, "hand2.jpg", idx) + // } + d3d := mediapipe(modelPath) + detect3d(d3d, imgPath, "hand1.jpg") } func yolox(modelPath string) detecter.Detecter { @@ -67,6 +70,16 @@ func handpose(modelPath string) pose.Estimator { return d } +func mediapipe(modelPath string) *pose3d.Mediapipe { + palmPath := filepath.Join(modelPath, "mediapipe/palm/full") + handPath := filepath.Join(modelPath, "mediapipe/hand/full") + d := pose3d.NewMediapipe() + if err := d.LoadModel(palmPath, handPath); err != nil { + log.Fatalln(err) + } + return d +} + func 
detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename string, idx int) { inPath := filepath.Join(imgPath, filename) imgSrc, err := loadImage(inPath) @@ -104,6 +117,36 @@ func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename stri if err := saveImage(out, outPath); err != nil { log.Fatalln(err) } +} + +func detect3d(d *pose3d.Mediapipe, imgPath string, filename string) { + inPath := filepath.Join(imgPath, filename) + imgSrc, err := loadImage(inPath) + if err != nil { + log.Fatalln("load image failed,", err) + } + img := common.NewImage(imgSrc) + rois, err := d.Detect(img) + if err != nil { + log.Fatalln(err) + } + log.Printf("%+v\n", rois) + drawer := handdrawer.New() + outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("pose3d-hand-%s", filename)) + out := drawer.DrawPalm(img, rois) + + if err := saveImage(out, outPath); err != nil { + log.Fatalln(err) + } + + for idx, roi := range rois { + outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("pose3d-palm3d-%d-%s", idx, filename)) + out := drawer.DrawPalm3D(roi, 400, "#442519") + + if err := saveImage(out, outPath); err != nil { + log.Fatalln(err) + } + } } diff --git a/go/face/tracker/cgo.go b/go/face/tracker/cgo.go index c64d9fc..0f33239 100644 --- a/go/face/tracker/cgo.go +++ b/go/face/tracker/cgo.go @@ -1,6 +1,6 @@ // +build !vulkan -package eye +package tracker /* #cgo CXXFLAGS: --std=c++11 -fopenmp diff --git a/go/hand/drawer/const.go b/go/hand/drawer/const.go index c1f9887..387d608 100644 --- a/go/hand/drawer/const.go +++ b/go/hand/drawer/const.go @@ -7,71 +7,16 @@ import ( const ( // DefaultBorderColor default drawer border color DefaultBorderColor = common.Green + // DefaultKeypointColor default drawer keypoint color + DefaultKeypointColor = common.Pink // DefaultBorderStrokeWidth default drawer border stroke width DefaultBorderStrokeWidth = 3 // DefaultKeypointRadius default drawer keypoint radius DefaultKeypointRadius = 3 // DefaultKeypointStrokeWidth 
default drawer keypoint stroke width DefaultKeypointStrokeWidth = 1 -) - -// CocoPart coco part define -type CocoPart = int - -const ( - // CocoPartNose nose - CocoPartNose CocoPart = iota - // CocoPartLEye left eye - CocoPartLEye - // CocoPartREye right eye - CocoPartREye - // CocoPartLEar left ear - CocoPartLEar - // CocoPartREar right ear - CocoPartREar - // CocoPartLShoulder left sholder - CocoPartLShoulder - // CocoPartRShoulder right sholder - CocoPartRShoulder - // CocoPartLElbow left elbow - CocoPartLElbow - // CocoPartRElbow right elbow - CocoPartRElbow - // CocoPartLWrist left wrist - CocoPartLWrist - // CocoPartRWrist right wrist - CocoPartRWrist - // CocoPartLHip left hip - CocoPartLHip - // CocoPartRHip right hip - CocoPartRHip - // CocoPartLKnee left knee - CocoPartLKnee - // CocoPartRKnee right knee - CocoPartRKnee - // CocoPartRAnkle right ankle - CocoPartRAnkle - // CocoPartLAnkle left ankle - CocoPartLAnkle - // CocoPartNeck neck - CocoPartNeck - // CocoPartBackground background - CocoPartBackground -) - -var ( - // CocoPair represents joints pair - CocoPair = [16][2]CocoPart{ - {0, 1}, {1, 3}, {0, 2}, {2, 4}, {5, 6}, {5, 7}, {7, 9}, {6, 8}, {8, 10}, {5, 11}, {6, 12}, {11, 12}, {11, 13}, {12, 14}, {13, 15}, {14, 16}, - } - // CocoColors represents color for coco parts - CocoColors = [17]string{ - "#ff0000", "#ff5500", "#ffaa00", "#ffff00", - "#aaff00", "#55ff00", "#00ff00", "#00ff55", "#00ffaa", - "#00ffff", "#00aaff", "#0055ff", - "#0000ff", "#aa00ff", "#ff00ff", - "#ff00aa", "#ff0055", - } + // DefaultLabelColor default label color + DefaultLabelColor = common.White ) var ( diff --git a/go/hand/drawer/drawer.go b/go/hand/drawer/drawer.go index 948eef5..2045ac3 100644 --- a/go/hand/drawer/drawer.go +++ b/go/hand/drawer/drawer.go @@ -2,8 +2,10 @@ package drawer import ( "image" + "image/color" "github.com/llgcode/draw2d/draw2dimg" + "github.com/llgcode/draw2d/draw2dkit" "github.com/bububa/openvision/go/common" ) @@ -18,6 +20,12 @@ type Drawer 
struct { KeypointStrokeWidth float64 // KeypointRadius represents keypoints circle radius KeypointRadius float64 + // KeypointColor represents keypoint color + KeypointColor string + // LabelColor string + LabelColor string + // Font + Font *common.Font } // New returns a new Drawer @@ -27,6 +35,8 @@ func New(options ...Option) *Drawer { BorderStrokeWidth: DefaultBorderStrokeWidth, KeypointStrokeWidth: DefaultKeypointStrokeWidth, KeypointRadius: DefaultKeypointRadius, + KeypointColor: DefaultKeypointColor, + LabelColor: DefaultLabelColor, } for _, opt := range options { opt.apply(d) @@ -42,15 +52,15 @@ func (d *Drawer) Draw(img image.Image, rois []common.ObjectInfo, drawBorder bool gc := draw2dimg.NewGraphicContext(out) gc.DrawImage(img) for _, roi := range rois { + rect := common.Rect( + roi.Rect.X*imgW, + roi.Rect.Y*imgH, + roi.Rect.Width*imgW, + roi.Rect.Height*imgH, + ) + borderColor := d.BorderColor if drawBorder { // draw rect - rect := common.Rect( - roi.Rect.X*imgW, - roi.Rect.Y*imgH, - roi.Rect.Width*imgW, - roi.Rect.Height*imgH, - ) - borderColor := d.BorderColor common.DrawRectangle(gc, rect, borderColor, "", d.BorderStrokeWidth) } l := len(roi.Keypoints) @@ -95,6 +105,115 @@ func (d *Drawer) Draw(img image.Image, rois []common.ObjectInfo, drawBorder bool poseColor := PoseColors[colorIdx] common.DrawCircle(gc, common.Pt(pt.Point.X*imgW, pt.Point.Y*imgH), d.KeypointRadius, poseColor, "", d.KeypointStrokeWidth) } + // draw name + if roi.Name != "" { + common.DrawLabelInWidth(gc, d.Font, roi.Name, common.Pt(rect.X, rect.MaxY()), d.LabelColor, borderColor, rect.Width) + } + } + return out +} + +// DrawPalm draw PalmObject +func (d *Drawer) DrawPalm(img image.Image, rois []common.PalmObject) image.Image { + imgW := float64(img.Bounds().Dx()) + imgH := float64(img.Bounds().Dy()) + out := image.NewRGBA(img.Bounds()) + gc := draw2dimg.NewGraphicContext(out) + gc.DrawImage(img) + for _, roi := range rois { + gc.SetLineWidth(d.BorderStrokeWidth) + 
gc.SetStrokeColor(common.ColorFromHex(d.BorderColor)) + gc.BeginPath() + for idx, pt := range roi.Rect { + gc.MoveTo(pt.X*imgW, pt.Y*imgH) + if idx == len(roi.Rect)-1 { + gc.LineTo(roi.Rect[0].X*imgW, roi.Rect[0].Y*imgH) + } else { + gc.LineTo(roi.Rect[idx+1].X*imgW, roi.Rect[idx+1].Y*imgH) + } + } + gc.Close() + gc.Stroke() + + l := len(roi.Skeleton) + if l == 0 { + continue + } + // draw skeleton + for idx := range roi.Skeleton[:l-1] { + var ( + p0 common.Point + p1 common.Point + poseColor = PoseColors[idx/4] + ) + gc.SetStrokeColor(common.ColorFromHex(poseColor)) + if idx == 5 || idx == 9 || idx == 13 || idx == 17 { + p0 = roi.Skeleton[0] + p1 = roi.Skeleton[idx] + gc.BeginPath() + gc.MoveTo(p0.X*imgW, p0.Y*imgH) + gc.LineTo(p1.X*imgW, p1.Y*imgH) + gc.Close() + gc.Stroke() + } else if idx == 4 || idx == 8 || idx == 12 || idx == 16 { + continue + } + p0 = roi.Skeleton[idx] + p1 = roi.Skeleton[idx+1] + gc.BeginPath() + gc.MoveTo(p0.X*imgW, p0.Y*imgH) + gc.LineTo(p1.X*imgW, p1.Y*imgH) + gc.Close() + gc.Stroke() + } + for _, pt := range roi.Landmarks { + common.DrawCircle(gc, common.Pt(pt.X*imgW, pt.Y*imgH), d.KeypointRadius, d.KeypointColor, "", d.KeypointStrokeWidth) + } + } + return out +} + +// DrawPalm3D draw 3d PalmObject +func (d *Drawer) DrawPalm3D(roi common.PalmObject, size float64, bg string) image.Image { + out := image.NewRGBA(image.Rect(0, 0, int(size), int(size))) + gc := draw2dimg.NewGraphicContext(out) + l := len(roi.Skeleton3d) + if l == 0 { + return out + } + if bg != "" { + bgColor := common.ColorFromHex(bg) + gc.SetFillColor(bgColor) + draw2dkit.Rectangle(gc, 0, 0, size, size) + gc.Fill() + gc.SetFillColor(color.Transparent) + } + // draw skeleton3d + for idx := range roi.Skeleton3d[:l-1] { + var ( + p0 common.Point3d + p1 common.Point3d + poseColor = PoseColors[idx/4] + ) + gc.SetStrokeColor(common.ColorFromHex(poseColor)) + if idx == 5 || idx == 9 || idx == 13 || idx == 17 { + p0 = roi.Skeleton3d[0] + p1 = roi.Skeleton3d[idx] + gc.BeginPath() 
+ gc.MoveTo(p0.X*size, p0.Y*size) + gc.LineTo(p1.X*size, p1.Y*size) + gc.Close() + gc.Stroke() + } else if idx == 4 || idx == 8 || idx == 12 || idx == 16 { + continue + } + p0 = roi.Skeleton3d[idx] + p1 = roi.Skeleton3d[idx+1] + gc.BeginPath() + gc.MoveTo(p0.X*size, p0.Y*size) + gc.LineTo(p1.X*size, p1.Y*size) + gc.Close() + gc.Stroke() } return out } diff --git a/go/hand/drawer/option.go b/go/hand/drawer/option.go index 2ca7e11..9bcddd0 100644 --- a/go/hand/drawer/option.go +++ b/go/hand/drawer/option.go @@ -1,5 +1,9 @@ package drawer +import ( + "github.com/bububa/openvision/go/common" +) + // Option represents Drawer option interface type Option interface { apply(*Drawer) @@ -38,3 +42,17 @@ func WithKeypointStrokeWidth(w float64) Option { d.KeypointStrokeWidth = w }) } + +// WithKeypointColor set Drawer KeypointColor +func WithKeypointColor(color string) Option { + return optionFunc(func(d *Drawer) { + d.KeypointColor = color + }) +} + +// WithFont set Drawer Font +func WithFont(font *common.Font) Option { + return optionFunc(func(d *Drawer) { + d.Font = font + }) +} diff --git a/go/hand/pose3d/cgo.go b/go/hand/pose3d/cgo.go new file mode 100644 index 0000000..714b66e --- /dev/null +++ b/go/hand/pose3d/cgo.go @@ -0,0 +1,11 @@ +// +build !vulkan + +package pose3d + +/* +#cgo CXXFLAGS: --std=c++11 -fopenmp +#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include +#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision +#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib +*/ +import "C" diff --git a/go/hand/pose3d/cgo_vulkan.go b/go/hand/pose3d/cgo_vulkan.go new file mode 100644 index 0000000..f12a81b --- /dev/null +++ b/go/hand/pose3d/cgo_vulkan.go @@ -0,0 +1,11 @@ +// +build vulkan + +package pose3d + +/* +#cgo CXXFLAGS: --std=c++11 -fopenmp +#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include +#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision -lglslang -lvulkan -lSPIRV -lOGLCompiler -lMachineIndependent -lGenericCodeGen -lOSDependent 
+#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib +*/ +import "C" diff --git a/go/hand/pose3d/doc.go b/go/hand/pose3d/doc.go new file mode 100644 index 0000000..bfb3a2c --- /dev/null +++ b/go/hand/pose3d/doc.go @@ -0,0 +1,2 @@ +// Package pose hand 3d pose estimator +package pose3d diff --git a/go/hand/pose3d/mediapipe.go b/go/hand/pose3d/mediapipe.go new file mode 100644 index 0000000..6c1f5fa --- /dev/null +++ b/go/hand/pose3d/mediapipe.go @@ -0,0 +1,62 @@ +package pose3d + +/* +#include +#include +#include "openvision/common/common.h" +#include "openvision/hand/pose3d.h" +*/ +import "C" +import ( + "unsafe" + + openvision "github.com/bububa/openvision/go" + "github.com/bububa/openvision/go/common" +) + +// Mediapipe represents mediapipe estimator interface +type Mediapipe struct { + d C.IHandPose3DEstimator +} + +func NewMediapipe() *Mediapipe { + return &Mediapipe{ + d: C.new_mediapipe_hand(), + } +} + +func (m *Mediapipe) Destroy() { + C.destroy_mediapipe_hand(m.d) +} + +func (m *Mediapipe) LoadModel(palmPath string, handPath string) error { + cPalm := C.CString(palmPath) + defer C.free(unsafe.Pointer(cPalm)) + cHand := C.CString(handPath) + defer C.free(unsafe.Pointer(cHand)) + retCode := C.mediapipe_hand_load_model(m.d, cPalm, cHand) + if retCode != 0 { + return openvision.LoadModelError(int(retCode)) + } + return nil + +} + +// Detect detect hand 3d pose +func (m *Mediapipe) Detect(img *common.Image) ([]common.PalmObject, error) { + imgWidth := img.WidthF64() + imgHeight := img.HeightF64() + data := img.Bytes() + cObjs := common.NewCPalmObjectVector() + defer common.FreeCPalmObjectVector(cObjs) + errCode := C.mediapipe_hand_detect( + m.d, + (*C.uchar)(unsafe.Pointer(&data[0])), + C.int(imgWidth), C.int(imgHeight), + (*C.PalmObjectVector)(unsafe.Pointer(cObjs)), + ) + if errCode != 0 { + return nil, openvision.DetectHandError(int(errCode)) + } + return common.GoPalmObjectVector(cObjs, imgWidth, imgHeight), nil +} diff --git a/src/CMakeLists.txt 
b/src/CMakeLists.txt index f6312ed..1ac27c4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -71,6 +71,7 @@ target_include_directories(openvision $ $ $ + $ $ $ @@ -109,6 +110,7 @@ file(COPY file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter.h ${CMAKE_CURRENT_SOURCE_DIR}/hand/pose.h + ${CMAKE_CURRENT_SOURCE_DIR}/hand/pose3d.h DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/hand ) diff --git a/src/common/common.cpp b/src/common/common.cpp index 706334a..918d69c 100644 --- a/src/common/common.cpp +++ b/src/common/common.cpp @@ -58,6 +58,13 @@ void FreePoint2fVector(Point2fVector *p) { } } +void FreePoint3dVector(Point3dVector *p) { + if (p->points != NULL) { + free(p->points); + p->points = NULL; + } +} + void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val) { if (p->points == NULL || i >= p->length) { return; diff --git a/src/common/common.h b/src/common/common.h index 1786397..675af10 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -11,123 +11,134 @@ typedef ov::Size Size; typedef ov::Size2f Size2f; typedef ov::Point Point; typedef ov::Point2f Point2f; +typedef ov::Point3d Point3d; typedef ov::Rect Rect; typedef ov::Keypoint Keypoint; #else // Wrapper for an individual cv::cvSize typedef struct Size { - int width; - int height; + int width; + int height; } Size; // // Wrapper for an individual cv::cvSize2f typedef struct Size2f { - int width; - int height; + int width; + int height; } Size2f; // Wrapper for an individual cv::cvPoint typedef struct Point { - int x; - int y; + int x; + int y; } Point; // Wrapper for an individual cv::Point2f typedef struct Point2f { - float x; - float y; + float x; + float y; } Point2f; +typedef struct Point3d { + float x; + float y; + float z; +} Point3d; // Wrapper for an individual cv::Rect typedef struct Rect { - int x; - int y; - int width; - int height; + int x; + int y; + int width; + int height; } Rect; - typedef struct Keypoint { - Point2f p; - float score; - int id; + Point2f p; + 
float score; + int id; } Keypoint; - #endif -typedef void* IEstimator; +typedef void *IEstimator; int get_gpu_count(); int create_gpu_instance(); void destroy_gpu_instance(); -int get_big_cpu_count(); +int get_big_cpu_count(); void set_omp_num_threads(int n); -int load_model(IEstimator e, const char* root_path); +int load_model(IEstimator e, const char *root_path); void destroy_estimator(IEstimator e); void set_num_threads(IEstimator e, int n); void set_light_mode(IEstimator e, bool mode); typedef struct Point2fVector { - Point2f* points; - int length; + Point2f *points; + int length; } Point2fVector; void FreePoint2fVector(Point2fVector *p); -void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f* val); +void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val); + +typedef struct Point3dVector { + Point3d *points; + int length; +} Point3dVector; + +void FreePoint3dVector(Point3dVector *p); typedef struct RectVector { - Rect* rects; - int length; + Rect *rects; + int length; } RectVector; void FreeRectVector(RectVector *p); typedef struct FloatVector { - float* values; - int length; + float *values; + int length; } FloatVector; void FreeFloatVector(FloatVector *p); typedef struct Bytes { - unsigned char* values; - int length; + unsigned char *values; + int length; } Bytes; void FreeBytes(Bytes *p); typedef struct KeypointVector { - Keypoint* points; - int length; + Keypoint *points; + int length; } KeypointVector; void FreeKeypointVector(KeypointVector *p); -void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint* val); +void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint *val); typedef struct ImageC { - unsigned char* data; - int width; - int height; - int channels; + unsigned char *data; + int width; + int height; + int channels; } Image; -void FreeImage(Image* p); +void FreeImage(Image *p); typedef struct ObjectInfoC { - Rect rect; - float score; - int label; - KeypointVector* pts; + Rect rect; + float 
score; + int label; + KeypointVector *pts; } ObjectInfo; void FreeObjectInfo(ObjectInfo *p); typedef struct ObjectInfoVector { - ObjectInfo* items; - int length; + ObjectInfo *items; + int length; } ObjectInfoVector; void FreeObjectInfoVector(ObjectInfoVector *p); diff --git a/src/common/common.hpp b/src/common/common.hpp index fa87d8a..cb473e3 100644 --- a/src/common/common.hpp +++ b/src/common/common.hpp @@ -76,6 +76,13 @@ struct Point2f { }; }; +struct Point3d { + float x; + float y; + float z; + Point3d(float _x = 0, float _y = 0, float _z = 0) : x(_x), y(_y), z(_z) {} +}; + // Wrapper for an individual cv::Rect struct Rect { int x; diff --git a/src/hand/detecter/nanodet/nanodet.cpp b/src/hand/detecter/nanodet/nanodet.cpp index 2c48cd7..57e6635 100644 --- a/src/hand/detecter/nanodet/nanodet.cpp +++ b/src/hand/detecter/nanodet/nanodet.cpp @@ -1,6 +1,6 @@ #include "nanodet.hpp" -#include #include +#include #ifdef OV_VULKAN #include "gpu.h" @@ -8,227 +8,219 @@ namespace ovhand { -static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Mat& dis_pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector& objects) -{ +static void generate_nanodet_proposals(const ncnn::Mat &cls_pred, + const ncnn::Mat &dis_pred, int stride, + const ncnn::Mat &in_pad, + float prob_threshold, + std::vector &objects) { - const int num_grid = cls_pred.h; + const int num_grid = cls_pred.h; - int num_grid_x; - int num_grid_y; - if (in_pad.w > in_pad.h) - { - num_grid_x = in_pad.w / stride; - num_grid_y = num_grid / num_grid_x; - } - else - { - num_grid_y = in_pad.h / stride; - num_grid_x = num_grid / num_grid_y; - } + int num_grid_x; + int num_grid_y; + if (in_pad.w > in_pad.h) { + num_grid_x = in_pad.w / stride; + num_grid_y = num_grid / num_grid_x; + } else { + num_grid_y = in_pad.h / stride; + num_grid_x = num_grid / num_grid_y; + } - const int num_class = cls_pred.w; - const int reg_max_1 = dis_pred.w / 4; - //__android_log_print(ANDROID_LOG_WARN, 
"ncnn","cls_pred h %d, w %d",cls_pred.h,cls_pred.w); - //__android_log_print(ANDROID_LOG_WARN, "ncnn","%d,%d,%d,%d",num_grid_x,num_grid_y,num_class,reg_max_1); - for (int i = 0; i < num_grid_y; i++) - { - for (int j = 0; j < num_grid_x; j++) - { - const int idx = i * num_grid_x + j; + const int num_class = cls_pred.w; + const int reg_max_1 = dis_pred.w / 4; - const float* scores = cls_pred.row(idx); + for (int i = 0; i < num_grid_y; i++) { + for (int j = 0; j < num_grid_x; j++) { + const int idx = i * num_grid_x + j; - // find label with max score - int label = -1; - float score = -FLT_MAX; - for (int k = 0; k < num_class; k++) - { - if (scores[k] > score) - { - label = k; - score = scores[k]; - } - } + const float *scores = cls_pred.row(idx); - if (score >= prob_threshold) - { - ncnn::Mat bbox_pred(reg_max_1, 4, (void*)dis_pred.row(idx)); - { - ncnn::Layer* softmax = ncnn::create_layer("Softmax"); - - ncnn::ParamDict pd; - pd.set(0, 1); // axis - pd.set(1, 1); - softmax->load_param(pd); - - ncnn::Option opt; - // opt.num_threads = 1; - opt.use_packing_layout = false; - - softmax->create_pipeline(opt); - - softmax->forward_inplace(bbox_pred, opt); - - softmax->destroy_pipeline(opt); - - delete softmax; - } - - float pred_ltrb[4]; - for (int k = 0; k < 4; k++) - { - float dis = 0.f; - const float* dis_after_sm = bbox_pred.row(k); - for (int l = 0; l < reg_max_1; l++) - { - dis += l * dis_after_sm[l]; - } - - pred_ltrb[k] = dis * stride; - } - - float pb_cx = (j + 0.5f) * stride; - float pb_cy = (i + 0.5f) * stride; - - float x0 = pb_cx - pred_ltrb[0]; - float y0 = pb_cy - pred_ltrb[1]; - float x1 = pb_cx + pred_ltrb[2]; - float y1 = pb_cy + pred_ltrb[3]; - - ov::ObjectInfo obj; - obj.rect.x = x0; - obj.rect.y = y0; - obj.rect.width = x1 - x0; - obj.rect.height = y1 - y0; - obj.label = label; - obj.score= score; - - objects.push_back(obj); - } + // find label with max score + int label = -1; + float score = -FLT_MAX; + for (int k = 0; k < num_class; k++) { + if 
(scores[k] > score) { + label = k; + score = scores[k]; } - } -} + } -int Nanodet::Detect(const unsigned char* rgbdata, - int img_width, int img_height, - std::vector& rois) { - if (!initialized_) { - return 10000; - } - if (rgbdata == 0){ - return 10001; - } - - int w = img_width; - int h = img_height; - float scale = 1.f; - if (w > h) { - scale = (float)target_size / w; - w = target_size; - h = h * scale; - } else { - scale = (float)target_size / h; - h = target_size; - w = w * scale; - } - - ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, w, h); - - // pad to target_size rectangle - float wpad = 320-w;//(w + 31) / 32 * 32 - w; - float hpad = 320-h;//(h + 31) / 32 * 32 - h; - ncnn::Mat in_pad; - ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f); - - in_pad.substract_mean_normalize(mean_vals, norm_vals); - - ncnn::Extractor ex = net_->create_extractor(); - ex.set_light_mode(light_mode_); - ex.set_num_threads(num_threads); - ex.input("input.1", in_pad); - - std::vector proposals; - // stride 8 - { - ncnn::Mat cls_pred; - ncnn::Mat dis_pred; - ex.extract("cls_pred_stride_8", cls_pred); - ex.extract("dis_pred_stride_8", dis_pred); - - std::vector objects8; - generate_nanodet_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold, objects8); - - proposals.insert(proposals.end(), objects8.begin(), objects8.end()); - } - - // stride 16 - { - ncnn::Mat cls_pred; - ncnn::Mat dis_pred; - ex.extract("cls_pred_stride_16", cls_pred); - ex.extract("dis_pred_stride_16", dis_pred); - - std::vector objects16; - generate_nanodet_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold, objects16); - - proposals.insert(proposals.end(), objects16.begin(), objects16.end()); - } - - // stride 32 - { - ncnn::Mat cls_pred; - ncnn::Mat dis_pred; - ex.extract("cls_pred_stride_32", cls_pred); - ex.extract("dis_pred_stride_32", dis_pred); - - std::vector objects32; - 
generate_nanodet_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold, objects32); - - proposals.insert(proposals.end(), objects32.begin(), objects32.end()); - } - - // sort all proposals by score from highest to lowest - qsort_descent_inplace(proposals); - - // apply nms with nms_threshold - std::vector picked; - nms_sorted_bboxes(proposals, picked, nms_threshold); - - int count = picked.size(); - rois.resize(count); - - for (int i = 0; i < count; i++) - { - ov::ObjectInfo roi = proposals[picked[i]]; - - // adjust offset to original unpadded - float x0 = (roi.rect.x - (wpad / 2)) / scale; - float y0 = (roi.rect.y - (hpad / 2)) / scale; - float x1 = (roi.rect.x + roi.rect.width - (wpad / 2)) / scale; - float y1 = (roi.rect.y + roi.rect.height - (hpad / 2)) / scale; - - // clip - x0 = std::max(std::min(x0, (float)(img_width - 1)), 0.f); - y0 = std::max(std::min(y0, (float)(img_height - 1)), 0.f); - x1 = std::max(std::min(x1, (float)(img_width - 1)), 0.f); - y1 = std::max(std::min(y1, (float)(img_height - 1)), 0.f); - - roi.rect.x = x0; - roi.rect.y = y0; - roi.rect.width = x1 - x0; - roi.rect.height = y1 - y0; - - rois[i] = roi; - } - // sort objects by area - struct - { - bool operator()(const ov::ObjectInfo& a, const ov::ObjectInfo& b) const + if (score >= prob_threshold) { + ncnn::Mat bbox_pred(reg_max_1, 4, (void *)dis_pred.row(idx)); { - return a.rect.area() > b.rect.area(); + ncnn::Layer *softmax = ncnn::create_layer("Softmax"); + + ncnn::ParamDict pd; + pd.set(0, 1); // axis + pd.set(1, 1); + softmax->load_param(pd); + + ncnn::Option opt; + opt.num_threads = 1; + opt.use_packing_layout = false; + + softmax->create_pipeline(opt); + + softmax->forward_inplace(bbox_pred, opt); + + softmax->destroy_pipeline(opt); + + delete softmax; } - } objects_area_greater; - std::sort(rois.begin(), rois.end(), objects_area_greater); - return 0; + + float pred_ltrb[4]; + for (int k = 0; k < 4; k++) { + float dis = 0.f; + const float *dis_after_sm = bbox_pred.row(k); + for 
(int l = 0; l < reg_max_1; l++) { + dis += l * dis_after_sm[l]; + } + pred_ltrb[k] = dis * stride; + } + + float pb_cx = (j + 0.5f) * stride; + float pb_cy = (i + 0.5f) * stride; + + float x0 = pb_cx - pred_ltrb[0]; + float y0 = pb_cy - pred_ltrb[1]; + float x1 = pb_cx + pred_ltrb[2]; + float y1 = pb_cy + pred_ltrb[3]; + + ov::ObjectInfo obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = x1 - x0; + obj.rect.height = y1 - y0; + obj.label = label; + obj.score = score; + + objects.push_back(obj); + } + } + } } + +int Nanodet::Detect(const unsigned char *rgbdata, int img_width, int img_height, + std::vector &rois) { + if (!initialized_) { + return 10000; + } + if (rgbdata == 0) { + return 10001; + } + + int w = img_width; + int h = img_height; + float scale = 1.f; + if (w > h) { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } else { + scale = (float)target_size / h; + h = target_size; + w = w * scale; + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, + img_width, img_height, w, h); + + // pad to target_size rectangle + float wpad = 320 - w; //(w + 31) / 32 * 32 - w; + float hpad = 320 - h; //(h + 31) / 32 * 32 - h; + ncnn::Mat in_pad; + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, + wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f); + + in_pad.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); + ex.input("input.1", in_pad); + + std::vector proposals; + // stride 8 + { + ncnn::Mat cls_pred; + ncnn::Mat dis_pred; + ex.extract("cls_pred_stride_8", cls_pred); + ex.extract("dis_pred_stride_8", dis_pred); + + std::vector objects8; + generate_nanodet_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold, + objects8); + + proposals.insert(proposals.end(), objects8.begin(), objects8.end()); + } + + // stride 16 + { + ncnn::Mat cls_pred; + ncnn::Mat dis_pred; + 
ex.extract("cls_pred_stride_16", cls_pred); + ex.extract("dis_pred_stride_16", dis_pred); + + std::vector objects16; + generate_nanodet_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold, + objects16); + + proposals.insert(proposals.end(), objects16.begin(), objects16.end()); + } + + // stride 32 + { + ncnn::Mat cls_pred; + ncnn::Mat dis_pred; + ex.extract("cls_pred_stride_32", cls_pred); + ex.extract("dis_pred_stride_32", dis_pred); + + std::vector objects32; + generate_nanodet_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold, + objects32); + + proposals.insert(proposals.end(), objects32.begin(), objects32.end()); + } + + // sort all proposals by score from highest to lowest + qsort_descent_inplace(proposals); + + // apply nms with nms_threshold + std::vector picked; + nms_sorted_bboxes(proposals, picked, nms_threshold); + + int count = picked.size(); + rois.resize(count); + + for (int i = 0; i < count; i++) { + ov::ObjectInfo roi = proposals[picked[i]]; + + // adjust offset to original unpadded + float x0 = (roi.rect.x - (wpad / 2)) / scale; + float y0 = (roi.rect.y - (hpad / 2)) / scale; + float x1 = (roi.rect.x + roi.rect.width - (wpad / 2)) / scale; + float y1 = (roi.rect.y + roi.rect.height - (hpad / 2)) / scale; + + // clip + x0 = std::max(std::min(x0, (float)(img_width - 1)), 0.f); + y0 = std::max(std::min(y0, (float)(img_height - 1)), 0.f); + x1 = std::max(std::min(x1, (float)(img_width - 1)), 0.f); + y1 = std::max(std::min(y1, (float)(img_height - 1)), 0.f); + + roi.rect.x = x0; + roi.rect.y = y0; + roi.rect.width = x1 - x0; + roi.rect.height = y1 - y0; + + rois[i] = roi; + } + // sort objects by area + struct { + bool operator()(const ov::ObjectInfo &a, const ov::ObjectInfo &b) const { + return a.rect.area() > b.rect.area(); + } + } objects_area_greater; + std::sort(rois.begin(), rois.end(), objects_area_greater); + return 0; } +} // namespace ovhand diff --git a/src/hand/pose3d.h b/src/hand/pose3d.h new file mode 100644 index 
0000000..a102c3f
--- /dev/null
+++ b/src/hand/pose3d.h
@@ -0,0 +1,42 @@
+#ifndef _HAND_POSE3D_C_H_
+#define _HAND_POSE3D_C_H_
+
+#include "../common/common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+// One detected hand. The vectors are allocated by mediapipe_hand_detect()
+// and released by FreePalmObject().
+typedef struct PalmObject {
+  float score;               // palm detection score
+  float rotation;            // rotation reported by the estimator
+  Point2fVector *rect;       // 4 corners of the hand box
+  Point2fVector *landmarks;  // 7 palm landmarks
+  Point2fVector *skeleton;   // 2D hand key points, NULL when none
+  Point3dVector *skeleton3d; // 3D hand key points, NULL when none
+} PalmObject;
+
+typedef struct PalmObjectVector {
+  PalmObject *items; // malloc'ed array of `length` items, NULL when empty
+  int length;
+} PalmObjectVector;
+
+// Release what a PalmObject / PalmObjectVector points to. The struct passed
+// in is not freed.
+void FreePalmObject(PalmObject *obj);
+void FreePalmObjectVector(PalmObjectVector *vec);
+
+typedef void *IHandPose3DEstimator;
+IHandPose3DEstimator new_mediapipe_hand();
+void destroy_mediapipe_hand(IHandPose3DEstimator d);
+int mediapipe_hand_load_model(IHandPose3DEstimator d, const char *palm_path,
+                              const char *hand_path);
+// Fills *vec with the detected hands; returns 0 on success.
+int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata,
+                          int img_width, int img_height, PalmObjectVector *vec);
+#ifdef __cplusplus
+}
+#endif
+
+#endif // !_HAND_POSE3D_C_H_
diff --git a/src/hand/pose3d/estimator.cpp b/src/hand/pose3d/estimator.cpp
new file mode 100644
index 0000000..ceb7589
--- /dev/null
+++ b/src/hand/pose3d/estimator.cpp
@@ -0,0 +1,139 @@
+#include "../pose3d.h"
+#include "mediapipe/mediapipe.hpp"
+#include <stdlib.h>
+#include <vector>
+
+// Releases the vectors owned by obj and resets its pointers to NULL; obj
+// itself is not freed. FreePoint2fVector/FreePoint3dVector are expected to
+// release only the point arrays (the Go wrapper FreeCPoint2fVector frees the
+// struct separately), so the vector structs malloc'ed by
+// mediapipe_hand_detect() are freed here as well.
+void FreePalmObject(PalmObject *obj) {
+  if (obj == NULL) {
+    return;
+  }
+  if (obj->rect != NULL) {
+    FreePoint2fVector(obj->rect);
+    free(obj->rect);
+    obj->rect = NULL;
+  }
+  if (obj->skeleton != NULL) {
+    FreePoint2fVector(obj->skeleton);
+    free(obj->skeleton);
+    obj->skeleton = NULL;
+  }
+  if (obj->skeleton3d != NULL) {
+    FreePoint3dVector(obj->skeleton3d);
+    free(obj->skeleton3d);
+    obj->skeleton3d = NULL;
+  }
+  if (obj->landmarks != NULL) {
+    FreePoint2fVector(obj->landmarks);
+    free(obj->landmarks);
+    obj->landmarks = NULL;
+  }
+}
+
+// Releases every item of vec and the item array; vec itself is not freed.
+void FreePalmObjectVector(PalmObjectVector *vec) {
+  if (vec == NULL) {
+    return;
+  }
+  if (vec->items != NULL) {
+    for (int i = 0; i < vec->length; i++) {
+      FreePalmObject(&vec->items[i]);
+    }
+    free(vec->items);
+    vec->items = NULL;
+  }
+  vec->length = 0;
+}
+
+IHandPose3DEstimator new_mediapipe_hand() {
+  return new ovhand3d::MediapipeHand();
+}
+
+void destroy_mediapipe_hand(IHandPose3DEstimator d) {
+  delete static_cast<ovhand3d::MediapipeHand *>(d);
+}
+
+int mediapipe_hand_load_model(IHandPose3DEstimator d, const char *palm_path,
+                              const char *hand_path) {
+  return static_cast<ovhand3d::MediapipeHand *>(d)->LoadModel(palm_path,
+                                                              hand_path);
+}
+
+// Runs detection and copies the results into a malloc'ed PalmObject array
+// that the caller releases with FreePalmObjectVector(). Returns 0 on success
+// (with *objects empty when no hand is found), otherwise the error code of
+// MediapipeHand::Detect.
+int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata,
+                          int img_width, int img_height,
+                          PalmObjectVector *objects) {
+  // Sizes of the fixed arrays in ovhand3d::PalmObject (hand_pos, landmarks).
+  const size_t kHandCorners = 4;
+  const size_t kPalmLandmarks = 7;
+
+  // Start from an empty result so no path leaves stale pointers behind.
+  objects->items = NULL;
+  objects->length = 0;
+
+  std::vector<ovhand3d::PalmObject> objs;
+  int ret = static_cast<ovhand3d::MediapipeHand *>(d)->Detect(
+      rgbdata, img_width, img_height, objs);
+  if (ret != 0) {
+    return ret;
+  }
+  const size_t total_objs = objs.size();
+  if (total_objs == 0) {
+    return 0;
+  }
+  objects->items = (PalmObject *)malloc(total_objs * sizeof(PalmObject));
+  objects->length = static_cast<int>(total_objs);
+  for (size_t i = 0; i < total_objs; ++i) {
+    PalmObject &item = objects->items[i];
+    ovhand3d::PalmObject &src = objs[i];
+    item.score = src.score;
+    item.rotation = src.rotation;
+
+    item.rect = (Point2fVector *)malloc(sizeof(Point2fVector));
+    item.rect->length = kHandCorners;
+    item.rect->points = (Point2f *)malloc(kHandCorners * sizeof(Point2f));
+    for (size_t j = 0; j < kHandCorners; ++j) {
+      item.rect->points[j] = src.hand_pos[j];
+    }
+
+    // Allocation and copy loop share one constant: all 7 landmarks are
+    // written, so a buffer sized for 4 points would be overrun.
+    item.landmarks = (Point2fVector *)malloc(sizeof(Point2fVector));
+    item.landmarks->length = kPalmLandmarks;
+    item.landmarks->points =
+        (Point2f *)malloc(kPalmLandmarks * sizeof(Point2f));
+    for (size_t j = 0; j < kPalmLandmarks; ++j) {
+      item.landmarks->points[j] = src.landmarks[j];
+    }
+
+    const size_t total_skeleton = src.skeleton.size();
+    if (total_skeleton == 0) {
+      item.skeleton = NULL;
+      item.skeleton3d = NULL;
+      continue;
+    }
+    item.skeleton = (Point2fVector *)malloc(sizeof(Point2fVector));
+    item.skeleton->length = total_skeleton;
+    item.skeleton->points =
+        (Point2f *)malloc(total_skeleton * sizeof(Point2f));
+    item.skeleton3d = (Point3dVector *)malloc(sizeof(Point3dVector));
+    item.skeleton3d->length = total_skeleton;
+    item.skeleton3d->points =
+        (Point3d *)malloc(total_skeleton * sizeof(Point3d));
+    for (size_t j = 0; j < total_skeleton; ++j) {
+      item.skeleton->points[j].x = src.skeleton[j].x;
+      item.skeleton->points[j].y = src.skeleton[j].y;
+      item.skeleton3d->points[j].x = src.skeleton3d[j].x;
+      item.skeleton3d->points[j].y = src.skeleton3d[j].y;
+      item.skeleton3d->points[j].z = src.skeleton3d[j].z;
+    }
+  }
+  return 0;
+}
diff --git a/src/hand/pose3d/mediapipe/mediapipe.cpp b/src/hand/pose3d/mediapipe/mediapipe.cpp
new file mode 100644
index 0000000..e2c5a04
--- /dev/null
+++ b/src/hand/pose3d/mediapipe/mediapipe.cpp
@@ -0,0 +1,534 @@
+#include "mediapipe.hpp"
+#include "mat.h"
+#include <math.h>
+
+namespace ovhand3d {
+
+static float calculate_scale(float min_scale, float max_scale, int stride_index,
+                             int num_strides) {
+  if (num_strides == 1)
+    return (min_scale + max_scale) * 0.5f;
+  else
+    return min_scale +
+           (max_scale - min_scale) * 1.0 * stride_index / (num_strides - 1.0f);
+}
+
+static void generate_anchors(std::vector<Anchor> &anchors,
+                             const AnchorsParams &anchor_params) {
+  int layer_id = 0;
+  for (int layer_id = 0; layer_id < anchor_params.strides.size();) {
+    std::vector<float> anchor_height;
+    std::vector<float> anchor_width;
+    std::vector<float> aspect_ratios;
+    std::vector<float> scales;
+
+    int last_same_stride_layer = layer_id;
+    while (last_same_stride_layer < (int)anchor_params.strides.size() &&
+           anchor_params.strides[last_same_stride_layer] ==
+               anchor_params.strides[layer_id]) {
+      const float scale =
+          calculate_scale(anchor_params.min_scale, anchor_params.max_scale,
+                          last_same_stride_layer, anchor_params.strides.size());
+      {
+        for (int aspect_ratio_id = 0;
+             aspect_ratio_id < (int)anchor_params.aspect_ratios.size();
+             aspect_ratio_id++) {
+          aspect_ratios.push_back(anchor_params.aspect_ratios[aspect_ratio_id]);
+          scales.push_back(scale);
+        }
+ + const float scale_next = + last_same_stride_layer == (int)anchor_params.strides.size() - 1 + ? 1.0f + : calculate_scale( + anchor_params.min_scale, anchor_params.max_scale, + last_same_stride_layer + 1, anchor_params.strides.size()); + scales.push_back(sqrt(scale * scale_next)); + aspect_ratios.push_back(1.0); + } + last_same_stride_layer++; + } + + for (int i = 0; i < (int)aspect_ratios.size(); ++i) { + const float ratio_sqrts = sqrt(aspect_ratios[i]); + anchor_height.push_back(scales[i] / ratio_sqrts); + anchor_width.push_back(scales[i] * ratio_sqrts); + } + + int feature_map_height = 0; + int feature_map_width = 0; + const int stride = anchor_params.strides[layer_id]; + feature_map_height = ceil(1.0f * anchor_params.input_size_height / stride); + feature_map_width = ceil(1.0f * anchor_params.input_size_width / stride); + + for (int y = 0; y < feature_map_height; ++y) { + for (int x = 0; x < feature_map_width; ++x) { + for (int anchor_id = 0; anchor_id < (int)anchor_height.size(); + ++anchor_id) { + const float x_center = + (x + anchor_params.anchor_offset_x) * 1.0f / feature_map_width; + const float y_center = + (y + anchor_params.anchor_offset_y) * 1.0f / feature_map_height; + + Anchor new_anchor; + new_anchor.x_center = x_center; + new_anchor.y_center = y_center; + + new_anchor.w = 1.0f; + new_anchor.h = 1.0f; + + anchors.push_back(new_anchor); + } + } + } + layer_id = last_same_stride_layer; + } +} + +static void create_ssd_anchors(int input_w, int input_h, + std::vector &anchors) { + AnchorsParams anchor_options; + anchor_options.num_layers = 4; + anchor_options.min_scale = 0.1484375; + anchor_options.max_scale = 0.75; + anchor_options.input_size_height = 192; + anchor_options.input_size_width = 192; + anchor_options.anchor_offset_x = 0.5f; + anchor_options.anchor_offset_y = 0.5f; + anchor_options.strides.push_back(8); + anchor_options.strides.push_back(16); + anchor_options.strides.push_back(16); + anchor_options.strides.push_back(16); + 
anchor_options.aspect_ratios.push_back(1.0); + generate_anchors(anchors, anchor_options); +} + +static int decode_bounds(std::list ®ion_list, + float score_thresh, int input_img_w, int input_img_h, + float *scores_ptr, float *bboxes_ptr, + std::vector &anchors) { + DetectRegion region; + int i = 0; + for (auto &anchor : anchors) { + float score = ov::sigmoid(scores_ptr[i]); + + if (score > score_thresh) { + float *p = bboxes_ptr + (i * 18); + + float cx = p[0] / input_img_w + anchor.x_center; + float cy = p[1] / input_img_h + anchor.y_center; + float w = p[2] / input_img_w; + float h = p[3] / input_img_h; + + ov::Point2f topleft, btmright; + topleft.x = cx - w * 0.5f; + topleft.y = cy - h * 0.5f; + btmright.x = cx + w * 0.5f; + btmright.y = cy + h * 0.5f; + + region.score = score; + region.topleft = topleft; + region.btmright = btmright; + + for (int j = 0; j < 7; j++) { + float lx = p[4 + (2 * j) + 0]; + float ly = p[4 + (2 * j) + 1]; + lx += anchor.x_center * input_img_w; + ly += anchor.y_center * input_img_h; + lx /= (float)input_img_w; + ly /= (float)input_img_h; + + region.landmarks[j].x = lx; + region.landmarks[j].y = ly; + } + + region_list.push_back(region); + } + i++; + } + return 0; +} + +static float calc_intersection_over_union(DetectRegion ®ion0, + DetectRegion ®ion1) { + float sx0 = region0.topleft.x; + float sy0 = region0.topleft.y; + float ex0 = region0.btmright.x; + float ey0 = region0.btmright.y; + float sx1 = region1.topleft.x; + float sy1 = region1.topleft.y; + float ex1 = region1.btmright.x; + float ey1 = region1.btmright.y; + + float xmin0 = std::min(sx0, ex0); + float ymin0 = std::min(sy0, ey0); + float xmax0 = std::max(sx0, ex0); + float ymax0 = std::max(sy0, ey0); + float xmin1 = std::min(sx1, ex1); + float ymin1 = std::min(sy1, ey1); + float xmax1 = std::max(sx1, ex1); + float ymax1 = std::max(sy1, ey1); + + float area0 = (ymax0 - ymin0) * (xmax0 - xmin0); + float area1 = (ymax1 - ymin1) * (xmax1 - xmin1); + if (area0 <= 0 || area1 <= 0) + 
return 0.0f;
+
+  float intersect_xmin = std::max(xmin0, xmin1);
+  float intersect_ymin = std::max(ymin0, ymin1);
+  float intersect_xmax = std::min(xmax0, xmax1);
+  float intersect_ymax = std::min(ymax0, ymax1);
+
+  float intersect_area = std::max(intersect_ymax - intersect_ymin, 0.0f) *
+                         std::max(intersect_xmax - intersect_xmin, 0.0f);
+
+  return intersect_area / (area0 + area1 - intersect_area);
+}
+// Greedy NMS over score-sorted regions; keeps at most 5 (IoU < iou_thresh).
+static int non_max_suppression(std::list<DetectRegion> &region_list,
+                               std::list<DetectRegion> &region_nms_list,
+                               float iou_thresh) {
+  region_list.sort([](const DetectRegion &v1, const DetectRegion &v2) {
+    return v1.score > v2.score;
+  });
+
+  for (auto itr = region_list.begin(); itr != region_list.end(); itr++) {
+    DetectRegion &region_candidate = *itr;
+    // Drop the candidate if it overlaps a region that was already kept.
+    bool ignore_candidate = false;
+    for (auto itr_nms = region_nms_list.rbegin();
+         itr_nms != region_nms_list.rend(); itr_nms++) {
+      DetectRegion &region_nms = *itr_nms;
+
+      float iou = calc_intersection_over_union(region_candidate, region_nms);
+      if (iou >= iou_thresh) {
+        ignore_candidate = true;
+        break;
+      }
+    }
+
+    if (!ignore_candidate) {
+      region_nms_list.push_back(region_candidate);
+      if (region_nms_list.size() >= 5)
+        break;
+    }
+  }
+  return 0;
+}
+// Wraps angle into [-pi, pi).
+static float normalize_radians(float angle) {
+  return angle - 2 * M_PI * floor((angle - (-M_PI)) / (2 * M_PI));
+}
+// Sets region.rotation to pi/2 minus the angle of the landmark 0->2 vector.
+static void compute_rotation(DetectRegion &region) {
+  float x0 = region.landmarks[0].x;
+  float y0 = region.landmarks[0].y;
+  float x1 = region.landmarks[2].x;
+  float y1 = region.landmarks[2].y;
+
+  float target_angle = M_PI * 0.5f;
+  float rotation = target_angle - atan2(-(y1 - y0), x1 - x0);
+
+  region.rotation = normalize_radians(rotation);
+}
+// Rotates vec in place by `rotation` radians about the origin.
+void rot_vec(ov::Point2f &vec, float rotation) {
+  float sx = vec.x;
+  float sy = vec.y;
+  vec.x = sx * cos(rotation) - sy * sin(rotation);
+  vec.y = sx * sin(rotation) + sy * cos(rotation);
+}
+// Fills palm with the rotated square hand box (2.6x the palm's long side).
+void compute_detect_to_roi(DetectRegion &region, const int &target_size,
+                           PalmObject &palm) {
+  float width = region.btmright.x - region.topleft.x;
+  float height = region.btmright.y - region.topleft.y;
+  float palm_cx = region.topleft.x + width * 0.5f;
+  float palm_cy = region.topleft.y + height * 0.5f;
+
+  float hand_cx;
+  float hand_cy;
+  float rotation = region.rotation;
+  float shift_x = 0.0f;
+  float shift_y = -0.5f;
+
+  if (rotation == 0.0f) {
+    hand_cx = palm_cx + (width * shift_x);
+    hand_cy = palm_cy + (height * shift_y);
+  } else {
+    float dx =
+        (width * shift_x) * cos(rotation) - (height * shift_y) * sin(rotation);
+    float dy =
+        (width * shift_x) * sin(rotation) + (height * shift_y) * cos(rotation);
+    hand_cx = palm_cx + dx;
+    hand_cy = palm_cy + dy;
+  }
+
+  float long_side = std::max(width, height);
+  width = long_side;
+  height = long_side;
+  float hand_w = width * 2.6f;
+  float hand_h = height * 2.6f;
+
+  palm.hand_cx = hand_cx;
+  palm.hand_cy = hand_cy;
+  palm.hand_w = hand_w;
+  palm.hand_h = hand_h;
+
+  float dx = hand_w * 0.5f;
+  float dy = hand_h * 0.5f;
+
+  palm.hand_pos[0].x = -dx;
+  palm.hand_pos[0].y = -dy;
+  palm.hand_pos[1].x = +dx;
+  palm.hand_pos[1].y = -dy;
+  palm.hand_pos[2].x = +dx;
+  palm.hand_pos[2].y = +dy;
+  palm.hand_pos[3].x = -dx;
+  palm.hand_pos[3].y = +dy;
+
+  for (int i = 0; i < 4; i++) {
+    rot_vec(palm.hand_pos[i], rotation);
+    palm.hand_pos[i].x += hand_cx;
+    palm.hand_pos[i].y += hand_cy;
+  }
+
+  for (int i = 0; i < 7; i++) {
+    palm.landmarks[i] = region.landmarks[i];
+  }
+  palm.rotation = rotation; // the C wrapper copies this field to callers
+  palm.score = region.score;
+}
+// Computes rotation and hand box for every region; appends to both outputs.
+static void pack_detect_result(std::vector<DetectRegion> &detect_results,
+                               std::list<DetectRegion> &region_list,
+                               const int &target_size,
+                               std::vector<PalmObject> &palmlist) {
+  for (auto &region : region_list) {
+    compute_rotation(region);
+    PalmObject palm;
+    compute_detect_to_roi(region, target_size, palm);
+    palmlist.push_back(palm);
+    detect_results.push_back(region);
+  }
+}
+// Creates both nets and wires them to this object's allocators and options.
+MediapipeHand::MediapipeHand() : ov::EstimatorBase() {
+  palm_blob_allocator_.set_size_compare_ratio(0.f);
+  palm_workspace_allocator_.set_size_compare_ratio(0.f);
+  hand_blob_allocator_.set_size_compare_ratio(0.f);
+  hand_workspace_allocator_.set_size_compare_ratio(0.f);
+  palm_net_ = new ncnn::Net();
+  hand_net_ = new ncnn::Net();
+  initialized_ = false;
+  if (num_threads > 0) {
+    palm_net_->opt.num_threads = num_threads;
+    hand_net_->opt.num_threads = num_threads;
+  }
+  palm_net_->opt.blob_allocator = &palm_blob_allocator_;
+  palm_net_->opt.workspace_allocator = &palm_workspace_allocator_;
+  palm_net_->opt.lightmode = light_mode_;
+  hand_net_->opt.blob_allocator = &hand_blob_allocator_;
+  hand_net_->opt.workspace_allocator = &hand_workspace_allocator_;
+  hand_net_->opt.lightmode = light_mode_;
+#ifdef OV_VULKAN
+  palm_net_->opt.use_vulkan_compute = true;
+  hand_net_->opt.use_vulkan_compute = true;
+#endif // OV_VULKAN
+}
+// Frees the nets created in the constructor, then empties the pools.
+MediapipeHand::~MediapipeHand() {
+  // Delete the nets first: their options point at the allocators below.
+  // (Deleting a null pointer is a no-op, so no guard is needed.)
+  delete palm_net_;
+  palm_net_ = NULL;
+  delete hand_net_;
+  hand_net_ = NULL;
+  palm_workspace_allocator_.clear();
+  palm_blob_allocator_.clear();
+  hand_workspace_allocator_.clear();
+  hand_blob_allocator_.clear();
+}
+// Forwards n to EstimatorBase and applies it to both nets.
+void MediapipeHand::set_num_threads(int n) {
+  EstimatorBase::set_num_threads(n);
+  if (palm_net_) {
+    palm_net_->opt.num_threads = n;
+  }
+  if (hand_net_) {
+    hand_net_->opt.num_threads = n;
+  }
+}
+// Applies the light-mode flag to both nets and remembers it.
+void MediapipeHand::set_light_mode(bool mode) {
+  if (palm_net_) {
+    palm_net_->opt.lightmode = mode;
+  }
+  if (hand_net_) {
+    hand_net_->opt.lightmode = mode;
+  }
+  light_mode_ = mode;
+}
+// Loads "<dir>/param" and "<dir>/bin" for both nets; 10000 on failure.
+int MediapipeHand::LoadModel(const char *palm_path, const char *hand_path) {
+  std::string palm_param_file = std::string(palm_path) + "/param";
+  std::string palm_bin_file = std::string(palm_path) + "/bin";
+  std::string hand_param_file = std::string(hand_path) + "/param";
+  std::string hand_bin_file = std::string(hand_path) + "/bin";
+  if (palm_net_->load_param(palm_param_file.c_str()) == -1 ||
+      palm_net_->load_model(palm_bin_file.c_str()) == -1) {
+    return 10000;
+  }
+  if (hand_net_->load_param(hand_param_file.c_str()) == -1 ||
+
hand_net_->load_model(hand_bin_file.c_str()) == -1) { + return 10000; + } + + initialized_ = true; + anchors.clear(); + create_ssd_anchors(target_size, target_size, anchors); + + return 0; +} + +int MediapipeHand::Detect(const unsigned char *rgbdata, int img_width, + int img_height, std::vector &objects) { + if (!initialized_) { + return 10000; + } + if (rgbdata == 0) { + return 10001; + } + int w = img_width; + int h = img_height; + float scale = 1.f; + if (w > h) { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } else { + scale = (float)target_size / h; + h = target_size; + w = w * scale; + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, + img_width, img_height, w, h); + + int wpad = target_size - w; + int hpad = target_size - h; + ncnn::Mat in_pad; + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, + wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f); + const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f}; + in_pad.substract_mean_normalize(0, norm_vals); + + ncnn::Extractor ex = palm_net_->create_extractor(); + ncnn::Mat cls, reg; + ex.input("input", in_pad); + ex.extract("cls", cls); + ex.extract("reg", reg); + + float *scores = (float *)cls.data; + float *bboxes = (float *)reg.data; + + std::list region_list, region_nms_list; + std::vector detect_results; + + decode_bounds(region_list, prob_threshold, target_size, target_size, scores, + bboxes, anchors); + non_max_suppression(region_list, region_nms_list, nms_threshold); + objects.clear(); + pack_detect_result(detect_results, region_nms_list, target_size, objects); + + for (int i = 0; i < objects.size(); i++) { + objects[i].hand_pos[0].x = + (objects[i].hand_pos[0].x * target_size - ((float)wpad / 2)) / scale; + objects[i].hand_pos[0].y = + (objects[i].hand_pos[0].y * target_size - ((float)hpad / 2)) / scale; + objects[i].hand_pos[1].x = + (objects[i].hand_pos[1].x * target_size - ((float)wpad / 2)) / scale; + 
objects[i].hand_pos[1].y = + (objects[i].hand_pos[1].y * target_size - ((float)hpad / 2)) / scale; + objects[i].hand_pos[2].x = + (objects[i].hand_pos[2].x * target_size - ((float)wpad / 2)) / scale; + objects[i].hand_pos[2].y = + (objects[i].hand_pos[2].y * target_size - ((float)hpad / 2)) / scale; + objects[i].hand_pos[3].x = + (objects[i].hand_pos[3].x * target_size - ((float)wpad / 2)) / scale; + objects[i].hand_pos[3].y = + (objects[i].hand_pos[3].y * target_size - ((float)hpad / 2)) / scale; + + for (int j = 0; j < 7; j++) { + objects[i].landmarks[j].x = + (objects[i].landmarks[j].x * target_size - ((float)wpad / 2)) / scale; + objects[i].landmarks[j].y = + (objects[i].landmarks[j].y * target_size - ((float)hpad / 2)) / scale; + } + + const float srcPts[8] = { + objects[i].hand_pos[0].x, objects[i].hand_pos[0].y, + objects[i].hand_pos[1].x, objects[i].hand_pos[1].y, + objects[i].hand_pos[2].x, objects[i].hand_pos[2].y, + objects[i].hand_pos[3].x, objects[i].hand_pos[3].y, + }; + + const float dstPts[8] = { + 0, 0, 224, 0, 224, 224, 0, 224, + }; + + float tm[6]; + unsigned char *trans_mat = + (unsigned char *)malloc(224 * 224 * 3 * sizeof(unsigned char)); + ncnn::get_affine_transform(dstPts, srcPts, 4, tm); + + ncnn::warpaffine_bilinear_c3(rgbdata, img_width, img_height, trans_mat, 224, + 224, tm); + + ncnn::Mat trans_image = + ncnn::Mat::from_pixels(trans_mat, ncnn::Mat::PIXEL_RGB, 224, 224); + + float score = GetLandmarks(trans_image, tm, objects[i].skeleton, + objects[i].skeleton3d); + + free(trans_mat); + } + return 0; +} + +float MediapipeHand::GetLandmarks(ncnn::Mat in, float tm[6], + std::vector &skeleton, + std::vector &skeleton3d) { + + const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f}; + in.substract_mean_normalize(NULL, norm_vals); + ncnn::Mat points, score; + { + ncnn::Extractor ex = hand_net_->create_extractor(); + ex.input("input", in); + ex.extract("points", points); + ex.extract("score", score); + } + + float *points_data = (float 
*)points.data; + float *score_data = (float *)score.data; + for (int i = 0; i < 21; i++) { + ov::Point3d pt3d; + pt3d.x = points_data[i * 3]; + pt3d.y = points_data[i * 3 + 1]; + pt3d.z = points_data[i * 3 + 2]; + + ov::Point2f pt; + pt.x = pt3d.x * tm[0] + pt3d.y * tm[1] + tm[2]; + pt.y = pt3d.x * tm[3] + pt3d.y * tm[4] + tm[5]; + + skeleton.push_back(pt); + + pt3d.x /= 224.f; + pt3d.y /= 224.f; + skeleton3d.push_back(pt3d); + } + return score_data[0]; +} + +} // namespace ovhand3d diff --git a/src/hand/pose3d/mediapipe/mediapipe.hpp b/src/hand/pose3d/mediapipe/mediapipe.hpp new file mode 100644 index 0000000..31efda4 --- /dev/null +++ b/src/hand/pose3d/mediapipe/mediapipe.hpp @@ -0,0 +1,87 @@ +#ifndef _HAND_POSE3D_MEDIAPIPE_H_ +#define _HAND_POSE3D_MEDIAPIPE_H_ + +#include "../../../common/common.hpp" +#include + +namespace ovhand3d { + +struct PalmObject { + float score; + ov::Point2f landmarks[7]; + float rotation; + + float hand_cx; + float hand_cy; + float hand_w; + float hand_h; + ov::Point2f hand_pos[4]; + + std::vector skeleton; + std::vector skeleton3d; +}; + +struct DetectRegion { + float score; + ov::Point2f topleft; + ov::Point2f btmright; + ov::Point2f landmarks[7]; + + float rotation; + ov::Point2f roi_center; + ov::Point2f roi_size; + ov::Point2f roi_coord[4]; +}; + +struct Anchor { + float x_center, y_center, w, h; +}; + +struct AnchorsParams { + int input_size_width; + int input_size_height; + + float min_scale; + float max_scale; + + float anchor_offset_x; + float anchor_offset_y; + + int num_layers; + std::vector feature_map_width; + std::vector feature_map_height; + std::vector strides; + std::vector aspect_ratios; +}; + +class MediapipeHand : public ov::EstimatorBase { +public: + MediapipeHand(); + ~MediapipeHand(); + int LoadModel(const char *palm_model, const char *hand_model); + int Detect(const unsigned char *rgbdata, int img_width, int img_heidht, + std::vector &objects); + float GetLandmarks(ncnn::Mat in, float tm[6], + std::vector 
&skeleton, + std::vector &skeleton3d); + void set_light_mode(bool mode); + void set_num_threads(int n); + +private: + ncnn::Net *palm_net_ = NULL; + ncnn::Net *hand_net_ = NULL; + ncnn::PoolAllocator palm_workspace_allocator_; + ncnn::UnlockedPoolAllocator palm_blob_allocator_; + ncnn::PoolAllocator hand_workspace_allocator_; + ncnn::UnlockedPoolAllocator hand_blob_allocator_; + bool initialized_ = false; + bool light_mode_ = true; + std::vector anchors; + float prob_threshold = 0.55f; + float nms_threshold = 0.3f; + const int target_size = 192; + const float mean_vals[3] = {0.f, 0.f, 0.f}; + const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f}; +}; +} // namespace ovhand3d +#endif // !_HAND_POSE3D_MEDIAPIPE_H_ diff --git a/src/pose/estimator/pptinypose/pptinypose.bak b/src/pose/estimator/pptinypose/pptinypose.bak new file mode 100644 index 0000000..c41ae1e --- /dev/null +++ b/src/pose/estimator/pptinypose/pptinypose.bak @@ -0,0 +1,161 @@ +#include "pptinypose.hpp" +#include + +#ifdef OV_VULKAN +#include "gpu.h" +#endif // OV_VULKAN + +namespace ovpose { +static int argmax(const ncnn::Mat &bottom_blob, ncnn::Mat &top_blob, + std::vector &prob) { + int size = bottom_blob.total(); + const float *ptr = bottom_blob; + std::vector> vec; + vec.resize(size); + for (int i = 0; i < size; i++) { + vec[i] = std::make_pair(ptr[i], i); + } + top_blob.create(bottom_blob.c, 1, 1, 4u); + float *outptr = top_blob; + + for (size_t i = 0; i < bottom_blob.c; i++) { + int size0 = bottom_blob.channel(i).total(); + std::partial_sort(vec.begin() + size0 * i, vec.begin() + size0 * (i + 1), + vec.begin() + size0 * (i + 1), + std::greater>()); + outptr[i] = vec[size0 * i].second - size0 * i; + prob.push_back(vec[size0 * i].first); + } + + return 0; +} + +static void dark_parse(const ncnn::Mat &heatmap, std::vector &dim, + std::vector &coords, int px, int py, int ch) { + /*DARK postpocessing, Zhang et al. 
Distribution-Aware Coordinate + Representation for Human Pose Estimation (CVPR 2020). + 1) offset = - hassian.inv() * derivative + 2) dx = (heatmap[x+1] - heatmap[x-1])/2. + 3) dxx = (dx[x+1] - dx[x-1])/2. + 4) derivative = Mat([dx, dy]) + 5) hassian = Mat([[dxx, dxy], [dxy, dyy]]) + */ + + float *heatmap_data = (float *)heatmap.channel(ch).data; + std::vector heatmap_ch; + heatmap_ch.insert(heatmap_ch.begin(), heatmap_data, + heatmap_data + heatmap.channel(ch).total()); + cv::Mat heatmap_mat = cv::Mat(heatmap_ch).reshape(0, dim[2]); + heatmap_mat.convertTo(heatmap_mat, CV_32FC1); + cv::GaussianBlur(heatmap_mat, heatmap_mat, cv::Size(3, 3), 0, 0); + heatmap_mat = heatmap_mat.reshape(1, 1); + heatmap_ch = std::vector(heatmap_mat.reshape(1, 1)); + + ncnn::Mat heatmap_mat = heatmap.channel(ch).reshape(dim[2]); + heatmap_mat = heatmap_mat.reshape(1); + heatmap_ch = (float *)heatmap_mat.data; + + float epsilon = 1e-10; + // sample heatmap to get values in around target location + float xy = log(fmax(heatmap_ch[py * dim[3] + px], epsilon)); + float xr = log(fmax(heatmap_ch[py * dim[3] + px + 1], epsilon)); + float xl = log(fmax(heatmap_ch[py * dim[3] + px - 1], epsilon)); + + float xr2 = log(fmax(heatmap_ch[py * dim[3] + px + 2], epsilon)); + float xl2 = log(fmax(heatmap_ch[py * dim[3] + px - 2], epsilon)); + float yu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px], epsilon)); + float yd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px], epsilon)); + float yu2 = log(fmax(heatmap_ch[(py + 2) * dim[3] + px], epsilon)); + float yd2 = log(fmax(heatmap_ch[(py - 2) * dim[3] + px], epsilon)); + float xryu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px + 1], epsilon)); + float xryd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px + 1], epsilon)); + float xlyu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px - 1], epsilon)); + float xlyd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px - 1], epsilon)); + + // compute dx/dy and dxx/dyy with sampled values + float dx = 0.5 * (xr - xl); + float dy = 
0.5 * (yu - yd); + float dxx = 0.25 * (xr2 - 2 * xy + xl2); + float dxy = 0.25 * (xryu - xryd - xlyu + xlyd); + float dyy = 0.25 * (yu2 - 2 * xy + yd2); + + // finally get offset by derivative and hassian, which combined by dx/dy and + // dxx/dyy + if (dxx * dyy - dxy * dxy != 0) { + float M[2][2] = {dxx, dxy, dxy, dyy}; + float D[2] = {dx, dy}; + cv::Mat hassian(2, 2, CV_32F, M); + cv::Mat derivative(2, 1, CV_32F, D); + cv::Mat offset = -hassian.inv() * derivative; + coords[ch * 2] += offset.at(0, 0); + coords[ch * 2 + 1] += offset.at(1, 0); + } +} + +static std::vector get_final_preds(const ncnn::Mat &heatmap, + const ncnn::Mat &argmax_out) { + std::vector coords((size_t)heatmap.c * 2); + for (int i = 0; i < heatmap.c; i++) { + int idx = argmax_out[i]; + coords[i * 2] = idx % heatmap.w; + coords[i * 2 + 1] = (float)idx / heatmap.w; + + int px = int(coords[i * 2] + 0.5); + int py = int(coords[i * 2 + 1] + 0.5); + + std::vector dim({1, heatmap.c, heatmap.h, heatmap.w}); + dark_parse(heatmap, dim, coords, px, py, i); + } + + return coords; +} +PPTinyPoseEstimator::PPTinyPoseEstimator(int target_size) : Estimator() { + if (target_size == 128) { + target_width_ = 96; + target_height_ = 128; + } else { + target_width_ = 196; + target_height_ = 256; + } +} + +int PPTinyPoseEstimator::ExtractKeypoints( + const unsigned char *rgbdata, int img_width, int img_height, + const ov::Rect &rect, std::vector *keypoints) { + if (!initialized_) { + return 10000; + } + if (rgbdata == 0) { + return 10001; + } + keypoints->clear(); + + ncnn::Mat in = ncnn::Mat::from_pixels_roi_resize( + rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, rect.x, rect.y, + rect.width, rect.height, target_width_, target_height_); + in.substract_mean_normalize(meanVals, normVals); + + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); + ex.input("image", in); + ncnn::Mat out; + ex.extract("save_infer_model/scale_0.tmp_1", out); + + 
ncnn::Mat argmax_out; + std::vector probs; + argmax(out, argmax_out, probs); + std::vector coords = get_final_preds(out, argmax_out); + + for (int i = 0; i < coords.size() / 2; i++) { + ov::KeyPoint keypoint; + keypoint.p = ov::Point(coords[i * 2] * rect.width / (float)out.w + rect.x, + coords[i * 2 + 1] * rect.h / (float)out.h + rect.y); + keypoint.score = probs[i]; + keypoints->push_back(keypoint); + } + + return 0; +} + +} // namespace ovpose diff --git a/src/pose/estimator/pptinypose/pptinypose.hpp b/src/pose/estimator/pptinypose/pptinypose.hpp new file mode 100644 index 0000000..4bae1ba --- /dev/null +++ b/src/pose/estimator/pptinypose/pptinypose.hpp @@ -0,0 +1,25 @@ +#ifndef _POSE_PPTINYPOSE_ESTIMATOR_H_ +#define _POSE_PPTINYPOSE_ESTIMATOR_H_ + +#include "../estimator.hpp" +#include "net.h" +#include + +namespace ovpose { +class PPTinyPoseEstimator : public Estimator { +public: + PPTinyPoseEstimator(int target_size); + int ExtractKeypoints(const unsigned char *rgbdata, int img_width, + int img_height, const ov::Rect &rect, + std::vector *keypoints); + +private: + int target_width_ = 96; + int target_height_ = 128; + const float meanVals[3] = {123.675f, 116.28f, 103.53f}; + const float normVals[3] = {0.01712475f, 0.0175f, 0.01742919f}; +}; + +} // namespace ovpose + +#endif // !_POSE_PPTINYPOSE_ESTIMATOR_H_