feat(hand): add mediapipe hand 3d pose detecter

This commit is contained in:
Syd Xu
2021-12-13 16:39:50 +08:00
parent 429e30db91
commit 7eab96fa85
25 changed files with 1628 additions and 334 deletions

2
.gitignore vendored
View File

@@ -70,3 +70,5 @@ _testmain.go
test test
.vim .vim
dist/ dist/
libtorch/

View File

@@ -53,6 +53,8 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM
- nanodet [Google Drive](https://drive.google.com/drive/folders/1ywH7r_clqqA_BAOFSzA92Q0lxJtWlN3z?usp=sharing) - nanodet [Google Drive](https://drive.google.com/drive/folders/1ywH7r_clqqA_BAOFSzA92Q0lxJtWlN3z?usp=sharing)
- pose (for hand pose estimation) - pose (for hand pose estimation)
- handnet [Google Drive](https://drive.google.com/drive/folders/1DsCGmiVaZobbMWRp5Oec8GbIpeg7CsNR?usp=sharing) - handnet [Google Drive](https://drive.google.com/drive/folders/1DsCGmiVaZobbMWRp5Oec8GbIpeg7CsNR?usp=sharing)
- pose3d (for 3D hand pose detection)
- mediapipe [Google Drive](https://drive.google.com/drive/folders/1LsqIGB55dusZJqmP1uhnQUnNE2tLzifp?usp=sharing)
- styletransfer - styletransfer
- animegan2 [Google Drive](https://drive.google.com/drive/folders/1K6ZScENPHVbxupHkwl5WcpG8PPECtD8e?usp=sharing) - animegan2 [Google Drive](https://drive.google.com/drive/folders/1K6ZScENPHVbxupHkwl5WcpG8PPECtD8e?usp=sharing)
- tracker - tracker

View File

@@ -90,6 +90,9 @@ func NewCPoint2fVector() *C.Point2fVector {
// GoPoint2fVector convert C.Point2fVector to []Point // GoPoint2fVector convert C.Point2fVector to []Point
func GoPoint2fVector(cVector *C.Point2fVector, w float64, h float64) []Point { func GoPoint2fVector(cVector *C.Point2fVector, w float64, h float64) []Point {
if cVector == nil {
return nil
}
l := int(cVector.length) l := int(cVector.length)
ret := make([]Point, 0, l) ret := make([]Point, 0, l)
ptr := unsafe.Pointer(cVector.points) ptr := unsafe.Pointer(cVector.points)
@@ -105,3 +108,52 @@ func FreeCPoint2fVector(c *C.Point2fVector) {
C.FreePoint2fVector(c) C.FreePoint2fVector(c)
C.free(unsafe.Pointer(c)) C.free(unsafe.Pointer(c))
} }
// Point3d is a point in 3D space with float64 coordinates.
type Point3d struct {
	X, Y, Z float64
}

// Pt3d constructs a Point3d from the given x, y and z coordinates.
func Pt3d(x, y, z float64) Point3d {
	return Point3d{X: x, Y: y, Z: z}
}

// ZP3d is the zero Point3d (the origin).
var ZP3d = Point3d{}
// GoPoint3d converts a C.Point3d to its Go Point3d equivalent,
// widening the C float fields to float64. The C struct is only read,
// never retained.
func GoPoint3d(c *C.Point3d) Point3d {
return Pt3d(
float64(c.x),
float64(c.y),
float64(c.z),
)
}
// NewCPoint3dVector returns a C.malloc'ed C.Point3dVector pointer.
// The caller owns the allocation and must release it with
// FreeCPoint3dVector once the C side has filled it.
func NewCPoint3dVector() *C.Point3dVector {
	// Allocate the vector struct itself (pointer + length), not a single
	// Point3d: sizeof_Point3d is smaller than sizeof_Point3dVector, so the
	// previous allocation let the C side write past the end of the block.
	return (*C.Point3dVector)(C.malloc(C.sizeof_Point3dVector))
}
// GoPoint3dVector converts a C.Point3dVector into a []Point3d.
// Returns nil when cVector is nil. Every coordinate is copied out, so the
// C-side memory may be freed after this call returns.
func GoPoint3dVector(cVector *C.Point3dVector) []Point3d {
if cVector == nil {
return nil
}
l := int(cVector.length)
ret := make([]Point3d, 0, l)
ptr := unsafe.Pointer(cVector.points)
for i := 0; i < l; i++ {
// Index the C array manually: base pointer + i * sizeof(Point3d).
cPoint3d := (*C.Point3d)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_Point3d*C.int(i))))
ret = append(ret, GoPoint3d(cPoint3d))
}
return ret
}
// FreeCPoint3dVector releases a C.Point3dVector: first the C-owned points
// array (via C.FreePoint3dVector), then the vector struct itself, which
// was allocated with C.malloc in NewCPoint3dVector.
func FreeCPoint3dVector(c *C.Point3dVector) {
C.FreePoint3dVector(c)
C.free(unsafe.Pointer(c))
}

View File

@@ -20,6 +20,8 @@ type ObjectInfo struct {
Rect Rectangle Rect Rectangle
// Points keypoints // Points keypoints
Keypoints []Keypoint Keypoints []Keypoint
// Name
Name string
} }
// GoObjectInfo convert C.ObjectInfo to go type // GoObjectInfo convert C.ObjectInfo to go type

56
go/common/palmobject.go Normal file
View File

@@ -0,0 +1,56 @@
package common
/*
#include <stdlib.h>
#include <stdbool.h>
#include "openvision/common/common.h"
#include "openvision/hand/pose3d.h"
*/
import "C"
import (
"unsafe"
)
// PalmObject represents one detected palm/hand and its keypoints as
// produced by the mediapipe hand pipeline.
type PalmObject struct {
// Score is the detection confidence score.
Score float64
// Rotation is the estimated palm rotation angle
// (presumably radians — TODO confirm against the C implementation).
Rotation float64
// Rect holds the corner points of the (rotated) palm region.
Rect []Point
// Landmarks holds the palm-detector keypoints.
Landmarks []Point
// Skeleton holds the 2D hand skeleton joints.
Skeleton []Point
// Skeleton3d holds the corresponding 3D skeleton joints.
Skeleton3d []Point3d
}
// NewCPalmObjectVector returns a C.malloc'ed *C.PalmObjectVector for the
// C side to fill. The caller owns it and must release it with
// FreeCPalmObjectVector.
func NewCPalmObjectVector() *C.PalmObjectVector {
return (*C.PalmObjectVector)(C.malloc(C.sizeof_PalmObjectVector))
}
// FreeCPalmObjectVector releases a C.PalmObjectVector: the C-owned items
// (via C.FreePalmObjectVector) first, then the vector struct allocated by
// NewCPalmObjectVector.
func FreeCPalmObjectVector(p *C.PalmObjectVector) {
C.FreePalmObjectVector(p)
C.free(unsafe.Pointer(p))
}
// GoPalmObject converts one C.PalmObject into its Go representation.
// w and h are the source image width and height, forwarded to
// GoPoint2fVector for coordinate scaling of the 2D point sets; the 3D
// skeleton is converted unscaled.
func GoPalmObject(cObj *C.PalmObject, w float64, h float64) PalmObject {
return PalmObject{
Score: float64(cObj.score),
Rotation: float64(cObj.rotation),
Rect: GoPoint2fVector(cObj.rect, w, h),
Landmarks: GoPoint2fVector(cObj.landmarks, w, h),
Skeleton: GoPoint2fVector(cObj.skeleton, w, h),
Skeleton3d: GoPoint3dVector(cObj.skeleton3d),
}
}
// GoPalmObjectVector converts a C.PalmObjectVector into a []PalmObject.
// w and h are the source image dimensions, forwarded to GoPalmObject for
// coordinate scaling. Returns nil when c is nil, matching the behavior of
// GoPoint2fVector and GoPoint3dVector.
func GoPalmObjectVector(c *C.PalmObjectVector, w float64, h float64) []PalmObject {
	// Guard nil input like the sibling converters; previously this
	// dereferenced c.length unconditionally.
	if c == nil {
		return nil
	}
	l := int(c.length)
	ret := make([]PalmObject, 0, l)
	ptr := unsafe.Pointer(c.items)
	for i := 0; i < l; i++ {
		// Index the C array manually: base pointer + i * sizeof(PalmObject).
		cObj := (*C.PalmObject)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_PalmObject*C.int(i))))
		ret = append(ret, GoPalmObject(cObj, w, h))
	}
	return ret
}

View File

@@ -15,6 +15,7 @@ import (
"github.com/bububa/openvision/go/hand/detecter" "github.com/bububa/openvision/go/hand/detecter"
handdrawer "github.com/bububa/openvision/go/hand/drawer" handdrawer "github.com/bububa/openvision/go/hand/drawer"
"github.com/bububa/openvision/go/hand/pose" "github.com/bububa/openvision/go/hand/pose"
"github.com/bububa/openvision/go/hand/pose3d"
) )
func main() { func main() {
@@ -27,17 +28,19 @@ func main() {
cpuCores := common.GetBigCPUCount() cpuCores := common.GetBigCPUCount()
common.SetOMPThreads(cpuCores) common.SetOMPThreads(cpuCores)
log.Printf("CPU big cores:%d\n", cpuCores) log.Printf("CPU big cores:%d\n", cpuCores)
estimator := handpose(modelPath) // estimator := handpose(modelPath)
defer estimator.Destroy() // defer estimator.Destroy()
common.SetEstimatorThreads(estimator, cpuCores) // common.SetEstimatorThreads(estimator, cpuCores)
for idx, d := range []detecter.Detecter{ // for idx, d := range []detecter.Detecter{
yolox(modelPath), // yolox(modelPath),
nanodet(modelPath), // nanodet(modelPath),
} { // } {
defer d.Destroy() // defer d.Destroy()
common.SetEstimatorThreads(d, cpuCores) // common.SetEstimatorThreads(d, cpuCores)
detect(d, estimator, imgPath, "hand1.jpg", idx) // detect(d, estimator, imgPath, "hand2.jpg", idx)
} // }
d3d := mediapipe(modelPath)
detect3d(d3d, imgPath, "hand1.jpg")
} }
func yolox(modelPath string) detecter.Detecter { func yolox(modelPath string) detecter.Detecter {
@@ -67,6 +70,16 @@ func handpose(modelPath string) pose.Estimator {
return d return d
} }
// mediapipe constructs a MediaPipe 3D hand pose estimator and loads the
// palm-detection and hand-landmark models from under modelPath.
// On load failure the program exits via log.Fatalln.
func mediapipe(modelPath string) *pose3d.Mediapipe {
	mp := pose3d.NewMediapipe()
	palm := filepath.Join(modelPath, "mediapipe/palm/full")
	hand := filepath.Join(modelPath, "mediapipe/hand/full")
	if err := mp.LoadModel(palm, hand); err != nil {
		log.Fatalln(err)
	}
	return mp
}
func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename string, idx int) { func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename string, idx int) {
inPath := filepath.Join(imgPath, filename) inPath := filepath.Join(imgPath, filename)
imgSrc, err := loadImage(inPath) imgSrc, err := loadImage(inPath)
@@ -104,6 +117,36 @@ func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename stri
if err := saveImage(out, outPath); err != nil { if err := saveImage(out, outPath); err != nil {
log.Fatalln(err) log.Fatalln(err)
} }
}
func detect3d(d *pose3d.Mediapipe, imgPath string, filename string) {
inPath := filepath.Join(imgPath, filename)
imgSrc, err := loadImage(inPath)
if err != nil {
log.Fatalln("load image failed,", err)
}
img := common.NewImage(imgSrc)
rois, err := d.Detect(img)
if err != nil {
log.Fatalln(err)
}
log.Printf("%+v\n", rois)
drawer := handdrawer.New()
outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("pose3d-hand-%s", filename))
out := drawer.DrawPalm(img, rois)
if err := saveImage(out, outPath); err != nil {
log.Fatalln(err)
}
for idx, roi := range rois {
outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("pose3d-palm3d-%d-%s", idx, filename))
out := drawer.DrawPalm3D(roi, 400, "#442519")
if err := saveImage(out, outPath); err != nil {
log.Fatalln(err)
}
}
} }

View File

@@ -1,6 +1,6 @@
// +build !vulkan // +build !vulkan
package eye package tracker
/* /*
#cgo CXXFLAGS: --std=c++11 -fopenmp #cgo CXXFLAGS: --std=c++11 -fopenmp

View File

@@ -7,71 +7,16 @@ import (
const ( const (
// DefaultBorderColor default drawer border color // DefaultBorderColor default drawer border color
DefaultBorderColor = common.Green DefaultBorderColor = common.Green
// DefaultKeypointColor default drawer keypoint color
DefaultKeypointColor = common.Pink
// DefaultBorderStrokeWidth default drawer border stroke width // DefaultBorderStrokeWidth default drawer border stroke width
DefaultBorderStrokeWidth = 3 DefaultBorderStrokeWidth = 3
// DefaultKeypointRadius default drawer keypoint radius // DefaultKeypointRadius default drawer keypoint radius
DefaultKeypointRadius = 3 DefaultKeypointRadius = 3
// DefaultKeypointStrokeWidth default drawer keypoint stroke width // DefaultKeypointStrokeWidth default drawer keypoint stroke width
DefaultKeypointStrokeWidth = 1 DefaultKeypointStrokeWidth = 1
) // DefaultLabelColor default label color
DefaultLabelColor = common.White
// CocoPart coco part define
type CocoPart = int
const (
// CocoPartNose nose
CocoPartNose CocoPart = iota
// CocoPartLEye left eye
CocoPartLEye
// CocoPartREye right eye
CocoPartREye
// CocoPartLEar left ear
CocoPartLEar
// CocoPartREar right ear
CocoPartREar
// CocoPartLShoulder left sholder
CocoPartLShoulder
// CocoPartRShoulder right sholder
CocoPartRShoulder
// CocoPartLElbow left elbow
CocoPartLElbow
// CocoPartRElbow right elbow
CocoPartRElbow
// CocoPartLWrist left wrist
CocoPartLWrist
// CocoPartRWrist right wrist
CocoPartRWrist
// CocoPartLHip left hip
CocoPartLHip
// CocoPartRHip right hip
CocoPartRHip
// CocoPartLKnee left knee
CocoPartLKnee
// CocoPartRKnee right knee
CocoPartRKnee
// CocoPartRAnkle right ankle
CocoPartRAnkle
// CocoPartLAnkle left ankle
CocoPartLAnkle
// CocoPartNeck neck
CocoPartNeck
// CocoPartBackground background
CocoPartBackground
)
var (
// CocoPair represents joints pair
CocoPair = [16][2]CocoPart{
{0, 1}, {1, 3}, {0, 2}, {2, 4}, {5, 6}, {5, 7}, {7, 9}, {6, 8}, {8, 10}, {5, 11}, {6, 12}, {11, 12}, {11, 13}, {12, 14}, {13, 15}, {14, 16},
}
// CocoColors represents color for coco parts
CocoColors = [17]string{
"#ff0000", "#ff5500", "#ffaa00", "#ffff00",
"#aaff00", "#55ff00", "#00ff00", "#00ff55", "#00ffaa",
"#00ffff", "#00aaff", "#0055ff",
"#0000ff", "#aa00ff", "#ff00ff",
"#ff00aa", "#ff0055",
}
) )
var ( var (

View File

@@ -2,8 +2,10 @@ package drawer
import ( import (
"image" "image"
"image/color"
"github.com/llgcode/draw2d/draw2dimg" "github.com/llgcode/draw2d/draw2dimg"
"github.com/llgcode/draw2d/draw2dkit"
"github.com/bububa/openvision/go/common" "github.com/bububa/openvision/go/common"
) )
@@ -18,6 +20,12 @@ type Drawer struct {
KeypointStrokeWidth float64 KeypointStrokeWidth float64
// KeypointRadius represents keypoints circle radius // KeypointRadius represents keypoints circle radius
KeypointRadius float64 KeypointRadius float64
// KeypointColor represents keypoint color
KeypointColor string
// LabelColor string
LabelColor string
// Font
Font *common.Font
} }
// New returns a new Drawer // New returns a new Drawer
@@ -27,6 +35,8 @@ func New(options ...Option) *Drawer {
BorderStrokeWidth: DefaultBorderStrokeWidth, BorderStrokeWidth: DefaultBorderStrokeWidth,
KeypointStrokeWidth: DefaultKeypointStrokeWidth, KeypointStrokeWidth: DefaultKeypointStrokeWidth,
KeypointRadius: DefaultKeypointRadius, KeypointRadius: DefaultKeypointRadius,
KeypointColor: DefaultKeypointColor,
LabelColor: DefaultLabelColor,
} }
for _, opt := range options { for _, opt := range options {
opt.apply(d) opt.apply(d)
@@ -42,15 +52,15 @@ func (d *Drawer) Draw(img image.Image, rois []common.ObjectInfo, drawBorder bool
gc := draw2dimg.NewGraphicContext(out) gc := draw2dimg.NewGraphicContext(out)
gc.DrawImage(img) gc.DrawImage(img)
for _, roi := range rois { for _, roi := range rois {
rect := common.Rect(
roi.Rect.X*imgW,
roi.Rect.Y*imgH,
roi.Rect.Width*imgW,
roi.Rect.Height*imgH,
)
borderColor := d.BorderColor
if drawBorder { if drawBorder {
// draw rect // draw rect
rect := common.Rect(
roi.Rect.X*imgW,
roi.Rect.Y*imgH,
roi.Rect.Width*imgW,
roi.Rect.Height*imgH,
)
borderColor := d.BorderColor
common.DrawRectangle(gc, rect, borderColor, "", d.BorderStrokeWidth) common.DrawRectangle(gc, rect, borderColor, "", d.BorderStrokeWidth)
} }
l := len(roi.Keypoints) l := len(roi.Keypoints)
@@ -95,6 +105,115 @@ func (d *Drawer) Draw(img image.Image, rois []common.ObjectInfo, drawBorder bool
poseColor := PoseColors[colorIdx] poseColor := PoseColors[colorIdx]
common.DrawCircle(gc, common.Pt(pt.Point.X*imgW, pt.Point.Y*imgH), d.KeypointRadius, poseColor, "", d.KeypointStrokeWidth) common.DrawCircle(gc, common.Pt(pt.Point.X*imgW, pt.Point.Y*imgH), d.KeypointRadius, poseColor, "", d.KeypointStrokeWidth)
} }
// draw name
if roi.Name != "" {
common.DrawLabelInWidth(gc, d.Font, roi.Name, common.Pt(rect.X, rect.MaxY()), d.LabelColor, borderColor, rect.Width)
}
}
return out
}
// DrawPalm draw PalmObject
func (d *Drawer) DrawPalm(img image.Image, rois []common.PalmObject) image.Image {
imgW := float64(img.Bounds().Dx())
imgH := float64(img.Bounds().Dy())
out := image.NewRGBA(img.Bounds())
gc := draw2dimg.NewGraphicContext(out)
gc.DrawImage(img)
for _, roi := range rois {
gc.SetLineWidth(d.BorderStrokeWidth)
gc.SetStrokeColor(common.ColorFromHex(d.BorderColor))
gc.BeginPath()
for idx, pt := range roi.Rect {
gc.MoveTo(pt.X*imgW, pt.Y*imgH)
if idx == len(roi.Rect)-1 {
gc.LineTo(roi.Rect[0].X*imgW, roi.Rect[0].Y*imgH)
} else {
gc.LineTo(roi.Rect[idx+1].X*imgW, roi.Rect[idx+1].Y*imgH)
}
}
gc.Close()
gc.Stroke()
l := len(roi.Skeleton)
if l == 0 {
continue
}
// draw skeleton
for idx := range roi.Skeleton[:l-1] {
var (
p0 common.Point
p1 common.Point
poseColor = PoseColors[idx/4]
)
gc.SetStrokeColor(common.ColorFromHex(poseColor))
if idx == 5 || idx == 9 || idx == 13 || idx == 17 {
p0 = roi.Skeleton[0]
p1 = roi.Skeleton[idx]
gc.BeginPath()
gc.MoveTo(p0.X*imgW, p0.Y*imgH)
gc.LineTo(p1.X*imgW, p1.Y*imgH)
gc.Close()
gc.Stroke()
} else if idx == 4 || idx == 8 || idx == 12 || idx == 16 {
continue
}
p0 = roi.Skeleton[idx]
p1 = roi.Skeleton[idx+1]
gc.BeginPath()
gc.MoveTo(p0.X*imgW, p0.Y*imgH)
gc.LineTo(p1.X*imgW, p1.Y*imgH)
gc.Close()
gc.Stroke()
}
for _, pt := range roi.Landmarks {
common.DrawCircle(gc, common.Pt(pt.X*imgW, pt.Y*imgH), d.KeypointRadius, d.KeypointColor, "", d.KeypointStrokeWidth)
}
}
return out
}
// DrawPalm3D draw 3d PalmObject
func (d *Drawer) DrawPalm3D(roi common.PalmObject, size float64, bg string) image.Image {
out := image.NewRGBA(image.Rect(0, 0, int(size), int(size)))
gc := draw2dimg.NewGraphicContext(out)
l := len(roi.Skeleton3d)
if l == 0 {
return out
}
if bg != "" {
bgColor := common.ColorFromHex(bg)
gc.SetFillColor(bgColor)
draw2dkit.Rectangle(gc, 0, 0, size, size)
gc.Fill()
gc.SetFillColor(color.Transparent)
}
// draw skeleton3d
for idx := range roi.Skeleton3d[:l-1] {
var (
p0 common.Point3d
p1 common.Point3d
poseColor = PoseColors[idx/4]
)
gc.SetStrokeColor(common.ColorFromHex(poseColor))
if idx == 5 || idx == 9 || idx == 13 || idx == 17 {
p0 = roi.Skeleton3d[0]
p1 = roi.Skeleton3d[idx]
gc.BeginPath()
gc.MoveTo(p0.X*size, p0.Y*size)
gc.LineTo(p1.X*size, p1.Y*size)
gc.Close()
gc.Stroke()
} else if idx == 4 || idx == 8 || idx == 12 || idx == 16 {
continue
}
p0 = roi.Skeleton3d[idx]
p1 = roi.Skeleton3d[idx+1]
gc.BeginPath()
gc.MoveTo(p0.X*size, p0.Y*size)
gc.LineTo(p1.X*size, p1.Y*size)
gc.Close()
gc.Stroke()
} }
return out return out
} }

View File

@@ -1,5 +1,9 @@
package drawer package drawer
import (
"github.com/bububa/openvision/go/common"
)
// Option represents Drawer option interface // Option represents Drawer option interface
type Option interface { type Option interface {
apply(*Drawer) apply(*Drawer)
@@ -38,3 +42,17 @@ func WithKeypointStrokeWidth(w float64) Option {
d.KeypointStrokeWidth = w d.KeypointStrokeWidth = w
}) })
} }
// WithKeypointColor returns an Option that overrides the Drawer's
// keypoint color.
func WithKeypointColor(color string) Option {
	return optionFunc(func(d *Drawer) { d.KeypointColor = color })
}
// WithFont returns an Option that sets the font the Drawer uses for
// label rendering.
func WithFont(font *common.Font) Option {
	return optionFunc(func(d *Drawer) { d.Font = font })
}

11
go/hand/pose3d/cgo.go Normal file
View File

@@ -0,0 +1,11 @@
// +build !vulkan
package pose3d
/*
#cgo CXXFLAGS: --std=c++11 -fopenmp
#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include
#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision
#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib
*/
import "C"

View File

@@ -0,0 +1,11 @@
// +build vulkan
package pose3d
/*
#cgo CXXFLAGS: --std=c++11 -fopenmp
#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include
#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision -lglslang -lvulkan -lSPIRV -lOGLCompiler -lMachineIndependent -lGenericCodeGen -lOSDependent
#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib
*/
import "C"

2
go/hand/pose3d/doc.go Normal file
View File

@@ -0,0 +1,2 @@
// Package pose3d implements a MediaPipe-based 3D hand pose estimator.
package pose3d

View File

@@ -0,0 +1,62 @@
package pose3d
/*
#include <stdlib.h>
#include <stdbool.h>
#include "openvision/common/common.h"
#include "openvision/hand/pose3d.h"
*/
import "C"
import (
"unsafe"
openvision "github.com/bububa/openvision/go"
"github.com/bububa/openvision/go/common"
)
// Mediapipe wraps the C IHandPose3DEstimator handle for MediaPipe-style
// 3D hand pose estimation (palm detection + hand landmark model).
type Mediapipe struct {
// d is the underlying C estimator handle, created by NewMediapipe and
// released by Destroy.
d C.IHandPose3DEstimator
}
// NewMediapipe allocates a new C-side mediapipe hand estimator.
// The caller must call Destroy to release the C resources.
func NewMediapipe() *Mediapipe {
return &Mediapipe{
d: C.new_mediapipe_hand(),
}
}
// Destroy releases the underlying C estimator. The Mediapipe must not be
// used after Destroy returns.
func (m *Mediapipe) Destroy() {
C.destroy_mediapipe_hand(m.d)
}
// LoadModel loads the palm-detection model from palmPath and the hand
// landmark model from handPath. On failure it returns a LoadModelError
// wrapping the non-zero C return code.
func (m *Mediapipe) LoadModel(palmPath string, handPath string) error {
cPalm := C.CString(palmPath)
defer C.free(unsafe.Pointer(cPalm))
cHand := C.CString(handPath)
defer C.free(unsafe.Pointer(cHand))
retCode := C.mediapipe_hand_load_model(m.d, cPalm, cHand)
if retCode != 0 {
return openvision.LoadModelError(int(retCode))
}
return nil
}
// Detect runs palm detection plus 3D hand landmark estimation on img.
// It returns one PalmObject per detected hand, or a DetectHandError
// wrapping the non-zero C return code on failure.
func (m *Mediapipe) Detect(img *common.Image) ([]common.PalmObject, error) {
	imgWidth := img.WidthF64()
	imgHeight := img.HeightF64()
	data := img.Bytes()
	// Guard empty pixel data: &data[0] would panic on an empty slice.
	// 10001 is the C convention for null/invalid image data.
	if len(data) == 0 {
		return nil, openvision.DetectHandError(10001)
	}
	cObjs := common.NewCPalmObjectVector()
	defer common.FreeCPalmObjectVector(cObjs)
	errCode := C.mediapipe_hand_detect(
		m.d,
		(*C.uchar)(unsafe.Pointer(&data[0])),
		C.int(imgWidth), C.int(imgHeight),
		(*C.PalmObjectVector)(unsafe.Pointer(cObjs)),
	)
	if errCode != 0 {
		return nil, openvision.DetectHandError(int(errCode))
	}
	return common.GoPalmObjectVector(cObjs, imgWidth, imgHeight), nil
}

View File

@@ -71,6 +71,7 @@ target_include_directories(openvision
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/pose> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/pose>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/pose3d>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose/detecter> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose/detecter>
@@ -109,6 +110,7 @@ file(COPY
file(COPY file(COPY
${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter.h ${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter.h
${CMAKE_CURRENT_SOURCE_DIR}/hand/pose.h ${CMAKE_CURRENT_SOURCE_DIR}/hand/pose.h
${CMAKE_CURRENT_SOURCE_DIR}/hand/pose3d.h
DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/hand DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/hand
) )

View File

@@ -58,6 +58,13 @@ void FreePoint2fVector(Point2fVector *p) {
} }
} }
// FreePoint3dVector releases the points array owned by a Point3dVector and
// nulls the pointer so a repeated call is harmless. The vector struct
// itself is NOT freed here; the caller owns it (the Go wrapper frees it
// separately with free()).
void FreePoint3dVector(Point3dVector *p) {
if (p->points != NULL) {
free(p->points);
p->points = NULL;
}
}
void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val) { void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val) {
if (p->points == NULL || i >= p->length) { if (p->points == NULL || i >= p->length) {
return; return;

View File

@@ -11,123 +11,134 @@ typedef ov::Size Size;
typedef ov::Size2f Size2f; typedef ov::Size2f Size2f;
typedef ov::Point Point; typedef ov::Point Point;
typedef ov::Point2f Point2f; typedef ov::Point2f Point2f;
typedef ov::Point3d Point3d;
typedef ov::Rect Rect; typedef ov::Rect Rect;
typedef ov::Keypoint Keypoint; typedef ov::Keypoint Keypoint;
#else #else
// Wrapper for an individual cv::cvSize // Wrapper for an individual cv::cvSize
typedef struct Size { typedef struct Size {
int width; int width;
int height; int height;
} Size; } Size;
// //
// Wrapper for an individual cv::cvSize2f // Wrapper for an individual cv::cvSize2f
typedef struct Size2f { typedef struct Size2f {
int width; int width;
int height; int height;
} Size2f; } Size2f;
// Wrapper for an individual cv::cvPoint // Wrapper for an individual cv::cvPoint
typedef struct Point { typedef struct Point {
int x; int x;
int y; int y;
} Point; } Point;
// Wrapper for an individual cv::Point2f // Wrapper for an individual cv::Point2f
typedef struct Point2f { typedef struct Point2f {
float x; float x;
float y; float y;
} Point2f; } Point2f;
typedef struct Point3d {
float x;
float y;
float z;
} Point3d;
// Wrapper for an individual cv::Rect // Wrapper for an individual cv::Rect
typedef struct Rect { typedef struct Rect {
int x; int x;
int y; int y;
int width; int width;
int height; int height;
} Rect; } Rect;
typedef struct Keypoint { typedef struct Keypoint {
Point2f p; Point2f p;
float score; float score;
int id; int id;
} Keypoint; } Keypoint;
#endif #endif
typedef void* IEstimator; typedef void *IEstimator;
int get_gpu_count(); int get_gpu_count();
int create_gpu_instance(); int create_gpu_instance();
void destroy_gpu_instance(); void destroy_gpu_instance();
int get_big_cpu_count(); int get_big_cpu_count();
void set_omp_num_threads(int n); void set_omp_num_threads(int n);
int load_model(IEstimator e, const char* root_path); int load_model(IEstimator e, const char *root_path);
void destroy_estimator(IEstimator e); void destroy_estimator(IEstimator e);
void set_num_threads(IEstimator e, int n); void set_num_threads(IEstimator e, int n);
void set_light_mode(IEstimator e, bool mode); void set_light_mode(IEstimator e, bool mode);
typedef struct Point2fVector { typedef struct Point2fVector {
Point2f* points; Point2f *points;
int length; int length;
} Point2fVector; } Point2fVector;
void FreePoint2fVector(Point2fVector *p); void FreePoint2fVector(Point2fVector *p);
void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f* val); void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val);
typedef struct Point3dVector {
Point3d *points;
int length;
} Point3dVector;
void FreePoint3dVector(Point3dVector *p);
typedef struct RectVector { typedef struct RectVector {
Rect* rects; Rect *rects;
int length; int length;
} RectVector; } RectVector;
void FreeRectVector(RectVector *p); void FreeRectVector(RectVector *p);
typedef struct FloatVector { typedef struct FloatVector {
float* values; float *values;
int length; int length;
} FloatVector; } FloatVector;
void FreeFloatVector(FloatVector *p); void FreeFloatVector(FloatVector *p);
typedef struct Bytes { typedef struct Bytes {
unsigned char* values; unsigned char *values;
int length; int length;
} Bytes; } Bytes;
void FreeBytes(Bytes *p); void FreeBytes(Bytes *p);
typedef struct KeypointVector { typedef struct KeypointVector {
Keypoint* points; Keypoint *points;
int length; int length;
} KeypointVector; } KeypointVector;
void FreeKeypointVector(KeypointVector *p); void FreeKeypointVector(KeypointVector *p);
void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint* val); void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint *val);
typedef struct ImageC { typedef struct ImageC {
unsigned char* data; unsigned char *data;
int width; int width;
int height; int height;
int channels; int channels;
} Image; } Image;
void FreeImage(Image* p); void FreeImage(Image *p);
typedef struct ObjectInfoC { typedef struct ObjectInfoC {
Rect rect; Rect rect;
float score; float score;
int label; int label;
KeypointVector* pts; KeypointVector *pts;
} ObjectInfo; } ObjectInfo;
void FreeObjectInfo(ObjectInfo *p); void FreeObjectInfo(ObjectInfo *p);
typedef struct ObjectInfoVector { typedef struct ObjectInfoVector {
ObjectInfo* items; ObjectInfo *items;
int length; int length;
} ObjectInfoVector; } ObjectInfoVector;
void FreeObjectInfoVector(ObjectInfoVector *p); void FreeObjectInfoVector(ObjectInfoVector *p);

View File

@@ -76,6 +76,13 @@ struct Point2f {
}; };
}; };
struct Point3d {
float x;
float y;
float z;
Point3d(float _x = 0, float _y = 0, float _z = 0) : x(_x), y(_y), z(_z) {}
};
// Wrapper for an individual cv::Rect // Wrapper for an individual cv::Rect
struct Rect { struct Rect {
int x; int x;

View File

@@ -1,6 +1,6 @@
#include "nanodet.hpp" #include "nanodet.hpp"
#include <string>
#include <float.h> #include <float.h>
#include <string>
#ifdef OV_VULKAN #ifdef OV_VULKAN
#include "gpu.h" #include "gpu.h"
@@ -8,227 +8,219 @@
namespace ovhand { namespace ovhand {
static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Mat& dis_pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<ov::ObjectInfo>& objects) static void generate_nanodet_proposals(const ncnn::Mat &cls_pred,
{ const ncnn::Mat &dis_pred, int stride,
const ncnn::Mat &in_pad,
float prob_threshold,
std::vector<ov::ObjectInfo> &objects) {
const int num_grid = cls_pred.h; const int num_grid = cls_pred.h;
int num_grid_x; int num_grid_x;
int num_grid_y; int num_grid_y;
if (in_pad.w > in_pad.h) if (in_pad.w > in_pad.h) {
{ num_grid_x = in_pad.w / stride;
num_grid_x = in_pad.w / stride; num_grid_y = num_grid / num_grid_x;
num_grid_y = num_grid / num_grid_x; } else {
} num_grid_y = in_pad.h / stride;
else num_grid_x = num_grid / num_grid_y;
{ }
num_grid_y = in_pad.h / stride;
num_grid_x = num_grid / num_grid_y;
}
const int num_class = cls_pred.w; const int num_class = cls_pred.w;
const int reg_max_1 = dis_pred.w / 4; const int reg_max_1 = dis_pred.w / 4;
//__android_log_print(ANDROID_LOG_WARN, "ncnn","cls_pred h %d, w %d",cls_pred.h,cls_pred.w);
//__android_log_print(ANDROID_LOG_WARN, "ncnn","%d,%d,%d,%d",num_grid_x,num_grid_y,num_class,reg_max_1);
for (int i = 0; i < num_grid_y; i++)
{
for (int j = 0; j < num_grid_x; j++)
{
const int idx = i * num_grid_x + j;
const float* scores = cls_pred.row(idx); for (int i = 0; i < num_grid_y; i++) {
for (int j = 0; j < num_grid_x; j++) {
const int idx = i * num_grid_x + j;
// find label with max score const float *scores = cls_pred.row(idx);
int label = -1;
float score = -FLT_MAX;
for (int k = 0; k < num_class; k++)
{
if (scores[k] > score)
{
label = k;
score = scores[k];
}
}
if (score >= prob_threshold) // find label with max score
{ int label = -1;
ncnn::Mat bbox_pred(reg_max_1, 4, (void*)dis_pred.row(idx)); float score = -FLT_MAX;
{ for (int k = 0; k < num_class; k++) {
ncnn::Layer* softmax = ncnn::create_layer("Softmax"); if (scores[k] > score) {
label = k;
ncnn::ParamDict pd; score = scores[k];
pd.set(0, 1); // axis
pd.set(1, 1);
softmax->load_param(pd);
ncnn::Option opt;
// opt.num_threads = 1;
opt.use_packing_layout = false;
softmax->create_pipeline(opt);
softmax->forward_inplace(bbox_pred, opt);
softmax->destroy_pipeline(opt);
delete softmax;
}
float pred_ltrb[4];
for (int k = 0; k < 4; k++)
{
float dis = 0.f;
const float* dis_after_sm = bbox_pred.row(k);
for (int l = 0; l < reg_max_1; l++)
{
dis += l * dis_after_sm[l];
}
pred_ltrb[k] = dis * stride;
}
float pb_cx = (j + 0.5f) * stride;
float pb_cy = (i + 0.5f) * stride;
float x0 = pb_cx - pred_ltrb[0];
float y0 = pb_cy - pred_ltrb[1];
float x1 = pb_cx + pred_ltrb[2];
float y1 = pb_cy + pred_ltrb[3];
ov::ObjectInfo obj;
obj.rect.x = x0;
obj.rect.y = y0;
obj.rect.width = x1 - x0;
obj.rect.height = y1 - y0;
obj.label = label;
obj.score= score;
objects.push_back(obj);
}
} }
} }
}
int Nanodet::Detect(const unsigned char* rgbdata, if (score >= prob_threshold) {
int img_width, int img_height, ncnn::Mat bbox_pred(reg_max_1, 4, (void *)dis_pred.row(idx));
std::vector<ov::ObjectInfo>& rois) {
if (!initialized_) {
return 10000;
}
if (rgbdata == 0){
return 10001;
}
int w = img_width;
int h = img_height;
float scale = 1.f;
if (w > h) {
scale = (float)target_size / w;
w = target_size;
h = h * scale;
} else {
scale = (float)target_size / h;
h = target_size;
w = w * scale;
}
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, w, h);
// pad to target_size rectangle
float wpad = 320-w;//(w + 31) / 32 * 32 - w;
float hpad = 320-h;//(h + 31) / 32 * 32 - h;
ncnn::Mat in_pad;
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
in_pad.substract_mean_normalize(mean_vals, norm_vals);
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("input.1", in_pad);
std::vector<ov::ObjectInfo> proposals;
// stride 8
{
ncnn::Mat cls_pred;
ncnn::Mat dis_pred;
ex.extract("cls_pred_stride_8", cls_pred);
ex.extract("dis_pred_stride_8", dis_pred);
std::vector<ov::ObjectInfo> objects8;
generate_nanodet_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold, objects8);
proposals.insert(proposals.end(), objects8.begin(), objects8.end());
}
// stride 16
{
ncnn::Mat cls_pred;
ncnn::Mat dis_pred;
ex.extract("cls_pred_stride_16", cls_pred);
ex.extract("dis_pred_stride_16", dis_pred);
std::vector<ov::ObjectInfo> objects16;
generate_nanodet_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold, objects16);
proposals.insert(proposals.end(), objects16.begin(), objects16.end());
}
// stride 32
{
ncnn::Mat cls_pred;
ncnn::Mat dis_pred;
ex.extract("cls_pred_stride_32", cls_pred);
ex.extract("dis_pred_stride_32", dis_pred);
std::vector<ov::ObjectInfo> objects32;
generate_nanodet_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold, objects32);
proposals.insert(proposals.end(), objects32.begin(), objects32.end());
}
// sort all proposals by score from highest to lowest
qsort_descent_inplace(proposals);
// apply nms with nms_threshold
std::vector<int> picked;
nms_sorted_bboxes(proposals, picked, nms_threshold);
int count = picked.size();
rois.resize(count);
for (int i = 0; i < count; i++)
{
ov::ObjectInfo roi = proposals[picked[i]];
// adjust offset to original unpadded
float x0 = (roi.rect.x - (wpad / 2)) / scale;
float y0 = (roi.rect.y - (hpad / 2)) / scale;
float x1 = (roi.rect.x + roi.rect.width - (wpad / 2)) / scale;
float y1 = (roi.rect.y + roi.rect.height - (hpad / 2)) / scale;
// clip
x0 = std::max(std::min(x0, (float)(img_width - 1)), 0.f);
y0 = std::max(std::min(y0, (float)(img_height - 1)), 0.f);
x1 = std::max(std::min(x1, (float)(img_width - 1)), 0.f);
y1 = std::max(std::min(y1, (float)(img_height - 1)), 0.f);
roi.rect.x = x0;
roi.rect.y = y0;
roi.rect.width = x1 - x0;
roi.rect.height = y1 - y0;
rois[i] = roi;
}
// sort objects by area
struct
{
bool operator()(const ov::ObjectInfo& a, const ov::ObjectInfo& b) const
{ {
return a.rect.area() > b.rect.area(); ncnn::Layer *softmax = ncnn::create_layer("Softmax");
ncnn::ParamDict pd;
pd.set(0, 1); // axis
pd.set(1, 1);
softmax->load_param(pd);
ncnn::Option opt;
opt.num_threads = 1;
opt.use_packing_layout = false;
softmax->create_pipeline(opt);
softmax->forward_inplace(bbox_pred, opt);
softmax->destroy_pipeline(opt);
delete softmax;
} }
} objects_area_greater;
std::sort(rois.begin(), rois.end(), objects_area_greater); float pred_ltrb[4];
return 0; for (int k = 0; k < 4; k++) {
float dis = 0.f;
const float *dis_after_sm = bbox_pred.row(k);
for (int l = 0; l < reg_max_1; l++) {
dis += l * dis_after_sm[l];
}
pred_ltrb[k] = dis * stride;
}
float pb_cx = (j + 0.5f) * stride;
float pb_cy = (i + 0.5f) * stride;
float x0 = pb_cx - pred_ltrb[0];
float y0 = pb_cy - pred_ltrb[1];
float x1 = pb_cx + pred_ltrb[2];
float y1 = pb_cy + pred_ltrb[3];
ov::ObjectInfo obj;
obj.rect.x = x0;
obj.rect.y = y0;
obj.rect.width = x1 - x0;
obj.rect.height = y1 - y0;
obj.label = label;
obj.score = score;
objects.push_back(obj);
}
}
}
} }
// Run NanoDet object detection on a raw RGB buffer.
// Results are written to `rois` in original-image pixel coordinates and
// sorted by box area, largest first.
// Returns 0 on success, 10000 if no model is loaded, 10001 on null input.
int Nanodet::Detect(const unsigned char *rgbdata, int img_width, int img_height,
std::vector<ov::ObjectInfo> &rois) {
if (!initialized_) {
return 10000;
}
if (rgbdata == 0) {
return 10001;
}
// Resize so the longer side equals target_size, preserving aspect ratio.
int w = img_width;
int h = img_height;
float scale = 1.f;
if (w > h) {
scale = (float)target_size / w;
w = target_size;
h = h * scale;
} else {
scale = (float)target_size / h;
h = target_size;
w = w * scale;
}
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB,
img_width, img_height, w, h);
// pad to target_size rectangle
// NOTE(review): padding is hard-coded to 320 — presumably target_size == 320;
// confirm, otherwise the wpad/hpad un-padding below disagrees with the border.
float wpad = 320 - w; //(w + 31) / 32 * 32 - w;
float hpad = 320 - h; //(h + 31) / 32 * 32 - h;
ncnn::Mat in_pad;
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2,
wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
in_pad.substract_mean_normalize(mean_vals, norm_vals);
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("input.1", in_pad);
// Gather proposals from the three detection heads (strides 8/16/32).
std::vector<ov::ObjectInfo> proposals;
// stride 8
{
ncnn::Mat cls_pred;
ncnn::Mat dis_pred;
ex.extract("cls_pred_stride_8", cls_pred);
ex.extract("dis_pred_stride_8", dis_pred);
std::vector<ov::ObjectInfo> objects8;
generate_nanodet_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold,
objects8);
proposals.insert(proposals.end(), objects8.begin(), objects8.end());
}
// stride 16
{
ncnn::Mat cls_pred;
ncnn::Mat dis_pred;
ex.extract("cls_pred_stride_16", cls_pred);
ex.extract("dis_pred_stride_16", dis_pred);
std::vector<ov::ObjectInfo> objects16;
generate_nanodet_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold,
objects16);
proposals.insert(proposals.end(), objects16.begin(), objects16.end());
}
// stride 32
{
ncnn::Mat cls_pred;
ncnn::Mat dis_pred;
ex.extract("cls_pred_stride_32", cls_pred);
ex.extract("dis_pred_stride_32", dis_pred);
std::vector<ov::ObjectInfo> objects32;
generate_nanodet_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold,
objects32);
proposals.insert(proposals.end(), objects32.begin(), objects32.end());
}
// sort all proposals by score from highest to lowest
qsort_descent_inplace(proposals);
// apply nms with nms_threshold
std::vector<int> picked;
nms_sorted_bboxes(proposals, picked, nms_threshold);
int count = picked.size();
rois.resize(count);
for (int i = 0; i < count; i++) {
ov::ObjectInfo roi = proposals[picked[i]];
// adjust offset to original unpadded
float x0 = (roi.rect.x - (wpad / 2)) / scale;
float y0 = (roi.rect.y - (hpad / 2)) / scale;
float x1 = (roi.rect.x + roi.rect.width - (wpad / 2)) / scale;
float y1 = (roi.rect.y + roi.rect.height - (hpad / 2)) / scale;
// clip
x0 = std::max(std::min(x0, (float)(img_width - 1)), 0.f);
y0 = std::max(std::min(y0, (float)(img_height - 1)), 0.f);
x1 = std::max(std::min(x1, (float)(img_width - 1)), 0.f);
y1 = std::max(std::min(y1, (float)(img_height - 1)), 0.f);
roi.rect.x = x0;
roi.rect.y = y0;
roi.rect.width = x1 - x0;
roi.rect.height = y1 - y0;
rois[i] = roi;
}
// sort objects by area
struct {
bool operator()(const ov::ObjectInfo &a, const ov::ObjectInfo &b) const {
return a.rect.area() > b.rect.area();
}
} objects_area_greater;
std::sort(rois.begin(), rois.end(), objects_area_greater);
return 0;
} }
} // namespace ovhand

37
src/hand/pose3d.h Normal file
View File

@@ -0,0 +1,37 @@
#ifndef _HAND_POSE3D_C_H_
#define _HAND_POSE3D_C_H_
#include "../common/common.h"
#ifdef __cplusplus
extern "C" {
#endif
/* One detected hand: detection score, in-plane rotation (radians), the
 * 4-corner rotated ROI (rect), 7 palm landmarks, and the 2D/3D skeleton
 * produced by the landmark model. All pointer members are heap-allocated;
 * release with FreePalmObject. skeleton/skeleton3d may be NULL. */
typedef struct PalmObject {
float score;
float rotation;
Point2fVector *rect;
Point2fVector *landmarks;
Point2fVector *skeleton;
Point3dVector *skeleton3d;
} PalmObject;
/* A malloc'd array of PalmObject; release with FreePalmObjectVector. */
typedef struct PalmObjectVector {
PalmObject *items;
int length;
} PalmObjectVector;
/* Free all members of obj (NULL-safe per member); does not free obj itself. */
void FreePalmObject(PalmObject *obj);
/* Free every item and the items array; does not free vec itself. */
void FreePalmObjectVector(PalmObjectVector *vec);
/* Opaque handle around ovhand3d::MediapipeHand. */
typedef void *IHandPose3DEstimator;
IHandPose3DEstimator new_mediapipe_hand();
void destroy_mediapipe_hand(IHandPose3DEstimator d);
/* Load palm-detector and hand-landmark models; returns 0 on success. */
int mediapipe_hand_load_model(IHandPose3DEstimator d, const char *palm_path,
const char *hand_path);
/* Detect hands on a raw RGB buffer; fills vec (caller frees). 0 on success. */
int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata,
int img_width, int img_height, PalmObjectVector *vec);
#ifdef __cplusplus
}
#endif
#endif // !_HAND_POSE3D_C_H_

View File

@@ -0,0 +1,106 @@
#include "../pose3d.h"
#include "mediapipe/mediapipe.hpp"
#include <iostream>
// Release every heap-allocated member of a PalmObject and null the pointers.
// Each member is checked individually, so partially-filled objects are safe.
void FreePalmObject(PalmObject *obj) {
  if (obj->rect) {
    FreePoint2fVector(obj->rect);
    obj->rect = NULL;
  }
  if (obj->landmarks) {
    FreePoint2fVector(obj->landmarks);
    obj->landmarks = NULL;
  }
  if (obj->skeleton) {
    FreePoint2fVector(obj->skeleton);
    obj->skeleton = NULL;
  }
  if (obj->skeleton3d) {
    FreePoint3dVector(obj->skeleton3d);
    obj->skeleton3d = NULL;
  }
}
// Free each PalmObject in the vector, then the items array itself.
// A vector whose items pointer is already NULL is a no-op.
void FreePalmObjectVector(PalmObjectVector *vec) {
  if (vec->items == NULL) {
    return;
  }
  for (int i = 0; i < vec->length; ++i) {
    FreePalmObject(vec->items + i);
  }
  free(vec->items);
  vec->items = NULL;
}
// Allocate a MediaPipe hand estimator behind the opaque C handle.
IHandPose3DEstimator new_mediapipe_hand() {
  return new ovhand3d::MediapipeHand();
}

// Destroy an estimator previously created by new_mediapipe_hand.
void destroy_mediapipe_hand(IHandPose3DEstimator d) {
  delete static_cast<ovhand3d::MediapipeHand *>(d);
}

// Load the palm-detection and hand-landmark models from their directories.
// Returns the estimator's own status code (0 on success).
int mediapipe_hand_load_model(IHandPose3DEstimator d, const char *palm_path,
                              const char *hand_path) {
  ovhand3d::MediapipeHand *hand = static_cast<ovhand3d::MediapipeHand *>(d);
  return hand->LoadModel(palm_path, hand_path);
}
// Detect palms and 3D hand skeletons in a raw RGB buffer and marshal the
// C++ results into C structures. The caller owns `objects` afterwards and
// releases it with FreePalmObjectVector.
// Returns 0 on success or the estimator's error code (10000/10001).
int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata,
                          int img_width, int img_height,
                          PalmObjectVector *objects) {
  std::vector<ovhand3d::PalmObject> objs;
  int ret = static_cast<ovhand3d::MediapipeHand *>(d)->Detect(
      rgbdata, img_width, img_height, objs);
  if (ret != 0) {
    return ret;
  }
  const size_t total_objs = objs.size();
  objects->length = total_objs;
  if (total_objs == 0) {
    objects->items = NULL;
    return 0;
  }
  objects->items = (PalmObject *)malloc(total_objs * sizeof(PalmObject));
  for (size_t i = 0; i < total_objs; ++i) {
    objects->items[i].score = objs[i].score;
    objects->items[i].rotation = objs[i].rotation;
    // 4 corners of the rotated hand ROI.
    objects->items[i].rect = (Point2fVector *)malloc(sizeof(Point2fVector));
    objects->items[i].rect->length = 4;
    objects->items[i].rect->points = (Point2f *)malloc(4 * sizeof(Point2f));
    for (size_t j = 0; j < 4; ++j) {
      objects->items[i].rect->points[j] = objs[i].hand_pos[j];
    }
    // 7 palm landmarks.
    // BUGFIX: the buffer must hold 7 points — the previous
    // malloc(4 * sizeof(Point2f)) overflowed on the copy loop below.
    objects->items[i].landmarks =
        (Point2fVector *)malloc(sizeof(Point2fVector));
    objects->items[i].landmarks->length = 7;
    objects->items[i].landmarks->points =
        (Point2f *)malloc(7 * sizeof(Point2f));
    for (size_t j = 0; j < 7; ++j) {
      objects->items[i].landmarks->points[j] = objs[i].landmarks[j];
    }
    // Skeleton may be empty when the landmark stage produced nothing;
    // leave the pointers NULL so FreePalmObject can skip them.
    const size_t total_skeleton = objs[i].skeleton.size();
    if (total_skeleton == 0) {
      objects->items[i].skeleton = NULL;
      objects->items[i].skeleton3d = NULL;
      continue;
    }
    objects->items[i].skeleton = (Point2fVector *)malloc(sizeof(Point2fVector));
    objects->items[i].skeleton->length = total_skeleton;
    objects->items[i].skeleton->points =
        (Point2f *)malloc(total_skeleton * sizeof(Point2f));
    objects->items[i].skeleton3d =
        (Point3dVector *)malloc(sizeof(Point3dVector));
    objects->items[i].skeleton3d->length = total_skeleton;
    objects->items[i].skeleton3d->points =
        (Point3d *)malloc(total_skeleton * sizeof(Point3d));
    for (size_t j = 0; j < total_skeleton; ++j) {
      objects->items[i].skeleton->points[j].x = objs[i].skeleton[j].x;
      objects->items[i].skeleton->points[j].y = objs[i].skeleton[j].y;
      objects->items[i].skeleton3d->points[j].x = objs[i].skeleton3d[j].x;
      objects->items[i].skeleton3d->points[j].y = objs[i].skeleton3d[j].y;
      objects->items[i].skeleton3d->points[j].z = objs[i].skeleton3d[j].z;
    }
  }
  return 0;
}

View File

@@ -0,0 +1,534 @@
#include "mediapipe.hpp"
#include "mat.h"
#include <math.h>
namespace ovhand3d {
// Linearly interpolate an anchor scale for one stride level.
// A single stride level gets the midpoint of [min_scale, max_scale].
static float calculate_scale(float min_scale, float max_scale, int stride_index,
                             int num_strides) {
  if (num_strides == 1) {
    return (min_scale + max_scale) * 0.5f;
  }
  double t = 1.0 * stride_index / (num_strides - 1.0f);
  return min_scale + (max_scale - min_scale) * t;
}
// Build the SSD anchor grid, MediaPipe-style: consecutive layers sharing the
// same stride are fused into one set of scales/aspect ratios, and anchor
// width/height are fixed to 1.0 (fixed-anchor-size behaviour), so only the
// normalized centers vary across the feature map.
// FIX: removed a dead outer `int layer_id = 0;` that was shadowed by the
// loop variable of the original for-loop.
static void generate_anchors(std::vector<Anchor> &anchors,
                             const AnchorsParams &anchor_params) {
  int layer_id = 0;
  while (layer_id < (int)anchor_params.strides.size()) {
    std::vector<float> anchor_height;
    std::vector<float> anchor_width;
    std::vector<float> aspect_ratios;
    std::vector<float> scales;
    // Fuse all consecutive layers that use the same stride.
    int last_same_stride_layer = layer_id;
    while (last_same_stride_layer < (int)anchor_params.strides.size() &&
           anchor_params.strides[last_same_stride_layer] ==
               anchor_params.strides[layer_id]) {
      const float scale =
          calculate_scale(anchor_params.min_scale, anchor_params.max_scale,
                          last_same_stride_layer, anchor_params.strides.size());
      for (int aspect_ratio_id = 0;
           aspect_ratio_id < (int)anchor_params.aspect_ratios.size();
           aspect_ratio_id++) {
        aspect_ratios.push_back(anchor_params.aspect_ratios[aspect_ratio_id]);
        scales.push_back(scale);
      }
      // Extra ratio-1 anchor with the geometric mean of this scale and the
      // next level's scale (1.0 for the last level).
      const float scale_next =
          last_same_stride_layer == (int)anchor_params.strides.size() - 1
              ? 1.0f
              : calculate_scale(anchor_params.min_scale,
                                anchor_params.max_scale,
                                last_same_stride_layer + 1,
                                anchor_params.strides.size());
      scales.push_back(sqrt(scale * scale_next));
      aspect_ratios.push_back(1.0);
      last_same_stride_layer++;
    }
    for (int i = 0; i < (int)aspect_ratios.size(); ++i) {
      const float ratio_sqrts = sqrt(aspect_ratios[i]);
      anchor_height.push_back(scales[i] / ratio_sqrts);
      anchor_width.push_back(scales[i] * ratio_sqrts);
    }
    const int stride = anchor_params.strides[layer_id];
    int feature_map_height =
        ceil(1.0f * anchor_params.input_size_height / stride);
    int feature_map_width =
        ceil(1.0f * anchor_params.input_size_width / stride);
    for (int y = 0; y < feature_map_height; ++y) {
      for (int x = 0; x < feature_map_width; ++x) {
        for (int anchor_id = 0; anchor_id < (int)anchor_height.size();
             ++anchor_id) {
          const float x_center =
              (x + anchor_params.anchor_offset_x) * 1.0f / feature_map_width;
          const float y_center =
              (y + anchor_params.anchor_offset_y) * 1.0f / feature_map_height;
          Anchor new_anchor;
          new_anchor.x_center = x_center;
          new_anchor.y_center = y_center;
          // Fixed anchor size: the decoder ignores per-anchor w/h.
          new_anchor.w = 1.0f;
          new_anchor.h = 1.0f;
          anchors.push_back(new_anchor);
        }
      }
    }
    layer_id = last_same_stride_layer;
  }
}
// Configure and generate the palm-detector SSD anchors.
// FIX/generalization: the input size was hard-coded to 192x192 while the
// input_w/input_h parameters were ignored; the parameters are now used.
// The only call site passes (192, 192), so behavior there is unchanged.
static void create_ssd_anchors(int input_w, int input_h,
                               std::vector<Anchor> &anchors) {
  AnchorsParams anchor_options;
  anchor_options.num_layers = 4;
  anchor_options.min_scale = 0.1484375;
  anchor_options.max_scale = 0.75;
  anchor_options.input_size_height = input_h;
  anchor_options.input_size_width = input_w;
  anchor_options.anchor_offset_x = 0.5f;
  anchor_options.anchor_offset_y = 0.5f;
  // Four heads: stride 8 once, stride 16 three times (fused by generator).
  anchor_options.strides.push_back(8);
  anchor_options.strides.push_back(16);
  anchor_options.strides.push_back(16);
  anchor_options.strides.push_back(16);
  anchor_options.aspect_ratios.push_back(1.0);
  generate_anchors(anchors, anchor_options);
}
// Decode raw palm-detector outputs into candidate regions.
// Each anchor has 18 regression values: 4 box values (cx, cy, w, h offsets
// in input pixels) followed by 7 (x, y) landmark offsets. Scores pass
// through a sigmoid; anchors below score_thresh are dropped. All decoded
// coordinates are normalized to [0, 1] of the input image.
static int decode_bounds(std::list<DetectRegion> &region_list,
float score_thresh, int input_img_w, int input_img_h,
float *scores_ptr, float *bboxes_ptr,
std::vector<Anchor> &anchors) {
// `region` is reused across iterations; every field is overwritten before
// each push_back.
DetectRegion region;
int i = 0;
for (auto &anchor : anchors) {
float score = ov::sigmoid(scores_ptr[i]);
if (score > score_thresh) {
float *p = bboxes_ptr + (i * 18);
// Box center relative to the anchor center, normalized.
float cx = p[0] / input_img_w + anchor.x_center;
float cy = p[1] / input_img_h + anchor.y_center;
float w = p[2] / input_img_w;
float h = p[3] / input_img_h;
ov::Point2f topleft, btmright;
topleft.x = cx - w * 0.5f;
topleft.y = cy - h * 0.5f;
btmright.x = cx + w * 0.5f;
btmright.y = cy + h * 0.5f;
region.score = score;
region.topleft = topleft;
region.btmright = btmright;
// 7 palm landmarks: pixel offsets relative to the anchor center,
// normalized back to [0, 1].
for (int j = 0; j < 7; j++) {
float lx = p[4 + (2 * j) + 0];
float ly = p[4 + (2 * j) + 1];
lx += anchor.x_center * input_img_w;
ly += anchor.y_center * input_img_h;
lx /= (float)input_img_w;
ly /= (float)input_img_h;
region.landmarks[j].x = lx;
region.landmarks[j].y = ly;
}
region_list.push_back(region);
}
i++;
}
return 0;
}
// Intersection-over-union of two detection regions. Corner order is
// normalized first, so flipped boxes are handled; returns 0 when either box
// has non-positive area.
static float calc_intersection_over_union(DetectRegion &region0,
                                          DetectRegion &region1) {
  const float ax0 = std::min(region0.topleft.x, region0.btmright.x);
  const float ay0 = std::min(region0.topleft.y, region0.btmright.y);
  const float ax1 = std::max(region0.topleft.x, region0.btmright.x);
  const float ay1 = std::max(region0.topleft.y, region0.btmright.y);
  const float bx0 = std::min(region1.topleft.x, region1.btmright.x);
  const float by0 = std::min(region1.topleft.y, region1.btmright.y);
  const float bx1 = std::max(region1.topleft.x, region1.btmright.x);
  const float by1 = std::max(region1.topleft.y, region1.btmright.y);
  const float area0 = (ay1 - ay0) * (ax1 - ax0);
  const float area1 = (by1 - by0) * (bx1 - bx0);
  if (area0 <= 0 || area1 <= 0)
    return 0.0f;
  const float iw = std::max(std::min(ax1, bx1) - std::max(ax0, bx0), 0.0f);
  const float ih = std::max(std::min(ay1, by1) - std::max(ay0, by0), 0.0f);
  const float intersect_area = iw * ih;
  return intersect_area / (area0 + area1 - intersect_area);
}
static int non_max_suppression(std::list<DetectRegion> &region_list,
std::list<DetectRegion> &region_nms_list,
float iou_thresh) {
region_list.sort([](DetectRegion &v1, DetectRegion &v2) {
return v1.score > v2.score ? true : false;
});
for (auto itr = region_list.begin(); itr != region_list.end(); itr++) {
DetectRegion region_candidate = *itr;
int ignore_candidate = false;
for (auto itr_nms = region_nms_list.rbegin();
itr_nms != region_nms_list.rend(); itr_nms++) {
DetectRegion region_nms = *itr_nms;
float iou = calc_intersection_over_union(region_candidate, region_nms);
if (iou >= iou_thresh) {
ignore_candidate = true;
break;
}
}
if (!ignore_candidate) {
region_nms_list.push_back(region_candidate);
if (region_nms_list.size() >= 5)
break;
}
}
return 0;
}
// Wrap an angle into the half-open interval [-pi, pi).
static float normalize_radians(float angle) {
  const double two_pi = 2 * M_PI;
  return angle - two_pi * floor((angle + M_PI) / two_pi);
}
// Compute the in-plane rotation that makes the landmark0 -> landmark2
// direction point straight up, normalized to [-pi, pi).
// (Landmarks 0/2 are presumably wrist and middle-finger base — per the
// MediaPipe palm model; confirm against the model spec.)
static void compute_rotation(DetectRegion &region) {
  const float dx = region.landmarks[2].x - region.landmarks[0].x;
  const float dy = region.landmarks[2].y - region.landmarks[0].y;
  const float target_angle = M_PI * 0.5f;
  region.rotation = normalize_radians(target_angle - atan2(-dy, dx));
}
// Rotate a 2-D vector in place by `rotation` radians (counter-clockwise in
// a y-down image frame follows the standard rotation matrix used here).
void rot_vec(ov::Point2f &vec, float rotation) {
  const float c = cos(rotation);
  const float s = sin(rotation);
  const float x = vec.x;
  const float y = vec.y;
  vec.x = x * c - y * s;
  vec.y = x * s + y * c;
}
// Convert a detected palm region into a square, rotated hand ROI.
// The ROI center is shifted half a palm-height "up" along the rotated palm
// axis, the side is the longer palm dimension scaled by 2.6, and the four
// rotated corners are stored in palm.hand_pos (normalized coordinates).
// NOTE(review): the target_size parameter is currently unused here.
void compute_detect_to_roi(DetectRegion &region, const int &target_size,
PalmObject &palm) {
float width = region.btmright.x - region.topleft.x;
float height = region.btmright.y - region.topleft.y;
float palm_cx = region.topleft.x + width * 0.5f;
float palm_cy = region.topleft.y + height * 0.5f;
float hand_cx;
float hand_cy;
float rotation = region.rotation;
// Shift the ROI center by (0, -0.5) palm sizes, rotated with the palm.
float shift_x = 0.0f;
float shift_y = -0.5f;
if (rotation == 0.0f) {
hand_cx = palm_cx + (width * shift_x);
hand_cy = palm_cy + (height * shift_y);
} else {
float dx =
(width * shift_x) * cos(rotation) - (height * shift_y) * sin(rotation);
float dy =
(width * shift_x) * sin(rotation) + (height * shift_y) * cos(rotation);
hand_cx = palm_cx + dx;
hand_cy = palm_cy + dy;
}
// Square ROI: take the longer side, then enlarge by 2.6x to cover fingers.
float long_side = std::max(width, height);
width = long_side;
height = long_side;
float hand_w = width * 2.6f;
float hand_h = height * 2.6f;
palm.hand_cx = hand_cx;
palm.hand_cy = hand_cy;
palm.hand_w = hand_w;
palm.hand_h = hand_h;
// Corners relative to the center, then rotated and translated into place.
float dx = hand_w * 0.5f;
float dy = hand_h * 0.5f;
palm.hand_pos[0].x = -dx;
palm.hand_pos[0].y = -dy;
palm.hand_pos[1].x = +dx;
palm.hand_pos[1].y = -dy;
palm.hand_pos[2].x = +dx;
palm.hand_pos[2].y = +dy;
palm.hand_pos[3].x = -dx;
palm.hand_pos[3].y = +dy;
for (int i = 0; i < 4; i++) {
rot_vec(palm.hand_pos[i], rotation);
palm.hand_pos[i].x += hand_cx;
palm.hand_pos[i].y += hand_cy;
}
// Carry the raw palm landmarks and detection score along.
for (int i = 0; i < 7; i++) {
palm.landmarks[i] = region.landmarks[i];
}
palm.score = region.score;
}
static void pack_detect_result(std::vector<DetectRegion> &detect_results,
std::list<DetectRegion> &region_list,
const int &target_size,
std::vector<PalmObject> &palmlist) {
for (auto &region : region_list) {
compute_rotation(region);
PalmObject palm;
compute_detect_to_roi(region, target_size, palm);
palmlist.push_back(palm);
detect_results.push_back(region);
}
}
// Construct the two ncnn networks (palm detector + hand landmark model) with
// dedicated blob/workspace allocators. Models are loaded later by LoadModel;
// initialized_ stays false until then.
MediapipeHand::MediapipeHand() : ov::EstimatorBase() {
// Size-compare ratio 0 lets the pool allocators reuse any buffer regardless
// of size mismatch.
palm_blob_allocator_.set_size_compare_ratio(0.f);
palm_workspace_allocator_.set_size_compare_ratio(0.f);
hand_blob_allocator_.set_size_compare_ratio(0.f);
hand_workspace_allocator_.set_size_compare_ratio(0.f);
palm_net_ = new ncnn::Net();
hand_net_ = new ncnn::Net();
initialized_ = false;
if (num_threads > 0) {
palm_net_->opt.num_threads = num_threads;
hand_net_->opt.num_threads = num_threads;
}
palm_net_->opt.blob_allocator = &palm_blob_allocator_;
palm_net_->opt.workspace_allocator = &palm_workspace_allocator_;
palm_net_->opt.lightmode = light_mode_;
hand_net_->opt.blob_allocator = &hand_blob_allocator_;
hand_net_->opt.workspace_allocator = &hand_workspace_allocator_;
hand_net_->opt.lightmode = light_mode_;
#ifdef OV_VULKAN
palm_net_->opt.use_vulkan_compute = true;
hand_net_->opt.use_vulkan_compute = true;
#endif // OV_VULKAN
}
// Tear down both networks and their allocator pools.
// BUGFIX: the nets are allocated with `new` in the constructor but were only
// clear()ed here, leaking the ncnn::Net objects — delete them as well.
// Nets are destroyed before the member allocators they reference are cleared.
MediapipeHand::~MediapipeHand() {
  if (palm_net_) {
    palm_net_->clear();
    delete palm_net_;
    palm_net_ = NULL;
  }
  if (hand_net_) {
    hand_net_->clear();
    delete hand_net_;
    hand_net_ = NULL;
  }
  palm_workspace_allocator_.clear();
  palm_blob_allocator_.clear();
  hand_workspace_allocator_.clear();
  hand_blob_allocator_.clear();
}
void MediapipeHand::set_num_threads(int n) {
EstimatorBase::set_num_threads(n);
if (palm_net_) {
palm_net_->opt.num_threads = n;
}
if (hand_net_) {
hand_net_->opt.num_threads = n;
}
}
void MediapipeHand::set_light_mode(bool mode) {
if (palm_net_) {
palm_net_->opt.lightmode = mode;
}
if (hand_net_) {
hand_net_->opt.lightmode = mode;
}
light_mode_ = mode;
}
// Load the ncnn param/bin pair from each model directory ("<dir>/param" and
// "<dir>/bin"), then rebuild the SSD anchors for the detector input size.
// Returns 0 on success, 10000 if any file fails to load.
int MediapipeHand::LoadModel(const char *palm_path, const char *hand_path) {
  const std::string palm_dir(palm_path);
  const std::string hand_dir(hand_path);
  if (palm_net_->load_param((palm_dir + "/param").c_str()) == -1 ||
      palm_net_->load_model((palm_dir + "/bin").c_str()) == -1) {
    return 10000;
  }
  if (hand_net_->load_param((hand_dir + "/param").c_str()) == -1 ||
      hand_net_->load_model((hand_dir + "/bin").c_str()) == -1) {
    return 10000;
  }
  initialized_ = true;
  anchors.clear();
  create_ssd_anchors(target_size, target_size, anchors);
  return 0;
}
int MediapipeHand::Detect(const unsigned char *rgbdata, int img_width,
int img_height, std::vector<PalmObject> &objects) {
if (!initialized_) {
return 10000;
}
if (rgbdata == 0) {
return 10001;
}
int w = img_width;
int h = img_height;
float scale = 1.f;
if (w > h) {
scale = (float)target_size / w;
w = target_size;
h = h * scale;
} else {
scale = (float)target_size / h;
h = target_size;
w = w * scale;
}
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB,
img_width, img_height, w, h);
int wpad = target_size - w;
int hpad = target_size - h;
ncnn::Mat in_pad;
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2,
wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
in_pad.substract_mean_normalize(0, norm_vals);
ncnn::Extractor ex = palm_net_->create_extractor();
ncnn::Mat cls, reg;
ex.input("input", in_pad);
ex.extract("cls", cls);
ex.extract("reg", reg);
float *scores = (float *)cls.data;
float *bboxes = (float *)reg.data;
std::list<DetectRegion> region_list, region_nms_list;
std::vector<DetectRegion> detect_results;
decode_bounds(region_list, prob_threshold, target_size, target_size, scores,
bboxes, anchors);
non_max_suppression(region_list, region_nms_list, nms_threshold);
objects.clear();
pack_detect_result(detect_results, region_nms_list, target_size, objects);
for (int i = 0; i < objects.size(); i++) {
objects[i].hand_pos[0].x =
(objects[i].hand_pos[0].x * target_size - ((float)wpad / 2)) / scale;
objects[i].hand_pos[0].y =
(objects[i].hand_pos[0].y * target_size - ((float)hpad / 2)) / scale;
objects[i].hand_pos[1].x =
(objects[i].hand_pos[1].x * target_size - ((float)wpad / 2)) / scale;
objects[i].hand_pos[1].y =
(objects[i].hand_pos[1].y * target_size - ((float)hpad / 2)) / scale;
objects[i].hand_pos[2].x =
(objects[i].hand_pos[2].x * target_size - ((float)wpad / 2)) / scale;
objects[i].hand_pos[2].y =
(objects[i].hand_pos[2].y * target_size - ((float)hpad / 2)) / scale;
objects[i].hand_pos[3].x =
(objects[i].hand_pos[3].x * target_size - ((float)wpad / 2)) / scale;
objects[i].hand_pos[3].y =
(objects[i].hand_pos[3].y * target_size - ((float)hpad / 2)) / scale;
for (int j = 0; j < 7; j++) {
objects[i].landmarks[j].x =
(objects[i].landmarks[j].x * target_size - ((float)wpad / 2)) / scale;
objects[i].landmarks[j].y =
(objects[i].landmarks[j].y * target_size - ((float)hpad / 2)) / scale;
}
const float srcPts[8] = {
objects[i].hand_pos[0].x, objects[i].hand_pos[0].y,
objects[i].hand_pos[1].x, objects[i].hand_pos[1].y,
objects[i].hand_pos[2].x, objects[i].hand_pos[2].y,
objects[i].hand_pos[3].x, objects[i].hand_pos[3].y,
};
const float dstPts[8] = {
0, 0, 224, 0, 224, 224, 0, 224,
};
float tm[6];
unsigned char *trans_mat =
(unsigned char *)malloc(224 * 224 * 3 * sizeof(unsigned char));
ncnn::get_affine_transform(dstPts, srcPts, 4, tm);
ncnn::warpaffine_bilinear_c3(rgbdata, img_width, img_height, trans_mat, 224,
224, tm);
ncnn::Mat trans_image =
ncnn::Mat::from_pixels(trans_mat, ncnn::Mat::PIXEL_RGB, 224, 224);
float score = GetLandmarks(trans_image, tm, objects[i].skeleton,
objects[i].skeleton3d);
free(trans_mat);
}
return 0;
}
// Run the hand-landmark net on a 224x224 RGB crop.
// `tm` is the affine transform from crop coordinates back to the original
// image: 2-D skeleton points are mapped through it, while skeleton3d keeps
// crop-space coordinates normalized by the 224-pixel crop size (z is the
// model's raw relative depth — units per model spec, not verified here).
// Returns the first value of the "score" output (presumably hand-presence
// confidence — confirm against the model).
float MediapipeHand::GetLandmarks(ncnn::Mat in, float tm[6],
std::vector<ov::Point2f> &skeleton,
std::vector<ov::Point3d> &skeleton3d) {
const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
in.substract_mean_normalize(NULL, norm_vals);
ncnn::Mat points, score;
{
ncnn::Extractor ex = hand_net_->create_extractor();
ex.input("input", in);
ex.extract("points", points);
ex.extract("score", score);
}
float *points_data = (float *)points.data;
float *score_data = (float *)score.data;
// 21 landmarks, 3 floats each (x, y, z) in crop pixel space.
for (int i = 0; i < 21; i++) {
ov::Point3d pt3d;
pt3d.x = points_data[i * 3];
pt3d.y = points_data[i * 3 + 1];
pt3d.z = points_data[i * 3 + 2];
ov::Point2f pt;
// Map the 2-D projection back into original-image coordinates.
pt.x = pt3d.x * tm[0] + pt3d.y * tm[1] + tm[2];
pt.y = pt3d.x * tm[3] + pt3d.y * tm[4] + tm[5];
skeleton.push_back(pt);
pt3d.x /= 224.f;
pt3d.y /= 224.f;
skeleton3d.push_back(pt3d);
}
return score_data[0];
}
} // namespace ovhand3d

View File

@@ -0,0 +1,87 @@
#ifndef _HAND_POSE3D_MEDIAPIPE_H_
#define _HAND_POSE3D_MEDIAPIPE_H_
#include "../../../common/common.hpp"
#include <net.h>
namespace ovhand3d {
// One detected hand: palm score, 7 palm landmarks, in-plane rotation, the
// rotated square ROI (center/size plus its 4 corners), and the skeleton
// filled in by the landmark stage (21 points; skeleton3d keeps relative
// depth from the model).
struct PalmObject {
  float score;
  ov::Point2f landmarks[7];
  float rotation;
  float hand_cx;
  float hand_cy;
  float hand_w;
  float hand_h;
  ov::Point2f hand_pos[4];
  std::vector<ov::Point2f> skeleton;
  std::vector<ov::Point3d> skeleton3d;
};
// Raw palm-detector output, before conversion into a PalmObject ROI.
struct DetectRegion {
  float score;
  ov::Point2f topleft;
  ov::Point2f btmright;
  ov::Point2f landmarks[7];
  float rotation;
  ov::Point2f roi_center;
  ov::Point2f roi_size;
  ov::Point2f roi_coord[4];
};
// SSD anchor: normalized center plus (fixed) width/height.
struct Anchor {
  float x_center, y_center, w, h;
};
// Configuration for MediaPipe-style SSD anchor generation.
struct AnchorsParams {
  int input_size_width;
  int input_size_height;
  float min_scale;
  float max_scale;
  float anchor_offset_x;
  float anchor_offset_y;
  int num_layers;
  std::vector<int> feature_map_width;
  std::vector<int> feature_map_height;
  std::vector<int> strides;
  std::vector<float> aspect_ratios;
};
// Two-stage MediaPipe hand pipeline on ncnn: palm detection followed by a
// 21-point hand landmark model on each detected ROI.
class MediapipeHand : public ov::EstimatorBase {
public:
  MediapipeHand();
  ~MediapipeHand();
  // Load ncnn models from directories containing "param" and "bin" files.
  int LoadModel(const char *palm_model, const char *hand_model);
  // Detect hands on a raw RGB buffer; fills `objects`.
  // (FIX: parameter name typo img_heidht corrected to img_height; the
  // definition already used img_height, so this is declaration-only.)
  int Detect(const unsigned char *rgbdata, int img_width, int img_height,
             std::vector<PalmObject> &objects);
  // Run the landmark net on a 224x224 crop; `tm` maps crop coordinates back
  // into the original image. Returns the model's score output.
  float GetLandmarks(ncnn::Mat in, float tm[6],
                     std::vector<ov::Point2f> &skeleton,
                     std::vector<ov::Point3d> &skeleton3d);
  void set_light_mode(bool mode);
  void set_num_threads(int n);

private:
  ncnn::Net *palm_net_ = NULL;
  ncnn::Net *hand_net_ = NULL;
  ncnn::PoolAllocator palm_workspace_allocator_;
  ncnn::UnlockedPoolAllocator palm_blob_allocator_;
  ncnn::PoolAllocator hand_workspace_allocator_;
  ncnn::UnlockedPoolAllocator hand_blob_allocator_;
  bool initialized_ = false;
  bool light_mode_ = true;
  std::vector<Anchor> anchors;
  float prob_threshold = 0.55f;
  float nms_threshold = 0.3f;
  // Palm detector input resolution.
  const int target_size = 192;
  const float mean_vals[3] = {0.f, 0.f, 0.f};
  const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
};
} // namespace ovhand3d
#endif // !_HAND_POSE3D_MEDIAPIPE_H_

View File

@@ -0,0 +1,161 @@
#include "pptinypose.hpp"
#include <string>
#ifdef OV_VULKAN
#include "gpu.h"
#endif // OV_VULKAN
namespace ovpose {
// Per-channel argmax over a heatmap blob.
// top_blob receives one float per channel: the index (within that channel)
// of the channel's maximum value. `prob` receives the maximum value itself.
// NOTE(review): partial_sort is called with middle == last, which sorts the
// entire per-channel subrange — std::max_element over each channel would be
// cheaper; behavior is the same.
static int argmax(const ncnn::Mat &bottom_blob, ncnn::Mat &top_blob,
std::vector<float> &prob) {
int size = bottom_blob.total();
const float *ptr = bottom_blob;
// Pair every value with its flat index so sorting keeps the positions.
std::vector<std::pair<float, int>> vec;
vec.resize(size);
for (int i = 0; i < size; i++) {
vec[i] = std::make_pair(ptr[i], i);
}
top_blob.create(bottom_blob.c, 1, 1, 4u);
float *outptr = top_blob;
for (size_t i = 0; i < bottom_blob.c; i++) {
int size0 = bottom_blob.channel(i).total();
std::partial_sort(vec.begin() + size0 * i, vec.begin() + size0 * (i + 1),
vec.begin() + size0 * (i + 1),
std::greater<std::pair<float, int>>());
// Convert the flat index back to a channel-local index.
outptr[i] = vec[size0 * i].second - size0 * i;
prob.push_back(vec[size0 * i].first);
}
return 0;
}
// DARK sub-pixel refinement of one keypoint (channel `ch`), adjusting
// coords[ch*2] / coords[ch*2+1] in place around the integer peak (px, py).
// dim = {1, C, H, W} of the heatmap.
static void dark_parse(const ncnn::Mat &heatmap, std::vector<int> &dim,
                       std::vector<float> &coords, int px, int py, int ch) {
  /*DARK postpocessing, Zhang et al. Distribution-Aware Coordinate
  Representation for Human Pose Estimation (CVPR 2020).
  1) offset = - hassian.inv() * derivative
  2) dx = (heatmap[x+1] - heatmap[x-1])/2.
  3) dxx = (dx[x+1] - dx[x-1])/2.
  4) derivative = Mat([dx, dy])
  5) hassian = Mat([[dxx, dxy], [dxy, dyy]])
  */
  // FIX: the sampling stencil below reaches +/-2 pixels around (px, py);
  // skip refinement near the border to avoid out-of-bounds reads.
  if (px < 2 || px > dim[3] - 3 || py < 2 || py > dim[2] - 3) {
    return;
  }
  // Flatten the channel and smooth it with a 3x3 Gaussian before sampling
  // log-probabilities, as the DARK paper prescribes.
  // FIX: removed a conflicting duplicate declaration of `heatmap_mat` as
  // ncnn::Mat and an invalid `heatmap_ch = (float *)...` assignment (merged
  // leftovers that could not compile); the OpenCV path is kept since the
  // Hessian math below also uses cv::Mat.
  float *heatmap_data = (float *)heatmap.channel(ch).data;
  std::vector<float> heatmap_ch;
  heatmap_ch.insert(heatmap_ch.begin(), heatmap_data,
                    heatmap_data + heatmap.channel(ch).total());
  cv::Mat heatmap_mat = cv::Mat(heatmap_ch).reshape(0, dim[2]);
  heatmap_mat.convertTo(heatmap_mat, CV_32FC1);
  cv::GaussianBlur(heatmap_mat, heatmap_mat, cv::Size(3, 3), 0, 0);
  heatmap_mat = heatmap_mat.reshape(1, 1);
  heatmap_ch = std::vector<float>(heatmap_mat.reshape(1, 1));
  float epsilon = 1e-10;
  // sample heatmap to get values in around target location
  float xy = log(fmax(heatmap_ch[py * dim[3] + px], epsilon));
  float xr = log(fmax(heatmap_ch[py * dim[3] + px + 1], epsilon));
  float xl = log(fmax(heatmap_ch[py * dim[3] + px - 1], epsilon));
  float xr2 = log(fmax(heatmap_ch[py * dim[3] + px + 2], epsilon));
  float xl2 = log(fmax(heatmap_ch[py * dim[3] + px - 2], epsilon));
  float yu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px], epsilon));
  float yd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px], epsilon));
  float yu2 = log(fmax(heatmap_ch[(py + 2) * dim[3] + px], epsilon));
  float yd2 = log(fmax(heatmap_ch[(py - 2) * dim[3] + px], epsilon));
  float xryu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px + 1], epsilon));
  float xryd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px + 1], epsilon));
  float xlyu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px - 1], epsilon));
  float xlyd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px - 1], epsilon));
  // compute dx/dy and dxx/dyy with sampled values
  float dx = 0.5 * (xr - xl);
  float dy = 0.5 * (yu - yd);
  float dxx = 0.25 * (xr2 - 2 * xy + xl2);
  float dxy = 0.25 * (xryu - xryd - xlyu + xlyd);
  float dyy = 0.25 * (yu2 - 2 * xy + yd2);
  // finally get offset by derivative and hassian, which combined by dx/dy and
  // dxx/dyy
  if (dxx * dyy - dxy * dxy != 0) {
    float M[2][2] = {dxx, dxy, dxy, dyy};
    float D[2] = {dx, dy};
    cv::Mat hassian(2, 2, CV_32F, M);
    cv::Mat derivative(2, 1, CV_32F, D);
    cv::Mat offset = -hassian.inv() * derivative;
    coords[ch * 2] += offset.at<float>(0, 0);
    coords[ch * 2 + 1] += offset.at<float>(1, 0);
  }
}
// Convert per-channel argmax indices into (x, y) heatmap coordinates and
// refine each with DARK sub-pixel post-processing.
// Returns a flat vector [x0, y0, x1, y1, ...] of length heatmap.c * 2.
static std::vector<float> get_final_preds(const ncnn::Mat &heatmap,
                                          const ncnn::Mat &argmax_out) {
  std::vector<float> coords((size_t)heatmap.c * 2);
  for (int i = 0; i < heatmap.c; i++) {
    int idx = argmax_out[i];
    coords[i * 2] = idx % heatmap.w;
    // BUGFIX: the row index is the integer quotient of idx / width. The
    // previous (float)idx / heatmap.w kept the fractional column part,
    // shifting y by up to nearly one full row.
    coords[i * 2 + 1] = (float)(idx / heatmap.w);
    int px = int(coords[i * 2] + 0.5);
    int py = int(coords[i * 2 + 1] + 0.5);
    std::vector<int> dim({1, heatmap.c, heatmap.h, heatmap.w});
    dark_parse(heatmap, dim, coords, px, py, i);
  }
  return coords;
}
// Select the network input resolution from the requested target size.
// PP-TinyPose ships two variants: 128x96 and 256x192 (height x width).
PPTinyPoseEstimator::PPTinyPoseEstimator(int target_size) : Estimator() {
  if (target_size == 128) {
    target_width_ = 96;
    target_height_ = 128;
  } else {
    // BUGFIX: the large variant is 192 px wide (was mistyped as 196),
    // matching the official PP-TinyPose 256x192 model input.
    target_width_ = 192;
    target_height_ = 256;
  }
}
// Extract pose keypoints inside `rect` of a raw RGB buffer.
// Keypoints are returned in original-image coordinates with the per-channel
// heatmap maximum as their score.
// Returns 0 on success, 10000 if no model is loaded, 10001 on null input.
int PPTinyPoseEstimator::ExtractKeypoints(
    const unsigned char *rgbdata, int img_width, int img_height,
    const ov::Rect &rect, std::vector<ov::Keypoint> *keypoints) {
  if (!initialized_) {
    return 10000;
  }
  if (rgbdata == 0) {
    return 10001;
  }
  keypoints->clear();
  // Crop the ROI and resize to the network input resolution.
  ncnn::Mat in = ncnn::Mat::from_pixels_roi_resize(
      rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, rect.x, rect.y,
      rect.width, rect.height, target_width_, target_height_);
  in.substract_mean_normalize(meanVals, normVals);
  ncnn::Extractor ex = net_->create_extractor();
  ex.set_light_mode(light_mode_);
  ex.set_num_threads(num_threads);
  ex.input("image", in);
  ncnn::Mat out;
  ex.extract("save_infer_model/scale_0.tmp_1", out);
  ncnn::Mat argmax_out;
  std::vector<float> probs;
  argmax(out, argmax_out, probs);
  std::vector<float> coords = get_final_preds(out, argmax_out);
  for (size_t i = 0; i < coords.size() / 2; i++) {
    // Use ov::Keypoint to match the signature (was ov::KeyPoint).
    ov::Keypoint keypoint;
    // Scale heatmap coordinates back to the ROI, then offset into the image.
    // BUGFIX: use rect.height for the y scale — `rect.h` is not a field
    // (the crop above uses rect.height).
    keypoint.p =
        ov::Point(coords[i * 2] * rect.width / (float)out.w + rect.x,
                  coords[i * 2 + 1] * rect.height / (float)out.h + rect.y);
    keypoint.score = probs[i];
    keypoints->push_back(keypoint);
  }
  return 0;
}
} // namespace ovpose

View File

@@ -0,0 +1,25 @@
#ifndef _POSE_PPTINYPOSE_ESTIMATOR_H_
#define _POSE_PPTINYPOSE_ESTIMATOR_H_
#include "../estimator.hpp"
#include "net.h"
#include <vector>
namespace ovpose {
// PP-TinyPose keypoint estimator running on ncnn.
class PPTinyPoseEstimator : public Estimator {
public:
// target_size selects the model variant: 128 -> 96x128 input, anything else
// selects the larger input resolution.
PPTinyPoseEstimator(int target_size);
// Extract keypoints inside `rect` of a raw RGB buffer; coordinates are
// returned in original-image space. Returns 0 on success.
int ExtractKeypoints(const unsigned char *rgbdata, int img_width,
int img_height, const ov::Rect &rect,
std::vector<ov::Keypoint> *keypoints);
private:
// Network input resolution (width x height), set by the constructor.
int target_width_ = 96;
int target_height_ = 128;
// ImageNet RGB mean/std normalization constants.
const float meanVals[3] = {123.675f, 116.28f, 103.53f};
const float normVals[3] = {0.01712475f, 0.0175f, 0.01742919f};
};
} // namespace ovpose
#endif // !_POSE_PPTINYPOSE_ESTIMATOR_H_