mirror of
https://github.com/bububa/openvision.git
synced 2025-09-26 17:51:13 +08:00
feat(hand): add mediapipe hand 3d pose detecter
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -70,3 +70,5 @@ _testmain.go
|
||||
test
|
||||
.vim
|
||||
dist/
|
||||
|
||||
libtorch/
|
||||
|
@@ -53,6 +53,8 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM
|
||||
- nanodet [Google Drive](https://drive.google.com/drive/folders/1ywH7r_clqqA_BAOFSzA92Q0lxJtWlN3z?usp=sharing)
|
||||
- pose (for hand pose estimation)
|
||||
- handnet [Google Drive](https://drive.google.com/drive/folders/1DsCGmiVaZobbMWRp5Oec8GbIpeg7CsNR?usp=sharing)
|
||||
- pose3d (for 3d handpose detection)
|
||||
- mediapipe [Google Drive](https://drive.google.com/drive/folders/1LsqIGB55dusZJqmP1uhnQUnNE2tLzifp?usp=sharing)
|
||||
- styletransfer
|
||||
- animegan2 [Google Drive](https://drive.google.com/drive/folders/1K6ZScENPHVbxupHkwl5WcpG8PPECtD8e?usp=sharing)
|
||||
- tracker
|
||||
|
@@ -90,6 +90,9 @@ func NewCPoint2fVector() *C.Point2fVector {
|
||||
|
||||
// GoPoint2fVector convert C.Point2fVector to []Point
|
||||
func GoPoint2fVector(cVector *C.Point2fVector, w float64, h float64) []Point {
|
||||
if cVector == nil {
|
||||
return nil
|
||||
}
|
||||
l := int(cVector.length)
|
||||
ret := make([]Point, 0, l)
|
||||
ptr := unsafe.Pointer(cVector.points)
|
||||
@@ -105,3 +108,52 @@ func FreeCPoint2fVector(c *C.Point2fVector) {
|
||||
C.FreePoint2fVector(c)
|
||||
C.free(unsafe.Pointer(c))
|
||||
}
|
||||
|
||||
// Point3d is a point in three-dimensional space.
type Point3d struct {
	// X, Y and Z are the coordinates along the three axes.
	X, Y, Z float64
}
|
||||
|
||||
// Pt3d returns a New Point3d
|
||||
func Pt3d(x, y, z float64) Point3d {
|
||||
return Point3d{x, y, z}
|
||||
}
|
||||
|
||||
// ZP3d is the zero Point3d (all coordinates 0).
var ZP3d = Point3d{}
|
||||
|
||||
// GoPoint3d conver C.Point3d to Point3d
|
||||
func GoPoint3d(c *C.Point3d) Point3d {
|
||||
return Pt3d(
|
||||
float64(c.x),
|
||||
float64(c.y),
|
||||
float64(c.z),
|
||||
)
|
||||
}
|
||||
|
||||
// NewCPoint3dVector retruns C.Point3dVector pointer
|
||||
func NewCPoint3dVector() *C.Point3dVector {
|
||||
return (*C.Point3dVector)(C.malloc(C.sizeof_Point3d))
|
||||
}
|
||||
|
||||
// GoPoint3dVector convert C.Point3dVector to []Point3d
|
||||
func GoPoint3dVector(cVector *C.Point3dVector) []Point3d {
|
||||
if cVector == nil {
|
||||
return nil
|
||||
}
|
||||
l := int(cVector.length)
|
||||
ret := make([]Point3d, 0, l)
|
||||
ptr := unsafe.Pointer(cVector.points)
|
||||
for i := 0; i < l; i++ {
|
||||
cPoint3d := (*C.Point3d)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_Point3d*C.int(i))))
|
||||
ret = append(ret, GoPoint3d(cPoint3d))
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
// FreeCPoint3dVector releases the memory held by a C.Point3dVector.
//
// It first calls C.FreePoint3dVector on c and then frees the vector struct
// itself with C.free, so c must not be used afterwards.
func FreeCPoint3dVector(c *C.Point3dVector) {
	C.FreePoint3dVector(c)
	C.free(unsafe.Pointer(c))
}
|
||||
|
@@ -20,6 +20,8 @@ type ObjectInfo struct {
|
||||
Rect Rectangle
|
||||
// Points keypoints
|
||||
Keypoints []Keypoint
|
||||
// Name
|
||||
Name string
|
||||
}
|
||||
|
||||
// GoObjectInfo convert C.ObjectInfo to go type
|
||||
|
56
go/common/palmobject.go
Normal file
56
go/common/palmobject.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package common
|
||||
|
||||
/*
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include "openvision/common/common.h"
|
||||
#include "openvision/hand/pose3d.h"
|
||||
*/
|
||||
import "C"
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// PalmObject is the Go representation of a C.PalmObject, as built by
// GoPalmObject.
type PalmObject struct {
	// Score is copied from the C object's score field.
	Score float64
	// Rotation is copied from the C object's rotation field.
	// NOTE(review): the unit (radians vs. degrees) is not visible here — confirm.
	Rotation float64
	// Rect holds the points converted from the C object's rect vector.
	Rect []Point
	// Landmarks holds the points converted from the C object's landmarks vector.
	Landmarks []Point
	// Skeleton holds the 2D points converted from the C object's skeleton vector.
	Skeleton []Point
	// Skeleton3d holds the 3D points converted from the C object's skeleton3d vector.
	Skeleton3d []Point3d
}
|
||||
|
||||
// NewCPalmObjectVector returns a pointer to a newly malloc'ed
// C.PalmObjectVector. The memory comes straight from malloc and is not
// initialised here; release it with FreeCPalmObjectVector.
func NewCPalmObjectVector() *C.PalmObjectVector {
	return (*C.PalmObjectVector)(C.malloc(C.sizeof_PalmObjectVector))
}
|
||||
|
||||
// FreeCPalmObjectVector releases the memory held by a *C.PalmObjectVector.
//
// It first calls C.FreePalmObjectVector on p and then frees the vector struct
// itself with C.free, so p must not be used afterwards.
func FreeCPalmObjectVector(p *C.PalmObjectVector) {
	C.FreePalmObjectVector(p)
	C.free(unsafe.Pointer(p))
}
|
||||
|
||||
// GoPalmObject convert C.PalmObject to Go type
|
||||
func GoPalmObject(cObj *C.PalmObject, w float64, h float64) PalmObject {
|
||||
return PalmObject{
|
||||
Score: float64(cObj.score),
|
||||
Rotation: float64(cObj.rotation),
|
||||
Rect: GoPoint2fVector(cObj.rect, w, h),
|
||||
Landmarks: GoPoint2fVector(cObj.landmarks, w, h),
|
||||
Skeleton: GoPoint2fVector(cObj.skeleton, w, h),
|
||||
Skeleton3d: GoPoint3dVector(cObj.skeleton3d),
|
||||
}
|
||||
}
|
||||
|
||||
func GoPalmObjectVector(c *C.PalmObjectVector, w float64, h float64) []PalmObject {
|
||||
l := int(c.length)
|
||||
ret := make([]PalmObject, 0, l)
|
||||
ptr := unsafe.Pointer(c.items)
|
||||
for i := 0; i < l; i++ {
|
||||
cObj := (*C.PalmObject)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_PalmObject*C.int(i))))
|
||||
ret = append(ret, GoPalmObject(cObj, w, h))
|
||||
}
|
||||
return ret
|
||||
}
|
@@ -15,6 +15,7 @@ import (
|
||||
"github.com/bububa/openvision/go/hand/detecter"
|
||||
handdrawer "github.com/bububa/openvision/go/hand/drawer"
|
||||
"github.com/bububa/openvision/go/hand/pose"
|
||||
"github.com/bububa/openvision/go/hand/pose3d"
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -27,17 +28,19 @@ func main() {
|
||||
cpuCores := common.GetBigCPUCount()
|
||||
common.SetOMPThreads(cpuCores)
|
||||
log.Printf("CPU big cores:%d\n", cpuCores)
|
||||
estimator := handpose(modelPath)
|
||||
defer estimator.Destroy()
|
||||
common.SetEstimatorThreads(estimator, cpuCores)
|
||||
for idx, d := range []detecter.Detecter{
|
||||
yolox(modelPath),
|
||||
nanodet(modelPath),
|
||||
} {
|
||||
defer d.Destroy()
|
||||
common.SetEstimatorThreads(d, cpuCores)
|
||||
detect(d, estimator, imgPath, "hand1.jpg", idx)
|
||||
}
|
||||
// estimator := handpose(modelPath)
|
||||
// defer estimator.Destroy()
|
||||
// common.SetEstimatorThreads(estimator, cpuCores)
|
||||
// for idx, d := range []detecter.Detecter{
|
||||
// yolox(modelPath),
|
||||
// nanodet(modelPath),
|
||||
// } {
|
||||
// defer d.Destroy()
|
||||
// common.SetEstimatorThreads(d, cpuCores)
|
||||
// detect(d, estimator, imgPath, "hand2.jpg", idx)
|
||||
// }
|
||||
d3d := mediapipe(modelPath)
|
||||
detect3d(d3d, imgPath, "hand1.jpg")
|
||||
}
|
||||
|
||||
func yolox(modelPath string) detecter.Detecter {
|
||||
@@ -67,6 +70,16 @@ func handpose(modelPath string) pose.Estimator {
|
||||
return d
|
||||
}
|
||||
|
||||
func mediapipe(modelPath string) *pose3d.Mediapipe {
|
||||
palmPath := filepath.Join(modelPath, "mediapipe/palm/full")
|
||||
handPath := filepath.Join(modelPath, "mediapipe/hand/full")
|
||||
d := pose3d.NewMediapipe()
|
||||
if err := d.LoadModel(palmPath, handPath); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename string, idx int) {
|
||||
inPath := filepath.Join(imgPath, filename)
|
||||
imgSrc, err := loadImage(inPath)
|
||||
@@ -104,6 +117,36 @@ func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename stri
|
||||
if err := saveImage(out, outPath); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
}
|
||||
|
||||
func detect3d(d *pose3d.Mediapipe, imgPath string, filename string) {
|
||||
inPath := filepath.Join(imgPath, filename)
|
||||
imgSrc, err := loadImage(inPath)
|
||||
if err != nil {
|
||||
log.Fatalln("load image failed,", err)
|
||||
}
|
||||
img := common.NewImage(imgSrc)
|
||||
rois, err := d.Detect(img)
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
log.Printf("%+v\n", rois)
|
||||
drawer := handdrawer.New()
|
||||
outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("pose3d-hand-%s", filename))
|
||||
out := drawer.DrawPalm(img, rois)
|
||||
|
||||
if err := saveImage(out, outPath); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
for idx, roi := range rois {
|
||||
outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("pose3d-palm3d-%d-%s", idx, filename))
|
||||
out := drawer.DrawPalm3D(roi, 400, "#442519")
|
||||
|
||||
if err := saveImage(out, outPath); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@@ -1,6 +1,6 @@
|
||||
// +build !vulkan
|
||||
|
||||
package eye
|
||||
package tracker
|
||||
|
||||
/*
|
||||
#cgo CXXFLAGS: --std=c++11 -fopenmp
|
||||
|
@@ -7,71 +7,16 @@ import (
|
||||
const (
|
||||
// DefaultBorderColor default drawer border color
|
||||
DefaultBorderColor = common.Green
|
||||
// DefaultKeypointColor default drawer keypoint color
|
||||
DefaultKeypointColor = common.Pink
|
||||
// DefaultBorderStrokeWidth default drawer border stroke width
|
||||
DefaultBorderStrokeWidth = 3
|
||||
// DefaultKeypointRadius default drawer keypoint radius
|
||||
DefaultKeypointRadius = 3
|
||||
// DefaultKeypointStrokeWidth default drawer keypoint stroke width
|
||||
DefaultKeypointStrokeWidth = 1
|
||||
)
|
||||
|
||||
// CocoPart coco part define
|
||||
type CocoPart = int
|
||||
|
||||
const (
|
||||
// CocoPartNose nose
|
||||
CocoPartNose CocoPart = iota
|
||||
// CocoPartLEye left eye
|
||||
CocoPartLEye
|
||||
// CocoPartREye right eye
|
||||
CocoPartREye
|
||||
// CocoPartLEar left ear
|
||||
CocoPartLEar
|
||||
// CocoPartREar right ear
|
||||
CocoPartREar
|
||||
// CocoPartLShoulder left sholder
|
||||
CocoPartLShoulder
|
||||
// CocoPartRShoulder right sholder
|
||||
CocoPartRShoulder
|
||||
// CocoPartLElbow left elbow
|
||||
CocoPartLElbow
|
||||
// CocoPartRElbow right elbow
|
||||
CocoPartRElbow
|
||||
// CocoPartLWrist left wrist
|
||||
CocoPartLWrist
|
||||
// CocoPartRWrist right wrist
|
||||
CocoPartRWrist
|
||||
// CocoPartLHip left hip
|
||||
CocoPartLHip
|
||||
// CocoPartRHip right hip
|
||||
CocoPartRHip
|
||||
// CocoPartLKnee left knee
|
||||
CocoPartLKnee
|
||||
// CocoPartRKnee right knee
|
||||
CocoPartRKnee
|
||||
// CocoPartRAnkle right ankle
|
||||
CocoPartRAnkle
|
||||
// CocoPartLAnkle left ankle
|
||||
CocoPartLAnkle
|
||||
// CocoPartNeck neck
|
||||
CocoPartNeck
|
||||
// CocoPartBackground background
|
||||
CocoPartBackground
|
||||
)
|
||||
|
||||
var (
|
||||
// CocoPair represents joints pair
|
||||
CocoPair = [16][2]CocoPart{
|
||||
{0, 1}, {1, 3}, {0, 2}, {2, 4}, {5, 6}, {5, 7}, {7, 9}, {6, 8}, {8, 10}, {5, 11}, {6, 12}, {11, 12}, {11, 13}, {12, 14}, {13, 15}, {14, 16},
|
||||
}
|
||||
// CocoColors represents color for coco parts
|
||||
CocoColors = [17]string{
|
||||
"#ff0000", "#ff5500", "#ffaa00", "#ffff00",
|
||||
"#aaff00", "#55ff00", "#00ff00", "#00ff55", "#00ffaa",
|
||||
"#00ffff", "#00aaff", "#0055ff",
|
||||
"#0000ff", "#aa00ff", "#ff00ff",
|
||||
"#ff00aa", "#ff0055",
|
||||
}
|
||||
// DefaultLabelColor default label color
|
||||
DefaultLabelColor = common.White
|
||||
)
|
||||
|
||||
var (
|
||||
|
@@ -2,8 +2,10 @@ package drawer
|
||||
|
||||
import (
|
||||
"image"
|
||||
"image/color"
|
||||
|
||||
"github.com/llgcode/draw2d/draw2dimg"
|
||||
"github.com/llgcode/draw2d/draw2dkit"
|
||||
|
||||
"github.com/bububa/openvision/go/common"
|
||||
)
|
||||
@@ -18,6 +20,12 @@ type Drawer struct {
|
||||
KeypointStrokeWidth float64
|
||||
// KeypointRadius represents keypoints circle radius
|
||||
KeypointRadius float64
|
||||
// KeypointColor represents keypoint color
|
||||
KeypointColor string
|
||||
// LabelColor represents label color
|
||||
LabelColor string
|
||||
// Font represents the font used to draw labels
|
||||
Font *common.Font
|
||||
}
|
||||
|
||||
// New returns a new Drawer
|
||||
@@ -27,6 +35,8 @@ func New(options ...Option) *Drawer {
|
||||
BorderStrokeWidth: DefaultBorderStrokeWidth,
|
||||
KeypointStrokeWidth: DefaultKeypointStrokeWidth,
|
||||
KeypointRadius: DefaultKeypointRadius,
|
||||
KeypointColor: DefaultKeypointColor,
|
||||
LabelColor: DefaultLabelColor,
|
||||
}
|
||||
for _, opt := range options {
|
||||
opt.apply(d)
|
||||
@@ -42,15 +52,15 @@ func (d *Drawer) Draw(img image.Image, rois []common.ObjectInfo, drawBorder bool
|
||||
gc := draw2dimg.NewGraphicContext(out)
|
||||
gc.DrawImage(img)
|
||||
for _, roi := range rois {
|
||||
rect := common.Rect(
|
||||
roi.Rect.X*imgW,
|
||||
roi.Rect.Y*imgH,
|
||||
roi.Rect.Width*imgW,
|
||||
roi.Rect.Height*imgH,
|
||||
)
|
||||
borderColor := d.BorderColor
|
||||
if drawBorder {
|
||||
// draw rect
|
||||
rect := common.Rect(
|
||||
roi.Rect.X*imgW,
|
||||
roi.Rect.Y*imgH,
|
||||
roi.Rect.Width*imgW,
|
||||
roi.Rect.Height*imgH,
|
||||
)
|
||||
borderColor := d.BorderColor
|
||||
common.DrawRectangle(gc, rect, borderColor, "", d.BorderStrokeWidth)
|
||||
}
|
||||
l := len(roi.Keypoints)
|
||||
@@ -95,6 +105,115 @@ func (d *Drawer) Draw(img image.Image, rois []common.ObjectInfo, drawBorder bool
|
||||
poseColor := PoseColors[colorIdx]
|
||||
common.DrawCircle(gc, common.Pt(pt.Point.X*imgW, pt.Point.Y*imgH), d.KeypointRadius, poseColor, "", d.KeypointStrokeWidth)
|
||||
}
|
||||
// draw name
|
||||
if roi.Name != "" {
|
||||
common.DrawLabelInWidth(gc, d.Font, roi.Name, common.Pt(rect.X, rect.MaxY()), d.LabelColor, borderColor, rect.Width)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// DrawPalm draw PalmObject
|
||||
func (d *Drawer) DrawPalm(img image.Image, rois []common.PalmObject) image.Image {
|
||||
imgW := float64(img.Bounds().Dx())
|
||||
imgH := float64(img.Bounds().Dy())
|
||||
out := image.NewRGBA(img.Bounds())
|
||||
gc := draw2dimg.NewGraphicContext(out)
|
||||
gc.DrawImage(img)
|
||||
for _, roi := range rois {
|
||||
gc.SetLineWidth(d.BorderStrokeWidth)
|
||||
gc.SetStrokeColor(common.ColorFromHex(d.BorderColor))
|
||||
gc.BeginPath()
|
||||
for idx, pt := range roi.Rect {
|
||||
gc.MoveTo(pt.X*imgW, pt.Y*imgH)
|
||||
if idx == len(roi.Rect)-1 {
|
||||
gc.LineTo(roi.Rect[0].X*imgW, roi.Rect[0].Y*imgH)
|
||||
} else {
|
||||
gc.LineTo(roi.Rect[idx+1].X*imgW, roi.Rect[idx+1].Y*imgH)
|
||||
}
|
||||
}
|
||||
gc.Close()
|
||||
gc.Stroke()
|
||||
|
||||
l := len(roi.Skeleton)
|
||||
if l == 0 {
|
||||
continue
|
||||
}
|
||||
// draw skeleton
|
||||
for idx := range roi.Skeleton[:l-1] {
|
||||
var (
|
||||
p0 common.Point
|
||||
p1 common.Point
|
||||
poseColor = PoseColors[idx/4]
|
||||
)
|
||||
gc.SetStrokeColor(common.ColorFromHex(poseColor))
|
||||
if idx == 5 || idx == 9 || idx == 13 || idx == 17 {
|
||||
p0 = roi.Skeleton[0]
|
||||
p1 = roi.Skeleton[idx]
|
||||
gc.BeginPath()
|
||||
gc.MoveTo(p0.X*imgW, p0.Y*imgH)
|
||||
gc.LineTo(p1.X*imgW, p1.Y*imgH)
|
||||
gc.Close()
|
||||
gc.Stroke()
|
||||
} else if idx == 4 || idx == 8 || idx == 12 || idx == 16 {
|
||||
continue
|
||||
}
|
||||
p0 = roi.Skeleton[idx]
|
||||
p1 = roi.Skeleton[idx+1]
|
||||
gc.BeginPath()
|
||||
gc.MoveTo(p0.X*imgW, p0.Y*imgH)
|
||||
gc.LineTo(p1.X*imgW, p1.Y*imgH)
|
||||
gc.Close()
|
||||
gc.Stroke()
|
||||
}
|
||||
for _, pt := range roi.Landmarks {
|
||||
common.DrawCircle(gc, common.Pt(pt.X*imgW, pt.Y*imgH), d.KeypointRadius, d.KeypointColor, "", d.KeypointStrokeWidth)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// DrawPalm3D draw 3d PalmObject
|
||||
func (d *Drawer) DrawPalm3D(roi common.PalmObject, size float64, bg string) image.Image {
|
||||
out := image.NewRGBA(image.Rect(0, 0, int(size), int(size)))
|
||||
gc := draw2dimg.NewGraphicContext(out)
|
||||
l := len(roi.Skeleton3d)
|
||||
if l == 0 {
|
||||
return out
|
||||
}
|
||||
if bg != "" {
|
||||
bgColor := common.ColorFromHex(bg)
|
||||
gc.SetFillColor(bgColor)
|
||||
draw2dkit.Rectangle(gc, 0, 0, size, size)
|
||||
gc.Fill()
|
||||
gc.SetFillColor(color.Transparent)
|
||||
}
|
||||
// draw skeleton3d
|
||||
for idx := range roi.Skeleton3d[:l-1] {
|
||||
var (
|
||||
p0 common.Point3d
|
||||
p1 common.Point3d
|
||||
poseColor = PoseColors[idx/4]
|
||||
)
|
||||
gc.SetStrokeColor(common.ColorFromHex(poseColor))
|
||||
if idx == 5 || idx == 9 || idx == 13 || idx == 17 {
|
||||
p0 = roi.Skeleton3d[0]
|
||||
p1 = roi.Skeleton3d[idx]
|
||||
gc.BeginPath()
|
||||
gc.MoveTo(p0.X*size, p0.Y*size)
|
||||
gc.LineTo(p1.X*size, p1.Y*size)
|
||||
gc.Close()
|
||||
gc.Stroke()
|
||||
} else if idx == 4 || idx == 8 || idx == 12 || idx == 16 {
|
||||
continue
|
||||
}
|
||||
p0 = roi.Skeleton3d[idx]
|
||||
p1 = roi.Skeleton3d[idx+1]
|
||||
gc.BeginPath()
|
||||
gc.MoveTo(p0.X*size, p0.Y*size)
|
||||
gc.LineTo(p1.X*size, p1.Y*size)
|
||||
gc.Close()
|
||||
gc.Stroke()
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
@@ -1,5 +1,9 @@
|
||||
package drawer
|
||||
|
||||
import (
|
||||
"github.com/bububa/openvision/go/common"
|
||||
)
|
||||
|
||||
// Option represents Drawer option interface
|
||||
type Option interface {
|
||||
apply(*Drawer)
|
||||
@@ -38,3 +42,17 @@ func WithKeypointStrokeWidth(w float64) Option {
|
||||
d.KeypointStrokeWidth = w
|
||||
})
|
||||
}
|
||||
|
||||
// WithKeypointColor set Drawer KeypointColor
|
||||
func WithKeypointColor(color string) Option {
|
||||
return optionFunc(func(d *Drawer) {
|
||||
d.KeypointColor = color
|
||||
})
|
||||
}
|
||||
|
||||
// WithFont set Drawer Font
|
||||
func WithFont(font *common.Font) Option {
|
||||
return optionFunc(func(d *Drawer) {
|
||||
d.Font = font
|
||||
})
|
||||
}
|
||||
|
11
go/hand/pose3d/cgo.go
Normal file
11
go/hand/pose3d/cgo.go
Normal file
@@ -0,0 +1,11 @@
|
||||
// +build !vulkan
|
||||
|
||||
package pose3d
|
||||
|
||||
/*
|
||||
#cgo CXXFLAGS: --std=c++11 -fopenmp
|
||||
#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include
|
||||
#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision
|
||||
#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib
|
||||
*/
|
||||
import "C"
|
11
go/hand/pose3d/cgo_vulkan.go
Normal file
11
go/hand/pose3d/cgo_vulkan.go
Normal file
@@ -0,0 +1,11 @@
|
||||
// +build vulkan
|
||||
|
||||
package pose3d
|
||||
|
||||
/*
|
||||
#cgo CXXFLAGS: --std=c++11 -fopenmp
|
||||
#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include
|
||||
#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision -lglslang -lvulkan -lSPIRV -lOGLCompiler -lMachineIndependent -lGenericCodeGen -lOSDependent
|
||||
#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib
|
||||
*/
|
||||
import "C"
|
2
go/hand/pose3d/doc.go
Normal file
2
go/hand/pose3d/doc.go
Normal file
@@ -0,0 +1,2 @@
|
||||
// Package pose3d provides a hand 3D pose estimator.
|
||||
package pose3d
|
62
go/hand/pose3d/mediapipe.go
Normal file
62
go/hand/pose3d/mediapipe.go
Normal file
@@ -0,0 +1,62 @@
|
||||
package pose3d
|
||||
|
||||
/*
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include "openvision/common/common.h"
|
||||
#include "openvision/hand/pose3d.h"
|
||||
*/
|
||||
import "C"
|
||||
import (
|
||||
"unsafe"
|
||||
|
||||
openvision "github.com/bububa/openvision/go"
|
||||
"github.com/bububa/openvision/go/common"
|
||||
)
|
||||
|
||||
// Mediapipe wraps a native mediapipe hand 3D pose estimator.
type Mediapipe struct {
	// d is the native C.IHandPose3DEstimator handle, created by
	// C.new_mediapipe_hand and released by Destroy.
	d C.IHandPose3DEstimator
}
|
||||
|
||||
// NewMediapipe returns a Mediapipe backed by a new native estimator created
// with C.new_mediapipe_hand. Call Destroy to release it.
func NewMediapipe() *Mediapipe {
	return &Mediapipe{
		d: C.new_mediapipe_hand(),
	}
}
|
||||
|
||||
// Destroy releases the native estimator via C.destroy_mediapipe_hand.
func (m *Mediapipe) Destroy() {
	C.destroy_mediapipe_hand(m.d)
}
|
||||
|
||||
func (m *Mediapipe) LoadModel(palmPath string, handPath string) error {
|
||||
cPalm := C.CString(palmPath)
|
||||
defer C.free(unsafe.Pointer(cPalm))
|
||||
cHand := C.CString(handPath)
|
||||
defer C.free(unsafe.Pointer(cHand))
|
||||
retCode := C.mediapipe_hand_load_model(m.d, cPalm, cHand)
|
||||
if retCode != 0 {
|
||||
return openvision.LoadModelError(int(retCode))
|
||||
}
|
||||
return nil
|
||||
|
||||
}
|
||||
|
||||
// Detect detect hand 3d pose
|
||||
func (m *Mediapipe) Detect(img *common.Image) ([]common.PalmObject, error) {
|
||||
imgWidth := img.WidthF64()
|
||||
imgHeight := img.HeightF64()
|
||||
data := img.Bytes()
|
||||
cObjs := common.NewCPalmObjectVector()
|
||||
defer common.FreeCPalmObjectVector(cObjs)
|
||||
errCode := C.mediapipe_hand_detect(
|
||||
m.d,
|
||||
(*C.uchar)(unsafe.Pointer(&data[0])),
|
||||
C.int(imgWidth), C.int(imgHeight),
|
||||
(*C.PalmObjectVector)(unsafe.Pointer(cObjs)),
|
||||
)
|
||||
if errCode != 0 {
|
||||
return nil, openvision.DetectHandError(int(errCode))
|
||||
}
|
||||
return common.GoPalmObjectVector(cObjs, imgWidth, imgHeight), nil
|
||||
}
|
@@ -71,6 +71,7 @@ target_include_directories(openvision
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand>
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter>
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/pose>
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/pose3d>
|
||||
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose>
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose/detecter>
|
||||
@@ -109,6 +110,7 @@ file(COPY
|
||||
file(COPY
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hand/pose.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hand/pose3d.h
|
||||
DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/hand
|
||||
)
|
||||
|
||||
|
@@ -58,6 +58,13 @@ void FreePoint2fVector(Point2fVector *p) {
|
||||
}
|
||||
}
|
||||
|
||||
// FreePoint3dVector frees the points array owned by p and resets the pointer
// to NULL. The Point3dVector struct itself is not freed. A NULL p is ignored.
void FreePoint3dVector(Point3dVector *p) {
  if (p == NULL) {
    return;
  }
  if (p->points != NULL) {
    free(p->points);
    p->points = NULL;
  }
}
|
||||
|
||||
void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val) {
|
||||
if (p->points == NULL || i >= p->length) {
|
||||
return;
|
||||
|
@@ -11,123 +11,134 @@ typedef ov::Size Size;
|
||||
typedef ov::Size2f Size2f;
|
||||
typedef ov::Point Point;
|
||||
typedef ov::Point2f Point2f;
|
||||
typedef ov::Point3d Point3d;
|
||||
typedef ov::Rect Rect;
|
||||
typedef ov::Keypoint Keypoint;
|
||||
#else
|
||||
|
||||
// Wrapper for an individual cv::cvSize
|
||||
typedef struct Size {
|
||||
int width;
|
||||
int height;
|
||||
int width;
|
||||
int height;
|
||||
} Size;
|
||||
//
|
||||
// Wrapper for an individual cv::cvSize2f
|
||||
typedef struct Size2f {
|
||||
int width;
|
||||
int height;
|
||||
int width;
|
||||
int height;
|
||||
} Size2f;
|
||||
|
||||
// Wrapper for an individual cv::cvPoint
|
||||
typedef struct Point {
|
||||
int x;
|
||||
int y;
|
||||
int x;
|
||||
int y;
|
||||
} Point;
|
||||
|
||||
// Wrapper for an individual cv::Point2f
|
||||
typedef struct Point2f {
|
||||
float x;
|
||||
float y;
|
||||
float x;
|
||||
float y;
|
||||
} Point2f;
|
||||
|
||||
// Wrapper for an individual 3D point with float x, y and z coordinates
typedef struct Point3d {
  float x;
  float y;
  float z;
} Point3d;
|
||||
|
||||
// Wrapper for an individual cv::Rect
|
||||
typedef struct Rect {
|
||||
int x;
|
||||
int y;
|
||||
int width;
|
||||
int height;
|
||||
int x;
|
||||
int y;
|
||||
int width;
|
||||
int height;
|
||||
} Rect;
|
||||
|
||||
|
||||
typedef struct Keypoint {
|
||||
Point2f p;
|
||||
float score;
|
||||
int id;
|
||||
Point2f p;
|
||||
float score;
|
||||
int id;
|
||||
} Keypoint;
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
typedef void* IEstimator;
|
||||
typedef void *IEstimator;
|
||||
|
||||
int get_gpu_count();
|
||||
int create_gpu_instance();
|
||||
void destroy_gpu_instance();
|
||||
int get_big_cpu_count();
|
||||
int get_big_cpu_count();
|
||||
void set_omp_num_threads(int n);
|
||||
int load_model(IEstimator e, const char* root_path);
|
||||
int load_model(IEstimator e, const char *root_path);
|
||||
void destroy_estimator(IEstimator e);
|
||||
void set_num_threads(IEstimator e, int n);
|
||||
void set_light_mode(IEstimator e, bool mode);
|
||||
|
||||
typedef struct Point2fVector {
|
||||
Point2f* points;
|
||||
int length;
|
||||
Point2f *points;
|
||||
int length;
|
||||
} Point2fVector;
|
||||
|
||||
void FreePoint2fVector(Point2fVector *p);
|
||||
void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f* val);
|
||||
void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val);
|
||||
|
||||
// Point3dVector is an array of Point3d together with its length.
typedef struct Point3dVector {
  Point3d *points; // pointer to the first element
  int length;      // presumably the number of elements in points — confirm
} Point3dVector;

// FreePoint3dVector releases the points array of p.
void FreePoint3dVector(Point3dVector *p);
|
||||
|
||||
typedef struct RectVector {
|
||||
Rect* rects;
|
||||
int length;
|
||||
Rect *rects;
|
||||
int length;
|
||||
} RectVector;
|
||||
|
||||
void FreeRectVector(RectVector *p);
|
||||
|
||||
typedef struct FloatVector {
|
||||
float* values;
|
||||
int length;
|
||||
float *values;
|
||||
int length;
|
||||
} FloatVector;
|
||||
|
||||
void FreeFloatVector(FloatVector *p);
|
||||
|
||||
typedef struct Bytes {
|
||||
unsigned char* values;
|
||||
int length;
|
||||
unsigned char *values;
|
||||
int length;
|
||||
} Bytes;
|
||||
|
||||
void FreeBytes(Bytes *p);
|
||||
|
||||
typedef struct KeypointVector {
|
||||
Keypoint* points;
|
||||
int length;
|
||||
Keypoint *points;
|
||||
int length;
|
||||
} KeypointVector;
|
||||
|
||||
void FreeKeypointVector(KeypointVector *p);
|
||||
void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint* val);
|
||||
void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint *val);
|
||||
|
||||
typedef struct ImageC {
|
||||
unsigned char* data;
|
||||
int width;
|
||||
int height;
|
||||
int channels;
|
||||
unsigned char *data;
|
||||
int width;
|
||||
int height;
|
||||
int channels;
|
||||
} Image;
|
||||
|
||||
void FreeImage(Image* p);
|
||||
void FreeImage(Image *p);
|
||||
|
||||
typedef struct ObjectInfoC {
|
||||
Rect rect;
|
||||
float score;
|
||||
int label;
|
||||
KeypointVector* pts;
|
||||
Rect rect;
|
||||
float score;
|
||||
int label;
|
||||
KeypointVector *pts;
|
||||
} ObjectInfo;
|
||||
|
||||
void FreeObjectInfo(ObjectInfo *p);
|
||||
|
||||
typedef struct ObjectInfoVector {
|
||||
ObjectInfo* items;
|
||||
int length;
|
||||
ObjectInfo *items;
|
||||
int length;
|
||||
} ObjectInfoVector;
|
||||
|
||||
void FreeObjectInfoVector(ObjectInfoVector *p);
|
||||
|
@@ -76,6 +76,13 @@ struct Point2f {
|
||||
};
|
||||
};
|
||||
|
||||
// A 3D point with float coordinates; every coordinate defaults to 0.
struct Point3d {
  float x;
  float y;
  float z;
  Point3d(float px = 0, float py = 0, float pz = 0) : x(px), y(py), z(pz) {}
};
|
||||
|
||||
// Wrapper for an individual cv::Rect
|
||||
struct Rect {
|
||||
int x;
|
||||
|
@@ -1,6 +1,6 @@
|
||||
#include "nanodet.hpp"
|
||||
#include <string>
|
||||
#include <float.h>
|
||||
#include <string>
|
||||
|
||||
#ifdef OV_VULKAN
|
||||
#include "gpu.h"
|
||||
@@ -8,227 +8,219 @@
|
||||
|
||||
namespace ovhand {
|
||||
|
||||
static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Mat& dis_pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<ov::ObjectInfo>& objects)
|
||||
{
|
||||
static void generate_nanodet_proposals(const ncnn::Mat &cls_pred,
|
||||
const ncnn::Mat &dis_pred, int stride,
|
||||
const ncnn::Mat &in_pad,
|
||||
float prob_threshold,
|
||||
std::vector<ov::ObjectInfo> &objects) {
|
||||
|
||||
const int num_grid = cls_pred.h;
|
||||
const int num_grid = cls_pred.h;
|
||||
|
||||
int num_grid_x;
|
||||
int num_grid_y;
|
||||
if (in_pad.w > in_pad.h)
|
||||
{
|
||||
num_grid_x = in_pad.w / stride;
|
||||
num_grid_y = num_grid / num_grid_x;
|
||||
}
|
||||
else
|
||||
{
|
||||
num_grid_y = in_pad.h / stride;
|
||||
num_grid_x = num_grid / num_grid_y;
|
||||
}
|
||||
int num_grid_x;
|
||||
int num_grid_y;
|
||||
if (in_pad.w > in_pad.h) {
|
||||
num_grid_x = in_pad.w / stride;
|
||||
num_grid_y = num_grid / num_grid_x;
|
||||
} else {
|
||||
num_grid_y = in_pad.h / stride;
|
||||
num_grid_x = num_grid / num_grid_y;
|
||||
}
|
||||
|
||||
const int num_class = cls_pred.w;
|
||||
const int reg_max_1 = dis_pred.w / 4;
|
||||
//__android_log_print(ANDROID_LOG_WARN, "ncnn","cls_pred h %d, w %d",cls_pred.h,cls_pred.w);
|
||||
//__android_log_print(ANDROID_LOG_WARN, "ncnn","%d,%d,%d,%d",num_grid_x,num_grid_y,num_class,reg_max_1);
|
||||
for (int i = 0; i < num_grid_y; i++)
|
||||
{
|
||||
for (int j = 0; j < num_grid_x; j++)
|
||||
{
|
||||
const int idx = i * num_grid_x + j;
|
||||
const int num_class = cls_pred.w;
|
||||
const int reg_max_1 = dis_pred.w / 4;
|
||||
|
||||
const float* scores = cls_pred.row(idx);
|
||||
for (int i = 0; i < num_grid_y; i++) {
|
||||
for (int j = 0; j < num_grid_x; j++) {
|
||||
const int idx = i * num_grid_x + j;
|
||||
|
||||
// find label with max score
|
||||
int label = -1;
|
||||
float score = -FLT_MAX;
|
||||
for (int k = 0; k < num_class; k++)
|
||||
{
|
||||
if (scores[k] > score)
|
||||
{
|
||||
label = k;
|
||||
score = scores[k];
|
||||
}
|
||||
}
|
||||
const float *scores = cls_pred.row(idx);
|
||||
|
||||
if (score >= prob_threshold)
|
||||
{
|
||||
ncnn::Mat bbox_pred(reg_max_1, 4, (void*)dis_pred.row(idx));
|
||||
{
|
||||
ncnn::Layer* softmax = ncnn::create_layer("Softmax");
|
||||
|
||||
ncnn::ParamDict pd;
|
||||
pd.set(0, 1); // axis
|
||||
pd.set(1, 1);
|
||||
softmax->load_param(pd);
|
||||
|
||||
ncnn::Option opt;
|
||||
// opt.num_threads = 1;
|
||||
opt.use_packing_layout = false;
|
||||
|
||||
softmax->create_pipeline(opt);
|
||||
|
||||
softmax->forward_inplace(bbox_pred, opt);
|
||||
|
||||
softmax->destroy_pipeline(opt);
|
||||
|
||||
delete softmax;
|
||||
}
|
||||
|
||||
float pred_ltrb[4];
|
||||
for (int k = 0; k < 4; k++)
|
||||
{
|
||||
float dis = 0.f;
|
||||
const float* dis_after_sm = bbox_pred.row(k);
|
||||
for (int l = 0; l < reg_max_1; l++)
|
||||
{
|
||||
dis += l * dis_after_sm[l];
|
||||
}
|
||||
|
||||
pred_ltrb[k] = dis * stride;
|
||||
}
|
||||
|
||||
float pb_cx = (j + 0.5f) * stride;
|
||||
float pb_cy = (i + 0.5f) * stride;
|
||||
|
||||
float x0 = pb_cx - pred_ltrb[0];
|
||||
float y0 = pb_cy - pred_ltrb[1];
|
||||
float x1 = pb_cx + pred_ltrb[2];
|
||||
float y1 = pb_cy + pred_ltrb[3];
|
||||
|
||||
ov::ObjectInfo obj;
|
||||
obj.rect.x = x0;
|
||||
obj.rect.y = y0;
|
||||
obj.rect.width = x1 - x0;
|
||||
obj.rect.height = y1 - y0;
|
||||
obj.label = label;
|
||||
obj.score= score;
|
||||
|
||||
objects.push_back(obj);
|
||||
}
|
||||
// find label with max score
|
||||
int label = -1;
|
||||
float score = -FLT_MAX;
|
||||
for (int k = 0; k < num_class; k++) {
|
||||
if (scores[k] > score) {
|
||||
label = k;
|
||||
score = scores[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int Nanodet::Detect(const unsigned char* rgbdata,
|
||||
int img_width, int img_height,
|
||||
std::vector<ov::ObjectInfo>& rois) {
|
||||
if (!initialized_) {
|
||||
return 10000;
|
||||
}
|
||||
if (rgbdata == 0){
|
||||
return 10001;
|
||||
}
|
||||
|
||||
int w = img_width;
|
||||
int h = img_height;
|
||||
float scale = 1.f;
|
||||
if (w > h) {
|
||||
scale = (float)target_size / w;
|
||||
w = target_size;
|
||||
h = h * scale;
|
||||
} else {
|
||||
scale = (float)target_size / h;
|
||||
h = target_size;
|
||||
w = w * scale;
|
||||
}
|
||||
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, w, h);
|
||||
|
||||
// pad to target_size rectangle
|
||||
float wpad = 320-w;//(w + 31) / 32 * 32 - w;
|
||||
float hpad = 320-h;//(h + 31) / 32 * 32 - h;
|
||||
ncnn::Mat in_pad;
|
||||
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
|
||||
|
||||
in_pad.substract_mean_normalize(mean_vals, norm_vals);
|
||||
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("input.1", in_pad);
|
||||
|
||||
std::vector<ov::ObjectInfo> proposals;
|
||||
// stride 8
|
||||
{
|
||||
ncnn::Mat cls_pred;
|
||||
ncnn::Mat dis_pred;
|
||||
ex.extract("cls_pred_stride_8", cls_pred);
|
||||
ex.extract("dis_pred_stride_8", dis_pred);
|
||||
|
||||
std::vector<ov::ObjectInfo> objects8;
|
||||
generate_nanodet_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold, objects8);
|
||||
|
||||
proposals.insert(proposals.end(), objects8.begin(), objects8.end());
|
||||
}
|
||||
|
||||
// stride 16
|
||||
{
|
||||
ncnn::Mat cls_pred;
|
||||
ncnn::Mat dis_pred;
|
||||
ex.extract("cls_pred_stride_16", cls_pred);
|
||||
ex.extract("dis_pred_stride_16", dis_pred);
|
||||
|
||||
std::vector<ov::ObjectInfo> objects16;
|
||||
generate_nanodet_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold, objects16);
|
||||
|
||||
proposals.insert(proposals.end(), objects16.begin(), objects16.end());
|
||||
}
|
||||
|
||||
// stride 32
|
||||
{
|
||||
ncnn::Mat cls_pred;
|
||||
ncnn::Mat dis_pred;
|
||||
ex.extract("cls_pred_stride_32", cls_pred);
|
||||
ex.extract("dis_pred_stride_32", dis_pred);
|
||||
|
||||
std::vector<ov::ObjectInfo> objects32;
|
||||
generate_nanodet_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold, objects32);
|
||||
|
||||
proposals.insert(proposals.end(), objects32.begin(), objects32.end());
|
||||
}
|
||||
|
||||
// sort all proposals by score from highest to lowest
|
||||
qsort_descent_inplace(proposals);
|
||||
|
||||
// apply nms with nms_threshold
|
||||
std::vector<int> picked;
|
||||
nms_sorted_bboxes(proposals, picked, nms_threshold);
|
||||
|
||||
int count = picked.size();
|
||||
rois.resize(count);
|
||||
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
ov::ObjectInfo roi = proposals[picked[i]];
|
||||
|
||||
// adjust offset to original unpadded
|
||||
float x0 = (roi.rect.x - (wpad / 2)) / scale;
|
||||
float y0 = (roi.rect.y - (hpad / 2)) / scale;
|
||||
float x1 = (roi.rect.x + roi.rect.width - (wpad / 2)) / scale;
|
||||
float y1 = (roi.rect.y + roi.rect.height - (hpad / 2)) / scale;
|
||||
|
||||
// clip
|
||||
x0 = std::max(std::min(x0, (float)(img_width - 1)), 0.f);
|
||||
y0 = std::max(std::min(y0, (float)(img_height - 1)), 0.f);
|
||||
x1 = std::max(std::min(x1, (float)(img_width - 1)), 0.f);
|
||||
y1 = std::max(std::min(y1, (float)(img_height - 1)), 0.f);
|
||||
|
||||
roi.rect.x = x0;
|
||||
roi.rect.y = y0;
|
||||
roi.rect.width = x1 - x0;
|
||||
roi.rect.height = y1 - y0;
|
||||
|
||||
rois[i] = roi;
|
||||
}
|
||||
// sort objects by area
|
||||
struct
|
||||
{
|
||||
bool operator()(const ov::ObjectInfo& a, const ov::ObjectInfo& b) const
|
||||
if (score >= prob_threshold) {
|
||||
ncnn::Mat bbox_pred(reg_max_1, 4, (void *)dis_pred.row(idx));
|
||||
{
|
||||
return a.rect.area() > b.rect.area();
|
||||
ncnn::Layer *softmax = ncnn::create_layer("Softmax");
|
||||
|
||||
ncnn::ParamDict pd;
|
||||
pd.set(0, 1); // axis
|
||||
pd.set(1, 1);
|
||||
softmax->load_param(pd);
|
||||
|
||||
ncnn::Option opt;
|
||||
opt.num_threads = 1;
|
||||
opt.use_packing_layout = false;
|
||||
|
||||
softmax->create_pipeline(opt);
|
||||
|
||||
softmax->forward_inplace(bbox_pred, opt);
|
||||
|
||||
softmax->destroy_pipeline(opt);
|
||||
|
||||
delete softmax;
|
||||
}
|
||||
} objects_area_greater;
|
||||
std::sort(rois.begin(), rois.end(), objects_area_greater);
|
||||
return 0;
|
||||
|
||||
float pred_ltrb[4];
|
||||
for (int k = 0; k < 4; k++) {
|
||||
float dis = 0.f;
|
||||
const float *dis_after_sm = bbox_pred.row(k);
|
||||
for (int l = 0; l < reg_max_1; l++) {
|
||||
dis += l * dis_after_sm[l];
|
||||
}
|
||||
pred_ltrb[k] = dis * stride;
|
||||
}
|
||||
|
||||
float pb_cx = (j + 0.5f) * stride;
|
||||
float pb_cy = (i + 0.5f) * stride;
|
||||
|
||||
float x0 = pb_cx - pred_ltrb[0];
|
||||
float y0 = pb_cy - pred_ltrb[1];
|
||||
float x1 = pb_cx + pred_ltrb[2];
|
||||
float y1 = pb_cy + pred_ltrb[3];
|
||||
|
||||
ov::ObjectInfo obj;
|
||||
obj.rect.x = x0;
|
||||
obj.rect.y = y0;
|
||||
obj.rect.width = x1 - x0;
|
||||
obj.rect.height = y1 - y0;
|
||||
obj.label = label;
|
||||
obj.score = score;
|
||||
|
||||
objects.push_back(obj);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int Nanodet::Detect(const unsigned char *rgbdata, int img_width, int img_height,
|
||||
std::vector<ov::ObjectInfo> &rois) {
|
||||
if (!initialized_) {
|
||||
return 10000;
|
||||
}
|
||||
if (rgbdata == 0) {
|
||||
return 10001;
|
||||
}
|
||||
|
||||
int w = img_width;
|
||||
int h = img_height;
|
||||
float scale = 1.f;
|
||||
if (w > h) {
|
||||
scale = (float)target_size / w;
|
||||
w = target_size;
|
||||
h = h * scale;
|
||||
} else {
|
||||
scale = (float)target_size / h;
|
||||
h = target_size;
|
||||
w = w * scale;
|
||||
}
|
||||
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB,
|
||||
img_width, img_height, w, h);
|
||||
|
||||
// pad to target_size rectangle
|
||||
float wpad = 320 - w; //(w + 31) / 32 * 32 - w;
|
||||
float hpad = 320 - h; //(h + 31) / 32 * 32 - h;
|
||||
ncnn::Mat in_pad;
|
||||
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2,
|
||||
wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
|
||||
|
||||
in_pad.substract_mean_normalize(mean_vals, norm_vals);
|
||||
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("input.1", in_pad);
|
||||
|
||||
std::vector<ov::ObjectInfo> proposals;
|
||||
// stride 8
|
||||
{
|
||||
ncnn::Mat cls_pred;
|
||||
ncnn::Mat dis_pred;
|
||||
ex.extract("cls_pred_stride_8", cls_pred);
|
||||
ex.extract("dis_pred_stride_8", dis_pred);
|
||||
|
||||
std::vector<ov::ObjectInfo> objects8;
|
||||
generate_nanodet_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold,
|
||||
objects8);
|
||||
|
||||
proposals.insert(proposals.end(), objects8.begin(), objects8.end());
|
||||
}
|
||||
|
||||
// stride 16
|
||||
{
|
||||
ncnn::Mat cls_pred;
|
||||
ncnn::Mat dis_pred;
|
||||
ex.extract("cls_pred_stride_16", cls_pred);
|
||||
ex.extract("dis_pred_stride_16", dis_pred);
|
||||
|
||||
std::vector<ov::ObjectInfo> objects16;
|
||||
generate_nanodet_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold,
|
||||
objects16);
|
||||
|
||||
proposals.insert(proposals.end(), objects16.begin(), objects16.end());
|
||||
}
|
||||
|
||||
// stride 32
|
||||
{
|
||||
ncnn::Mat cls_pred;
|
||||
ncnn::Mat dis_pred;
|
||||
ex.extract("cls_pred_stride_32", cls_pred);
|
||||
ex.extract("dis_pred_stride_32", dis_pred);
|
||||
|
||||
std::vector<ov::ObjectInfo> objects32;
|
||||
generate_nanodet_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold,
|
||||
objects32);
|
||||
|
||||
proposals.insert(proposals.end(), objects32.begin(), objects32.end());
|
||||
}
|
||||
|
||||
// sort all proposals by score from highest to lowest
|
||||
qsort_descent_inplace(proposals);
|
||||
|
||||
// apply nms with nms_threshold
|
||||
std::vector<int> picked;
|
||||
nms_sorted_bboxes(proposals, picked, nms_threshold);
|
||||
|
||||
int count = picked.size();
|
||||
rois.resize(count);
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
ov::ObjectInfo roi = proposals[picked[i]];
|
||||
|
||||
// adjust offset to original unpadded
|
||||
float x0 = (roi.rect.x - (wpad / 2)) / scale;
|
||||
float y0 = (roi.rect.y - (hpad / 2)) / scale;
|
||||
float x1 = (roi.rect.x + roi.rect.width - (wpad / 2)) / scale;
|
||||
float y1 = (roi.rect.y + roi.rect.height - (hpad / 2)) / scale;
|
||||
|
||||
// clip
|
||||
x0 = std::max(std::min(x0, (float)(img_width - 1)), 0.f);
|
||||
y0 = std::max(std::min(y0, (float)(img_height - 1)), 0.f);
|
||||
x1 = std::max(std::min(x1, (float)(img_width - 1)), 0.f);
|
||||
y1 = std::max(std::min(y1, (float)(img_height - 1)), 0.f);
|
||||
|
||||
roi.rect.x = x0;
|
||||
roi.rect.y = y0;
|
||||
roi.rect.width = x1 - x0;
|
||||
roi.rect.height = y1 - y0;
|
||||
|
||||
rois[i] = roi;
|
||||
}
|
||||
// sort objects by area
|
||||
struct {
|
||||
bool operator()(const ov::ObjectInfo &a, const ov::ObjectInfo &b) const {
|
||||
return a.rect.area() > b.rect.area();
|
||||
}
|
||||
} objects_area_greater;
|
||||
std::sort(rois.begin(), rois.end(), objects_area_greater);
|
||||
return 0;
|
||||
}
|
||||
} // namespace ovhand
|
||||
|
37
src/hand/pose3d.h
Normal file
37
src/hand/pose3d.h
Normal file
@@ -0,0 +1,37 @@
|
||||
#ifndef _HAND_POSE3D_C_H_
|
||||
#define _HAND_POSE3D_C_H_
|
||||
|
||||
#include "../common/common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
typedef struct PalmObject {
|
||||
float score;
|
||||
float rotation;
|
||||
Point2fVector *rect;
|
||||
Point2fVector *landmarks;
|
||||
Point2fVector *skeleton;
|
||||
Point3dVector *skeleton3d;
|
||||
} PalmObject;
|
||||
|
||||
typedef struct PalmObjectVector {
|
||||
PalmObject *items;
|
||||
int length;
|
||||
} PalmObjectVector;
|
||||
|
||||
void FreePalmObject(PalmObject *obj);
|
||||
void FreePalmObjectVector(PalmObjectVector *vec);
|
||||
|
||||
typedef void *IHandPose3DEstimator;
|
||||
IHandPose3DEstimator new_mediapipe_hand();
|
||||
void destroy_mediapipe_hand(IHandPose3DEstimator d);
|
||||
int mediapipe_hand_load_model(IHandPose3DEstimator d, const char *palm_path,
|
||||
const char *hand_path);
|
||||
int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata,
|
||||
int img_width, int img_height, PalmObjectVector *vec);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // !_HAND_POSE3D_C_H_
|
106
src/hand/pose3d/estimator.cpp
Normal file
106
src/hand/pose3d/estimator.cpp
Normal file
@@ -0,0 +1,106 @@
|
||||
#include "../pose3d.h"
|
||||
#include "mediapipe/mediapipe.hpp"
|
||||
#include <iostream>
|
||||
|
||||
// Releases everything a PalmObject owns and resets its pointers to NULL.
//
// Each vector member is a separately malloc'ed struct whose `points` buffer
// is released by FreePoint2fVector / FreePoint3dVector; the struct itself
// must then be freed as well, otherwise it leaks on every detection.
// `obj` itself is not freed. A NULL `obj` is ignored.
void FreePalmObject(PalmObject *obj) {
  if (obj == NULL) {
    return;
  }
  if (obj->rect != NULL) {
    FreePoint2fVector(obj->rect);
    free(obj->rect);
    obj->rect = NULL;
  }
  if (obj->skeleton != NULL) {
    FreePoint2fVector(obj->skeleton);
    free(obj->skeleton);
    obj->skeleton = NULL;
  }
  if (obj->skeleton3d != NULL) {
    FreePoint3dVector(obj->skeleton3d);
    free(obj->skeleton3d);
    obj->skeleton3d = NULL;
  }
  if (obj->landmarks != NULL) {
    FreePoint2fVector(obj->landmarks);
    free(obj->landmarks);
    obj->landmarks = NULL;
  }
}
|
||||
|
||||
// Releases every PalmObject stored in `vec` and then the `items` array,
// leaving `items` set to NULL. `vec` itself is not freed.
void FreePalmObjectVector(PalmObjectVector *vec) {
  if (vec->items == NULL) {
    return;
  }
  PalmObject *item = vec->items;
  int remaining = vec->length;
  while (remaining-- > 0) {
    FreePalmObject(item++);
  }
  free(vec->items);
  vec->items = NULL;
}
|
||||
|
||||
// Allocates a MediapipeHand estimator and returns it as an opaque handle.
IHandPose3DEstimator new_mediapipe_hand() {
  ovhand3d::MediapipeHand *estimator = new ovhand3d::MediapipeHand();
  return estimator;
}
|
||||
|
||||
// Destroys an estimator previously returned by new_mediapipe_hand.
void destroy_mediapipe_hand(IHandPose3DEstimator d) {
  ovhand3d::MediapipeHand *estimator =
      static_cast<ovhand3d::MediapipeHand *>(d);
  delete estimator;
}
|
||||
|
||||
// Forwards to MediapipeHand::LoadModel on the estimator behind the handle and
// returns its status code.
int mediapipe_hand_load_model(IHandPose3DEstimator d, const char *palm_path,
                              const char *hand_path) {
  ovhand3d::MediapipeHand *estimator =
      static_cast<ovhand3d::MediapipeHand *>(d);
  const int status = estimator->LoadModel(palm_path, hand_path);
  return status;
}
|
||||
|
||||
int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata,
|
||||
int img_width, int img_height,
|
||||
PalmObjectVector *objects) {
|
||||
std::vector<ovhand3d::PalmObject> objs;
|
||||
|
||||
int ret = static_cast<ovhand3d::MediapipeHand *>(d)->Detect(
|
||||
rgbdata, img_width, img_height, objs);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
const size_t total_objs = objs.size();
|
||||
objects->length = total_objs;
|
||||
if (total_objs == 0) {
|
||||
objects->items = NULL;
|
||||
return 0;
|
||||
}
|
||||
objects->items = (PalmObject *)malloc(total_objs * sizeof(PalmObject));
|
||||
for (size_t i = 0; i < total_objs; ++i) {
|
||||
objects->items[i].score = objs[i].score;
|
||||
objects->items[i].rotation = objs[i].rotation;
|
||||
objects->items[i].rect = (Point2fVector *)malloc(sizeof(Point2fVector));
|
||||
objects->items[i].rect->length = 4;
|
||||
objects->items[i].rect->points = (Point2f *)malloc(4 * sizeof(Point2f));
|
||||
for (size_t j = 0; j < 4; ++j) {
|
||||
objects->items[i].rect->points[j] = objs[i].hand_pos[j];
|
||||
}
|
||||
objects->items[i].landmarks =
|
||||
(Point2fVector *)malloc(sizeof(Point2fVector));
|
||||
objects->items[i].landmarks->length = 7;
|
||||
objects->items[i].landmarks->points =
|
||||
(Point2f *)malloc(4 * sizeof(Point2f));
|
||||
for (size_t j = 0; j < 7; ++j) {
|
||||
objects->items[i].landmarks->points[j] = objs[i].landmarks[j];
|
||||
}
|
||||
const size_t total_skeleton = objs[i].skeleton.size();
|
||||
if (total_skeleton == 0) {
|
||||
objects->items[i].skeleton = NULL;
|
||||
objects->items[i].skeleton3d = NULL;
|
||||
continue;
|
||||
}
|
||||
objects->items[i].skeleton = (Point2fVector *)malloc(sizeof(Point2fVector));
|
||||
objects->items[i].skeleton->length = total_skeleton;
|
||||
objects->items[i].skeleton->points =
|
||||
(Point2f *)malloc(total_skeleton * sizeof(Point2f));
|
||||
objects->items[i].skeleton3d =
|
||||
(Point3dVector *)malloc(sizeof(Point3dVector));
|
||||
objects->items[i].skeleton3d->length = total_skeleton;
|
||||
objects->items[i].skeleton3d->points =
|
||||
(Point3d *)malloc(total_skeleton * sizeof(Point3d));
|
||||
for (size_t j = 0; j < total_skeleton; ++j) {
|
||||
objects->items[i].skeleton->points[j].x = objs[i].skeleton[j].x;
|
||||
objects->items[i].skeleton->points[j].y = objs[i].skeleton[j].y;
|
||||
objects->items[i].skeleton3d->points[j].x = objs[i].skeleton3d[j].x;
|
||||
objects->items[i].skeleton3d->points[j].y = objs[i].skeleton3d[j].y;
|
||||
objects->items[i].skeleton3d->points[j].z = objs[i].skeleton3d[j].z;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
534
src/hand/pose3d/mediapipe/mediapipe.cpp
Normal file
534
src/hand/pose3d/mediapipe/mediapipe.cpp
Normal file
@@ -0,0 +1,534 @@
|
||||
#include "mediapipe.hpp"
|
||||
#include "mat.h"
|
||||
#include <math.h>
|
||||
|
||||
namespace ovhand3d {
|
||||
|
||||
// Returns the anchor scale for layer `stride_index` out of `num_strides`
// layers, interpolating linearly from `min_scale` (first layer) to
// `max_scale` (last layer). With a single layer the midpoint is returned.
static float calculate_scale(float min_scale, float max_scale, int stride_index,
                             int num_strides) {
  if (num_strides != 1) {
    // The interpolation is evaluated in double precision and narrowed on
    // return.
    const double span = (max_scale - min_scale) * 1.0;
    const float last_index = num_strides - 1.0f;
    return min_scale + span * stride_index / last_index;
  }
  return (min_scale + max_scale) * 0.5f;
}
|
||||
|
||||
static void generate_anchors(std::vector<Anchor> &anchors,
|
||||
const AnchorsParams &anchor_params) {
|
||||
int layer_id = 0;
|
||||
for (int layer_id = 0; layer_id < anchor_params.strides.size();) {
|
||||
std::vector<float> anchor_height;
|
||||
std::vector<float> anchor_width;
|
||||
std::vector<float> aspect_ratios;
|
||||
std::vector<float> scales;
|
||||
|
||||
int last_same_stride_layer = layer_id;
|
||||
while (last_same_stride_layer < (int)anchor_params.strides.size() &&
|
||||
anchor_params.strides[last_same_stride_layer] ==
|
||||
anchor_params.strides[layer_id]) {
|
||||
const float scale =
|
||||
calculate_scale(anchor_params.min_scale, anchor_params.max_scale,
|
||||
last_same_stride_layer, anchor_params.strides.size());
|
||||
{
|
||||
for (int aspect_ratio_id = 0;
|
||||
aspect_ratio_id < (int)anchor_params.aspect_ratios.size();
|
||||
aspect_ratio_id++) {
|
||||
aspect_ratios.push_back(anchor_params.aspect_ratios[aspect_ratio_id]);
|
||||
scales.push_back(scale);
|
||||
}
|
||||
|
||||
const float scale_next =
|
||||
last_same_stride_layer == (int)anchor_params.strides.size() - 1
|
||||
? 1.0f
|
||||
: calculate_scale(
|
||||
anchor_params.min_scale, anchor_params.max_scale,
|
||||
last_same_stride_layer + 1, anchor_params.strides.size());
|
||||
scales.push_back(sqrt(scale * scale_next));
|
||||
aspect_ratios.push_back(1.0);
|
||||
}
|
||||
last_same_stride_layer++;
|
||||
}
|
||||
|
||||
for (int i = 0; i < (int)aspect_ratios.size(); ++i) {
|
||||
const float ratio_sqrts = sqrt(aspect_ratios[i]);
|
||||
anchor_height.push_back(scales[i] / ratio_sqrts);
|
||||
anchor_width.push_back(scales[i] * ratio_sqrts);
|
||||
}
|
||||
|
||||
int feature_map_height = 0;
|
||||
int feature_map_width = 0;
|
||||
const int stride = anchor_params.strides[layer_id];
|
||||
feature_map_height = ceil(1.0f * anchor_params.input_size_height / stride);
|
||||
feature_map_width = ceil(1.0f * anchor_params.input_size_width / stride);
|
||||
|
||||
for (int y = 0; y < feature_map_height; ++y) {
|
||||
for (int x = 0; x < feature_map_width; ++x) {
|
||||
for (int anchor_id = 0; anchor_id < (int)anchor_height.size();
|
||||
++anchor_id) {
|
||||
const float x_center =
|
||||
(x + anchor_params.anchor_offset_x) * 1.0f / feature_map_width;
|
||||
const float y_center =
|
||||
(y + anchor_params.anchor_offset_y) * 1.0f / feature_map_height;
|
||||
|
||||
Anchor new_anchor;
|
||||
new_anchor.x_center = x_center;
|
||||
new_anchor.y_center = y_center;
|
||||
|
||||
new_anchor.w = 1.0f;
|
||||
new_anchor.h = 1.0f;
|
||||
|
||||
anchors.push_back(new_anchor);
|
||||
}
|
||||
}
|
||||
}
|
||||
layer_id = last_same_stride_layer;
|
||||
}
|
||||
}
|
||||
|
||||
static void create_ssd_anchors(int input_w, int input_h,
|
||||
std::vector<Anchor> &anchors) {
|
||||
AnchorsParams anchor_options;
|
||||
anchor_options.num_layers = 4;
|
||||
anchor_options.min_scale = 0.1484375;
|
||||
anchor_options.max_scale = 0.75;
|
||||
anchor_options.input_size_height = 192;
|
||||
anchor_options.input_size_width = 192;
|
||||
anchor_options.anchor_offset_x = 0.5f;
|
||||
anchor_options.anchor_offset_y = 0.5f;
|
||||
anchor_options.strides.push_back(8);
|
||||
anchor_options.strides.push_back(16);
|
||||
anchor_options.strides.push_back(16);
|
||||
anchor_options.strides.push_back(16);
|
||||
anchor_options.aspect_ratios.push_back(1.0);
|
||||
generate_anchors(anchors, anchor_options);
|
||||
}
|
||||
|
||||
static int decode_bounds(std::list<DetectRegion> ®ion_list,
|
||||
float score_thresh, int input_img_w, int input_img_h,
|
||||
float *scores_ptr, float *bboxes_ptr,
|
||||
std::vector<Anchor> &anchors) {
|
||||
DetectRegion region;
|
||||
int i = 0;
|
||||
for (auto &anchor : anchors) {
|
||||
float score = ov::sigmoid(scores_ptr[i]);
|
||||
|
||||
if (score > score_thresh) {
|
||||
float *p = bboxes_ptr + (i * 18);
|
||||
|
||||
float cx = p[0] / input_img_w + anchor.x_center;
|
||||
float cy = p[1] / input_img_h + anchor.y_center;
|
||||
float w = p[2] / input_img_w;
|
||||
float h = p[3] / input_img_h;
|
||||
|
||||
ov::Point2f topleft, btmright;
|
||||
topleft.x = cx - w * 0.5f;
|
||||
topleft.y = cy - h * 0.5f;
|
||||
btmright.x = cx + w * 0.5f;
|
||||
btmright.y = cy + h * 0.5f;
|
||||
|
||||
region.score = score;
|
||||
region.topleft = topleft;
|
||||
region.btmright = btmright;
|
||||
|
||||
for (int j = 0; j < 7; j++) {
|
||||
float lx = p[4 + (2 * j) + 0];
|
||||
float ly = p[4 + (2 * j) + 1];
|
||||
lx += anchor.x_center * input_img_w;
|
||||
ly += anchor.y_center * input_img_h;
|
||||
lx /= (float)input_img_w;
|
||||
ly /= (float)input_img_h;
|
||||
|
||||
region.landmarks[j].x = lx;
|
||||
region.landmarks[j].y = ly;
|
||||
}
|
||||
|
||||
region_list.push_back(region);
|
||||
}
|
||||
i++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static float calc_intersection_over_union(DetectRegion ®ion0,
|
||||
DetectRegion ®ion1) {
|
||||
float sx0 = region0.topleft.x;
|
||||
float sy0 = region0.topleft.y;
|
||||
float ex0 = region0.btmright.x;
|
||||
float ey0 = region0.btmright.y;
|
||||
float sx1 = region1.topleft.x;
|
||||
float sy1 = region1.topleft.y;
|
||||
float ex1 = region1.btmright.x;
|
||||
float ey1 = region1.btmright.y;
|
||||
|
||||
float xmin0 = std::min(sx0, ex0);
|
||||
float ymin0 = std::min(sy0, ey0);
|
||||
float xmax0 = std::max(sx0, ex0);
|
||||
float ymax0 = std::max(sy0, ey0);
|
||||
float xmin1 = std::min(sx1, ex1);
|
||||
float ymin1 = std::min(sy1, ey1);
|
||||
float xmax1 = std::max(sx1, ex1);
|
||||
float ymax1 = std::max(sy1, ey1);
|
||||
|
||||
float area0 = (ymax0 - ymin0) * (xmax0 - xmin0);
|
||||
float area1 = (ymax1 - ymin1) * (xmax1 - xmin1);
|
||||
if (area0 <= 0 || area1 <= 0)
|
||||
return 0.0f;
|
||||
|
||||
float intersect_xmin = std::max(xmin0, xmin1);
|
||||
float intersect_ymin = std::max(ymin0, ymin1);
|
||||
float intersect_xmax = std::min(xmax0, xmax1);
|
||||
float intersect_ymax = std::min(ymax0, ymax1);
|
||||
|
||||
float intersect_area = std::max(intersect_ymax - intersect_ymin, 0.0f) *
|
||||
std::max(intersect_xmax - intersect_xmin, 0.0f);
|
||||
|
||||
return intersect_area / (area0 + area1 - intersect_area);
|
||||
}
|
||||
|
||||
static int non_max_suppression(std::list<DetectRegion> ®ion_list,
|
||||
std::list<DetectRegion> ®ion_nms_list,
|
||||
float iou_thresh) {
|
||||
region_list.sort([](DetectRegion &v1, DetectRegion &v2) {
|
||||
return v1.score > v2.score ? true : false;
|
||||
});
|
||||
|
||||
for (auto itr = region_list.begin(); itr != region_list.end(); itr++) {
|
||||
DetectRegion region_candidate = *itr;
|
||||
|
||||
int ignore_candidate = false;
|
||||
for (auto itr_nms = region_nms_list.rbegin();
|
||||
itr_nms != region_nms_list.rend(); itr_nms++) {
|
||||
DetectRegion region_nms = *itr_nms;
|
||||
|
||||
float iou = calc_intersection_over_union(region_candidate, region_nms);
|
||||
if (iou >= iou_thresh) {
|
||||
ignore_candidate = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ignore_candidate) {
|
||||
region_nms_list.push_back(region_candidate);
|
||||
if (region_nms_list.size() >= 5)
|
||||
break;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Wraps `angle` (radians) by subtracting whole turns so that the result lies
// in the range starting at -pi and spanning one full turn.
static float normalize_radians(float angle) {
  const double full_turn = 2 * M_PI;
  const double turns = floor((angle - (-M_PI)) / full_turn);
  return angle - full_turn * turns;
}
|
||||
|
||||
static void compute_rotation(DetectRegion ®ion) {
|
||||
float x0 = region.landmarks[0].x;
|
||||
float y0 = region.landmarks[0].y;
|
||||
float x1 = region.landmarks[2].x;
|
||||
float y1 = region.landmarks[2].y;
|
||||
|
||||
float target_angle = M_PI * 0.5f;
|
||||
float rotation = target_angle - atan2(-(y1 - y0), x1 - x0);
|
||||
|
||||
region.rotation = normalize_radians(rotation);
|
||||
}
|
||||
|
||||
// Rotates `vec` in place about the origin by `rotation` radians.
void rot_vec(ov::Point2f &vec, float rotation) {
  const auto cos_r = cos(rotation);
  const auto sin_r = sin(rotation);
  const float px = vec.x;
  const float py = vec.y;
  vec.x = px * cos_r - py * sin_r;
  vec.y = px * sin_r + py * cos_r;
}
|
||||
|
||||
void compute_detect_to_roi(DetectRegion ®ion, const int &target_size,
|
||||
PalmObject &palm) {
|
||||
float width = region.btmright.x - region.topleft.x;
|
||||
float height = region.btmright.y - region.topleft.y;
|
||||
float palm_cx = region.topleft.x + width * 0.5f;
|
||||
float palm_cy = region.topleft.y + height * 0.5f;
|
||||
|
||||
float hand_cx;
|
||||
float hand_cy;
|
||||
float rotation = region.rotation;
|
||||
float shift_x = 0.0f;
|
||||
float shift_y = -0.5f;
|
||||
|
||||
if (rotation == 0.0f) {
|
||||
hand_cx = palm_cx + (width * shift_x);
|
||||
hand_cy = palm_cy + (height * shift_y);
|
||||
} else {
|
||||
float dx =
|
||||
(width * shift_x) * cos(rotation) - (height * shift_y) * sin(rotation);
|
||||
float dy =
|
||||
(width * shift_x) * sin(rotation) + (height * shift_y) * cos(rotation);
|
||||
hand_cx = palm_cx + dx;
|
||||
hand_cy = palm_cy + dy;
|
||||
}
|
||||
|
||||
float long_side = std::max(width, height);
|
||||
width = long_side;
|
||||
height = long_side;
|
||||
float hand_w = width * 2.6f;
|
||||
float hand_h = height * 2.6f;
|
||||
|
||||
palm.hand_cx = hand_cx;
|
||||
palm.hand_cy = hand_cy;
|
||||
palm.hand_w = hand_w;
|
||||
palm.hand_h = hand_h;
|
||||
|
||||
float dx = hand_w * 0.5f;
|
||||
float dy = hand_h * 0.5f;
|
||||
|
||||
palm.hand_pos[0].x = -dx;
|
||||
palm.hand_pos[0].y = -dy;
|
||||
palm.hand_pos[1].x = +dx;
|
||||
palm.hand_pos[1].y = -dy;
|
||||
palm.hand_pos[2].x = +dx;
|
||||
palm.hand_pos[2].y = +dy;
|
||||
palm.hand_pos[3].x = -dx;
|
||||
palm.hand_pos[3].y = +dy;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
rot_vec(palm.hand_pos[i], rotation);
|
||||
palm.hand_pos[i].x += hand_cx;
|
||||
palm.hand_pos[i].y += hand_cy;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 7; i++) {
|
||||
palm.landmarks[i] = region.landmarks[i];
|
||||
}
|
||||
|
||||
palm.score = region.score;
|
||||
}
|
||||
|
||||
static void pack_detect_result(std::vector<DetectRegion> &detect_results,
|
||||
std::list<DetectRegion> ®ion_list,
|
||||
const int &target_size,
|
||||
std::vector<PalmObject> &palmlist) {
|
||||
for (auto ®ion : region_list) {
|
||||
compute_rotation(region);
|
||||
PalmObject palm;
|
||||
compute_detect_to_roi(region, target_size, palm);
|
||||
palmlist.push_back(palm);
|
||||
detect_results.push_back(region);
|
||||
}
|
||||
}
|
||||
|
||||
// Creates the palm and hand networks and wires each one to its own blob and
// workspace allocators. No model is loaded yet, so the estimator starts
// uninitialised.
MediapipeHand::MediapipeHand() : ov::EstimatorBase() {
  palm_blob_allocator_.set_size_compare_ratio(0.f);
  palm_workspace_allocator_.set_size_compare_ratio(0.f);
  hand_blob_allocator_.set_size_compare_ratio(0.f);
  hand_workspace_allocator_.set_size_compare_ratio(0.f);

  palm_net_ = new ncnn::Net();
  hand_net_ = new ncnn::Net();
  initialized_ = false;

  // Applies the shared option set to one network.
  auto configure = [this](ncnn::Net *net, ncnn::Allocator *blob,
                          ncnn::Allocator *workspace) {
    if (num_threads > 0) {
      net->opt.num_threads = num_threads;
    }
    net->opt.blob_allocator = blob;
    net->opt.workspace_allocator = workspace;
    net->opt.lightmode = light_mode_;
#ifdef OV_VULKAN
    net->opt.use_vulkan_compute = true;
#endif // OV_VULKAN
  };
  configure(palm_net_, &palm_blob_allocator_, &palm_workspace_allocator_);
  configure(hand_net_, &hand_blob_allocator_, &hand_workspace_allocator_);
}
|
||||
|
||||
// Releases both networks and then the allocator pools they used.
//
// The networks are created with `new` in the constructor and were previously
// only cleared, never deleted, so every estimator leaked two ncnn::Net
// objects. They are deleted before the pools are cleared.
MediapipeHand::~MediapipeHand() {
  if (palm_net_) {
    palm_net_->clear();
    delete palm_net_;
    palm_net_ = NULL;
  }
  if (hand_net_) {
    hand_net_->clear();
    delete hand_net_;
    hand_net_ = NULL;
  }
  palm_workspace_allocator_.clear();
  palm_blob_allocator_.clear();
  hand_workspace_allocator_.clear();
  hand_blob_allocator_.clear();
}
|
||||
|
||||
void MediapipeHand::set_num_threads(int n) {
|
||||
EstimatorBase::set_num_threads(n);
|
||||
if (palm_net_) {
|
||||
palm_net_->opt.num_threads = n;
|
||||
}
|
||||
if (hand_net_) {
|
||||
hand_net_->opt.num_threads = n;
|
||||
}
|
||||
}
|
||||
|
||||
void MediapipeHand::set_light_mode(bool mode) {
|
||||
if (palm_net_) {
|
||||
palm_net_->opt.lightmode = mode;
|
||||
}
|
||||
if (hand_net_) {
|
||||
hand_net_->opt.lightmode = mode;
|
||||
}
|
||||
light_mode_ = mode;
|
||||
}
|
||||
|
||||
int MediapipeHand::LoadModel(const char *palm_path, const char *hand_path) {
|
||||
std::string palm_param_file = std::string(palm_path) + "/param";
|
||||
std::string palm_bin_file = std::string(palm_path) + "/bin";
|
||||
std::string hand_param_file = std::string(hand_path) + "/param";
|
||||
std::string hand_bin_file = std::string(hand_path) + "/bin";
|
||||
if (palm_net_->load_param(palm_param_file.c_str()) == -1 ||
|
||||
palm_net_->load_model(palm_bin_file.c_str()) == -1) {
|
||||
return 10000;
|
||||
}
|
||||
if (hand_net_->load_param(hand_param_file.c_str()) == -1 ||
|
||||
hand_net_->load_model(hand_bin_file.c_str()) == -1) {
|
||||
return 10000;
|
||||
}
|
||||
|
||||
initialized_ = true;
|
||||
anchors.clear();
|
||||
create_ssd_anchors(target_size, target_size, anchors);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int MediapipeHand::Detect(const unsigned char *rgbdata, int img_width,
|
||||
int img_height, std::vector<PalmObject> &objects) {
|
||||
if (!initialized_) {
|
||||
return 10000;
|
||||
}
|
||||
if (rgbdata == 0) {
|
||||
return 10001;
|
||||
}
|
||||
int w = img_width;
|
||||
int h = img_height;
|
||||
float scale = 1.f;
|
||||
if (w > h) {
|
||||
scale = (float)target_size / w;
|
||||
w = target_size;
|
||||
h = h * scale;
|
||||
} else {
|
||||
scale = (float)target_size / h;
|
||||
h = target_size;
|
||||
w = w * scale;
|
||||
}
|
||||
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB,
|
||||
img_width, img_height, w, h);
|
||||
|
||||
int wpad = target_size - w;
|
||||
int hpad = target_size - h;
|
||||
ncnn::Mat in_pad;
|
||||
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2,
|
||||
wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
|
||||
const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
|
||||
in_pad.substract_mean_normalize(0, norm_vals);
|
||||
|
||||
ncnn::Extractor ex = palm_net_->create_extractor();
|
||||
ncnn::Mat cls, reg;
|
||||
ex.input("input", in_pad);
|
||||
ex.extract("cls", cls);
|
||||
ex.extract("reg", reg);
|
||||
|
||||
float *scores = (float *)cls.data;
|
||||
float *bboxes = (float *)reg.data;
|
||||
|
||||
std::list<DetectRegion> region_list, region_nms_list;
|
||||
std::vector<DetectRegion> detect_results;
|
||||
|
||||
decode_bounds(region_list, prob_threshold, target_size, target_size, scores,
|
||||
bboxes, anchors);
|
||||
non_max_suppression(region_list, region_nms_list, nms_threshold);
|
||||
objects.clear();
|
||||
pack_detect_result(detect_results, region_nms_list, target_size, objects);
|
||||
|
||||
for (int i = 0; i < objects.size(); i++) {
|
||||
objects[i].hand_pos[0].x =
|
||||
(objects[i].hand_pos[0].x * target_size - ((float)wpad / 2)) / scale;
|
||||
objects[i].hand_pos[0].y =
|
||||
(objects[i].hand_pos[0].y * target_size - ((float)hpad / 2)) / scale;
|
||||
objects[i].hand_pos[1].x =
|
||||
(objects[i].hand_pos[1].x * target_size - ((float)wpad / 2)) / scale;
|
||||
objects[i].hand_pos[1].y =
|
||||
(objects[i].hand_pos[1].y * target_size - ((float)hpad / 2)) / scale;
|
||||
objects[i].hand_pos[2].x =
|
||||
(objects[i].hand_pos[2].x * target_size - ((float)wpad / 2)) / scale;
|
||||
objects[i].hand_pos[2].y =
|
||||
(objects[i].hand_pos[2].y * target_size - ((float)hpad / 2)) / scale;
|
||||
objects[i].hand_pos[3].x =
|
||||
(objects[i].hand_pos[3].x * target_size - ((float)wpad / 2)) / scale;
|
||||
objects[i].hand_pos[3].y =
|
||||
(objects[i].hand_pos[3].y * target_size - ((float)hpad / 2)) / scale;
|
||||
|
||||
for (int j = 0; j < 7; j++) {
|
||||
objects[i].landmarks[j].x =
|
||||
(objects[i].landmarks[j].x * target_size - ((float)wpad / 2)) / scale;
|
||||
objects[i].landmarks[j].y =
|
||||
(objects[i].landmarks[j].y * target_size - ((float)hpad / 2)) / scale;
|
||||
}
|
||||
|
||||
const float srcPts[8] = {
|
||||
objects[i].hand_pos[0].x, objects[i].hand_pos[0].y,
|
||||
objects[i].hand_pos[1].x, objects[i].hand_pos[1].y,
|
||||
objects[i].hand_pos[2].x, objects[i].hand_pos[2].y,
|
||||
objects[i].hand_pos[3].x, objects[i].hand_pos[3].y,
|
||||
};
|
||||
|
||||
const float dstPts[8] = {
|
||||
0, 0, 224, 0, 224, 224, 0, 224,
|
||||
};
|
||||
|
||||
float tm[6];
|
||||
unsigned char *trans_mat =
|
||||
(unsigned char *)malloc(224 * 224 * 3 * sizeof(unsigned char));
|
||||
ncnn::get_affine_transform(dstPts, srcPts, 4, tm);
|
||||
|
||||
ncnn::warpaffine_bilinear_c3(rgbdata, img_width, img_height, trans_mat, 224,
|
||||
224, tm);
|
||||
|
||||
ncnn::Mat trans_image =
|
||||
ncnn::Mat::from_pixels(trans_mat, ncnn::Mat::PIXEL_RGB, 224, 224);
|
||||
|
||||
float score = GetLandmarks(trans_image, tm, objects[i].skeleton,
|
||||
objects[i].skeleton3d);
|
||||
|
||||
free(trans_mat);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Runs the hand landmark network on a 224x224 crop and appends 21 keypoints
// to `skeleton` (2D) and `skeleton3d` (3D).
//
// The 2D points are the network's x/y outputs projected through the affine
// matrix `tm`. The 3D points keep the raw z and have x/y divided by 224.
// Returns the first value of the "score" output, or 0 with both vectors
// untouched when the network produced no usable output.
float MediapipeHand::GetLandmarks(ncnn::Mat in, float tm[6],
                                  std::vector<ov::Point2f> &skeleton,
                                  std::vector<ov::Point3d> &skeleton3d) {
  const int kNumKeypoints = 21;

  const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
  in.substract_mean_normalize(NULL, norm_vals);

  ncnn::Mat points, score;
  {
    ncnn::Extractor ex = hand_net_->create_extractor();
    ex.input("input", in);
    // A failed extract leaves the blob empty. Reading its data pointer would
    // dereference NULL, so stop here instead.
    if (ex.extract("points", points) != 0 || ex.extract("score", score) != 0) {
      return 0.f;
    }
  }
  if (points.total() < (size_t)(kNumKeypoints * 3) || score.empty()) {
    return 0.f;
  }

  const float *points_data = (const float *)points.data;
  const float *score_data = (const float *)score.data;

  skeleton.reserve(skeleton.size() + kNumKeypoints);
  skeleton3d.reserve(skeleton3d.size() + kNumKeypoints);
  for (int i = 0; i < kNumKeypoints; i++) {
    ov::Point3d pt3d;
    pt3d.x = points_data[i * 3];
    pt3d.y = points_data[i * 3 + 1];
    pt3d.z = points_data[i * 3 + 2];

    ov::Point2f pt;
    pt.x = pt3d.x * tm[0] + pt3d.y * tm[1] + tm[2];
    pt.y = pt3d.x * tm[3] + pt3d.y * tm[4] + tm[5];
    skeleton.push_back(pt);

    pt3d.x /= 224.f;
    pt3d.y /= 224.f;
    skeleton3d.push_back(pt3d);
  }
  return score_data[0];
}
|
||||
|
||||
} // namespace ovhand3d
|
87
src/hand/pose3d/mediapipe/mediapipe.hpp
Normal file
87
src/hand/pose3d/mediapipe/mediapipe.hpp
Normal file
@@ -0,0 +1,87 @@
|
||||
#ifndef _HAND_POSE3D_MEDIAPIPE_H_
|
||||
#define _HAND_POSE3D_MEDIAPIPE_H_
|
||||
|
||||
#include "../../../common/common.hpp"
|
||||
#include <net.h>
|
||||
|
||||
namespace ovhand3d {
|
||||
|
||||
struct PalmObject {
|
||||
float score;
|
||||
ov::Point2f landmarks[7];
|
||||
float rotation;
|
||||
|
||||
float hand_cx;
|
||||
float hand_cy;
|
||||
float hand_w;
|
||||
float hand_h;
|
||||
ov::Point2f hand_pos[4];
|
||||
|
||||
std::vector<ov::Point2f> skeleton;
|
||||
std::vector<ov::Point3d> skeleton3d;
|
||||
};
|
||||
|
||||
struct DetectRegion {
|
||||
float score;
|
||||
ov::Point2f topleft;
|
||||
ov::Point2f btmright;
|
||||
ov::Point2f landmarks[7];
|
||||
|
||||
float rotation;
|
||||
ov::Point2f roi_center;
|
||||
ov::Point2f roi_size;
|
||||
ov::Point2f roi_coord[4];
|
||||
};
|
||||
|
||||
// A single anchor box described by its centre and size.
struct Anchor {
  float x_center;
  float y_center;
  float w;
  float h;
};
|
||||
|
||||
// Configuration used to generate the anchor set.
struct AnchorsParams {
  // Network input resolution.
  int input_size_width, input_size_height;

  // Scale range of the anchors.
  float min_scale, max_scale;

  // Offset of the anchor centre.
  float anchor_offset_x, anchor_offset_y;

  // Per-layer settings.
  int num_layers;
  std::vector<int> feature_map_width;
  std::vector<int> feature_map_height;
  std::vector<int> strides;
  std::vector<float> aspect_ratios;
};
|
||||
|
||||
class MediapipeHand : public ov::EstimatorBase {
|
||||
public:
|
||||
MediapipeHand();
|
||||
~MediapipeHand();
|
||||
int LoadModel(const char *palm_model, const char *hand_model);
|
||||
int Detect(const unsigned char *rgbdata, int img_width, int img_heidht,
|
||||
std::vector<PalmObject> &objects);
|
||||
float GetLandmarks(ncnn::Mat in, float tm[6],
|
||||
std::vector<ov::Point2f> &skeleton,
|
||||
std::vector<ov::Point3d> &skeleton3d);
|
||||
void set_light_mode(bool mode);
|
||||
void set_num_threads(int n);
|
||||
|
||||
private:
|
||||
ncnn::Net *palm_net_ = NULL;
|
||||
ncnn::Net *hand_net_ = NULL;
|
||||
ncnn::PoolAllocator palm_workspace_allocator_;
|
||||
ncnn::UnlockedPoolAllocator palm_blob_allocator_;
|
||||
ncnn::PoolAllocator hand_workspace_allocator_;
|
||||
ncnn::UnlockedPoolAllocator hand_blob_allocator_;
|
||||
bool initialized_ = false;
|
||||
bool light_mode_ = true;
|
||||
std::vector<Anchor> anchors;
|
||||
float prob_threshold = 0.55f;
|
||||
float nms_threshold = 0.3f;
|
||||
const int target_size = 192;
|
||||
const float mean_vals[3] = {0.f, 0.f, 0.f};
|
||||
const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
|
||||
};
|
||||
} // namespace ovhand3d
|
||||
#endif // !_HAND_POSE3D_MEDIAPIPE_H_
|
161
src/pose/estimator/pptinypose/pptinypose.bak
Normal file
161
src/pose/estimator/pptinypose/pptinypose.bak
Normal file
@@ -0,0 +1,161 @@
|
||||
#include "pptinypose.hpp"
|
||||
#include <string>
|
||||
|
||||
#ifdef OV_VULKAN
|
||||
#include "gpu.h"
|
||||
#endif // OV_VULKAN
|
||||
|
||||
namespace ovpose {
|
||||
// Per-channel argmax over a heatmap blob.
//
// bottom_blob - input blob; each channel is scanned independently.
// top_blob    - recreated with one float per channel holding the flat index
//               (row * w + column) of that channel's maximum.
// prob        - the maximum value of each channel is appended, in channel
//               order (existing contents are kept).
//
// When several elements share the maximum, the highest index is reported.
// Returns 0 on success, -100 if the output blob cannot be allocated.
static int argmax(const ncnn::Mat &bottom_blob, ncnn::Mat &top_blob,
                  std::vector<float> &prob) {
  const int channels = bottom_blob.c;

  top_blob.create(channels, 1, 1, 4u);
  if (top_blob.empty()) {
    return -100;
  }
  float *outptr = top_blob;

  prob.reserve(prob.size() + (size_t)(channels > 0 ? channels : 0));

  for (int q = 0; q < channels; q++) {
    // Go through channel() so the channel stride (cstep, which ncnn may pad
    // for alignment) is honoured rather than assuming dense packing.
    const ncnn::Mat plane = bottom_blob.channel(q);
    const float *ptr = plane;
    const int count = plane.w * plane.h;

    int best_index = 0;
    float best_value = 0.f;
    if (count > 0) {
      best_value = ptr[0];
      for (int j = 1; j < count; j++) {
        // ">=" keeps the last of equal maxima.
        if (ptr[j] >= best_value) {
          best_value = ptr[j];
          best_index = j;
        }
      }
    }

    outptr[q] = (float)best_index;
    prob.push_back(best_value);
  }

  return 0;
}
|
||||
|
||||
// DARK post-processing, Zhang et al., "Distribution-Aware Coordinate
// Representation for Human Pose Estimation" (CVPR 2020):
//   1) offset = -hessian.inv() * derivative
//   2) dx  = (heatmap[x+1] - heatmap[x-1]) / 2
//   3) dxx = (dx[x+1] - dx[x-1]) / 2
//   4) derivative = [dx, dy]
//   5) hessian    = [[dxx, dxy], [dxy, dyy]]
//
// Refines the integer peak (px, py) of channel `ch` to sub-pixel precision
// and adds the offset to coords[ch * 2] (x) and coords[ch * 2 + 1] (y).
//
// heatmap - network heatmap output, one channel per keypoint.
// dim     - {1, channels, height, width} of the heatmap.
// coords  - interleaved x/y coordinates, updated in place.
//
// The heatmap is smoothed with a 3x3 Gaussian kernel ([1 2 1] / 4 per axis,
// mirrored at the borders) before the log-domain derivatives are taken; only
// the samples actually needed are smoothed. Nothing is changed when the peak
// is closer than two pixels to the border, because the 5x5 neighbourhood
// would fall outside the heatmap.
static void dark_parse(const ncnn::Mat &heatmap, std::vector<int> &dim,
                       std::vector<float> &coords, int px, int py, int ch) {
  if (dim.size() < 4) {
    return;
  }
  const int height = dim[2];
  const int width = dim[3];
  if (px < 2 || py < 2 || px >= width - 2 || py >= height - 2) {
    return;
  }

  const ncnn::Mat plane = heatmap.channel(ch);
  const float epsilon = 1e-10f;
  static const float kernel[3] = {0.25f, 0.5f, 0.25f};

  // Log of the smoothed heatmap at (px + ox, py + oy), clamped away from 0.
  auto sample = [&](int ox, int oy) -> float {
    float acc = 0.f;
    for (int ky = -1; ky <= 1; ky++) {
      int yy = py + oy + ky;
      if (yy < 0) yy = -yy;                         // mirror at the top
      if (yy >= height) yy = 2 * height - 2 - yy;   // mirror at the bottom
      const float *row = plane.row(yy);
      for (int kx = -1; kx <= 1; kx++) {
        int xx = px + ox + kx;
        if (xx < 0) xx = -xx;                       // mirror at the left
        if (xx >= width) xx = 2 * width - 2 - xx;   // mirror at the right
        acc += kernel[ky + 1] * kernel[kx + 1] * row[xx];
      }
    }
    return log(fmax(acc, epsilon));
  };

  // sample the heatmap around the target location
  const float xy = sample(0, 0);
  const float xr = sample(1, 0);
  const float xl = sample(-1, 0);
  const float xr2 = sample(2, 0);
  const float xl2 = sample(-2, 0);
  const float yu = sample(0, 1);
  const float yd = sample(0, -1);
  const float yu2 = sample(0, 2);
  const float yd2 = sample(0, -2);
  const float xryu = sample(1, 1);
  const float xryd = sample(1, -1);
  const float xlyu = sample(-1, 1);
  const float xlyd = sample(-1, -1);

  // first and second derivatives from the sampled values
  const float dx = 0.5f * (xr - xl);
  const float dy = 0.5f * (yu - yd);
  const float dxx = 0.25f * (xr2 - 2 * xy + xl2);
  const float dxy = 0.25f * (xryu - xryd - xlyu + xlyd);
  const float dyy = 0.25f * (yu2 - 2 * xy + yd2);

  // offset = -H^-1 * [dx, dy], with the 2x2 inverse written out explicitly
  const float det = dxx * dyy - dxy * dxy;
  if (det != 0) {
    coords[ch * 2] += -(dyy * dx - dxy * dy) / det;
    coords[ch * 2 + 1] += -(dxx * dy - dxy * dx) / det;
  }
}
|
||||
|
||||
// Converts per-channel argmax indices into heatmap coordinates and refines
// them with DARK post-processing.
//
// heatmap    - network heatmap output, one channel per keypoint.
// argmax_out - one flat index (row * w + column) per channel.
//
// Returns interleaved coordinates {x0, y0, x1, y1, ...} in heatmap pixels.
// Peaks closer than two pixels to the border keep their integer position,
// since the refinement needs a full 5x5 neighbourhood.
static std::vector<float> get_final_preds(const ncnn::Mat &heatmap,
                                          const ncnn::Mat &argmax_out) {
  std::vector<float> coords((size_t)heatmap.c * 2);
  if (heatmap.w <= 0 || heatmap.h <= 0) {
    return coords;
  }

  std::vector<int> dim({1, heatmap.c, heatmap.h, heatmap.w});

  for (int i = 0; i < heatmap.c; i++) {
    const int idx = (int)argmax_out[i];

    // Integer division: the row is the whole part of idx / w. A float
    // division here would leak the column fraction into the y coordinate.
    const int px = idx % heatmap.w;
    const int py = idx / heatmap.w;
    coords[i * 2] = (float)px;
    coords[i * 2 + 1] = (float)py;

    if (px > 1 && px < heatmap.w - 2 && py > 1 && py < heatmap.h - 2) {
      dark_parse(heatmap, dim, coords, px, py, i);
    }
  }

  return coords;
}
|
||||
// Selects the network input resolution from the requested target size:
// 128 picks the 96x128 (width x height) input, any other value picks the
// 192x256 input.
PPTinyPoseEstimator::PPTinyPoseEstimator(int target_size) : Estimator() {
  if (target_size == 128) {
    target_width_ = 96;
    target_height_ = 128;
  } else {
    // PP-TinyPose ships 128x96 and 256x192 models; the width paired with a
    // 256 height is 192.
    target_width_ = 192;
    target_height_ = 256;
  }
}
|
||||
|
||||
int PPTinyPoseEstimator::ExtractKeypoints(
|
||||
const unsigned char *rgbdata, int img_width, int img_height,
|
||||
const ov::Rect &rect, std::vector<ov::Keypoint> *keypoints) {
|
||||
if (!initialized_) {
|
||||
return 10000;
|
||||
}
|
||||
if (rgbdata == 0) {
|
||||
return 10001;
|
||||
}
|
||||
keypoints->clear();
|
||||
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_roi_resize(
|
||||
rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, rect.x, rect.y,
|
||||
rect.width, rect.height, target_width_, target_height_);
|
||||
in.substract_mean_normalize(meanVals, normVals);
|
||||
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("image", in);
|
||||
ncnn::Mat out;
|
||||
ex.extract("save_infer_model/scale_0.tmp_1", out);
|
||||
|
||||
ncnn::Mat argmax_out;
|
||||
std::vector<float> probs;
|
||||
argmax(out, argmax_out, probs);
|
||||
std::vector<float> coords = get_final_preds(out, argmax_out);
|
||||
|
||||
for (int i = 0; i < coords.size() / 2; i++) {
|
||||
ov::KeyPoint keypoint;
|
||||
keypoint.p = ov::Point(coords[i * 2] * rect.width / (float)out.w + rect.x,
|
||||
coords[i * 2 + 1] * rect.h / (float)out.h + rect.y);
|
||||
keypoint.score = probs[i];
|
||||
keypoints->push_back(keypoint);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace ovpose
|
25
src/pose/estimator/pptinypose/pptinypose.hpp
Normal file
25
src/pose/estimator/pptinypose/pptinypose.hpp
Normal file
@@ -0,0 +1,25 @@
|
||||
#ifndef _POSE_PPTINYPOSE_ESTIMATOR_H_
|
||||
#define _POSE_PPTINYPOSE_ESTIMATOR_H_
|
||||
|
||||
#include "../estimator.hpp"
|
||||
#include "net.h"
|
||||
#include <vector>
|
||||
|
||||
namespace ovpose {
|
||||
class PPTinyPoseEstimator : public Estimator {
|
||||
public:
|
||||
PPTinyPoseEstimator(int target_size);
|
||||
int ExtractKeypoints(const unsigned char *rgbdata, int img_width,
|
||||
int img_height, const ov::Rect &rect,
|
||||
std::vector<ov::Keypoint> *keypoints);
|
||||
|
||||
private:
|
||||
int target_width_ = 96;
|
||||
int target_height_ = 128;
|
||||
const float meanVals[3] = {123.675f, 116.28f, 103.53f};
|
||||
const float normVals[3] = {0.01712475f, 0.0175f, 0.01742919f};
|
||||
};
|
||||
|
||||
} // namespace ovpose
|
||||
|
||||
#endif // !_POSE_PPTINYPOSE_ESTIMATOR_H_
|
Reference in New Issue
Block a user