mirror of
https://github.com/bububa/openvision.git
synced 2025-09-27 01:56:04 +08:00
feat(hand): add mediapipe hand 3d pose detecter
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -70,3 +70,5 @@ _testmain.go
|
|||||||
test
|
test
|
||||||
.vim
|
.vim
|
||||||
dist/
|
dist/
|
||||||
|
|
||||||
|
libtorch/
|
||||||
|
@@ -53,6 +53,8 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM
|
|||||||
- nanodet [Google Drive](https://drive.google.com/drive/folders/1ywH7r_clqqA_BAOFSzA92Q0lxJtWlN3z?usp=sharing)
|
- nanodet [Google Drive](https://drive.google.com/drive/folders/1ywH7r_clqqA_BAOFSzA92Q0lxJtWlN3z?usp=sharing)
|
||||||
- pose (for hand pose estimation)
|
- pose (for hand pose estimation)
|
||||||
- handnet [Google Drive](https://drive.google.com/drive/folders/1DsCGmiVaZobbMWRp5Oec8GbIpeg7CsNR?usp=sharing)
|
- handnet [Google Drive](https://drive.google.com/drive/folders/1DsCGmiVaZobbMWRp5Oec8GbIpeg7CsNR?usp=sharing)
|
||||||
|
- pose3d (for 3d handpose detection)
|
||||||
|
- mediapipe [Google Drive](https://drive.google.com/drive/folders/1LsqIGB55dusZJqmP1uhnQUnNE2tLzifp?usp=sharing)
|
||||||
- styletransfer
|
- styletransfer
|
||||||
- animegan2 [Google Drive](https://drive.google.com/drive/folders/1K6ZScENPHVbxupHkwl5WcpG8PPECtD8e?usp=sharing)
|
- animegan2 [Google Drive](https://drive.google.com/drive/folders/1K6ZScENPHVbxupHkwl5WcpG8PPECtD8e?usp=sharing)
|
||||||
- tracker
|
- tracker
|
||||||
|
@@ -90,6 +90,9 @@ func NewCPoint2fVector() *C.Point2fVector {
|
|||||||
|
|
||||||
// GoPoint2fVector convert C.Point2fVector to []Point
|
// GoPoint2fVector convert C.Point2fVector to []Point
|
||||||
func GoPoint2fVector(cVector *C.Point2fVector, w float64, h float64) []Point {
|
func GoPoint2fVector(cVector *C.Point2fVector, w float64, h float64) []Point {
|
||||||
|
if cVector == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
l := int(cVector.length)
|
l := int(cVector.length)
|
||||||
ret := make([]Point, 0, l)
|
ret := make([]Point, 0, l)
|
||||||
ptr := unsafe.Pointer(cVector.points)
|
ptr := unsafe.Pointer(cVector.points)
|
||||||
@@ -105,3 +108,52 @@ func FreeCPoint2fVector(c *C.Point2fVector) {
|
|||||||
C.FreePoint2fVector(c)
|
C.FreePoint2fVector(c)
|
||||||
C.free(unsafe.Pointer(c))
|
C.free(unsafe.Pointer(c))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Point3d represents a 3dPoint
|
||||||
|
type Point3d struct {
|
||||||
|
X float64
|
||||||
|
Y float64
|
||||||
|
Z float64
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pt3d returns a New Point3d
|
||||||
|
func Pt3d(x, y, z float64) Point3d {
|
||||||
|
return Point3d{x, y, z}
|
||||||
|
}
|
||||||
|
|
||||||
|
var ZP3d = Point3d{}
|
||||||
|
|
||||||
|
// GoPoint3d conver C.Point3d to Point3d
|
||||||
|
func GoPoint3d(c *C.Point3d) Point3d {
|
||||||
|
return Pt3d(
|
||||||
|
float64(c.x),
|
||||||
|
float64(c.y),
|
||||||
|
float64(c.z),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewCPoint3dVector retruns C.Point3dVector pointer
|
||||||
|
func NewCPoint3dVector() *C.Point3dVector {
|
||||||
|
return (*C.Point3dVector)(C.malloc(C.sizeof_Point3d))
|
||||||
|
}
|
||||||
|
|
||||||
|
// GoPoint3dVector convert C.Point3dVector to []Point3d
|
||||||
|
func GoPoint3dVector(cVector *C.Point3dVector) []Point3d {
|
||||||
|
if cVector == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
l := int(cVector.length)
|
||||||
|
ret := make([]Point3d, 0, l)
|
||||||
|
ptr := unsafe.Pointer(cVector.points)
|
||||||
|
for i := 0; i < l; i++ {
|
||||||
|
cPoint3d := (*C.Point3d)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_Point3d*C.int(i))))
|
||||||
|
ret = append(ret, GoPoint3d(cPoint3d))
|
||||||
|
}
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
|
// FreeCPoint3dVector release C.Point3dVector memory
|
||||||
|
func FreeCPoint3dVector(c *C.Point3dVector) {
|
||||||
|
C.FreePoint3dVector(c)
|
||||||
|
C.free(unsafe.Pointer(c))
|
||||||
|
}
|
||||||
|
@@ -20,6 +20,8 @@ type ObjectInfo struct {
|
|||||||
Rect Rectangle
|
Rect Rectangle
|
||||||
// Points keypoints
|
// Points keypoints
|
||||||
Keypoints []Keypoint
|
Keypoints []Keypoint
|
||||||
|
// Name
|
||||||
|
Name string
|
||||||
}
|
}
|
||||||
|
|
||||||
// GoObjectInfo convert C.ObjectInfo to go type
|
// GoObjectInfo convert C.ObjectInfo to go type
|
||||||
|
56
go/common/palmobject.go
Normal file
56
go/common/palmobject.go
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
package common
|
||||||
|
|
||||||
|
/*
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include "openvision/common/common.h"
|
||||||
|
#include "openvision/hand/pose3d.h"
|
||||||
|
*/
|
||||||
|
import "C"
|
||||||
|
import (
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// PalmObject
|
||||||
|
type PalmObject struct {
|
||||||
|
Score float64
|
||||||
|
Rotation float64
|
||||||
|
Rect []Point
|
||||||
|
Landmarks []Point
|
||||||
|
Skeleton []Point
|
||||||
|
Skeleton3d []Point3d
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewCPalmObjectVector returns *C.PalmObjectVector
|
||||||
|
func NewCPalmObjectVector() *C.PalmObjectVector {
|
||||||
|
return (*C.PalmObjectVector)(C.malloc(C.sizeof_PalmObjectVector))
|
||||||
|
}
|
||||||
|
|
||||||
|
// FreeCPalmObjectVector release *C.PalmObjectVector memory
|
||||||
|
func FreeCPalmObjectVector(p *C.PalmObjectVector) {
|
||||||
|
C.FreePalmObjectVector(p)
|
||||||
|
C.free(unsafe.Pointer(p))
|
||||||
|
}
|
||||||
|
|
||||||
|
// GoPalmObject convert C.PalmObject to Go type
|
||||||
|
func GoPalmObject(cObj *C.PalmObject, w float64, h float64) PalmObject {
|
||||||
|
return PalmObject{
|
||||||
|
Score: float64(cObj.score),
|
||||||
|
Rotation: float64(cObj.rotation),
|
||||||
|
Rect: GoPoint2fVector(cObj.rect, w, h),
|
||||||
|
Landmarks: GoPoint2fVector(cObj.landmarks, w, h),
|
||||||
|
Skeleton: GoPoint2fVector(cObj.skeleton, w, h),
|
||||||
|
Skeleton3d: GoPoint3dVector(cObj.skeleton3d),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func GoPalmObjectVector(c *C.PalmObjectVector, w float64, h float64) []PalmObject {
|
||||||
|
l := int(c.length)
|
||||||
|
ret := make([]PalmObject, 0, l)
|
||||||
|
ptr := unsafe.Pointer(c.items)
|
||||||
|
for i := 0; i < l; i++ {
|
||||||
|
cObj := (*C.PalmObject)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_PalmObject*C.int(i))))
|
||||||
|
ret = append(ret, GoPalmObject(cObj, w, h))
|
||||||
|
}
|
||||||
|
return ret
|
||||||
|
}
|
@@ -15,6 +15,7 @@ import (
|
|||||||
"github.com/bububa/openvision/go/hand/detecter"
|
"github.com/bububa/openvision/go/hand/detecter"
|
||||||
handdrawer "github.com/bububa/openvision/go/hand/drawer"
|
handdrawer "github.com/bububa/openvision/go/hand/drawer"
|
||||||
"github.com/bububa/openvision/go/hand/pose"
|
"github.com/bububa/openvision/go/hand/pose"
|
||||||
|
"github.com/bububa/openvision/go/hand/pose3d"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -27,17 +28,19 @@ func main() {
|
|||||||
cpuCores := common.GetBigCPUCount()
|
cpuCores := common.GetBigCPUCount()
|
||||||
common.SetOMPThreads(cpuCores)
|
common.SetOMPThreads(cpuCores)
|
||||||
log.Printf("CPU big cores:%d\n", cpuCores)
|
log.Printf("CPU big cores:%d\n", cpuCores)
|
||||||
estimator := handpose(modelPath)
|
// estimator := handpose(modelPath)
|
||||||
defer estimator.Destroy()
|
// defer estimator.Destroy()
|
||||||
common.SetEstimatorThreads(estimator, cpuCores)
|
// common.SetEstimatorThreads(estimator, cpuCores)
|
||||||
for idx, d := range []detecter.Detecter{
|
// for idx, d := range []detecter.Detecter{
|
||||||
yolox(modelPath),
|
// yolox(modelPath),
|
||||||
nanodet(modelPath),
|
// nanodet(modelPath),
|
||||||
} {
|
// } {
|
||||||
defer d.Destroy()
|
// defer d.Destroy()
|
||||||
common.SetEstimatorThreads(d, cpuCores)
|
// common.SetEstimatorThreads(d, cpuCores)
|
||||||
detect(d, estimator, imgPath, "hand1.jpg", idx)
|
// detect(d, estimator, imgPath, "hand2.jpg", idx)
|
||||||
}
|
// }
|
||||||
|
d3d := mediapipe(modelPath)
|
||||||
|
detect3d(d3d, imgPath, "hand1.jpg")
|
||||||
}
|
}
|
||||||
|
|
||||||
func yolox(modelPath string) detecter.Detecter {
|
func yolox(modelPath string) detecter.Detecter {
|
||||||
@@ -67,6 +70,16 @@ func handpose(modelPath string) pose.Estimator {
|
|||||||
return d
|
return d
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func mediapipe(modelPath string) *pose3d.Mediapipe {
|
||||||
|
palmPath := filepath.Join(modelPath, "mediapipe/palm/full")
|
||||||
|
handPath := filepath.Join(modelPath, "mediapipe/hand/full")
|
||||||
|
d := pose3d.NewMediapipe()
|
||||||
|
if err := d.LoadModel(palmPath, handPath); err != nil {
|
||||||
|
log.Fatalln(err)
|
||||||
|
}
|
||||||
|
return d
|
||||||
|
}
|
||||||
|
|
||||||
func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename string, idx int) {
|
func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename string, idx int) {
|
||||||
inPath := filepath.Join(imgPath, filename)
|
inPath := filepath.Join(imgPath, filename)
|
||||||
imgSrc, err := loadImage(inPath)
|
imgSrc, err := loadImage(inPath)
|
||||||
@@ -104,6 +117,36 @@ func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename stri
|
|||||||
if err := saveImage(out, outPath); err != nil {
|
if err := saveImage(out, outPath); err != nil {
|
||||||
log.Fatalln(err)
|
log.Fatalln(err)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func detect3d(d *pose3d.Mediapipe, imgPath string, filename string) {
|
||||||
|
inPath := filepath.Join(imgPath, filename)
|
||||||
|
imgSrc, err := loadImage(inPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalln("load image failed,", err)
|
||||||
|
}
|
||||||
|
img := common.NewImage(imgSrc)
|
||||||
|
rois, err := d.Detect(img)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalln(err)
|
||||||
|
}
|
||||||
|
log.Printf("%+v\n", rois)
|
||||||
|
drawer := handdrawer.New()
|
||||||
|
outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("pose3d-hand-%s", filename))
|
||||||
|
out := drawer.DrawPalm(img, rois)
|
||||||
|
|
||||||
|
if err := saveImage(out, outPath); err != nil {
|
||||||
|
log.Fatalln(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for idx, roi := range rois {
|
||||||
|
outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("pose3d-palm3d-%d-%s", idx, filename))
|
||||||
|
out := drawer.DrawPalm3D(roi, 400, "#442519")
|
||||||
|
|
||||||
|
if err := saveImage(out, outPath); err != nil {
|
||||||
|
log.Fatalln(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
// +build !vulkan
|
// +build !vulkan
|
||||||
|
|
||||||
package eye
|
package tracker
|
||||||
|
|
||||||
/*
|
/*
|
||||||
#cgo CXXFLAGS: --std=c++11 -fopenmp
|
#cgo CXXFLAGS: --std=c++11 -fopenmp
|
||||||
|
@@ -7,71 +7,16 @@ import (
|
|||||||
const (
|
const (
|
||||||
// DefaultBorderColor default drawer border color
|
// DefaultBorderColor default drawer border color
|
||||||
DefaultBorderColor = common.Green
|
DefaultBorderColor = common.Green
|
||||||
|
// DefaultKeypointColor default drawer keypoint color
|
||||||
|
DefaultKeypointColor = common.Pink
|
||||||
// DefaultBorderStrokeWidth default drawer border stroke width
|
// DefaultBorderStrokeWidth default drawer border stroke width
|
||||||
DefaultBorderStrokeWidth = 3
|
DefaultBorderStrokeWidth = 3
|
||||||
// DefaultKeypointRadius default drawer keypoint radius
|
// DefaultKeypointRadius default drawer keypoint radius
|
||||||
DefaultKeypointRadius = 3
|
DefaultKeypointRadius = 3
|
||||||
// DefaultKeypointStrokeWidth default drawer keypoint stroke width
|
// DefaultKeypointStrokeWidth default drawer keypoint stroke width
|
||||||
DefaultKeypointStrokeWidth = 1
|
DefaultKeypointStrokeWidth = 1
|
||||||
)
|
// DefaultLabelColor default label color
|
||||||
|
DefaultLabelColor = common.White
|
||||||
// CocoPart coco part define
|
|
||||||
type CocoPart = int
|
|
||||||
|
|
||||||
const (
|
|
||||||
// CocoPartNose nose
|
|
||||||
CocoPartNose CocoPart = iota
|
|
||||||
// CocoPartLEye left eye
|
|
||||||
CocoPartLEye
|
|
||||||
// CocoPartREye right eye
|
|
||||||
CocoPartREye
|
|
||||||
// CocoPartLEar left ear
|
|
||||||
CocoPartLEar
|
|
||||||
// CocoPartREar right ear
|
|
||||||
CocoPartREar
|
|
||||||
// CocoPartLShoulder left sholder
|
|
||||||
CocoPartLShoulder
|
|
||||||
// CocoPartRShoulder right sholder
|
|
||||||
CocoPartRShoulder
|
|
||||||
// CocoPartLElbow left elbow
|
|
||||||
CocoPartLElbow
|
|
||||||
// CocoPartRElbow right elbow
|
|
||||||
CocoPartRElbow
|
|
||||||
// CocoPartLWrist left wrist
|
|
||||||
CocoPartLWrist
|
|
||||||
// CocoPartRWrist right wrist
|
|
||||||
CocoPartRWrist
|
|
||||||
// CocoPartLHip left hip
|
|
||||||
CocoPartLHip
|
|
||||||
// CocoPartRHip right hip
|
|
||||||
CocoPartRHip
|
|
||||||
// CocoPartLKnee left knee
|
|
||||||
CocoPartLKnee
|
|
||||||
// CocoPartRKnee right knee
|
|
||||||
CocoPartRKnee
|
|
||||||
// CocoPartRAnkle right ankle
|
|
||||||
CocoPartRAnkle
|
|
||||||
// CocoPartLAnkle left ankle
|
|
||||||
CocoPartLAnkle
|
|
||||||
// CocoPartNeck neck
|
|
||||||
CocoPartNeck
|
|
||||||
// CocoPartBackground background
|
|
||||||
CocoPartBackground
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
// CocoPair represents joints pair
|
|
||||||
CocoPair = [16][2]CocoPart{
|
|
||||||
{0, 1}, {1, 3}, {0, 2}, {2, 4}, {5, 6}, {5, 7}, {7, 9}, {6, 8}, {8, 10}, {5, 11}, {6, 12}, {11, 12}, {11, 13}, {12, 14}, {13, 15}, {14, 16},
|
|
||||||
}
|
|
||||||
// CocoColors represents color for coco parts
|
|
||||||
CocoColors = [17]string{
|
|
||||||
"#ff0000", "#ff5500", "#ffaa00", "#ffff00",
|
|
||||||
"#aaff00", "#55ff00", "#00ff00", "#00ff55", "#00ffaa",
|
|
||||||
"#00ffff", "#00aaff", "#0055ff",
|
|
||||||
"#0000ff", "#aa00ff", "#ff00ff",
|
|
||||||
"#ff00aa", "#ff0055",
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@@ -2,8 +2,10 @@ package drawer
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"image"
|
"image"
|
||||||
|
"image/color"
|
||||||
|
|
||||||
"github.com/llgcode/draw2d/draw2dimg"
|
"github.com/llgcode/draw2d/draw2dimg"
|
||||||
|
"github.com/llgcode/draw2d/draw2dkit"
|
||||||
|
|
||||||
"github.com/bububa/openvision/go/common"
|
"github.com/bububa/openvision/go/common"
|
||||||
)
|
)
|
||||||
@@ -18,6 +20,12 @@ type Drawer struct {
|
|||||||
KeypointStrokeWidth float64
|
KeypointStrokeWidth float64
|
||||||
// KeypointRadius represents keypoints circle radius
|
// KeypointRadius represents keypoints circle radius
|
||||||
KeypointRadius float64
|
KeypointRadius float64
|
||||||
|
// KeypointColor represents keypoint color
|
||||||
|
KeypointColor string
|
||||||
|
// LabelColor string
|
||||||
|
LabelColor string
|
||||||
|
// Font
|
||||||
|
Font *common.Font
|
||||||
}
|
}
|
||||||
|
|
||||||
// New returns a new Drawer
|
// New returns a new Drawer
|
||||||
@@ -27,6 +35,8 @@ func New(options ...Option) *Drawer {
|
|||||||
BorderStrokeWidth: DefaultBorderStrokeWidth,
|
BorderStrokeWidth: DefaultBorderStrokeWidth,
|
||||||
KeypointStrokeWidth: DefaultKeypointStrokeWidth,
|
KeypointStrokeWidth: DefaultKeypointStrokeWidth,
|
||||||
KeypointRadius: DefaultKeypointRadius,
|
KeypointRadius: DefaultKeypointRadius,
|
||||||
|
KeypointColor: DefaultKeypointColor,
|
||||||
|
LabelColor: DefaultLabelColor,
|
||||||
}
|
}
|
||||||
for _, opt := range options {
|
for _, opt := range options {
|
||||||
opt.apply(d)
|
opt.apply(d)
|
||||||
@@ -42,8 +52,6 @@ func (d *Drawer) Draw(img image.Image, rois []common.ObjectInfo, drawBorder bool
|
|||||||
gc := draw2dimg.NewGraphicContext(out)
|
gc := draw2dimg.NewGraphicContext(out)
|
||||||
gc.DrawImage(img)
|
gc.DrawImage(img)
|
||||||
for _, roi := range rois {
|
for _, roi := range rois {
|
||||||
if drawBorder {
|
|
||||||
// draw rect
|
|
||||||
rect := common.Rect(
|
rect := common.Rect(
|
||||||
roi.Rect.X*imgW,
|
roi.Rect.X*imgW,
|
||||||
roi.Rect.Y*imgH,
|
roi.Rect.Y*imgH,
|
||||||
@@ -51,6 +59,8 @@ func (d *Drawer) Draw(img image.Image, rois []common.ObjectInfo, drawBorder bool
|
|||||||
roi.Rect.Height*imgH,
|
roi.Rect.Height*imgH,
|
||||||
)
|
)
|
||||||
borderColor := d.BorderColor
|
borderColor := d.BorderColor
|
||||||
|
if drawBorder {
|
||||||
|
// draw rect
|
||||||
common.DrawRectangle(gc, rect, borderColor, "", d.BorderStrokeWidth)
|
common.DrawRectangle(gc, rect, borderColor, "", d.BorderStrokeWidth)
|
||||||
}
|
}
|
||||||
l := len(roi.Keypoints)
|
l := len(roi.Keypoints)
|
||||||
@@ -95,6 +105,115 @@ func (d *Drawer) Draw(img image.Image, rois []common.ObjectInfo, drawBorder bool
|
|||||||
poseColor := PoseColors[colorIdx]
|
poseColor := PoseColors[colorIdx]
|
||||||
common.DrawCircle(gc, common.Pt(pt.Point.X*imgW, pt.Point.Y*imgH), d.KeypointRadius, poseColor, "", d.KeypointStrokeWidth)
|
common.DrawCircle(gc, common.Pt(pt.Point.X*imgW, pt.Point.Y*imgH), d.KeypointRadius, poseColor, "", d.KeypointStrokeWidth)
|
||||||
}
|
}
|
||||||
|
// draw name
|
||||||
|
if roi.Name != "" {
|
||||||
|
common.DrawLabelInWidth(gc, d.Font, roi.Name, common.Pt(rect.X, rect.MaxY()), d.LabelColor, borderColor, rect.Width)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// DrawPalm draw PalmObject
|
||||||
|
func (d *Drawer) DrawPalm(img image.Image, rois []common.PalmObject) image.Image {
|
||||||
|
imgW := float64(img.Bounds().Dx())
|
||||||
|
imgH := float64(img.Bounds().Dy())
|
||||||
|
out := image.NewRGBA(img.Bounds())
|
||||||
|
gc := draw2dimg.NewGraphicContext(out)
|
||||||
|
gc.DrawImage(img)
|
||||||
|
for _, roi := range rois {
|
||||||
|
gc.SetLineWidth(d.BorderStrokeWidth)
|
||||||
|
gc.SetStrokeColor(common.ColorFromHex(d.BorderColor))
|
||||||
|
gc.BeginPath()
|
||||||
|
for idx, pt := range roi.Rect {
|
||||||
|
gc.MoveTo(pt.X*imgW, pt.Y*imgH)
|
||||||
|
if idx == len(roi.Rect)-1 {
|
||||||
|
gc.LineTo(roi.Rect[0].X*imgW, roi.Rect[0].Y*imgH)
|
||||||
|
} else {
|
||||||
|
gc.LineTo(roi.Rect[idx+1].X*imgW, roi.Rect[idx+1].Y*imgH)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gc.Close()
|
||||||
|
gc.Stroke()
|
||||||
|
|
||||||
|
l := len(roi.Skeleton)
|
||||||
|
if l == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// draw skeleton
|
||||||
|
for idx := range roi.Skeleton[:l-1] {
|
||||||
|
var (
|
||||||
|
p0 common.Point
|
||||||
|
p1 common.Point
|
||||||
|
poseColor = PoseColors[idx/4]
|
||||||
|
)
|
||||||
|
gc.SetStrokeColor(common.ColorFromHex(poseColor))
|
||||||
|
if idx == 5 || idx == 9 || idx == 13 || idx == 17 {
|
||||||
|
p0 = roi.Skeleton[0]
|
||||||
|
p1 = roi.Skeleton[idx]
|
||||||
|
gc.BeginPath()
|
||||||
|
gc.MoveTo(p0.X*imgW, p0.Y*imgH)
|
||||||
|
gc.LineTo(p1.X*imgW, p1.Y*imgH)
|
||||||
|
gc.Close()
|
||||||
|
gc.Stroke()
|
||||||
|
} else if idx == 4 || idx == 8 || idx == 12 || idx == 16 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
p0 = roi.Skeleton[idx]
|
||||||
|
p1 = roi.Skeleton[idx+1]
|
||||||
|
gc.BeginPath()
|
||||||
|
gc.MoveTo(p0.X*imgW, p0.Y*imgH)
|
||||||
|
gc.LineTo(p1.X*imgW, p1.Y*imgH)
|
||||||
|
gc.Close()
|
||||||
|
gc.Stroke()
|
||||||
|
}
|
||||||
|
for _, pt := range roi.Landmarks {
|
||||||
|
common.DrawCircle(gc, common.Pt(pt.X*imgW, pt.Y*imgH), d.KeypointRadius, d.KeypointColor, "", d.KeypointStrokeWidth)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// DrawPalm3D draw 3d PalmObject
|
||||||
|
func (d *Drawer) DrawPalm3D(roi common.PalmObject, size float64, bg string) image.Image {
|
||||||
|
out := image.NewRGBA(image.Rect(0, 0, int(size), int(size)))
|
||||||
|
gc := draw2dimg.NewGraphicContext(out)
|
||||||
|
l := len(roi.Skeleton3d)
|
||||||
|
if l == 0 {
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
if bg != "" {
|
||||||
|
bgColor := common.ColorFromHex(bg)
|
||||||
|
gc.SetFillColor(bgColor)
|
||||||
|
draw2dkit.Rectangle(gc, 0, 0, size, size)
|
||||||
|
gc.Fill()
|
||||||
|
gc.SetFillColor(color.Transparent)
|
||||||
|
}
|
||||||
|
// draw skeleton3d
|
||||||
|
for idx := range roi.Skeleton3d[:l-1] {
|
||||||
|
var (
|
||||||
|
p0 common.Point3d
|
||||||
|
p1 common.Point3d
|
||||||
|
poseColor = PoseColors[idx/4]
|
||||||
|
)
|
||||||
|
gc.SetStrokeColor(common.ColorFromHex(poseColor))
|
||||||
|
if idx == 5 || idx == 9 || idx == 13 || idx == 17 {
|
||||||
|
p0 = roi.Skeleton3d[0]
|
||||||
|
p1 = roi.Skeleton3d[idx]
|
||||||
|
gc.BeginPath()
|
||||||
|
gc.MoveTo(p0.X*size, p0.Y*size)
|
||||||
|
gc.LineTo(p1.X*size, p1.Y*size)
|
||||||
|
gc.Close()
|
||||||
|
gc.Stroke()
|
||||||
|
} else if idx == 4 || idx == 8 || idx == 12 || idx == 16 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
p0 = roi.Skeleton3d[idx]
|
||||||
|
p1 = roi.Skeleton3d[idx+1]
|
||||||
|
gc.BeginPath()
|
||||||
|
gc.MoveTo(p0.X*size, p0.Y*size)
|
||||||
|
gc.LineTo(p1.X*size, p1.Y*size)
|
||||||
|
gc.Close()
|
||||||
|
gc.Stroke()
|
||||||
}
|
}
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
@@ -1,5 +1,9 @@
|
|||||||
package drawer
|
package drawer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/bububa/openvision/go/common"
|
||||||
|
)
|
||||||
|
|
||||||
// Option represents Drawer option interface
|
// Option represents Drawer option interface
|
||||||
type Option interface {
|
type Option interface {
|
||||||
apply(*Drawer)
|
apply(*Drawer)
|
||||||
@@ -38,3 +42,17 @@ func WithKeypointStrokeWidth(w float64) Option {
|
|||||||
d.KeypointStrokeWidth = w
|
d.KeypointStrokeWidth = w
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WithKeypointColor set Drawer KeypointColor
|
||||||
|
func WithKeypointColor(color string) Option {
|
||||||
|
return optionFunc(func(d *Drawer) {
|
||||||
|
d.KeypointColor = color
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithFont set Drawer Font
|
||||||
|
func WithFont(font *common.Font) Option {
|
||||||
|
return optionFunc(func(d *Drawer) {
|
||||||
|
d.Font = font
|
||||||
|
})
|
||||||
|
}
|
||||||
|
11
go/hand/pose3d/cgo.go
Normal file
11
go/hand/pose3d/cgo.go
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
// +build !vulkan
|
||||||
|
|
||||||
|
package pose3d
|
||||||
|
|
||||||
|
/*
|
||||||
|
#cgo CXXFLAGS: --std=c++11 -fopenmp
|
||||||
|
#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include
|
||||||
|
#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision
|
||||||
|
#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib
|
||||||
|
*/
|
||||||
|
import "C"
|
11
go/hand/pose3d/cgo_vulkan.go
Normal file
11
go/hand/pose3d/cgo_vulkan.go
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
// +build vulkan
|
||||||
|
|
||||||
|
package pose3d
|
||||||
|
|
||||||
|
/*
|
||||||
|
#cgo CXXFLAGS: --std=c++11 -fopenmp
|
||||||
|
#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include
|
||||||
|
#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision -lglslang -lvulkan -lSPIRV -lOGLCompiler -lMachineIndependent -lGenericCodeGen -lOSDependent
|
||||||
|
#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib
|
||||||
|
*/
|
||||||
|
import "C"
|
2
go/hand/pose3d/doc.go
Normal file
2
go/hand/pose3d/doc.go
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
// Package pose hand 3d pose estimator
|
||||||
|
package pose3d
|
62
go/hand/pose3d/mediapipe.go
Normal file
62
go/hand/pose3d/mediapipe.go
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
package pose3d
|
||||||
|
|
||||||
|
/*
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include "openvision/common/common.h"
|
||||||
|
#include "openvision/hand/pose3d.h"
|
||||||
|
*/
|
||||||
|
import "C"
|
||||||
|
import (
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
openvision "github.com/bububa/openvision/go"
|
||||||
|
"github.com/bububa/openvision/go/common"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Mediapipe represents mediapipe estimator interface
|
||||||
|
type Mediapipe struct {
|
||||||
|
d C.IHandPose3DEstimator
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewMediapipe() *Mediapipe {
|
||||||
|
return &Mediapipe{
|
||||||
|
d: C.new_mediapipe_hand(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Mediapipe) Destroy() {
|
||||||
|
C.destroy_mediapipe_hand(m.d)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Mediapipe) LoadModel(palmPath string, handPath string) error {
|
||||||
|
cPalm := C.CString(palmPath)
|
||||||
|
defer C.free(unsafe.Pointer(cPalm))
|
||||||
|
cHand := C.CString(handPath)
|
||||||
|
defer C.free(unsafe.Pointer(cHand))
|
||||||
|
retCode := C.mediapipe_hand_load_model(m.d, cPalm, cHand)
|
||||||
|
if retCode != 0 {
|
||||||
|
return openvision.LoadModelError(int(retCode))
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Detect detect hand 3d pose
|
||||||
|
func (m *Mediapipe) Detect(img *common.Image) ([]common.PalmObject, error) {
|
||||||
|
imgWidth := img.WidthF64()
|
||||||
|
imgHeight := img.HeightF64()
|
||||||
|
data := img.Bytes()
|
||||||
|
cObjs := common.NewCPalmObjectVector()
|
||||||
|
defer common.FreeCPalmObjectVector(cObjs)
|
||||||
|
errCode := C.mediapipe_hand_detect(
|
||||||
|
m.d,
|
||||||
|
(*C.uchar)(unsafe.Pointer(&data[0])),
|
||||||
|
C.int(imgWidth), C.int(imgHeight),
|
||||||
|
(*C.PalmObjectVector)(unsafe.Pointer(cObjs)),
|
||||||
|
)
|
||||||
|
if errCode != 0 {
|
||||||
|
return nil, openvision.DetectHandError(int(errCode))
|
||||||
|
}
|
||||||
|
return common.GoPalmObjectVector(cObjs, imgWidth, imgHeight), nil
|
||||||
|
}
|
@@ -71,6 +71,7 @@ target_include_directories(openvision
|
|||||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand>
|
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand>
|
||||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter>
|
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter>
|
||||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/pose>
|
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/pose>
|
||||||
|
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/pose3d>
|
||||||
|
|
||||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose>
|
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose>
|
||||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose/detecter>
|
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose/detecter>
|
||||||
@@ -109,6 +110,7 @@ file(COPY
|
|||||||
file(COPY
|
file(COPY
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter.h
|
${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/hand/pose.h
|
${CMAKE_CURRENT_SOURCE_DIR}/hand/pose.h
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/hand/pose3d.h
|
||||||
DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/hand
|
DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/hand
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -58,6 +58,13 @@ void FreePoint2fVector(Point2fVector *p) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void FreePoint3dVector(Point3dVector *p) {
|
||||||
|
if (p->points != NULL) {
|
||||||
|
free(p->points);
|
||||||
|
p->points = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val) {
|
void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val) {
|
||||||
if (p->points == NULL || i >= p->length) {
|
if (p->points == NULL || i >= p->length) {
|
||||||
return;
|
return;
|
||||||
|
@@ -11,6 +11,7 @@ typedef ov::Size Size;
|
|||||||
typedef ov::Size2f Size2f;
|
typedef ov::Size2f Size2f;
|
||||||
typedef ov::Point Point;
|
typedef ov::Point Point;
|
||||||
typedef ov::Point2f Point2f;
|
typedef ov::Point2f Point2f;
|
||||||
|
typedef ov::Point3d Point3d;
|
||||||
typedef ov::Rect Rect;
|
typedef ov::Rect Rect;
|
||||||
typedef ov::Keypoint Keypoint;
|
typedef ov::Keypoint Keypoint;
|
||||||
#else
|
#else
|
||||||
@@ -39,6 +40,11 @@ typedef struct Point2f {
|
|||||||
float y;
|
float y;
|
||||||
} Point2f;
|
} Point2f;
|
||||||
|
|
||||||
|
typedef struct Point3d {
|
||||||
|
float x;
|
||||||
|
float y;
|
||||||
|
float z;
|
||||||
|
} Point3d;
|
||||||
|
|
||||||
// Wrapper for an individual cv::Rect
|
// Wrapper for an individual cv::Rect
|
||||||
typedef struct Rect {
|
typedef struct Rect {
|
||||||
@@ -48,85 +54,90 @@ typedef struct Rect {
|
|||||||
int height;
|
int height;
|
||||||
} Rect;
|
} Rect;
|
||||||
|
|
||||||
|
|
||||||
typedef struct Keypoint {
|
typedef struct Keypoint {
|
||||||
Point2f p;
|
Point2f p;
|
||||||
float score;
|
float score;
|
||||||
int id;
|
int id;
|
||||||
} Keypoint;
|
} Keypoint;
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef void* IEstimator;
|
typedef void *IEstimator;
|
||||||
|
|
||||||
int get_gpu_count();
|
int get_gpu_count();
|
||||||
int create_gpu_instance();
|
int create_gpu_instance();
|
||||||
void destroy_gpu_instance();
|
void destroy_gpu_instance();
|
||||||
int get_big_cpu_count();
|
int get_big_cpu_count();
|
||||||
void set_omp_num_threads(int n);
|
void set_omp_num_threads(int n);
|
||||||
int load_model(IEstimator e, const char* root_path);
|
int load_model(IEstimator e, const char *root_path);
|
||||||
void destroy_estimator(IEstimator e);
|
void destroy_estimator(IEstimator e);
|
||||||
void set_num_threads(IEstimator e, int n);
|
void set_num_threads(IEstimator e, int n);
|
||||||
void set_light_mode(IEstimator e, bool mode);
|
void set_light_mode(IEstimator e, bool mode);
|
||||||
|
|
||||||
typedef struct Point2fVector {
|
typedef struct Point2fVector {
|
||||||
Point2f* points;
|
Point2f *points;
|
||||||
int length;
|
int length;
|
||||||
} Point2fVector;
|
} Point2fVector;
|
||||||
|
|
||||||
void FreePoint2fVector(Point2fVector *p);
|
void FreePoint2fVector(Point2fVector *p);
|
||||||
void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f* val);
|
void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f *val);
|
||||||
|
|
||||||
|
typedef struct Point3dVector {
|
||||||
|
Point3d *points;
|
||||||
|
int length;
|
||||||
|
} Point3dVector;
|
||||||
|
|
||||||
|
void FreePoint3dVector(Point3dVector *p);
|
||||||
|
|
||||||
typedef struct RectVector {
|
typedef struct RectVector {
|
||||||
Rect* rects;
|
Rect *rects;
|
||||||
int length;
|
int length;
|
||||||
} RectVector;
|
} RectVector;
|
||||||
|
|
||||||
void FreeRectVector(RectVector *p);
|
void FreeRectVector(RectVector *p);
|
||||||
|
|
||||||
typedef struct FloatVector {
|
typedef struct FloatVector {
|
||||||
float* values;
|
float *values;
|
||||||
int length;
|
int length;
|
||||||
} FloatVector;
|
} FloatVector;
|
||||||
|
|
||||||
void FreeFloatVector(FloatVector *p);
|
void FreeFloatVector(FloatVector *p);
|
||||||
|
|
||||||
typedef struct Bytes {
|
typedef struct Bytes {
|
||||||
unsigned char* values;
|
unsigned char *values;
|
||||||
int length;
|
int length;
|
||||||
} Bytes;
|
} Bytes;
|
||||||
|
|
||||||
void FreeBytes(Bytes *p);
|
void FreeBytes(Bytes *p);
|
||||||
|
|
||||||
typedef struct KeypointVector {
|
typedef struct KeypointVector {
|
||||||
Keypoint* points;
|
Keypoint *points;
|
||||||
int length;
|
int length;
|
||||||
} KeypointVector;
|
} KeypointVector;
|
||||||
|
|
||||||
void FreeKeypointVector(KeypointVector *p);
|
void FreeKeypointVector(KeypointVector *p);
|
||||||
void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint* val);
|
void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint *val);
|
||||||
|
|
||||||
typedef struct ImageC {
|
typedef struct ImageC {
|
||||||
unsigned char* data;
|
unsigned char *data;
|
||||||
int width;
|
int width;
|
||||||
int height;
|
int height;
|
||||||
int channels;
|
int channels;
|
||||||
} Image;
|
} Image;
|
||||||
|
|
||||||
void FreeImage(Image* p);
|
void FreeImage(Image *p);
|
||||||
|
|
||||||
typedef struct ObjectInfoC {
|
typedef struct ObjectInfoC {
|
||||||
Rect rect;
|
Rect rect;
|
||||||
float score;
|
float score;
|
||||||
int label;
|
int label;
|
||||||
KeypointVector* pts;
|
KeypointVector *pts;
|
||||||
} ObjectInfo;
|
} ObjectInfo;
|
||||||
|
|
||||||
void FreeObjectInfo(ObjectInfo *p);
|
void FreeObjectInfo(ObjectInfo *p);
|
||||||
|
|
||||||
typedef struct ObjectInfoVector {
|
typedef struct ObjectInfoVector {
|
||||||
ObjectInfo* items;
|
ObjectInfo *items;
|
||||||
int length;
|
int length;
|
||||||
} ObjectInfoVector;
|
} ObjectInfoVector;
|
||||||
|
|
||||||
|
@@ -76,6 +76,13 @@ struct Point2f {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct Point3d {
|
||||||
|
float x;
|
||||||
|
float y;
|
||||||
|
float z;
|
||||||
|
Point3d(float _x = 0, float _y = 0, float _z = 0) : x(_x), y(_y), z(_z) {}
|
||||||
|
};
|
||||||
|
|
||||||
// Wrapper for an individual cv::Rect
|
// Wrapper for an individual cv::Rect
|
||||||
struct Rect {
|
struct Rect {
|
||||||
int x;
|
int x;
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
#include "nanodet.hpp"
|
#include "nanodet.hpp"
|
||||||
#include <string>
|
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
#ifdef OV_VULKAN
|
#ifdef OV_VULKAN
|
||||||
#include "gpu.h"
|
#include "gpu.h"
|
||||||
@@ -8,53 +8,47 @@
|
|||||||
|
|
||||||
namespace ovhand {
|
namespace ovhand {
|
||||||
|
|
||||||
static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Mat& dis_pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<ov::ObjectInfo>& objects)
|
static void generate_nanodet_proposals(const ncnn::Mat &cls_pred,
|
||||||
{
|
const ncnn::Mat &dis_pred, int stride,
|
||||||
|
const ncnn::Mat &in_pad,
|
||||||
|
float prob_threshold,
|
||||||
|
std::vector<ov::ObjectInfo> &objects) {
|
||||||
|
|
||||||
const int num_grid = cls_pred.h;
|
const int num_grid = cls_pred.h;
|
||||||
|
|
||||||
int num_grid_x;
|
int num_grid_x;
|
||||||
int num_grid_y;
|
int num_grid_y;
|
||||||
if (in_pad.w > in_pad.h)
|
if (in_pad.w > in_pad.h) {
|
||||||
{
|
|
||||||
num_grid_x = in_pad.w / stride;
|
num_grid_x = in_pad.w / stride;
|
||||||
num_grid_y = num_grid / num_grid_x;
|
num_grid_y = num_grid / num_grid_x;
|
||||||
}
|
} else {
|
||||||
else
|
|
||||||
{
|
|
||||||
num_grid_y = in_pad.h / stride;
|
num_grid_y = in_pad.h / stride;
|
||||||
num_grid_x = num_grid / num_grid_y;
|
num_grid_x = num_grid / num_grid_y;
|
||||||
}
|
}
|
||||||
|
|
||||||
const int num_class = cls_pred.w;
|
const int num_class = cls_pred.w;
|
||||||
const int reg_max_1 = dis_pred.w / 4;
|
const int reg_max_1 = dis_pred.w / 4;
|
||||||
//__android_log_print(ANDROID_LOG_WARN, "ncnn","cls_pred h %d, w %d",cls_pred.h,cls_pred.w);
|
|
||||||
//__android_log_print(ANDROID_LOG_WARN, "ncnn","%d,%d,%d,%d",num_grid_x,num_grid_y,num_class,reg_max_1);
|
for (int i = 0; i < num_grid_y; i++) {
|
||||||
for (int i = 0; i < num_grid_y; i++)
|
for (int j = 0; j < num_grid_x; j++) {
|
||||||
{
|
|
||||||
for (int j = 0; j < num_grid_x; j++)
|
|
||||||
{
|
|
||||||
const int idx = i * num_grid_x + j;
|
const int idx = i * num_grid_x + j;
|
||||||
|
|
||||||
const float* scores = cls_pred.row(idx);
|
const float *scores = cls_pred.row(idx);
|
||||||
|
|
||||||
// find label with max score
|
// find label with max score
|
||||||
int label = -1;
|
int label = -1;
|
||||||
float score = -FLT_MAX;
|
float score = -FLT_MAX;
|
||||||
for (int k = 0; k < num_class; k++)
|
for (int k = 0; k < num_class; k++) {
|
||||||
{
|
if (scores[k] > score) {
|
||||||
if (scores[k] > score)
|
|
||||||
{
|
|
||||||
label = k;
|
label = k;
|
||||||
score = scores[k];
|
score = scores[k];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (score >= prob_threshold)
|
if (score >= prob_threshold) {
|
||||||
|
ncnn::Mat bbox_pred(reg_max_1, 4, (void *)dis_pred.row(idx));
|
||||||
{
|
{
|
||||||
ncnn::Mat bbox_pred(reg_max_1, 4, (void*)dis_pred.row(idx));
|
ncnn::Layer *softmax = ncnn::create_layer("Softmax");
|
||||||
{
|
|
||||||
ncnn::Layer* softmax = ncnn::create_layer("Softmax");
|
|
||||||
|
|
||||||
ncnn::ParamDict pd;
|
ncnn::ParamDict pd;
|
||||||
pd.set(0, 1); // axis
|
pd.set(0, 1); // axis
|
||||||
@@ -62,7 +56,7 @@ static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Ma
|
|||||||
softmax->load_param(pd);
|
softmax->load_param(pd);
|
||||||
|
|
||||||
ncnn::Option opt;
|
ncnn::Option opt;
|
||||||
// opt.num_threads = 1;
|
opt.num_threads = 1;
|
||||||
opt.use_packing_layout = false;
|
opt.use_packing_layout = false;
|
||||||
|
|
||||||
softmax->create_pipeline(opt);
|
softmax->create_pipeline(opt);
|
||||||
@@ -75,15 +69,12 @@ static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Ma
|
|||||||
}
|
}
|
||||||
|
|
||||||
float pred_ltrb[4];
|
float pred_ltrb[4];
|
||||||
for (int k = 0; k < 4; k++)
|
for (int k = 0; k < 4; k++) {
|
||||||
{
|
|
||||||
float dis = 0.f;
|
float dis = 0.f;
|
||||||
const float* dis_after_sm = bbox_pred.row(k);
|
const float *dis_after_sm = bbox_pred.row(k);
|
||||||
for (int l = 0; l < reg_max_1; l++)
|
for (int l = 0; l < reg_max_1; l++) {
|
||||||
{
|
|
||||||
dis += l * dis_after_sm[l];
|
dis += l * dis_after_sm[l];
|
||||||
}
|
}
|
||||||
|
|
||||||
pred_ltrb[k] = dis * stride;
|
pred_ltrb[k] = dis * stride;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -101,7 +92,7 @@ static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Ma
|
|||||||
obj.rect.width = x1 - x0;
|
obj.rect.width = x1 - x0;
|
||||||
obj.rect.height = y1 - y0;
|
obj.rect.height = y1 - y0;
|
||||||
obj.label = label;
|
obj.label = label;
|
||||||
obj.score= score;
|
obj.score = score;
|
||||||
|
|
||||||
objects.push_back(obj);
|
objects.push_back(obj);
|
||||||
}
|
}
|
||||||
@@ -109,13 +100,12 @@ static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Ma
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int Nanodet::Detect(const unsigned char* rgbdata,
|
int Nanodet::Detect(const unsigned char *rgbdata, int img_width, int img_height,
|
||||||
int img_width, int img_height,
|
std::vector<ov::ObjectInfo> &rois) {
|
||||||
std::vector<ov::ObjectInfo>& rois) {
|
|
||||||
if (!initialized_) {
|
if (!initialized_) {
|
||||||
return 10000;
|
return 10000;
|
||||||
}
|
}
|
||||||
if (rgbdata == 0){
|
if (rgbdata == 0) {
|
||||||
return 10001;
|
return 10001;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -132,13 +122,15 @@ int Nanodet::Detect(const unsigned char* rgbdata,
|
|||||||
w = w * scale;
|
w = w * scale;
|
||||||
}
|
}
|
||||||
|
|
||||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, w, h);
|
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB,
|
||||||
|
img_width, img_height, w, h);
|
||||||
|
|
||||||
// pad to target_size rectangle
|
// pad to target_size rectangle
|
||||||
float wpad = 320-w;//(w + 31) / 32 * 32 - w;
|
float wpad = 320 - w; //(w + 31) / 32 * 32 - w;
|
||||||
float hpad = 320-h;//(h + 31) / 32 * 32 - h;
|
float hpad = 320 - h; //(h + 31) / 32 * 32 - h;
|
||||||
ncnn::Mat in_pad;
|
ncnn::Mat in_pad;
|
||||||
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
|
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2,
|
||||||
|
wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
|
||||||
|
|
||||||
in_pad.substract_mean_normalize(mean_vals, norm_vals);
|
in_pad.substract_mean_normalize(mean_vals, norm_vals);
|
||||||
|
|
||||||
@@ -156,7 +148,8 @@ int Nanodet::Detect(const unsigned char* rgbdata,
|
|||||||
ex.extract("dis_pred_stride_8", dis_pred);
|
ex.extract("dis_pred_stride_8", dis_pred);
|
||||||
|
|
||||||
std::vector<ov::ObjectInfo> objects8;
|
std::vector<ov::ObjectInfo> objects8;
|
||||||
generate_nanodet_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold, objects8);
|
generate_nanodet_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold,
|
||||||
|
objects8);
|
||||||
|
|
||||||
proposals.insert(proposals.end(), objects8.begin(), objects8.end());
|
proposals.insert(proposals.end(), objects8.begin(), objects8.end());
|
||||||
}
|
}
|
||||||
@@ -169,7 +162,8 @@ int Nanodet::Detect(const unsigned char* rgbdata,
|
|||||||
ex.extract("dis_pred_stride_16", dis_pred);
|
ex.extract("dis_pred_stride_16", dis_pred);
|
||||||
|
|
||||||
std::vector<ov::ObjectInfo> objects16;
|
std::vector<ov::ObjectInfo> objects16;
|
||||||
generate_nanodet_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold, objects16);
|
generate_nanodet_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold,
|
||||||
|
objects16);
|
||||||
|
|
||||||
proposals.insert(proposals.end(), objects16.begin(), objects16.end());
|
proposals.insert(proposals.end(), objects16.begin(), objects16.end());
|
||||||
}
|
}
|
||||||
@@ -182,7 +176,8 @@ int Nanodet::Detect(const unsigned char* rgbdata,
|
|||||||
ex.extract("dis_pred_stride_32", dis_pred);
|
ex.extract("dis_pred_stride_32", dis_pred);
|
||||||
|
|
||||||
std::vector<ov::ObjectInfo> objects32;
|
std::vector<ov::ObjectInfo> objects32;
|
||||||
generate_nanodet_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold, objects32);
|
generate_nanodet_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold,
|
||||||
|
objects32);
|
||||||
|
|
||||||
proposals.insert(proposals.end(), objects32.begin(), objects32.end());
|
proposals.insert(proposals.end(), objects32.begin(), objects32.end());
|
||||||
}
|
}
|
||||||
@@ -197,8 +192,7 @@ int Nanodet::Detect(const unsigned char* rgbdata,
|
|||||||
int count = picked.size();
|
int count = picked.size();
|
||||||
rois.resize(count);
|
rois.resize(count);
|
||||||
|
|
||||||
for (int i = 0; i < count; i++)
|
for (int i = 0; i < count; i++) {
|
||||||
{
|
|
||||||
ov::ObjectInfo roi = proposals[picked[i]];
|
ov::ObjectInfo roi = proposals[picked[i]];
|
||||||
|
|
||||||
// adjust offset to original unpadded
|
// adjust offset to original unpadded
|
||||||
@@ -221,14 +215,12 @@ int Nanodet::Detect(const unsigned char* rgbdata,
|
|||||||
rois[i] = roi;
|
rois[i] = roi;
|
||||||
}
|
}
|
||||||
// sort objects by area
|
// sort objects by area
|
||||||
struct
|
struct {
|
||||||
{
|
bool operator()(const ov::ObjectInfo &a, const ov::ObjectInfo &b) const {
|
||||||
bool operator()(const ov::ObjectInfo& a, const ov::ObjectInfo& b) const
|
|
||||||
{
|
|
||||||
return a.rect.area() > b.rect.area();
|
return a.rect.area() > b.rect.area();
|
||||||
}
|
}
|
||||||
} objects_area_greater;
|
} objects_area_greater;
|
||||||
std::sort(rois.begin(), rois.end(), objects_area_greater);
|
std::sort(rois.begin(), rois.end(), objects_area_greater);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
} // namespace ovhand
|
||||||
|
37
src/hand/pose3d.h
Normal file
37
src/hand/pose3d.h
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
#ifndef _HAND_POSE3D_C_H_
|
||||||
|
#define _HAND_POSE3D_C_H_
|
||||||
|
|
||||||
|
#include "../common/common.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
typedef struct PalmObject {
|
||||||
|
float score;
|
||||||
|
float rotation;
|
||||||
|
Point2fVector *rect;
|
||||||
|
Point2fVector *landmarks;
|
||||||
|
Point2fVector *skeleton;
|
||||||
|
Point3dVector *skeleton3d;
|
||||||
|
} PalmObject;
|
||||||
|
|
||||||
|
typedef struct PalmObjectVector {
|
||||||
|
PalmObject *items;
|
||||||
|
int length;
|
||||||
|
} PalmObjectVector;
|
||||||
|
|
||||||
|
void FreePalmObject(PalmObject *obj);
|
||||||
|
void FreePalmObjectVector(PalmObjectVector *vec);
|
||||||
|
|
||||||
|
typedef void *IHandPose3DEstimator;
|
||||||
|
IHandPose3DEstimator new_mediapipe_hand();
|
||||||
|
void destroy_mediapipe_hand(IHandPose3DEstimator d);
|
||||||
|
int mediapipe_hand_load_model(IHandPose3DEstimator d, const char *palm_path,
|
||||||
|
const char *hand_path);
|
||||||
|
int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata,
|
||||||
|
int img_width, int img_height, PalmObjectVector *vec);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // !_HAND_POSE3D_C_H_
|
106
src/hand/pose3d/estimator.cpp
Normal file
106
src/hand/pose3d/estimator.cpp
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
#include "../pose3d.h"
|
||||||
|
#include "mediapipe/mediapipe.hpp"
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
void FreePalmObject(PalmObject *obj) {
|
||||||
|
if (obj->rect != NULL) {
|
||||||
|
FreePoint2fVector(obj->rect);
|
||||||
|
obj->rect = NULL;
|
||||||
|
}
|
||||||
|
if (obj->skeleton != NULL) {
|
||||||
|
FreePoint2fVector(obj->skeleton);
|
||||||
|
obj->skeleton = NULL;
|
||||||
|
}
|
||||||
|
if (obj->skeleton3d != NULL) {
|
||||||
|
FreePoint3dVector(obj->skeleton3d);
|
||||||
|
obj->skeleton3d = NULL;
|
||||||
|
}
|
||||||
|
if (obj->landmarks != NULL) {
|
||||||
|
FreePoint2fVector(obj->landmarks);
|
||||||
|
obj->landmarks = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void FreePalmObjectVector(PalmObjectVector *vec) {
|
||||||
|
if (vec->items != NULL) {
|
||||||
|
for (int i = 0; i < vec->length; i++) {
|
||||||
|
FreePalmObject(&vec->items[i]);
|
||||||
|
}
|
||||||
|
free(vec->items);
|
||||||
|
vec->items = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
IHandPose3DEstimator new_mediapipe_hand() {
|
||||||
|
return new ovhand3d::MediapipeHand();
|
||||||
|
}
|
||||||
|
|
||||||
|
void destroy_mediapipe_hand(IHandPose3DEstimator d) {
|
||||||
|
delete static_cast<ovhand3d::MediapipeHand *>(d);
|
||||||
|
}
|
||||||
|
|
||||||
|
int mediapipe_hand_load_model(IHandPose3DEstimator d, const char *palm_path,
|
||||||
|
const char *hand_path) {
|
||||||
|
return static_cast<ovhand3d::MediapipeHand *>(d)->LoadModel(palm_path,
|
||||||
|
hand_path);
|
||||||
|
}
|
||||||
|
|
||||||
|
int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata,
|
||||||
|
int img_width, int img_height,
|
||||||
|
PalmObjectVector *objects) {
|
||||||
|
std::vector<ovhand3d::PalmObject> objs;
|
||||||
|
|
||||||
|
int ret = static_cast<ovhand3d::MediapipeHand *>(d)->Detect(
|
||||||
|
rgbdata, img_width, img_height, objs);
|
||||||
|
if (ret != 0) {
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
const size_t total_objs = objs.size();
|
||||||
|
objects->length = total_objs;
|
||||||
|
if (total_objs == 0) {
|
||||||
|
objects->items = NULL;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
objects->items = (PalmObject *)malloc(total_objs * sizeof(PalmObject));
|
||||||
|
for (size_t i = 0; i < total_objs; ++i) {
|
||||||
|
objects->items[i].score = objs[i].score;
|
||||||
|
objects->items[i].rotation = objs[i].rotation;
|
||||||
|
objects->items[i].rect = (Point2fVector *)malloc(sizeof(Point2fVector));
|
||||||
|
objects->items[i].rect->length = 4;
|
||||||
|
objects->items[i].rect->points = (Point2f *)malloc(4 * sizeof(Point2f));
|
||||||
|
for (size_t j = 0; j < 4; ++j) {
|
||||||
|
objects->items[i].rect->points[j] = objs[i].hand_pos[j];
|
||||||
|
}
|
||||||
|
objects->items[i].landmarks =
|
||||||
|
(Point2fVector *)malloc(sizeof(Point2fVector));
|
||||||
|
objects->items[i].landmarks->length = 7;
|
||||||
|
objects->items[i].landmarks->points =
|
||||||
|
(Point2f *)malloc(4 * sizeof(Point2f));
|
||||||
|
for (size_t j = 0; j < 7; ++j) {
|
||||||
|
objects->items[i].landmarks->points[j] = objs[i].landmarks[j];
|
||||||
|
}
|
||||||
|
const size_t total_skeleton = objs[i].skeleton.size();
|
||||||
|
if (total_skeleton == 0) {
|
||||||
|
objects->items[i].skeleton = NULL;
|
||||||
|
objects->items[i].skeleton3d = NULL;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
objects->items[i].skeleton = (Point2fVector *)malloc(sizeof(Point2fVector));
|
||||||
|
objects->items[i].skeleton->length = total_skeleton;
|
||||||
|
objects->items[i].skeleton->points =
|
||||||
|
(Point2f *)malloc(total_skeleton * sizeof(Point2f));
|
||||||
|
objects->items[i].skeleton3d =
|
||||||
|
(Point3dVector *)malloc(sizeof(Point3dVector));
|
||||||
|
objects->items[i].skeleton3d->length = total_skeleton;
|
||||||
|
objects->items[i].skeleton3d->points =
|
||||||
|
(Point3d *)malloc(total_skeleton * sizeof(Point3d));
|
||||||
|
for (size_t j = 0; j < total_skeleton; ++j) {
|
||||||
|
objects->items[i].skeleton->points[j].x = objs[i].skeleton[j].x;
|
||||||
|
objects->items[i].skeleton->points[j].y = objs[i].skeleton[j].y;
|
||||||
|
objects->items[i].skeleton3d->points[j].x = objs[i].skeleton3d[j].x;
|
||||||
|
objects->items[i].skeleton3d->points[j].y = objs[i].skeleton3d[j].y;
|
||||||
|
objects->items[i].skeleton3d->points[j].z = objs[i].skeleton3d[j].z;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
534
src/hand/pose3d/mediapipe/mediapipe.cpp
Normal file
534
src/hand/pose3d/mediapipe/mediapipe.cpp
Normal file
@@ -0,0 +1,534 @@
|
|||||||
|
#include "mediapipe.hpp"
|
||||||
|
#include "mat.h"
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
namespace ovhand3d {
|
||||||
|
|
||||||
|
static float calculate_scale(float min_scale, float max_scale, int stride_index,
|
||||||
|
int num_strides) {
|
||||||
|
if (num_strides == 1)
|
||||||
|
return (min_scale + max_scale) * 0.5f;
|
||||||
|
else
|
||||||
|
return min_scale +
|
||||||
|
(max_scale - min_scale) * 1.0 * stride_index / (num_strides - 1.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void generate_anchors(std::vector<Anchor> &anchors,
|
||||||
|
const AnchorsParams &anchor_params) {
|
||||||
|
int layer_id = 0;
|
||||||
|
for (int layer_id = 0; layer_id < anchor_params.strides.size();) {
|
||||||
|
std::vector<float> anchor_height;
|
||||||
|
std::vector<float> anchor_width;
|
||||||
|
std::vector<float> aspect_ratios;
|
||||||
|
std::vector<float> scales;
|
||||||
|
|
||||||
|
int last_same_stride_layer = layer_id;
|
||||||
|
while (last_same_stride_layer < (int)anchor_params.strides.size() &&
|
||||||
|
anchor_params.strides[last_same_stride_layer] ==
|
||||||
|
anchor_params.strides[layer_id]) {
|
||||||
|
const float scale =
|
||||||
|
calculate_scale(anchor_params.min_scale, anchor_params.max_scale,
|
||||||
|
last_same_stride_layer, anchor_params.strides.size());
|
||||||
|
{
|
||||||
|
for (int aspect_ratio_id = 0;
|
||||||
|
aspect_ratio_id < (int)anchor_params.aspect_ratios.size();
|
||||||
|
aspect_ratio_id++) {
|
||||||
|
aspect_ratios.push_back(anchor_params.aspect_ratios[aspect_ratio_id]);
|
||||||
|
scales.push_back(scale);
|
||||||
|
}
|
||||||
|
|
||||||
|
const float scale_next =
|
||||||
|
last_same_stride_layer == (int)anchor_params.strides.size() - 1
|
||||||
|
? 1.0f
|
||||||
|
: calculate_scale(
|
||||||
|
anchor_params.min_scale, anchor_params.max_scale,
|
||||||
|
last_same_stride_layer + 1, anchor_params.strides.size());
|
||||||
|
scales.push_back(sqrt(scale * scale_next));
|
||||||
|
aspect_ratios.push_back(1.0);
|
||||||
|
}
|
||||||
|
last_same_stride_layer++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < (int)aspect_ratios.size(); ++i) {
|
||||||
|
const float ratio_sqrts = sqrt(aspect_ratios[i]);
|
||||||
|
anchor_height.push_back(scales[i] / ratio_sqrts);
|
||||||
|
anchor_width.push_back(scales[i] * ratio_sqrts);
|
||||||
|
}
|
||||||
|
|
||||||
|
int feature_map_height = 0;
|
||||||
|
int feature_map_width = 0;
|
||||||
|
const int stride = anchor_params.strides[layer_id];
|
||||||
|
feature_map_height = ceil(1.0f * anchor_params.input_size_height / stride);
|
||||||
|
feature_map_width = ceil(1.0f * anchor_params.input_size_width / stride);
|
||||||
|
|
||||||
|
for (int y = 0; y < feature_map_height; ++y) {
|
||||||
|
for (int x = 0; x < feature_map_width; ++x) {
|
||||||
|
for (int anchor_id = 0; anchor_id < (int)anchor_height.size();
|
||||||
|
++anchor_id) {
|
||||||
|
const float x_center =
|
||||||
|
(x + anchor_params.anchor_offset_x) * 1.0f / feature_map_width;
|
||||||
|
const float y_center =
|
||||||
|
(y + anchor_params.anchor_offset_y) * 1.0f / feature_map_height;
|
||||||
|
|
||||||
|
Anchor new_anchor;
|
||||||
|
new_anchor.x_center = x_center;
|
||||||
|
new_anchor.y_center = y_center;
|
||||||
|
|
||||||
|
new_anchor.w = 1.0f;
|
||||||
|
new_anchor.h = 1.0f;
|
||||||
|
|
||||||
|
anchors.push_back(new_anchor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
layer_id = last_same_stride_layer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void create_ssd_anchors(int input_w, int input_h,
|
||||||
|
std::vector<Anchor> &anchors) {
|
||||||
|
AnchorsParams anchor_options;
|
||||||
|
anchor_options.num_layers = 4;
|
||||||
|
anchor_options.min_scale = 0.1484375;
|
||||||
|
anchor_options.max_scale = 0.75;
|
||||||
|
anchor_options.input_size_height = 192;
|
||||||
|
anchor_options.input_size_width = 192;
|
||||||
|
anchor_options.anchor_offset_x = 0.5f;
|
||||||
|
anchor_options.anchor_offset_y = 0.5f;
|
||||||
|
anchor_options.strides.push_back(8);
|
||||||
|
anchor_options.strides.push_back(16);
|
||||||
|
anchor_options.strides.push_back(16);
|
||||||
|
anchor_options.strides.push_back(16);
|
||||||
|
anchor_options.aspect_ratios.push_back(1.0);
|
||||||
|
generate_anchors(anchors, anchor_options);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int decode_bounds(std::list<DetectRegion> ®ion_list,
|
||||||
|
float score_thresh, int input_img_w, int input_img_h,
|
||||||
|
float *scores_ptr, float *bboxes_ptr,
|
||||||
|
std::vector<Anchor> &anchors) {
|
||||||
|
DetectRegion region;
|
||||||
|
int i = 0;
|
||||||
|
for (auto &anchor : anchors) {
|
||||||
|
float score = ov::sigmoid(scores_ptr[i]);
|
||||||
|
|
||||||
|
if (score > score_thresh) {
|
||||||
|
float *p = bboxes_ptr + (i * 18);
|
||||||
|
|
||||||
|
float cx = p[0] / input_img_w + anchor.x_center;
|
||||||
|
float cy = p[1] / input_img_h + anchor.y_center;
|
||||||
|
float w = p[2] / input_img_w;
|
||||||
|
float h = p[3] / input_img_h;
|
||||||
|
|
||||||
|
ov::Point2f topleft, btmright;
|
||||||
|
topleft.x = cx - w * 0.5f;
|
||||||
|
topleft.y = cy - h * 0.5f;
|
||||||
|
btmright.x = cx + w * 0.5f;
|
||||||
|
btmright.y = cy + h * 0.5f;
|
||||||
|
|
||||||
|
region.score = score;
|
||||||
|
region.topleft = topleft;
|
||||||
|
region.btmright = btmright;
|
||||||
|
|
||||||
|
for (int j = 0; j < 7; j++) {
|
||||||
|
float lx = p[4 + (2 * j) + 0];
|
||||||
|
float ly = p[4 + (2 * j) + 1];
|
||||||
|
lx += anchor.x_center * input_img_w;
|
||||||
|
ly += anchor.y_center * input_img_h;
|
||||||
|
lx /= (float)input_img_w;
|
||||||
|
ly /= (float)input_img_h;
|
||||||
|
|
||||||
|
region.landmarks[j].x = lx;
|
||||||
|
region.landmarks[j].y = ly;
|
||||||
|
}
|
||||||
|
|
||||||
|
region_list.push_back(region);
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static float calc_intersection_over_union(DetectRegion ®ion0,
|
||||||
|
DetectRegion ®ion1) {
|
||||||
|
float sx0 = region0.topleft.x;
|
||||||
|
float sy0 = region0.topleft.y;
|
||||||
|
float ex0 = region0.btmright.x;
|
||||||
|
float ey0 = region0.btmright.y;
|
||||||
|
float sx1 = region1.topleft.x;
|
||||||
|
float sy1 = region1.topleft.y;
|
||||||
|
float ex1 = region1.btmright.x;
|
||||||
|
float ey1 = region1.btmright.y;
|
||||||
|
|
||||||
|
float xmin0 = std::min(sx0, ex0);
|
||||||
|
float ymin0 = std::min(sy0, ey0);
|
||||||
|
float xmax0 = std::max(sx0, ex0);
|
||||||
|
float ymax0 = std::max(sy0, ey0);
|
||||||
|
float xmin1 = std::min(sx1, ex1);
|
||||||
|
float ymin1 = std::min(sy1, ey1);
|
||||||
|
float xmax1 = std::max(sx1, ex1);
|
||||||
|
float ymax1 = std::max(sy1, ey1);
|
||||||
|
|
||||||
|
float area0 = (ymax0 - ymin0) * (xmax0 - xmin0);
|
||||||
|
float area1 = (ymax1 - ymin1) * (xmax1 - xmin1);
|
||||||
|
if (area0 <= 0 || area1 <= 0)
|
||||||
|
return 0.0f;
|
||||||
|
|
||||||
|
float intersect_xmin = std::max(xmin0, xmin1);
|
||||||
|
float intersect_ymin = std::max(ymin0, ymin1);
|
||||||
|
float intersect_xmax = std::min(xmax0, xmax1);
|
||||||
|
float intersect_ymax = std::min(ymax0, ymax1);
|
||||||
|
|
||||||
|
float intersect_area = std::max(intersect_ymax - intersect_ymin, 0.0f) *
|
||||||
|
std::max(intersect_xmax - intersect_xmin, 0.0f);
|
||||||
|
|
||||||
|
return intersect_area / (area0 + area1 - intersect_area);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int non_max_suppression(std::list<DetectRegion> ®ion_list,
|
||||||
|
std::list<DetectRegion> ®ion_nms_list,
|
||||||
|
float iou_thresh) {
|
||||||
|
region_list.sort([](DetectRegion &v1, DetectRegion &v2) {
|
||||||
|
return v1.score > v2.score ? true : false;
|
||||||
|
});
|
||||||
|
|
||||||
|
for (auto itr = region_list.begin(); itr != region_list.end(); itr++) {
|
||||||
|
DetectRegion region_candidate = *itr;
|
||||||
|
|
||||||
|
int ignore_candidate = false;
|
||||||
|
for (auto itr_nms = region_nms_list.rbegin();
|
||||||
|
itr_nms != region_nms_list.rend(); itr_nms++) {
|
||||||
|
DetectRegion region_nms = *itr_nms;
|
||||||
|
|
||||||
|
float iou = calc_intersection_over_union(region_candidate, region_nms);
|
||||||
|
if (iou >= iou_thresh) {
|
||||||
|
ignore_candidate = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!ignore_candidate) {
|
||||||
|
region_nms_list.push_back(region_candidate);
|
||||||
|
if (region_nms_list.size() >= 5)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static float normalize_radians(float angle) {
|
||||||
|
return angle - 2 * M_PI * floor((angle - (-M_PI)) / (2 * M_PI));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void compute_rotation(DetectRegion ®ion) {
|
||||||
|
float x0 = region.landmarks[0].x;
|
||||||
|
float y0 = region.landmarks[0].y;
|
||||||
|
float x1 = region.landmarks[2].x;
|
||||||
|
float y1 = region.landmarks[2].y;
|
||||||
|
|
||||||
|
float target_angle = M_PI * 0.5f;
|
||||||
|
float rotation = target_angle - atan2(-(y1 - y0), x1 - x0);
|
||||||
|
|
||||||
|
region.rotation = normalize_radians(rotation);
|
||||||
|
}
|
||||||
|
|
||||||
|
void rot_vec(ov::Point2f &vec, float rotation) {
|
||||||
|
float sx = vec.x;
|
||||||
|
float sy = vec.y;
|
||||||
|
vec.x = sx * cos(rotation) - sy * sin(rotation);
|
||||||
|
vec.y = sx * sin(rotation) + sy * cos(rotation);
|
||||||
|
}
|
||||||
|
|
||||||
|
void compute_detect_to_roi(DetectRegion ®ion, const int &target_size,
|
||||||
|
PalmObject &palm) {
|
||||||
|
float width = region.btmright.x - region.topleft.x;
|
||||||
|
float height = region.btmright.y - region.topleft.y;
|
||||||
|
float palm_cx = region.topleft.x + width * 0.5f;
|
||||||
|
float palm_cy = region.topleft.y + height * 0.5f;
|
||||||
|
|
||||||
|
float hand_cx;
|
||||||
|
float hand_cy;
|
||||||
|
float rotation = region.rotation;
|
||||||
|
float shift_x = 0.0f;
|
||||||
|
float shift_y = -0.5f;
|
||||||
|
|
||||||
|
if (rotation == 0.0f) {
|
||||||
|
hand_cx = palm_cx + (width * shift_x);
|
||||||
|
hand_cy = palm_cy + (height * shift_y);
|
||||||
|
} else {
|
||||||
|
float dx =
|
||||||
|
(width * shift_x) * cos(rotation) - (height * shift_y) * sin(rotation);
|
||||||
|
float dy =
|
||||||
|
(width * shift_x) * sin(rotation) + (height * shift_y) * cos(rotation);
|
||||||
|
hand_cx = palm_cx + dx;
|
||||||
|
hand_cy = palm_cy + dy;
|
||||||
|
}
|
||||||
|
|
||||||
|
float long_side = std::max(width, height);
|
||||||
|
width = long_side;
|
||||||
|
height = long_side;
|
||||||
|
float hand_w = width * 2.6f;
|
||||||
|
float hand_h = height * 2.6f;
|
||||||
|
|
||||||
|
palm.hand_cx = hand_cx;
|
||||||
|
palm.hand_cy = hand_cy;
|
||||||
|
palm.hand_w = hand_w;
|
||||||
|
palm.hand_h = hand_h;
|
||||||
|
|
||||||
|
float dx = hand_w * 0.5f;
|
||||||
|
float dy = hand_h * 0.5f;
|
||||||
|
|
||||||
|
palm.hand_pos[0].x = -dx;
|
||||||
|
palm.hand_pos[0].y = -dy;
|
||||||
|
palm.hand_pos[1].x = +dx;
|
||||||
|
palm.hand_pos[1].y = -dy;
|
||||||
|
palm.hand_pos[2].x = +dx;
|
||||||
|
palm.hand_pos[2].y = +dy;
|
||||||
|
palm.hand_pos[3].x = -dx;
|
||||||
|
palm.hand_pos[3].y = +dy;
|
||||||
|
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
rot_vec(palm.hand_pos[i], rotation);
|
||||||
|
palm.hand_pos[i].x += hand_cx;
|
||||||
|
palm.hand_pos[i].y += hand_cy;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 7; i++) {
|
||||||
|
palm.landmarks[i] = region.landmarks[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
palm.score = region.score;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void pack_detect_result(std::vector<DetectRegion> &detect_results,
|
||||||
|
std::list<DetectRegion> ®ion_list,
|
||||||
|
const int &target_size,
|
||||||
|
std::vector<PalmObject> &palmlist) {
|
||||||
|
for (auto ®ion : region_list) {
|
||||||
|
compute_rotation(region);
|
||||||
|
PalmObject palm;
|
||||||
|
compute_detect_to_roi(region, target_size, palm);
|
||||||
|
palmlist.push_back(palm);
|
||||||
|
detect_results.push_back(region);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MediapipeHand::MediapipeHand() : ov::EstimatorBase() {
|
||||||
|
palm_blob_allocator_.set_size_compare_ratio(0.f);
|
||||||
|
palm_workspace_allocator_.set_size_compare_ratio(0.f);
|
||||||
|
hand_blob_allocator_.set_size_compare_ratio(0.f);
|
||||||
|
hand_workspace_allocator_.set_size_compare_ratio(0.f);
|
||||||
|
palm_net_ = new ncnn::Net();
|
||||||
|
hand_net_ = new ncnn::Net();
|
||||||
|
initialized_ = false;
|
||||||
|
if (num_threads > 0) {
|
||||||
|
palm_net_->opt.num_threads = num_threads;
|
||||||
|
hand_net_->opt.num_threads = num_threads;
|
||||||
|
}
|
||||||
|
palm_net_->opt.blob_allocator = &palm_blob_allocator_;
|
||||||
|
palm_net_->opt.workspace_allocator = &palm_workspace_allocator_;
|
||||||
|
palm_net_->opt.lightmode = light_mode_;
|
||||||
|
hand_net_->opt.blob_allocator = &hand_blob_allocator_;
|
||||||
|
hand_net_->opt.workspace_allocator = &hand_workspace_allocator_;
|
||||||
|
hand_net_->opt.lightmode = light_mode_;
|
||||||
|
#ifdef OV_VULKAN
|
||||||
|
palm_net_->opt.use_vulkan_compute = true;
|
||||||
|
hand_net_->opt.use_vulkan_compute = true;
|
||||||
|
#endif // OV_VULKAN
|
||||||
|
}
|
||||||
|
|
||||||
|
MediapipeHand::~MediapipeHand() {
|
||||||
|
if (palm_net_) {
|
||||||
|
palm_net_->clear();
|
||||||
|
}
|
||||||
|
if (hand_net_) {
|
||||||
|
hand_net_->clear();
|
||||||
|
}
|
||||||
|
palm_workspace_allocator_.clear();
|
||||||
|
palm_blob_allocator_.clear();
|
||||||
|
hand_workspace_allocator_.clear();
|
||||||
|
hand_blob_allocator_.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
void MediapipeHand::set_num_threads(int n) {
|
||||||
|
EstimatorBase::set_num_threads(n);
|
||||||
|
if (palm_net_) {
|
||||||
|
palm_net_->opt.num_threads = n;
|
||||||
|
}
|
||||||
|
if (hand_net_) {
|
||||||
|
hand_net_->opt.num_threads = n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void MediapipeHand::set_light_mode(bool mode) {
|
||||||
|
if (palm_net_) {
|
||||||
|
palm_net_->opt.lightmode = mode;
|
||||||
|
}
|
||||||
|
if (hand_net_) {
|
||||||
|
hand_net_->opt.lightmode = mode;
|
||||||
|
}
|
||||||
|
light_mode_ = mode;
|
||||||
|
}
|
||||||
|
|
||||||
|
int MediapipeHand::LoadModel(const char *palm_path, const char *hand_path) {
|
||||||
|
std::string palm_param_file = std::string(palm_path) + "/param";
|
||||||
|
std::string palm_bin_file = std::string(palm_path) + "/bin";
|
||||||
|
std::string hand_param_file = std::string(hand_path) + "/param";
|
||||||
|
std::string hand_bin_file = std::string(hand_path) + "/bin";
|
||||||
|
if (palm_net_->load_param(palm_param_file.c_str()) == -1 ||
|
||||||
|
palm_net_->load_model(palm_bin_file.c_str()) == -1) {
|
||||||
|
return 10000;
|
||||||
|
}
|
||||||
|
if (hand_net_->load_param(hand_param_file.c_str()) == -1 ||
|
||||||
|
hand_net_->load_model(hand_bin_file.c_str()) == -1) {
|
||||||
|
return 10000;
|
||||||
|
}
|
||||||
|
|
||||||
|
initialized_ = true;
|
||||||
|
anchors.clear();
|
||||||
|
create_ssd_anchors(target_size, target_size, anchors);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int MediapipeHand::Detect(const unsigned char *rgbdata, int img_width,
|
||||||
|
int img_height, std::vector<PalmObject> &objects) {
|
||||||
|
if (!initialized_) {
|
||||||
|
return 10000;
|
||||||
|
}
|
||||||
|
if (rgbdata == 0) {
|
||||||
|
return 10001;
|
||||||
|
}
|
||||||
|
int w = img_width;
|
||||||
|
int h = img_height;
|
||||||
|
float scale = 1.f;
|
||||||
|
if (w > h) {
|
||||||
|
scale = (float)target_size / w;
|
||||||
|
w = target_size;
|
||||||
|
h = h * scale;
|
||||||
|
} else {
|
||||||
|
scale = (float)target_size / h;
|
||||||
|
h = target_size;
|
||||||
|
w = w * scale;
|
||||||
|
}
|
||||||
|
|
||||||
|
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB,
|
||||||
|
img_width, img_height, w, h);
|
||||||
|
|
||||||
|
int wpad = target_size - w;
|
||||||
|
int hpad = target_size - h;
|
||||||
|
ncnn::Mat in_pad;
|
||||||
|
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2,
|
||||||
|
wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
|
||||||
|
const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
|
||||||
|
in_pad.substract_mean_normalize(0, norm_vals);
|
||||||
|
|
||||||
|
ncnn::Extractor ex = palm_net_->create_extractor();
|
||||||
|
ncnn::Mat cls, reg;
|
||||||
|
ex.input("input", in_pad);
|
||||||
|
ex.extract("cls", cls);
|
||||||
|
ex.extract("reg", reg);
|
||||||
|
|
||||||
|
float *scores = (float *)cls.data;
|
||||||
|
float *bboxes = (float *)reg.data;
|
||||||
|
|
||||||
|
std::list<DetectRegion> region_list, region_nms_list;
|
||||||
|
std::vector<DetectRegion> detect_results;
|
||||||
|
|
||||||
|
decode_bounds(region_list, prob_threshold, target_size, target_size, scores,
|
||||||
|
bboxes, anchors);
|
||||||
|
non_max_suppression(region_list, region_nms_list, nms_threshold);
|
||||||
|
objects.clear();
|
||||||
|
pack_detect_result(detect_results, region_nms_list, target_size, objects);
|
||||||
|
|
||||||
|
for (int i = 0; i < objects.size(); i++) {
|
||||||
|
objects[i].hand_pos[0].x =
|
||||||
|
(objects[i].hand_pos[0].x * target_size - ((float)wpad / 2)) / scale;
|
||||||
|
objects[i].hand_pos[0].y =
|
||||||
|
(objects[i].hand_pos[0].y * target_size - ((float)hpad / 2)) / scale;
|
||||||
|
objects[i].hand_pos[1].x =
|
||||||
|
(objects[i].hand_pos[1].x * target_size - ((float)wpad / 2)) / scale;
|
||||||
|
objects[i].hand_pos[1].y =
|
||||||
|
(objects[i].hand_pos[1].y * target_size - ((float)hpad / 2)) / scale;
|
||||||
|
objects[i].hand_pos[2].x =
|
||||||
|
(objects[i].hand_pos[2].x * target_size - ((float)wpad / 2)) / scale;
|
||||||
|
objects[i].hand_pos[2].y =
|
||||||
|
(objects[i].hand_pos[2].y * target_size - ((float)hpad / 2)) / scale;
|
||||||
|
objects[i].hand_pos[3].x =
|
||||||
|
(objects[i].hand_pos[3].x * target_size - ((float)wpad / 2)) / scale;
|
||||||
|
objects[i].hand_pos[3].y =
|
||||||
|
(objects[i].hand_pos[3].y * target_size - ((float)hpad / 2)) / scale;
|
||||||
|
|
||||||
|
for (int j = 0; j < 7; j++) {
|
||||||
|
objects[i].landmarks[j].x =
|
||||||
|
(objects[i].landmarks[j].x * target_size - ((float)wpad / 2)) / scale;
|
||||||
|
objects[i].landmarks[j].y =
|
||||||
|
(objects[i].landmarks[j].y * target_size - ((float)hpad / 2)) / scale;
|
||||||
|
}
|
||||||
|
|
||||||
|
const float srcPts[8] = {
|
||||||
|
objects[i].hand_pos[0].x, objects[i].hand_pos[0].y,
|
||||||
|
objects[i].hand_pos[1].x, objects[i].hand_pos[1].y,
|
||||||
|
objects[i].hand_pos[2].x, objects[i].hand_pos[2].y,
|
||||||
|
objects[i].hand_pos[3].x, objects[i].hand_pos[3].y,
|
||||||
|
};
|
||||||
|
|
||||||
|
const float dstPts[8] = {
|
||||||
|
0, 0, 224, 0, 224, 224, 0, 224,
|
||||||
|
};
|
||||||
|
|
||||||
|
float tm[6];
|
||||||
|
unsigned char *trans_mat =
|
||||||
|
(unsigned char *)malloc(224 * 224 * 3 * sizeof(unsigned char));
|
||||||
|
ncnn::get_affine_transform(dstPts, srcPts, 4, tm);
|
||||||
|
|
||||||
|
ncnn::warpaffine_bilinear_c3(rgbdata, img_width, img_height, trans_mat, 224,
|
||||||
|
224, tm);
|
||||||
|
|
||||||
|
ncnn::Mat trans_image =
|
||||||
|
ncnn::Mat::from_pixels(trans_mat, ncnn::Mat::PIXEL_RGB, 224, 224);
|
||||||
|
|
||||||
|
float score = GetLandmarks(trans_image, tm, objects[i].skeleton,
|
||||||
|
objects[i].skeleton3d);
|
||||||
|
|
||||||
|
free(trans_mat);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
float MediapipeHand::GetLandmarks(ncnn::Mat in, float tm[6],
|
||||||
|
std::vector<ov::Point2f> &skeleton,
|
||||||
|
std::vector<ov::Point3d> &skeleton3d) {
|
||||||
|
|
||||||
|
const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
|
||||||
|
in.substract_mean_normalize(NULL, norm_vals);
|
||||||
|
ncnn::Mat points, score;
|
||||||
|
{
|
||||||
|
ncnn::Extractor ex = hand_net_->create_extractor();
|
||||||
|
ex.input("input", in);
|
||||||
|
ex.extract("points", points);
|
||||||
|
ex.extract("score", score);
|
||||||
|
}
|
||||||
|
|
||||||
|
float *points_data = (float *)points.data;
|
||||||
|
float *score_data = (float *)score.data;
|
||||||
|
for (int i = 0; i < 21; i++) {
|
||||||
|
ov::Point3d pt3d;
|
||||||
|
pt3d.x = points_data[i * 3];
|
||||||
|
pt3d.y = points_data[i * 3 + 1];
|
||||||
|
pt3d.z = points_data[i * 3 + 2];
|
||||||
|
|
||||||
|
ov::Point2f pt;
|
||||||
|
pt.x = pt3d.x * tm[0] + pt3d.y * tm[1] + tm[2];
|
||||||
|
pt.y = pt3d.x * tm[3] + pt3d.y * tm[4] + tm[5];
|
||||||
|
|
||||||
|
skeleton.push_back(pt);
|
||||||
|
|
||||||
|
pt3d.x /= 224.f;
|
||||||
|
pt3d.y /= 224.f;
|
||||||
|
skeleton3d.push_back(pt3d);
|
||||||
|
}
|
||||||
|
return score_data[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace ovhand3d
|
87
src/hand/pose3d/mediapipe/mediapipe.hpp
Normal file
87
src/hand/pose3d/mediapipe/mediapipe.hpp
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
#ifndef _HAND_POSE3D_MEDIAPIPE_H_
|
||||||
|
#define _HAND_POSE3D_MEDIAPIPE_H_
|
||||||
|
|
||||||
|
#include "../../../common/common.hpp"
|
||||||
|
#include <net.h>
|
||||||
|
|
||||||
|
namespace ovhand3d {
|
||||||
|
|
||||||
|
struct PalmObject {
|
||||||
|
float score;
|
||||||
|
ov::Point2f landmarks[7];
|
||||||
|
float rotation;
|
||||||
|
|
||||||
|
float hand_cx;
|
||||||
|
float hand_cy;
|
||||||
|
float hand_w;
|
||||||
|
float hand_h;
|
||||||
|
ov::Point2f hand_pos[4];
|
||||||
|
|
||||||
|
std::vector<ov::Point2f> skeleton;
|
||||||
|
std::vector<ov::Point3d> skeleton3d;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct DetectRegion {
|
||||||
|
float score;
|
||||||
|
ov::Point2f topleft;
|
||||||
|
ov::Point2f btmright;
|
||||||
|
ov::Point2f landmarks[7];
|
||||||
|
|
||||||
|
float rotation;
|
||||||
|
ov::Point2f roi_center;
|
||||||
|
ov::Point2f roi_size;
|
||||||
|
ov::Point2f roi_coord[4];
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Anchor {
|
||||||
|
float x_center, y_center, w, h;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AnchorsParams {
|
||||||
|
int input_size_width;
|
||||||
|
int input_size_height;
|
||||||
|
|
||||||
|
float min_scale;
|
||||||
|
float max_scale;
|
||||||
|
|
||||||
|
float anchor_offset_x;
|
||||||
|
float anchor_offset_y;
|
||||||
|
|
||||||
|
int num_layers;
|
||||||
|
std::vector<int> feature_map_width;
|
||||||
|
std::vector<int> feature_map_height;
|
||||||
|
std::vector<int> strides;
|
||||||
|
std::vector<float> aspect_ratios;
|
||||||
|
};
|
||||||
|
|
||||||
|
class MediapipeHand : public ov::EstimatorBase {
|
||||||
|
public:
|
||||||
|
MediapipeHand();
|
||||||
|
~MediapipeHand();
|
||||||
|
int LoadModel(const char *palm_model, const char *hand_model);
|
||||||
|
int Detect(const unsigned char *rgbdata, int img_width, int img_heidht,
|
||||||
|
std::vector<PalmObject> &objects);
|
||||||
|
float GetLandmarks(ncnn::Mat in, float tm[6],
|
||||||
|
std::vector<ov::Point2f> &skeleton,
|
||||||
|
std::vector<ov::Point3d> &skeleton3d);
|
||||||
|
void set_light_mode(bool mode);
|
||||||
|
void set_num_threads(int n);
|
||||||
|
|
||||||
|
private:
|
||||||
|
ncnn::Net *palm_net_ = NULL;
|
||||||
|
ncnn::Net *hand_net_ = NULL;
|
||||||
|
ncnn::PoolAllocator palm_workspace_allocator_;
|
||||||
|
ncnn::UnlockedPoolAllocator palm_blob_allocator_;
|
||||||
|
ncnn::PoolAllocator hand_workspace_allocator_;
|
||||||
|
ncnn::UnlockedPoolAllocator hand_blob_allocator_;
|
||||||
|
bool initialized_ = false;
|
||||||
|
bool light_mode_ = true;
|
||||||
|
std::vector<Anchor> anchors;
|
||||||
|
float prob_threshold = 0.55f;
|
||||||
|
float nms_threshold = 0.3f;
|
||||||
|
const int target_size = 192;
|
||||||
|
const float mean_vals[3] = {0.f, 0.f, 0.f};
|
||||||
|
const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
|
||||||
|
};
|
||||||
|
} // namespace ovhand3d
|
||||||
|
#endif // !_HAND_POSE3D_MEDIAPIPE_H_
|
161
src/pose/estimator/pptinypose/pptinypose.bak
Normal file
161
src/pose/estimator/pptinypose/pptinypose.bak
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
#include "pptinypose.hpp"
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#ifdef OV_VULKAN
|
||||||
|
#include "gpu.h"
|
||||||
|
#endif // OV_VULKAN
|
||||||
|
|
||||||
|
namespace ovpose {
|
||||||
|
static int argmax(const ncnn::Mat &bottom_blob, ncnn::Mat &top_blob,
|
||||||
|
std::vector<float> &prob) {
|
||||||
|
int size = bottom_blob.total();
|
||||||
|
const float *ptr = bottom_blob;
|
||||||
|
std::vector<std::pair<float, int>> vec;
|
||||||
|
vec.resize(size);
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
vec[i] = std::make_pair(ptr[i], i);
|
||||||
|
}
|
||||||
|
top_blob.create(bottom_blob.c, 1, 1, 4u);
|
||||||
|
float *outptr = top_blob;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < bottom_blob.c; i++) {
|
||||||
|
int size0 = bottom_blob.channel(i).total();
|
||||||
|
std::partial_sort(vec.begin() + size0 * i, vec.begin() + size0 * (i + 1),
|
||||||
|
vec.begin() + size0 * (i + 1),
|
||||||
|
std::greater<std::pair<float, int>>());
|
||||||
|
outptr[i] = vec[size0 * i].second - size0 * i;
|
||||||
|
prob.push_back(vec[size0 * i].first);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void dark_parse(const ncnn::Mat &heatmap, std::vector<int> &dim,
|
||||||
|
std::vector<float> &coords, int px, int py, int ch) {
|
||||||
|
/*DARK postpocessing, Zhang et al. Distribution-Aware Coordinate
|
||||||
|
Representation for Human Pose Estimation (CVPR 2020).
|
||||||
|
1) offset = - hassian.inv() * derivative
|
||||||
|
2) dx = (heatmap[x+1] - heatmap[x-1])/2.
|
||||||
|
3) dxx = (dx[x+1] - dx[x-1])/2.
|
||||||
|
4) derivative = Mat([dx, dy])
|
||||||
|
5) hassian = Mat([[dxx, dxy], [dxy, dyy]])
|
||||||
|
*/
|
||||||
|
|
||||||
|
float *heatmap_data = (float *)heatmap.channel(ch).data;
|
||||||
|
std::vector<float> heatmap_ch;
|
||||||
|
heatmap_ch.insert(heatmap_ch.begin(), heatmap_data,
|
||||||
|
heatmap_data + heatmap.channel(ch).total());
|
||||||
|
cv::Mat heatmap_mat = cv::Mat(heatmap_ch).reshape(0, dim[2]);
|
||||||
|
heatmap_mat.convertTo(heatmap_mat, CV_32FC1);
|
||||||
|
cv::GaussianBlur(heatmap_mat, heatmap_mat, cv::Size(3, 3), 0, 0);
|
||||||
|
heatmap_mat = heatmap_mat.reshape(1, 1);
|
||||||
|
heatmap_ch = std::vector<float>(heatmap_mat.reshape(1, 1));
|
||||||
|
|
||||||
|
ncnn::Mat heatmap_mat = heatmap.channel(ch).reshape(dim[2]);
|
||||||
|
heatmap_mat = heatmap_mat.reshape(1);
|
||||||
|
heatmap_ch = (float *)heatmap_mat.data;
|
||||||
|
|
||||||
|
float epsilon = 1e-10;
|
||||||
|
// sample heatmap to get values in around target location
|
||||||
|
float xy = log(fmax(heatmap_ch[py * dim[3] + px], epsilon));
|
||||||
|
float xr = log(fmax(heatmap_ch[py * dim[3] + px + 1], epsilon));
|
||||||
|
float xl = log(fmax(heatmap_ch[py * dim[3] + px - 1], epsilon));
|
||||||
|
|
||||||
|
float xr2 = log(fmax(heatmap_ch[py * dim[3] + px + 2], epsilon));
|
||||||
|
float xl2 = log(fmax(heatmap_ch[py * dim[3] + px - 2], epsilon));
|
||||||
|
float yu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px], epsilon));
|
||||||
|
float yd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px], epsilon));
|
||||||
|
float yu2 = log(fmax(heatmap_ch[(py + 2) * dim[3] + px], epsilon));
|
||||||
|
float yd2 = log(fmax(heatmap_ch[(py - 2) * dim[3] + px], epsilon));
|
||||||
|
float xryu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px + 1], epsilon));
|
||||||
|
float xryd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px + 1], epsilon));
|
||||||
|
float xlyu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px - 1], epsilon));
|
||||||
|
float xlyd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px - 1], epsilon));
|
||||||
|
|
||||||
|
// compute dx/dy and dxx/dyy with sampled values
|
||||||
|
float dx = 0.5 * (xr - xl);
|
||||||
|
float dy = 0.5 * (yu - yd);
|
||||||
|
float dxx = 0.25 * (xr2 - 2 * xy + xl2);
|
||||||
|
float dxy = 0.25 * (xryu - xryd - xlyu + xlyd);
|
||||||
|
float dyy = 0.25 * (yu2 - 2 * xy + yd2);
|
||||||
|
|
||||||
|
// finally get offset by derivative and hassian, which combined by dx/dy and
|
||||||
|
// dxx/dyy
|
||||||
|
if (dxx * dyy - dxy * dxy != 0) {
|
||||||
|
float M[2][2] = {dxx, dxy, dxy, dyy};
|
||||||
|
float D[2] = {dx, dy};
|
||||||
|
cv::Mat hassian(2, 2, CV_32F, M);
|
||||||
|
cv::Mat derivative(2, 1, CV_32F, D);
|
||||||
|
cv::Mat offset = -hassian.inv() * derivative;
|
||||||
|
coords[ch * 2] += offset.at<float>(0, 0);
|
||||||
|
coords[ch * 2 + 1] += offset.at<float>(1, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::vector<float> get_final_preds(const ncnn::Mat &heatmap,
|
||||||
|
const ncnn::Mat &argmax_out) {
|
||||||
|
std::vector<float> coords((size_t)heatmap.c * 2);
|
||||||
|
for (int i = 0; i < heatmap.c; i++) {
|
||||||
|
int idx = argmax_out[i];
|
||||||
|
coords[i * 2] = idx % heatmap.w;
|
||||||
|
coords[i * 2 + 1] = (float)idx / heatmap.w;
|
||||||
|
|
||||||
|
int px = int(coords[i * 2] + 0.5);
|
||||||
|
int py = int(coords[i * 2 + 1] + 0.5);
|
||||||
|
|
||||||
|
std::vector<int> dim({1, heatmap.c, heatmap.h, heatmap.w});
|
||||||
|
dark_parse(heatmap, dim, coords, px, py, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
return coords;
|
||||||
|
}
|
||||||
|
PPTinyPoseEstimator::PPTinyPoseEstimator(int target_size) : Estimator() {
|
||||||
|
if (target_size == 128) {
|
||||||
|
target_width_ = 96;
|
||||||
|
target_height_ = 128;
|
||||||
|
} else {
|
||||||
|
target_width_ = 196;
|
||||||
|
target_height_ = 256;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int PPTinyPoseEstimator::ExtractKeypoints(
|
||||||
|
const unsigned char *rgbdata, int img_width, int img_height,
|
||||||
|
const ov::Rect &rect, std::vector<ov::Keypoint> *keypoints) {
|
||||||
|
if (!initialized_) {
|
||||||
|
return 10000;
|
||||||
|
}
|
||||||
|
if (rgbdata == 0) {
|
||||||
|
return 10001;
|
||||||
|
}
|
||||||
|
keypoints->clear();
|
||||||
|
|
||||||
|
ncnn::Mat in = ncnn::Mat::from_pixels_roi_resize(
|
||||||
|
rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, rect.x, rect.y,
|
||||||
|
rect.width, rect.height, target_width_, target_height_);
|
||||||
|
in.substract_mean_normalize(meanVals, normVals);
|
||||||
|
|
||||||
|
ncnn::Extractor ex = net_->create_extractor();
|
||||||
|
ex.set_light_mode(light_mode_);
|
||||||
|
ex.set_num_threads(num_threads);
|
||||||
|
ex.input("image", in);
|
||||||
|
ncnn::Mat out;
|
||||||
|
ex.extract("save_infer_model/scale_0.tmp_1", out);
|
||||||
|
|
||||||
|
ncnn::Mat argmax_out;
|
||||||
|
std::vector<float> probs;
|
||||||
|
argmax(out, argmax_out, probs);
|
||||||
|
std::vector<float> coords = get_final_preds(out, argmax_out);
|
||||||
|
|
||||||
|
for (int i = 0; i < coords.size() / 2; i++) {
|
||||||
|
ov::KeyPoint keypoint;
|
||||||
|
keypoint.p = ov::Point(coords[i * 2] * rect.width / (float)out.w + rect.x,
|
||||||
|
coords[i * 2 + 1] * rect.h / (float)out.h + rect.y);
|
||||||
|
keypoint.score = probs[i];
|
||||||
|
keypoints->push_back(keypoint);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace ovpose
|
25
src/pose/estimator/pptinypose/pptinypose.hpp
Normal file
25
src/pose/estimator/pptinypose/pptinypose.hpp
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
#ifndef _POSE_PPTINYPOSE_ESTIMATOR_H_
|
||||||
|
#define _POSE_PPTINYPOSE_ESTIMATOR_H_
|
||||||
|
|
||||||
|
#include "../estimator.hpp"
|
||||||
|
#include "net.h"
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace ovpose {
|
||||||
|
class PPTinyPoseEstimator : public Estimator {
|
||||||
|
public:
|
||||||
|
PPTinyPoseEstimator(int target_size);
|
||||||
|
int ExtractKeypoints(const unsigned char *rgbdata, int img_width,
|
||||||
|
int img_height, const ov::Rect &rect,
|
||||||
|
std::vector<ov::Keypoint> *keypoints);
|
||||||
|
|
||||||
|
private:
|
||||||
|
int target_width_ = 96;
|
||||||
|
int target_height_ = 128;
|
||||||
|
const float meanVals[3] = {123.675f, 116.28f, 103.53f};
|
||||||
|
const float normVals[3] = {0.01712475f, 0.0175f, 0.01742919f};
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace ovpose
|
||||||
|
|
||||||
|
#endif // !_POSE_PPTINYPOSE_ESTIMATOR_H_
|
Reference in New Issue
Block a user