feat(face): add yolov5 face detecter

This commit is contained in:
Syd Xu
2021-10-29 19:05:58 +08:00
parent a61f9dc7b0
commit 50e43fc864
49 changed files with 1057 additions and 736 deletions

View File

@@ -21,6 +21,7 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM
- mtcnn [Google Drive](https://drive.google.com/drive/folders/14ToHyDXZr4Ihuk8WYp1mVS7QnVxnzEjn?usp=sharing) - mtcnn [Google Drive](https://drive.google.com/drive/folders/14ToHyDXZr4Ihuk8WYp1mVS7QnVxnzEjn?usp=sharing)
- centerface [Google Drive](https://drive.google.com/drive/folders/1xMhO6aCnkkjt90Fh8BxVD_JHB3QJ2q-q?usp=sharing) - centerface [Google Drive](https://drive.google.com/drive/folders/1xMhO6aCnkkjt90Fh8BxVD_JHB3QJ2q-q?usp=sharing)
- retinaface [Google Drive](https://drive.google.com/drive/folders/1nxR3WFqqEWLwGVsp5c4tI0_iVVEaVOe8?usp=sharing) - retinaface [Google Drive](https://drive.google.com/drive/folders/1nxR3WFqqEWLwGVsp5c4tI0_iVVEaVOe8?usp=sharing)
- yoloface [Google Drive](https://drive.google.com/drive/folders/1EM9H6-aYXKsWTRxx_wbKDyYHVIYpU6f7?usp=sharing)
- anticonv (for mask detection) [Google Drive](https://drive.google.com/drive/folders/1Fje0fmVPy5g0_oaxUbH_cAedkgjBf7QW?usp=sharing) - anticonv (for mask detection) [Google Drive](https://drive.google.com/drive/folders/1Fje0fmVPy5g0_oaxUbH_cAedkgjBf7QW?usp=sharing)
- recognizer (face feature extraction for classification) - recognizer (face feature extraction for classification)
- mobilenet [Google Drive](https://drive.google.com/drive/folders/1fRLs10atm_vwDWQXZ-GJbKQpypNcXLAx?usp=sharing) - mobilenet [Google Drive](https://drive.google.com/drive/folders/1fRLs10atm_vwDWQXZ-GJbKQpypNcXLAx?usp=sharing)

61
go/common/keypoint.go Normal file
View File

@@ -0,0 +1,61 @@
package common
/*
#include <stdlib.h>
#include <stdbool.h>
#include "openvision/common/common.h"
*/
import "C"
import (
"unsafe"
)
// Keypoint represents a detected keypoint
type Keypoint struct {
// Point keypoint location
Point Point
// Score keypoint prob
Score float32
}
// GoKeypoint converts C.Keypoint to the Go type
func GoKeypoint(c *C.Keypoint, w float64, h float64) Keypoint {
return Keypoint{
Point: Pt(float64(c.p.x)/w, float64(c.p.y)/h),
Score: float32(c.prob),
}
}
// CKeypoint converts Keypoint to a *C.Keypoint
func (k Keypoint) CKeypoint(w float64, h float64) *C.Keypoint {
ret := (*C.Keypoint)(C.malloc(C.sizeof_Keypoint))
ret.prob = C.float(k.Score)
ret.p = C.Point2f{
C.float(k.Point.X * w),
C.float(k.Point.Y * h),
}
return ret
}
// NewCKeypointVector returns *C.KeypointVector
func NewCKeypointVector() *C.KeypointVector {
return (*C.KeypointVector)(C.malloc(C.sizeof_KeypointVector))
}
// FreeCKeypointVector releases *C.KeypointVector memory
func FreeCKeypointVector(points *C.KeypointVector) {
C.FreeKeypointVector(points)
C.free(unsafe.Pointer(points))
}
// GoKeypointVector converts *C.KeypointVector to a Keypoint slice
func GoKeypointVector(c *C.KeypointVector, w float64, h float64) []Keypoint {
l := int(c.length)
ret := make([]Keypoint, 0, l)
ptr := unsafe.Pointer(c.points)
for i := 0; i < l; i++ {
cKeypoint := (*C.Keypoint)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_Keypoint*C.int(i))))
ret = append(ret, GoKeypoint(cKeypoint, w, h))
}
return ret
}

98
go/common/objectinfo.go Normal file
View File

@@ -0,0 +1,98 @@
package common
/*
#include <stdlib.h>
#include <stdbool.h>
#include "openvision/common/common.h"
*/
import "C"
import (
"unsafe"
)
// ObjectInfo represents detected object (ROI) info
type ObjectInfo struct {
// Score detected score
Score float32
// Label
Label int
// Rect roi location
Rect Rectangle
// Keypoints detected keypoints
Keypoints []Keypoint
}
// GoObjectInfo converts C.ObjectInfo to the Go type
func GoObjectInfo(c *C.ObjectInfo, w float64, h float64) ObjectInfo {
ret := ObjectInfo{
Label: int(c.label),
Score: float32(c.prob),
Rect: Rect(
float64(c.rect.x)/w,
float64(c.rect.y)/h,
float64(c.rect.width)/w,
float64(c.rect.height)/h,
),
}
if c.pts != nil {
ret.Keypoints = GoKeypointVector(c.pts, w, h)
}
return ret
}
// ToCObjectInfo returns ObjectInfo C type
func (o ObjectInfo) ToCObjectInfo(w float64, h float64) *C.ObjectInfo {
ret := (*C.ObjectInfo)(C.malloc(C.sizeof_ObjectInfo))
ret.label = C.int(o.Label)
ret.prob = C.float(o.Score)
ret.rect.x = C.int(o.Rect.X * w)
ret.rect.y = C.int(o.Rect.Y * h)
ret.rect.width = C.int(o.Rect.Width * w)
ret.rect.height = C.int(o.Rect.Height * h)
if len(o.Keypoints) > 0 {
ret.pts = (*C.KeypointVector)(C.malloc(C.sizeof_KeypointVector))
ret.pts.length = C.int(len(o.Keypoints))
// allocate space for all keypoints, not just one
ret.pts.points = (*C.Keypoint)(C.malloc(C.size_t(len(o.Keypoints)) * C.sizeof_Keypoint))
for idx, p := range o.Keypoints {
pt := C.Keypoint{
C.Point2f{C.float(p.Point.X * w), C.float(p.Point.Y * h)},
C.float(p.Score),
}
C.KeypointVectorSetValue(ret.pts, C.int(idx), &pt)
}
}
return ret
}
// NewCObjectInfoVector returns *C.ObjectInfoVector
func NewCObjectInfoVector() *C.ObjectInfoVector {
return (*C.ObjectInfoVector)(C.malloc(C.sizeof_ObjectInfoVector))
}
// FreeCObjectInfoVector releases *C.ObjectInfoVector memory
func FreeCObjectInfoVector(p *C.ObjectInfoVector) {
C.FreeObjectInfoVector(p)
C.free(unsafe.Pointer(p))
}
// GoObjectInfoVector converts *C.ObjectInfoVector to an ObjectInfo slice
func GoObjectInfoVector(c *C.ObjectInfoVector, w float64, h float64) []ObjectInfo {
l := int(c.length)
ret := make([]ObjectInfo, 0, l)
ptr := unsafe.Pointer(c.items)
for i := 0; i < l; i++ {
cVal := (*C.ObjectInfo)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_ObjectInfo*C.int(i))))
ret = append(ret, GoObjectInfo(cVal, w, h))
}
return ret
}
// CObjectInfoVectiorLength returns the C.ObjectInfoVector length
func CObjectInfoVectiorLength(c *C.ObjectInfoVector) int {
return int(c.length)
}
// CObjectInfoVectorPtr returns the C.ObjectInfoVector start pointer
func CObjectInfoVectorPtr(c *C.ObjectInfoVector) unsafe.Pointer {
return unsafe.Pointer(c.items)
}
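Note: GoObjectInfo and GoKeypoint store coordinates normalized by the image width/height. A minimal sketch of mapping an ObjectInfo back to pixel units, assuming only the types above (the toPixels helper is hypothetical and not part of this package):

package example

import (
	"fmt"

	"github.com/bububa/openvision/go/common"
)

// toPixels converts a normalized ObjectInfo rect back to pixel units for an
// image of the given size (the inverse of the division done in GoObjectInfo).
func toPixels(obj common.ObjectInfo, imgW, imgH float64) (x, y, w, h float64) {
	return obj.Rect.X * imgW, obj.Rect.Y * imgH, obj.Rect.Width * imgW, obj.Rect.Height * imgH
}

func Example() {
	obj := common.ObjectInfo{
		Score: 0.9,
		Rect:  common.Rect(0.25, 0.25, 0.5, 0.5), // normalized to [0, 1]
	}
	x, y, w, h := toPixels(obj, 640, 480)
	fmt.Printf("box: %.0f,%.0f %.0fx%.0f\n", x, y, w, h) // box: 160,120 320x240
}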

View File

@@ -32,6 +32,7 @@ func test_detect(imgPath string, modelPath string) {
retinaface(modelPath), retinaface(modelPath),
centerface(modelPath), centerface(modelPath),
mtcnn(modelPath), mtcnn(modelPath),
yoloface(modelPath),
} { } {
detect(d, imgPath, idx, "4.jpg", false) detect(d, imgPath, idx, "4.jpg", false)
d.Destroy() d.Destroy()
@@ -62,6 +63,15 @@ func mtcnn(modelPath string) detecter.Detecter {
return d return d
} }
func yoloface(modelPath string) detecter.Detecter {
modelPath = filepath.Join(modelPath, "yoloface/v505")
d := detecter.NewYoloFace()
if err := d.LoadModel(modelPath); err != nil {
log.Fatalln(err)
}
return d
}
func centerface(modelPath string) detecter.Detecter { func centerface(modelPath string) detecter.Detecter {
modelPath = filepath.Join(modelPath, "centerface") modelPath = filepath.Join(modelPath, "centerface")
d := detecter.NewCenterface() d := detecter.NewCenterface()

View File

@@ -79,7 +79,13 @@ func detect(d detecter.Detecter, e pose.Estimator, imgPath string, filename stri
log.Fatalln(err) log.Fatalln(err)
continue continue
} }
rois[idx].Keypoints = keypoints pts := make([]common.Keypoint, 0, len(keypoints))
for _, pt := range keypoints {
pts = append(pts, common.Keypoint{
Point: pt,
})
}
rois[idx].Keypoints = pts
log.Printf("keypoints: %d\n", len(keypoints)) log.Printf("keypoints: %d\n", len(keypoints))
} }

View File

@@ -0,0 +1,44 @@
package detecter
/*
#include <stdlib.h>
#include <stdbool.h>
#include "openvision/face/detecter.h"
*/
import "C"
import (
"github.com/bububa/openvision/go/common"
"github.com/bububa/openvision/go/face"
)
// YoloFace represents yoloface detecter
type YoloFace struct {
d C.IFaceDetecter
}
// NewYoloFace returns a new YoloFace
func NewYoloFace() *YoloFace {
return &YoloFace{
d: C.new_yoloface(),
}
}
// Destroy free detecter
func (d *YoloFace) Destroy() {
Destroy(d)
}
// Handler returns C.IFaceDetecter
func (d *YoloFace) Handler() C.IFaceDetecter {
return d.d
}
// LoadModel implement Detecter interface
func (d *YoloFace) LoadModel(modelPath string) error {
return LoadModel(d, modelPath)
}
// DetectFace implement Detecter interface
func (d *YoloFace) DetectFace(img *common.Image) ([]face.FaceInfo, error) {
return DetectFace(d, img)
}
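A minimal end-to-end sketch of the new detecter; the go/face/detecter import path is assumed from the package layout, and image loading is left to the examples package shown earlier:

package example

import (
	"path/filepath"

	"github.com/bububa/openvision/go/common"
	"github.com/bububa/openvision/go/face"
	"github.com/bububa/openvision/go/face/detecter"
)

// DetectWithYoloFace loads the yoloface model from modelRoot and runs face
// detection on an already-decoded image.
func DetectWithYoloFace(img *common.Image, modelRoot string) ([]face.FaceInfo, error) {
	d := detecter.NewYoloFace()
	defer d.Destroy()
	if err := d.LoadModel(filepath.Join(modelRoot, "yoloface/v505")); err != nil {
		return nil, err
	}
	return d.DetectFace(img)
}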

View File

@@ -3,7 +3,7 @@ package detecter
/* /*
#include <stdlib.h> #include <stdlib.h>
#include <stdbool.h> #include <stdbool.h>
#include "openvision/hand/common.h" #include "openvision/common/common.h"
#include "openvision/hand/detecter.h" #include "openvision/hand/detecter.h"
*/ */
import "C" import "C"
@@ -12,14 +12,13 @@ import (
openvision "github.com/bububa/openvision/go" openvision "github.com/bububa/openvision/go"
"github.com/bububa/openvision/go/common" "github.com/bububa/openvision/go/common"
"github.com/bububa/openvision/go/hand"
) )
// Detecter represents detecter interface // Detecter represents detecter interface
type Detecter interface { type Detecter interface {
Handler() C.IHandDetecter Handler() C.IHandDetecter
LoadModel(modelPath string) error LoadModel(modelPath string) error
Detect(img *common.Image) ([]hand.ROI, error) Detect(img *common.Image) ([]common.ObjectInfo, error)
Destroy() Destroy()
} }
@@ -40,20 +39,20 @@ func Destroy(d Detecter) {
} }
// Detect detects hand ROIs // Detect detects hand ROIs
func Detect(d Detecter, img *common.Image) ([]hand.ROI, error) { func Detect(d Detecter, img *common.Image) ([]common.ObjectInfo, error) {
imgWidth := img.WidthF64() imgWidth := img.WidthF64()
imgHeight := img.HeightF64() imgHeight := img.HeightF64()
data := img.Bytes() data := img.Bytes()
cROIs := hand.NewCROIVector() cObjs := common.NewCObjectInfoVector()
defer hand.FreeCROIVector(cROIs) defer common.FreeCObjectInfoVector(cObjs)
errCode := C.extract_hand_rois( errCode := C.extract_hand_rois(
d.Handler(), d.Handler(),
(*C.uchar)(unsafe.Pointer(&data[0])), (*C.uchar)(unsafe.Pointer(&data[0])),
C.int(imgWidth), C.int(imgWidth),
C.int(imgHeight), C.int(imgHeight),
(*C.HandROIVector)(unsafe.Pointer(cROIs))) (*C.ObjectInfoVector)(unsafe.Pointer(cObjs)))
if errCode != 0 { if errCode != 0 {
return nil, openvision.DetectHandError(int(errCode)) return nil, openvision.DetectHandError(int(errCode))
} }
return hand.GoROIVector(cROIs, imgWidth, imgHeight), nil return common.GoObjectInfoVector(cObjs, imgWidth, imgHeight), nil
} }

View File

@@ -8,7 +8,6 @@ package detecter
import "C" import "C"
import ( import (
"github.com/bububa/openvision/go/common" "github.com/bububa/openvision/go/common"
"github.com/bububa/openvision/go/hand"
) )
// Nanodet represents nanodet detecter // Nanodet represents nanodet detecter
@@ -39,6 +38,6 @@ func (d *Nanodet) LoadModel(modelPath string) error {
} }
// Detect implement Detecter interface // Detect implement Detecter interface
func (d *Nanodet) Detect(img *common.Image) ([]hand.ROI, error) { func (d *Nanodet) Detect(img *common.Image) ([]common.ObjectInfo, error) {
return Detect(d, img) return Detect(d, img)
} }

View File

@@ -8,7 +8,6 @@ package detecter
import "C" import "C"
import ( import (
"github.com/bububa/openvision/go/common" "github.com/bububa/openvision/go/common"
"github.com/bububa/openvision/go/hand"
) )
// Yolox represents yolox detecter // Yolox represents yolox detecter
@@ -39,6 +38,6 @@ func (d *Yolox) LoadModel(modelPath string) error {
} }
// Detect implement Detecter interface // Detect implement Detecter interface
func (d *Yolox) Detect(img *common.Image) ([]hand.ROI, error) { func (d *Yolox) Detect(img *common.Image) ([]common.ObjectInfo, error) {
return Detect(d, img) return Detect(d, img)
} }

View File

@@ -6,7 +6,6 @@ import (
"github.com/llgcode/draw2d/draw2dimg" "github.com/llgcode/draw2d/draw2dimg"
"github.com/bububa/openvision/go/common" "github.com/bububa/openvision/go/common"
"github.com/bububa/openvision/go/hand"
) )
// Drawer represents a hand drawer // Drawer represents a hand drawer
@@ -36,7 +35,7 @@ func New(options ...Option) *Drawer {
} }
// Draw draw rois // Draw draw rois
func (d *Drawer) Draw(img image.Image, rois []hand.ROI, drawBorder bool) image.Image { func (d *Drawer) Draw(img image.Image, rois []common.ObjectInfo, drawBorder bool) image.Image {
imgW := float64(img.Bounds().Dx()) imgW := float64(img.Bounds().Dx())
imgH := float64(img.Bounds().Dy()) imgH := float64(img.Bounds().Dy())
out := image.NewRGBA(img.Bounds()) out := image.NewRGBA(img.Bounds())
@@ -68,8 +67,8 @@ func (d *Drawer) Draw(img image.Image, rois []hand.ROI, drawBorder bool) image.I
) )
gc.SetStrokeColor(common.ColorFromHex(poseColor)) gc.SetStrokeColor(common.ColorFromHex(poseColor))
if idx == 5 || idx == 9 || idx == 13 || idx == 17 { if idx == 5 || idx == 9 || idx == 13 || idx == 17 {
p0 = roi.Keypoints[0] p0 = roi.Keypoints[0].Point
p1 = roi.Keypoints[idx] p1 = roi.Keypoints[idx].Point
gc.BeginPath() gc.BeginPath()
gc.MoveTo(p0.X*imgW, p0.Y*imgH) gc.MoveTo(p0.X*imgW, p0.Y*imgH)
gc.LineTo(p1.X*imgW, p1.Y*imgH) gc.LineTo(p1.X*imgW, p1.Y*imgH)
@@ -78,8 +77,8 @@ func (d *Drawer) Draw(img image.Image, rois []hand.ROI, drawBorder bool) image.I
} else if idx == 4 || idx == 8 || idx == 12 || idx == 16 { } else if idx == 4 || idx == 8 || idx == 12 || idx == 16 {
continue continue
} }
p0 = roi.Keypoints[idx] p0 = roi.Keypoints[idx].Point
p1 = roi.Keypoints[idx+1] p1 = roi.Keypoints[idx+1].Point
gc.BeginPath() gc.BeginPath()
gc.MoveTo(p0.X*imgW, p0.Y*imgH) gc.MoveTo(p0.X*imgW, p0.Y*imgH)
gc.LineTo(p1.X*imgW, p1.Y*imgH) gc.LineTo(p1.X*imgW, p1.Y*imgH)
@@ -94,7 +93,7 @@ func (d *Drawer) Draw(img image.Image, rois []hand.ROI, drawBorder bool) image.I
colorIdx-- colorIdx--
} }
poseColor := PoseColors[colorIdx] poseColor := PoseColors[colorIdx]
common.DrawCircle(gc, common.Pt(pt.X*imgW, pt.Y*imgH), d.KeypointRadius, poseColor, "", d.KeypointStrokeWidth) common.DrawCircle(gc, common.Pt(pt.Point.X*imgW, pt.Point.Y*imgH), d.KeypointRadius, poseColor, "", d.KeypointStrokeWidth)
} }
} }
return out return out

View File

@@ -3,7 +3,7 @@ package pose
/* /*
#include <stdlib.h> #include <stdlib.h>
#include <stdbool.h> #include <stdbool.h>
#include "openvision/hand/common.h" #include "openvision/common/common.h"
#include "openvision/hand/pose.h" #include "openvision/hand/pose.h"
*/ */
import "C" import "C"

View File

@@ -1,84 +0,0 @@
package hand
/*
#include <stdlib.h>
#include <stdbool.h>
#include "openvision/hand/common.h"
*/
import "C"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
)
// ROI represents detected person roi
type ROI struct {
// Score detected score
Score float32
// Label
Label int
// Rect roi location
Rect common.Rectangle
// Points keypoints
Keypoints []common.Point
}
// GoROI convert C.HandROI to go type
func GoROI(c *C.HandROI, w float64, h float64) ROI {
return ROI{
Label: int(c.label),
Score: float32(c.prob),
Rect: common.Rect(
float64(c.rect.x)/w,
float64(c.rect.y)/h,
float64(c.rect.width)/w,
float64(c.rect.height)/h,
),
}
}
// ToCROI returns ROI C type
func (r ROI) ToCROI(w float64, h float64) *C.HandROI {
ret := (*C.HandROI)(C.malloc(C.sizeof_HandROI))
ret.label = C.int(r.Label)
ret.prob = C.float(r.Score)
ret.rect.x = C.int(r.Rect.X * w)
ret.rect.y = C.int(r.Rect.Y * h)
ret.rect.width = C.int(r.Rect.Width * w)
ret.rect.height = C.int(r.Rect.Height * h)
return ret
}
// NewROIVector returns *C.HandROIVector
func NewCROIVector() *C.HandROIVector {
return (*C.HandROIVector)(C.malloc(C.sizeof_HandROIVector))
}
// FreeCROIVector release *C.HandROIVectore memory
func FreeCROIVector(p *C.HandROIVector) {
C.FreeHandROIVector(p)
C.free(unsafe.Pointer(p))
}
// GoROIVector convert *C.HandROIVector to ROI slice
func GoROIVector(c *C.HandROIVector, w float64, h float64) []ROI {
l := int(c.length)
ret := make([]ROI, 0, l)
ptr := unsafe.Pointer(c.items)
for i := 0; i < l; i++ {
cVal := (*C.HandROI)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_HandROI*C.int(i))))
ret = append(ret, GoROI(cVal, w, h))
}
return ret
}
// CROIVectiorLength get C.HandROIVector length
func CROIVectiorLength(c *C.HandROIVector) int {
return int(c.length)
}
// CROIVectorPtr get C.HandROIVector start pointer
func CROIVectorPtr(c *C.HandROIVector) unsafe.Pointer {
return unsafe.Pointer(c.items)
}

View File

@@ -3,7 +3,7 @@ package detecter
/* /*
#include <stdlib.h> #include <stdlib.h>
#include <stdbool.h> #include <stdbool.h>
#include "openvision/pose/common.h" #include "openvision/common/common.h"
#include "openvision/pose/detecter.h" #include "openvision/pose/detecter.h"
*/ */
import "C" import "C"
@@ -12,14 +12,13 @@ import (
openvision "github.com/bububa/openvision/go" openvision "github.com/bububa/openvision/go"
"github.com/bububa/openvision/go/common" "github.com/bububa/openvision/go/common"
"github.com/bububa/openvision/go/pose"
) )
// Detecter represents detecter interface // Detecter represents detecter interface
type Detecter interface { type Detecter interface {
Handler() C.IPoseDetecter Handler() C.IPoseDetecter
LoadModel(modelPath string) error LoadModel(modelPath string) error
ExtractKeypoints(img *common.Image) ([]pose.ROI, error) ExtractKeypoints(img *common.Image) ([]common.ObjectInfo, error)
Destroy() Destroy()
} }
@@ -40,37 +39,40 @@ func Destroy(d Detecter) {
} }
// ExtractKeypoints detects pose keypoints using detecter // ExtractKeypoints detects pose keypoints using detecter
func ExtractKeypoints(d Detecter, img *common.Image) ([]pose.ROI, error) { func ExtractKeypoints(d Detecter, img *common.Image) ([]common.ObjectInfo, error) {
imgWidth := img.WidthF64() imgWidth := img.WidthF64()
imgHeight := img.HeightF64() imgHeight := img.HeightF64()
data := img.Bytes() data := img.Bytes()
cROIs := pose.NewCROIVector() cObjs := common.NewCObjectInfoVector()
defer pose.FreeCROIVector(cROIs) defer common.FreeCObjectInfoVector(cObjs)
errCode := C.extract_pose_rois( errCode := C.extract_pose_rois(
d.Handler(), d.Handler(),
(*C.uchar)(unsafe.Pointer(&data[0])), (*C.uchar)(unsafe.Pointer(&data[0])),
C.int(imgWidth), C.int(imgWidth),
C.int(imgHeight), C.int(imgHeight),
(*C.PoseROIVector)(unsafe.Pointer(cROIs))) (*C.ObjectInfoVector)(unsafe.Pointer(cObjs)))
if errCode != 0 { if errCode != 0 {
return nil, openvision.DetectPoseError(int(errCode)) return nil, openvision.DetectPoseError(int(errCode))
} }
totalROIs := pose.CROIVectiorLength(cROIs) totalROIs := common.CObjectInfoVectiorLength(cObjs)
rois := make([]pose.ROI, 0, totalROIs) rois := make([]common.ObjectInfo, 0, totalROIs)
ptr := pose.CROIVectorPtr(cROIs) ptr := common.CObjectInfoVectorPtr(cObjs)
for i := 0; i < totalROIs; i++ { for i := 0; i < totalROIs; i++ {
cKeypoints := pose.NewCKeypointVector() cKeypoints := common.NewCKeypointVector()
defer pose.FreeCKeypointVector(cKeypoints) defer common.FreeCKeypointVector(cKeypoints)
cROI := (*C.PoseROI)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_PoseROI*C.int(i)))) cROI := (*C.ObjectInfo)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_ObjectInfo*C.int(i))))
errCode := C.extract_pose_keypoints( errCode := C.extract_pose_keypoints(
d.Handler(), d.Handler(),
cROI, (*C.uchar)(unsafe.Pointer(&data[0])),
(*C.PoseKeypointVector)(unsafe.Pointer(cKeypoints))) C.int(imgWidth),
C.int(imgHeight),
(*C.Rect)(unsafe.Pointer(&cROI.rect)),
(*C.KeypointVector)(unsafe.Pointer(cKeypoints)))
if errCode != 0 { if errCode != 0 {
return nil, openvision.DetectPoseError(int(errCode)) return nil, openvision.DetectPoseError(int(errCode))
} }
keypoints := pose.GoKeypointVector(cKeypoints, imgWidth, imgHeight) keypoints := common.GoKeypointVector(cKeypoints, imgWidth, imgHeight)
rois = append(rois, pose.ROI{ rois = append(rois, common.ObjectInfo{
Keypoints: keypoints, Keypoints: keypoints,
Rect: common.Rect( Rect: common.Rect(
float64(cROI.rect.x)/imgWidth, float64(cROI.rect.x)/imgWidth,
@@ -78,7 +80,7 @@ func ExtractKeypoints(d Detecter, img *common.Image) ([]pose.ROI, error) {
float64(cROI.rect.width)/imgWidth, float64(cROI.rect.width)/imgWidth,
float64(cROI.rect.height)/imgHeight, float64(cROI.rect.height)/imgHeight,
), ),
Score: float32(cROI.score), Score: float32(cROI.prob),
}) })
} }

View File

@@ -8,7 +8,6 @@ package detecter
import "C" import "C"
import ( import (
"github.com/bububa/openvision/go/common" "github.com/bububa/openvision/go/common"
"github.com/bububa/openvision/go/pose"
) )
// Ultralight represents ultralight detecter // Ultralight represents ultralight detecter
@@ -39,6 +38,6 @@ func (d *Ultralight) LoadModel(modelPath string) error {
} }
// ExtractKeypoints implement Detecter interface // ExtractKeypoints implement Detecter interface
func (d *Ultralight) ExtractKeypoints(img *common.Image) ([]pose.ROI, error) { func (d *Ultralight) ExtractKeypoints(img *common.Image) ([]common.ObjectInfo, error) {
return ExtractKeypoints(d, img) return ExtractKeypoints(d, img)
} }

View File

@@ -6,7 +6,6 @@ import (
"github.com/llgcode/draw2d/draw2dimg" "github.com/llgcode/draw2d/draw2dimg"
"github.com/bububa/openvision/go/common" "github.com/bububa/openvision/go/common"
"github.com/bububa/openvision/go/pose"
) )
// Drawer represents a pose drawer // Drawer represents a pose drawer
@@ -36,7 +35,7 @@ func New(options ...Option) *Drawer {
} }
// Draw draw rois // Draw draw rois
func (d *Drawer) Draw(img image.Image, rois []pose.ROI, drawBorder bool) image.Image { func (d *Drawer) Draw(img image.Image, rois []common.ObjectInfo, drawBorder bool) image.Image {
imgW := float64(img.Bounds().Dx()) imgW := float64(img.Bounds().Dx())
imgH := float64(img.Bounds().Dy()) imgH := float64(img.Bounds().Dy())
out := image.NewRGBA(img.Bounds()) out := image.NewRGBA(img.Bounds())

View File

@@ -1,63 +0,0 @@
package pose
/*
#include <stdlib.h>
#include <stdbool.h>
#include "openvision/pose/common.h"
*/
import "C"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
)
// Keypoint represents detected body keypoint
type Keypoint struct {
// Point keypoint location
Point common.Point
// Score keypoint prob
Score float32
}
// GoKeypoint convert C.PoseKeypoint to go type
func GoKeypoint(c *C.PoseKeypoint, w float64, h float64) Keypoint {
return Keypoint{
Point: common.Pt(float64(c.p.x)/w, float64(c.p.y)/h),
Score: float32(c.prob),
}
}
// Convert Keypoint to C.Keypoint pointer
func (k Keypoint) CKeypoint(w float64, h float64) *C.PoseKeypoint {
ret := (*C.PoseKeypoint)(C.malloc(C.sizeof_PoseKeypoint))
ret.prob = C.float(k.Score)
ret.p = C.Point2f{
C.float(k.Point.X * w),
C.float(k.Point.Y * h),
}
return ret
}
// NewCKeypointVector returns *C.PoseKeypointVector
func NewCKeypointVector() *C.PoseKeypointVector {
return (*C.PoseKeypointVector)(C.malloc(C.sizeof_PoseKeypointVector))
}
// FreeCKeypointVector release *C.PoseKeypointVector memory
func FreeCKeypointVector(points *C.PoseKeypointVector) {
C.FreePoseKeypointVector(points)
C.free(unsafe.Pointer(points))
}
// GoKeypointVector convert *C.PoseKeypointVector to Keypoint slice
func GoKeypointVector(c *C.PoseKeypointVector, w float64, h float64) []Keypoint {
l := int(c.length)
ret := make([]Keypoint, 0, l)
ptr := unsafe.Pointer(c.points)
for i := 0; i < l; i++ {
cKeypoint := (*C.PoseKeypoint)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_PoseKeypoint*C.int(i))))
ret = append(ret, GoKeypoint(cKeypoint, w, h))
}
return ret
}

View File

@@ -1,42 +0,0 @@
package pose
/*
#include <stdlib.h>
#include <stdbool.h>
#include "openvision/pose/common.h"
*/
import "C"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
)
// ROI represents detected person roi
type ROI struct {
// Score detected score
Score float32
// Rect roi location
Rect common.Rectangle
// Keypoints
Keypoints []Keypoint
}
// NewROIVector returns *C.PoseROIVector
func NewCROIVector() *C.PoseROIVector {
return (*C.PoseROIVector)(C.malloc(C.sizeof_PoseROIVector))
}
// FreeCROIVector release *C.PoseROIVectore memory
func FreeCROIVector(p *C.PoseROIVector) {
C.FreePoseROIVector(p)
C.free(unsafe.Pointer(p))
}
func CROIVectiorLength(c *C.PoseROIVector) int {
return int(c.length)
}
func CROIVectorPtr(c *C.PoseROIVector) unsafe.Pointer {
return unsafe.Pointer(c.items)
}

View File

@@ -61,12 +61,10 @@ target_include_directories(openvision
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/face/hopenet> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/face/hopenet>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/common>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/pose> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/pose>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose/common>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose/detecter> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose/detecter>
) )
@@ -87,14 +85,12 @@ file(COPY
) )
file(COPY file(COPY
${CMAKE_CURRENT_SOURCE_DIR}/hand/common.h
${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter.h ${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter.h
${CMAKE_CURRENT_SOURCE_DIR}/hand/pose.h ${CMAKE_CURRENT_SOURCE_DIR}/hand/pose.h
DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/hand DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/hand
) )
file(COPY file(COPY
${CMAKE_CURRENT_SOURCE_DIR}/pose/common.h
${CMAKE_CURRENT_SOURCE_DIR}/pose/detecter.h ${CMAKE_CURRENT_SOURCE_DIR}/pose/detecter.h
DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/pose DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/pose
) )

View File

@@ -2,6 +2,7 @@
#include <algorithm> #include <algorithm>
#include <iostream> #include <iostream>
#include <math.h> #include <math.h>
#include <float.h>
#ifdef OV_VULKAN #ifdef OV_VULKAN
#include "gpu.h" #include "gpu.h"
@@ -42,6 +43,13 @@ void FreePoint2fVector(Point2fVector* p) {
} }
} }
void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f* val) {
if (p->points == NULL || i >= p->length) {
return;
}
p->points[i] = *val;
}
void FreeFloatVector(FloatVector *p) { void FreeFloatVector(FloatVector *p) {
if (p->values != NULL) { if (p->values != NULL) {
free(p->values); free(p->values);
@@ -56,6 +64,37 @@ void FreeBytes(Bytes *p) {
} }
} }
void FreeKeypointVector(KeypointVector *p) {
if (p->points != NULL) {
free(p->points);
p->points = NULL;
}
}
void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint* val) {
if (p->points == NULL || i >= p->length) {
return;
}
p->points[i] = *val;
}
void FreeObjectInfo(ObjectInfo *p) {
if (p->pts != NULL) {
FreeKeypointVector(p->pts);
free(p->pts);
p->pts = NULL;
}
}
void FreeObjectInfoVector(ObjectInfoVector *p) {
if (p->items!=NULL) {
for (int i=0; i < p->length; i ++) {
FreeObjectInfo(&p->items[i]);
}
free(p->items);
p->items= NULL;
}
}
namespace ov { namespace ov {
int RatioAnchors(const Rect & anchor, int RatioAnchors(const Rect & anchor,
@@ -164,4 +203,105 @@ void RectifyRect(Rect* rect) {
rect->height = max_side; rect->height = max_side;
} }
void qsort_descent_inplace(std::vector<ObjectInfo>& objects, int left, int right)
{
int i = left;
int j = right;
float p = objects[(left + right) / 2].prob;
while (i <= j)
{
while (objects[i].prob > p)
i++;
while (objects[j].prob < p)
j--;
if (i <= j)
{
// swap
std::swap(objects[i], objects[j]);
i++;
j--;
}
}
#pragma omp parallel sections
{
#pragma omp section
{
if (left < j) qsort_descent_inplace(objects, left, j);
}
#pragma omp section
{
if (i < right) qsort_descent_inplace(objects, i, right);
}
}
}
void qsort_descent_inplace(std::vector<ObjectInfo>& objects)
{
if (objects.empty())
return;
qsort_descent_inplace(objects, 0, objects.size() - 1);
}
void nms_sorted_bboxes(const std::vector<ObjectInfo>& objects, std::vector<int>& picked, float nms_threshold)
{
picked.clear();
const int n = objects.size();
std::vector<float> areas(n);
for (int i = 0; i < n; i++)
{
areas[i] = objects[i].rect.area();
}
for (int i = 0; i < n; i++)
{
const ObjectInfo& a = objects[i];
int keep = 1;
for (int j = 0; j < (int)picked.size(); j++)
{
const ObjectInfo& b = objects[picked[j]];
// intersection over union
float inter_area = InterRectArea(a.rect, b.rect);
float union_area = areas[i] + areas[picked[j]] - inter_area;
// float IoU = inter_area / union_area
if (inter_area / union_area > nms_threshold)
keep = 0;
}
if (keep)
picked.push_back(i);
}
}
int generate_grids_and_stride(const int target_size, std::vector<int>& strides, std::vector<GridAndStride>& grid_strides)
{
for (auto stride : strides)
{
int num_grid = target_size / stride;
for (int g1 = 0; g1 < num_grid; g1++)
{
for (int g0 = 0; g0 < num_grid; g0++)
{
grid_strides.push_back((GridAndStride){g0, g1, stride});
}
}
}
return 0;
}
float sigmoid(float x)
{
return static_cast<float>(1.f / (1.f + exp(-x)));
}
} }
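For reference, nms_sorted_bboxes keeps a candidate only while its overlap with every higher-scoring picked box stays at or below nms_threshold, with overlap measured as IoU = inter_area / (area_a + area_b - inter_area). A quick worked example: two 10x10 boxes at x = 0 and x = 5 on the same row overlap in a 5x10 strip, so IoU = 50 / (100 + 100 - 50) = 1/3 ≈ 0.33; with the 0.45 threshold used by the yoloface detecter below, both boxes survive.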

View File

@@ -11,6 +11,7 @@ typedef ov::Size Size;
typedef ov::Point Point; typedef ov::Point Point;
typedef ov::Point2f Point2f; typedef ov::Point2f Point2f;
typedef ov::Rect Rect; typedef ov::Rect Rect;
typedef ov::Keypoint Keypoint;
#else #else
// Wrapper for an individual cv::cvSize // Wrapper for an individual cv::cvSize
@@ -40,6 +41,12 @@ typedef struct Rect {
int height; int height;
} Rect; } Rect;
typedef struct Keypoint {
Point2f p;
float prob;
} Keypoint;
#endif #endif
typedef void* IEstimator; typedef void* IEstimator;
@@ -56,6 +63,7 @@ typedef struct Point2fVector {
} Point2fVector; } Point2fVector;
void FreePoint2fVector(Point2fVector *p); void FreePoint2fVector(Point2fVector *p);
void Point2fVectorSetValue(Point2fVector *p, int i, const Point2f* val);
typedef struct RectVector { typedef struct RectVector {
Rect* rects; Rect* rects;
@@ -78,6 +86,30 @@ typedef struct Bytes {
void FreeBytes(Bytes *p); void FreeBytes(Bytes *p);
typedef struct KeypointVector {
Keypoint* points;
int length;
} KeypointVector;
void FreeKeypointVector(KeypointVector *p);
void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint* val);
typedef struct ObjectInfoC {
Rect rect;
float prob;
int label;
KeypointVector* pts;
} ObjectInfo;
void FreeObjectInfo(ObjectInfo *p);
typedef struct ObjectInfoVector {
ObjectInfo* items;
int length;
} ObjectInfoVector;
void FreeObjectInfoVector(ObjectInfoVector *p);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@@ -4,6 +4,7 @@
#include <vector> #include <vector>
#include <string> #include <string>
#include "config.h" #include "config.h"
#include "net.h"
#ifdef OV_OPENMP #ifdef OV_OPENMP
#include <omp.h> #include <omp.h>
#endif #endif
@@ -80,10 +81,23 @@ struct ImageInfo {
float score_; float score_;
}; };
struct Keypoint {
ov::Point2f p;
float prob;
};
struct ObjectInfo { struct ObjectInfo {
Rect location_; Rect rect;
float score_; float prob;
std::string name_; int label;
std::vector<Point2f> pts;
};
struct GridAndStride
{
int grid0;
int grid1;
int stride;
}; };
int RatioAnchors(const Rect & anchor, int RatioAnchors(const Rect & anchor,
@@ -140,6 +154,16 @@ int const NMS(const std::vector<T>& inputs, std::vector<T>* result,
return 0; return 0;
} }
void qsort_descent_inplace(std::vector<ObjectInfo>& objects, int left, int right);
void qsort_descent_inplace(std::vector<ObjectInfo>& objects);
void nms_sorted_bboxes(const std::vector<ObjectInfo>& objects, std::vector<int>& picked, float nms_threshold);
int generate_grids_and_stride(const int target_size, std::vector<int>& strides, std::vector<GridAndStride>& grid_strides);
float sigmoid(float x);
void EnlargeRect(const float& scale, Rect* rect); void EnlargeRect(const float& scale, Rect* rect);
void RectifyRect(Rect* rect); void RectifyRect(Rect* rect);

View File

@@ -11,6 +11,7 @@ extern "C" {
IFaceDetecter new_retinaface(); IFaceDetecter new_retinaface();
IFaceDetecter new_centerface(); IFaceDetecter new_centerface();
IFaceDetecter new_mtcnn(); IFaceDetecter new_mtcnn();
IFaceDetecter new_yoloface();
IFaceDetecter new_anticonv(); IFaceDetecter new_anticonv();
int detect_face(IFaceDetecter d, const unsigned char* rgbdata, int img_width, int img_height, FaceInfoVector* faces); int detect_face(IFaceDetecter d, const unsigned char* rgbdata, int img_width, int img_height, FaceInfoVector* faces);
#ifdef __cplusplus #ifdef __cplusplus

View File

@@ -3,6 +3,7 @@
#include "mtcnn/mtcnn.hpp" #include "mtcnn/mtcnn.hpp"
#include "retinaface/retinaface.hpp" #include "retinaface/retinaface.hpp"
#include "anticonv/anticonv.hpp" #include "anticonv/anticonv.hpp"
#include "yoloface/yoloface.hpp"
IFaceDetecter new_retinaface() { IFaceDetecter new_retinaface() {
return new ovface::RetinaFace(); return new ovface::RetinaFace();
@@ -16,6 +17,10 @@ IFaceDetecter new_mtcnn() {
return new ovface::Mtcnn(); return new ovface::Mtcnn();
} }
IFaceDetecter new_yoloface() {
return new ovface::YoloFace();
}
IFaceDetecter new_anticonv() { IFaceDetecter new_anticonv() {
return new ovface::AntiConv(); return new ovface::AntiConv();
} }
@@ -49,6 +54,10 @@ Detecter* RetinafaceFactory::CreateDetecter() {
return new RetinaFace(); return new RetinaFace();
} }
Detecter* YoloFaceFactory::CreateDetecter() {
return new YoloFace();
}
Detecter* AnticonvFactory::CreateDetecter() { Detecter* AnticonvFactory::CreateDetecter() {
return new AntiConv(); return new AntiConv();
} }

View File

@@ -44,6 +44,13 @@ public:
Detecter* CreateDetecter(); Detecter* CreateDetecter();
}; };
class YoloFaceFactory : public DetecterFactory {
public:
YoloFaceFactory() {}
~YoloFaceFactory() {}
Detecter* CreateDetecter();
};
class AnticonvFactory : public DetecterFactory { class AnticonvFactory : public DetecterFactory {
public: public:
AnticonvFactory() {} AnticonvFactory() {}

View File

@@ -0,0 +1,290 @@
#include "yoloface.hpp"
#include "../../../common/yolov5focus.hpp"
#include <string>
#include <float.h>
#ifdef OV_VULKAN
#include "gpu.h"
#endif // OV_VULKAN
namespace ovface {
static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& in_pad, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<ov::ObjectInfo>& objects)
{
const int num_grid = feat_blob.h;
int num_grid_x;
int num_grid_y;
if (in_pad.w > in_pad.h)
{
num_grid_x = in_pad.w / stride;
num_grid_y = num_grid / num_grid_x;
}
else
{
num_grid_y = in_pad.h / stride;
num_grid_x = num_grid / num_grid_y;
}
const int num_class = feat_blob.w - 5 - 10; // 4 box values + 1 objectness + 10 landmark coords
const int num_anchors = anchors.w / 2;
for (int q = 0; q < num_anchors; q++)
{
const float anchor_w = anchors[q * 2];
const float anchor_h = anchors[q * 2 + 1];
const ncnn::Mat feat = feat_blob.channel(q);
for (int i = 0; i < num_grid_y; i++)
{
for (int j = 0; j < num_grid_x; j++)
{
const float* featptr = feat.row(i * num_grid_x + j);
// find class index with max class score
int class_index = 0;
float class_score = -FLT_MAX;
for (int k = 0; k < num_class; k++)
{
float score = featptr[5 + 10 + k];
if (score > class_score)
{
class_index = k;
class_score = score;
}
}
float box_score = featptr[4];
float confidence = ov::sigmoid(box_score); //* sigmoid(class_score);
if (confidence >= prob_threshold)
{
// yolov5/models/yolo.py Detect forward
// y = x[i].sigmoid()
// y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy
// y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
float dx = ov::sigmoid(featptr[0]);
float dy = ov::sigmoid(featptr[1]);
float dw = ov::sigmoid(featptr[2]);
float dh = ov::sigmoid(featptr[3]);
float pb_cx = (dx * 2.f - 0.5f + j) * stride;
float pb_cy = (dy * 2.f - 0.5f + i) * stride;
float pb_w = pow(dw * 2.f, 2) * anchor_w;
float pb_h = pow(dh * 2.f, 2) * anchor_h;
float x0 = pb_cx - pb_w * 0.5f;
float y0 = pb_cy - pb_h * 0.5f;
float x1 = pb_cx + pb_w * 0.5f;
float y1 = pb_cy + pb_h * 0.5f;
ov::ObjectInfo obj;
obj.rect.x = x0;
obj.rect.y = y0;
obj.rect.width = x1 - x0;
obj.rect.height = y1 - y0;
obj.label = class_index;
obj.prob = confidence;
for (int l = 0; l < 5; l++)
{
float x = featptr[2 * l + 5] * anchor_w + j * stride;
float y = featptr[2 * l + 1 + 5] * anchor_h + i * stride;
obj.pts.push_back(ov::Point2f(x, y));
}
objects.push_back(obj);
}
}
}
}
}
YoloFace::YoloFace() :
net_ (new ncnn::Net()),
initialized_(false) {
#ifdef OV_VULKAN
net_->opt.use_vulkan_compute = true;
#endif // OV_VULKAN
}
YoloFace::~YoloFace() {
net_->clear();
}
int YoloFace::LoadModel(const char * root_path) {
register_yolov5focus(net_);
std::string param_file = std::string(root_path) + "/param";
std::string bin_file = std::string(root_path) + "/bin";
if (net_->load_param(param_file.c_str()) == -1 ||
net_->load_model(bin_file.c_str()) == -1) {
return 10000;
}
initialized_ = true;
return 0;
}
int YoloFace::DetectFace(const unsigned char* rgbdata,
int img_width, int img_height,
std::vector<FaceInfo>* faces) {
faces->clear();
if (!initialized_) {
return 10000;
}
if (rgbdata == 0){
return 10001;
}
// letterbox pad to multiple of 32
int w = img_width;
int h = img_height;
float scale = 1.f;
if (w > h)
{
scale = (float)target_size / w;
w = target_size;
h = h * scale;
}
else
{
scale = (float)target_size / h;
h = target_size;
w = w * scale;
}
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, w, h);
// pad to target_size rectangle
// yolov5/utils/datasets.py letterbox
int wpad = (w + 31) / 32 * 32 - w;
int hpad = (h + 31) / 32 * 32 - h;
ncnn::Mat in_pad;
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);
in_pad.substract_mean_normalize(0, norm_vals);
ncnn::Extractor ex = net_->create_extractor();
ex.input("data", in_pad);
std::vector<ov::ObjectInfo> proposals;
// anchor setting from yolov5/models/yolov5s.yaml
// stride 8
{
ncnn::Mat out;
ex.extract("981", out);
ncnn::Mat anchors(6);
anchors[0] = 4.f;
anchors[1] = 5.f;
anchors[2] = 8.f;
anchors[3] = 10.f;
anchors[4] = 13.f;
anchors[5] = 16.f;
std::vector<ov::ObjectInfo> objects8;
generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects8);
proposals.insert(proposals.end(), objects8.begin(), objects8.end());
}
// stride 16
{
ncnn::Mat out;
ex.extract("983", out);
ncnn::Mat anchors(6);
anchors[0] = 23.f;
anchors[1] = 29.f;
anchors[2] = 43.f;
anchors[3] = 55.f;
anchors[4] = 73.f;
anchors[5] = 105.f;
std::vector<ov::ObjectInfo> objects16;
generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects16);
proposals.insert(proposals.end(), objects16.begin(), objects16.end());
}
// stride 32
{
ncnn::Mat out;
ex.extract("985", out);
ncnn::Mat anchors(6);
anchors[0] = 146.f;
anchors[1] = 217.f;
anchors[2] = 231.f;
anchors[3] = 300.f;
anchors[4] = 335.f;
anchors[5] = 433.f;
std::vector<ov::ObjectInfo> objects32;
generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects32);
proposals.insert(proposals.end(), objects32.begin(), objects32.end());
}
// sort all proposals by score from highest to lowest
qsort_descent_inplace(proposals);
// apply nms with nms_threshold
std::vector<int> picked;
nms_sorted_bboxes(proposals, picked, nms_threshold);
int count = picked.size();
for (int i = 0; i < count; i++)
{
ov::ObjectInfo obj = proposals[picked[i]];
// adjust offset to original unpadded
float x0 = (obj.rect.x - (float(wpad) / 2)) / scale;
float y0 = (obj.rect.y - (float(hpad) / 2)) / scale;
float x1 = (obj.rect.x + obj.rect.width - (float(wpad) / 2)) / scale;
float y1 = (obj.rect.y + obj.rect.height - (float(hpad) / 2)) / scale;
for (int j = 0; j < obj.pts.size(); j++)
{
float ptx = (obj.pts[j].x - (float(wpad) / 2)) / scale;
float pty = (obj.pts[j].y - (float(hpad) / 2)) / scale;
obj.pts[j] = ov::Point2f(ptx, pty);
}
// clip
x0 = std::max(std::min(x0, (float)(img_width - 1)), 0.f);
y0 = std::max(std::min(y0, (float)(img_height - 1)), 0.f);
x1 = std::max(std::min(x1, (float)(img_width - 1)), 0.f);
y1 = std::max(std::min(y1, (float)(img_height - 1)), 0.f);
obj.rect.x = x0;
obj.rect.y = y0;
obj.rect.width = x1 - x0;
obj.rect.height = y1 - y0;
FaceInfo info;
info.location_ = obj.rect;
for (int k = 0; k < 5; ++k) {
info.keypoints_[k] = obj.pts[k].x;
info.keypoints_[k + 5] = obj.pts[k].y;
}
faces->push_back(info);
}
return 0;
}
}
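For reference, generate_proposals above follows the yolov5 Detect-head decode quoted in its comments: with stride s, grid cell (j, i) and anchor (aw, ah),
cx = (2*sigmoid(tx) - 0.5 + j) * s, cy = (2*sigmoid(ty) - 0.5 + i) * s, w = (2*sigmoid(tw))^2 * aw, h = (2*sigmoid(th))^2 * ah,
and the final loop maps boxes and landmarks back to the unpadded input with x_orig = (x - wpad/2) / scale (and likewise for y with hpad), before clipping to the image bounds.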

View File

@@ -0,0 +1,30 @@
#ifndef _YOLOFACE_H_
#define _YOLOFACE_H_
#include "../detecter.hpp"
#include "net.h"
namespace ovface {
class YoloFace : public Detecter {
public:
YoloFace();
~YoloFace();
int LoadModel(const char* root_path);
int DetectFace(const unsigned char* rgbdata,
int img_width, int img_height,
std::vector<FaceInfo>* faces);
private:
ncnn::Net* net_;
bool initialized_;
const int target_size = 640;
const float mean_vals[3] = {127.f, 127.f, 127.f};
const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
const float prob_threshold = 0.25f;
const float nms_threshold = 0.45f;
};
}
#endif // !_YOLOFACE_H_

View File

@@ -1,29 +0,0 @@
#ifndef _HAND_COMMON_C_H_
#define _HAND_COMMON_C_H_
#include "../common/common.h"
#ifdef __cplusplus
#include "common/common.hpp"
extern "C" {
#endif
#ifdef __cplusplus
typedef ovhand::HandROI HandROI;
#else
typedef struct HandROI {
Rect rect;
int label;
float prob;
} HandROI;
#endif
typedef struct HandROIVector {
HandROI* items;
int length;
} HandROIVector;
void FreeHandROIVector(HandROIVector *p);
#ifdef __cplusplus
}
#endif
#endif // !_HAND_COMMON_C_H_

View File

@@ -1,265 +0,0 @@
#include "../common.h"
#include <float.h>
#include <math.h>
void FreeHandROIVector(HandROIVector *p) {
if (p->items!=NULL) {
free(p->items);
p->items= NULL;
}
}
namespace ovhand {
inline float intersection_area(const HandROI& a, const HandROI& b)
{
ov::Rect inter = a.rect & b.rect;
return inter.area();
}
void qsort_descent_inplace(std::vector<HandROI>& objects, int left, int right)
{
int i = left;
int j = right;
float p = objects[(left + right) / 2].prob;
while (i <= j)
{
while (objects[i].prob > p)
i++;
while (objects[j].prob < p)
j--;
if (i <= j)
{
// swap
std::swap(objects[i], objects[j]);
i++;
j--;
}
}
#pragma omp parallel sections
{
#pragma omp section
{
if (left < j) qsort_descent_inplace(objects, left, j);
}
#pragma omp section
{
if (i < right) qsort_descent_inplace(objects, i, right);
}
}
}
void qsort_descent_inplace(std::vector<HandROI>& objects)
{
if (objects.empty())
return;
qsort_descent_inplace(objects, 0, objects.size() - 1);
}
void nms_sorted_bboxes(const std::vector<HandROI>& objects, std::vector<int>& picked, float nms_threshold)
{
picked.clear();
const int n = objects.size();
std::vector<float> areas(n);
for (int i = 0; i < n; i++)
{
areas[i] = objects[i].rect.area();
}
for (int i = 0; i < n; i++)
{
const HandROI& a = objects[i];
int keep = 1;
for (int j = 0; j < (int)picked.size(); j++)
{
const HandROI& b = objects[picked[j]];
// intersection over union
float inter_area = intersection_area(a, b);
float union_area = areas[i] + areas[picked[j]] - inter_area;
// float IoU = inter_area / union_area
if (inter_area / union_area > nms_threshold)
keep = 0;
}
if (keep)
picked.push_back(i);
}
}
int generate_grids_and_stride(const int target_size, std::vector<int>& strides, std::vector<GridAndStride>& grid_strides)
{
for (auto stride : strides)
{
int num_grid = target_size / stride;
for (int g1 = 0; g1 < num_grid; g1++)
{
for (int g0 = 0; g0 < num_grid; g0++)
{
grid_strides.push_back((GridAndStride){g0, g1, stride});
}
}
}
return 0;
}
void generate_yolox_proposals(std::vector<GridAndStride> grid_strides, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<HandROI>& objects)
{
const int num_grid = feat_blob.h;
const int num_class = feat_blob.w - 5;
const int num_anchors = grid_strides.size();
const float* feat_ptr = feat_blob.channel(0);
for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++)
{
const int grid0 = grid_strides[anchor_idx].grid0;
const int grid1 = grid_strides[anchor_idx].grid1;
const int stride = grid_strides[anchor_idx].stride;
// yolox/models/yolo_head.py decode logic
// outputs[..., :2] = (outputs[..., :2] + grids) * strides
// outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides
float x_center = (feat_ptr[0] + grid0) * stride;
float y_center = (feat_ptr[1] + grid1) * stride;
float w = exp(feat_ptr[2]) * stride;
float h = exp(feat_ptr[3]) * stride;
float x0 = x_center - w * 0.5f;
float y0 = y_center - h * 0.5f;
float box_objectness = feat_ptr[4];
for (int class_idx = 0; class_idx < num_class; class_idx++)
{
float box_cls_score = feat_ptr[5 + class_idx];
float box_prob = box_objectness * box_cls_score;
if (box_prob > prob_threshold)
{
HandROI obj;
obj.rect.x = x0;
obj.rect.y = y0;
obj.rect.width = w;
obj.rect.height = h;
obj.label = class_idx;
obj.prob = box_prob;
objects.push_back(obj);
}
} // class loop
feat_ptr += feat_blob.w;
} // point anchor loop
}
void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Mat& dis_pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<HandROI>& objects)
{
const int num_grid = cls_pred.h;
int num_grid_x;
int num_grid_y;
if (in_pad.w > in_pad.h)
{
num_grid_x = in_pad.w / stride;
num_grid_y = num_grid / num_grid_x;
}
else
{
num_grid_y = in_pad.h / stride;
num_grid_x = num_grid / num_grid_y;
}
const int num_class = cls_pred.w;
const int reg_max_1 = dis_pred.w / 4;
//__android_log_print(ANDROID_LOG_WARN, "ncnn","cls_pred h %d, w %d",cls_pred.h,cls_pred.w);
//__android_log_print(ANDROID_LOG_WARN, "ncnn","%d,%d,%d,%d",num_grid_x,num_grid_y,num_class,reg_max_1);
for (int i = 0; i < num_grid_y; i++)
{
for (int j = 0; j < num_grid_x; j++)
{
const int idx = i * num_grid_x + j;
const float* scores = cls_pred.row(idx);
// find label with max score
int label = -1;
float score = -FLT_MAX;
for (int k = 0; k < num_class; k++)
{
if (scores[k] > score)
{
label = k;
score = scores[k];
}
}
if (score >= prob_threshold)
{
ncnn::Mat bbox_pred(reg_max_1, 4, (void*)dis_pred.row(idx));
{
ncnn::Layer* softmax = ncnn::create_layer("Softmax");
ncnn::ParamDict pd;
pd.set(0, 1); // axis
pd.set(1, 1);
softmax->load_param(pd);
ncnn::Option opt;
opt.num_threads = 1;
opt.use_packing_layout = false;
softmax->create_pipeline(opt);
softmax->forward_inplace(bbox_pred, opt);
softmax->destroy_pipeline(opt);
delete softmax;
}
float pred_ltrb[4];
for (int k = 0; k < 4; k++)
{
float dis = 0.f;
const float* dis_after_sm = bbox_pred.row(k);
for (int l = 0; l < reg_max_1; l++)
{
dis += l * dis_after_sm[l];
}
pred_ltrb[k] = dis * stride;
}
float pb_cx = (j + 0.5f) * stride;
float pb_cy = (i + 0.5f) * stride;
float x0 = pb_cx - pred_ltrb[0];
float y0 = pb_cy - pred_ltrb[1];
float x1 = pb_cx + pred_ltrb[2];
float y1 = pb_cy + pred_ltrb[3];
HandROI obj;
obj.rect.x = x0;
obj.rect.y = y0;
obj.rect.width = x1 - x0;
obj.rect.height = y1 - y0;
obj.label = label;
obj.prob = score;
objects.push_back(obj);
}
}
}
}
}

View File

@@ -1,35 +0,0 @@
#ifndef _HAND_COMMON_H_
#define _HAND_COMMON_H_
#include "../../common/common.h"
#include "net.h"
#include <vector>
namespace ovhand {
struct HandROI {
ov::Rect rect;
int label;
float prob;
};
struct GridAndStride
{
int grid0;
int grid1;
int stride;
};
inline float intersection_area(const HandROI& a, const HandROI& b);
void qsort_descent_inplace(std::vector<HandROI>& objects, int left, int right);
void qsort_descent_inplace(std::vector<HandROI>& objects);
void nms_sorted_bboxes(const std::vector<HandROI>& objects, std::vector<int>& picked, float nms_threshold);
int generate_grids_and_stride(const int target_size, std::vector<int>& strides, std::vector<GridAndStride>& grid_strides);
void generate_yolox_proposals(std::vector<GridAndStride> grid_strides, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<HandROI>& objects);
void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Mat& dis_pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<HandROI>& objects);
}
#endif // !_HAND_COMMON_H_

View File

@@ -1,7 +1,7 @@
#ifndef _HAND_DETECTER_C_H_ #ifndef _HAND_DETECTER_C_H_
#define _HAND_DETECTER_C_H_ #define _HAND_DETECTER_C_H_
#include "common.h" #include "../common/common.h"
#ifdef __cplusplus #ifdef __cplusplus
#include "detecter/detecter.hpp" #include "detecter/detecter.hpp"
@@ -12,7 +12,7 @@ extern "C" {
IHandDetecter new_nanodet(); IHandDetecter new_nanodet();
int extract_hand_rois(IHandDetecter d, const unsigned char* rgbdata, int extract_hand_rois(IHandDetecter d, const unsigned char* rgbdata,
int img_width, int img_height, int img_width, int img_height,
HandROIVector* rois); ObjectInfoVector* rois);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@@ -10,17 +10,23 @@ IHandDetecter new_nanodet() {
return new ovhand::Nanodet(); return new ovhand::Nanodet();
} }
int extract_hand_rois(IHandDetecter d, const unsigned char* rgbdata, int img_width, int img_height, HandROIVector* rois) { int extract_hand_rois(IHandDetecter d, const unsigned char* rgbdata, int img_width, int img_height, ObjectInfoVector* rois) {
std::vector<HandROI> detected; std::vector<ov::ObjectInfo> detected;
int ret = static_cast<ovhand::Detecter*>(d)->Detect(rgbdata, img_width, img_height, &detected); int ret = static_cast<ovhand::Detecter*>(d)->Detect(rgbdata, img_width, img_height, detected);
if (ret != 0) { if (ret != 0) {
return ret; return ret;
} }
rois->length = detected.size(); rois->length = detected.size();
rois->items = (HandROI*)malloc(rois->length * sizeof(HandROI)); rois->items = (ObjectInfo*)malloc(rois->length * sizeof(ObjectInfo));
for (size_t i = 0; i < detected.size(); ++i) { for (size_t i = 0; i < detected.size(); ++i) {
rois->items[i] = detected[i]; ov::ObjectInfo o = detected[i];
rois->items[i] = ObjectInfo{
o.rect,
o.prob,
o.label,
NULL
};
} }
return 0; return 0;
} }

View File

@@ -8,7 +8,7 @@ public:
virtual ~Detecter() {}; virtual ~Detecter() {};
virtual int Detect(const unsigned char*rgbdata, virtual int Detect(const unsigned char*rgbdata,
int img_width, int img_height, int img_width, int img_height,
std::vector<HandROI>* rois) = 0; std::vector<ov::ObjectInfo>& rois) = 0;
}; };
class DetecterFactory { class DetecterFactory {

View File

@@ -1,11 +1,114 @@
#include "nanodet.hpp" #include "nanodet.hpp"
#include <string> #include <string>
#include <float.h>
#ifdef OV_VULKAN #ifdef OV_VULKAN
#include "gpu.h" #include "gpu.h"
#endif // OV_VULKAN #endif // OV_VULKAN
namespace ovhand { namespace ovhand {
static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Mat& dis_pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<ov::ObjectInfo>& objects)
{
const int num_grid = cls_pred.h;
int num_grid_x;
int num_grid_y;
if (in_pad.w > in_pad.h)
{
num_grid_x = in_pad.w / stride;
num_grid_y = num_grid / num_grid_x;
}
else
{
num_grid_y = in_pad.h / stride;
num_grid_x = num_grid / num_grid_y;
}
const int num_class = cls_pred.w;
const int reg_max_1 = dis_pred.w / 4;
//__android_log_print(ANDROID_LOG_WARN, "ncnn","cls_pred h %d, w %d",cls_pred.h,cls_pred.w);
//__android_log_print(ANDROID_LOG_WARN, "ncnn","%d,%d,%d,%d",num_grid_x,num_grid_y,num_class,reg_max_1);
for (int i = 0; i < num_grid_y; i++)
{
for (int j = 0; j < num_grid_x; j++)
{
const int idx = i * num_grid_x + j;
const float* scores = cls_pred.row(idx);
// find label with max score
int label = -1;
float score = -FLT_MAX;
for (int k = 0; k < num_class; k++)
{
if (scores[k] > score)
{
label = k;
score = scores[k];
}
}
if (score >= prob_threshold)
{
ncnn::Mat bbox_pred(reg_max_1, 4, (void*)dis_pred.row(idx));
{
ncnn::Layer* softmax = ncnn::create_layer("Softmax");
ncnn::ParamDict pd;
pd.set(0, 1); // axis
pd.set(1, 1);
softmax->load_param(pd);
ncnn::Option opt;
opt.num_threads = 1;
opt.use_packing_layout = false;
softmax->create_pipeline(opt);
softmax->forward_inplace(bbox_pred, opt);
softmax->destroy_pipeline(opt);
delete softmax;
}
float pred_ltrb[4];
for (int k = 0; k < 4; k++)
{
float dis = 0.f;
const float* dis_after_sm = bbox_pred.row(k);
for (int l = 0; l < reg_max_1; l++)
{
dis += l * dis_after_sm[l];
}
pred_ltrb[k] = dis * stride;
}
float pb_cx = (j + 0.5f) * stride;
float pb_cy = (i + 0.5f) * stride;
float x0 = pb_cx - pred_ltrb[0];
float y0 = pb_cy - pred_ltrb[1];
float x1 = pb_cx + pred_ltrb[2];
float y1 = pb_cy + pred_ltrb[3];
ov::ObjectInfo obj;
obj.rect.x = x0;
obj.rect.y = y0;
obj.rect.width = x1 - x0;
obj.rect.height = y1 - y0;
obj.label = label;
obj.prob = score;
objects.push_back(obj);
}
}
}
}
Nanodet::Nanodet() : Nanodet::Nanodet() :
net_ (new ncnn::Net()), net_ (new ncnn::Net()),
initialized_(false) { initialized_(false) {
@@ -31,7 +134,7 @@ int Nanodet::LoadModel(const char * root_path) {
int Nanodet::Detect(const unsigned char* rgbdata, int Nanodet::Detect(const unsigned char* rgbdata,
int img_width, int img_height, int img_width, int img_height,
std::vector<HandROI>* rois) { std::vector<ov::ObjectInfo>& rois) {
if (!initialized_) { if (!initialized_) {
return 10000; return 10000;
} }
@@ -39,8 +142,6 @@ int Nanodet::Detect(const unsigned char* rgbdata,
return 10001; return 10001;
} }
const int target_size = 320;
int w = img_width; int w = img_width;
int h = img_height; int h = img_height;
float scale = 1.f; float scale = 1.f;
@@ -62,17 +163,13 @@ int Nanodet::Detect(const unsigned char* rgbdata,
ncnn::Mat in_pad; ncnn::Mat in_pad;
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f); ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
const float mean_vals[3] = {103.53f, 116.28f, 123.675f};
const float norm_vals[3] = {1.f / 57.375f, 1.f / 57.12f, 1.f / 58.395f};
in_pad.substract_mean_normalize(mean_vals, norm_vals); in_pad.substract_mean_normalize(mean_vals, norm_vals);
ncnn::Extractor ex = net_->create_extractor(); ncnn::Extractor ex = net_->create_extractor();
//__android_log_print(ANDROID_LOG_WARN, "ncnn","input w:%d,h:%d",in_pad.w,in_pad.h); //__android_log_print(ANDROID_LOG_WARN, "ncnn","input w:%d,h:%d",in_pad.w,in_pad.h);
ex.input("input.1", in_pad); ex.input("input.1", in_pad);
const float prob_threshold = 0.4f; std::vector<ov::ObjectInfo> proposals;
const float nms_threshold = 0.5f;
std::vector<HandROI> proposals;
// stride 8 // stride 8
{ {
ncnn::Mat cls_pred; ncnn::Mat cls_pred;
@@ -80,7 +177,7 @@ int Nanodet::Detect(const unsigned char* rgbdata,
ex.extract("cls_pred_stride_8", cls_pred); ex.extract("cls_pred_stride_8", cls_pred);
ex.extract("dis_pred_stride_8", dis_pred); ex.extract("dis_pred_stride_8", dis_pred);
std::vector<HandROI> objects8; std::vector<ov::ObjectInfo> objects8;
generate_nanodet_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold, objects8); generate_nanodet_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold, objects8);
proposals.insert(proposals.end(), objects8.begin(), objects8.end()); proposals.insert(proposals.end(), objects8.begin(), objects8.end());
@@ -93,7 +190,7 @@ int Nanodet::Detect(const unsigned char* rgbdata,
ex.extract("cls_pred_stride_16", cls_pred); ex.extract("cls_pred_stride_16", cls_pred);
ex.extract("dis_pred_stride_16", dis_pred); ex.extract("dis_pred_stride_16", dis_pred);
std::vector<HandROI> objects16; std::vector<ov::ObjectInfo> objects16;
generate_nanodet_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold, objects16); generate_nanodet_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold, objects16);
proposals.insert(proposals.end(), objects16.begin(), objects16.end()); proposals.insert(proposals.end(), objects16.begin(), objects16.end());
@@ -106,7 +203,7 @@ int Nanodet::Detect(const unsigned char* rgbdata,
ex.extract("cls_pred_stride_32", cls_pred); ex.extract("cls_pred_stride_32", cls_pred);
ex.extract("dis_pred_stride_32", dis_pred); ex.extract("dis_pred_stride_32", dis_pred);
std::vector<HandROI> objects32; std::vector<ov::ObjectInfo> objects32;
generate_nanodet_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold, objects32); generate_nanodet_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold, objects32);
proposals.insert(proposals.end(), objects32.begin(), objects32.end()); proposals.insert(proposals.end(), objects32.begin(), objects32.end());
@@ -120,10 +217,11 @@ int Nanodet::Detect(const unsigned char* rgbdata,
nms_sorted_bboxes(proposals, picked, nms_threshold); nms_sorted_bboxes(proposals, picked, nms_threshold);
int count = picked.size(); int count = picked.size();
rois.resize(count);
for (int i = 0; i < count; i++) for (int i = 0; i < count; i++)
{ {
HandROI roi = proposals[picked[i]]; ov::ObjectInfo roi = proposals[picked[i]];
// adjust offset to original unpadded // adjust offset to original unpadded
float x0 = (roi.rect.x - (wpad / 2)) / scale; float x0 = (roi.rect.x - (wpad / 2)) / scale;
@@ -142,17 +240,17 @@ int Nanodet::Detect(const unsigned char* rgbdata,
roi.rect.width = x1 - x0; roi.rect.width = x1 - x0;
roi.rect.height = y1 - y0; roi.rect.height = y1 - y0;
rois->push_back(roi); rois[i] = roi;
} }
// sort objects by area // sort objects by area
struct struct
{ {
bool operator()(const HandROI& a, const HandROI& b) const bool operator()(const ov::ObjectInfo& a, const ov::ObjectInfo& b) const
{ {
return a.rect.area() > b.rect.area(); return a.rect.area() > b.rect.area();
} }
} objects_area_greater; } objects_area_greater;
std::sort(rois->begin(), rois->end(), objects_area_greater); std::sort(rois.begin(), rois.end(), objects_area_greater);
return 0; return 0;
} }
} }

View File

@@ -15,11 +15,16 @@ public:
 	int LoadModel(const char* root_path);
 	int Detect(const unsigned char* rgbadata,
 		int img_width, int img_height,
-		std::vector<HandROI>* rois);
+		std::vector<ov::ObjectInfo>& rois);
 private:
 	ncnn::Net* net_;
 	bool initialized_;
+	const int target_size = 320;
+	const float mean_vals[3] = {103.53f, 116.28f, 123.675f};
+	const float norm_vals[3] = {1.f / 57.375f, 1.f / 57.12f, 1.f / 58.395f};
+	const float prob_threshold = 0.4f;
+	const float nms_threshold = 0.5f;
 };
 }
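With the out-parameter changed from a pointer to a reference, callers now pass the result vector directly instead of its address; the same pattern applies to the Yolox detecter below. A minimal caller-side sketch (the detecter construction, model loading, and the rgb pixel buffer are assumed, not shown in this diff):

// Sketch only: `detecter` is an initialized Nanodet and `rgb` points to
// packed RGB pixels of size width * height * 3.
std::vector<ov::ObjectInfo> rois;
int ret = detecter.Detect(rgb, width, height, rois);   // was: Detect(..., &rois)
if (ret == 0) {
    for (const auto& roi : rois) {
        // roi.rect, roi.prob and roi.label describe one detected hand
    }
}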
View File
@@ -8,6 +8,55 @@
 namespace ovhand {
+static void generate_yolox_proposals(std::vector<ov::GridAndStride> grid_strides, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<ov::ObjectInfo>& objects)
+{
+	const int num_grid = feat_blob.h;
+	const int num_class = feat_blob.w - 5;
+	const int num_anchors = grid_strides.size();
+	const float* feat_ptr = feat_blob.channel(0);
+	for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++)
+	{
+		const int grid0 = grid_strides[anchor_idx].grid0;
+		const int grid1 = grid_strides[anchor_idx].grid1;
+		const int stride = grid_strides[anchor_idx].stride;
+		// yolox/models/yolo_head.py decode logic
+		// outputs[..., :2] = (outputs[..., :2] + grids) * strides
+		// outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides
+		float x_center = (feat_ptr[0] + grid0) * stride;
+		float y_center = (feat_ptr[1] + grid1) * stride;
+		float w = exp(feat_ptr[2]) * stride;
+		float h = exp(feat_ptr[3]) * stride;
+		float x0 = x_center - w * 0.5f;
+		float y0 = y_center - h * 0.5f;
+		float box_objectness = feat_ptr[4];
+		for (int class_idx = 0; class_idx < num_class; class_idx++)
+		{
+			float box_cls_score = feat_ptr[5 + class_idx];
+			float box_prob = box_objectness * box_cls_score;
+			if (box_prob > prob_threshold)
+			{
+				ov::ObjectInfo obj;
+				obj.rect.x = x0;
+				obj.rect.y = y0;
+				obj.rect.width = w;
+				obj.rect.height = h;
+				obj.label = class_idx;
+				obj.prob = box_prob;
+				objects.push_back(obj);
+			}
+		} // class loop
+		feat_ptr += feat_blob.w;
+	} // point anchor loop
+}
 Yolox::Yolox() :
 	net_ (new ncnn::Net()),
 	initialized_(false) {
@@ -34,7 +83,7 @@ int Yolox::LoadModel(const char * root_path) {
 int Yolox::Detect(const unsigned char* rgbdata,
 	int img_width, int img_height,
-	std::vector<HandROI>* rois) {
+	std::vector<ov::ObjectInfo>& rois) {
 	if (!initialized_) {
 		return 10000;
 	}
@@ -42,8 +91,6 @@ int Yolox::Detect(const unsigned char* rgbdata,
 		return 10001;
 	}
-	const int target_size = 416;
 	int w = img_width;
 	int h = img_height;
 	float scale = 1.f;
@@ -65,8 +112,6 @@ int Yolox::Detect(const unsigned char* rgbdata,
 	ncnn::Mat in_pad;
 	ncnn::copy_make_border(in, in_pad, 0, hpad, 0, wpad, ncnn::BORDER_CONSTANT, 114.f);
-	const float mean_vals[3] = {255.f * 0.485f, 255.f * 0.456, 255.f * 0.406f};
-	const float norm_vals[3] = {1 / (255.f * 0.229f), 1 / (255.f * 0.224f), 1 / (255.f * 0.225f)};
 	// so for 0-255 input image, rgb_mean should multiply 255 and norm should div by std.
 	in_pad.substract_mean_normalize(mean_vals, norm_vals);
@@ -76,13 +121,11 @@ int Yolox::Detect(const unsigned char* rgbdata,
 	ncnn::Mat out;
 	ex.extract("output", out);
-	const float prob_threshold = 0.45f;
-	const float nms_threshold = 0.65f;
-	std::vector<HandROI> proposals;
+	std::vector<ov::ObjectInfo> proposals;
 	{
 		std::vector<int> strides = {8, 16, 32}; // might have stride=64
-		std::vector<GridAndStride> grid_strides;
+		std::vector<ov::GridAndStride> grid_strides;
 		generate_grids_and_stride(target_size, strides, grid_strides);
 		generate_yolox_proposals(grid_strides, out, prob_threshold, proposals);
 	}
@@ -96,9 +139,11 @@ int Yolox::Detect(const unsigned char* rgbdata,
 	int count = picked.size();
+	rois.resize(count);
 	for (int i = 0; i < count; i++)
 	{
-		HandROI roi = proposals[picked[i]];
+		ov::ObjectInfo roi = proposals[picked[i]];
 		// adjust offset to original unpadded
 		float x0 = (roi.rect.x) / scale;
@@ -117,7 +162,7 @@ int Yolox::Detect(const unsigned char* rgbdata,
 		roi.rect.width = x1 - x0;
 		roi.rect.height = y1 - y0;
-		rois->push_back(roi);
+		rois[i] = roi;
 	}
 	return 0;
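For context, generate_yolox_proposals above applies the anchor-free YOLOX decode: the first two channels are offsets added to the grid cell and scaled by the stride, the next two are log-space sizes. A small standalone check of that arithmetic with made-up values (not taken from the model):

// Hypothetical raw prediction at grid cell (grid0 = 10, grid1 = 5) on stride 8.
float pred[4] = {0.3f, -0.2f, 0.7f, 0.1f};
float x_center = (pred[0] + 10) * 8;    // 82.4
float y_center = (pred[1] + 5) * 8;     // 38.4
float w = exp(pred[2]) * 8;             // ~16.11
float h = exp(pred[3]) * 8;             // ~8.84
float x0 = x_center - w * 0.5f;         // ~74.3, the top-left x stored in ObjectInfo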
View File
@@ -15,11 +15,16 @@ public:
 	int LoadModel(const char* root_path);
 	int Detect(const unsigned char* rgbadata,
 		int img_width, int img_height,
-		std::vector<HandROI>* rois);
+		std::vector<ov::ObjectInfo>& rois);
 private:
 	ncnn::Net* net_;
 	bool initialized_;
+	const int target_size = 416;
+	const float mean_vals[3] = {255.f * 0.485f, 255.f * 0.456, 255.f * 0.406f};
+	const float norm_vals[3] = {1 / (255.f * 0.229f), 1 / (255.f * 0.224f), 1 / (255.f * 0.225f)};
+	const float prob_threshold = 0.45f;
+	const float nms_threshold = 0.65f;
 };
 }
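A note on the mean/norm members above and the in-code comment about 0-255 input: ncnn's substract_mean_normalize applies (pixel - mean) * norm per channel on raw 0-255 values, so folding in the usual ImageNet statistics means mean = 255 * mean_imagenet and norm = 1 / (255 * std_imagenet). A quick sanity check with a hypothetical pixel value:

// (pixel - 255*mean) * (1 / (255*std))  ==  (pixel/255 - mean) / std
float pixel = 128.f;
float mean = 0.485f, stdv = 0.229f;
float a = (pixel - 255.f * mean) * (1.f / (255.f * stdv)); // ~0.0741
float b = (pixel / 255.f - mean) / stdv;                   // ~0.0741, same value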
View File
@@ -1,7 +1,7 @@
 #ifndef _HAND_POSE_C_H_
 #define _HAND_POSE_C_H_
-#include "common.h"
+#include "../common/common.h"
 #ifdef __cplusplus
 #include "pose/estimator.hpp"
View File
@@ -10,9 +10,9 @@ int hand_pose(IHandPoseEstimator d, const unsigned char* rgbdata,
 	int img_width, int img_height,
 	const Rect* rect,
 	Point2fVector* keypoints) {
-	std::vector<ov::Point2f>points;
-	int ret = static_cast<ovhand::HandPose*>(d)->Detect(rgbdata, img_width, img_height, *rect, &points);
+	std::vector<ov::Point2f> points;
+	int ret = static_cast<ovhand::HandPose*>(d)->Detect(rgbdata, img_width, img_height, *rect, points);
 	if (ret != 0) {
 		return ret;
 	}
View File
@@ -11,7 +11,7 @@ public:
 	virtual int Detect(const unsigned char*rgbdata,
 		int img_width, int img_height,
 		const ov::Rect& rect,
-		std::vector<ov::Point2f>* keypoints) = 0;
+		std::vector<ov::Point2f>& keypoints) = 0;
 };
 class PoseEstimatorFactory {
View File
@@ -32,8 +32,8 @@ int HandPose::LoadModel(const char * root_path) {
 int HandPose::Detect(const unsigned char* rgbdata,
 	int img_width, int img_height,
 	const ov::Rect& rect,
-	std::vector<ov::Point2f>* keypoints) {
-	keypoints->clear();
+	std::vector<ov::Point2f>& keypoints) {
+	keypoints.clear();
 	if (!initialized_) {
 		return 10000;
 	}
@@ -58,6 +58,8 @@ int HandPose::Detect(const unsigned char* rgbdata,
 	ex1.input("input", ncnn_in);
 	ncnn::Mat ncnn_out;
 	ex1.extract("output", ncnn_out);
+	keypoints.resize(21);
 	for (int c = 0; c < ncnn_out.c; c++)
 	{
 		ncnn::Mat data = ncnn_out.channel(c);
@@ -66,8 +68,7 @@ int HandPose::Detect(const unsigned char* rgbdata,
 		{
 			float pt_x = ptr[j * 2] * rect.width;
 			float pt_y = ptr[j * 2 + 1] * rect.height;
-			keypoints->push_back(ov::Point2f(pt_x + rect.x, pt_y + rect.y));
+			keypoints[j] = ov::Point2f(pt_x + rect.x, pt_y + rect.y);
 		}
 	}
 	free(crop_img);
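Because the output vector is now pre-sized to 21 entries and written by index, callers always receive a fixed-length result of 21 hand landmarks. A minimal caller-side sketch (the estimator setup, the rgb frame buffer, and the detected hand rect are assumed here):

// Sketch only: `pose` is an initialized HandPose, `rgb` holds packed RGB
// pixels, and `hand_rect` is one ov::Rect returned by a hand detecter.
std::vector<ov::Point2f> pts;
if (pose.Detect(rgb, width, height, hand_rect, pts) == 0) {
    // pts.size() == 21; entries are already offset back into image coordinates
    for (const auto& p : pts) {
        // consume p.x, p.y
    }
}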
View File
@@ -16,11 +16,13 @@ public:
 	int Detect(const unsigned char* rgbdata,
 		int img_width, int img_height,
 		const ov::Rect& rect,
-		std::vector<ov::Point2f>* keypoints);
+		std::vector<ov::Point2f>& keypoints);
 private:
 	ncnn::Net* net_;
 	bool initialized_;
+	const float meanVals[3] = { 128.0f, 128.0f, 128.0f };
+	const float normVals[3] = { 0.00390625f, 0.00390625f, 0.00390625f };
 };
 }
View File
@@ -1,41 +0,0 @@
-#ifndef _POSE_COMMON_C_H_
-#define _POSE_COMMON_C_H_
-#include "../common/common.h"
-#ifdef __cplusplus
-#include "common/common.hpp"
-extern "C" {
-#endif
-#ifdef __cplusplus
-typedef ovpose::PoseKeypoint PoseKeypoint;
-typedef ovpose::PoseROI PoseROI;
-#else
-typedef struct PoseKeypoint {
-	Point2f p;
-	float prob;
-} PoseKeypoint;
-typedef struct PoseROI {
-	Rect rect;
-	unsigned char *data;
-	float score;
-} PoseROI;
-#endif
-typedef struct PoseROIVector {
-	PoseROI* items;
-	int length;
-} PoseROIVector;
-typedef struct PoseKeypointVector {
-	PoseKeypoint* points;
-	int length;
-} PoseKeypointVector;
-void FreePoseKeypointVector(PoseKeypointVector *p);
-void FreePoseROI(PoseROI *p);
-void FreePoseROIVector(PoseROIVector *p);
-#ifdef __cplusplus
-}
-#endif
-#endif // !_POSE_COMMON_C_H_
View File
@@ -1,25 +0,0 @@
-#include "../common.h"
-void FreePoseKeypointVector(PoseKeypointVector *p) {
-	if (p->points != NULL) {
-		free(p->points);
-		p->points = NULL;
-	}
-}
-void FreePoseROI(PoseROI *p) {
-	if (p->data!= NULL) {
-		free(p->data);
-		p->data= NULL;
-	}
-}
-void FreePoseROIVector(PoseROIVector *p) {
-	if (p->items!= NULL) {
-		for (int i=0; i < p->length; i ++) {
-			FreePoseROI(&p->items[i]);
-		}
-		free(p->items);
-		p->items= NULL;
-	}
-}
View File
@@ -1,18 +0,0 @@
-#ifndef _POSE_COMMON_H_
-#define _POSE_COMMON_H_
-#include "../../common/common.h"
-namespace ovpose {
-struct PoseKeypoint {
-	ov::Point2f p;
-	float prob;
-};
-struct PoseROI {
-	ov::Rect rect;
-	unsigned char *data;
-	float score;
-};
-}
-#endif // !_POSE_COMMON_H_
View File
@@ -1,7 +1,7 @@
 #ifndef _POSE_DETECTER_C_H_
 #define _POSE_DETECTER_C_H_
-#include "common.h"
+#include "../common/common.h"
 #ifdef __cplusplus
 #include "detecter/detecter.hpp"
@@ -11,8 +11,10 @@ extern "C" {
 IPoseDetecter new_ultralight();
 int extract_pose_rois(IPoseDetecter d, const unsigned char* rgbdata,
 	int img_width, int img_height,
-	PoseROIVector* rois);
-int extract_pose_keypoints(IPoseDetecter d, const PoseROI* roi, PoseKeypointVector* keypoints);
+	ObjectInfoVector* rois);
+int extract_pose_keypoints(IPoseDetecter d, const unsigned char* rgbdata,
+	int img_width, int img_height,
+	const Rect* rect, KeypointVector* keypoints);
 #ifdef __cplusplus
 }
 #endif
View File
@@ -5,29 +5,35 @@ IPoseDetecter new_ultralight() {
 	return new ovpose::Ultralight();
 }
-int extract_pose_rois(IPoseDetecter d, const unsigned char* rgbdata, int img_width, int img_height, PoseROIVector* rois) {
-	std::vector<PoseROI> detected;
+int extract_pose_rois(IPoseDetecter d, const unsigned char* rgbdata, int img_width, int img_height, ObjectInfoVector* rois) {
+	std::vector<ov::ObjectInfo> detected;
 	int ret = static_cast<ovpose::Detecter*>(d)->ExtractROIs(rgbdata, img_width, img_height, &detected);
 	if (ret != 0) {
 		return ret;
 	}
 	rois->length = detected.size();
-	rois->items = (PoseROI*)malloc(rois->length * sizeof(PoseROI));
+	rois->items = (ObjectInfo*)malloc(rois->length * sizeof(ObjectInfo));
 	for (size_t i = 0; i < detected.size(); ++i) {
-		rois->items[i] = detected[i];
+		ov::ObjectInfo o = detected[i];
+		rois->items[i] = ObjectInfo{
+			o.rect,
+			o.prob,
+			o.label,
+			NULL
+		};
 	}
 	return 0;
 }
-int extract_pose_keypoints(IPoseDetecter d, const PoseROI* roi, PoseKeypointVector* keypoints) {
-	std::vector<PoseKeypoint> points;
-	int ret = static_cast<ovpose::Detecter*>(d)->ExtractKeypoints(*roi, &points);
+int extract_pose_keypoints(IPoseDetecter d, const unsigned char* rgbdata, int img_width, int img_height, const Rect* rect, KeypointVector* keypoints) {
+	std::vector<ov::Keypoint> points;
+	int ret = static_cast<ovpose::Detecter*>(d)->ExtractKeypoints(rgbdata, img_width, img_height, *rect, &points);
 	if (ret != 0) {
 		return ret;
 	}
 	keypoints->length = points.size();
-	keypoints->points = (PoseKeypoint*)malloc(keypoints->length * sizeof(PoseKeypoint));
+	keypoints->points = (Keypoint*)malloc(keypoints->length * sizeof(Keypoint));
 	for (size_t i = 0; i < points.size(); ++i) {
 		keypoints->points[i] = points[i];
 	}
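With the PoseROI types gone, the C API now returns generic ObjectInfoVector boxes and takes the original frame plus a Rect when extracting keypoints. A hedged usage sketch of the new call sequence; model loading and error handling are omitted, and the ObjectInfo/Keypoint member names are assumed to mirror the C++ structs used above:

/* Sketch only: detect person ROIs, then extract keypoints for the first one. */
IPoseDetecter d = new_ultralight();
ObjectInfoVector rois;
if (extract_pose_rois(d, rgbdata, img_width, img_height, &rois) == 0 && rois.length > 0) {
    KeypointVector kps;
    Rect rect = rois.items[0].rect;   /* box of the first detected person (field name assumed) */
    if (extract_pose_keypoints(d, rgbdata, img_width, img_height, &rect, &kps) == 0) {
        for (int i = 0; i < kps.length; ++i) {
            /* kps.points[i].p / kps.points[i].prob hold one body keypoint */
        }
    }
    /* the caller owns kps.points and rois.items, which were malloc'd by the wrapper */
}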
View File
@@ -10,8 +10,10 @@ public:
 	virtual ~Detecter(){};
 	virtual int ExtractROIs(const unsigned char* rgbadata,
 		int img_width, int img_height,
-		std::vector<PoseROI>* rois) = 0;
-	virtual int ExtractKeypoints(const PoseROI& roi, std::vector<PoseKeypoint>* keypoints) = 0;
+		std::vector<ov::ObjectInfo>* rois) = 0;
+	virtual int ExtractKeypoints(const unsigned char* rgbdata,
+		int img_width, int img_height,
+		const ov::Rect& rect, std::vector<ov::Keypoint>* keypoints) = 0;
 };
 class DetecterFactory {
View File
@@ -42,7 +42,7 @@ int Ultralight::LoadModel(const char * root_path) {
 int Ultralight::ExtractROIs(const unsigned char* rgbdata,
 	int img_width, int img_height,
-	std::vector<PoseROI>* rois) {
+	std::vector<ov::ObjectInfo>* rois) {
 	if (!initialized_) {
 		return 10000;
 	}
@@ -52,8 +52,6 @@ int Ultralight::ExtractROIs(const unsigned char* rgbdata,
 	ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata,
 		ncnn::Mat::PIXEL_RGB, img_width, img_height, 320, 320);
 	//数据预处理 (input preprocessing)
-	const float mean_vals[3] = {0.f, 0.f, 0.f};
-	const float norm_vals[3] = {1/255.f, 1/255.f, 1/255.f};
 	in.substract_mean_normalize(mean_vals, norm_vals);
 	ncnn::Extractor ex = roi_net_->create_extractor();
@@ -98,32 +96,32 @@ int Ultralight::ExtractROIs(const unsigned char* rgbdata,
 		if(y2>img_height) y2=img_height;
 		//截取人体ROI (crop the person ROI)
 		//printf("x1:%f y1:%f x2:%f y2:%f\n",x1,y1,x2,y2);
-		Rect rect = ov::Rect(x1, y1, x2-x1, y2-y1);
-		size_t total_size = rect.width * rect.height * 3 * sizeof(unsigned char);
-		PoseROI roi;
+		ov::Rect rect = ov::Rect(x1, y1, x2-x1, y2-y1);
+		ov::ObjectInfo roi;
 		roi.rect = rect;
-		roi.score = score;
-		roi.data = (unsigned char*)malloc(total_size);
-		const unsigned char *start_ptr = rgbdata;
-		for(size_t i = 0; i < rect.height; ++i) {
-			const unsigned char* srcCursor = start_ptr + ((i + rect.y) * img_width + rect.x) * 3;
-			unsigned char* dstCursor = roi.data + i * rect.width * 3;
-			memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * rect.width);
-		}
+		roi.prob = score;
 		rois->push_back(roi);
 	}
 	return 0;
 }
-int Ultralight::ExtractKeypoints(const PoseROI& roi, std::vector<PoseKeypoint>* keypoints) {
+int Ultralight::ExtractKeypoints(const unsigned char* rgbdata,
+	int img_width, int img_height,
+	const ov::Rect& rect, std::vector<ov::Keypoint>* keypoints) {
 	keypoints->clear();
-	int w = roi.rect.width;
-	int h = roi.rect.height;
-	ncnn::Mat in = ncnn::Mat::from_pixels_resize(roi.data, ncnn::Mat::PIXEL_RGB, w, h, 192, 256);
+	int w = rect.width;
+	int h = rect.height;
+	size_t total_size = w * h * 3 * sizeof(unsigned char);
+	unsigned char* data = (unsigned char*)malloc(total_size);
+	const unsigned char *start_ptr = rgbdata;
+	for(size_t i = 0; i < h; ++i) {
+		const unsigned char* srcCursor = start_ptr + ((i + rect.y) * img_width + rect.x) * 3;
+		unsigned char* dstCursor = data + i * w * 3;
+		memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * w);
+	}
+	ncnn::Mat in = ncnn::Mat::from_pixels_resize(data, ncnn::Mat::PIXEL_RGB, w, h, 192, 256);
 	//数据预处理 (input preprocessing)
-	const float mean_vals[3] = {0.485f * 255.f, 0.456f * 255.f, 0.406f * 255.f};
-	const float norm_vals[3] = {1 / 0.229f / 255.f, 1 / 0.224f / 255.f, 1 / 0.225f / 255.f};
-	in.substract_mean_normalize(mean_vals, norm_vals);
+	in.substract_mean_normalize(meanVals, normVals);
 	ncnn::Extractor ex = pose_net_->create_extractor();
 	ex.set_num_threads(4);
@@ -152,11 +150,13 @@ int Ultralight::ExtractKeypoints(const PoseROI& roi, std::vector<PoseKeypoint>*
 			}
 		}
-		PoseKeypoint keypoint;
-		keypoint.p = Point2f(max_x * w / (float)out.w+roi.rect.x, max_y * h / (float)out.h+roi.rect.y);
+		ov::Keypoint keypoint;
+		keypoint.p = ov::Point2f(max_x * w / (float)out.w+rect.x, max_y * h / (float)out.h+rect.y);
 		keypoint.prob = max_prob;
 		keypoints->push_back(keypoint);
 	}
+	free(data);
 	return 0;
 }
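The keypoint above is recovered from the heatmap argmax: the peak cell (max_x, max_y) is scaled from heatmap resolution back to the ROI size and then offset by the ROI origin. A small numeric sketch with hypothetical values (the real heatmap dimensions come from the ncnn output blob):

// Hypothetical: heatmap out.w = 48, out.h = 64; ROI is 96x128 at (rect.x, rect.y) = (200, 50).
int max_x = 12, max_y = 40;
float img_x = max_x * 96 / 48.f + 200;   // 224 in the original image
float img_y = max_y * 128 / 64.f + 50;   // 130 in the original image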
View File
@@ -15,14 +15,20 @@ public:
 	int LoadModel(const char* root_path);
 	int ExtractROIs(const unsigned char* rgbadata,
 		int img_width, int img_height,
-		std::vector<PoseROI>* rois);
-	int ExtractKeypoints(const PoseROI& roi,
-		std::vector<PoseKeypoint>* keypoints);
+		std::vector<ov::ObjectInfo>* rois);
+	int ExtractKeypoints(const unsigned char* rgbdata,
+		int img_width, int img_height,
+		const ov::Rect& rect,
+		std::vector<ov::Keypoint>* keypoints);
 private:
 	ncnn::Net* roi_net_;
 	ncnn::Net* pose_net_;
 	bool initialized_;
+	const float mean_vals[3] = {0.f, 0.f, 0.f};
+	const float norm_vals[3] = {1/255.f, 1/255.f, 1/255.f};
+	const float meanVals[3] = {0.485f * 255.f, 0.456f * 255.f, 0.406f * 255.f};
+	const float normVals[3] = {1 / 0.229f / 255.f, 1 / 0.224f / 255.f, 1 / 0.225f / 255.f};
 };
 }
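Taken together, these changes replace the per-module HandROI/PoseROI/PoseKeypoint types with the shared ov::ObjectInfo and ov::Keypoint structs, so hand detection, pose detection, and the C bindings all exchange the same box and keypoint shapes. Judging only from how the fields are used in this diff, the shared C++ types look roughly like the sketch below; member order and any extra fields are assumptions, and the C-side ObjectInfo adds a fourth member that the wrapper above initializes to NULL:

// Inferred from usage in this commit, not copied from common.hpp.
namespace ov {
struct ObjectInfo {
    Rect rect;     // detection box (rect.area() is used for sorting)
    float prob;    // detection score
    int label;     // class index
};
struct Keypoint {
    Point2f p;     // location in image coordinates
    float prob;    // confidence
};
}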