feat(face): add scrfd face detecter/landmarker

This commit is contained in:
Syd Xu
2021-10-30 20:29:00 +08:00
parent 50e43fc864
commit 9413dd6a0e
82 changed files with 1101 additions and 661 deletions

View File

@@ -22,12 +22,14 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM
- centerface [Google Drive](https://drive.google.com/drive/folders/1xMhO6aCnkkjt90Fh8BxVD_JHB3QJ2q-q?usp=sharing)
- retinaface [Google Drive](https://drive.google.com/drive/folders/1nxR3WFqqEWLwGVsp5c4tI0_iVVEaVOe8?usp=sharing)
- yoloface [Google Drive](https://drive.google.com/drive/folders/1EM9H6-aYXKsWTRxx_wbKDyYHVIYpU6f7?usp=sharing)
- scrfd [Google Drive](https://drive.google.com/drive/folders/1XPjfsuXGj9rXqAmo1K70BsqWmHvoYQv_?usp=sharing)
- anticonv (for mask detection) [Google Drive](https://drive.google.com/drive/folders/1Fje0fmVPy5g0_oaxUbH_cAedkgjBf7QW?usp=sharing)
- recognizer (face feature extraction for classification)
- mobilenet [Google Drive](https://drive.google.com/drive/folders/1fRLs10atm_vwDWQXZ-GJbKQpypNcXLAx?usp=sharing)
- landmarker (for face landmarkers extraction)
- insightface [Google Drive](https://drive.google.com/drive/folders/1e_nRwneMEDf_sXEMZCmOk0S4VT0_XpOS?usp=sharing)
- zq [Google Drive](https://drive.google.com/drive/folders/1ax0J1TVhf2S-B3V6lnqwJaaHUK433sPm?usp=sharing)
- scrfd [Google Drive](https://drive.google.com/drive/folders/1XPjfsuXGj9rXqAmo1K70BsqWmHvoYQv_?usp=sharing)
- tracker (for face IOU calculation between frames)
- hopenet (for head pose detection) [Google Drive](https://drive.google.com/drive/folders/1zLam-8s9ZMPDUxUEtNU2F9yFTDRM5fk-?usp=sharing)
- pose
@@ -40,3 +42,14 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM
- pose (for hand pose estimation)
- handnet [Google Drive](https://drive.google.com/drive/folders/1DsCGmiVaZobbMWRp5Oec8GbIpeg7CsNR?usp=sharing)
- golang binding (github.com/bububa/openvision/go)
## Reference
- [MirrorYuChen/ncnn_example](https://github.com/MirrorYuChen/ncnn_example)
- [nihui/ncnn-android-nanodet](https://github.com/nihui/ncnn-android-nanodet)
- [FeiGeChuanShu/ncnn_Android_face](https://github.com/FeiGeChuanShu/ncnn_Android_face)
- [FeiGeChuanShu/ncnn_nanodet_hand](https://github.com/FeiGeChuanShu/ncnn_nanodet_hand)
- [docongminh/deep-head-pose-ncnn](https://github.com/docongminh/deep-head-pose-ncnn)
- [nilseuropa/hopenet_ncnn](https://github.com/nilseuropa/hopenet_ncnn)
- [dog-qiuqiu/Ultralight-SimplePose](https://github.com/dog-qiuqiu/Ultralight-SimplePose)
- [GHLab/deep-head-pose-lite-ncnn](https://github.com/GHLab/deep-head-pose-lite-ncnn)

41
go/common/estimator.go Normal file
View File

@@ -0,0 +1,41 @@
package common
/*
#include <stdlib.h>
#include <stdbool.h>
#include "openvision/common/common.h"
*/
import "C"
import (
"unsafe"
openvision "github.com/bububa/openvision/go"
)
// Estimator is the common interface implemented by every ncnn-backed model
// wrapper in this project (face detecters, landmarkers, recognizers, pose
// estimators, ...). Pointer exposes the raw C estimator handle so the
// package-level helpers below can hand it to the C API.
type Estimator interface {
LoadModel(modelPath string) error
Destroy()
Pointer() unsafe.Pointer
}
// SetEstimatorThreads sets the thread count (ncnn net opt.num_threads)
// used by the given estimator's underlying C object.
func SetEstimatorThreads(est Estimator, threads int) {
	handle := (C.IEstimator)(est.Pointer())
	C.set_num_threads(handle, C.int(threads))
}
// DestroyEstimator destroys an Estimator, releasing the native
// resources held by its underlying C handle.
func DestroyEstimator(est Estimator) {
	handle := (C.IEstimator)(est.Pointer())
	C.destroy_estimator(handle)
}
// EstimatorLoadModel loads the model files located at modelPath into the
// estimator. A non-zero return code from the C API is wrapped in an
// openvision.LoadModelError.
func EstimatorLoadModel(est Estimator, modelPath string) error {
	cpath := C.CString(modelPath)
	// C.CString allocates on the C heap; release it once load_model returns.
	defer C.free(unsafe.Pointer(cpath))
	if code := C.load_model((C.IEstimator)(est.Pointer()), cpath); code != 0 {
		return openvision.LoadModelError(int(code))
	}
	return nil
}

View File

@@ -7,16 +7,29 @@ package common
*/
import "C"
// GetGPUCount get gpu number
func GetGPUCount() int {
count := C.get_gpu_count()
return int(count)
}
// CreateGPUInstance create gpu instance
func CreateGPUInstance() int {
i := C.create_gpu_instance()
return int(i)
}
// DestroyGPUInstance destory gpu instance
func DestroyGPUInstance() {
C.destroy_gpu_instance()
}
// GetBigCPUCount get cpu number
func GetBigCPUCount() int {
return int(C.get_big_cpu_count())
}
// SetOMPThreads set omp thread number
func SetOMPThreads(n int) {
C.set_omp_num_threads(C.int(n))
}

View File

@@ -22,14 +22,14 @@ type Keypoint struct {
func GoKeypoint(c *C.Keypoint, w float64, h float64) Keypoint {
return Keypoint{
Point: Pt(float64(c.p.x)/w, float64(c.p.y)/h),
Score: float32(c.prob),
Score: float32(c.score),
}
}
// Convert Keypoint to C.Keypoint pointer
func (k Keypoint) CKeypoint(w float64, h float64) *C.Keypoint {
ret := (*C.Keypoint)(C.malloc(C.sizeof_Keypoint))
ret.prob = C.float(k.Score)
ret.score = C.float(k.Score)
ret.p = C.Point2f{
C.float(k.Point.X * w),
C.float(k.Point.Y * h),

View File

@@ -26,7 +26,7 @@ type ObjectInfo struct {
func GoObjectInfo(c *C.ObjectInfo, w float64, h float64) ObjectInfo {
ret := ObjectInfo{
Label: int(c.label),
Score: float32(c.prob),
Score: float32(c.score),
Rect: Rect(
float64(c.rect.x)/w,
float64(c.rect.y)/h,
@@ -44,7 +44,7 @@ func GoObjectInfo(c *C.ObjectInfo, w float64, h float64) ObjectInfo {
func (o ObjectInfo) ToCObjectInfo(w float64, h float64) *C.ObjectInfo {
ret := (*C.ObjectInfo)(C.malloc(C.sizeof_ObjectInfo))
ret.label = C.int(o.Label)
ret.prob = C.float(o.Score)
ret.score = C.float(o.Score)
ret.rect.x = C.int(o.Rect.X * w)
ret.rect.y = C.int(o.Rect.Y * h)
ret.rect.width = C.int(o.Rect.Width * w)

View File

@@ -23,24 +23,30 @@ func main() {
modelPath := filepath.Join(dataPath, "./models")
common.CreateGPUInstance()
defer common.DestroyGPUInstance()
test_detect(imgPath, modelPath)
test_mask(imgPath, modelPath)
cpuCores := common.GetBigCPUCount()
common.SetOMPThreads(cpuCores)
log.Printf("CPU big cores:%d\n", cpuCores)
test_detect(imgPath, modelPath, cpuCores)
test_mask(imgPath, modelPath, cpuCores)
}
func test_detect(imgPath string, modelPath string) {
func test_detect(imgPath string, modelPath string, threads int) {
for idx, d := range []detecter.Detecter{
retinaface(modelPath),
centerface(modelPath),
mtcnn(modelPath),
yoloface(modelPath),
scrfd(modelPath),
} {
common.SetEstimatorThreads(d, threads)
detect(d, imgPath, idx, "4.jpg", false)
d.Destroy()
}
}
func test_mask(imgPath string, modelPath string) {
func test_mask(imgPath string, modelPath string, threads int) {
d := anticonv(modelPath)
common.SetEstimatorThreads(d, threads)
defer d.Destroy()
detect(d, imgPath, 0, "mask3.jpg", true)
}
@@ -72,6 +78,15 @@ func yoloface(modelPath string) detecter.Detecter {
return d
}
func scrfd(modelPath string) detecter.Detecter {
modelPath = filepath.Join(modelPath, "scrfd/scrfd1g")
d := detecter.NewScrfd()
if err := d.LoadModel(modelPath); err != nil {
log.Fatalln(err)
}
return d
}
func centerface(modelPath string) detecter.Detecter {
modelPath = filepath.Join(modelPath, "centerface")
d := detecter.NewCenterface()
@@ -96,7 +111,7 @@ func detect(d detecter.Detecter, imgPath string, idx int, filename string, mask
if err != nil {
log.Fatalln("load image failed,", err)
}
faces, err := d.DetectFace(common.NewImage(img))
faces, err := d.Detect(common.NewImage(img))
if err != nil {
log.Fatalln(err)
}

View File

@@ -24,13 +24,19 @@ func main() {
modelPath := filepath.Join(dataPath, "./models")
common.CreateGPUInstance()
defer common.DestroyGPUInstance()
cpuCores := common.GetBigCPUCount()
common.SetOMPThreads(cpuCores)
log.Printf("CPU big cores:%d\n", cpuCores)
estimator := handpose(modelPath)
defer estimator.Destroy()
common.SetEstimatorThreads(estimator, cpuCores)
for idx, d := range []detecter.Detecter{
yolox(modelPath), nanodet(modelPath),
yolox(modelPath),
nanodet(modelPath),
} {
defer d.Destroy()
detect(d, estimator, imgPath, "hand2.jpg", idx)
common.SetEstimatorThreads(d, cpuCores)
detect(d, estimator, imgPath, "hand1.jpg", idx)
}
}

View File

@@ -24,10 +24,15 @@ func main() {
modelPath := filepath.Join(dataPath, "./models")
common.CreateGPUInstance()
defer common.DestroyGPUInstance()
cpuCores := common.GetBigCPUCount()
common.SetOMPThreads(cpuCores)
log.Printf("CPU big cores:%d\n", cpuCores)
d := retinaface(modelPath)
defer d.Destroy()
common.SetEstimatorThreads(d, cpuCores)
h := processer(modelPath)
defer h.Destroy()
common.SetEstimatorThreads(h, cpuCores)
for _, fn := range []string{"robocop.jpg", "terminator.jpg"} {
process(d, h, imgPath, fn)
}
@@ -60,7 +65,7 @@ func process(d detecter.Detecter, h *hopenet.Hopenet, imgPath string, filename s
log.Fatalln("load image failed,", err)
}
img := common.NewImage(imgLoaded)
faces, err := d.DetectFace(img)
faces, err := d.Detect(img)
if err != nil {
log.Fatalln(err)
}

View File

@@ -2,6 +2,7 @@ package main
import (
"bytes"
"fmt"
"image"
"image/jpeg"
"log"
@@ -23,11 +24,21 @@ func main() {
modelPath := filepath.Join(dataPath, "./models")
common.CreateGPUInstance()
defer common.DestroyGPUInstance()
cpuCores := common.GetBigCPUCount()
common.SetOMPThreads(cpuCores)
log.Printf("CPU big cores:%d\n", cpuCores)
d := retinaface(modelPath)
defer d.Destroy()
m := insightface(modelPath)
common.SetEstimatorThreads(d, cpuCores)
for idx, m := range []landmarker.Landmarker{
insightface(modelPath),
zq(modelPath),
scrfd(modelPath),
} {
defer m.Destroy()
extract_keypoints(d, m, imgPath, "4.jpg")
common.SetEstimatorThreads(m, cpuCores)
extract_keypoints(d, m, imgPath, "4.jpg", idx)
}
}
func retinaface(modelPath string) detecter.Detecter {
@@ -57,14 +68,23 @@ func zq(modelPath string) landmarker.Landmarker {
return d
}
func extract_keypoints(d detecter.Detecter, m landmarker.Landmarker, imgPath string, filename string) {
func scrfd(modelPath string) landmarker.Landmarker {
modelPath = filepath.Join(modelPath, "scrfd/landmarker")
d := landmarker.NewScrfd()
if err := d.LoadModel(modelPath); err != nil {
log.Fatalln(err)
}
return d
}
func extract_keypoints(d detecter.Detecter, m landmarker.Landmarker, imgPath string, filename string, idx int) {
inPath := filepath.Join(imgPath, filename)
imgLoaded, err := loadImage(inPath)
if err != nil {
log.Fatalln("load image failed,", err)
}
img := common.NewImage(imgLoaded)
faces, err := d.DetectFace(img)
faces, err := d.Detect(img)
if err != nil {
log.Fatalln(err)
}
@@ -83,8 +103,7 @@ func extract_keypoints(d detecter.Detecter, m landmarker.Landmarker, imgPath str
keypoints = append(keypoints, points...)
}
out := drawer.DrawLandmark(imgLoaded, keypoints)
outPath := filepath.Join(imgPath, "./results", filename)
outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("landmarker-%d-%s", idx, filename))
if err := saveImage(out, outPath); err != nil {
log.Fatalln(err)
}

View File

@@ -23,8 +23,12 @@ func main() {
modelPath := filepath.Join(dataPath, "./models")
common.CreateGPUInstance()
defer common.DestroyGPUInstance()
cpuCores := common.GetBigCPUCount()
common.SetOMPThreads(cpuCores)
log.Printf("CPU big cores:%d\n", cpuCores)
d := ultralightDetector(modelPath)
defer d.Destroy()
common.SetEstimatorThreads(d, cpuCores)
detect(d, imgPath, "ultralight-pose3.jpg")
}

View File

@@ -22,10 +22,15 @@ func main() {
modelPath := filepath.Join(dataPath, "./models")
common.CreateGPUInstance()
defer common.DestroyGPUInstance()
cpuCores := common.GetBigCPUCount()
common.SetOMPThreads(cpuCores)
log.Printf("CPU big cores:%d\n", cpuCores)
d := retinaface(modelPath)
defer d.Destroy()
common.SetEstimatorThreads(d, cpuCores)
m := mobilefacenet(modelPath)
defer m.Destroy()
common.SetEstimatorThreads(m, cpuCores)
extract_features(d, m, imgPath, "4.jpg")
}
@@ -54,7 +59,7 @@ func extract_features(d detecter.Detecter, r recognizer.Recognizer, imgPath stri
log.Fatalln("load image failed,", err)
}
img := common.NewImage(imgLoaded)
faces, err := d.DetectFace(img)
faces, err := d.Detect(img)
if err != nil {
log.Fatalln(err)
}

View File

@@ -48,7 +48,7 @@ func track(d detecter.Detecter, t *tracker.Tracker, imgPath string, filename str
log.Fatalln("load image failed,", err)
}
img := common.NewImage(imgLoaded)
faces, err := d.DetectFace(img)
faces, err := d.Detect(img)
if err != nil {
log.Fatalln(err)
}

View File

@@ -7,6 +7,8 @@ package detecter
*/
import "C"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
"github.com/bububa/openvision/go/face"
)
@@ -25,20 +27,20 @@ func NewAnticonv() *Anticonv {
// Destroy free detecter
func (d *Anticonv) Destroy() {
Destroy(d)
common.DestroyEstimator(d)
}
// Handler returns C.IDetecter
func (d *Anticonv) Handler() C.IFaceDetecter {
return d.d
// Pointer implement Estimator interface
func (d *Anticonv) Pointer() unsafe.Pointer {
return unsafe.Pointer(d.d)
}
// LoadModel load model for detecter
func (d *Anticonv) LoadModel(modelPath string) error {
return LoadModel(d, modelPath)
return common.EstimatorLoadModel(d, modelPath)
}
// DetectFace implement Detecter interface
func (d *Anticonv) DetectFace(img *common.Image) ([]face.FaceInfo, error) {
return DetectFace(d, img)
// Detect implement Detecter interface
func (d *Anticonv) Detect(img *common.Image) ([]face.FaceInfo, error) {
return Detect(d, img)
}

View File

@@ -7,6 +7,8 @@ package detecter
*/
import "C"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
"github.com/bububa/openvision/go/face"
)
@@ -25,20 +27,20 @@ func NewCenterface() *Centerface {
// Destroy free detecter
func (d *Centerface) Destroy() {
Destroy(d)
common.DestroyEstimator(d)
}
// Handler returns C.IDetecter
func (d *Centerface) Handler() C.IFaceDetecter {
return d.d
// Pointer implement Estimator interface
func (d *Centerface) Pointer() unsafe.Pointer {
return unsafe.Pointer(d.d)
}
// LoadModel load model for detecter
func (d *Centerface) LoadModel(modelPath string) error {
return LoadModel(d, modelPath)
return common.EstimatorLoadModel(d, modelPath)
}
// DetectFace implement Detecter interface
func (d *Centerface) DetectFace(img *common.Image) ([]face.FaceInfo, error) {
return DetectFace(d, img)
// Detect implement Detecter interface
func (d *Centerface) Detect(img *common.Image) ([]face.FaceInfo, error) {
return Detect(d, img)
}

View File

@@ -17,36 +17,18 @@ import (
// Detecter represents detecter interface
type Detecter interface {
Handler() C.IFaceDetecter
LoadModel(modelPath string) error
DetectFace(img *common.Image) ([]face.FaceInfo, error)
Destroy()
common.Estimator
Detect(img *common.Image) ([]face.FaceInfo, error)
}
// LoadModel load detecter model
func LoadModel(d Detecter, modelPath string) error {
cpath := C.CString(modelPath)
defer C.free(unsafe.Pointer(cpath))
retCode := C.load_model((C.IEstimator)(unsafe.Pointer(d.Handler())), cpath)
if retCode != 0 {
return openvision.LoadModelError(int(retCode))
}
return nil
}
// Destroy a detecter
func Destroy(d Detecter) {
C.destroy_estimator((C.IEstimator)(unsafe.Pointer(d.Handler())))
}
// DetectFace detect face useing detecter
func DetectFace(d Detecter, img *common.Image) ([]face.FaceInfo, error) {
// Detect detect face using detecter
func Detect(d Detecter, img *common.Image) ([]face.FaceInfo, error) {
imgWidth := img.WidthF64()
imgHeight := img.HeightF64()
data := img.Bytes()
CFaces := face.NewCFaceInfoVector()
defer face.FreeCFaceInfoVector(CFaces)
errCode := C.detect_face(d.Handler(), (*C.uchar)(unsafe.Pointer(&data[0])), C.int(imgWidth), C.int(imgHeight), (*C.FaceInfoVector)(unsafe.Pointer(CFaces)))
errCode := C.detect_face((C.IFaceDetecter)(d.Pointer()), (*C.uchar)(unsafe.Pointer(&data[0])), C.int(imgWidth), C.int(imgHeight), (*C.FaceInfoVector)(unsafe.Pointer(CFaces)))
if errCode != 0 {
return nil, openvision.DetectFaceError(int(errCode))
}

View File

@@ -7,6 +7,8 @@ package detecter
*/
import "C"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
"github.com/bububa/openvision/go/face"
)
@@ -25,20 +27,20 @@ func NewMtcnn() *Mtcnn {
// Destroy free detecter
func (d *Mtcnn) Destroy() {
Destroy(d)
common.DestroyEstimator(d)
}
// Handler returns C.IFaceDetecter
func (d *Mtcnn) Handler() C.IFaceDetecter {
return d.d
// Pointer implement Estimator interface
func (d *Mtcnn) Pointer() unsafe.Pointer {
return unsafe.Pointer(d.d)
}
// LoadModel implement Detecter interface
func (d *Mtcnn) LoadModel(modelPath string) error {
return LoadModel(d, modelPath)
return common.EstimatorLoadModel(d, modelPath)
}
// DetectFace implement Detecter interface
func (d *Mtcnn) DetectFace(img *common.Image) ([]face.FaceInfo, error) {
return DetectFace(d, img)
// Detect implement Detecter interface
func (d *Mtcnn) Detect(img *common.Image) ([]face.FaceInfo, error) {
return Detect(d, img)
}

View File

@@ -7,6 +7,8 @@ package detecter
*/
import "C"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
"github.com/bububa/openvision/go/face"
)
@@ -25,19 +27,20 @@ func NewRetinaFace() *RetinaFace {
// Destroy free detecter
func (d *RetinaFace) Destroy() {
Destroy(d)
common.DestroyEstimator(d)
}
// Handler returns C.IFaceDetecter
func (d *RetinaFace) Handler() C.IFaceDetecter {
return d.d
// Pointer implement Estimator interface
func (d *RetinaFace) Pointer() unsafe.Pointer {
return unsafe.Pointer(d.d)
}
// LoadModel implement Detecter interface
func (d *RetinaFace) LoadModel(modelPath string) error {
return LoadModel(d, modelPath)
return common.EstimatorLoadModel(d, modelPath)
}
func (d *RetinaFace) DetectFace(img *common.Image) ([]face.FaceInfo, error) {
return DetectFace(d, img)
// Detect implement Detecter interface
func (d *RetinaFace) Detect(img *common.Image) ([]face.FaceInfo, error) {
return Detect(d, img)
}

46
go/face/detecter/scrfd.go Normal file
View File

@@ -0,0 +1,46 @@
package detecter
/*
#include <stdlib.h>
#include <stdbool.h>
#include "openvision/face/detecter.h"
*/
import "C"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
"github.com/bububa/openvision/go/face"
)
// Scrfd represents the scrfd face detecter. It wraps the C face
// detecter handle created by C.new_scrfd.
type Scrfd struct {
d C.IFaceDetecter // underlying C detecter handle
}
// NewScrfd constructs a Scrfd detecter backed by a freshly allocated
// C handle; callers must release it with Destroy.
func NewScrfd() *Scrfd {
	return &Scrfd{d: C.new_scrfd()}
}
// Destroy frees the detecter's underlying C estimator handle.
func (d *Scrfd) Destroy() {
	common.DestroyEstimator(d)
}
// LoadModel implements the Detecter interface by delegating to the
// shared common.EstimatorLoadModel helper.
func (d *Scrfd) LoadModel(modelPath string) error {
	return common.EstimatorLoadModel(d, modelPath)
}
// Pointer implements the Estimator interface, exposing the raw C
// detecter handle.
func (d *Scrfd) Pointer() unsafe.Pointer {
	return unsafe.Pointer(d.d)
}
// Detect implements the Detecter interface: it runs face detection on
// img via the package-level Detect helper.
func (d *Scrfd) Detect(img *common.Image) ([]face.FaceInfo, error) {
	return Detect(d, img)
}

View File

@@ -7,6 +7,8 @@ package detecter
*/
import "C"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
"github.com/bububa/openvision/go/face"
)
@@ -25,20 +27,20 @@ func NewYoloFace() *YoloFace {
// Destroy free detecter
func (d *YoloFace) Destroy() {
Destroy(d)
common.DestroyEstimator(d)
}
// Handler returns C.IFaceDetecter
func (d *YoloFace) Handler() C.IFaceDetecter {
return d.d
// Pointer implement Estimator interface
func (d *YoloFace) Pointer() unsafe.Pointer {
return unsafe.Pointer(d.d)
}
// LoadModel implement Detecter interface
func (d *YoloFace) LoadModel(modelPath string) error {
return LoadModel(d, modelPath)
return common.EstimatorLoadModel(d, modelPath)
}
// DetectFace implement Detecter interface
func (d *YoloFace) DetectFace(img *common.Image) ([]face.FaceInfo, error) {
return DetectFace(d, img)
// Detect implement Detecter interface
func (d *YoloFace) Detect(img *common.Image) ([]face.FaceInfo, error) {
return Detect(d, img)
}

View File

@@ -27,13 +27,13 @@ type FaceInfo struct {
// GoFaceInfo convert c FaceInfo to go type
func GoFaceInfo(cInfo *C.FaceInfo, w float64, h float64) FaceInfo {
info := FaceInfo{
Score: float32(cInfo.score_),
Score: float32(cInfo.score),
Mask: bool(cInfo.mask_),
Rect: common.Rect(
float64(cInfo.location_.x)/w,
float64(cInfo.location_.y)/h,
float64(cInfo.location_.width)/w,
float64(cInfo.location_.height)/h,
float64(cInfo.rect.x)/w,
float64(cInfo.rect.y)/h,
float64(cInfo.rect.width)/w,
float64(cInfo.rect.height)/h,
),
}
for i := 0; i < 5; i++ {
@@ -48,9 +48,9 @@ func GoFaceInfo(cInfo *C.FaceInfo, w float64, h float64) FaceInfo {
// CFaceInfo convert FaceInfo to C.FaceInfo
func (f FaceInfo) CFaceInfo(w float64, h float64) *C.FaceInfo {
ret := (*C.FaceInfo)(C.malloc(C.sizeof_FaceInfo))
ret.score_ = C.float(f.Score)
ret.score = C.float(f.Score)
ret.mask_ = C.bool(f.Mask)
ret.location_ = C.Rect{
ret.rect = C.Rect{
C.int(f.Rect.X * w),
C.int(f.Rect.Y * h),
C.int(f.Rect.Width * w),

View File

@@ -26,20 +26,20 @@ func NewHopenet() *Hopenet {
}
}
// Pointer implement Estimator interface
func (h *Hopenet) Pointer() unsafe.Pointer {
return unsafe.Pointer(h.d)
}
// LoadModel load detecter model
func (h *Hopenet) LoadModel(modelPath string) error {
cpath := C.CString(modelPath)
defer C.free(unsafe.Pointer(cpath))
retCode := C.load_model((C.IEstimator)(unsafe.Pointer(h.d)), cpath)
if retCode != 0 {
return openvision.LoadModelError(int(retCode))
}
return common.EstimatorLoadModel(h, modelPath)
return nil
}
// Destroy destroy C.IHopeNet
func (h *Hopenet) Destroy() {
C.destroy_estimator((C.IEstimator)(unsafe.Pointer(h.d)))
common.DestroyEstimator(h)
}
// Detect head pose
@@ -51,7 +51,7 @@ func (h *Hopenet) Detect(img *common.Image, faceRect common.Rectangle) (face.Hea
CHeadPose := face.NewCHeadPose()
defer C.free(unsafe.Pointer(CHeadPose))
errCode := C.hopenet_detect(
h.d,
(C.IHopenet)(h.Pointer()),
(*C.uchar)(unsafe.Pointer(&data[0])),
C.int(imgWidth), C.int(imgHeight),
(*C.Rect)(unsafe.Pointer(CRect)),

View File

@@ -6,7 +6,11 @@ package landmarker
#include "openvision/face/landmarker.h"
*/
import "C"
import "github.com/bububa/openvision/go/common"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
)
// Insightface represents Insightface landmarker
type Insightface struct {
@@ -20,19 +24,19 @@ func NewInsightface() *Insightface {
}
}
// Handler returns C.ILandmarker
func (d *Insightface) Handler() C.IFaceLandmarker {
return d.d
// Pointer implement Estimator interface
func (d *Insightface) Pointer() unsafe.Pointer {
return unsafe.Pointer(d.d)
}
// LoadModel implement Landmarker interface
func (d *Insightface) LoadModel(modelPath string) error {
return LoadModel(d, modelPath)
return common.EstimatorLoadModel(d, modelPath)
}
// Destroy implement Landmarker interface
func (d *Insightface) Destroy() {
Destroy(d)
common.DestroyEstimator(d)
}
// ExtractKeypoints implement Landmarker interface

View File

@@ -16,26 +16,8 @@ import (
// Landmarker represents landmarker interface
type Landmarker interface {
Handler() C.IFaceLandmarker
LoadModel(modelPath string) error
common.Estimator
ExtractKeypoints(img *common.Image, face common.Rectangle) ([]common.Point, error)
Destroy()
}
// LoadModel load landmarker model
func LoadModel(d Landmarker, modelPath string) error {
cpath := C.CString(modelPath)
defer C.free(unsafe.Pointer(cpath))
retCode := C.load_model((C.IEstimator)(unsafe.Pointer(d.Handler())), cpath)
if retCode != 0 {
return openvision.LoadModelError(int(retCode))
}
return nil
}
// Destroy a landmarker
func Destroy(d Landmarker) {
C.destroy_estimator((C.IEstimator)(unsafe.Pointer(d.Handler())))
}
// ExtractKeypoints extract keypoints using landmarker
@@ -47,7 +29,7 @@ func ExtractKeypoints(d Landmarker, img *common.Image, faceRect common.Rectangle
defer common.FreeCPoint2fVector(CPoints)
CRect := faceRect.CRect(imgWidth, imgHeight)
errCode := C.extract_face_keypoints(
d.Handler(),
(C.IFaceLandmarker)(d.Pointer()),
(*C.uchar)(unsafe.Pointer(&data[0])),
C.int(imgWidth), C.int(imgHeight),
(*C.Rect)(unsafe.Pointer(CRect)),

View File

@@ -0,0 +1,45 @@
package landmarker
/*
#include <stdlib.h>
#include <stdbool.h>
#include "openvision/face/landmarker.h"
*/
import "C"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
)
// Scrfd represents the Scrfd landmarker. It wraps the C face
// landmarker handle created by C.new_scrfd_landmarker.
type Scrfd struct {
d C.IFaceLandmarker // underlying C landmarker handle
}
// NewScrfd constructs a Scrfd landmarker backed by a freshly allocated
// C handle; callers must release it with Destroy.
func NewScrfd() *Scrfd {
	return &Scrfd{d: C.new_scrfd_landmarker()}
}
// Pointer implements the Estimator interface, exposing the raw C
// landmarker handle.
func (d *Scrfd) Pointer() unsafe.Pointer {
	return unsafe.Pointer(d.d)
}
// LoadModel implements the Landmarker interface by delegating to the
// shared common.EstimatorLoadModel helper.
func (d *Scrfd) LoadModel(modelPath string) error {
	return common.EstimatorLoadModel(d, modelPath)
}
// Destroy frees the landmarker's underlying C estimator handle.
func (d *Scrfd) Destroy() {
	common.DestroyEstimator(d)
}
// ExtractKeypoints implements the Landmarker interface: it extracts
// face keypoints for faceRect via the package-level helper.
func (d *Scrfd) ExtractKeypoints(img *common.Image, faceRect common.Rectangle) ([]common.Point, error) {
	return ExtractKeypoints(d, img, faceRect)
}

View File

@@ -6,7 +6,11 @@ package landmarker
#include "openvision/face/landmarker.h"
*/
import "C"
import "github.com/bububa/openvision/go/common"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
)
// Zq represents Zq landmarker
type Zq struct {
@@ -20,19 +24,19 @@ func NewZq() *Zq {
}
}
// Handler returns C.ILandmarker
func (d *Zq) Handler() C.IFaceLandmarker {
return d.d
// Pointer implement Estimator interface
func (d *Zq) Pointer() unsafe.Pointer {
return unsafe.Pointer(d.d)
}
// LoadModel implement Landmarker interface
func (d *Zq) LoadModel(modelPath string) error {
return LoadModel(d, modelPath)
return common.EstimatorLoadModel(d, modelPath)
}
// Destroy implement Landmarker interface
func (d *Zq) Destroy() {
Destroy(d)
common.DestroyEstimator(d)
}
// ExtractKeypoints implement Landmarker interface

View File

@@ -6,7 +6,11 @@ package recognizer
#include "openvision/face/recognizer.h"
*/
import "C"
import "github.com/bububa/openvision/go/common"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
)
// Mobilefacenet represents Mobilefacenet recognizer
type Mobilefacenet struct {
@@ -20,19 +24,19 @@ func NewMobilefacenet() *Mobilefacenet {
}
}
// Handler returns C.IFaceRecognizer
func (d *Mobilefacenet) Handler() C.IFaceRecognizer {
return d.d
// Pointer implement Estimator interface
func (d *Mobilefacenet) Pointer() unsafe.Pointer {
return unsafe.Pointer(d.d)
}
// LoadModel implement Recognizer interface
func (d *Mobilefacenet) LoadModel(modelPath string) error {
return LoadModel(d, modelPath)
return common.EstimatorLoadModel(d, modelPath)
}
// Destroy implement Recognizer interface
func (d *Mobilefacenet) Destroy() {
Destroy(d)
common.DestroyEstimator(d)
}
// ExtractFeatures implement Recognizer interface

View File

@@ -16,26 +16,8 @@ import (
// Recognizer represents Recognizer interface
type Recognizer interface {
Handler() C.IFaceRecognizer
LoadModel(modelPath string) error
common.Estimator
ExtractFeatures(img *common.Image, face common.Rectangle) ([]float64, error)
Destroy()
}
// LoadModel load recognizer model
func LoadModel(r Recognizer, modelPath string) error {
cpath := C.CString(modelPath)
defer C.free(unsafe.Pointer(cpath))
retCode := C.load_model((C.IEstimator)(unsafe.Pointer(r.Handler())), cpath)
if retCode != 0 {
return openvision.LoadModelError(int(retCode))
}
return nil
}
// Destroy a recognizer
func Destroy(r Recognizer) {
C.destroy_estimator((C.IEstimator)(unsafe.Pointer(r.Handler())))
}
// ExtractFeatures extract features using recognizer
@@ -47,7 +29,7 @@ func ExtractFeatures(r Recognizer, img *common.Image, faceRect common.Rectangle)
defer common.FreeCFloatVector(CFeatures)
CRect := faceRect.CRect(imgWidth, imgHeight)
errCode := C.extract_feature(
r.Handler(),
(C.IFaceRecognizer)(r.Pointer()),
(*C.uchar)(unsafe.Pointer(&data[0])),
C.int(imgWidth), C.int(imgHeight),
(*C.Rect)(unsafe.Pointer(CRect)),

View File

@@ -16,26 +16,8 @@ import (
// Detecter represents detecter interface
type Detecter interface {
Handler() C.IHandDetecter
LoadModel(modelPath string) error
common.Estimator
Detect(img *common.Image) ([]common.ObjectInfo, error)
Destroy()
}
// LoadModel load detecter model
func LoadModel(d Detecter, modelPath string) error {
cpath := C.CString(modelPath)
defer C.free(unsafe.Pointer(cpath))
retCode := C.load_model((C.IEstimator)(unsafe.Pointer(d.Handler())), cpath)
if retCode != 0 {
return openvision.LoadModelError(int(retCode))
}
return nil
}
// Destroy a detecter
func Destroy(d Detecter) {
C.destroy_estimator((C.IEstimator)(unsafe.Pointer(d.Handler())))
}
// Detect detect hand ROI
@@ -46,7 +28,7 @@ func Detect(d Detecter, img *common.Image) ([]common.ObjectInfo, error) {
cObjs := common.NewCObjectInfoVector()
defer common.FreeCObjectInfoVector(cObjs)
errCode := C.extract_hand_rois(
d.Handler(),
(C.IHandDetecter)(d.Pointer()),
(*C.uchar)(unsafe.Pointer(&data[0])),
C.int(imgWidth),
C.int(imgHeight),

View File

@@ -7,6 +7,8 @@ package detecter
*/
import "C"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
)
@@ -24,17 +26,17 @@ func NewNanodet() *Nanodet {
// Destroy free detecter
func (d *Nanodet) Destroy() {
Destroy(d)
common.DestroyEstimator(d)
}
// Handler returns C.IHandDetecter
func (d *Nanodet) Handler() C.IHandDetecter {
return d.d
// Pointer implement Estimator interface
func (d *Nanodet) Pointer() unsafe.Pointer {
return unsafe.Pointer(d.d)
}
// LoadModel load model for detecter
func (d *Nanodet) LoadModel(modelPath string) error {
return LoadModel(d, modelPath)
return common.EstimatorLoadModel(d, modelPath)
}
// Detect implement Detecter interface

View File

@@ -7,6 +7,8 @@ package detecter
*/
import "C"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
)
@@ -24,17 +26,17 @@ func NewYolox() *Yolox {
// Destroy free detecter
func (d *Yolox) Destroy() {
Destroy(d)
common.DestroyEstimator(d)
}
// Handler returns C.IHandDetecter
func (d *Yolox) Handler() C.IHandDetecter {
return d.d
// Pointer implement Estimator interface
func (d *Yolox) Pointer() unsafe.Pointer {
return unsafe.Pointer(d.d)
}
// LoadModel load model for detecter
func (d *Yolox) LoadModel(modelPath string) error {
return LoadModel(d, modelPath)
return common.EstimatorLoadModel(d, modelPath)
}
// Detect implement Detecter interface

View File

@@ -16,26 +16,8 @@ import (
// Estimator represents estimator interface
type Estimator interface {
Handler() C.IHandPoseEstimator
LoadModel(modelPath string) error
common.Estimator
Detect(img *common.Image, rect common.Rectangle) ([]common.Point, error)
Destroy()
}
// LoadModel load detecter model
func LoadModel(d Estimator, modelPath string) error {
cpath := C.CString(modelPath)
defer C.free(unsafe.Pointer(cpath))
retCode := C.load_model((C.IEstimator)(unsafe.Pointer(d.Handler())), cpath)
if retCode != 0 {
return openvision.LoadModelError(int(retCode))
}
return nil
}
// Destroy a estimator
func Destroy(d Estimator) {
C.destroy_estimator((C.IEstimator)(unsafe.Pointer(d.Handler())))
}
// Detect detect hand pose
@@ -47,7 +29,7 @@ func Detect(d Estimator, img *common.Image, rect common.Rectangle) ([]common.Poi
defer common.FreeCPoint2fVector(CPoints)
CRect := rect.CRect(imgWidth, imgHeight)
errCode := C.hand_pose(
d.Handler(),
(C.IHandPoseEstimator)(d.Pointer()),
(*C.uchar)(unsafe.Pointer(&data[0])),
C.int(imgWidth), C.int(imgHeight),
(*C.Rect)(unsafe.Pointer(CRect)),

View File

@@ -7,6 +7,8 @@ package pose
*/
import "C"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
)
@@ -24,17 +26,17 @@ func NewHandPoseEstimator() *HandPoseEstimator {
// Destroy free Estimator
func (d *HandPoseEstimator) Destroy() {
Destroy(d)
common.DestroyEstimator(d)
}
// Handler returns C.IHandPoseEstimator
func (d *HandPoseEstimator) Handler() C.IHandPoseEstimator {
return d.d
// Pointer implement Estimator interface
func (d *HandPoseEstimator) Pointer() unsafe.Pointer {
return unsafe.Pointer(d.d)
}
// LoadModel load model for estimator
func (d *HandPoseEstimator) LoadModel(modelPath string) error {
return LoadModel(d, modelPath)
return common.EstimatorLoadModel(d, modelPath)
}
// Detect implement Estimator interface

View File

@@ -16,26 +16,8 @@ import (
// Detecter represents detecter interface
type Detecter interface {
Handler() C.IPoseDetecter
LoadModel(modelPath string) error
common.Estimator
ExtractKeypoints(img *common.Image) ([]common.ObjectInfo, error)
Destroy()
}
// LoadModel load detecter model
func LoadModel(d Detecter, modelPath string) error {
cpath := C.CString(modelPath)
defer C.free(unsafe.Pointer(cpath))
retCode := C.load_model((C.IEstimator)(unsafe.Pointer(d.Handler())), cpath)
if retCode != 0 {
return openvision.LoadModelError(int(retCode))
}
return nil
}
// Destroy a detecter
func Destroy(d Detecter) {
C.destroy_estimator((C.IEstimator)(unsafe.Pointer(d.Handler())))
}
// ExtractKeypoints detect pose keypoints using detecter
@@ -46,7 +28,7 @@ func ExtractKeypoints(d Detecter, img *common.Image) ([]common.ObjectInfo, error
cObjs := common.NewCObjectInfoVector()
defer common.FreeCObjectInfoVector(cObjs)
errCode := C.extract_pose_rois(
d.Handler(),
(C.IPoseDetecter)(d.Pointer()),
(*C.uchar)(unsafe.Pointer(&data[0])),
C.int(imgWidth),
C.int(imgHeight),
@@ -62,7 +44,7 @@ func ExtractKeypoints(d Detecter, img *common.Image) ([]common.ObjectInfo, error
defer common.FreeCKeypointVector(cKeypoints)
cROI := (*C.ObjectInfo)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_ObjectInfo*C.int(i))))
errCode := C.extract_pose_keypoints(
d.Handler(),
(C.IPoseDetecter)(d.Pointer()),
(*C.uchar)(unsafe.Pointer(&data[0])),
C.int(imgWidth),
C.int(imgHeight),
@@ -80,7 +62,7 @@ func ExtractKeypoints(d Detecter, img *common.Image) ([]common.ObjectInfo, error
float64(cROI.rect.width)/imgWidth,
float64(cROI.rect.height)/imgHeight,
),
Score: float32(cROI.prob),
Score: float32(cROI.score),
})
}

View File

@@ -7,6 +7,8 @@ package detecter
*/
import "C"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
)
@@ -24,17 +26,17 @@ func NewUltralight() *Ultralight {
// Destroy free detecter
func (d *Ultralight) Destroy() {
Destroy(d)
common.DestroyEstimator(d)
}
// Handler returns C.IPoseDetecter
func (d *Ultralight) Handler() C.IPoseDetecter {
return d.d
// Pointer implement Estimator interface
func (d *Ultralight) Pointer() unsafe.Pointer {
return unsafe.Pointer(d.d)
}
// LoadModel load model for detecter
func (d *Ultralight) LoadModel(modelPath string) error {
return LoadModel(d, modelPath)
return common.EstimatorLoadModel(d, modelPath)
}
// ExtractKeypoints implement Detecter interface

View File

@@ -1,8 +1,8 @@
#include "common.h"
#include <algorithm>
#include <iostream>
#include <math.h>
#include <float.h>
#include "cpu.h"
#ifdef OV_VULKAN
#include "gpu.h"
@@ -28,6 +28,16 @@ void destroy_gpu_instance() {
#endif // OV_VULKAN
}
int get_big_cpu_count() {
return ncnn::get_big_cpu_count();
}
void set_omp_num_threads(int n) {
#ifdef OV_OPENMP
ncnn::set_omp_num_threads(n);
#endif
}
int load_model(IEstimator d, const char *root_path) {
return static_cast<ov::Estimator*>(d)->LoadModel(root_path);
}
@@ -36,6 +46,10 @@ void destroy_estimator(IEstimator d) {
delete static_cast<ov::Estimator*>(d);
}
void set_num_threads(IEstimator d, int n) {
static_cast<ov::Estimator*>(d)->set_num_threads(n);
}
void FreePoint2fVector(Point2fVector* p) {
if (p->points != NULL) {
free(p->points);
@@ -97,9 +111,44 @@ void FreeObjectInfoVector(ObjectInfoVector *p) {
}
namespace ov {
Estimator::Estimator() {
net_ = new ncnn::Net();
initialized_ = false;
#ifdef OV_VULKAN
net_->opt.use_vulkan_compute = true;
#endif // OV_VULKAN
}
Estimator::~Estimator() {
if (net_) {
net_->clear();
}
}
int Estimator::LoadModel(const char * root_path) {
std::string param_file = std::string(root_path) + "/param";
std::string bin_file = std::string(root_path) + "/bin";
if (net_->load_param(param_file.c_str()) == -1 ||
net_->load_model(bin_file.c_str()) == -1) {
return 10000;
}
initialized_ = true;
return 0;
}
void Estimator::set_num_threads(int n) {
num_threads = n;
if (net_) {
net_->opt.num_threads = n;
}
}
int RatioAnchors(const Rect & anchor,
const std::vector<float>& ratios,
std::vector<Rect>* anchors) {
std::vector<Rect>* anchors, int threads_num) {
anchors->clear();
Point center = Point(anchor.x + (anchor.width - 1) * 0.5f,
anchor.y + (anchor.height - 1) * 0.5f);
@@ -123,7 +172,7 @@ int RatioAnchors(const Rect & anchor,
}
int ScaleAnchors(const std::vector<Rect>& ratio_anchors,
const std::vector<float>& scales, std::vector<Rect>* anchors) {
const std::vector<float>& scales, std::vector<Rect>* anchors, int threads_num) {
anchors->clear();
#if defined(_OPENMP)
#pragma omp parallel for num_threads(threads_num)
@@ -150,12 +199,13 @@ int ScaleAnchors(const std::vector<Rect>& ratio_anchors,
int GenerateAnchors(const int & base_size,
const std::vector<float>& ratios,
const std::vector<float> scales,
std::vector<Rect>* anchors) {
std::vector<Rect>* anchors,
int threads_num) {
anchors->clear();
Rect anchor = Rect(0, 0, base_size, base_size);
std::vector<Rect> ratio_anchors;
RatioAnchors(anchor, ratios, &ratio_anchors);
ScaleAnchors(ratio_anchors, scales, anchors);
RatioAnchors(anchor, ratios, &ratio_anchors, threads_num);
ScaleAnchors(ratio_anchors, scales, anchors, threads_num);
return 0;
}
@@ -207,14 +257,14 @@ void qsort_descent_inplace(std::vector<ObjectInfo>& objects, int left, int right
{
int i = left;
int j = right;
float p = objects[(left + right) / 2].prob;
float p = objects[(left + right) / 2].score;
while (i <= j)
{
while (objects[i].prob > p)
while (objects[i].score > p)
i++;
while (objects[j].prob < p)
while (objects[j].score < p)
j--;
if (i <= j)
@@ -281,6 +331,44 @@ void nms_sorted_bboxes(const std::vector<ObjectInfo>& objects, std::vector<int>&
picked.push_back(i);
}
}
//
// insightface/detection/scrfd/mmdet/core/anchor/anchor_generator.py gen_single_level_base_anchors()
ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios, const ncnn::Mat& scales)
{
int num_ratio = ratios.w;
int num_scale = scales.w;
ncnn::Mat anchors;
anchors.create(4, num_ratio * num_scale);
const float cx = 0;
const float cy = 0;
for (int i = 0; i < num_ratio; i++)
{
float ar = ratios[i];
int r_w = round(base_size / sqrt(ar));
int r_h = round(r_w * ar); //round(base_size * sqrt(ar));
for (int j = 0; j < num_scale; j++)
{
float scale = scales[j];
float rs_w = r_w * scale;
float rs_h = r_h * scale;
float* anchor = anchors.row(i * num_scale + j);
anchor[0] = cx - rs_w * 0.5f;
anchor[1] = cy - rs_h * 0.5f;
anchor[2] = cx + rs_w * 0.5f;
anchor[3] = cy + rs_h * 0.5f;
}
}
return anchors;
}
int generate_grids_and_stride(const int target_size, std::vector<int>& strides, std::vector<GridAndStride>& grid_strides)
{

View File

@@ -43,7 +43,7 @@ typedef struct Rect {
typedef struct Keypoint {
Point2f p;
float prob;
float score;
} Keypoint;
@@ -54,8 +54,11 @@ typedef void* IEstimator;
int get_gpu_count();
int create_gpu_instance();
void destroy_gpu_instance();
int get_big_cpu_count();
void set_omp_num_threads(int n);
int load_model(IEstimator e, const char* root_path);
void destroy_estimator(IEstimator e);
void set_num_threads(IEstimator e, int n);
typedef struct Point2fVector {
Point2f* points;
@@ -96,7 +99,7 @@ void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint* val);
typedef struct ObjectInfoC {
Rect rect;
float prob;
float score;
int label;
KeypointVector* pts;
} ObjectInfo;

View File

@@ -10,12 +10,17 @@
#endif
namespace ov {
const int threads_num = 2;
class Estimator {
public:
virtual ~Estimator(){};
virtual int LoadModel(const char* root_path) = 0;
Estimator();
virtual ~Estimator();
virtual int LoadModel(const char* root_path);
virtual void set_num_threads(int n);
protected:
int num_threads = 2;
ncnn::Net* net_;
bool initialized_ = false;
};
// Wrapper for an individual cv::cvSize
@@ -78,17 +83,17 @@ typedef struct Rect {
struct ImageInfo {
std::string label_;
float score_;
float score;
};
struct Keypoint {
ov::Point2f p;
float prob;
Point2f p;
float score;
};
struct ObjectInfo {
Rect rect;
float prob;
float score;
int label;
std::vector<Point2f> pts;
};
@@ -101,14 +106,15 @@ struct GridAndStride
};
int RatioAnchors(const Rect & anchor,
const std::vector<float>& ratios, std::vector<Rect>* anchors);
const std::vector<float>& ratios, std::vector<Rect>* anchors, int threads_num);
int ScaleAnchors(const std::vector<Rect>& ratio_anchors,
const std::vector<float>& scales, std::vector<Rect>* anchors);
const std::vector<float>& scales, std::vector<Rect>* anchors, int threads_num);
int GenerateAnchors(const int & base_size,
const std::vector<float>& ratios, const std::vector<float> scales,
std::vector<Rect>* anchors);
std::vector<Rect>* anchors,
int threads_num);
float InterRectArea(const Rect & a,
const Rect & b);
@@ -128,7 +134,7 @@ int const NMS(const std::vector<T>& inputs, std::vector<T>* result,
inputs_tmp.assign(inputs.begin(), inputs.end());
std::sort(inputs_tmp.begin(), inputs_tmp.end(),
[](const T& a, const T& b) {
return a.score_ > b.score_;
return a.score > b.score;
});
std::vector<int> indexes(inputs_tmp.size());
@@ -145,7 +151,7 @@ int const NMS(const std::vector<T>& inputs, std::vector<T>* result,
for (int i = 1; i < tmp_indexes.size(); i++) {
int tmp_i = tmp_indexes[i];
float iou = 0.0f;
ComputeIOU(inputs_tmp[good_idx].location_, inputs_tmp[tmp_i].location_, &iou, type);
ComputeIOU(inputs_tmp[good_idx].rect, inputs_tmp[tmp_i].rect, &iou, type);
if (iou <= threshold) {
indexes.push_back(tmp_i);
}
@@ -160,6 +166,9 @@ void qsort_descent_inplace(std::vector<ObjectInfo>& objects);
void nms_sorted_bboxes(const std::vector<ObjectInfo>& objects, std::vector<int>& picked, float nms_threshold);
// insightface/detection/scrfd/mmdet/core/anchor/anchor_generator.py gen_single_level_base_anchors()
ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios, const ncnn::Mat& scales);
int generate_grids_and_stride(const int target_size, std::vector<int>& strides, std::vector<GridAndStride>& grid_strides);
float sigmoid(float x);

View File

@@ -13,8 +13,8 @@ typedef ovface::TrackedFaceInfo TrackedFaceInfo;
typedef ovface::HeadPose HeadPose;
#else
typedef struct FaceInfo {
Rect location_;
float score_;
Rect rect;
float score;
float keypoints_[10];
bool mask_;
} FaceInfo;

View File

@@ -33,7 +33,86 @@ void FreeTrackedFaceInfoVector(TrackedFaceInfoVector *p) {
namespace ovface {
float CalculateSimilarity(const std::vector<float>&feature1, const std::vector<float>& feature2) {
void qsort_descent_inplace(std::vector<FaceInfo>& objects, int left, int right)
{
int i = left;
int j = right;
float p = objects[(left + right) / 2].score;
while (i <= j)
{
while (objects[i].score > p)
i++;
while (objects[j].score < p)
j--;
if (i <= j)
{
// swap
std::swap(objects[i], objects[j]);
i++;
j--;
}
}
#pragma omp parallel sections
{
#pragma omp section
{
if (left < j) qsort_descent_inplace(objects, left, j);
}
#pragma omp section
{
if (i < right) qsort_descent_inplace(objects, i, right);
}
}
}
void qsort_descent_inplace(std::vector<FaceInfo>& objects)
{
if (objects.empty())
return;
qsort_descent_inplace(objects, 0, objects.size() - 1);
}
void nms_sorted_bboxes(const std::vector<FaceInfo>& objects, std::vector<int>& picked, float nms_threshold)
{
picked.clear();
const int n = objects.size();
std::vector<float> areas(n);
for (int i = 0; i < n; i++)
{
areas[i] = objects[i].rect.area();
}
for (int i = 0; i < n; i++)
{
const FaceInfo& a = objects[i];
int keep = 1;
for (int j = 0; j < (int)picked.size(); j++)
{
const FaceInfo& b = objects[picked[j]];
// intersection over union
float inter_area = InterRectArea(a.rect, b.rect);
float union_area = areas[i] + areas[picked[j]] - inter_area;
// float IoU = inter_area / union_area
if (inter_area / union_area > nms_threshold)
keep = 0;
}
if (keep)
picked.push_back(i);
}
}
float CalculateSimilarity(const std::vector<float>&feature1, const std::vector<float>& feature2, int threads_num) {
if (feature1.size() != feature2.size()) {
return 10003;
}
@@ -41,7 +120,7 @@ float CalculateSimilarity(const std::vector<float>&feature1, const std::vector<f
float feature_norm1 = 0.0f;
float feature_norm2 = 0.0f;
#ifdef OV_OPENMP
#pragma omp parallel for num_threads(ov::threads_num)
#pragma omp parallel for num_threads(threads_num)
#endif
for(int i = 0; i < kFaceFeatureDim; ++i) {
inner_product += feature1[i] * feature2[i];

View File

@@ -8,8 +8,8 @@ namespace ovface {
#define kFaceFeatureDim 128
#define kFaceNameDim 256
struct FaceInfo {
ov::Rect location_;
float score_;
ov::Rect rect;
float score;
float keypoints_[10];
bool mask_;
};
@@ -25,7 +25,15 @@ struct HeadPose
float pitch;
float yaw;
};
}
void qsort_descent_inplace(std::vector<FaceInfo>& objects, int left, int right);
void qsort_descent_inplace(std::vector<FaceInfo>& objects);
void nms_sorted_bboxes(const std::vector<FaceInfo>& objects, std::vector<int>& picked, float nms_threshold);
float CalculateSimilarity(const std::vector<float>&feature1, const std::vector<float>& feature2);
}
#endif // !_FACE_COMMON_H_

View File

@@ -12,6 +12,7 @@ extern "C" {
IFaceDetecter new_centerface();
IFaceDetecter new_mtcnn();
IFaceDetecter new_yoloface();
IFaceDetecter new_scrfd();
IFaceDetecter new_anticonv();
int detect_face(IFaceDetecter d, const unsigned char* rgbdata, int img_width, int img_height, FaceInfoVector* faces);
#ifdef __cplusplus

View File

@@ -5,44 +5,26 @@
#endif // OV_VULKAN
namespace ovface {
AntiConv::AntiConv() :
anticonv_net_(new ncnn::Net()),
initialized_(false) {
#ifdef OV_VULKAN
anticonv_net_->opt.use_vulkan_compute = true;
#endif // OV_VULKAN
}
AntiConv::~AntiConv() {
if (anticonv_net_) {
anticonv_net_->clear();
}
}
int AntiConv::LoadModel(const char * root_path) {
std::string param_file = std::string(root_path) + "/param";
std::string bin_file = std::string(root_path) + "/bin";
if (anticonv_net_->load_param(param_file.c_str()) == -1 ||
anticonv_net_->load_model(bin_file.c_str()) == -1) {
return 10000;
int ret = Estimator::LoadModel(root_path);
if (ret != 0) {
return ret;
}
// generate anchors
for (int i = 0; i < 3; ++i) {
ANCHORS anchors;
if (0 == i) {
GenerateAnchors(16, { 1.0f }, { 32, 16 }, &anchors);
GenerateAnchors(16, { 1.0f }, { 32, 16 }, &anchors, num_threads);
}
else if (1 == i) {
GenerateAnchors(16, { 1.0f }, { 8, 4 }, &anchors);
GenerateAnchors(16, { 1.0f }, { 8, 4 }, &anchors, num_threads);
}
else {
GenerateAnchors(16, { 1.0f }, { 2, 1 }, &anchors);
GenerateAnchors(16, { 1.0f }, { 2, 1 }, &anchors, num_threads);
}
anchors_generated_.push_back(anchors);
}
initialized_ = true;
return 0;
}
@@ -60,7 +42,7 @@ int AntiConv::DetectFace(const unsigned char* rgbdata,
float factor_x = static_cast<float>(img_width) / inputSize_.width;
float factor_y = static_cast<float>(img_height) / inputSize_.height;
ncnn::Extractor ex = anticonv_net_->create_extractor();
ncnn::Extractor ex = net_->create_extractor();
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata,
ncnn::Mat::PIXEL_RGB, img_width, img_height, inputSize_.width, inputSize_.height);
ex.input("data", in);
@@ -115,9 +97,9 @@ int AntiConv::DetectFace(const unsigned char* rgbdata,
FaceInfo face_info;
memset(&face_info, 0, sizeof(face_info));
face_info.score_ = score;
face_info.score = score;
face_info.mask_ = (prob > maskThreshold_);
face_info.location_ = curr_box;
face_info.rect = curr_box;
faces_tmp.push_back(face_info);
}
}

View File

@@ -8,17 +8,13 @@ namespace ovface {
using ANCHORS = std::vector<ov::Rect>;
class AntiConv : public Detecter {
public:
AntiConv();
~AntiConv();
int LoadModel(const char* root_path);
int DetectFace(const unsigned char* rgbdata,
int img_width, int img_height,
std::vector<FaceInfo>* faces);
private:
ncnn::Net* anticonv_net_;
std::vector<ANCHORS> anchors_generated_;
bool initialized_;
const int RPNs_[3] = { 32, 16, 8 };
const Size inputSize_ = { 640, 640 };
const float iouThreshold_ = 0.4f;

View File

@@ -5,31 +5,6 @@
#endif // OV_VULKAN
namespace ovface {
CenterFace::CenterFace() {
centernet_ = new ncnn::Net();
initialized_ = false;
#ifdef OV_VULKAN
centernet_->opt.use_vulkan_compute = true;
#endif // MIRROR_VULKAN
}
CenterFace::~CenterFace(){
if (centernet_) {
centernet_->clear();
}
}
int CenterFace::LoadModel(const char* root_path) {
std::string param_file = std::string(root_path) + "/param";
std::string model_file = std::string(root_path) + "/bin";
if (centernet_->load_param(param_file.c_str()) == -1 ||
centernet_->load_model(model_file.c_str()) == -1) {
return 10000;
}
initialized_ = true;
return 0;
}
int CenterFace::DetectFace(const unsigned char* rgbdata,
int img_width, int img_height,
@@ -49,7 +24,7 @@ int CenterFace::DetectFace(const unsigned char* rgbdata,
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB,
img_width, img_height, img_width_new, img_height_new);
ncnn::Extractor ex = centernet_->create_extractor();
ncnn::Extractor ex = net_->create_extractor();
ex.input("input.1", in);
ncnn::Mat mat_heatmap, mat_scale, mat_offset, mat_landmark;
ex.extract("537", mat_heatmap);
@@ -78,11 +53,11 @@ int CenterFace::DetectFace(const unsigned char* rgbdata,
float xmax = fminf(xmin + s1, img_width_new);
FaceInfo face_info;
face_info.score_ = score;
face_info.location_.x = scale_x * xmin;
face_info.location_.y = scale_y * ymin;
face_info.location_.width = scale_x * (xmax - xmin);
face_info.location_.height = scale_y * (ymax - ymin);
face_info.score = score;
face_info.rect.x = scale_x * xmin;
face_info.rect.y = scale_y * ymin;
face_info.rect.width = scale_x * (xmax - xmin);
face_info.rect.height = scale_y * (ymax - ymin);
for (int num = 0; num < 5; ++num) {
face_info.keypoints_[num ] = scale_x * (s1 * mat_landmark.channel(2 * num + 1)[index] + xmin);

View File

@@ -8,18 +8,13 @@
namespace ovface {
class CenterFace : public Detecter {
public:
CenterFace();
~CenterFace();
int LoadModel(const char* root_path);
int DetectFace(const unsigned char* rgbdata,
int img_width, int img_height,
std::vector<FaceInfo>* faces);
private:
ncnn::Net* centernet_ = nullptr;
const float scoreThreshold_ = 0.5f;
const float nmsThreshold_ = 0.5f;
bool initialized_;
};
}

View File

@@ -4,6 +4,7 @@
#include "retinaface/retinaface.hpp"
#include "anticonv/anticonv.hpp"
#include "yoloface/yoloface.hpp"
#include "scrfd/scrfd.hpp"
IFaceDetecter new_retinaface() {
return new ovface::RetinaFace();
@@ -21,6 +22,10 @@ IFaceDetecter new_yoloface() {
return new ovface::YoloFace();
}
IFaceDetecter new_scrfd() {
return new ovface::Scrfd();
}
IFaceDetecter new_anticonv() {
return new ovface::AntiConv();
}
@@ -58,6 +63,10 @@ Detecter* YoloFaceFactory::CreateDetecter() {
return new YoloFace();
}
Detecter* ScrfdFactory::CreateDetecter() {
return new Scrfd();
}
Detecter* AnticonvFactory::CreateDetecter() {
return new AntiConv();
}

View File

@@ -7,7 +7,6 @@ namespace ovface {
// 抽象类
class Detecter: public ov::Estimator {
public:
virtual ~Detecter() {};
virtual int DetectFace(const unsigned char* rgbdata,
int img_width, int img_height,
std::vector<FaceInfo>* faces) = 0;
@@ -51,6 +50,13 @@ public:
Detecter* CreateDetecter();
};
class ScrfdFactory : public DetecterFactory {
public:
ScrfdFactory() {}
~ScrfdFactory() {}
Detecter* CreateDetecter();
};
class AnticonvFactory : public DetecterFactory {
public:
AnticonvFactory() {}

View File

@@ -32,6 +32,19 @@ Mtcnn::~Mtcnn() {
}
}
void Mtcnn::set_num_threads(int n) {
num_threads = n;
if (pnet_) {
pnet_->opt.num_threads = n;
}
if (rnet_) {
rnet_->opt.num_threads = n;
}
if (onet_) {
onet_->opt.num_threads = n;
}
}
int Mtcnn::LoadModel(const char * root_path) {
std::string pnet_param = std::string(root_path) + "/pnet.param";
std::string pnet_bin = std::string(root_path) + "/pnet.bin";
@@ -141,12 +154,12 @@ int Mtcnn::PDetect(const ncnn::Mat & img_in,
int bbox_height = y2 - y1 + 1;
FaceInfo face_info;
face_info.score_ = score;
face_info.location_.x = x1 + x1_reg * bbox_width;
face_info.location_.y = y1 + y1_reg * bbox_height;
face_info.location_.width = x2 + x2_reg * bbox_width - face_info.location_.x;
face_info.location_.height = y2 + y2_reg * bbox_height - face_info.location_.y;
face_info.location_ = face_info.location_ & Rect(0, 0, width, height);
face_info.score = score;
face_info.rect.x = x1 + x1_reg * bbox_width;
face_info.rect.y = y1 + y1_reg * bbox_height;
face_info.rect.width = x2 + x2_reg * bbox_width - face_info.rect.x;
face_info.rect.height = y2 + y2_reg * bbox_height - face_info.rect.y;
face_info.rect = face_info.rect & Rect(0, 0, width, height);
first_bboxes->push_back(face_info);
}
}
@@ -159,13 +172,13 @@ int Mtcnn::RDetect(const ncnn::Mat & img_in,
std::vector<FaceInfo>* second_bboxes) {
second_bboxes->clear();
for (int i = 0; i < static_cast<int>(first_bboxes.size()); ++i) {
Rect face = first_bboxes.at(i).location_ & Rect(0, 0, img_in.w, img_in.h);
Rect face = first_bboxes.at(i).rect & Rect(0, 0, img_in.w, img_in.h);
ncnn::Mat img_face, img_resized;
ncnn::copy_cut_border(img_in, img_face, face.y, img_in.h - face.br().y, face.x, img_in.w - face.br().x);
ncnn::resize_bilinear(img_face, img_resized, 24, 24);
ncnn::Extractor ex = rnet_->create_extractor();
ex.set_light_mode(true);
ex.set_num_threads(2);
// ex.set_num_threads(2);
ex.input("data", img_resized);
ncnn::Mat score_mat, location_mat;
ex.extract("prob1", score_mat);
@@ -178,13 +191,13 @@ int Mtcnn::RDetect(const ncnn::Mat & img_in,
float h_reg = location_mat[3];
FaceInfo face_info;
face_info.score_ = score;
face_info.location_.x = face.x + x_reg * face.width;
face_info.location_.y = face.y + y_reg * face.height;
face_info.location_.width = face.x + face.width +
w_reg * face.width - face_info.location_.x;
face_info.location_.height = face.y + face.height +
h_reg * face.height - face_info.location_.y;
face_info.score = score;
face_info.rect.x = face.x + x_reg * face.width;
face_info.rect.y = face.y + y_reg * face.height;
face_info.rect.width = face.x + face.width +
w_reg * face.width - face_info.rect.x;
face_info.rect.height = face.y + face.height +
h_reg * face.height - face_info.rect.y;
second_bboxes->push_back(face_info);
}
return 0;
@@ -195,14 +208,14 @@ int Mtcnn::ODetect(const ncnn::Mat & img_in,
std::vector<FaceInfo>* third_bboxes) {
third_bboxes->clear();
for (int i = 0; i < static_cast<int>(second_bboxes.size()); ++i) {
Rect face = second_bboxes.at(i).location_ & Rect(0, 0, img_in.w, img_in.h);
Rect face = second_bboxes.at(i).rect & Rect(0, 0, img_in.w, img_in.h);
ncnn::Mat img_face, img_resized;
ncnn::copy_cut_border(img_in, img_face, face.y, img_in.h - face.br().y, face.x, img_in.w - face.br().x);
ncnn::resize_bilinear(img_face, img_resized, 48, 48);
ncnn::Extractor ex = onet_->create_extractor();
ex.set_light_mode(true);
ex.set_num_threads(2);
// ex.set_num_threads(2);
ex.input("data", img_resized);
ncnn::Mat score_mat, location_mat, keypoints_mat;
ex.extract("prob1", score_mat);
@@ -216,13 +229,13 @@ int Mtcnn::ODetect(const ncnn::Mat & img_in,
float h_reg = location_mat[3];
FaceInfo face_info;
face_info.score_ = score;
face_info.location_.x = face.x + x_reg * face.width;
face_info.location_.y = face.y + y_reg * face.height;
face_info.location_.width = face.x + face.width +
w_reg * face.width - face_info.location_.x;
face_info.location_.height = face.y + face.height +
h_reg * face.height - face_info.location_.y;
face_info.score = score;
face_info.rect.x = face.x + x_reg * face.width;
face_info.rect.y = face.y + y_reg * face.height;
face_info.rect.width = face.x + face.width +
w_reg * face.width - face_info.rect.x;
face_info.rect.height = face.y + face.height +
h_reg * face.height - face_info.rect.y;
for (int num = 0; num < 5; num++) {
face_info.keypoints_[num] = face.x + face.width * keypoints_mat[num];
@@ -238,15 +251,15 @@ int Mtcnn::Refine(std::vector<FaceInfo>* bboxes, const Size max_size) {
int num_boxes = static_cast<int>(bboxes->size());
for (int i = 0; i < num_boxes; ++i) {
FaceInfo face_info = bboxes->at(i);
int width = face_info.location_.width;
int height = face_info.location_.height;
int width = face_info.rect.width;
int height = face_info.rect.height;
float max_side = fmaxf(width, height);
face_info.location_.x = face_info.location_.x + 0.5 * width - 0.5 * max_side;
face_info.location_.y = face_info.location_.y + 0.5 * height - 0.5 * max_side;
face_info.location_.width = max_side;
face_info.location_.height = max_side;
face_info.location_ = face_info.location_ & Rect(0, 0, max_size.width, max_size.height);
face_info.rect.x = face_info.rect.x + 0.5 * width - 0.5 * max_side;
face_info.rect.y = face_info.rect.y + 0.5 * height - 0.5 * max_side;
face_info.rect.width = max_side;
face_info.rect.height = max_side;
face_info.rect = face_info.rect & Rect(0, 0, max_size.width, max_size.height);
bboxes->at(i) = face_info;
}

View File

@@ -11,6 +11,7 @@ public:
Mtcnn();
~Mtcnn();
int LoadModel(const char* root_path);
void set_num_threads(int n);
int DetectFace(const unsigned char* rgbdata,
int img_width, int img_height,
std::vector<FaceInfo>* faces);

View File

@@ -5,44 +5,26 @@
#endif // OV_VULKAN
namespace ovface {
RetinaFace::RetinaFace() :
retina_net_(new ncnn::Net()),
initialized_(false) {
#ifdef OV_VULKAN
retina_net_->opt.use_vulkan_compute = true;
#endif // OV_VULKAN
}
RetinaFace::~RetinaFace() {
if (retina_net_) {
retina_net_->clear();
}
}
int RetinaFace::LoadModel(const char * root_path) {
std::string fd_param = std::string(root_path) + "/param";
std::string fd_bin = std::string(root_path) + "/bin";
if (retina_net_->load_param(fd_param.c_str()) == -1 ||
retina_net_->load_model(fd_bin.c_str()) == -1) {
return 10000;
int ret = Estimator::LoadModel(root_path);
if (ret != 0) {
return ret;
}
// generate anchors
for (int i = 0; i < 3; ++i) {
ANCHORS anchors;
if (0 == i) {
GenerateAnchors(16, { 1.0f }, { 32, 16 }, &anchors);
GenerateAnchors(16, { 1.0f }, { 32, 16 }, &anchors, num_threads);
}
else if (1 == i) {
GenerateAnchors(16, { 1.0f }, { 8, 4 }, &anchors);
GenerateAnchors(16, { 1.0f }, { 8, 4 }, &anchors, num_threads);
}
else {
GenerateAnchors(16, { 1.0f }, { 2, 1 }, &anchors);
GenerateAnchors(16, { 1.0f }, { 2, 1 }, &anchors, num_threads);
}
anchors_generated_.push_back(anchors);
}
initialized_ = true;
return 0;
}
@@ -60,7 +42,7 @@ int RetinaFace::DetectFace(const unsigned char* rgbdata,
float factor_x = static_cast<float>(img_width) / inputSize_.width;
float factor_y = static_cast<float>(img_height) / inputSize_.height;
ncnn::Extractor ex = retina_net_->create_extractor();
ncnn::Extractor ex = net_->create_extractor();
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata,
ncnn::Mat::PIXEL_RGB, img_width, img_height, inputSize_.width, inputSize_.height);
ex.input("data", in);
@@ -131,8 +113,8 @@ int RetinaFace::DetectFace(const unsigned char* rgbdata,
face_info.keypoints_[k + 5] = fminf(fmaxf(y * factor_y, 0.0f), img_height - 1);
}
face_info.score_ = score;
face_info.location_ = curr_box;
face_info.score = score;
face_info.rect = curr_box;
faces_tmp.push_back(face_info);
}
}

View File

@@ -5,20 +5,16 @@
#include "net.h"
namespace ovface {
using ANCHORS = std::vector<Rect>;
using ANCHORS = std::vector<ov::Rect>;
class RetinaFace : public Detecter {
public:
RetinaFace();
~RetinaFace();
int LoadModel(const char* root_path);
int DetectFace(const unsigned char* rgbdata,
int img_width, int img_height,
std::vector<FaceInfo>* faces);
private:
ncnn::Net* retina_net_;
std::vector<ANCHORS> anchors_generated_;
bool initialized_;
const int RPNs_[3] = { 32, 16, 8 };
const Size inputSize_ = { 300, 300 };
const float iouThreshold_ = 0.4f;

View File

@@ -0,0 +1,262 @@
#include "scrfd.hpp"
#ifdef OV_VULKAN
#include "gpu.h"
#endif // OV_VULKAN
namespace ovface {
static void generate_scrfd_proposals(const ncnn::Mat& anchors, int feat_stride, const ncnn::Mat& score_blob, const ncnn::Mat& bbox_blob, const ncnn::Mat& kps_blob, float prob_threshold, std::vector<FaceInfo>& faceobjects)
{
int w = score_blob.w;
int h = score_blob.h;
// generate face proposal from bbox deltas and shifted anchors
const int num_anchors = anchors.h;
for (int q = 0; q < num_anchors; q++)
{
const float* anchor = anchors.row(q);
const ncnn::Mat score = score_blob.channel(q);
const ncnn::Mat bbox = bbox_blob.channel_range(q * 4, 4);
// shifted anchor
float anchor_y = anchor[1];
float anchor_w = anchor[2] - anchor[0];
float anchor_h = anchor[3] - anchor[1];
for (int i = 0; i < h; i++)
{
float anchor_x = anchor[0];
for (int j = 0; j < w; j++)
{
int index = i * w + j;
float prob = score[index];
if (prob >= prob_threshold)
{
// insightface/detection/scrfd/mmdet/models/dense_heads/scrfd_head.py _get_bboxes_single()
float dx = bbox.channel(0)[index] * feat_stride;
float dy = bbox.channel(1)[index] * feat_stride;
float dw = bbox.channel(2)[index] * feat_stride;
float dh = bbox.channel(3)[index] * feat_stride;
// insightface/detection/scrfd/mmdet/core/bbox/transforms.py distance2bbox()
float cx = anchor_x + anchor_w * 0.5f;
float cy = anchor_y + anchor_h * 0.5f;
float x0 = cx - dx;
float y0 = cy - dy;
float x1 = cx + dw;
float y1 = cy + dh;
FaceInfo obj;
obj.rect.x = x0;
obj.rect.y = y0;
obj.rect.width = x1 - x0 + 1;
obj.rect.height = y1 - y0 + 1;
obj.score = prob;
if (!kps_blob.empty())
{
const ncnn::Mat kps = kps_blob.channel_range(q * 10, 10);
obj.keypoints_[0] = cx + kps.channel(0)[index] * feat_stride;
obj.keypoints_[5] = cy + kps.channel(1)[index] * feat_stride;
obj.keypoints_[1] = cx + kps.channel(2)[index] * feat_stride;
obj.keypoints_[6] = cy + kps.channel(3)[index] * feat_stride;
obj.keypoints_[2] = cx + kps.channel(4)[index] * feat_stride;
obj.keypoints_[7] = cy + kps.channel(5)[index] * feat_stride;
obj.keypoints_[3] = cx + kps.channel(6)[index] * feat_stride;
obj.keypoints_[8] = cy + kps.channel(7)[index] * feat_stride;
obj.keypoints_[4] = cx + kps.channel(8)[index] * feat_stride;
obj.keypoints_[9] = cy + kps.channel(9)[index] * feat_stride;
}
faceobjects.push_back(obj);
}
anchor_x += feat_stride;
}
anchor_y += feat_stride;
}
}
}
int Scrfd::DetectFace(const unsigned char* rgbdata,
int img_width, int img_height,
std::vector<FaceInfo>* faces) {
faces->clear();
if (!initialized_) {
return 10000;
}
if (rgbdata == 0){
return 10001;
}
// pad to multiple of 32
int w = img_width;
int h = img_height;
float scale = 1.f;
if (w > h)
{
scale = (float)target_size / w;
w = target_size;
h = h * scale;
}
else
{
scale = (float)target_size / h;
h = target_size;
w = w * scale;
}
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, w, h);
// pad to target_size rectangle
float wpad = (float)(w + 31) / 32 * 32 - w;
float hpad = (float)(h + 31) / 32 * 32 - h;
ncnn::Mat in_pad;
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
const float mean_vals[3] = {127.5f, 127.5f, 127.5f};
const float norm_vals[3] = {1/128.f, 1/128.f, 1/128.f};
in_pad.substract_mean_normalize(mean_vals, norm_vals);
ncnn::Extractor ex = net_->create_extractor();
ex.input("input.1", in_pad);
std::vector<FaceInfo> faceproposals;
// stride 8
{
ncnn::Mat score_blob, bbox_blob, kps_blob;
ex.extract("score_8", score_blob);
ex.extract("bbox_8", bbox_blob);
if (has_kps)
ex.extract("kps_8", kps_blob);
const int base_size = 16;
const int feat_stride = 8;
ncnn::Mat ratios(1);
ratios[0] = 1.f;
ncnn::Mat scales(2);
scales[0] = 1.f;
scales[1] = 2.f;
ncnn::Mat anchors = ov::generate_anchors(base_size, ratios, scales);
std::vector<FaceInfo> faceobjects32;
generate_scrfd_proposals(anchors, feat_stride, score_blob, bbox_blob, kps_blob, prob_threshold, faceobjects32);
faceproposals.insert(faceproposals.end(), faceobjects32.begin(), faceobjects32.end());
}
// stride 16
{
ncnn::Mat score_blob, bbox_blob, kps_blob;
ex.extract("score_16", score_blob);
ex.extract("bbox_16", bbox_blob);
if (has_kps)
ex.extract("kps_16", kps_blob);
const int base_size = 64;
const int feat_stride = 16;
ncnn::Mat ratios(1);
ratios[0] = 1.f;
ncnn::Mat scales(2);
scales[0] = 1.f;
scales[1] = 2.f;
ncnn::Mat anchors = ov::generate_anchors(base_size, ratios, scales);
std::vector<FaceInfo> faceobjects16;
generate_scrfd_proposals(anchors, feat_stride, score_blob, bbox_blob, kps_blob, prob_threshold, faceobjects16);
faceproposals.insert(faceproposals.end(), faceobjects16.begin(), faceobjects16.end());
}
// stride 32
{
ncnn::Mat score_blob, bbox_blob, kps_blob;
ex.extract("score_32", score_blob);
ex.extract("bbox_32", bbox_blob);
if (has_kps)
ex.extract("kps_32", kps_blob);
const int base_size = 256;
const int feat_stride = 32;
ncnn::Mat ratios(1);
ratios[0] = 1.f;
ncnn::Mat scales(2);
scales[0] = 1.f;
scales[1] = 2.f;
ncnn::Mat anchors = ov::generate_anchors(base_size, ratios, scales);
std::vector<FaceInfo> faceobjects8;
generate_scrfd_proposals(anchors, feat_stride, score_blob, bbox_blob, kps_blob, prob_threshold, faceobjects8);
faceproposals.insert(faceproposals.end(), faceobjects8.begin(), faceobjects8.end());
}
// sort all proposals by score from highest to lowest
qsort_descent_inplace(faceproposals);
// apply nms with nms_threshold
std::vector<int> picked;
nms_sorted_bboxes(faceproposals, picked, nms_threshold);
int face_count = picked.size();
for (int i = 0; i < face_count; i++)
{
FaceInfo obj = faceproposals[picked[i]];
// adjust offset to original unpadded
float x0 = (obj.rect.x - (wpad / 2)) / scale;
float y0 = (obj.rect.y - (hpad / 2)) / scale;
float x1 = (obj.rect.x + obj.rect.width - (wpad / 2)) / scale;
float y1 = (obj.rect.y + obj.rect.height - (hpad / 2)) / scale;
x0 = std::max(std::min(x0, (float)img_width - 1), 0.f);
y0 = std::max(std::min(y0, (float)img_height - 1), 0.f);
x1 = std::max(std::min(x1, (float)img_width - 1), 0.f);
y1 = std::max(std::min(y1, (float)img_height - 1), 0.f);
obj.rect.x = x0;
obj.rect.y = y0;
obj.rect.width = x1 - x0;
obj.rect.height = y1 - y0;
if (has_kps)
{
float x0 = (obj.keypoints_[0] - (wpad / 2)) / scale;
float y0 = (obj.keypoints_[5] - (hpad / 2)) / scale;
float x1 = (obj.keypoints_[1] - (wpad / 2)) / scale;
float y1 = (obj.keypoints_[6] - (hpad / 2)) / scale;
float x2 = (obj.keypoints_[2] - (wpad / 2)) / scale;
float y2 = (obj.keypoints_[7] - (hpad / 2)) / scale;
float x3 = (obj.keypoints_[3] - (wpad / 2)) / scale;
float y3 = (obj.keypoints_[8] - (hpad / 2)) / scale;
float x4 = (obj.keypoints_[4] - (wpad / 2)) / scale;
float y4 = (obj.keypoints_[9] - (hpad / 2)) / scale;
obj.keypoints_[0] = std::max(std::min(x0, (float)img_width - 1), 0.f);
obj.keypoints_[5] = std::max(std::min(y0, (float)img_height - 1), 0.f);
obj.keypoints_[1] = std::max(std::min(x1, (float)img_width - 1), 0.f);
obj.keypoints_[6] = std::max(std::min(y1, (float)img_height - 1), 0.f);
obj.keypoints_[2] = std::max(std::min(x2, (float)img_width - 1), 0.f);
obj.keypoints_[7] = std::max(std::min(y2, (float)img_height - 1), 0.f);
obj.keypoints_[3] = std::max(std::min(x3, (float)img_width - 1), 0.f);
obj.keypoints_[8] = std::max(std::min(y3, (float)img_height - 1), 0.f);
obj.keypoints_[4] = std::max(std::min(x4, (float)img_width - 1), 0.f);
obj.keypoints_[9] = std::max(std::min(y4, (float)img_height - 1), 0.f);
}
faces->push_back(obj);
}
return 0;
}
}

View File

@@ -0,0 +1,26 @@
#ifndef _SCRFD_DETECT_H_
#define _SCRFD_DETECT_H_
#include "../detecter.hpp"
#include "net.h"
namespace ovface {
// SCRFD-based face detecter. Runs the SCRFD network over an RGB image and
// collects detected face rectangles; when the loaded model has a keypoint
// head (see has_kps) it also fills 5-point face landmarks.
class Scrfd : public Detecter {
public:
// Detects faces in a packed RGB buffer of img_width x img_height pixels.
// Detections are appended to *faces. Returns 0 on success, non-zero on error.
int DetectFace(const unsigned char* rgbdata,
int img_width, int img_height,
std::vector<FaceInfo>* faces);
private:
// Long edge the input image is scaled to before inference.
const int target_size = 640;
// Per-channel mean subtraction / scaling applied to the network input.
const float meanVals[3] = { 123.675f, 116.28f, 103.53f };
const float normVals[3] = { 0.01712475f, 0.0175f, 0.01742919f };
// Minimum box confidence kept before NMS.
const float prob_threshold = 0.5f;
// IOU threshold used by non-maximum suppression.
const float nms_threshold = 0.45f;
// Whether the loaded model has a keypoint (kps_*) output branch.
const bool has_kps = false;
};
}
#endif // !_SCRFD_DETECT_H_

View File

@@ -92,7 +92,7 @@ static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn:
obj.rect.width = x1 - x0;
obj.rect.height = y1 - y0;
obj.label = class_index;
obj.prob = confidence;
obj.score = confidence;
for (int l = 0; l < 5; l++)
{
@@ -107,28 +107,10 @@ static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn:
}
}
YoloFace::YoloFace() :
net_ (new ncnn::Net()),
initialized_(false) {
#ifdef OV_VULKAN
net_->opt.use_vulkan_compute = true;
#endif // OV_VULKAN
}
YoloFace::~YoloFace() {
net_->clear();
}
int YoloFace::LoadModel(const char * root_path) {
register_yolov5focus(net_);
std::string param_file = std::string(root_path) + "/param";
std::string bin_file = std::string(root_path) + "/bin";
if (net_->load_param(param_file.c_str()) == -1 ||
net_->load_model(bin_file.c_str()) == -1) {
return 10000;
}
initialized_ = true;
return 0;
return Estimator::LoadModel(root_path);
}
int YoloFace::DetectFace(const unsigned char* rgbdata,
@@ -274,7 +256,7 @@ int YoloFace::DetectFace(const unsigned char* rgbdata,
obj.rect.height = y1 - y0;
FaceInfo info;
info.location_ = obj.rect;
info.rect = obj.rect;
for (int k = 0; k < 5; ++k) {
info.keypoints_[k] = obj.pts[k].x;
info.keypoints_[k + 5] = obj.pts[k].y;

View File

@@ -7,16 +7,12 @@
namespace ovface {
class YoloFace : public Detecter {
public:
YoloFace();
~YoloFace();
int LoadModel(const char* root_path);
int LoadModel(const char * root_path);
int DetectFace(const unsigned char* rgbdata,
int img_width, int img_height,
std::vector<FaceInfo>* faces);
private:
ncnn::Net* net_;
bool initialized_;
const int target_size = 640;
const float mean_vals[3] = {127.f, 127.f, 127.f};
const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};

View File

@@ -21,34 +21,15 @@ namespace ovface {
#define NEAR_0 1e-10
#define ODIM 66
Hopenet::Hopenet():
net_(new ncnn::Net()),
initialized_(false) {
#ifdef OV_VULKAN
net_->opt.use_vulkan_compute = true;
#endif // OV_VULKAN
}
Hopenet::~Hopenet() {
if (net_) {
net_->clear();
}
}
int Hopenet::LoadModel(const char * root_path) {
std::string param_file = std::string(root_path) + "/param";
std::string bin_file = std::string(root_path) + "/bin";
if (net_->load_param(param_file.c_str()) == -1 ||
net_->load_model(bin_file.c_str()) == -1) {
return 10000;
int Hopenet::LoadModel(const char* root_path) {
int ret = Estimator::LoadModel(root_path);
if (ret != 0) {
return ret;
}
for (uint i=1; i<67; i++) idx_tensor[i-1] = i;
initialized_ = true;
return 0;
}
int Hopenet::Detect(const unsigned char* rgbdata,
int img_width, int img_height,
Rect roi, HeadPose* head_angles) {
@@ -80,15 +61,15 @@ int Hopenet::Detect(const unsigned char* rgbdata,
float* pred_roll = output.range(ODIM, ODIM*2);
float* pred_yaw = output.range(ODIM*2, ODIM*3);
softmax(pred_pitch, ODIM);
softmax(pred_roll, ODIM);
softmax(pred_yaw, ODIM);
this->softmax(pred_pitch, ODIM);
this->softmax(pred_roll, ODIM);
this->softmax(pred_yaw, ODIM);
// printArray(pred_pitch, ODIM);
head_angles->pitch = getAngle(pred_pitch, ODIM);
head_angles->roll = getAngle(pred_roll, ODIM);
head_angles->yaw = getAngle(pred_yaw, ODIM);
head_angles->pitch = this->getAngle(pred_pitch, ODIM);
head_angles->roll = this->getAngle(pred_roll, ODIM);
head_angles->yaw = this->getAngle(pred_yaw, ODIM);
free(img_face);

View File

@@ -7,16 +7,12 @@
namespace ovface {
class Hopenet : public ov::Estimator {
public:
Hopenet();
~Hopenet();
int LoadModel(const char* root_path);
int Detect(const unsigned char* rgbdata,
int img_width, int img_height,
Rect roi, HeadPose* euler_angles);
private:
ncnn::Net* net_;
bool initialized_;
float idx_tensor[66];
void softmax(float* z, size_t el);
double getAngle(float* prediction, size_t len);

View File

@@ -10,6 +10,7 @@ extern "C" {
typedef void* IFaceLandmarker;
IFaceLandmarker new_insightface();
IFaceLandmarker new_zq();
IFaceLandmarker new_scrfd_landmarker();
int extract_face_keypoints(IFaceLandmarker m, const unsigned char* rgbdata, int img_width, int img_height, const Rect* face, Point2fVector* keypoints);
#ifdef __cplusplus
}

View File

@@ -6,34 +6,12 @@
#endif // OV_VULKAN
namespace ovface {
InsightfaceLandmarker::InsightfaceLandmarker() {
insightface_landmarker_net_ = new ncnn::Net();
initialized = false;
#ifdef OV_VULKAN
insightface_landmarker_net_->opt.use_vulkan_compute = true;
#endif // OV_VULKAN
}
InsightfaceLandmarker::~InsightfaceLandmarker() {
insightface_landmarker_net_->clear();
}
int InsightfaceLandmarker::LoadModel(const char * root_path) {
std::string fl_param = std::string(root_path) + "/param";
std::string fl_bin = std::string(root_path) + "/bin";
if (insightface_landmarker_net_->load_param(fl_param.c_str()) == -1 ||
insightface_landmarker_net_->load_model(fl_bin.c_str()) == -1) {
return 10000;
}
initialized = true;
return 0;
}
int InsightfaceLandmarker::ExtractKeypoints(const unsigned char* rgbdata,
int img_width, int img_height,
const ov::Rect & face, std::vector<ov::Point2f>* keypoints) {
keypoints->clear();
if (!initialized) {
if (!initialized_) {
return 10000;
}
@@ -42,7 +20,7 @@ int InsightfaceLandmarker::ExtractKeypoints(const unsigned char* rgbdata,
}
// 1 enlarge the face rect
Rect face_enlarged = face;
ov::Rect face_enlarged = face;
const float enlarge_scale = 1.5f;
EnlargeRect(enlarge_scale, &face_enlarged);
@@ -61,7 +39,7 @@ int InsightfaceLandmarker::ExtractKeypoints(const unsigned char* rgbdata,
}
// 4 do inference
ncnn::Extractor ex = insightface_landmarker_net_->create_extractor();
ncnn::Extractor ex = net_->create_extractor();
ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face,
ncnn::Mat::PIXEL_RGB, face_enlarged.width, face_enlarged.height, 192, 192);
ex.input("data", in);

View File

@@ -7,17 +7,10 @@
namespace ovface {
class InsightfaceLandmarker : public Landmarker {
public:
InsightfaceLandmarker();
~InsightfaceLandmarker();
int LoadModel(const char* root_path);
int ExtractKeypoints(const unsigned char* rgbdata,
int img_width, int img_height,
const ov::Rect& face, std::vector<ov::Point2f>* keypoints);
private:
ncnn::Net* insightface_landmarker_net_;
bool initialized;
};
}

View File

@@ -1,6 +1,7 @@
#include "../landmarker.h"
#include "zqlandmarker/zqlandmarker.hpp"
#include "insightface/insightface.hpp"
#include "scrfd/scrfd.hpp"
IFaceLandmarker new_zq() {
return new ovface::ZQLandmarker();
@@ -10,6 +11,10 @@ IFaceLandmarker new_insightface() {
return new ovface::InsightfaceLandmarker();
}
IFaceLandmarker new_scrfd_landmarker() {
return new ovface::ScrfdLandmarker();
}
int extract_face_keypoints(
IFaceLandmarker m,
const unsigned char* rgbdata,
@@ -39,4 +44,8 @@ Landmarker* InsightfaceLandmarkerFactory::CreateLandmarker() {
return new InsightfaceLandmarker();
}
Landmarker* ScrfdLandmarkerFactory::CreateLandmarker() {
return new ScrfdLandmarker();
}
}

View File

@@ -7,7 +7,6 @@ namespace ovface {
// Abstract base class
class Landmarker: public ov::Estimator {
public:
virtual ~Landmarker() {};
virtual int ExtractKeypoints(const unsigned char* rgbdata,
int img_width, int img_height,
const Rect& face, std::vector<Point2f>* keypoints) = 0;
@@ -35,6 +34,13 @@ public:
~InsightfaceLandmarkerFactory() {}
};
class ScrfdLandmarkerFactory : public LandmarkerFactory {
public:
ScrfdLandmarkerFactory(){}
Landmarker* CreateLandmarker();
~ScrfdLandmarkerFactory() {}
};
}
#endif // !_FACE_LANDMARKER_H_

View File

@@ -0,0 +1,57 @@
#include "scrfd.hpp"
#ifdef OV_VULKAN
#include "gpu.h"
#endif // OV_VULKAN
namespace ovface {
// Extracts 468 dense face keypoints from the given face ROI of an RGB image.
// The face rect is enlarged and squared, clipped to the image, cropped,
// resized to 192x192, and fed to the landmark network; predictions are mapped
// back to original image coordinates and appended to *keypoints.
// Returns 0 on success, 10000 if the model is not loaded, 10001 on bad input.
int ScrfdLandmarker::ExtractKeypoints(const unsigned char* rgbdata,
    int img_width, int img_height,
    const ov::Rect & face, std::vector<ov::Point2f>* keypoints) {
    keypoints->clear();
    if (!initialized_) {
        return 10000;
    }
    if (rgbdata == 0) {
        return 10001;
    }
    // 1 enlarge the face rect so the full face fits inside the crop
    ov::Rect box = face;
    const float enlarge_scale = 1.5f;
    EnlargeRect(enlarge_scale, &box);
    // 2 square the rect and clip it to the image bounds
    RectifyRect(&box);
    box = box & ov::Rect(0, 0, img_width, img_height);
    if (box.width <= 0 || box.height <= 0) {
        // face rect lies entirely outside the image: nothing to crop
        return 10001;
    }
    // 3 copy the ROI into a contiguous RGB buffer (3 bytes per pixel)
    size_t total_size = (size_t)box.width * box.height * 3 * sizeof(unsigned char);
    unsigned char* img_face = (unsigned char*)malloc(total_size);
    if (img_face == NULL) {
        return 10001;
    }
    const unsigned char* start_ptr = rgbdata;
    for (int i = 0; i < box.height; ++i) {
        const unsigned char* srcCursor = start_ptr + ((i + box.y) * img_width + box.x) * 3;
        unsigned char* dstCursor = img_face + i * box.width * 3;
        memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * box.width);
    }
    // 4 run inference on the 192x192-resized crop
    ncnn::Extractor ex = net_->create_extractor();
    ncnn::Mat ncnn_in = ncnn::Mat::from_pixels_resize(img_face, ncnn::Mat::PIXEL_RGB, box.width, box.height, 192, 192);
    ncnn_in.substract_mean_normalize(means, norms);
    ex.input("input.1", ncnn_in);
    ncnn::Mat ncnn_out;
    ex.extract("482", ncnn_out);
    // 5 map each (x, y, z) prediction back to original image coordinates;
    // the third component (scoredata[i*3+2]) is never read.
    float* scoredata = (float*)ncnn_out.data;
    for (int i = 0; i < 468; i++) {
        ov::Point2f pt;
        pt.x = scoredata[i * 3] * box.width / 192 + box.x;
        pt.y = scoredata[i * 3 + 1] * box.height / 192 + box.y;
        keypoints->push_back(pt);
    }
    free(img_face);
    return 0;
}
}

View File

@@ -0,0 +1,22 @@
#ifndef _FACE_SCRFD_LANDMARKER_H_
#define _FACE_SCRFD_LANDMARKER_H_
#include "../landmarker.hpp"
#include "net.h"
namespace ovface {
// Dense face landmarker: extracts 468 face keypoints from a face ROI
// of an RGB image (see scrfd.cpp for the inference pipeline).
class ScrfdLandmarker : public Landmarker {
public:
// Extracts keypoints for `face` (a rect inside the rgbdata image) into
// *keypoints. Returns 0 on success, a non-zero error code otherwise.
int ExtractKeypoints(const unsigned char* rgbdata,
int img_width, int img_height,
const ov::Rect& face, std::vector<ov::Point2f>* keypoints);
private:
// Input normalization: maps 0..255 pixel values to roughly [-1, 1].
const float means[3] = { 127.5f, 127.5f, 127.5f };
const float norms[3] = { 1/127.5f, 1 / 127.5f, 1 / 127.5f };
};
}
#endif // !_FACE_SCRFD_LANDMARKER_H_

View File

@@ -1,39 +1,16 @@
#include "zqlandmarker.hpp"
#include <string>
#ifdef OV_VULKAN
#include "gpu.h"
#endif // OV_VULKAN
namespace ovface {
ZQLandmarker::ZQLandmarker() {
zq_landmarker_net_ = new ncnn::Net();
initialized = false;
#ifdef OV_VULKAN
zq_landmarker_net_->opt.use_vulkan_compute = true;
#endif // OV_VULKAN
}
ZQLandmarker::~ZQLandmarker() {
zq_landmarker_net_->clear();
}
int ZQLandmarker::LoadModel(const char * root_path) {
std::string fl_param = std::string(root_path) + "/param";
std::string fl_bin = std::string(root_path) + "/bin";
if (zq_landmarker_net_->load_param(fl_param.c_str()) == -1 ||
zq_landmarker_net_->load_model(fl_bin.c_str()) == -1) {
return 10000;
}
initialized = true;
return 0;
}
int ZQLandmarker::ExtractKeypoints(const unsigned char* rgbdata,
int img_width, int img_height,
const ov::Rect & face, std::vector<ov::Point2f>* keypoints) {
keypoints->clear();
if (!initialized) {
if (!initialized_) {
return 10000;
}
@@ -49,7 +26,7 @@ int ZQLandmarker::ExtractKeypoints(const unsigned char* rgbdata,
unsigned char* dstCursor = img_face + i * face.width * 3;
memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * face.width);
}
ncnn::Extractor ex = zq_landmarker_net_->create_extractor();
ncnn::Extractor ex = net_->create_extractor();
ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face,
ncnn::Mat::PIXEL_RGB, face.width, face.height, 112, 112);
in.substract_mean_normalize(meanVals, normVals);

View File

@@ -7,19 +7,13 @@
namespace ovface {
class ZQLandmarker : public Landmarker {
public:
ZQLandmarker();
~ZQLandmarker();
int LoadModel(const char* root_path);
int ExtractKeypoints(const unsigned char* rgbdata,
int img_width, int img_height,
const ov::Rect& face, std::vector<ov::Point2f>* keypoints);
private:
ncnn::Net* zq_landmarker_net_;
const float meanVals[3] = { 127.5f, 127.5f, 127.5f };
const float normVals[3] = { 0.0078125f, 0.0078125f, 0.0078125f };
bool initialized;
};
}

View File

@@ -5,29 +5,6 @@
#endif // OV_VULKAN
namespace ovface {
Mobilefacenet::Mobilefacenet() {
mobileface_net_ = new ncnn::Net();
initialized_ = false;
#ifdef OV_VULKAN
mobileface_net_->opt.use_vulkan_compute = true;
#endif // OV_VULKAN
}
Mobilefacenet::~Mobilefacenet() {
mobileface_net_->clear();
}
int Mobilefacenet::LoadModel(const char * root_path) {
std::string param_file = std::string(root_path) + "/param";
std::string bin_file = std::string(root_path) + "/bin";
if (mobileface_net_->load_param(param_file.c_str()) == -1 ||
mobileface_net_->load_model(bin_file.c_str()) == -1) {
return 10000;
}
initialized_ = true;
return 0;
}
int Mobilefacenet::ExtractFeature(const unsigned char* rgbdata,
int img_width, int img_height,
@@ -52,7 +29,7 @@ int Mobilefacenet::ExtractFeature(const unsigned char* rgbdata,
ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face,
ncnn::Mat::PIXEL_RGB, face.width, face.height, 112, 112);
feature->resize(kFaceFeatureDim);
ncnn::Extractor ex = mobileface_net_->create_extractor();
ncnn::Extractor ex = net_->create_extractor();
ex.input("data", in);
ncnn::Mat out;
ex.extract("fc1", out);

View File

@@ -9,18 +9,11 @@ namespace ovface {
class Mobilefacenet : public Recognizer {
public:
Mobilefacenet();
~Mobilefacenet();
int LoadModel(const char* root_path);
int ExtractFeature(const unsigned char* rgbdata,
int img_width, int img_height,
const ov::Rect& face,
std::vector<float>* feature);
private:
ncnn::Net* mobileface_net_;
bool initialized_;
};
}

View File

@@ -7,7 +7,6 @@
namespace ovface {
class Recognizer: public ov::Estimator {
public:
virtual ~Recognizer() {};
virtual int ExtractFeature(const unsigned char* rgbdata,
int img_width, int img_height,
const ov::Rect& face,

View File

@@ -45,8 +45,8 @@ int Tracker::Track(const std::vector<FaceInfo>& curr_faces, std::vector<TrackedF
for (int i = 0; i < num_faces; ++i) {
auto& face = curr_faces.at(i);
for (auto scored_tracked_face : scored_tracked_faces) {
ComputeIOU(scored_tracked_face.face_info_.location_,
face.location_, &scored_tracked_face.iou_score_);
ComputeIOU(scored_tracked_face.face_info_.rect,
face.rect, &scored_tracked_face.iou_score_);
}
if (scored_tracked_faces.size() > 0) {
std::partial_sort(scored_tracked_faces.begin(),
@@ -61,10 +61,10 @@ int Tracker::Track(const std::vector<FaceInfo>& curr_faces, std::vector<TrackedF
scored_tracked_faces.pop_front();
TrackedFaceInfo &tracked_face = matched_face;
if (matched_face.iou_score_ < maxScore_) {
tracked_face.face_info_.location_.x = (tracked_face.face_info_.location_.x + face.location_.x) / 2;
tracked_face.face_info_.location_.y = (tracked_face.face_info_.location_.y + face.location_.y) / 2;
tracked_face.face_info_.location_.width = (tracked_face.face_info_.location_.width + face.location_.width) / 2;
tracked_face.face_info_.location_.height = (tracked_face.face_info_.location_.height + face.location_.height) / 2;
tracked_face.face_info_.rect.x = (tracked_face.face_info_.rect.x + face.rect.x) / 2;
tracked_face.face_info_.rect.y = (tracked_face.face_info_.rect.y + face.rect.y) / 2;
tracked_face.face_info_.rect.width = (tracked_face.face_info_.rect.width + face.rect.width) / 2;
tracked_face.face_info_.rect.height = (tracked_face.face_info_.rect.height + face.rect.height) / 2;
} else {
tracked_face.face_info_ = face;
}

View File

@@ -23,7 +23,7 @@ int extract_hand_rois(IHandDetecter d, const unsigned char* rgbdata, int img_wid
ov::ObjectInfo o = detected[i];
rois->items[i] = ObjectInfo{
o.rect,
o.prob,
o.score,
o.label,
NULL
};

View File

@@ -5,7 +5,6 @@
namespace ovhand {
class Detecter: public ov::Estimator {
public:
virtual ~Detecter() {};
virtual int Detect(const unsigned char*rgbdata,
int img_width, int img_height,
std::vector<ov::ObjectInfo>& rois) = 0;

View File

@@ -62,7 +62,7 @@ static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Ma
softmax->load_param(pd);
ncnn::Option opt;
opt.num_threads = 1;
// opt.num_threads = 1;
opt.use_packing_layout = false;
softmax->create_pipeline(opt);
@@ -101,7 +101,7 @@ static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Ma
obj.rect.width = x1 - x0;
obj.rect.height = y1 - y0;
obj.label = label;
obj.prob = score;
obj.score= score;
objects.push_back(obj);
}
@@ -109,29 +109,6 @@ static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Ma
}
}
Nanodet::Nanodet() :
net_ (new ncnn::Net()),
initialized_(false) {
#ifdef OV_VULKAN
net_->opt.use_vulkan_compute = true;
#endif // OV_VULKAN
}
Nanodet::~Nanodet() {
net_->clear();
}
int Nanodet::LoadModel(const char * root_path) {
std::string param_file = std::string(root_path) + "/param";
std::string bin_file = std::string(root_path) + "/bin";
if (net_->load_param(param_file.c_str()) == -1 ||
net_->load_model(bin_file.c_str()) == -1) {
return 10000;
}
initialized_ = true;
return 0;
}
int Nanodet::Detect(const unsigned char* rgbdata,
int img_width, int img_height,
std::vector<ov::ObjectInfo>& rois) {

View File

@@ -9,17 +9,11 @@ namespace ovhand {
class Nanodet : public Detecter {
public:
Nanodet();
~Nanodet();
int LoadModel(const char* root_path);
int Detect(const unsigned char* rgbadata,
int img_width, int img_height,
std::vector<ov::ObjectInfo>& rois);
private:
ncnn::Net* net_;
bool initialized_;
const int target_size = 320;
const float mean_vals[3] = {103.53f, 116.28f, 123.675f};
const float norm_vals[3] = {1.f / 57.375f, 1.f / 57.12f, 1.f / 58.395f};

View File

@@ -46,7 +46,7 @@ static void generate_yolox_proposals(std::vector<ov::GridAndStride> grid_strides
obj.rect.width = w;
obj.rect.height = h;
obj.label = class_idx;
obj.prob = box_prob;
obj.score= box_prob;
objects.push_back(obj);
}
@@ -57,28 +57,9 @@ static void generate_yolox_proposals(std::vector<ov::GridAndStride> grid_strides
} // point anchor loop
}
Yolox::Yolox() :
net_ (new ncnn::Net()),
initialized_(false) {
#ifdef OV_VULKAN
net_->opt.use_vulkan_compute = true;
#endif // OV_VULKAN
}
Yolox::~Yolox() {
net_->clear();
}
int Yolox::LoadModel(const char * root_path) {
register_yolov5focus(net_);
std::string param_file = std::string(root_path) + "/param";
std::string bin_file = std::string(root_path) + "/bin";
if (net_->load_param(param_file.c_str()) == -1 ||
net_->load_model(bin_file.c_str()) == -1) {
return 10000;
}
initialized_ = true;
return 0;
return Estimator::LoadModel(root_path);
}
int Yolox::Detect(const unsigned char* rgbdata,
@@ -116,7 +97,6 @@ int Yolox::Detect(const unsigned char* rgbdata,
in_pad.substract_mean_normalize(mean_vals, norm_vals);
ncnn::Extractor ex = net_->create_extractor();
ex.set_num_threads(4);
ex.input("input", in_pad);
ncnn::Mat out;
ex.extract("output", out);

View File

@@ -9,17 +9,12 @@ namespace ovhand {
class Yolox : public Detecter {
public:
Yolox();
~Yolox();
int LoadModel(const char* root_path);
int LoadModel(const char * root_path);
int Detect(const unsigned char* rgbadata,
int img_width, int img_height,
std::vector<ov::ObjectInfo>& rois);
private:
ncnn::Net* net_;
bool initialized_;
const int target_size = 416;
const float mean_vals[3] = {255.f * 0.485f, 255.f * 0.456, 255.f * 0.406f};
const float norm_vals[3] = {1 / (255.f * 0.229f), 1 / (255.f * 0.224f), 1 / (255.f * 0.225f)};

View File

@@ -7,7 +7,6 @@
namespace ovhand {
class PoseEstimator: public ov::Estimator {
public:
virtual ~PoseEstimator() {};
virtual int Detect(const unsigned char*rgbdata,
int img_width, int img_height,
const ov::Rect& rect,

View File

@@ -6,28 +6,6 @@
#endif // OV_VULKAN
namespace ovhand {
HandPose::HandPose() :
net_ (new ncnn::Net()),
initialized_(false) {
#ifdef OV_VULKAN
net_->opt.use_vulkan_compute = true;
#endif // OV_VULKAN
}
HandPose::~HandPose() {
net_->clear();
}
int HandPose::LoadModel(const char * root_path) {
std::string param_file = std::string(root_path) + "/param";
std::string bin_file = std::string(root_path) + "/bin";
if (net_->load_param(param_file.c_str()) == -1 ||
net_->load_model(bin_file.c_str()) == -1) {
return 10000;
}
initialized_ = true;
return 0;
}
int HandPose::Detect(const unsigned char* rgbdata,
int img_width, int img_height,

View File

@@ -9,18 +9,12 @@ namespace ovhand {
class HandPose : public PoseEstimator {
public:
HandPose();
~HandPose();
int LoadModel(const char* root_path);
int Detect(const unsigned char* rgbdata,
int img_width, int img_height,
const ov::Rect& rect,
std::vector<ov::Point2f>& keypoints);
private:
ncnn::Net* net_;
bool initialized_;
const float meanVals[3] = { 128.0f, 128.0f, 128.0f };
const float normVals[3] = { 0.00390625f, 0.00390625f, 0.00390625f };
};

View File

@@ -18,7 +18,7 @@ int extract_pose_rois(IPoseDetecter d, const unsigned char* rgbdata, int img_wid
ov::ObjectInfo o = detected[i];
rois->items[i] = ObjectInfo{
o.rect,
o.prob,
o.score,
o.label,
NULL
};

View File

@@ -7,7 +7,6 @@ namespace ovpose {
class Detecter: public ov::Estimator {
public:
virtual ~Detecter(){};
virtual int ExtractROIs(const unsigned char* rgbadata,
int img_width, int img_height,
std::vector<ov::ObjectInfo>* rois) = 0;

View File

@@ -55,7 +55,6 @@ int Ultralight::ExtractROIs(const unsigned char* rgbdata,
in.substract_mean_normalize(mean_vals, norm_vals);
ncnn::Extractor ex = roi_net_->create_extractor();
ex.set_num_threads(4);
ex.input("data", in);
ncnn::Mat out;
ex.extract("output", out);
@@ -99,7 +98,7 @@ int Ultralight::ExtractROIs(const unsigned char* rgbdata,
ov::Rect rect = ov::Rect(x1, y1, x2-x1, y2-y1);
ov::ObjectInfo roi;
roi.rect = rect;
roi.prob = score;
roi.score = score;
rois->push_back(roi);
}
return 0;
@@ -124,7 +123,6 @@ int Ultralight::ExtractKeypoints(const unsigned char* rgbdata,
in.substract_mean_normalize(meanVals, normVals);
ncnn::Extractor ex = pose_net_->create_extractor();
ex.set_num_threads(4);
ex.input("data", in);
ncnn::Mat out;
ex.extract("hybridsequential0_conv7_fwd", out);
@@ -152,7 +150,7 @@ int Ultralight::ExtractKeypoints(const unsigned char* rgbdata,
ov::Keypoint keypoint;
keypoint.p = ov::Point2f(max_x * w / (float)out.w+rect.x, max_y * h / (float)out.h+rect.y);
keypoint.prob = max_prob;
keypoint.score = max_prob;
keypoints->push_back(keypoint);
}