diff --git a/README.md b/README.md index 860b8a3..743026d 100644 --- a/README.md +++ b/README.md @@ -22,12 +22,14 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM - centerface [Google Drive](https://drive.google.com/drive/folders/1xMhO6aCnkkjt90Fh8BxVD_JHB3QJ2q-q?usp=sharing) - retainface [Google Drive](https://drive.google.com/drive/folders/1nxR3WFqqEWLwGVsp5c4tI0_iVVEaVOe8?usp=sharing) - yoloface [Google Drive](https://drive.google.com/drive/folders/1EM9H6-aYXKsWTRxx_wbKDyYHVIYpU6f7?usp=sharing) + - scrfd [Google Drive](https://drive.google.com/drive/folders/1XPjfsuXGj9rXqAmo1K70BsqWmHvoYQv_?usp=sharing) - anticonv (for mask detection) [Google Drive](https://drive.google.com/drive/folders/1Fje0fmVPy5g0_oaxUbH_cAedkgjBf7QW?usp=sharing) - recognizer (face feature extration for classification) - mobilenet [Google Drive](https://drive.google.com/drive/folders/1fRLs10atm_vwDWQXZ-GJbKQpypNcXLAx?usp=sharing) - landmarker (for face landmarkers extraction) - insightface [Google Drive](https://drive.google.com/drive/folders/1e_nRwneMEDf_sXEMZCmOk0S4VT0_XpOS?usp=sharing) - zq [Google Drive](https://drive.google.com/drive/folders/1ax0J1TVhf2S-B3V6lnqwJaaHUK433sPm?usp=sharing) + - scrfd [Google Drive](https://drive.google.com/drive/folders/1XPjfsuXGj9rXqAmo1K70BsqWmHvoYQv_?usp=sharing) - tracker (for face IOU calculation bettween frames) - hopenet (for head pose detection) [Google Drive](https://drive.google.com/drive/folders/1zLam-8s9ZMPDUxUEtNU2F9yFTDRM5fk-?usp=sharing) - pose @@ -40,3 +42,14 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM - pose (for hand pose estimation) - handnet [Google Drive](https://drive.google.com/drive/folders/1DsCGmiVaZobbMWRp5Oec8GbIpeg7CsNR?usp=sharing) - golang binding (github.com/bububa/openvision/go) + +## Reference + +- [MirrorYuChen/ncnn_example](https://github.com/MirrorYuChen/ncnn_example) +- [nihui/ncnn-android-nanodet](https://github.com/nihui/ncnn-android-nanodet) +- [FeiGeChuanShu/ncnn_Android_face](https://github.com/FeiGeChuanShu/ncnn_Android_face) +- [FeiGeChuanShu/ncnn_nanodet_hand](https://github.com/FeiGeChuanShu/ncnn_nanodet_hand) +- [docongminh/deep-head-pose-ncnn](https://github.com/docongminh/deep-head-pose-ncnn) +- [nilseuropa/hopenet_ncnn](https://github.com/nilseuropa/hopenet_ncnn) +- [dog-qiuqiu/Ultralight-SimplePose](https://github.com/dog-qiuqiu/Ultralight-SimplePose) +- [GHLab/deep-head-pose-lite-ncnn](https://github.com/GHLab/deep-head-pose-lite-ncnn) diff --git a/go/common/estimator.go b/go/common/estimator.go new file mode 100644 index 0000000..0d4529a --- /dev/null +++ b/go/common/estimator.go @@ -0,0 +1,41 @@ +package common + +/* +#include +#include +#include "openvision/common/common.h" +*/ +import "C" +import ( + "unsafe" + + openvision "github.com/bububa/openvision/go" +) + +// Estimator represents Estimator interface +type Estimator interface { + LoadModel(modelPath string) error + Destroy() + Pointer() unsafe.Pointer +} + +// SetEstimatorThreads set ncnn net opt.num_threads +func SetEstimatorThreads(e Estimator, n int) { + C.set_num_threads((C.IEstimator)(e.Pointer()), C.int(n)) +} + +// DestroyEstimator destory an Estimator +func DestroyEstimator(e Estimator) { + C.destroy_estimator((C.IEstimator)(e.Pointer())) +} + +// EstimatorLoadModel load detecter model +func EstimatorLoadModel(e Estimator, modelPath string) error { + cpath := C.CString(modelPath) + defer C.free(unsafe.Pointer(cpath)) + retCode := C.load_model((C.IEstimator)(e.Pointer()), cpath) + if 
retCode != 0 { + return openvision.LoadModelError(int(retCode)) + } + return nil +} diff --git a/go/common/gpu.go b/go/common/gpu.go index 39263e5..ea2be10 100644 --- a/go/common/gpu.go +++ b/go/common/gpu.go @@ -7,16 +7,29 @@ package common */ import "C" +// GetGPUCount get gpu number func GetGPUCount() int { count := C.get_gpu_count() return int(count) } +// CreateGPUInstance create gpu instance func CreateGPUInstance() int { i := C.create_gpu_instance() return int(i) } +// DestroyGPUInstance destory gpu instance func DestroyGPUInstance() { C.destroy_gpu_instance() } + +// GetBigCPUCount get cpu number +func GetBigCPUCount() int { + return int(C.get_big_cpu_count()) +} + +// SetOMPThreads set omp thread number +func SetOMPThreads(n int) { + C.set_omp_num_threads(C.int(n)) +} diff --git a/go/common/keypoint.go b/go/common/keypoint.go index a841ebf..593e915 100644 --- a/go/common/keypoint.go +++ b/go/common/keypoint.go @@ -22,14 +22,14 @@ type Keypoint struct { func GoKeypoint(c *C.Keypoint, w float64, h float64) Keypoint { return Keypoint{ Point: Pt(float64(c.p.x)/w, float64(c.p.y)/h), - Score: float32(c.prob), + Score: float32(c.score), } } // Convert Keypoint to C.Keypoint pointer func (k Keypoint) CKeypoint(w float64, h float64) *C.Keypoint { ret := (*C.Keypoint)(C.malloc(C.sizeof_Keypoint)) - ret.prob = C.float(k.Score) + ret.score = C.float(k.Score) ret.p = C.Point2f{ C.float(k.Point.X * w), C.float(k.Point.Y * h), diff --git a/go/common/objectinfo.go b/go/common/objectinfo.go index d2215cf..06874c4 100644 --- a/go/common/objectinfo.go +++ b/go/common/objectinfo.go @@ -26,7 +26,7 @@ type ObjectInfo struct { func GoObjectInfo(c *C.ObjectInfo, w float64, h float64) ObjectInfo { ret := ObjectInfo{ Label: int(c.label), - Score: float32(c.prob), + Score: float32(c.score), Rect: Rect( float64(c.rect.x)/w, float64(c.rect.y)/h, @@ -44,7 +44,7 @@ func GoObjectInfo(c *C.ObjectInfo, w float64, h float64) ObjectInfo { func (o ObjectInfo) ToCObjectInfo(w float64, h float64) *C.ObjectInfo { ret := (*C.ObjectInfo)(C.malloc(C.sizeof_ObjectInfo)) ret.label = C.int(o.Label) - ret.prob = C.float(o.Score) + ret.score = C.float(o.Score) ret.rect.x = C.int(o.Rect.X * w) ret.rect.y = C.int(o.Rect.Y * h) ret.rect.width = C.int(o.Rect.Width * w) diff --git a/go/examples/detecter/main.go b/go/examples/detecter/main.go index 972f133..694ab33 100644 --- a/go/examples/detecter/main.go +++ b/go/examples/detecter/main.go @@ -23,24 +23,30 @@ func main() { modelPath := filepath.Join(dataPath, "./models") common.CreateGPUInstance() defer common.DestroyGPUInstance() - test_detect(imgPath, modelPath) - test_mask(imgPath, modelPath) + cpuCores := common.GetBigCPUCount() + common.SetOMPThreads(cpuCores) + log.Printf("CPU big cores:%d\n", cpuCores) + test_detect(imgPath, modelPath, cpuCores) + test_mask(imgPath, modelPath, cpuCores) } -func test_detect(imgPath string, modelPath string) { +func test_detect(imgPath string, modelPath string, threads int) { for idx, d := range []detecter.Detecter{ retinaface(modelPath), centerface(modelPath), mtcnn(modelPath), yoloface(modelPath), + scrfd(modelPath), } { + common.SetEstimatorThreads(d, threads) detect(d, imgPath, idx, "4.jpg", false) d.Destroy() } } -func test_mask(imgPath string, modelPath string) { +func test_mask(imgPath string, modelPath string, threads int) { d := anticonv(modelPath) + common.SetEstimatorThreads(d, threads) defer d.Destroy() detect(d, imgPath, 0, "mask3.jpg", true) } @@ -72,6 +78,15 @@ func yoloface(modelPath string) detecter.Detecter { return d } +func 
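The `go/common/estimator.go` and `go/common/gpu.go` additions above give every binding a shared `common.Estimator` interface plus big-core/OpenMP thread controls. As a rough usage sketch (editorial, not part of the patch; the helper name `configureRuntime` and its variadic signature are illustrative only, the `common` calls are exactly the ones added above), the pieces combine like this:

```go
package sketch

import (
	"log"

	"github.com/bububa/openvision/go/common"
)

// configureRuntime mirrors the setup added to the bundled examples:
// one GPU instance per process, OpenMP bound to the big cores, and the
// same core count pushed into every ncnn net via the Estimator interface.
func configureRuntime(estimators ...common.Estimator) func() {
	common.CreateGPUInstance()
	cores := common.GetBigCPUCount()
	common.SetOMPThreads(cores)
	log.Printf("CPU big cores: %d\n", cores)
	for _, e := range estimators {
		common.SetEstimatorThreads(e, cores)
	}
	// the caller defers the returned cleanup
	return common.DestroyGPUInstance
}
```
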
scrfd(modelPath string) detecter.Detecter { + modelPath = filepath.Join(modelPath, "scrfd/scrfd1g") + d := detecter.NewScrfd() + if err := d.LoadModel(modelPath); err != nil { + log.Fatalln(err) + } + return d +} + func centerface(modelPath string) detecter.Detecter { modelPath = filepath.Join(modelPath, "centerface") d := detecter.NewCenterface() @@ -96,7 +111,7 @@ func detect(d detecter.Detecter, imgPath string, idx int, filename string, mask if err != nil { log.Fatalln("load image failed,", err) } - faces, err := d.DetectFace(common.NewImage(img)) + faces, err := d.Detect(common.NewImage(img)) if err != nil { log.Fatalln(err) } diff --git a/go/examples/hand/main.go b/go/examples/hand/main.go index 2f567aa..0461264 100644 --- a/go/examples/hand/main.go +++ b/go/examples/hand/main.go @@ -24,13 +24,19 @@ func main() { modelPath := filepath.Join(dataPath, "./models") common.CreateGPUInstance() defer common.DestroyGPUInstance() + cpuCores := common.GetBigCPUCount() + common.SetOMPThreads(cpuCores) + log.Printf("CPU big cores:%d\n", cpuCores) estimator := handpose(modelPath) defer estimator.Destroy() + common.SetEstimatorThreads(estimator, cpuCores) for idx, d := range []detecter.Detecter{ - yolox(modelPath), nanodet(modelPath), + yolox(modelPath), + nanodet(modelPath), } { defer d.Destroy() - detect(d, estimator, imgPath, "hand2.jpg", idx) + common.SetEstimatorThreads(d, cpuCores) + detect(d, estimator, imgPath, "hand1.jpg", idx) } } diff --git a/go/examples/hopenet/main.go b/go/examples/hopenet/main.go index aeefee6..ae547e4 100644 --- a/go/examples/hopenet/main.go +++ b/go/examples/hopenet/main.go @@ -24,10 +24,15 @@ func main() { modelPath := filepath.Join(dataPath, "./models") common.CreateGPUInstance() defer common.DestroyGPUInstance() + cpuCores := common.GetBigCPUCount() + common.SetOMPThreads(cpuCores) + log.Printf("CPU big cores:%d\n", cpuCores) d := retinaface(modelPath) defer d.Destroy() + common.SetEstimatorThreads(d, cpuCores) h := processer(modelPath) defer h.Destroy() + common.SetEstimatorThreads(h, cpuCores) for _, fn := range []string{"robocop.jpg", "terminator.jpg"} { process(d, h, imgPath, fn) } @@ -60,7 +65,7 @@ func process(d detecter.Detecter, h *hopenet.Hopenet, imgPath string, filename s log.Fatalln("load image failed,", err) } img := common.NewImage(imgLoaded) - faces, err := d.DetectFace(img) + faces, err := d.Detect(img) if err != nil { log.Fatalln(err) } diff --git a/go/examples/landmarker/main.go b/go/examples/landmarker/main.go index 32d3727..7d1d1b2 100644 --- a/go/examples/landmarker/main.go +++ b/go/examples/landmarker/main.go @@ -2,6 +2,7 @@ package main import ( "bytes" + "fmt" "image" "image/jpeg" "log" @@ -23,11 +24,21 @@ func main() { modelPath := filepath.Join(dataPath, "./models") common.CreateGPUInstance() defer common.DestroyGPUInstance() + cpuCores := common.GetBigCPUCount() + common.SetOMPThreads(cpuCores) + log.Printf("CPU big cores:%d\n", cpuCores) d := retinaface(modelPath) defer d.Destroy() - m := insightface(modelPath) - defer m.Destroy() - extract_keypoints(d, m, imgPath, "4.jpg") + common.SetEstimatorThreads(d, cpuCores) + for idx, m := range []landmarker.Landmarker{ + insightface(modelPath), + zq(modelPath), + scrfd(modelPath), + } { + defer m.Destroy() + common.SetEstimatorThreads(m, cpuCores) + extract_keypoints(d, m, imgPath, "4.jpg", idx) + } } func retinaface(modelPath string) detecter.Detecter { @@ -57,14 +68,23 @@ func zq(modelPath string) landmarker.Landmarker { return d } -func extract_keypoints(d detecter.Detecter, m 
landmarker.Landmarker, imgPath string, filename string) { +func scrfd(modelPath string) landmarker.Landmarker { + modelPath = filepath.Join(modelPath, "scrfd/landmarker") + d := landmarker.NewScrfd() + if err := d.LoadModel(modelPath); err != nil { + log.Fatalln(err) + } + return d +} + +func extract_keypoints(d detecter.Detecter, m landmarker.Landmarker, imgPath string, filename string, idx int) { inPath := filepath.Join(imgPath, filename) imgLoaded, err := loadImage(inPath) if err != nil { log.Fatalln("load image failed,", err) } img := common.NewImage(imgLoaded) - faces, err := d.DetectFace(img) + faces, err := d.Detect(img) if err != nil { log.Fatalln(err) } @@ -83,8 +103,7 @@ func extract_keypoints(d detecter.Detecter, m landmarker.Landmarker, imgPath str keypoints = append(keypoints, points...) } out := drawer.DrawLandmark(imgLoaded, keypoints) - outPath := filepath.Join(imgPath, "./results", filename) - + outPath := filepath.Join(imgPath, "./results", fmt.Sprintf("landmarker-%d-%s", idx, filename)) if err := saveImage(out, outPath); err != nil { log.Fatalln(err) } diff --git a/go/examples/pose/main.go b/go/examples/pose/main.go index 621df96..f007a9c 100644 --- a/go/examples/pose/main.go +++ b/go/examples/pose/main.go @@ -23,8 +23,12 @@ func main() { modelPath := filepath.Join(dataPath, "./models") common.CreateGPUInstance() defer common.DestroyGPUInstance() + cpuCores := common.GetBigCPUCount() + common.SetOMPThreads(cpuCores) + log.Printf("CPU big cores:%d\n", cpuCores) d := ultralightDetector(modelPath) defer d.Destroy() + common.SetEstimatorThreads(d, cpuCores) detect(d, imgPath, "ultralight-pose3.jpg") } diff --git a/go/examples/recognizer/main.go b/go/examples/recognizer/main.go index 332ad44..c6d5147 100644 --- a/go/examples/recognizer/main.go +++ b/go/examples/recognizer/main.go @@ -22,10 +22,15 @@ func main() { modelPath := filepath.Join(dataPath, "./models") common.CreateGPUInstance() defer common.DestroyGPUInstance() + cpuCores := common.GetBigCPUCount() + common.SetOMPThreads(cpuCores) + log.Printf("CPU big cores:%d\n", cpuCores) d := retinaface(modelPath) defer d.Destroy() + common.SetEstimatorThreads(d, cpuCores) m := mobilefacenet(modelPath) defer m.Destroy() + common.SetEstimatorThreads(m, cpuCores) extract_features(d, m, imgPath, "4.jpg") } @@ -54,7 +59,7 @@ func extract_features(d detecter.Detecter, r recognizer.Recognizer, imgPath stri log.Fatalln("load image failed,", err) } img := common.NewImage(imgLoaded) - faces, err := d.DetectFace(img) + faces, err := d.Detect(img) if err != nil { log.Fatalln(err) } diff --git a/go/examples/tracker/main.go b/go/examples/tracker/main.go index 7f4a785..69e6cb1 100644 --- a/go/examples/tracker/main.go +++ b/go/examples/tracker/main.go @@ -48,7 +48,7 @@ func track(d detecter.Detecter, t *tracker.Tracker, imgPath string, filename str log.Fatalln("load image failed,", err) } img := common.NewImage(imgLoaded) - faces, err := d.DetectFace(img) + faces, err := d.Detect(img) if err != nil { log.Fatalln(err) } diff --git a/go/face/detecter/anticonv.go b/go/face/detecter/anticonv.go index 7b9bace..b55ac73 100644 --- a/go/face/detecter/anticonv.go +++ b/go/face/detecter/anticonv.go @@ -7,6 +7,8 @@ package detecter */ import "C" import ( + "unsafe" + "github.com/bububa/openvision/go/common" "github.com/bububa/openvision/go/face" ) @@ -25,20 +27,20 @@ func NewAnticonv() *Anticonv { // Destroy free detecter func (d *Anticonv) Destroy() { - Destroy(d) + common.DestroyEstimator(d) } -// Handler returns C.IDetecter -func (d *Anticonv) 
Handler() C.IFaceDetecter { - return d.d +// Pointer implement Estimator interface +func (d *Anticonv) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) } // LoadModel load model for detecter func (d *Anticonv) LoadModel(modelPath string) error { - return LoadModel(d, modelPath) + return common.EstimatorLoadModel(d, modelPath) } -// DetectFace implement Detecter interface -func (d *Anticonv) DetectFace(img *common.Image) ([]face.FaceInfo, error) { - return DetectFace(d, img) +// Detect implement Detecter interface +func (d *Anticonv) Detect(img *common.Image) ([]face.FaceInfo, error) { + return Detect(d, img) } diff --git a/go/face/detecter/centerface.go b/go/face/detecter/centerface.go index a012407..63b3cac 100644 --- a/go/face/detecter/centerface.go +++ b/go/face/detecter/centerface.go @@ -7,6 +7,8 @@ package detecter */ import "C" import ( + "unsafe" + "github.com/bububa/openvision/go/common" "github.com/bububa/openvision/go/face" ) @@ -25,20 +27,20 @@ func NewCenterface() *Centerface { // Destroy free detecter func (d *Centerface) Destroy() { - Destroy(d) + common.DestroyEstimator(d) } -// Handler returns C.IDetecter -func (d *Centerface) Handler() C.IFaceDetecter { - return d.d +// Pointer implement Estimator interface +func (d *Centerface) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) } // LoadModel load model for detecter func (d *Centerface) LoadModel(modelPath string) error { - return LoadModel(d, modelPath) + return common.EstimatorLoadModel(d, modelPath) } -// DetectFace implement Detecter interface -func (d *Centerface) DetectFace(img *common.Image) ([]face.FaceInfo, error) { - return DetectFace(d, img) +// Detect implement Detecter interface +func (d *Centerface) Detect(img *common.Image) ([]face.FaceInfo, error) { + return Detect(d, img) } diff --git a/go/face/detecter/detecter.go b/go/face/detecter/detecter.go index 3294919..2522c78 100644 --- a/go/face/detecter/detecter.go +++ b/go/face/detecter/detecter.go @@ -17,36 +17,18 @@ import ( // Detecter represents deteter interface type Detecter interface { - Handler() C.IFaceDetecter - LoadModel(modelPath string) error - DetectFace(img *common.Image) ([]face.FaceInfo, error) - Destroy() + common.Estimator + Detect(img *common.Image) ([]face.FaceInfo, error) } -// LoadModel load detecter model -func LoadModel(d Detecter, modelPath string) error { - cpath := C.CString(modelPath) - defer C.free(unsafe.Pointer(cpath)) - retCode := C.load_model((C.IEstimator)(unsafe.Pointer(d.Handler())), cpath) - if retCode != 0 { - return openvision.LoadModelError(int(retCode)) - } - return nil -} - -// Destroy a detecter -func Destroy(d Detecter) { - C.destroy_estimator((C.IEstimator)(unsafe.Pointer(d.Handler()))) -} - -// DetectFace detect face useing detecter -func DetectFace(d Detecter, img *common.Image) ([]face.FaceInfo, error) { +// Detect detect face useing detecter +func Detect(d Detecter, img *common.Image) ([]face.FaceInfo, error) { imgWidth := img.WidthF64() imgHeight := img.HeightF64() data := img.Bytes() CFaces := face.NewCFaceInfoVector() defer face.FreeCFaceInfoVector(CFaces) - errCode := C.detect_face(d.Handler(), (*C.uchar)(unsafe.Pointer(&data[0])), C.int(imgWidth), C.int(imgHeight), (*C.FaceInfoVector)(unsafe.Pointer(CFaces))) + errCode := C.detect_face((C.IFaceDetecter)(d.Pointer()), (*C.uchar)(unsafe.Pointer(&data[0])), C.int(imgWidth), C.int(imgHeight), (*C.FaceInfoVector)(unsafe.Pointer(CFaces))) if errCode != 0 { return nil, openvision.DetectFaceError(int(errCode)) } diff --git a/go/face/detecter/mtcnn.go 
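With the face detecter refactor above, `Detect` replaces the old `DetectFace` and the thread count is set through the shared `common.Estimator` interface. A hedged sketch of driving the new SCRFD detecter this way — the `detectWithScrfd` wrapper is illustrative only, and the model directory is assumed to follow the `scrfd/scrfd1g` layout used in the updated example:

```go
package sketch

import (
	"log"

	"github.com/bububa/openvision/go/common"
	"github.com/bububa/openvision/go/face/detecter"
)

// detectWithScrfd loads the SCRFD model, applies the thread count through
// the Estimator interface, and iterates the detected faces.
func detectWithScrfd(modelDir string, img *common.Image, threads int) error {
	d := detecter.NewScrfd()
	defer d.Destroy()
	if err := d.LoadModel(modelDir); err != nil {
		return err
	}
	common.SetEstimatorThreads(d, threads)
	faces, err := d.Detect(img)
	if err != nil {
		return err
	}
	for _, f := range faces {
		// Rect is normalized to [0,1] by the binding; Score is the confidence.
		log.Printf("face score=%.3f rect=%+v\n", f.Score, f.Rect)
	}
	return nil
}
```
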
b/go/face/detecter/mtcnn.go index db44426..fa89b58 100644 --- a/go/face/detecter/mtcnn.go +++ b/go/face/detecter/mtcnn.go @@ -7,6 +7,8 @@ package detecter */ import "C" import ( + "unsafe" + "github.com/bububa/openvision/go/common" "github.com/bububa/openvision/go/face" ) @@ -25,20 +27,20 @@ func NewMtcnn() *Mtcnn { // Destroy free detecter func (d *Mtcnn) Destroy() { - Destroy(d) + common.DestroyEstimator(d) } -// Handler returns C.IFaceDetecter -func (d *Mtcnn) Handler() C.IFaceDetecter { - return d.d +// Pointer implement Estimator interface +func (d *Mtcnn) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) } // LoadModel implement Detecter interface func (d *Mtcnn) LoadModel(modelPath string) error { - return LoadModel(d, modelPath) + return common.EstimatorLoadModel(d, modelPath) } -// DetectFace implement Detecter interface -func (d *Mtcnn) DetectFace(img *common.Image) ([]face.FaceInfo, error) { - return DetectFace(d, img) +// Detect implement Detecter interface +func (d *Mtcnn) Detect(img *common.Image) ([]face.FaceInfo, error) { + return Detect(d, img) } diff --git a/go/face/detecter/retinaface.go b/go/face/detecter/retinaface.go index be9fb1d..0330575 100644 --- a/go/face/detecter/retinaface.go +++ b/go/face/detecter/retinaface.go @@ -7,6 +7,8 @@ package detecter */ import "C" import ( + "unsafe" + "github.com/bububa/openvision/go/common" "github.com/bububa/openvision/go/face" ) @@ -25,19 +27,20 @@ func NewRetinaFace() *RetinaFace { // Destroy free detecter func (d *RetinaFace) Destroy() { - Destroy(d) + common.DestroyEstimator(d) } -// Handler returns C.IFaceDetecter -func (d *RetinaFace) Handler() C.IFaceDetecter { - return d.d +// Pointer implement Estimator interface +func (d *RetinaFace) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) } // LoadModel implement Detecter interface func (d *RetinaFace) LoadModel(modelPath string) error { - return LoadModel(d, modelPath) + return common.EstimatorLoadModel(d, modelPath) } -func (d *RetinaFace) DetectFace(img *common.Image) ([]face.FaceInfo, error) { - return DetectFace(d, img) +// Detect implement Detecter interface +func (d *RetinaFace) Detect(img *common.Image) ([]face.FaceInfo, error) { + return Detect(d, img) } diff --git a/go/face/detecter/scrfd.go b/go/face/detecter/scrfd.go new file mode 100644 index 0000000..ca7b242 --- /dev/null +++ b/go/face/detecter/scrfd.go @@ -0,0 +1,46 @@ +package detecter + +/* +#include +#include +#include "openvision/face/detecter.h" +*/ +import "C" +import ( + "unsafe" + + "github.com/bububa/openvision/go/common" + "github.com/bububa/openvision/go/face" +) + +// Scrfd represents scrfd detecter +type Scrfd struct { + d C.IFaceDetecter +} + +// NewScrfd returns a new Scrfd +func NewScrfd() *Scrfd { + return &Scrfd{ + d: C.new_scrfd(), + } +} + +// Destroy free detecter +func (d *Scrfd) Destroy() { + common.DestroyEstimator(d) +} + +// LoadModel implement Detecter interface +func (d *Scrfd) LoadModel(modelPath string) error { + return common.EstimatorLoadModel(d, modelPath) +} + +// Pointer implement Estimator interface +func (d *Scrfd) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) +} + +// Detect implement Detecter interface +func (d *Scrfd) Detect(img *common.Image) ([]face.FaceInfo, error) { + return Detect(d, img) +} diff --git a/go/face/detecter/yoloface.go b/go/face/detecter/yoloface.go index b9906d1..417e2e0 100644 --- a/go/face/detecter/yoloface.go +++ b/go/face/detecter/yoloface.go @@ -7,6 +7,8 @@ package detecter */ import "C" import ( + "unsafe" + 
"github.com/bububa/openvision/go/common" "github.com/bububa/openvision/go/face" ) @@ -25,20 +27,20 @@ func NewYoloFace() *YoloFace { // Destroy free detecter func (d *YoloFace) Destroy() { - Destroy(d) + common.DestroyEstimator(d) } -// Handler returns C.IFaceDetecter -func (d *YoloFace) Handler() C.IFaceDetecter { - return d.d +// Pointer implement Estimator interface +func (d *YoloFace) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) } // LoadModel implement Detecter interface func (d *YoloFace) LoadModel(modelPath string) error { - return LoadModel(d, modelPath) + return common.EstimatorLoadModel(d, modelPath) } -// DetectFace implement Detecter interface -func (d *YoloFace) DetectFace(img *common.Image) ([]face.FaceInfo, error) { - return DetectFace(d, img) +// Detect implement Detecter interface +func (d *YoloFace) Detect(img *common.Image) ([]face.FaceInfo, error) { + return Detect(d, img) } diff --git a/go/face/face_info.go b/go/face/face_info.go index e309410..ed93f0d 100644 --- a/go/face/face_info.go +++ b/go/face/face_info.go @@ -27,13 +27,13 @@ type FaceInfo struct { // GoFaceInfo convert c FaceInfo to go type func GoFaceInfo(cInfo *C.FaceInfo, w float64, h float64) FaceInfo { info := FaceInfo{ - Score: float32(cInfo.score_), + Score: float32(cInfo.score), Mask: bool(cInfo.mask_), Rect: common.Rect( - float64(cInfo.location_.x)/w, - float64(cInfo.location_.y)/h, - float64(cInfo.location_.width)/w, - float64(cInfo.location_.height)/h, + float64(cInfo.rect.x)/w, + float64(cInfo.rect.y)/h, + float64(cInfo.rect.width)/w, + float64(cInfo.rect.height)/h, ), } for i := 0; i < 5; i++ { @@ -48,9 +48,9 @@ func GoFaceInfo(cInfo *C.FaceInfo, w float64, h float64) FaceInfo { // CFaceInfo convert FaceInfo to C.FaceInfo func (f FaceInfo) CFaceInfo(w float64, h float64) *C.FaceInfo { ret := (*C.FaceInfo)(C.malloc(C.sizeof_FaceInfo)) - ret.score_ = C.float(f.Score) + ret.score = C.float(f.Score) ret.mask_ = C.bool(f.Mask) - ret.location_ = C.Rect{ + ret.rect = C.Rect{ C.int(f.Rect.X * w), C.int(f.Rect.Y * h), C.int(f.Rect.Width * w), diff --git a/go/face/hopenet/hopenet.go b/go/face/hopenet/hopenet.go index 108c19e..16dd55b 100644 --- a/go/face/hopenet/hopenet.go +++ b/go/face/hopenet/hopenet.go @@ -26,20 +26,20 @@ func NewHopenet() *Hopenet { } } +// Pointer implement Estimator interface +func (h *Hopenet) Pointer() unsafe.Pointer { + return unsafe.Pointer(h.d) +} + // LoadModel load detecter model func (h *Hopenet) LoadModel(modelPath string) error { - cpath := C.CString(modelPath) - defer C.free(unsafe.Pointer(cpath)) - retCode := C.load_model((C.IEstimator)(unsafe.Pointer(h.d)), cpath) - if retCode != 0 { - return openvision.LoadModelError(int(retCode)) - } + return common.EstimatorLoadModel(h, modelPath) return nil } // Destroy destroy C.IHopeNet func (h *Hopenet) Destroy() { - C.destroy_estimator((C.IEstimator)(unsafe.Pointer(h.d))) + common.DestroyEstimator(h) } // Detect head pose @@ -51,7 +51,7 @@ func (h *Hopenet) Detect(img *common.Image, faceRect common.Rectangle) (face.Hea CHeadPose := face.NewCHeadPose() defer C.free(unsafe.Pointer(CHeadPose)) errCode := C.hopenet_detect( - h.d, + (C.IHopenet)(h.Pointer()), (*C.uchar)(unsafe.Pointer(&data[0])), C.int(imgWidth), C.int(imgHeight), (*C.Rect)(unsafe.Pointer(CRect)), diff --git a/go/face/landmarker/insightface.go b/go/face/landmarker/insightface.go index 8d0207b..0803f4c 100644 --- a/go/face/landmarker/insightface.go +++ b/go/face/landmarker/insightface.go @@ -6,7 +6,11 @@ package landmarker #include 
"openvision/face/landmarker.h" */ import "C" -import "github.com/bububa/openvision/go/common" +import ( + "unsafe" + + "github.com/bububa/openvision/go/common" +) // Insightface represents Insightface landmarker type Insightface struct { @@ -20,19 +24,19 @@ func NewInsightface() *Insightface { } } -// Handler returns C.ILandmarker -func (d *Insightface) Handler() C.IFaceLandmarker { - return d.d +// Pointer implement Estimator interface +func (d *Insightface) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) } // LoadModel implement Landmarker interface func (d *Insightface) LoadModel(modelPath string) error { - return LoadModel(d, modelPath) + return common.EstimatorLoadModel(d, modelPath) } // Destroy implement Landmarker interface func (d *Insightface) Destroy() { - Destroy(d) + common.DestroyEstimator(d) } // ExtractKeypoints implement Landmarker interface diff --git a/go/face/landmarker/landmarker.go b/go/face/landmarker/landmarker.go index 2b743c3..b8ade31 100644 --- a/go/face/landmarker/landmarker.go +++ b/go/face/landmarker/landmarker.go @@ -16,26 +16,8 @@ import ( // Landmarker represents landmarker interface type Landmarker interface { - Handler() C.IFaceLandmarker - LoadModel(modelPath string) error + common.Estimator ExtractKeypoints(img *common.Image, face common.Rectangle) ([]common.Point, error) - Destroy() -} - -// LoadModel load landmarker model -func LoadModel(d Landmarker, modelPath string) error { - cpath := C.CString(modelPath) - defer C.free(unsafe.Pointer(cpath)) - retCode := C.load_model((C.IEstimator)(unsafe.Pointer(d.Handler())), cpath) - if retCode != 0 { - return openvision.LoadModelError(int(retCode)) - } - return nil -} - -// Destroy a landmarker -func Destroy(d Landmarker) { - C.destroy_estimator((C.IEstimator)(unsafe.Pointer(d.Handler()))) } // ExtractKeypoints extract keypoints using landmarker @@ -47,7 +29,7 @@ func ExtractKeypoints(d Landmarker, img *common.Image, faceRect common.Rectangle defer common.FreeCPoint2fVector(CPoints) CRect := faceRect.CRect(imgWidth, imgHeight) errCode := C.extract_face_keypoints( - d.Handler(), + (C.IFaceLandmarker)(d.Pointer()), (*C.uchar)(unsafe.Pointer(&data[0])), C.int(imgWidth), C.int(imgHeight), (*C.Rect)(unsafe.Pointer(CRect)), diff --git a/go/face/landmarker/scrfd.go b/go/face/landmarker/scrfd.go new file mode 100644 index 0000000..3a8a0b0 --- /dev/null +++ b/go/face/landmarker/scrfd.go @@ -0,0 +1,45 @@ +package landmarker + +/* +#include +#include +#include "openvision/face/landmarker.h" +*/ +import "C" +import ( + "unsafe" + + "github.com/bububa/openvision/go/common" +) + +// Scrfd represents Scrfd landmarker +type Scrfd struct { + d C.IFaceLandmarker +} + +// NewScrfd returns a new Scrfd landmarker +func NewScrfd() *Scrfd { + return &Scrfd{ + d: C.new_scrfd_landmarker(), + } +} + +// Pointer implement Estimator interface +func (d *Scrfd) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) +} + +// LoadModel implement Landmarker interface +func (d *Scrfd) LoadModel(modelPath string) error { + return common.EstimatorLoadModel(d, modelPath) +} + +// Destroy implement Landmarker interface +func (d *Scrfd) Destroy() { + common.DestroyEstimator(d) +} + +// ExtractKeypoints implement Landmarker interface +func (d *Scrfd) ExtractKeypoints(img *common.Image, faceRect common.Rectangle) ([]common.Point, error) { + return ExtractKeypoints(d, img, faceRect) +} diff --git a/go/face/landmarker/zq.go b/go/face/landmarker/zq.go index 7cc2d15..33d2804 100644 --- a/go/face/landmarker/zq.go +++ b/go/face/landmarker/zq.go @@ 
-6,7 +6,11 @@ package landmarker #include "openvision/face/landmarker.h" */ import "C" -import "github.com/bububa/openvision/go/common" +import ( + "unsafe" + + "github.com/bububa/openvision/go/common" +) // Zq represents Zq landmarker type Zq struct { @@ -20,19 +24,19 @@ func NewZq() *Zq { } } -// Handler returns C.ILandmarker -func (d *Zq) Handler() C.IFaceLandmarker { - return d.d +// Pointer implement Estimator interface +func (d *Zq) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) } // LoadModel implement Landmarker interface func (d *Zq) LoadModel(modelPath string) error { - return LoadModel(d, modelPath) + return common.EstimatorLoadModel(d, modelPath) } // Destroy implement Landmarker interface func (d *Zq) Destroy() { - Destroy(d) + common.DestroyEstimator(d) } // ExtractKeypoints implement Landmarker interface diff --git a/go/face/recognizer/mobilefacenet.go b/go/face/recognizer/mobilefacenet.go index b6425c5..dd36f1b 100644 --- a/go/face/recognizer/mobilefacenet.go +++ b/go/face/recognizer/mobilefacenet.go @@ -6,7 +6,11 @@ package recognizer #include "openvision/face/recognizer.h" */ import "C" -import "github.com/bububa/openvision/go/common" +import ( + "unsafe" + + "github.com/bububa/openvision/go/common" +) // Mobilefacenet represents Mobilefacenet recognizer type Mobilefacenet struct { @@ -20,19 +24,19 @@ func NewMobilefacenet() *Mobilefacenet { } } -// Handler returns C.IFaceRecognizer -func (d *Mobilefacenet) Handler() C.IFaceRecognizer { - return d.d +// Pointer implement Estimator interface +func (d *Mobilefacenet) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) } // LoadModel implement Recognizer interface func (d *Mobilefacenet) LoadModel(modelPath string) error { - return LoadModel(d, modelPath) + return common.EstimatorLoadModel(d, modelPath) } // Destroy implement Recognizer interface func (d *Mobilefacenet) Destroy() { - Destroy(d) + common.DestroyEstimator(d) } // ExtractFeatures implement Recognizer interface diff --git a/go/face/recognizer/recognizer.go b/go/face/recognizer/recognizer.go index f136e00..10ed1e5 100644 --- a/go/face/recognizer/recognizer.go +++ b/go/face/recognizer/recognizer.go @@ -16,26 +16,8 @@ import ( // Recognizer represents Recognizer interface type Recognizer interface { - Handler() C.IFaceRecognizer - LoadModel(modelPath string) error + common.Estimator ExtractFeatures(img *common.Image, face common.Rectangle) ([]float64, error) - Destroy() -} - -// LoadModel load recognizer model -func LoadModel(r Recognizer, modelPath string) error { - cpath := C.CString(modelPath) - defer C.free(unsafe.Pointer(cpath)) - retCode := C.load_model((C.IEstimator)(unsafe.Pointer(r.Handler())), cpath) - if retCode != 0 { - return openvision.LoadModelError(int(retCode)) - } - return nil -} - -// Destroy a recognizer -func Destroy(r Recognizer) { - C.destroy_estimator((C.IEstimator)(unsafe.Pointer(r.Handler()))) } // ExtractFeatures extract features using recognizer @@ -47,7 +29,7 @@ func ExtractFeatures(r Recognizer, img *common.Image, faceRect common.Rectangle) defer common.FreeCFloatVector(CFeatures) CRect := faceRect.CRect(imgWidth, imgHeight) errCode := C.extract_feature( - r.Handler(), + (C.IFaceRecognizer)(r.Pointer()), (*C.uchar)(unsafe.Pointer(&data[0])), C.int(imgWidth), C.int(imgHeight), (*C.Rect)(unsafe.Pointer(CRect)), diff --git a/go/hand/detecter/detecter.go b/go/hand/detecter/detecter.go index 52f9194..e00bf45 100644 --- a/go/hand/detecter/detecter.go +++ b/go/hand/detecter/detecter.go @@ -16,26 +16,8 @@ import ( // Detecter 
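The landmarker side mirrors the same pattern: the new SCRFD landmarker satisfies the slimmed-down `Landmarker` interface and is fed face rectangles from any `Detecter`. A sketch under those assumptions (the `scrfdKeypoints` helper is illustrative, not part of the patch; the model directory is expected to follow the `scrfd/landmarker` layout from the example):

```go
package sketch

import (
	"github.com/bububa/openvision/go/common"
	"github.com/bububa/openvision/go/face/detecter"
	"github.com/bububa/openvision/go/face/landmarker"
)

// scrfdKeypoints detects faces first, then extracts keypoints for each
// normalized face rectangle with the SCRFD landmarker.
func scrfdKeypoints(d detecter.Detecter, modelDir string, img *common.Image) ([]common.Point, error) {
	m := landmarker.NewScrfd()
	defer m.Destroy()
	if err := m.LoadModel(modelDir); err != nil {
		return nil, err
	}
	faces, err := d.Detect(img)
	if err != nil {
		return nil, err
	}
	var keypoints []common.Point
	for _, f := range faces {
		pts, err := m.ExtractKeypoints(img, f.Rect)
		if err != nil {
			return nil, err
		}
		keypoints = append(keypoints, pts...)
	}
	return keypoints, nil
}
```
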
represents deteter interface type Detecter interface { - Handler() C.IHandDetecter - LoadModel(modelPath string) error + common.Estimator Detect(img *common.Image) ([]common.ObjectInfo, error) - Destroy() -} - -// LoadModel load detecter model -func LoadModel(d Detecter, modelPath string) error { - cpath := C.CString(modelPath) - defer C.free(unsafe.Pointer(cpath)) - retCode := C.load_model((C.IEstimator)(unsafe.Pointer(d.Handler())), cpath) - if retCode != 0 { - return openvision.LoadModelError(int(retCode)) - } - return nil -} - -// Destroy a detecter -func Destroy(d Detecter) { - C.destroy_estimator((C.IEstimator)(unsafe.Pointer(d.Handler()))) } // Detect detect hand ROI @@ -46,7 +28,7 @@ func Detect(d Detecter, img *common.Image) ([]common.ObjectInfo, error) { cObjs := common.NewCObjectInfoVector() defer common.FreeCObjectInfoVector(cObjs) errCode := C.extract_hand_rois( - d.Handler(), + (C.IHandDetecter)(d.Pointer()), (*C.uchar)(unsafe.Pointer(&data[0])), C.int(imgWidth), C.int(imgHeight), diff --git a/go/hand/detecter/nanodet.go b/go/hand/detecter/nanodet.go index 518bd54..9032832 100644 --- a/go/hand/detecter/nanodet.go +++ b/go/hand/detecter/nanodet.go @@ -7,6 +7,8 @@ package detecter */ import "C" import ( + "unsafe" + "github.com/bububa/openvision/go/common" ) @@ -24,17 +26,17 @@ func NewNanodet() *Nanodet { // Destroy free detecter func (d *Nanodet) Destroy() { - Destroy(d) + common.DestroyEstimator(d) } -// Handler returns C.IHandDetecter -func (d *Nanodet) Handler() C.IHandDetecter { - return d.d +// Pointer implement Estimator interface +func (d *Nanodet) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) } // LoadModel load model for detecter func (d *Nanodet) LoadModel(modelPath string) error { - return LoadModel(d, modelPath) + return common.EstimatorLoadModel(d, modelPath) } // Detect implement Detecter interface diff --git a/go/hand/detecter/yolox.go b/go/hand/detecter/yolox.go index feb46dd..fa9f9cc 100644 --- a/go/hand/detecter/yolox.go +++ b/go/hand/detecter/yolox.go @@ -7,6 +7,8 @@ package detecter */ import "C" import ( + "unsafe" + "github.com/bububa/openvision/go/common" ) @@ -24,17 +26,17 @@ func NewYolox() *Yolox { // Destroy free detecter func (d *Yolox) Destroy() { - Destroy(d) + common.DestroyEstimator(d) } -// Handler returns C.IHandDetecter -func (d *Yolox) Handler() C.IHandDetecter { - return d.d +// Pointer implenment Estimator interface +func (d *Yolox) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) } // LoadModel load model for detecter func (d *Yolox) LoadModel(modelPath string) error { - return LoadModel(d, modelPath) + return common.EstimatorLoadModel(d, modelPath) } // Detect implement Detecter interface diff --git a/go/hand/pose/estimator.go b/go/hand/pose/estimator.go index 1e04254..41b3d09 100644 --- a/go/hand/pose/estimator.go +++ b/go/hand/pose/estimator.go @@ -16,26 +16,8 @@ import ( // Estimator represents estimator interface type Estimator interface { - Handler() C.IHandPoseEstimator - LoadModel(modelPath string) error + common.Estimator Detect(img *common.Image, rect common.Rectangle) ([]common.Point, error) - Destroy() -} - -// LoadModel load detecter model -func LoadModel(d Estimator, modelPath string) error { - cpath := C.CString(modelPath) - defer C.free(unsafe.Pointer(cpath)) - retCode := C.load_model((C.IEstimator)(unsafe.Pointer(d.Handler())), cpath) - if retCode != 0 { - return openvision.LoadModelError(int(retCode)) - } - return nil -} - -// Destroy a estimator -func Destroy(d Estimator) { - 
C.destroy_estimator((C.IEstimator)(unsafe.Pointer(d.Handler()))) } // Detect detect hand pose @@ -47,7 +29,7 @@ func Detect(d Estimator, img *common.Image, rect common.Rectangle) ([]common.Poi defer common.FreeCPoint2fVector(CPoints) CRect := rect.CRect(imgWidth, imgHeight) errCode := C.hand_pose( - d.Handler(), + (C.IHandPoseEstimator)(d.Pointer()), (*C.uchar)(unsafe.Pointer(&data[0])), C.int(imgWidth), C.int(imgHeight), (*C.Rect)(unsafe.Pointer(CRect)), diff --git a/go/hand/pose/handpose.go b/go/hand/pose/handpose.go index f66a1f3..2383b97 100644 --- a/go/hand/pose/handpose.go +++ b/go/hand/pose/handpose.go @@ -7,6 +7,8 @@ package pose */ import "C" import ( + "unsafe" + "github.com/bububa/openvision/go/common" ) @@ -24,17 +26,17 @@ func NewHandPoseEstimator() *HandPoseEstimator { // Destroy free Estimator func (d *HandPoseEstimator) Destroy() { - Destroy(d) + common.DestroyEstimator(d) } -// Handler returns C.IHandPoseEstimator -func (d *HandPoseEstimator) Handler() C.IHandPoseEstimator { - return d.d +// Pointer implement Estimator interface +func (d *HandPoseEstimator) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) } // LoadModel load model for estimator func (d *HandPoseEstimator) LoadModel(modelPath string) error { - return LoadModel(d, modelPath) + return common.EstimatorLoadModel(d, modelPath) } // Detect implement Estimator interface diff --git a/go/pose/detecter/detecter.go b/go/pose/detecter/detecter.go index 332fa5a..4140c46 100644 --- a/go/pose/detecter/detecter.go +++ b/go/pose/detecter/detecter.go @@ -16,26 +16,8 @@ import ( // Detecter represents deteter interface type Detecter interface { - Handler() C.IPoseDetecter - LoadModel(modelPath string) error + common.Estimator ExtractKeypoints(img *common.Image) ([]common.ObjectInfo, error) - Destroy() -} - -// LoadModel load detecter model -func LoadModel(d Detecter, modelPath string) error { - cpath := C.CString(modelPath) - defer C.free(unsafe.Pointer(cpath)) - retCode := C.load_model((C.IEstimator)(unsafe.Pointer(d.Handler())), cpath) - if retCode != 0 { - return openvision.LoadModelError(int(retCode)) - } - return nil -} - -// Destroy a detecter -func Destroy(d Detecter) { - C.destroy_estimator((C.IEstimator)(unsafe.Pointer(d.Handler()))) } // ExtractKeypoints detect pose keypoints using detecter @@ -46,7 +28,7 @@ func ExtractKeypoints(d Detecter, img *common.Image) ([]common.ObjectInfo, error cObjs := common.NewCObjectInfoVector() defer common.FreeCObjectInfoVector(cObjs) errCode := C.extract_pose_rois( - d.Handler(), + (C.IPoseDetecter)(d.Pointer()), (*C.uchar)(unsafe.Pointer(&data[0])), C.int(imgWidth), C.int(imgHeight), @@ -62,7 +44,7 @@ func ExtractKeypoints(d Detecter, img *common.Image) ([]common.ObjectInfo, error defer common.FreeCKeypointVector(cKeypoints) cROI := (*C.ObjectInfo)(unsafe.Pointer(uintptr(ptr) + uintptr(C.sizeof_ObjectInfo*C.int(i)))) errCode := C.extract_pose_keypoints( - d.Handler(), + (C.IPoseDetecter)(d.Pointer()), (*C.uchar)(unsafe.Pointer(&data[0])), C.int(imgWidth), C.int(imgHeight), @@ -80,7 +62,7 @@ func ExtractKeypoints(d Detecter, img *common.Image) ([]common.ObjectInfo, error float64(cROI.rect.width)/imgWidth, float64(cROI.rect.height)/imgHeight, ), - Score: float32(cROI.prob), + Score: float32(cROI.score), }) } diff --git a/go/pose/detecter/ultralight.go b/go/pose/detecter/ultralight.go index 90816bb..be8b759 100644 --- a/go/pose/detecter/ultralight.go +++ b/go/pose/detecter/ultralight.go @@ -7,6 +7,8 @@ package detecter */ import "C" import ( + "unsafe" + 
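For hands, detection and pose estimation are now chained through the same Estimator-based interfaces: a hand `Detecter` returns normalized ROIs and the pose `Estimator` extracts keypoints per ROI. An illustrative sketch, not part of the patch (the `handKeypoints` helper, the choice of Yolox, and the model paths are assumptions mirroring the hand example earlier in this diff):

```go
package sketch

import (
	"github.com/bububa/openvision/go/common"
	handdet "github.com/bububa/openvision/go/hand/detecter"
	handpose "github.com/bububa/openvision/go/hand/pose"
)

// handKeypoints detects hand ROIs, then runs pose estimation on each ROI.
func handKeypoints(detModel, poseModel string, img *common.Image) ([][]common.Point, error) {
	d := handdet.NewYolox()
	defer d.Destroy()
	if err := d.LoadModel(detModel); err != nil {
		return nil, err
	}
	e := handpose.NewHandPoseEstimator()
	defer e.Destroy()
	if err := e.LoadModel(poseModel); err != nil {
		return nil, err
	}
	rois, err := d.Detect(img)
	if err != nil {
		return nil, err
	}
	var hands [][]common.Point
	for _, roi := range rois {
		pts, err := e.Detect(img, roi.Rect)
		if err != nil {
			return nil, err
		}
		hands = append(hands, pts)
	}
	return hands, nil
}
```
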
"github.com/bububa/openvision/go/common" ) @@ -24,17 +26,17 @@ func NewUltralight() *Ultralight { // Destroy free detecter func (d *Ultralight) Destroy() { - Destroy(d) + common.DestroyEstimator(d) } -// Handler returns C.IPoseDetecter -func (d *Ultralight) Handler() C.IPoseDetecter { - return d.d +// Pointer implement Estimator interface +func (d *Ultralight) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) } // LoadModel load model for detecter func (d *Ultralight) LoadModel(modelPath string) error { - return LoadModel(d, modelPath) + return common.EstimatorLoadModel(d, modelPath) } // ExtractKeypoints implement Detecter interface diff --git a/src/common/common.cpp b/src/common/common.cpp index b844019..efd9806 100644 --- a/src/common/common.cpp +++ b/src/common/common.cpp @@ -1,8 +1,8 @@ #include "common.h" #include -#include #include #include +#include "cpu.h" #ifdef OV_VULKAN #include "gpu.h" @@ -28,6 +28,16 @@ void destroy_gpu_instance() { #endif // OV_VULKAN } +int get_big_cpu_count() { + return ncnn::get_big_cpu_count(); +} + +void set_omp_num_threads(int n) { +#ifdef OV_OPENMP + ncnn::set_omp_num_threads(n); +#endif +} + int load_model(IEstimator d, const char *root_path) { return static_cast(d)->LoadModel(root_path); } @@ -36,6 +46,10 @@ void destroy_estimator(IEstimator d) { delete static_cast(d); } +void set_num_threads(IEstimator d, int n) { + static_cast(d)->set_num_threads(n); +} + void FreePoint2fVector(Point2fVector* p) { if (p->points != NULL) { free(p->points); @@ -97,9 +111,44 @@ void FreeObjectInfoVector(ObjectInfoVector *p) { } namespace ov { +Estimator::Estimator() { + net_ = new ncnn::Net(); + initialized_ = false; +#ifdef OV_VULKAN + net_->opt.use_vulkan_compute = true; +#endif // OV_VULKAN +} + +Estimator::~Estimator() { + if (net_) { + net_->clear(); + } +} + +int Estimator::LoadModel(const char * root_path) { + std::string param_file = std::string(root_path) + "/param"; + std::string bin_file = std::string(root_path) + "/bin"; + if (net_->load_param(param_file.c_str()) == -1 || + net_->load_model(bin_file.c_str()) == -1) { + return 10000; + } + + initialized_ = true; + + return 0; +} + +void Estimator::set_num_threads(int n) { + num_threads = n; + if (net_) { + net_->opt.num_threads = n; + } +} + + int RatioAnchors(const Rect & anchor, const std::vector& ratios, - std::vector* anchors) { + std::vector* anchors, int threads_num) { anchors->clear(); Point center = Point(anchor.x + (anchor.width - 1) * 0.5f, anchor.y + (anchor.height - 1) * 0.5f); @@ -123,7 +172,7 @@ int RatioAnchors(const Rect & anchor, } int ScaleAnchors(const std::vector& ratio_anchors, - const std::vector& scales, std::vector* anchors) { + const std::vector& scales, std::vector* anchors, int threads_num) { anchors->clear(); #if defined(_OPENMP) #pragma omp parallel for num_threads(threads_num) @@ -150,12 +199,13 @@ int ScaleAnchors(const std::vector& ratio_anchors, int GenerateAnchors(const int & base_size, const std::vector& ratios, const std::vector scales, - std::vector* anchors) { + std::vector* anchors, + int threads_num) { anchors->clear(); Rect anchor = Rect(0, 0, base_size, base_size); std::vector ratio_anchors; - RatioAnchors(anchor, ratios, &ratio_anchors); - ScaleAnchors(ratio_anchors, scales, anchors); + RatioAnchors(anchor, ratios, &ratio_anchors, threads_num); + ScaleAnchors(ratio_anchors, scales, anchors, threads_num); return 0; } @@ -207,14 +257,14 @@ void qsort_descent_inplace(std::vector& objects, int left, int right { int i = left; int j = right; - float p = 
objects[(left + right) / 2].prob; + float p = objects[(left + right) / 2].score; while (i <= j) { - while (objects[i].prob > p) + while (objects[i].score > p) i++; - while (objects[j].prob < p) + while (objects[j].score < p) j--; if (i <= j) @@ -281,6 +331,44 @@ void nms_sorted_bboxes(const std::vector& objects, std::vector& picked.push_back(i); } } +// +// insightface/detection/scrfd/mmdet/core/anchor/anchor_generator.py gen_single_level_base_anchors() +ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios, const ncnn::Mat& scales) +{ + int num_ratio = ratios.w; + int num_scale = scales.w; + + ncnn::Mat anchors; + anchors.create(4, num_ratio * num_scale); + + const float cx = 0; + const float cy = 0; + + for (int i = 0; i < num_ratio; i++) + { + float ar = ratios[i]; + + int r_w = round(base_size / sqrt(ar)); + int r_h = round(r_w * ar); //round(base_size * sqrt(ar)); + + for (int j = 0; j < num_scale; j++) + { + float scale = scales[j]; + + float rs_w = r_w * scale; + float rs_h = r_h * scale; + + float* anchor = anchors.row(i * num_scale + j); + + anchor[0] = cx - rs_w * 0.5f; + anchor[1] = cy - rs_h * 0.5f; + anchor[2] = cx + rs_w * 0.5f; + anchor[3] = cy + rs_h * 0.5f; + } + } + + return anchors; +} int generate_grids_and_stride(const int target_size, std::vector& strides, std::vector& grid_strides) { diff --git a/src/common/common.h b/src/common/common.h index d7e12d8..7511e89 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -43,7 +43,7 @@ typedef struct Rect { typedef struct Keypoint { Point2f p; - float prob; + float score; } Keypoint; @@ -54,8 +54,11 @@ typedef void* IEstimator; int get_gpu_count(); int create_gpu_instance(); void destroy_gpu_instance(); +int get_big_cpu_count(); +void set_omp_num_threads(int n); int load_model(IEstimator e, const char* root_path); void destroy_estimator(IEstimator e); +void set_num_threads(IEstimator e, int n); typedef struct Point2fVector { Point2f* points; @@ -96,7 +99,7 @@ void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint* val); typedef struct ObjectInfoC { Rect rect; - float prob; + float score; int label; KeypointVector* pts; } ObjectInfo; diff --git a/src/common/common.hpp b/src/common/common.hpp index 943214c..f4e9489 100644 --- a/src/common/common.hpp +++ b/src/common/common.hpp @@ -10,12 +10,17 @@ #endif namespace ov { -const int threads_num = 2; class Estimator { public: - virtual ~Estimator(){}; - virtual int LoadModel(const char* root_path) = 0; + Estimator(); + virtual ~Estimator(); + virtual int LoadModel(const char* root_path); + virtual void set_num_threads(int n); +protected: + int num_threads = 2; + ncnn::Net* net_; + bool initialized_ = false; }; // Wrapper for an individual cv::cvSize @@ -78,17 +83,17 @@ typedef struct Rect { struct ImageInfo { std::string label_; - float score_; + float score; }; struct Keypoint { - ov::Point2f p; - float prob; + Point2f p; + float score; }; struct ObjectInfo { Rect rect; - float prob; + float score; int label; std::vector pts; }; @@ -101,14 +106,15 @@ struct GridAndStride }; int RatioAnchors(const Rect & anchor, - const std::vector& ratios, std::vector* anchors); + const std::vector& ratios, std::vector* anchors, int threads_num); int ScaleAnchors(const std::vector& ratio_anchors, - const std::vector& scales, std::vector* anchors); + const std::vector& scales, std::vector* anchors, int threads_num); int GenerateAnchors(const int & base_size, const std::vector& ratios, const std::vector scales, - std::vector* anchors); + std::vector* anchors, + int 
threads_num); float InterRectArea(const Rect & a, const Rect & b); @@ -128,7 +134,7 @@ int const NMS(const std::vector& inputs, std::vector* result, inputs_tmp.assign(inputs.begin(), inputs.end()); std::sort(inputs_tmp.begin(), inputs_tmp.end(), [](const T& a, const T& b) { - return a.score_ > b.score_; + return a.score > b.score; }); std::vector indexes(inputs_tmp.size()); @@ -145,7 +151,7 @@ int const NMS(const std::vector& inputs, std::vector* result, for (int i = 1; i < tmp_indexes.size(); i++) { int tmp_i = tmp_indexes[i]; float iou = 0.0f; - ComputeIOU(inputs_tmp[good_idx].location_, inputs_tmp[tmp_i].location_, &iou, type); + ComputeIOU(inputs_tmp[good_idx].rect, inputs_tmp[tmp_i].rect, &iou, type); if (iou <= threshold) { indexes.push_back(tmp_i); } @@ -160,6 +166,9 @@ void qsort_descent_inplace(std::vector& objects); void nms_sorted_bboxes(const std::vector& objects, std::vector& picked, float nms_threshold); +// insightface/detection/scrfd/mmdet/core/anchor/anchor_generator.py gen_single_level_base_anchors() +ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios, const ncnn::Mat& scales); + int generate_grids_and_stride(const int target_size, std::vector& strides, std::vector& grid_strides); float sigmoid(float x); diff --git a/src/face/common.h b/src/face/common.h index 9f23c80..b8ae8c1 100644 --- a/src/face/common.h +++ b/src/face/common.h @@ -13,8 +13,8 @@ typedef ovface::TrackedFaceInfo TrackedFaceInfo; typedef ovface::HeadPose HeadPose; #else typedef struct FaceInfo { - Rect location_; - float score_; + Rect rect; + float score; float keypoints_[10]; bool mask_; } FaceInfo; diff --git a/src/face/common/common.cpp b/src/face/common/common.cpp index 160d8da..f57aaa2 100644 --- a/src/face/common/common.cpp +++ b/src/face/common/common.cpp @@ -33,7 +33,86 @@ void FreeTrackedFaceInfoVector(TrackedFaceInfoVector *p) { namespace ovface { -float CalculateSimilarity(const std::vector&feature1, const std::vector& feature2) { +void qsort_descent_inplace(std::vector& objects, int left, int right) +{ + int i = left; + int j = right; + float p = objects[(left + right) / 2].score; + + while (i <= j) + { + while (objects[i].score > p) + i++; + + while (objects[j].score < p) + j--; + + if (i <= j) + { + // swap + std::swap(objects[i], objects[j]); + + i++; + j--; + } + } + + #pragma omp parallel sections + { + #pragma omp section + { + if (left < j) qsort_descent_inplace(objects, left, j); + } + #pragma omp section + { + if (i < right) qsort_descent_inplace(objects, i, right); + } + } +} + +void qsort_descent_inplace(std::vector& objects) +{ + if (objects.empty()) + return; + + qsort_descent_inplace(objects, 0, objects.size() - 1); +} + +void nms_sorted_bboxes(const std::vector& objects, std::vector& picked, float nms_threshold) +{ + picked.clear(); + + const int n = objects.size(); + + std::vector areas(n); + for (int i = 0; i < n; i++) + { + areas[i] = objects[i].rect.area(); + } + + for (int i = 0; i < n; i++) + { + const FaceInfo& a = objects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) + { + const FaceInfo& b = objects[picked[j]]; + + // intersection over union + float inter_area = InterRectArea(a.rect, b.rect); + float union_area = areas[i] + areas[picked[j]] - inter_area; + // float IoU = inter_area / union_area + if (inter_area / union_area > nms_threshold) + keep = 0; + } + + if (keep) + picked.push_back(i); + } +} + +float CalculateSimilarity(const std::vector&feature1, const std::vector& feature2, int threads_num) { if (feature1.size() != 
feature2.size()) { return 10003; } @@ -41,7 +120,7 @@ float CalculateSimilarity(const std::vector&feature1, const std::vector& objects, int left, int right); + +void qsort_descent_inplace(std::vector& objects); + +void nms_sorted_bboxes(const std::vector& objects, std::vector& picked, float nms_threshold); float CalculateSimilarity(const std::vector&feature1, const std::vector& feature2); + +} + #endif // !_FACE_COMMON_H_ diff --git a/src/face/detecter.h b/src/face/detecter.h index aec5723..95c94e4 100644 --- a/src/face/detecter.h +++ b/src/face/detecter.h @@ -12,6 +12,7 @@ extern "C" { IFaceDetecter new_centerface(); IFaceDetecter new_mtcnn(); IFaceDetecter new_yoloface(); + IFaceDetecter new_scrfd(); IFaceDetecter new_anticonv(); int detect_face(IFaceDetecter d, const unsigned char* rgbdata, int img_width, int img_height, FaceInfoVector* faces); #ifdef __cplusplus diff --git a/src/face/detecter/anticonv/anticonv.cpp b/src/face/detecter/anticonv/anticonv.cpp index a3b1dab..7bbc90c 100644 --- a/src/face/detecter/anticonv/anticonv.cpp +++ b/src/face/detecter/anticonv/anticonv.cpp @@ -5,44 +5,26 @@ #endif // OV_VULKAN namespace ovface { -AntiConv::AntiConv() : - anticonv_net_(new ncnn::Net()), - initialized_(false) { -#ifdef OV_VULKAN - anticonv_net_->opt.use_vulkan_compute = true; -#endif // OV_VULKAN - -} - -AntiConv::~AntiConv() { - if (anticonv_net_) { - anticonv_net_->clear(); - } -} int AntiConv::LoadModel(const char * root_path) { - std::string param_file = std::string(root_path) + "/param"; - std::string bin_file = std::string(root_path) + "/bin"; - if (anticonv_net_->load_param(param_file.c_str()) == -1 || - anticonv_net_->load_model(bin_file.c_str()) == -1) { - return 10000; - } - + int ret = Estimator::LoadModel(root_path); + if (ret != 0) { + return ret; + } // generate anchors for (int i = 0; i < 3; ++i) { ANCHORS anchors; if (0 == i) { - GenerateAnchors(16, { 1.0f }, { 32, 16 }, &anchors); + GenerateAnchors(16, { 1.0f }, { 32, 16 }, &anchors, num_threads); } else if (1 == i) { - GenerateAnchors(16, { 1.0f }, { 8, 4 }, &anchors); + GenerateAnchors(16, { 1.0f }, { 8, 4 }, &anchors, num_threads); } else { - GenerateAnchors(16, { 1.0f }, { 2, 1 }, &anchors); + GenerateAnchors(16, { 1.0f }, { 2, 1 }, &anchors, num_threads); } anchors_generated_.push_back(anchors); } - initialized_ = true; return 0; } @@ -60,7 +42,7 @@ int AntiConv::DetectFace(const unsigned char* rgbdata, float factor_x = static_cast(img_width) / inputSize_.width; float factor_y = static_cast(img_height) / inputSize_.height; - ncnn::Extractor ex = anticonv_net_->create_extractor(); + ncnn::Extractor ex = net_->create_extractor(); ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, inputSize_.width, inputSize_.height); ex.input("data", in); @@ -115,9 +97,9 @@ int AntiConv::DetectFace(const unsigned char* rgbdata, FaceInfo face_info; memset(&face_info, 0, sizeof(face_info)); - face_info.score_ = score; + face_info.score = score; face_info.mask_ = (prob > maskThreshold_); - face_info.location_ = curr_box; + face_info.rect = curr_box; faces_tmp.push_back(face_info); } } diff --git a/src/face/detecter/anticonv/anticonv.hpp b/src/face/detecter/anticonv/anticonv.hpp index 9524e1c..23e91b7 100644 --- a/src/face/detecter/anticonv/anticonv.hpp +++ b/src/face/detecter/anticonv/anticonv.hpp @@ -8,17 +8,13 @@ namespace ovface { using ANCHORS = std::vector; class AntiConv : public Detecter { public: - AntiConv(); - ~AntiConv(); int LoadModel(const char* root_path); int 
DetectFace(const unsigned char* rgbdata, int img_width, int img_height, std::vector* faces); private: - ncnn::Net* anticonv_net_; std::vector anchors_generated_; - bool initialized_; const int RPNs_[3] = { 32, 16, 8 }; const Size inputSize_ = { 640, 640 }; const float iouThreshold_ = 0.4f; diff --git a/src/face/detecter/centerface/centerface.cpp b/src/face/detecter/centerface/centerface.cpp index a445237..6bc61ac 100644 --- a/src/face/detecter/centerface/centerface.cpp +++ b/src/face/detecter/centerface/centerface.cpp @@ -5,31 +5,6 @@ #endif // OV_VULKAN namespace ovface { -CenterFace::CenterFace() { - centernet_ = new ncnn::Net(); - initialized_ = false; -#ifdef OV_VULKAN - centernet_->opt.use_vulkan_compute = true; -#endif // MIRROR_VULKAN -} - -CenterFace::~CenterFace(){ - if (centernet_) { - centernet_->clear(); - } -} - -int CenterFace::LoadModel(const char* root_path) { - std::string param_file = std::string(root_path) + "/param"; - std::string model_file = std::string(root_path) + "/bin"; - if (centernet_->load_param(param_file.c_str()) == -1 || - centernet_->load_model(model_file.c_str()) == -1) { - return 10000; - } - - initialized_ = true; - return 0; -} int CenterFace::DetectFace(const unsigned char* rgbdata, int img_width, int img_height, @@ -49,7 +24,7 @@ int CenterFace::DetectFace(const unsigned char* rgbdata, ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, img_width_new, img_height_new); - ncnn::Extractor ex = centernet_->create_extractor(); + ncnn::Extractor ex = net_->create_extractor(); ex.input("input.1", in); ncnn::Mat mat_heatmap, mat_scale, mat_offset, mat_landmark; ex.extract("537", mat_heatmap); @@ -78,11 +53,11 @@ int CenterFace::DetectFace(const unsigned char* rgbdata, float xmax = fminf(xmin + s1, img_width_new); FaceInfo face_info; - face_info.score_ = score; - face_info.location_.x = scale_x * xmin; - face_info.location_.y = scale_y * ymin; - face_info.location_.width = scale_x * (xmax - xmin); - face_info.location_.height = scale_y * (ymax - ymin); + face_info.score = score; + face_info.rect.x = scale_x * xmin; + face_info.rect.y = scale_y * ymin; + face_info.rect.width = scale_x * (xmax - xmin); + face_info.rect.height = scale_y * (ymax - ymin); for (int num = 0; num < 5; ++num) { face_info.keypoints_[num ] = scale_x * (s1 * mat_landmark.channel(2 * num + 1)[index] + xmin); diff --git a/src/face/detecter/centerface/centerface.hpp b/src/face/detecter/centerface/centerface.hpp index 77012b9..e9cc598 100644 --- a/src/face/detecter/centerface/centerface.hpp +++ b/src/face/detecter/centerface/centerface.hpp @@ -8,18 +8,13 @@ namespace ovface { class CenterFace : public Detecter { public: - CenterFace(); - ~CenterFace(); - int LoadModel(const char* root_path); int DetectFace(const unsigned char* rgbdata, int img_width, int img_height, std::vector* faces); private: - ncnn::Net* centernet_ = nullptr; const float scoreThreshold_ = 0.5f; const float nmsThreshold_ = 0.5f; - bool initialized_; }; } diff --git a/src/face/detecter/detecter.cpp b/src/face/detecter/detecter.cpp index 44ed743..4ae0f37 100644 --- a/src/face/detecter/detecter.cpp +++ b/src/face/detecter/detecter.cpp @@ -4,6 +4,7 @@ #include "retinaface/retinaface.hpp" #include "anticonv/anticonv.hpp" #include "yoloface/yoloface.hpp" +#include "scrfd/scrfd.hpp" IFaceDetecter new_retinaface() { return new ovface::RetinaFace(); @@ -21,6 +22,10 @@ IFaceDetecter new_yoloface() { return new ovface::YoloFace(); } +IFaceDetecter new_scrfd() { + return new 
ovface::Scrfd(); +} + IFaceDetecter new_anticonv() { return new ovface::AntiConv(); } @@ -58,6 +63,10 @@ Detecter* YoloFaceFactory::CreateDetecter() { return new YoloFace(); } +Detecter* ScrfdFactory::CreateDetecter() { + return new Scrfd(); +} + Detecter* AnticonvFactory::CreateDetecter() { return new AntiConv(); } diff --git a/src/face/detecter/detecter.hpp b/src/face/detecter/detecter.hpp index 80f8c0a..af4acfc 100644 --- a/src/face/detecter/detecter.hpp +++ b/src/face/detecter/detecter.hpp @@ -7,7 +7,6 @@ namespace ovface { // 抽象类 class Detecter: public ov::Estimator { public: - virtual ~Detecter() {}; virtual int DetectFace(const unsigned char* rgbdata, int img_width, int img_height, std::vector* faces) = 0; @@ -51,6 +50,13 @@ public: Detecter* CreateDetecter(); }; +class ScrfdFactory : public DetecterFactory { +public: + ScrfdFactory() {} + ~ScrfdFactory() {} + Detecter* CreateDetecter(); +}; + class AnticonvFactory : public DetecterFactory { public: AnticonvFactory() {} diff --git a/src/face/detecter/mtcnn/mtcnn.cpp b/src/face/detecter/mtcnn/mtcnn.cpp index 4eb80f2..fe7d1e9 100644 --- a/src/face/detecter/mtcnn/mtcnn.cpp +++ b/src/face/detecter/mtcnn/mtcnn.cpp @@ -32,6 +32,19 @@ Mtcnn::~Mtcnn() { } } +void Mtcnn::set_num_threads(int n) { + num_threads = n; + if (pnet_) { + pnet_->opt.num_threads = n; + } + if (rnet_) { + rnet_->opt.num_threads = n; + } + if (onet_) { + onet_->opt.num_threads = n; + } +} + int Mtcnn::LoadModel(const char * root_path) { std::string pnet_param = std::string(root_path) + "/pnet.param"; std::string pnet_bin = std::string(root_path) + "/pnet.bin"; @@ -141,12 +154,12 @@ int Mtcnn::PDetect(const ncnn::Mat & img_in, int bbox_height = y2 - y1 + 1; FaceInfo face_info; - face_info.score_ = score; - face_info.location_.x = x1 + x1_reg * bbox_width; - face_info.location_.y = y1 + y1_reg * bbox_height; - face_info.location_.width = x2 + x2_reg * bbox_width - face_info.location_.x; - face_info.location_.height = y2 + y2_reg * bbox_height - face_info.location_.y; - face_info.location_ = face_info.location_ & Rect(0, 0, width, height); + face_info.score = score; + face_info.rect.x = x1 + x1_reg * bbox_width; + face_info.rect.y = y1 + y1_reg * bbox_height; + face_info.rect.width = x2 + x2_reg * bbox_width - face_info.rect.x; + face_info.rect.height = y2 + y2_reg * bbox_height - face_info.rect.y; + face_info.rect = face_info.rect & Rect(0, 0, width, height); first_bboxes->push_back(face_info); } } @@ -159,13 +172,13 @@ int Mtcnn::RDetect(const ncnn::Mat & img_in, std::vector* second_bboxes) { second_bboxes->clear(); for (int i = 0; i < static_cast(first_bboxes.size()); ++i) { - Rect face = first_bboxes.at(i).location_ & Rect(0, 0, img_in.w, img_in.h); + Rect face = first_bboxes.at(i).rect & Rect(0, 0, img_in.w, img_in.h); ncnn::Mat img_face, img_resized; ncnn::copy_cut_border(img_in, img_face, face.y, img_in.h - face.br().y, face.x, img_in.w - face.br().x); ncnn::resize_bilinear(img_face, img_resized, 24, 24); ncnn::Extractor ex = rnet_->create_extractor(); ex.set_light_mode(true); - ex.set_num_threads(2); + // ex.set_num_threads(2); ex.input("data", img_resized); ncnn::Mat score_mat, location_mat; ex.extract("prob1", score_mat); @@ -178,13 +191,13 @@ int Mtcnn::RDetect(const ncnn::Mat & img_in, float h_reg = location_mat[3]; FaceInfo face_info; - face_info.score_ = score; - face_info.location_.x = face.x + x_reg * face.width; - face_info.location_.y = face.y + y_reg * face.height; - face_info.location_.width = face.x + face.width + - w_reg * face.width - 
face_info.location_.x; - face_info.location_.height = face.y + face.height + - h_reg * face.height - face_info.location_.y; + face_info.score = score; + face_info.rect.x = face.x + x_reg * face.width; + face_info.rect.y = face.y + y_reg * face.height; + face_info.rect.width = face.x + face.width + + w_reg * face.width - face_info.rect.x; + face_info.rect.height = face.y + face.height + + h_reg * face.height - face_info.rect.y; second_bboxes->push_back(face_info); } return 0; @@ -195,14 +208,14 @@ int Mtcnn::ODetect(const ncnn::Mat & img_in, std::vector* third_bboxes) { third_bboxes->clear(); for (int i = 0; i < static_cast(second_bboxes.size()); ++i) { - Rect face = second_bboxes.at(i).location_ & Rect(0, 0, img_in.w, img_in.h); + Rect face = second_bboxes.at(i).rect & Rect(0, 0, img_in.w, img_in.h); ncnn::Mat img_face, img_resized; ncnn::copy_cut_border(img_in, img_face, face.y, img_in.h - face.br().y, face.x, img_in.w - face.br().x); ncnn::resize_bilinear(img_face, img_resized, 48, 48); ncnn::Extractor ex = onet_->create_extractor(); ex.set_light_mode(true); - ex.set_num_threads(2); + // ex.set_num_threads(2); ex.input("data", img_resized); ncnn::Mat score_mat, location_mat, keypoints_mat; ex.extract("prob1", score_mat); @@ -216,13 +229,13 @@ int Mtcnn::ODetect(const ncnn::Mat & img_in, float h_reg = location_mat[3]; FaceInfo face_info; - face_info.score_ = score; - face_info.location_.x = face.x + x_reg * face.width; - face_info.location_.y = face.y + y_reg * face.height; - face_info.location_.width = face.x + face.width + - w_reg * face.width - face_info.location_.x; - face_info.location_.height = face.y + face.height + - h_reg * face.height - face_info.location_.y; + face_info.score = score; + face_info.rect.x = face.x + x_reg * face.width; + face_info.rect.y = face.y + y_reg * face.height; + face_info.rect.width = face.x + face.width + + w_reg * face.width - face_info.rect.x; + face_info.rect.height = face.y + face.height + + h_reg * face.height - face_info.rect.y; for (int num = 0; num < 5; num++) { face_info.keypoints_[num] = face.x + face.width * keypoints_mat[num]; @@ -238,15 +251,15 @@ int Mtcnn::Refine(std::vector* bboxes, const Size max_size) { int num_boxes = static_cast(bboxes->size()); for (int i = 0; i < num_boxes; ++i) { FaceInfo face_info = bboxes->at(i); - int width = face_info.location_.width; - int height = face_info.location_.height; + int width = face_info.rect.width; + int height = face_info.rect.height; float max_side = fmaxf(width, height); - face_info.location_.x = face_info.location_.x + 0.5 * width - 0.5 * max_side; - face_info.location_.y = face_info.location_.y + 0.5 * height - 0.5 * max_side; - face_info.location_.width = max_side; - face_info.location_.height = max_side; - face_info.location_ = face_info.location_ & Rect(0, 0, max_size.width, max_size.height); + face_info.rect.x = face_info.rect.x + 0.5 * width - 0.5 * max_side; + face_info.rect.y = face_info.rect.y + 0.5 * height - 0.5 * max_side; + face_info.rect.width = max_side; + face_info.rect.height = max_side; + face_info.rect = face_info.rect & Rect(0, 0, max_size.width, max_size.height); bboxes->at(i) = face_info; } diff --git a/src/face/detecter/mtcnn/mtcnn.hpp b/src/face/detecter/mtcnn/mtcnn.hpp index 461fe57..d4219e5 100644 --- a/src/face/detecter/mtcnn/mtcnn.hpp +++ b/src/face/detecter/mtcnn/mtcnn.hpp @@ -11,6 +11,7 @@ public: Mtcnn(); ~Mtcnn(); int LoadModel(const char* root_path); + void set_num_threads(int n); int DetectFace(const unsigned char* rgbdata, int img_width, int img_height, 
std::vector* faces); diff --git a/src/face/detecter/retinaface/retinaface.cpp b/src/face/detecter/retinaface/retinaface.cpp index 93a82b4..50a03df 100644 --- a/src/face/detecter/retinaface/retinaface.cpp +++ b/src/face/detecter/retinaface/retinaface.cpp @@ -5,44 +5,26 @@ #endif // OV_VULKAN namespace ovface { -RetinaFace::RetinaFace() : - retina_net_(new ncnn::Net()), - initialized_(false) { -#ifdef OV_VULKAN - retina_net_->opt.use_vulkan_compute = true; -#endif // OV_VULKAN - -} - -RetinaFace::~RetinaFace() { - if (retina_net_) { - retina_net_->clear(); - } -} int RetinaFace::LoadModel(const char * root_path) { - std::string fd_param = std::string(root_path) + "/param"; - std::string fd_bin = std::string(root_path) + "/bin"; - if (retina_net_->load_param(fd_param.c_str()) == -1 || - retina_net_->load_model(fd_bin.c_str()) == -1) { - return 10000; - } - + int ret = Estimator::LoadModel(root_path); + if (ret != 0) { + return ret; + } // generate anchors for (int i = 0; i < 3; ++i) { ANCHORS anchors; if (0 == i) { - GenerateAnchors(16, { 1.0f }, { 32, 16 }, &anchors); + GenerateAnchors(16, { 1.0f }, { 32, 16 }, &anchors, num_threads); } else if (1 == i) { - GenerateAnchors(16, { 1.0f }, { 8, 4 }, &anchors); + GenerateAnchors(16, { 1.0f }, { 8, 4 }, &anchors, num_threads); } else { - GenerateAnchors(16, { 1.0f }, { 2, 1 }, &anchors); + GenerateAnchors(16, { 1.0f }, { 2, 1 }, &anchors, num_threads); } anchors_generated_.push_back(anchors); } - initialized_ = true; return 0; } @@ -60,7 +42,7 @@ int RetinaFace::DetectFace(const unsigned char* rgbdata, float factor_x = static_cast(img_width) / inputSize_.width; float factor_y = static_cast(img_height) / inputSize_.height; - ncnn::Extractor ex = retina_net_->create_extractor(); + ncnn::Extractor ex = net_->create_extractor(); ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, inputSize_.width, inputSize_.height); ex.input("data", in); @@ -131,8 +113,8 @@ int RetinaFace::DetectFace(const unsigned char* rgbdata, face_info.keypoints_[k + 5] = fminf(fmaxf(y * factor_y, 0.0f), img_height - 1); } - face_info.score_ = score; - face_info.location_ = curr_box; + face_info.score = score; + face_info.rect = curr_box; faces_tmp.push_back(face_info); } } diff --git a/src/face/detecter/retinaface/retinaface.hpp b/src/face/detecter/retinaface/retinaface.hpp index 21a2317..c2da09d 100644 --- a/src/face/detecter/retinaface/retinaface.hpp +++ b/src/face/detecter/retinaface/retinaface.hpp @@ -5,20 +5,16 @@ #include "net.h" namespace ovface { -using ANCHORS = std::vector; +using ANCHORS = std::vector; class RetinaFace : public Detecter { public: - RetinaFace(); - ~RetinaFace(); int LoadModel(const char* root_path); int DetectFace(const unsigned char* rgbdata, int img_width, int img_height, std::vector* faces); private: - ncnn::Net* retina_net_; std::vector anchors_generated_; - bool initialized_; const int RPNs_[3] = { 32, 16, 8 }; const Size inputSize_ = { 300, 300 }; const float iouThreshold_ = 0.4f; diff --git a/src/face/detecter/scrfd/scrfd.cpp b/src/face/detecter/scrfd/scrfd.cpp new file mode 100644 index 0000000..00a7931 --- /dev/null +++ b/src/face/detecter/scrfd/scrfd.cpp @@ -0,0 +1,262 @@ +#include "scrfd.hpp" + +#ifdef OV_VULKAN +#include "gpu.h" +#endif // OV_VULKAN + +namespace ovface { +static void generate_scrfd_proposals(const ncnn::Mat& anchors, int feat_stride, const ncnn::Mat& score_blob, const ncnn::Mat& bbox_blob, const ncnn::Mat& kps_blob, float prob_threshold, std::vector& faceobjects) +{ + int w = 
score_blob.w; + int h = score_blob.h; + + // generate face proposal from bbox deltas and shifted anchors + const int num_anchors = anchors.h; + + for (int q = 0; q < num_anchors; q++) + { + const float* anchor = anchors.row(q); + + const ncnn::Mat score = score_blob.channel(q); + const ncnn::Mat bbox = bbox_blob.channel_range(q * 4, 4); + + // shifted anchor + float anchor_y = anchor[1]; + + float anchor_w = anchor[2] - anchor[0]; + float anchor_h = anchor[3] - anchor[1]; + + for (int i = 0; i < h; i++) + { + float anchor_x = anchor[0]; + + for (int j = 0; j < w; j++) + { + int index = i * w + j; + + float prob = score[index]; + + if (prob >= prob_threshold) + { + // insightface/detection/scrfd/mmdet/models/dense_heads/scrfd_head.py _get_bboxes_single() + float dx = bbox.channel(0)[index] * feat_stride; + float dy = bbox.channel(1)[index] * feat_stride; + float dw = bbox.channel(2)[index] * feat_stride; + float dh = bbox.channel(3)[index] * feat_stride; + + // insightface/detection/scrfd/mmdet/core/bbox/transforms.py distance2bbox() + float cx = anchor_x + anchor_w * 0.5f; + float cy = anchor_y + anchor_h * 0.5f; + + float x0 = cx - dx; + float y0 = cy - dy; + float x1 = cx + dw; + float y1 = cy + dh; + + FaceInfo obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = x1 - x0 + 1; + obj.rect.height = y1 - y0 + 1; + obj.score = prob; + + if (!kps_blob.empty()) + { + const ncnn::Mat kps = kps_blob.channel_range(q * 10, 10); + + obj.keypoints_[0] = cx + kps.channel(0)[index] * feat_stride; + obj.keypoints_[5] = cy + kps.channel(1)[index] * feat_stride; + obj.keypoints_[1] = cx + kps.channel(2)[index] * feat_stride; + obj.keypoints_[6] = cy + kps.channel(3)[index] * feat_stride; + obj.keypoints_[2] = cx + kps.channel(4)[index] * feat_stride; + obj.keypoints_[7] = cy + kps.channel(5)[index] * feat_stride; + obj.keypoints_[3] = cx + kps.channel(6)[index] * feat_stride; + obj.keypoints_[8] = cy + kps.channel(7)[index] * feat_stride; + obj.keypoints_[4] = cx + kps.channel(8)[index] * feat_stride; + obj.keypoints_[9] = cy + kps.channel(9)[index] * feat_stride; + } + + faceobjects.push_back(obj); + } + + anchor_x += feat_stride; + } + + anchor_y += feat_stride; + } + } +} + +int Scrfd::DetectFace(const unsigned char* rgbdata, + int img_width, int img_height, + std::vector* faces) { + faces->clear(); + if (!initialized_) { + return 10000; + } + if (rgbdata == 0){ + return 10001; + } + + // pad to multiple of 32 + int w = img_width; + int h = img_height; + float scale = 1.f; + if (w > h) + { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } + else + { + scale = (float)target_size / h; + h = target_size; + w = w * scale; + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, w, h); + + // pad to target_size rectangle + float wpad = (float)(w + 31) / 32 * 32 - w; + float hpad = (float)(h + 31) / 32 * 32 - h; + ncnn::Mat in_pad; + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f); + + const float mean_vals[3] = {127.5f, 127.5f, 127.5f}; + const float norm_vals[3] = {1/128.f, 1/128.f, 1/128.f}; + in_pad.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = net_->create_extractor(); + + ex.input("input.1", in_pad); + + std::vector faceproposals; + + // stride 8 + { + ncnn::Mat score_blob, bbox_blob, kps_blob; + ex.extract("score_8", score_blob); + ex.extract("bbox_8", bbox_blob); + if (has_kps) + ex.extract("kps_8", kps_blob); + + const int 
base_size = 16; + const int feat_stride = 8; + ncnn::Mat ratios(1); + ratios[0] = 1.f; + ncnn::Mat scales(2); + scales[0] = 1.f; + scales[1] = 2.f; + ncnn::Mat anchors = ov::generate_anchors(base_size, ratios, scales); + + std::vector faceobjects32; + generate_scrfd_proposals(anchors, feat_stride, score_blob, bbox_blob, kps_blob, prob_threshold, faceobjects32); + + faceproposals.insert(faceproposals.end(), faceobjects32.begin(), faceobjects32.end()); + } + + // stride 16 + { + ncnn::Mat score_blob, bbox_blob, kps_blob; + ex.extract("score_16", score_blob); + ex.extract("bbox_16", bbox_blob); + if (has_kps) + ex.extract("kps_16", kps_blob); + + const int base_size = 64; + const int feat_stride = 16; + ncnn::Mat ratios(1); + ratios[0] = 1.f; + ncnn::Mat scales(2); + scales[0] = 1.f; + scales[1] = 2.f; + ncnn::Mat anchors = ov::generate_anchors(base_size, ratios, scales); + + std::vector faceobjects16; + generate_scrfd_proposals(anchors, feat_stride, score_blob, bbox_blob, kps_blob, prob_threshold, faceobjects16); + + faceproposals.insert(faceproposals.end(), faceobjects16.begin(), faceobjects16.end()); + } + + // stride 32 + { + ncnn::Mat score_blob, bbox_blob, kps_blob; + ex.extract("score_32", score_blob); + ex.extract("bbox_32", bbox_blob); + if (has_kps) + ex.extract("kps_32", kps_blob); + + const int base_size = 256; + const int feat_stride = 32; + ncnn::Mat ratios(1); + ratios[0] = 1.f; + ncnn::Mat scales(2); + scales[0] = 1.f; + scales[1] = 2.f; + ncnn::Mat anchors = ov::generate_anchors(base_size, ratios, scales); + + std::vector faceobjects8; + generate_scrfd_proposals(anchors, feat_stride, score_blob, bbox_blob, kps_blob, prob_threshold, faceobjects8); + + faceproposals.insert(faceproposals.end(), faceobjects8.begin(), faceobjects8.end()); + } + + // sort all proposals by score from highest to lowest + qsort_descent_inplace(faceproposals); + + // apply nms with nms_threshold + std::vector picked; + nms_sorted_bboxes(faceproposals, picked, nms_threshold); + + int face_count = picked.size(); + + for (int i = 0; i < face_count; i++) + { + FaceInfo obj = faceproposals[picked[i]]; + + // adjust offset to original unpadded + float x0 = (obj.rect.x - (wpad / 2)) / scale; + float y0 = (obj.rect.y - (hpad / 2)) / scale; + float x1 = (obj.rect.x + obj.rect.width - (wpad / 2)) / scale; + float y1 = (obj.rect.y + obj.rect.height - (hpad / 2)) / scale; + + x0 = std::max(std::min(x0, (float)img_width - 1), 0.f); + y0 = std::max(std::min(y0, (float)img_height - 1), 0.f); + x1 = std::max(std::min(x1, (float)img_width - 1), 0.f); + y1 = std::max(std::min(y1, (float)img_height - 1), 0.f); + + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = x1 - x0; + obj.rect.height = y1 - y0; + + if (has_kps) + { + float x0 = (obj.keypoints_[0] - (wpad / 2)) / scale; + float y0 = (obj.keypoints_[5] - (hpad / 2)) / scale; + float x1 = (obj.keypoints_[1] - (wpad / 2)) / scale; + float y1 = (obj.keypoints_[6] - (hpad / 2)) / scale; + float x2 = (obj.keypoints_[2] - (wpad / 2)) / scale; + float y2 = (obj.keypoints_[7] - (hpad / 2)) / scale; + float x3 = (obj.keypoints_[3] - (wpad / 2)) / scale; + float y3 = (obj.keypoints_[8] - (hpad / 2)) / scale; + float x4 = (obj.keypoints_[4] - (wpad / 2)) / scale; + float y4 = (obj.keypoints_[9] - (hpad / 2)) / scale; + + obj.keypoints_[0] = std::max(std::min(x0, (float)img_width - 1), 0.f); + obj.keypoints_[5] = std::max(std::min(y0, (float)img_height - 1), 0.f); + obj.keypoints_[1] = std::max(std::min(x1, (float)img_width - 1), 0.f); + obj.keypoints_[6] = 
std::max(std::min(y1, (float)img_height - 1), 0.f); + obj.keypoints_[2] = std::max(std::min(x2, (float)img_width - 1), 0.f); + obj.keypoints_[7] = std::max(std::min(y2, (float)img_height - 1), 0.f); + obj.keypoints_[3] = std::max(std::min(x3, (float)img_width - 1), 0.f); + obj.keypoints_[8] = std::max(std::min(y3, (float)img_height - 1), 0.f); + obj.keypoints_[4] = std::max(std::min(x4, (float)img_width - 1), 0.f); + obj.keypoints_[9] = std::max(std::min(y4, (float)img_height - 1), 0.f); + } + faces->push_back(obj); + } + return 0; +} +} diff --git a/src/face/detecter/scrfd/scrfd.hpp b/src/face/detecter/scrfd/scrfd.hpp new file mode 100644 index 0000000..dd411bf --- /dev/null +++ b/src/face/detecter/scrfd/scrfd.hpp @@ -0,0 +1,26 @@ +#ifndef _SCRFD_DETECT_H_ +#define _SCRFD_DETECT_H_ + +#include "../detecter.hpp" +#include "net.h" + +namespace ovface { +class Scrfd : public Detecter { +public: + int DetectFace(const unsigned char* rgbdata, + int img_width, int img_height, + std::vector* faces); + +private: + const int target_size = 640; + const float meanVals[3] = { 123.675f, 116.28f, 103.53f }; + const float normVals[3] = { 0.01712475f, 0.0175f, 0.01742919f }; + const float prob_threshold = 0.5f; + const float nms_threshold = 0.45f; + const bool has_kps = false; +}; + +} + +#endif // !_RETINAFACE_H_ + diff --git a/src/face/detecter/yoloface/yoloface.cpp b/src/face/detecter/yoloface/yoloface.cpp index 32c75f1..a732c79 100644 --- a/src/face/detecter/yoloface/yoloface.cpp +++ b/src/face/detecter/yoloface/yoloface.cpp @@ -92,7 +92,7 @@ static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn: obj.rect.width = x1 - x0; obj.rect.height = y1 - y0; obj.label = class_index; - obj.prob = confidence; + obj.score = confidence; for (int l = 0; l < 5; l++) { @@ -107,28 +107,10 @@ static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn: } } -YoloFace::YoloFace() : - net_ (new ncnn::Net()), - initialized_(false) { -#ifdef OV_VULKAN - net_->opt.use_vulkan_compute = true; -#endif // OV_VULKAN -} - -YoloFace::~YoloFace() { - net_->clear(); -} int YoloFace::LoadModel(const char * root_path) { register_yolov5focus(net_); - std::string param_file = std::string(root_path) + "/param"; - std::string bin_file = std::string(root_path) + "/bin"; - if (net_->load_param(param_file.c_str()) == -1 || - net_->load_model(bin_file.c_str()) == -1) { - return 10000; - } - initialized_ = true; - return 0; + return Estimator::LoadModel(root_path); } int YoloFace::DetectFace(const unsigned char* rgbdata, @@ -274,7 +256,7 @@ int YoloFace::DetectFace(const unsigned char* rgbdata, obj.rect.height = y1 - y0; FaceInfo info; - info.location_ = obj.rect; + info.rect = obj.rect; for (int k = 0; k < 5; ++k) { info.keypoints_[k] = obj.pts[k].x; info.keypoints_[k + 5] = obj.pts[k].y; diff --git a/src/face/detecter/yoloface/yoloface.hpp b/src/face/detecter/yoloface/yoloface.hpp index 7a5276f..5e63060 100644 --- a/src/face/detecter/yoloface/yoloface.hpp +++ b/src/face/detecter/yoloface/yoloface.hpp @@ -7,16 +7,12 @@ namespace ovface { class YoloFace : public Detecter { public: - YoloFace(); - ~YoloFace(); - int LoadModel(const char* root_path); + int LoadModel(const char * root_path); int DetectFace(const unsigned char* rgbdata, int img_width, int img_height, std::vector* faces); private: - ncnn::Net* net_; - bool initialized_; const int target_size = 640; const float mean_vals[3] = {127.f, 127.f, 127.f}; const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f}; diff --git 
a/src/face/hopenet/hopenet.cpp b/src/face/hopenet/hopenet.cpp index b53f47a..2e50405 100644 --- a/src/face/hopenet/hopenet.cpp +++ b/src/face/hopenet/hopenet.cpp @@ -21,34 +21,15 @@ namespace ovface { #define NEAR_0 1e-10 #define ODIM 66 -Hopenet::Hopenet(): - net_(new ncnn::Net()), - initialized_(false) { -#ifdef OV_VULKAN - net_->opt.use_vulkan_compute = true; -#endif // OV_VULKAN -} - -Hopenet::~Hopenet() { - if (net_) { - net_->clear(); - } -} - -int Hopenet::LoadModel(const char * root_path) { - std::string param_file = std::string(root_path) + "/param"; - std::string bin_file = std::string(root_path) + "/bin"; - if (net_->load_param(param_file.c_str()) == -1 || - net_->load_model(bin_file.c_str()) == -1) { - return 10000; - } +int Hopenet::LoadModel(const char* root_path) { + int ret = Estimator::LoadModel(root_path); + if (ret != 0) { + return ret; + } for (uint i=1; i<67; i++) idx_tensor[i-1] = i; - initialized_ = true; - - return 0; + return 0; } - int Hopenet::Detect(const unsigned char* rgbdata, int img_width, int img_height, Rect roi, HeadPose* head_angles) { @@ -80,15 +61,15 @@ int Hopenet::Detect(const unsigned char* rgbdata, float* pred_roll = output.range(ODIM, ODIM*2); float* pred_yaw = output.range(ODIM*2, ODIM*3); - softmax(pred_pitch, ODIM); - softmax(pred_roll, ODIM); - softmax(pred_yaw, ODIM); + this->softmax(pred_pitch, ODIM); + this->softmax(pred_roll, ODIM); + this->softmax(pred_yaw, ODIM); // printArray(pred_pitch, ODIM); - head_angles->pitch = getAngle(pred_pitch, ODIM); - head_angles->roll = getAngle(pred_roll, ODIM); - head_angles->yaw = getAngle(pred_yaw, ODIM); + head_angles->pitch = this->getAngle(pred_pitch, ODIM); + head_angles->roll = this->getAngle(pred_roll, ODIM); + head_angles->yaw = this->getAngle(pred_yaw, ODIM); free(img_face); diff --git a/src/face/hopenet/hopenet.hpp b/src/face/hopenet/hopenet.hpp index 81a476d..2ed3744 100644 --- a/src/face/hopenet/hopenet.hpp +++ b/src/face/hopenet/hopenet.hpp @@ -7,16 +7,12 @@ namespace ovface { class Hopenet : public ov::Estimator { public: - Hopenet(); - ~Hopenet(); - int LoadModel(const char* root_path); + int LoadModel(const char* root_path); int Detect(const unsigned char* rgbdata, int img_width, int img_height, Rect roi, HeadPose* euler_angles); private: - ncnn::Net* net_; - bool initialized_; float idx_tensor[66]; void softmax(float* z, size_t el); double getAngle(float* prediction, size_t len); diff --git a/src/face/landmarker.h b/src/face/landmarker.h index 3b94a27..50aa8fd 100644 --- a/src/face/landmarker.h +++ b/src/face/landmarker.h @@ -10,6 +10,7 @@ extern "C" { typedef void* IFaceLandmarker; IFaceLandmarker new_insightface(); IFaceLandmarker new_zq(); + IFaceLandmarker new_scrfd_landmarker(); int extract_face_keypoints(IFaceLandmarker m, const unsigned char* rgbdata, int img_width, int img_height, const Rect* face, Point2fVector* keypoints); #ifdef __cplusplus } diff --git a/src/face/landmarker/insightface/insightface.cpp b/src/face/landmarker/insightface/insightface.cpp index 090c020..14e55ba 100644 --- a/src/face/landmarker/insightface/insightface.cpp +++ b/src/face/landmarker/insightface/insightface.cpp @@ -6,34 +6,12 @@ #endif // OV_VULKAN namespace ovface { -InsightfaceLandmarker::InsightfaceLandmarker() { - insightface_landmarker_net_ = new ncnn::Net(); - initialized = false; -#ifdef OV_VULKAN - insightface_landmarker_net_->opt.use_vulkan_compute = true; -#endif // OV_VULKAN -} - -InsightfaceLandmarker::~InsightfaceLandmarker() { - insightface_landmarker_net_->clear(); -} - -int 
InsightfaceLandmarker::LoadModel(const char * root_path) { - std::string fl_param = std::string(root_path) + "/param"; - std::string fl_bin = std::string(root_path) + "/bin"; - if (insightface_landmarker_net_->load_param(fl_param.c_str()) == -1 || - insightface_landmarker_net_->load_model(fl_bin.c_str()) == -1) { - return 10000; - } - initialized = true; - return 0; -} int InsightfaceLandmarker::ExtractKeypoints(const unsigned char* rgbdata, int img_width, int img_height, const ov::Rect & face, std::vector* keypoints) { keypoints->clear(); - if (!initialized) { + if (!initialized_) { return 10000; } @@ -42,7 +20,7 @@ int InsightfaceLandmarker::ExtractKeypoints(const unsigned char* rgbdata, } // 1 enlarge the face rect - Rect face_enlarged = face; + ov::Rect face_enlarged = face; const float enlarge_scale = 1.5f; EnlargeRect(enlarge_scale, &face_enlarged); @@ -61,7 +39,7 @@ int InsightfaceLandmarker::ExtractKeypoints(const unsigned char* rgbdata, } // 4 do inference - ncnn::Extractor ex = insightface_landmarker_net_->create_extractor(); + ncnn::Extractor ex = net_->create_extractor(); ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face, ncnn::Mat::PIXEL_RGB, face_enlarged.width, face_enlarged.height, 192, 192); ex.input("data", in); diff --git a/src/face/landmarker/insightface/insightface.hpp b/src/face/landmarker/insightface/insightface.hpp index e00d3df..8448512 100644 --- a/src/face/landmarker/insightface/insightface.hpp +++ b/src/face/landmarker/insightface/insightface.hpp @@ -7,17 +7,10 @@ namespace ovface { class InsightfaceLandmarker : public Landmarker { public: - InsightfaceLandmarker(); - ~InsightfaceLandmarker(); - - int LoadModel(const char* root_path); int ExtractKeypoints(const unsigned char* rgbdata, int img_width, int img_height, const ov::Rect& face, std::vector* keypoints); -private: - ncnn::Net* insightface_landmarker_net_; - bool initialized; }; } diff --git a/src/face/landmarker/landmarker.cpp b/src/face/landmarker/landmarker.cpp index b345828..dd11e86 100644 --- a/src/face/landmarker/landmarker.cpp +++ b/src/face/landmarker/landmarker.cpp @@ -1,6 +1,7 @@ #include "../landmarker.h" #include "zqlandmarker/zqlandmarker.hpp" #include "insightface/insightface.hpp" +#include "scrfd/scrfd.hpp" IFaceLandmarker new_zq() { return new ovface::ZQLandmarker(); @@ -10,6 +11,10 @@ IFaceLandmarker new_insightface() { return new ovface::InsightfaceLandmarker(); } +IFaceLandmarker new_scrfd_landmarker() { + return new ovface::ScrfdLandmarker(); +} + int extract_face_keypoints( IFaceLandmarker m, const unsigned char* rgbdata, @@ -39,4 +44,8 @@ Landmarker* InsightfaceLandmarkerFactory::CreateLandmarker() { return new InsightfaceLandmarker(); } +Landmarker* ScrfdLandmarkerFactory::CreateLandmarker() { + return new ScrfdLandmarker(); +} + } diff --git a/src/face/landmarker/landmarker.hpp b/src/face/landmarker/landmarker.hpp index c98baa3..3f73396 100644 --- a/src/face/landmarker/landmarker.hpp +++ b/src/face/landmarker/landmarker.hpp @@ -7,7 +7,6 @@ namespace ovface { // 抽象类 class Landmarker: public ov::Estimator { public: - virtual ~Landmarker() {}; virtual int ExtractKeypoints(const unsigned char* rgbdata, int img_width, int img_height, const Rect& face, std::vector* keypoints) = 0; @@ -35,6 +34,13 @@ public: ~InsightfaceLandmarkerFactory() {} }; +class ScrfdLandmarkerFactory : public LandmarkerFactory { +public: + ScrfdLandmarkerFactory(){} + Landmarker* CreateLandmarker(); + ~ScrfdLandmarkerFactory() {} +}; + } #endif // !_FACE_LANDMARKER_H_ diff --git 
a/src/face/landmarker/scrfd/scrfd.cpp b/src/face/landmarker/scrfd/scrfd.cpp
new file mode 100644
index 0000000..26b6f1c
--- /dev/null
+++ b/src/face/landmarker/scrfd/scrfd.cpp
@@ -0,0 +1,57 @@
+#include "scrfd.hpp"
+
+#ifdef OV_VULKAN
+#include "gpu.h"
+#endif // OV_VULKAN
+
+namespace ovface {
+
+int ScrfdLandmarker::ExtractKeypoints(const unsigned char* rgbdata,
+    int img_width, int img_height,
+    const ov::Rect & face, std::vector<ov::Point2f>* keypoints) {
+    keypoints->clear();
+    if (!initialized_) {
+        return 10000;
+    }
+
+    if (rgbdata == 0){
+        return 10001;
+    }
+    // 1 enlarge the face rect
+    ov::Rect box = face;
+    const float enlarge_scale = 1.5f;
+    EnlargeRect(enlarge_scale, &box);
+
+    // 2 square the rect
+    RectifyRect(&box);
+
+    box = box & Rect(0, 0, img_width, img_height);
+
+    size_t total_size = box.width * box.height * 3 * sizeof(unsigned char);
+    unsigned char* img_face = (unsigned char*)malloc(total_size);
+    const unsigned char *start_ptr = rgbdata;
+    for(size_t i = 0; i < box.height; ++i) {
+        const unsigned char* srcCursor = start_ptr + ((i + box.y) * img_width + box.x) * 3;
+        unsigned char* dstCursor = img_face + i * box.width * 3;
+        memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * box.width);
+    }
+
+    ncnn::Extractor ex = net_->create_extractor();
+    ncnn::Mat ncnn_in = ncnn::Mat::from_pixels_resize(img_face, ncnn::Mat::PIXEL_RGB, box.width, box.height, 192, 192);
+    ncnn_in.substract_mean_normalize(means, norms);
+    ex.input("input.1",ncnn_in);
+    ncnn::Mat ncnn_out;
+    ex.extract("482",ncnn_out);
+    float *scoredata = (float*)ncnn_out.data;
+    for(int i = 0; i < 468; i++)
+    {
+        ov::Point2f pt;
+        pt.x = scoredata[i*3]*box.width/192 + box.x;
+        pt.y = scoredata[i*3+1]*box.height/192 + box.y;
+        keypoints->push_back(pt);
+    }
+
+    free(img_face);
+    return 0;
+}
+}
diff --git a/src/face/landmarker/scrfd/scrfd.hpp b/src/face/landmarker/scrfd/scrfd.hpp
new file mode 100644
index 0000000..0f4dc9b
--- /dev/null
+++ b/src/face/landmarker/scrfd/scrfd.hpp
@@ -0,0 +1,22 @@
+#ifndef _FACE_SCRFD_LANDMARKER_H_
+#define _FACE_SCRFD_LANDMARKER_H_
+
+#include "../landmarker.hpp"
+#include "net.h"
+
+namespace ovface {
+class ScrfdLandmarker : public Landmarker {
+public:
+    int ExtractKeypoints(const unsigned char* rgbdata,
+        int img_width, int img_height,
+        const ov::Rect& face, std::vector<ov::Point2f>* keypoints);
+
+private:
+    const float means[3] = { 127.5f, 127.5f, 127.5f };
+    const float norms[3] = { 1/127.5f, 1 / 127.5f, 1 / 127.5f };
+};
+
+}
+
+#endif // !_FACE_SCRFD_LANDMARKER_H_
+
diff --git a/src/face/landmarker/zqlandmarker/zqlandmarker.cpp b/src/face/landmarker/zqlandmarker/zqlandmarker.cpp
index f7f4b05..8d22a24 100644
--- a/src/face/landmarker/zqlandmarker/zqlandmarker.cpp
+++ b/src/face/landmarker/zqlandmarker/zqlandmarker.cpp
@@ -1,39 +1,16 @@
 #include "zqlandmarker.hpp"
-#include
 
 #ifdef OV_VULKAN
 #include "gpu.h"
 #endif // OV_VULKAN
 
 namespace ovface {
-ZQLandmarker::ZQLandmarker() {
-    zq_landmarker_net_ = new ncnn::Net();
-    initialized = false;
-#ifdef OV_VULKAN
-    zq_landmarker_net_->opt.use_vulkan_compute = true;
-#endif // OV_VULKAN
-}
-
-ZQLandmarker::~ZQLandmarker() {
-    zq_landmarker_net_->clear();
-}
-
-int ZQLandmarker::LoadModel(const char * root_path) {
-    std::string fl_param = std::string(root_path) + "/param";
-    std::string fl_bin = std::string(root_path) + "/bin";
-    if (zq_landmarker_net_->load_param(fl_param.c_str()) == -1 ||
-        zq_landmarker_net_->load_model(fl_bin.c_str()) == -1) {
-        return 10000;
-    }
-    initialized = true;
-    return 0;
-}
 int ZQLandmarker::ExtractKeypoints(const unsigned
char* rgbdata, int img_width, int img_height, const ov::Rect & face, std::vector* keypoints) { keypoints->clear(); - if (!initialized) { + if (!initialized_) { return 10000; } @@ -49,7 +26,7 @@ int ZQLandmarker::ExtractKeypoints(const unsigned char* rgbdata, unsigned char* dstCursor = img_face + i * face.width * 3; memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * face.width); } - ncnn::Extractor ex = zq_landmarker_net_->create_extractor(); + ncnn::Extractor ex = net_->create_extractor(); ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face, ncnn::Mat::PIXEL_RGB, face.width, face.height, 112, 112); in.substract_mean_normalize(meanVals, normVals); diff --git a/src/face/landmarker/zqlandmarker/zqlandmarker.hpp b/src/face/landmarker/zqlandmarker/zqlandmarker.hpp index 4542292..773db74 100644 --- a/src/face/landmarker/zqlandmarker/zqlandmarker.hpp +++ b/src/face/landmarker/zqlandmarker/zqlandmarker.hpp @@ -7,19 +7,13 @@ namespace ovface { class ZQLandmarker : public Landmarker { public: - ZQLandmarker(); - ~ZQLandmarker(); - - int LoadModel(const char* root_path); int ExtractKeypoints(const unsigned char* rgbdata, int img_width, int img_height, const ov::Rect& face, std::vector* keypoints); private: - ncnn::Net* zq_landmarker_net_; const float meanVals[3] = { 127.5f, 127.5f, 127.5f }; const float normVals[3] = { 0.0078125f, 0.0078125f, 0.0078125f }; - bool initialized; }; } diff --git a/src/face/recognizer/mobilefacenet/mobilefacenet.cpp b/src/face/recognizer/mobilefacenet/mobilefacenet.cpp index e38b3e2..159425f 100644 --- a/src/face/recognizer/mobilefacenet/mobilefacenet.cpp +++ b/src/face/recognizer/mobilefacenet/mobilefacenet.cpp @@ -5,29 +5,6 @@ #endif // OV_VULKAN namespace ovface { -Mobilefacenet::Mobilefacenet() { - mobileface_net_ = new ncnn::Net(); - initialized_ = false; -#ifdef OV_VULKAN - mobileface_net_->opt.use_vulkan_compute = true; -#endif // OV_VULKAN -} - -Mobilefacenet::~Mobilefacenet() { - mobileface_net_->clear(); -} - -int Mobilefacenet::LoadModel(const char * root_path) { - std::string param_file = std::string(root_path) + "/param"; - std::string bin_file = std::string(root_path) + "/bin"; - if (mobileface_net_->load_param(param_file.c_str()) == -1 || - mobileface_net_->load_model(bin_file.c_str()) == -1) { - return 10000; - } - - initialized_ = true; - return 0; -} int Mobilefacenet::ExtractFeature(const unsigned char* rgbdata, int img_width, int img_height, @@ -52,7 +29,7 @@ int Mobilefacenet::ExtractFeature(const unsigned char* rgbdata, ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face, ncnn::Mat::PIXEL_RGB, face.width, face.height, 112, 112); feature->resize(kFaceFeatureDim); - ncnn::Extractor ex = mobileface_net_->create_extractor(); + ncnn::Extractor ex = net_->create_extractor(); ex.input("data", in); ncnn::Mat out; ex.extract("fc1", out); diff --git a/src/face/recognizer/mobilefacenet/mobilefacenet.hpp b/src/face/recognizer/mobilefacenet/mobilefacenet.hpp index 60344e5..1adda52 100644 --- a/src/face/recognizer/mobilefacenet/mobilefacenet.hpp +++ b/src/face/recognizer/mobilefacenet/mobilefacenet.hpp @@ -9,18 +9,11 @@ namespace ovface { class Mobilefacenet : public Recognizer { public: - Mobilefacenet(); - ~Mobilefacenet(); - - int LoadModel(const char* root_path); int ExtractFeature(const unsigned char* rgbdata, int img_width, int img_height, const ov::Rect& face, std::vector* feature); -private: - ncnn::Net* mobileface_net_; - bool initialized_; }; } diff --git a/src/face/recognizer/recognizer.hpp b/src/face/recognizer/recognizer.hpp index 
3ea2ef3..48667dc 100644 --- a/src/face/recognizer/recognizer.hpp +++ b/src/face/recognizer/recognizer.hpp @@ -7,7 +7,6 @@ namespace ovface { class Recognizer: public ov::Estimator { public: - virtual ~Recognizer() {}; virtual int ExtractFeature(const unsigned char* rgbdata, int img_width, int img_height, const ov::Rect& face, diff --git a/src/face/tracker/tracker.cpp b/src/face/tracker/tracker.cpp index c1a57e4..c629120 100644 --- a/src/face/tracker/tracker.cpp +++ b/src/face/tracker/tracker.cpp @@ -45,8 +45,8 @@ int Tracker::Track(const std::vector& curr_faces, std::vector 0) { std::partial_sort(scored_tracked_faces.begin(), @@ -61,10 +61,10 @@ int Tracker::Track(const std::vector& curr_faces, std::vectoritems[i] = ObjectInfo{ o.rect, - o.prob, + o.score, o.label, NULL }; diff --git a/src/hand/detecter/detecter.hpp b/src/hand/detecter/detecter.hpp index b96b53f..9a185a3 100644 --- a/src/hand/detecter/detecter.hpp +++ b/src/hand/detecter/detecter.hpp @@ -5,7 +5,6 @@ namespace ovhand { class Detecter: public ov::Estimator { public: - virtual ~Detecter() {}; virtual int Detect(const unsigned char*rgbdata, int img_width, int img_height, std::vector& rois) = 0; diff --git a/src/hand/detecter/nanodet/nanodet.cpp b/src/hand/detecter/nanodet/nanodet.cpp index 623f1ef..9171910 100644 --- a/src/hand/detecter/nanodet/nanodet.cpp +++ b/src/hand/detecter/nanodet/nanodet.cpp @@ -62,7 +62,7 @@ static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Ma softmax->load_param(pd); ncnn::Option opt; - opt.num_threads = 1; + // opt.num_threads = 1; opt.use_packing_layout = false; softmax->create_pipeline(opt); @@ -101,7 +101,7 @@ static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Ma obj.rect.width = x1 - x0; obj.rect.height = y1 - y0; obj.label = label; - obj.prob = score; + obj.score= score; objects.push_back(obj); } @@ -109,29 +109,6 @@ static void generate_nanodet_proposals(const ncnn::Mat& cls_pred, const ncnn::Ma } } -Nanodet::Nanodet() : - net_ (new ncnn::Net()), - initialized_(false) { -#ifdef OV_VULKAN - net_->opt.use_vulkan_compute = true; -#endif // OV_VULKAN -} - -Nanodet::~Nanodet() { - net_->clear(); -} - -int Nanodet::LoadModel(const char * root_path) { - std::string param_file = std::string(root_path) + "/param"; - std::string bin_file = std::string(root_path) + "/bin"; - if (net_->load_param(param_file.c_str()) == -1 || - net_->load_model(bin_file.c_str()) == -1) { - return 10000; - } - initialized_ = true; - return 0; -} - int Nanodet::Detect(const unsigned char* rgbdata, int img_width, int img_height, std::vector& rois) { diff --git a/src/hand/detecter/nanodet/nanodet.hpp b/src/hand/detecter/nanodet/nanodet.hpp index 2536917..503c304 100644 --- a/src/hand/detecter/nanodet/nanodet.hpp +++ b/src/hand/detecter/nanodet/nanodet.hpp @@ -9,17 +9,11 @@ namespace ovhand { class Nanodet : public Detecter { public: - Nanodet(); - ~Nanodet(); - - int LoadModel(const char* root_path); int Detect(const unsigned char* rgbadata, int img_width, int img_height, std::vector& rois); private: - ncnn::Net* net_; - bool initialized_; const int target_size = 320; const float mean_vals[3] = {103.53f, 116.28f, 123.675f}; const float norm_vals[3] = {1.f / 57.375f, 1.f / 57.12f, 1.f / 58.395f}; diff --git a/src/hand/detecter/yolox/yolox.cpp b/src/hand/detecter/yolox/yolox.cpp index b9f6c36..6021a0c 100644 --- a/src/hand/detecter/yolox/yolox.cpp +++ b/src/hand/detecter/yolox/yolox.cpp @@ -46,7 +46,7 @@ static void generate_yolox_proposals(std::vector grid_strides 
obj.rect.width = w; obj.rect.height = h; obj.label = class_idx; - obj.prob = box_prob; + obj.score= box_prob; objects.push_back(obj); } @@ -57,28 +57,9 @@ static void generate_yolox_proposals(std::vector grid_strides } // point anchor loop } -Yolox::Yolox() : - net_ (new ncnn::Net()), - initialized_(false) { -#ifdef OV_VULKAN - net_->opt.use_vulkan_compute = true; -#endif // OV_VULKAN -} - -Yolox::~Yolox() { - net_->clear(); -} - int Yolox::LoadModel(const char * root_path) { register_yolov5focus(net_); - std::string param_file = std::string(root_path) + "/param"; - std::string bin_file = std::string(root_path) + "/bin"; - if (net_->load_param(param_file.c_str()) == -1 || - net_->load_model(bin_file.c_str()) == -1) { - return 10000; - } - initialized_ = true; - return 0; + return Estimator::LoadModel(root_path); } int Yolox::Detect(const unsigned char* rgbdata, @@ -116,7 +97,6 @@ int Yolox::Detect(const unsigned char* rgbdata, in_pad.substract_mean_normalize(mean_vals, norm_vals); ncnn::Extractor ex = net_->create_extractor(); - ex.set_num_threads(4); ex.input("input", in_pad); ncnn::Mat out; ex.extract("output", out); diff --git a/src/hand/detecter/yolox/yolox.hpp b/src/hand/detecter/yolox/yolox.hpp index a46f6cd..d576327 100644 --- a/src/hand/detecter/yolox/yolox.hpp +++ b/src/hand/detecter/yolox/yolox.hpp @@ -9,17 +9,12 @@ namespace ovhand { class Yolox : public Detecter { public: - Yolox(); - ~Yolox(); - - int LoadModel(const char* root_path); + int LoadModel(const char * root_path); int Detect(const unsigned char* rgbadata, int img_width, int img_height, std::vector& rois); private: - ncnn::Net* net_; - bool initialized_; const int target_size = 416; const float mean_vals[3] = {255.f * 0.485f, 255.f * 0.456, 255.f * 0.406f}; const float norm_vals[3] = {1 / (255.f * 0.229f), 1 / (255.f * 0.224f), 1 / (255.f * 0.225f)}; diff --git a/src/hand/pose/estimator.hpp b/src/hand/pose/estimator.hpp index 485dd7a..c02bf19 100644 --- a/src/hand/pose/estimator.hpp +++ b/src/hand/pose/estimator.hpp @@ -7,7 +7,6 @@ namespace ovhand { class PoseEstimator: public ov::Estimator { public: - virtual ~PoseEstimator() {}; virtual int Detect(const unsigned char*rgbdata, int img_width, int img_height, const ov::Rect& rect, diff --git a/src/hand/pose/handpose/handpose.cpp b/src/hand/pose/handpose/handpose.cpp index 299182b..2ddc6af 100644 --- a/src/hand/pose/handpose/handpose.cpp +++ b/src/hand/pose/handpose/handpose.cpp @@ -6,28 +6,6 @@ #endif // OV_VULKAN namespace ovhand { -HandPose::HandPose() : - net_ (new ncnn::Net()), - initialized_(false) { -#ifdef OV_VULKAN - net_->opt.use_vulkan_compute = true; -#endif // OV_VULKAN -} - -HandPose::~HandPose() { - net_->clear(); -} - -int HandPose::LoadModel(const char * root_path) { - std::string param_file = std::string(root_path) + "/param"; - std::string bin_file = std::string(root_path) + "/bin"; - if (net_->load_param(param_file.c_str()) == -1 || - net_->load_model(bin_file.c_str()) == -1) { - return 10000; - } - initialized_ = true; - return 0; -} int HandPose::Detect(const unsigned char* rgbdata, int img_width, int img_height, diff --git a/src/hand/pose/handpose/handpose.hpp b/src/hand/pose/handpose/handpose.hpp index 795c301..7ff0e10 100644 --- a/src/hand/pose/handpose/handpose.hpp +++ b/src/hand/pose/handpose/handpose.hpp @@ -9,18 +9,12 @@ namespace ovhand { class HandPose : public PoseEstimator { public: - HandPose(); - ~HandPose(); - - int LoadModel(const char* root_path); int Detect(const unsigned char* rgbdata, int img_width, int img_height, const 
ov::Rect& rect, std::vector& keypoints); private: - ncnn::Net* net_; - bool initialized_; const float meanVals[3] = { 128.0f, 128.0f, 128.0f }; const float normVals[3] = { 0.00390625f, 0.00390625f, 0.00390625f }; }; diff --git a/src/pose/detecter/detecter.cpp b/src/pose/detecter/detecter.cpp index 2d4368d..1db18a6 100644 --- a/src/pose/detecter/detecter.cpp +++ b/src/pose/detecter/detecter.cpp @@ -18,7 +18,7 @@ int extract_pose_rois(IPoseDetecter d, const unsigned char* rgbdata, int img_wid ov::ObjectInfo o = detected[i]; rois->items[i] = ObjectInfo{ o.rect, - o.prob, + o.score, o.label, NULL }; diff --git a/src/pose/detecter/detecter.hpp b/src/pose/detecter/detecter.hpp index 2a30bd1..7c6e2b2 100644 --- a/src/pose/detecter/detecter.hpp +++ b/src/pose/detecter/detecter.hpp @@ -7,7 +7,6 @@ namespace ovpose { class Detecter: public ov::Estimator { public: - virtual ~Detecter(){}; virtual int ExtractROIs(const unsigned char* rgbadata, int img_width, int img_height, std::vector* rois) = 0; diff --git a/src/pose/detecter/ultralight/ultralight.cpp b/src/pose/detecter/ultralight/ultralight.cpp index 399290c..caa4f80 100644 --- a/src/pose/detecter/ultralight/ultralight.cpp +++ b/src/pose/detecter/ultralight/ultralight.cpp @@ -55,7 +55,6 @@ int Ultralight::ExtractROIs(const unsigned char* rgbdata, in.substract_mean_normalize(mean_vals, norm_vals); ncnn::Extractor ex = roi_net_->create_extractor(); - ex.set_num_threads(4); ex.input("data", in); ncnn::Mat out; ex.extract("output", out); @@ -99,7 +98,7 @@ int Ultralight::ExtractROIs(const unsigned char* rgbdata, ov::Rect rect = ov::Rect(x1, y1, x2-x1, y2-y1); ov::ObjectInfo roi; roi.rect = rect; - roi.prob = score; + roi.score = score; rois->push_back(roi); } return 0; @@ -124,7 +123,6 @@ int Ultralight::ExtractKeypoints(const unsigned char* rgbdata, in.substract_mean_normalize(meanVals, normVals); ncnn::Extractor ex = pose_net_->create_extractor(); - ex.set_num_threads(4); ex.input("data", in); ncnn::Mat out; ex.extract("hybridsequential0_conv7_fwd", out); @@ -152,7 +150,7 @@ int Ultralight::ExtractKeypoints(const unsigned char* rgbdata, ov::Keypoint keypoint; keypoint.p = ov::Point2f(max_x * w / (float)out.w+rect.x, max_y * h / (float)out.h+rect.y); - keypoint.prob = max_prob; + keypoint.score = max_prob; keypoints->push_back(keypoint); }
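The boilerplate deleted throughout this patch (per-class ncnn::Net* members, initialized_ flags, and copy-pasted LoadModel bodies) only works because the shared ov::Estimator base class now owns that state. That base class is not shown in this diff; the sketch below is an inferred approximation, reconstructed from the calls made here (net_, initialized_, num_threads, Estimator::LoadModel(root_path), and the set_num_threads C API used by the Go binding). Member names, defaults, and the file it lives in are assumptions, not a quote of the actual header.

```cpp
// Inferred sketch of the shared base class (not part of this diff).
// Assumes the same "<root>/param" + "<root>/bin" layout and the 10000
// error code used by the per-class LoadModel bodies removed above.
#include <string>

#include "net.h"

namespace ov {

class Estimator {
public:
    Estimator() : net_(new ncnn::Net()), initialized_(false), num_threads(2) {
#ifdef OV_VULKAN
        net_->opt.use_vulkan_compute = true;
#endif // OV_VULKAN
    }

    virtual ~Estimator() {
        if (net_) {
            net_->clear();
            delete net_;
        }
    }

    // Mirrors the deleted per-detecter LoadModel implementations.
    virtual int LoadModel(const char* root_path) {
        std::string param_file = std::string(root_path) + "/param";
        std::string bin_file = std::string(root_path) + "/bin";
        if (net_->load_param(param_file.c_str()) == -1 ||
            net_->load_model(bin_file.c_str()) == -1) {
            return 10000;
        }
        initialized_ = true;
        return 0;
    }

    // Backs set_num_threads() in the C API; Mtcnn overrides this to also
    // propagate the value to its three nets.
    virtual void set_num_threads(int n) {
        num_threads = n;
        if (net_) {
            net_->opt.num_threads = n;
        }
    }

protected:
    ncnn::Net* net_;
    bool initialized_;
    int num_threads;  // assumed default of 2
};

}  // namespace ov
```

This centralization is also why YoloFace::LoadModel and Yolox::LoadModel in this patch shrink to register_yolov5focus(net_) followed by a call to Estimator::LoadModel(root_path).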
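For context, here is a minimal usage sketch of the SCRFD detecter and landmarker wired up by this patch, driven through the factories added in detecter.cpp and landmarker.cpp. It is not part of the diff: the include paths, the model directories, the exact namespace qualification of FaceInfo and Point2f, and deleting through the base pointers (which assumes a virtual destructor in ov::Estimator) are all assumptions.

```cpp
// Hypothetical usage sketch for the Scrfd detecter and ScrfdLandmarker
// added in this patch. Paths, model locations, and the RGB buffer source
// are placeholders, not part of the diff.
#include <vector>

#include "face/detecter/detecter.hpp"      // assumed path to ovface::ScrfdFactory
#include "face/landmarker/landmarker.hpp"  // assumed path to ovface::ScrfdLandmarkerFactory

int run_scrfd(const unsigned char* rgb, int width, int height) {
    // Both classes inherit LoadModel/set_num_threads from ov::Estimator,
    // so there is no per-class LoadModel left to call.
    ovface::ScrfdFactory detecter_factory;
    ovface::Detecter* detecter = detecter_factory.CreateDetecter();
    if (detecter->LoadModel("./models/scrfd") != 0) {  // placeholder model dir
        return -1;
    }

    ovface::ScrfdLandmarkerFactory landmarker_factory;
    ovface::Landmarker* landmarker = landmarker_factory.CreateLandmarker();
    if (landmarker->LoadModel("./models/scrfd_landmarker") != 0) {  // placeholder model dir
        return -1;
    }

    // FaceInfo is assumed to live in ovface:: and now exposes .rect / .score
    // (renamed from location_ / score_ by this patch).
    std::vector<ovface::FaceInfo> faces;
    detecter->DetectFace(rgb, width, height, &faces);

    for (const auto& face : faces) {
        std::vector<ov::Point2f> keypoints;
        landmarker->ExtractKeypoints(rgb, width, height, face.rect, &keypoints);
    }

    // Assumes a virtual destructor in the ov::Estimator base.
    delete landmarker;
    delete detecter;
    return 0;
}
```

The same flow is available from C and Go through new_scrfd() / new_scrfd_landmarker() and the existing load_model / set_num_threads entry points.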