feat(face): add aligner

This commit is contained in:
Syd Xu
2021-11-04 13:14:53 +08:00
parent 021c6b71d0
commit c04819bfd3
24 changed files with 418 additions and 57 deletions

View File

@@ -1,10 +1,17 @@
package common
/*
#include <stdlib.h>
#include <stdbool.h>
#include "openvision/common/common.h"
*/
import "C"
import (
"bytes"
"image"
"image/color"
"io"
"sync"
"unsafe"
"github.com/llgcode/draw2d/draw2dimg"
"github.com/llgcode/draw2d/draw2dkit"
@@ -61,53 +68,43 @@ func (i Image) HeightF64() float64 {
return float64(i.Bounds().Dy())
}
// Crop returns cropped image bytes in Rectangle
func (i Image) Crop(rect Rectangle) []byte {
imgW := i.WidthF64()
imgH := i.HeightF64()
imgWInt := i.Width()
imgHInt := i.Height()
cropWidth := int(rect.Width)
if rect.MaxX() > imgW {
cropWidth = imgWInt
}
cropHeight := int(rect.Height)
if rect.MaxY() > imgH {
cropHeight = imgHInt
}
xOffset := int(rect.X)
if rect.X < 1e-15 {
xOffset = 0
}
yOffset := int(rect.Y)
if rect.Y < 1e-15 {
yOffset = 0
}
imgData := i.Bytes()
ret := make([]byte, 0, cropWidth*cropHeight*3)
pool := &sync.Pool{
New: func() interface{} {
return make([]byte, cropWidth*3)
},
}
for y := 0; y < cropHeight; y++ {
srcCur := ((y+yOffset)*imgWInt + xOffset) * 3
dist := pool.Get().([]byte)
dist = dist[:0]
copy(dist, imgData[srcCur:cropWidth*3])
pool.Put(dist)
ret = append(ret, dist...)
}
// NewCImage allocates a zero-initialized C.Image on the C heap.
//
// The struct is cleared right after allocation so that its data pointer
// starts out NULL: FreeCImage hands the struct to C.FreeImage, which frees
// data whenever it is non-NULL, and memory straight from malloc would
// otherwise carry an indeterminate pointer if the native call fails before
// filling it in. Release the result with FreeCImage.
func NewCImage() *C.Image {
	ret := (*C.Image)(C.malloc(C.sizeof_Image))
	*ret = C.Image{}
	return ret
}
// FreeCImage releases the pixel buffer owned by c (through C.FreeImage) and
// then the C.Image struct itself. Passing nil is a no-op.
func FreeCImage(c *C.Image) {
	if c == nil {
		// C.FreeImage dereferences its argument, so nil must not reach it.
		return
	}
	C.FreeImage(c)
	C.free(unsafe.Pointer(c))
}
// GoImage converts a C.Image into a Go image.Image.
//
// The pixel bytes are copied out of C memory with C.GoBytes before being
// decoded by NewImageFromBytes, so the result does not alias c.data.
func GoImage(c *C.Image) (image.Image, error) {
	width, height, depth := int(c.width), int(c.height), int(c.channels)
	pixels := C.GoBytes(unsafe.Pointer(c.data), C.int(width*height*depth)*C.sizeof_uchar)
	return NewImageFromBytes(pixels, width, height, depth)
}
// NewImageFromBytes builds a w x h RGBA image from interleaved pixel data
// holding `channels` bytes per pixel. The first three bytes of every pixel are
// taken as R, G and B; any further channels are ignored and alpha is fixed at
// 255.
//
// It returns image.ErrFormat instead of panicking when w or h is negative, or
// when a non-empty image is requested with fewer than three channels or with
// data shorter than w*h*channels bytes.
func NewImageFromBytes(data []byte, w int, h int, channels int) (image.Image, error) {
	if w < 0 || h < 0 {
		return nil, image.ErrFormat
	}
	if w > 0 && h > 0 && (channels < 3 || len(data) < w*h*channels) {
		return nil, image.ErrFormat
	}
	img := image.NewRGBA(image.Rect(0, 0, w, h))
	for y := 0; y < h; y++ {
		for x := 0; x < w; x++ {
			pos := (y*w + x) * channels
			img.SetRGBA(x, y, color.RGBA{R: data[pos], G: data[pos+1], B: data[pos+2], A: 255})
		}
	}
	return img, nil
}
// Image2RGB writes the pixels of img to buf as packed 8-bit R, G, B triples,
// scanning rows top to bottom and each row left to right.
//
// Exactly three bytes are written per pixel. A leftover write in B, G, R order
// used to precede the RGB one, which doubled the output and corrupted the
// layout. Errors from buf.Write are not reported because the signature has no
// error result.
func Image2RGB(buf io.Writer, img image.Image) {
	bounds := img.Bounds()
	for j := bounds.Min.Y; j < bounds.Max.Y; j++ {
		for i := bounds.Min.X; i < bounds.Max.X; i++ {
			// RGBA() yields 16-bit channels; keep the high byte of each.
			r, g, b, _ := img.At(i, j).RGBA()
			buf.Write([]byte{byte(r >> 8), byte(g >> 8), byte(b >> 8)})
		}
	}
}
@@ -118,7 +115,7 @@ func Image2RGBA(buf io.Writer, img image.Image) {
for j := bounds.Min.Y; j < bounds.Max.Y; j++ {
for i := bounds.Min.X; i < bounds.Max.X; i++ {
r, g, b, a := img.At(i, j).RGBA()
buf.Write([]byte{byte(b >> 8), byte(g >> 8), byte(r >> 8), byte(a >> 8)})
buf.Write([]byte{byte(r >> 8), byte(g >> 8), byte(b >> 8), byte(a >> 8)})
}
}
}
@@ -129,7 +126,7 @@ func Image2BGR(buf io.Writer, img image.Image) {
for j := bounds.Min.Y; j < bounds.Max.Y; j++ {
for i := bounds.Min.X; i < bounds.Max.X; i++ {
r, g, b, _ := img.At(i, j).RGBA()
buf.Write([]byte{byte(r >> 8), byte(g >> 8), byte(b >> 8)})
buf.Write([]byte{byte(b >> 8), byte(g >> 8), byte(r >> 8)})
}
}
}

View File

@@ -38,6 +38,12 @@ var (
Message: "recognize face failed",
}
}
// AlignFaceError builds an Error carrying the given code and the fixed
// message "align face failed".
AlignFaceError = func(code int) Error {
return Error{
Code: code,
Message: "align face failed",
}
}
TrackFaceError = func(code int) Error {
return Error{
Code: code,

104
go/examples/aligner/main.go Normal file
View File

@@ -0,0 +1,104 @@
package main
import (
"bytes"
"fmt"
"image"
"image/jpeg"
"log"
"os"
"os/user"
"path/filepath"
"strings"
"github.com/bububa/openvision/go/common"
"github.com/bububa/openvision/go/face/aligner"
"github.com/bububa/openvision/go/face/detecter"
)
// main runs the alignment example: it resolves the data directory, brings up
// the GPU instance, loads the RetinaFace detector, creates an aligner and
// aligns every face found in "4.jpg".
func main() {
	cwd, _ := os.Getwd()
	dataDir := cleanPath(cwd, "~/go/src/github.com/bububa/openvision/data")
	var (
		imagesDir = filepath.Join(dataDir, "./images")
		modelsDir = filepath.Join(dataDir, "./models")
	)

	common.CreateGPUInstance()
	defer common.DestroyGPUInstance()

	detector := retinaface(modelsDir)
	defer detector.Destroy()

	faceAligner := aligner.NewAligner()
	defer faceAligner.Destroy()

	align(detector, faceAligner, imagesDir, "4.jpg")
}
// retinaface creates a RetinaFace detector and loads its model from the "fd"
// sub-directory of modelPath. The process exits if the model cannot be loaded.
func retinaface(modelPath string) detecter.Detecter {
	fdModelDir := filepath.Join(modelPath, "fd")
	detector := detecter.NewRetinaFace()
	err := detector.LoadModel(fdModelDir)
	if err != nil {
		log.Fatalln(err)
	}
	return detector
}
// align loads imgPath/filename, detects faces with d, aligns each one with a
// and saves the result as imgPath/results/align-<index>-<filename>.
// Any failure terminates the process through log.Fatalln.
func align(d detecter.Detecter, a *aligner.Aligner, imgPath string, filename string) {
	source, err := loadImage(filepath.Join(imgPath, filename))
	if err != nil {
		log.Fatalln("load image failed,", err)
	}

	img := common.NewImage(source)
	faces, err := d.Detect(img)
	if err != nil {
		log.Fatalln(err)
	}

	for idx, face := range faces {
		aligned, alignErr := a.Align(img, face)
		if alignErr != nil {
			log.Fatalln(alignErr)
		}
		outName := fmt.Sprintf("align-%d-%s", idx, filename)
		saveErr := saveImage(aligned, filepath.Join(imgPath, "./results", outName))
		if saveErr != nil {
			log.Fatalln(saveErr)
		}
	}
}
// loadImage opens filePath and decodes it with image.Decode, so the format
// must have been registered by an imported image codec package.
// It returns a nil image together with the error on any failure.
func loadImage(filePath string) (image.Image, error) {
	file, openErr := os.Open(filePath)
	if openErr != nil {
		return nil, openErr
	}
	defer file.Close()

	decoded, _, decodeErr := image.Decode(file)
	if decodeErr != nil {
		return nil, decodeErr
	}
	return decoded, nil
}
// saveImage encodes img as JPEG (default options) and writes it to filePath,
// creating or truncating the file.
//
// The image is encoded into memory first so that an encoding failure does not
// leave a truncated file behind. Write and Close errors are returned instead
// of being dropped, so a short write or a failed flush is visible to the
// caller.
func saveImage(img image.Image, filePath string) error {
	buf := new(bytes.Buffer)
	if err := jpeg.Encode(buf, img, nil); err != nil {
		return err
	}
	fn, err := os.Create(filePath)
	if err != nil {
		return err
	}
	if _, err := fn.Write(buf.Bytes()); err != nil {
		fn.Close() // the write error is the one worth reporting
		return err
	}
	return fn.Close()
}
// cleanPath resolves path to a usable location: "~" and "~/..." are expanded
// against the current user's home directory, anything else is joined onto wd.
//
// The home directory is only looked up when path actually starts with "~".
// If the lookup fails, path is treated like any other relative path rather
// than dereferencing a nil *user.User.
func cleanPath(wd string, path string) string {
	if path != "~" && !strings.HasPrefix(path, "~/") {
		return filepath.Join(wd, path)
	}
	usr, err := user.Current()
	if err != nil {
		return filepath.Join(wd, path)
	}
	if path == "~" {
		return usr.HomeDir
	}
	return filepath.Join(usr.HomeDir, path[2:])
}

View File

@@ -0,0 +1,67 @@
package aligner
/*
#include <stdlib.h>
#include <stdbool.h>
#include "openvision/face/common.h"
#include "openvision/face/aligner.h"
*/
import "C"
import (
"image"
"unsafe"
openvision "github.com/bububa/openvision/go"
"github.com/bububa/openvision/go/common"
face "github.com/bububa/openvision/go/face"
)
// Aligner wraps a native face aligner handle (C.IFaceAligner).
// Create one with NewAligner and release it with Destroy.
type Aligner struct {
// d is the native handle obtained from C.new_face_aligner.
d C.IFaceAligner
}
// NewAligner creates an Aligner backed by a freshly constructed native
// aligner. The caller is responsible for calling Destroy on it.
func NewAligner() *Aligner {
	a := new(Aligner)
	a.d = C.new_face_aligner()
	return a
}
// Destroy releases the native aligner behind a.d by calling
// C.destroy_face_aligner.
// NOTE(review): a.d is not cleared afterwards, so calling Destroy twice
// presumably deletes the native object twice — confirm callers never do that.
func (a *Aligner) Destroy() {
C.destroy_face_aligner(a.d)
}
// SetThreads sets the number of threads the native aligner uses; n is
// forwarded unchanged to C.set_face_aligner_threads.
func (a *Aligner) SetThreads(n int) {
C.set_face_aligner_threads(a.d, C.int(n))
}
// Align crops the face described by faceInfo out of img, warps it with the
// native aligner and returns the result as a Go image.
//
// Each keypoint is multiplied by the image width/height before being passed
// on (presumably the inputs are normalized to [0,1] — confirm against the
// detector). The native side receives them as ten floats laid out as
// x0..x4 followed by y0..y4, so only the first five keypoints are used.
//
// It returns an AlignFaceError carrying the native error code when alignment
// fails. An image without pixel data is rejected up front with code 10001,
// the same code the native aligner uses for a NULL pixel buffer, instead of
// panicking on &data[0].
func (a *Aligner) Align(img *common.Image, faceInfo face.FaceInfo) (image.Image, error) {
	// Mirrors the native aligner's error code for missing pixel data.
	const errNoImageData = 10001

	imgWidth := img.WidthF64()
	imgHeight := img.HeightF64()
	data := img.Bytes()
	if len(data) == 0 {
		return nil, openvision.AlignFaceError(errNoImageData)
	}
	outImgC := common.NewCImage()
	defer common.FreeCImage(outImgC)
	var cPts [10]C.float
	for idx, pt := range faceInfo.Keypoints {
		if idx >= 5 {
			// cPts only has room for five (x, y) pairs.
			break
		}
		cPts[idx] = C.float(pt.X * imgWidth)
		cPts[idx+5] = C.float(pt.Y * imgHeight)
	}
	cRect := faceInfo.Rect.CRect(imgWidth, imgHeight)
	defer C.free(unsafe.Pointer(cRect))
	errCode := C.align_face(
		a.d,
		(*C.uchar)(unsafe.Pointer(&data[0])),
		C.int(imgWidth), C.int(imgHeight),
		(*C.Rect)(unsafe.Pointer(cRect)),
		(*C.float)(unsafe.Pointer(&cPts)),
		(*C.Image)(unsafe.Pointer(outImgC)),
	)
	if errCode != 0 {
		return nil, openvision.AlignFaceError(int(errCode))
	}
	return common.GoImage(outImgC)
}

11
go/face/aligner/cgo.go Normal file
View File

@@ -0,0 +1,11 @@
// +build !vulkan
package aligner
/*
#cgo CXXFLAGS: --std=c++11 -fopenmp
#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include
#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision
#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib
*/
import "C"

View File

@@ -0,0 +1,11 @@
// +build vulkan
package aligner
/*
#cgo CXXFLAGS: --std=c++11 -fopenmp
#cgo CPPFLAGS: -I ${SRCDIR}/../../../include -I /usr/local/include
#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision -lglslang -lvulkan -lSPIRV -lOGLCompiler -lMachineIndependent -lGenericCodeGen -lOSDependent
#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib
*/
import "C"

2
go/face/aligner/doc.go Normal file
View File

@@ -0,0 +1,2 @@
// Package aligner defines face Aligner
package aligner

View File

@@ -23,13 +23,13 @@ type Tracker struct {
// NewTracker returns a new Tracker backed by a native face tracker created
// with C.new_face_tracker. The stale `d: C.new_tracker()` entry is dropped:
// it duplicated the `d` key and referred to the pre-rename C function.
func NewTracker() *Tracker {
	return &Tracker{
		d: C.new_face_tracker(),
	}
}
// Destroy releases the native tracker held by t through
// C.destroy_face_tracker. The handle is released exactly once; the leftover
// call to the pre-rename C.destroy_tracker would have freed it a second time.
func (t *Tracker) Destroy() {
	C.destroy_face_tracker(t.d)
}
// Track track faces
@@ -42,7 +42,7 @@ func (t *Tracker) Track(img *common.Image, faces []face.FaceInfo) ([]face.Tracke
defer face.FreeCFaceInfoVector(CCurrFaces)
CTrackedFaces := face.NewCTrackedFaceInfoVector()
defer face.FreeCTrackedFaceInfoVector(CTrackedFaces)
errCode := C.track(
errCode := C.track_face(
t.d,
(*C.FaceInfoVector)(unsafe.Pointer(CCurrFaces)),
(*C.TrackedFaceInfoVector)(unsafe.Pointer(CTrackedFaces)),

View File

@@ -59,6 +59,7 @@ target_include_directories(openvision
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/face/tracker>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/face/hopenet>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/face/aligner>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/hand/detecter>
@@ -82,6 +83,7 @@ file(COPY
${CMAKE_CURRENT_SOURCE_DIR}/face/recognizer.h
${CMAKE_CURRENT_SOURCE_DIR}/face/tracker.h
${CMAKE_CURRENT_SOURCE_DIR}/face/hopenet.h
${CMAKE_CURRENT_SOURCE_DIR}/face/aligner.h
DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/face
)

View File

@@ -110,11 +110,22 @@ void FreeObjectInfoVector(ObjectInfoVector *p) {
p->items= NULL;
}
}
// FreeImage releases the pixel buffer owned by p and resets the pointer to
// NULL so the struct can be freed or reused safely. The Image struct itself
// is not freed. A NULL p is ignored.
void FreeImage(Image* p) {
    if (p == NULL) {
        return;
    }
    if (p->data != NULL) {
        free(p->data);
        p->data = NULL;
    }
}
namespace ov {
Estimator::Estimator() {
Estimator::Estimator() : EstimatorBase() {
net_ = new ncnn::Net();
initialized_ = false;
if (num_threads > 0) {
net_->opt.num_threads = num_threads;
}
#ifdef OV_VULKAN
net_->opt.use_vulkan_compute = true;
#endif // OV_VULKAN
@@ -139,8 +150,18 @@ int Estimator::LoadModel(const char * root_path) {
return 0;
}
void Estimator::set_num_threads(int n) {
EstimatorBase::EstimatorBase() {
num_threads = ncnn::get_big_cpu_count();
}
EstimatorBase::~EstimatorBase() {}
void EstimatorBase::set_num_threads(int n) {
num_threads = n;
}
void Estimator::set_num_threads(int n) {
EstimatorBase::set_num_threads(n);
if (net_) {
net_->opt.num_threads = n;
}

View File

@@ -55,6 +55,7 @@ typedef struct Keypoint {
int id;
} Keypoint;
#endif
typedef void* IEstimator;
@@ -105,6 +106,15 @@ typedef struct KeypointVector {
void FreeKeypointVector(KeypointVector *p);
void KeypointVectorSetValue(KeypointVector *p, int i, const Keypoint* val);
// Image is a plain C image buffer handed across the C API boundary.
typedef struct ImageC {
unsigned char* data; // pixel bytes; released with free() by FreeImage
int width; // image width in pixels
int height; // image height in pixels
int channels; // bytes per pixel
} Image;
// FreeImage frees p->data (if non-NULL) and resets it to NULL; it does not
// free the Image struct itself.
void FreeImage(Image* p);
typedef struct ObjectInfoC {
Rect rect;
float score;

View File

@@ -12,14 +12,22 @@
namespace ov {
// EstimatorBase holds the thread-count setting shared by all estimators,
// including ones that do not own an ncnn network.
// (A leftover, unterminated `class Estimator {` header that preceded this
// class has been removed; the real Estimator is declared below it.)
class EstimatorBase {
public:
    EstimatorBase();
    virtual ~EstimatorBase();
    // Sets the number of threads used by this estimator.
    virtual void set_num_threads(int n);
protected:
    // Thread count used for parallel sections; the in-class default is 2.
    int num_threads = 2;
};
// Estimator is the base class for models backed by an ncnn network.
class Estimator : public EstimatorBase {
public:
    Estimator();
    virtual ~Estimator();
    // Loads the model found under root_path.
    virtual int LoadModel(const char* root_path);
    // Updates the thread count on the base class and on the network options.
    virtual void set_num_threads(int n);
protected:
    // num_threads is inherited from EstimatorBase. It must not be redeclared
    // here: a second member would shadow the base one, so values stored by
    // EstimatorBase would never be seen by Estimator code.
    ncnn::Net* net_;
    bool initialized_ = false;
};
@@ -136,6 +144,7 @@ struct Image {
data.resize(data_size);
data.assign(ptr, ptr + data_size);
free(ptr);
ptr=NULL;
};
};

16
src/face/aligner.h Normal file
View File

@@ -0,0 +1,16 @@
#ifndef _FACE_ALIGNER_C_H_
#define _FACE_ALIGNER_C_H_
#ifdef __cplusplus
#include "aligner/aligner.hpp"
extern "C" {
#endif
// Opaque handle to a face aligner instance.
typedef void* IFaceAligner;
// Creates a new aligner; release it with destroy_face_aligner.
IFaceAligner new_face_aligner();
// Destroys an aligner created by new_face_aligner.
void destroy_face_aligner(IFaceAligner d);
// Sets the number of threads the aligner uses.
void set_face_aligner_threads(IFaceAligner d, int n);
// Crops `rect` out of the packed RGB image `rgbdata` (img_width x img_height)
// and warps it into `face_aligned` using the five keypoints, given as
// x0..x4 followed by y0..y4 in image coordinates.
// Returns 0 on success and a non-zero error code otherwise. On success
// face_aligned->data is allocated with malloc and must be released with
// FreeImage.
int align_face(IFaceAligner d, const unsigned char* rgbdata, int img_width, int img_height, const Rect* rect, const float keypoints[10], Image* face_aligned);
#ifdef __cplusplus
}
#endif
#endif // !_FACE_ALIGNER_C_H_

View File

@@ -0,0 +1,57 @@
#include "aligner.h"
// Allocates a new ovface::Aligner and returns it as an opaque handle.
IFaceAligner new_face_aligner() {
return new ovface::Aligner();
}
// Deletes the ovface::Aligner behind the handle t.
void destroy_face_aligner(IFaceAligner t) {
delete static_cast<ovface::Aligner*>(t);
}
// Forwards n to set_num_threads on the ovface::Aligner behind the handle t.
void set_face_aligner_threads(IFaceAligner t, int n) {
static_cast<ovface::Aligner*>(t)->set_num_threads(n);
}
// C entry point for face alignment: unwraps the opaque handle and forwards
// all arguments to ovface::Aligner::Align, returning its status code.
int align_face(IFaceAligner d, const unsigned char* rgbdata, int img_width, int img_height, const Rect* rect, const float keypoints[10], Image* face_aligned) {
    ovface::Aligner* aligner = static_cast<ovface::Aligner*>(d);
    return aligner->Align(rgbdata, img_width, img_height, *rect, keypoints, face_aligned);
}
namespace ovface {
// Crops the face rectangle out of a packed RGB image and warps it onto the
// canonical crop_width x crop_height layout defined by points_dst.
//
//   rgbdata       packed 3-byte pixels, img_width x img_height
//   rect          face rectangle in image coordinates; it is clamped to the
//                 image so rows are never read outside rgbdata
//   keypoints     five landmarks as x0..x4 followed by y0..y4, in image
//                 coordinates
//   face_aligned  receives a malloc'ed crop_width x crop_height x 3 buffer on
//                 success; on failure it is left with data == NULL so that
//                 FreeImage is always safe to call on it
//
// Returns 0 on success and 10001 when an argument is missing, the rectangle
// does not intersect the image, or a buffer cannot be allocated.
int Aligner::Align(const unsigned char* rgbdata,
    int img_width, int img_height,
    const Rect& rect,
    const float keypoints[10],
    Image* face_aligned) {
    if (face_aligned == 0) {
        return 10001;
    }
    // Put the output into a defined state before any early return.
    face_aligned->data = NULL;
    face_aligned->width = 0;
    face_aligned->height = 0;
    face_aligned->channels = 0;
    if (rgbdata == 0 || keypoints == 0) {
        return 10001;
    }

    // Intersect the requested rectangle with the image bounds.
    const int rect_x = static_cast<int>(rect.x);
    const int rect_y = static_cast<int>(rect.y);
    const int rect_right = rect_x + static_cast<int>(rect.width);
    const int rect_bottom = rect_y + static_cast<int>(rect.height);
    const int x0 = rect_x < 0 ? 0 : rect_x;
    const int y0 = rect_y < 0 ? 0 : rect_y;
    const int x1 = rect_right > img_width ? img_width : rect_right;
    const int y1 = rect_bottom > img_height ? img_height : rect_bottom;
    const int face_width = x1 - x0;
    const int face_height = y1 - y0;
    if (face_width <= 0 || face_height <= 0) {
        return 10001;
    }

    // Landmarks relative to the top-left corner of the cropped region,
    // interleaved as (x, y) pairs.
    float points_src[10] = {
        keypoints[0] - x0, keypoints[5] - y0,
        keypoints[1] - x0, keypoints[6] - y0,
        keypoints[2] - x0, keypoints[7] - y0,
        keypoints[3] - x0, keypoints[8] - y0,
        keypoints[4] - x0, keypoints[9] - y0
    };

    const size_t row_bytes = static_cast<size_t>(face_width) * 3;
    unsigned char* img_face = (unsigned char*)malloc(row_bytes * face_height);
    if (img_face == NULL) {
        return 10001;
    }
    const size_t aligned_bytes = static_cast<size_t>(crop_width) * crop_height * 3;
    unsigned char* aligned = (unsigned char*)malloc(aligned_bytes);
    if (aligned == NULL) {
        free(img_face);
        return 10001;
    }

    // Copy the face region row by row into a contiguous buffer.
#if defined(_OPENMP)
#pragma omp parallel for num_threads(num_threads)
#endif
    for (int i = 0; i < face_height; ++i) {
        const unsigned char* srcCursor = rgbdata + (static_cast<size_t>(y0 + i) * img_width + x0) * 3;
        unsigned char* dstCursor = img_face + static_cast<size_t>(i) * row_bytes;
        memcpy(dstCursor, srcCursor, row_bytes);
    }

    // The transform is computed from destination to source points and passed
    // straight to the bilinear warp.
    float tm_inv[6];
    ncnn::get_affine_transform(points_dst, points_src, 5, tm_inv);
    ncnn::warpaffine_bilinear_c3(img_face, face_width, face_height, aligned, crop_width, crop_height, tm_inv);
    free(img_face);

    face_aligned->width = crop_width;
    face_aligned->height = crop_height;
    face_aligned->channels = 3;
    face_aligned->data = aligned;
    return 0;
}
}

View File

@@ -0,0 +1,30 @@
#ifndef _FACE_ALIGNER_H_
#define _FACE_ALIGNER_H_
#include "../../common/common.h"
#include <vector>
namespace ovface {
// Aligner warps a detected face onto a fixed-size canonical layout using five
// facial landmarks. It inherits the thread-count setting from
// ov::EstimatorBase.
class Aligner : public ov::EstimatorBase {
public:
// Crops `rect` from the packed RGB image and writes the aligned
// crop_width x crop_height face into face_aligned.
// keypoints holds x0..x4 followed by y0..y4. Returns 0 on success.
int Align(const unsigned char* rgbdata,
int img_width, int img_height,
const ov::Rect& rect,
const float keypoints[10],
Image* face_aligned);
private:
// Destination landmark positions inside the aligned crop, as (x, y) pairs.
// NOTE(review): the +8.0f x-offset presumably adapts a 96-pixel-wide
// reference template to the 112-pixel crop — confirm.
float points_dst[10] = {
30.2946f + 8.0f, 51.6963f, // left eye
65.5318f + 8.0f, 51.5014f, // right eye
48.0252f + 8.0f, 71.7366f, // nose tip
33.5493f + 8.0f, 92.3655f, // mouth left
62.7299f + 8.0f, 92.2041f // mouth right
};
// Size of the aligned output image in pixels.
int crop_width = 112;
int crop_height = 112;
};
}
#endif // !_FACE_ALIGNER_H_

View File

@@ -45,6 +45,9 @@ int Hopenet::Detect(const unsigned char* rgbdata,
size_t total_size = roi.width * roi.height * 3 * sizeof(unsigned char);
unsigned char* img_face = (unsigned char*)malloc(total_size);
const unsigned char *start_ptr = rgbdata;
#if defined(_OPENMP)
#pragma omp parallel for num_threads(num_threads)
#endif
for(size_t i = 0; i < roi.height; ++i) {
const unsigned char* srcCursor = start_ptr + ((i + roi.y) * img_width + roi.x) * 3;
unsigned char* dstCursor = img_face + i * roi.width * 3;

View File

@@ -32,6 +32,9 @@ int InsightfaceLandmarker::ExtractKeypoints(const unsigned char* rgbdata,
size_t total_size = face_enlarged.width * face_enlarged.height * 3 * sizeof(unsigned char);
unsigned char* img_face = (unsigned char*)malloc(total_size);
const unsigned char *start_ptr = rgbdata;
#if defined(_OPENMP)
#pragma omp parallel for num_threads(num_threads)
#endif
for(size_t i = 0; i < face_enlarged.height; ++i) {
const unsigned char* srcCursor = start_ptr + ((i + face_enlarged.y) * img_width + face_enlarged.x) * 3;
unsigned char* dstCursor = img_face + i * face_enlarged.width * 3;

View File

@@ -30,6 +30,9 @@ int ScrfdLandmarker::ExtractKeypoints(const unsigned char* rgbdata,
size_t total_size = box.width * box.height * 3 * sizeof(unsigned char);
unsigned char* img_face = (unsigned char*)malloc(total_size);
const unsigned char *start_ptr = rgbdata;
#if defined(_OPENMP)
#pragma omp parallel for num_threads(num_threads)
#endif
for(size_t i = 0; i < box.height; ++i) {
const unsigned char* srcCursor = start_ptr + ((i + box.y) * img_width + box.x) * 3;
unsigned char* dstCursor = img_face + i * box.width * 3;

View File

@@ -21,6 +21,9 @@ int ZQLandmarker::ExtractKeypoints(const unsigned char* rgbdata,
size_t total_size = face.width * face.height * 3 * sizeof(unsigned char);
unsigned char* img_face = (unsigned char*)malloc(total_size);
const unsigned char *start_ptr = rgbdata;
#if defined(_OPENMP)
#pragma omp parallel for num_threads(num_threads)
#endif
for(size_t i = 0; i < face.height; ++i) {
const unsigned char* srcCursor = start_ptr + ((i + face.y) * img_width + face.x) * 3;
unsigned char* dstCursor = img_face + i * face.width * 3;

View File

@@ -20,6 +20,9 @@ int Mobilefacenet::ExtractFeature(const unsigned char* rgbdata,
size_t total_size = face.width * face.height * 3 * sizeof(unsigned char);
unsigned char* img_face = (unsigned char*)malloc(total_size);
const unsigned char *start_ptr = rgbdata;
#if defined(_OPENMP)
#pragma omp parallel for num_threads(num_threads)
#endif
for(size_t i = 0; i < face.height; ++i) {
const unsigned char* srcCursor = start_ptr + ((i + face.y) * img_width + face.x) * 3;
unsigned char* dstCursor = img_face + i * face.width * 3;

View File

@@ -7,9 +7,9 @@
extern "C" {
#endif
typedef void* IFaceTracker;
IFaceTracker new_tracker();
void destroy_tracker(IFaceTracker t);
int track(IFaceTracker t, const FaceInfoVector* curr_faces, TrackedFaceInfoVector* faces);
IFaceTracker new_face_tracker();
void destroy_face_tracker(IFaceTracker t);
int track_face(IFaceTracker t, const FaceInfoVector* curr_faces, TrackedFaceInfoVector* faces);
#ifdef __cplusplus
}
#endif

View File

@@ -1,15 +1,15 @@
#include "../tracker.h"
#include <queue>
// Allocates a new ovface::Tracker and returns it as an opaque handle.
// Only the renamed entry point is defined; the leftover `new_tracker`
// signature line opened a second, never-closed function.
IFaceTracker new_face_tracker() {
    return new ovface::Tracker();
}
void destroy_tracker(IFaceTracker t) {
void destroy_face_tracker(IFaceTracker t) {
delete static_cast<ovface::Tracker*>(t);
}
int track(IFaceTracker t, const FaceInfoVector* curr_faces, TrackedFaceInfoVector* faces) {
int track_face(IFaceTracker t, const FaceInfoVector* curr_faces, TrackedFaceInfoVector* faces) {
std::vector<ovface::FaceInfo> cfaces;
for (int i = 0; i < curr_faces->length; ++i) {
cfaces.push_back(static_cast<ovface::FaceInfo>(curr_faces->faces[i]));

View File

@@ -384,9 +384,9 @@ void OpenPose::postProcess(const ncnn::Mat &pafs, const ncnn::Mat &heatmaps,
std::vector<std::vector<ov::Keypoint> > peaksFromHeatMap(cv_heatmaps_upsample.size());
// #if defined(_OPENMP)
// #pragma omp parallel for num_threads(num_threads)
// #endif
#if defined(_OPENMP)
#pragma omp parallel for num_threads(num_threads)
#endif
for (int i = 0; i < cv_heatmaps_upsample.size(); i++) {
this->findPeaks(cv_heatmaps_upsample, minPeaksDistance, peaksFromHeatMap, i);
}

View File

@@ -16,6 +16,9 @@ int UltralightEstimator::ExtractKeypoints(const unsigned char* rgbdata,
size_t total_size = w * h * 3 * sizeof(unsigned char);
unsigned char* data = (unsigned char*)malloc(total_size);
const unsigned char *start_ptr = rgbdata;
#if defined(_OPENMP)
#pragma omp parallel for num_threads(num_threads)
#endif
for(size_t i = 0; i < h; ++i) {
const unsigned char* srcCursor = start_ptr + ((i + rect.y) * img_width + rect.x) * 3;
unsigned char* dstCursor = data + i * w * 3;