mirror of
https://github.com/bububa/openvision.git
synced 2025-09-27 01:56:04 +08:00
feat(pose): add movenet pose estimator
This commit is contained in:
@@ -17,6 +17,7 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM
|
||||
## Features
|
||||
|
||||
- face
|
||||
- aligner (for face keypoints alignment)
|
||||
- detecter (for face location and keypoints detection)
|
||||
- mtcnn [Google Drive](https://drive.google.com/drive/folders/14ToHyDXZr4Ihuk8WYp1mVS7QnVxnzEjn?usp=sharing)
|
||||
- centerface [Google Drive](https://drive.google.com/drive/folders/1xMhO6aCnkkjt90Fh8BxVD_JHB3QJ2q-q?usp=sharing)
|
||||
@@ -38,6 +39,7 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM
|
||||
- openpose [Google Drive](https://drive.google.com/drive/folders/1Q2mq7dOE-eHsvu4BYpBaWVLkU5roKsm5?usp=sharing)
|
||||
- estimator (for pose estimation)
|
||||
- ultralight [Google Drive](https://drive.google.com/drive/folders/15b-I5HDyGe2WLb-TO85SJYmnYONvGOKh?usp=sharing)
|
||||
- movenet [Google Drive](https://drive.google.com/drive/folders/14zgKk0tro1kjRrSTs0EAlEKrV8Q4XA34?usp=sharing)
|
||||
- hand
|
||||
- detector (for hand detection)
|
||||
- yolox [Google Drive](https://drive.google.com/drive/folders/1lNm5X6DJ1ZXVaqg54rXnRhvPfC5lAxlH?usp=sharing)
|
||||
|
@@ -24,6 +24,11 @@ func SetEstimatorThreads(e Estimator, n int) {
|
||||
C.set_num_threads((C.IEstimator)(e.Pointer()), C.int(n))
|
||||
}
|
||||
|
||||
// SetEstimatorLightMode set ncnn net opt.lightmode
|
||||
func SetEstimatorLightMode(e Estimator, mode bool) {
|
||||
C.set_light_mode((C.IEstimator)(e.Pointer()), C.bool(mode))
|
||||
}
|
||||
|
||||
// DestroyEstimator destroys an Estimator
|
||||
func DestroyEstimator(e Estimator) {
|
||||
C.destroy_estimator((C.IEstimator)(e.Pointer()))
|
||||
|
@@ -35,6 +35,8 @@ func main() {
|
||||
common.SetEstimatorThreads(d, cpuCores)
|
||||
for mid, m := range []estimator.Estimator{
|
||||
ultralightEstimator(modelPath),
|
||||
moveNetEstimator(modelPath, estimator.MoveNetType_Lightning),
|
||||
moveNetEstimator(modelPath, estimator.MoveNetType_Thunder),
|
||||
} {
|
||||
defer m.Destroy()
|
||||
common.SetEstimatorThreads(d, cpuCores)
|
||||
@@ -70,6 +72,19 @@ func ultralightEstimator(modelPath string) estimator.Estimator {
|
||||
return d
|
||||
}
|
||||
|
||||
func moveNetEstimator(modelPath string, modelType estimator.MoveNetType) estimator.Estimator {
|
||||
if modelType == estimator.MoveNetType_Lightning {
|
||||
modelPath = filepath.Join(modelPath, "movenet/lightning")
|
||||
} else {
|
||||
modelPath = filepath.Join(modelPath, "movenet/thunder")
|
||||
}
|
||||
d := estimator.NewMoveNet(modelType)
|
||||
if err := d.LoadModel(modelPath); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func detect(d detecter.Detecter, m estimator.Estimator, imgPath string, filename string, did int, mid int) {
|
||||
inPath := filepath.Join(imgPath, filename)
|
||||
imgSrc, err := loadImage(inPath)
|
||||
|
55
go/pose/estimator/movenet.go
Normal file
55
go/pose/estimator/movenet.go
Normal file
@@ -0,0 +1,55 @@
|
||||
package estimator
|
||||
|
||||
/*
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include "openvision/pose/estimator.h"
|
||||
*/
|
||||
import "C"
|
||||
import (
|
||||
"unsafe"
|
||||
|
||||
"github.com/bububa/openvision/go/common"
|
||||
)
|
||||
|
||||
// MoveNetType selects the movenet model variant (lightning or thunder).
// It is an alias of int, so plain ints are accepted wherever it is used.
type MoveNetType = int

// Supported movenet variants; the numeric values are passed unchanged to the
// native constructor.
const (
	// MoveNetType_Lightning is the lightning model (value 0).
	MoveNetType_Lightning MoveNetType = iota
	// MoveNetType_Thunder is the thunder model (value 1).
	MoveNetType_Thunder
)
|
||||
|
||||
// MoveNet represents movenet estimator
|
||||
type MoveNet struct {
|
||||
d C.IPoseEstimator
|
||||
}
|
||||
|
||||
// NewMoveNet returns a new MoveNet
|
||||
func NewMoveNet(modelType MoveNetType) *MoveNet {
|
||||
return &MoveNet{
|
||||
d: C.new_movenet(C.int(modelType)),
|
||||
}
|
||||
}
|
||||
|
||||
// Destroy free detecter
|
||||
func (d *MoveNet) Destroy() {
|
||||
common.DestroyEstimator(d)
|
||||
}
|
||||
|
||||
// Pointer implement Estimator interface
|
||||
func (d *MoveNet) Pointer() unsafe.Pointer {
|
||||
return unsafe.Pointer(d.d)
|
||||
}
|
||||
|
||||
// LoadModel load model for detecter
|
||||
func (d *MoveNet) LoadModel(modelPath string) error {
|
||||
return common.EstimatorLoadModel(d, modelPath)
|
||||
}
|
||||
|
||||
// ExtractKeypoints implement Detecter interface
|
||||
func (d *MoveNet) ExtractKeypoints(img *common.Image, rect common.Rectangle) ([]common.Keypoint, error) {
|
||||
return ExtractKeypoints(d, img, rect)
|
||||
}
|
@@ -51,6 +51,10 @@ void set_num_threads(IEstimator d, int n) {
|
||||
static_cast<ov::Estimator*>(d)->set_num_threads(n);
|
||||
}
|
||||
|
||||
// C entry point: forwards the light-mode flag to the ov::Estimator behind the
// opaque handle d.
void set_light_mode(IEstimator d, bool mode) {
    ov::Estimator* estimator = static_cast<ov::Estimator*>(d);
    estimator->set_light_mode(mode);
}
|
||||
|
||||
void FreePoint2fVector(Point2fVector* p) {
|
||||
if (p->points != NULL) {
|
||||
free(p->points);
|
||||
@@ -167,6 +171,13 @@ void Estimator::set_num_threads(int n) {
|
||||
}
|
||||
}
|
||||
|
||||
void Estimator::set_light_mode(bool mode) {
|
||||
if (net_) {
|
||||
net_->opt.lightmode = mode;
|
||||
light_mode_ = mode;
|
||||
}
|
||||
}
|
||||
|
||||
int RatioAnchors(const Rect & anchor,
|
||||
const std::vector<float>& ratios,
|
||||
std::vector<Rect>* anchors, int threads_num) {
|
||||
|
@@ -68,6 +68,7 @@ void set_omp_num_threads(int n);
|
||||
int load_model(IEstimator e, const char* root_path);
|
||||
void destroy_estimator(IEstimator e);
|
||||
void set_num_threads(IEstimator e, int n);
|
||||
void set_light_mode(IEstimator e, bool mode);
|
||||
|
||||
typedef struct Point2fVector {
|
||||
Point2f* points;
|
||||
|
@@ -27,9 +27,11 @@ public:
|
||||
virtual ~Estimator();
|
||||
virtual int LoadModel(const char* root_path);
|
||||
virtual void set_num_threads(int n);
|
||||
virtual void set_light_mode(bool mode);
|
||||
protected:
|
||||
ncnn::Net* net_;
|
||||
bool initialized_ = false;
|
||||
bool light_mode_ = true;
|
||||
};
|
||||
|
||||
// Wrapper for an individual cv::cvSize
|
||||
@@ -240,6 +242,11 @@ float sigmoid(float x);
|
||||
void EnlargeRect(const float& scale, Rect* rect);
|
||||
void RectifyRect(Rect* rect);
|
||||
|
||||
// Returns the zero-based position of the largest element in [first, last).
// When several elements compare equal to the maximum, the first one wins
// (std::max_element semantics). An empty range yields 0.
template<class ForwardIterator>
inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
    const ForwardIterator best = std::max_element(first, last);
    // std::distance is signed; it is never negative here because best lies in
    // [first, last], so the conversion is made explicit rather than implicit.
    return static_cast<size_t>(std::distance(first, best));
}
|
||||
|
||||
}
|
||||
|
||||
#endif // !_COMMON_H_
|
||||
|
@@ -42,9 +42,12 @@ int AntiConv::DetectFace(const unsigned char* rgbdata,
|
||||
|
||||
float factor_x = static_cast<float>(img_width) / inputSize_.width;
|
||||
float factor_y = static_cast<float>(img_height) / inputSize_.height;
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata,
|
||||
ncnn::Mat::PIXEL_RGB, img_width, img_height, inputSize_.width, inputSize_.height);
|
||||
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("data", in);
|
||||
|
||||
std::vector<FaceInfo> faces_tmp;
|
||||
|
@@ -24,7 +24,10 @@ int CenterFace::DetectFace(const unsigned char* rgbdata,
|
||||
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB,
|
||||
img_width, img_height, img_width_new, img_height_new);
|
||||
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("input.1", in);
|
||||
ncnn::Mat mat_heatmap, mat_scale, mat_offset, mat_landmark;
|
||||
ex.extract("537", mat_heatmap);
|
||||
|
@@ -122,8 +122,9 @@ int Mtcnn::PDetect(const ncnn::Mat & img_in,
|
||||
ncnn::Mat img_resized;
|
||||
ncnn::resize_bilinear(img_in, img_resized, w, h);
|
||||
ncnn::Extractor ex = pnet_->create_extractor();
|
||||
//ex.set_num_threads(2);
|
||||
ex.set_light_mode(true);
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
|
||||
ex.input("data", img_resized);
|
||||
ncnn::Mat score_mat, location_mat;
|
||||
ex.extract("prob1", score_mat);
|
||||
@@ -177,8 +178,8 @@ int Mtcnn::RDetect(const ncnn::Mat & img_in,
|
||||
ncnn::copy_cut_border(img_in, img_face, face.y, img_in.h - face.br().y, face.x, img_in.w - face.br().x);
|
||||
ncnn::resize_bilinear(img_face, img_resized, 24, 24);
|
||||
ncnn::Extractor ex = rnet_->create_extractor();
|
||||
ex.set_light_mode(true);
|
||||
// ex.set_num_threads(2);
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("data", img_resized);
|
||||
ncnn::Mat score_mat, location_mat;
|
||||
ex.extract("prob1", score_mat);
|
||||
@@ -214,8 +215,8 @@ int Mtcnn::ODetect(const ncnn::Mat & img_in,
|
||||
ncnn::resize_bilinear(img_face, img_resized, 48, 48);
|
||||
|
||||
ncnn::Extractor ex = onet_->create_extractor();
|
||||
ex.set_light_mode(true);
|
||||
// ex.set_num_threads(2);
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("data", img_resized);
|
||||
ncnn::Mat score_mat, location_mat, keypoints_mat;
|
||||
ex.extract("prob1", score_mat);
|
||||
|
@@ -42,9 +42,12 @@ int RetinaFace::DetectFace(const unsigned char* rgbdata,
|
||||
|
||||
float factor_x = static_cast<float>(img_width) / inputSize_.width;
|
||||
float factor_y = static_cast<float>(img_height) / inputSize_.height;
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata,
|
||||
ncnn::Mat::PIXEL_RGB, img_width, img_height, inputSize_.width, inputSize_.height);
|
||||
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("data", in);
|
||||
|
||||
std::vector<FaceInfo> faces_tmp;
|
||||
|
@@ -128,6 +128,8 @@ int Scrfd::DetectFace(const unsigned char* rgbdata,
|
||||
in_pad.substract_mean_normalize(mean_vals, norm_vals);
|
||||
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
|
||||
ex.input("input.1", in_pad);
|
||||
|
||||
|
@@ -154,7 +154,8 @@ int YoloFace::DetectFace(const unsigned char* rgbdata,
|
||||
in_pad.substract_mean_normalize(0, norm_vals);
|
||||
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("data", in_pad);
|
||||
|
||||
std::vector<ov::ObjectInfo> proposals;
|
||||
|
@@ -56,6 +56,8 @@ int Hopenet::Detect(const unsigned char* rgbdata,
|
||||
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face, ncnn::Mat::PIXEL_RGB2GRAY, roi.width, roi.height, 48, 48);
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("data", in);
|
||||
|
||||
ncnn::Mat output;
|
||||
|
@@ -40,11 +40,13 @@ int InsightfaceLandmarker::ExtractKeypoints(const unsigned char* rgbdata,
|
||||
unsigned char* dstCursor = img_face + i * face_enlarged.width * 3;
|
||||
memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * face_enlarged.width);
|
||||
}
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face,
|
||||
ncnn::Mat::PIXEL_RGB, face_enlarged.width, face_enlarged.height, 192, 192);
|
||||
|
||||
// 4 do inference
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face,
|
||||
ncnn::Mat::PIXEL_RGB, face_enlarged.width, face_enlarged.height, 192, 192);
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("data", in);
|
||||
ncnn::Mat out;
|
||||
ex.extract("fc1", out);
|
||||
|
@@ -39,9 +39,12 @@ int ScrfdLandmarker::ExtractKeypoints(const unsigned char* rgbdata,
|
||||
memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * box.width);
|
||||
}
|
||||
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ncnn::Mat ncnn_in = ncnn::Mat::from_pixels_resize(img_face, ncnn::Mat::PIXEL_RGB, box.width, box.height, 192, 192);
|
||||
ncnn_in.substract_mean_normalize(means, norms);
|
||||
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("input.1",ncnn_in);
|
||||
ncnn::Mat ncnn_out;
|
||||
ex.extract("482",ncnn_out);
|
||||
|
@@ -29,10 +29,13 @@ int ZQLandmarker::ExtractKeypoints(const unsigned char* rgbdata,
|
||||
unsigned char* dstCursor = img_face + i * face.width * 3;
|
||||
memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * face.width);
|
||||
}
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face,
|
||||
ncnn::Mat::PIXEL_RGB, face.width, face.height, 112, 112);
|
||||
in.substract_mean_normalize(meanVals, normVals);
|
||||
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("data", in);
|
||||
ncnn::Mat out;
|
||||
ex.extract("bn6_3", out);
|
||||
|
@@ -31,11 +31,14 @@ int Mobilefacenet::ExtractFeature(const unsigned char* rgbdata,
|
||||
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face,
|
||||
ncnn::Mat::PIXEL_RGB, face.width, face.height, 112, 112);
|
||||
feature->resize(kFaceFeatureDim);
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("data", in);
|
||||
ncnn::Mat out;
|
||||
ex.extract("fc1", out);
|
||||
|
||||
feature->resize(kFaceFeatureDim);
|
||||
for (int i = 0; i < kFaceFeatureDim; ++i) {
|
||||
feature->at(i) = out[i];
|
||||
}
|
||||
|
@@ -143,7 +143,8 @@ int Nanodet::Detect(const unsigned char* rgbdata,
|
||||
in_pad.substract_mean_normalize(mean_vals, norm_vals);
|
||||
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
//__android_log_print(ANDROID_LOG_WARN, "ncnn","input w:%d,h:%d",in_pad.w,in_pad.h);
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("input.1", in_pad);
|
||||
|
||||
std::vector<ov::ObjectInfo> proposals;
|
||||
|
@@ -97,6 +97,8 @@ int Yolox::Detect(const unsigned char* rgbdata,
|
||||
in_pad.substract_mean_normalize(mean_vals, norm_vals);
|
||||
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("input", in_pad);
|
||||
ncnn::Mat out;
|
||||
ex.extract("output", out);
|
||||
|
@@ -32,10 +32,12 @@ int HandPose::Detect(const unsigned char* rgbdata,
|
||||
const float meanVals[3] = { 128.0f, 128.0f, 128.0f };
|
||||
const float normVals[3] = { 0.00390625f, 0.00390625f, 0.00390625f };
|
||||
ncnn_in.substract_mean_normalize(meanVals, normVals);
|
||||
ncnn::Extractor ex1 = net_->create_extractor();
|
||||
ex1.input("input", ncnn_in);
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("input", ncnn_in);
|
||||
ncnn::Mat ncnn_out;
|
||||
ex1.extract("output", ncnn_out);
|
||||
ex.extract("output", ncnn_out);
|
||||
keypoints.resize(21);
|
||||
|
||||
for (int c = 0; c < ncnn_out.c; c++)
|
||||
|
@@ -51,6 +51,8 @@ int OpenPose::Detect(const unsigned char* rgbdata,
|
||||
ncnn::Mat pafs;
|
||||
ncnn::Mat heatmaps;
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("data", in);
|
||||
ex.extract("stage_1_output_1_heatmaps", heatmaps); // or stage_0_output_1_heatmaps
|
||||
ex.extract("stage_1_output_0_pafs", pafs); // or stage_0_output_0_pafs
|
||||
|
@@ -22,6 +22,8 @@ int Ultralight::Detect(const unsigned char* rgbdata,
|
||||
in.substract_mean_normalize(mean_vals, norm_vals);
|
||||
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("data", in);
|
||||
ncnn::Mat out;
|
||||
ex.extract("output", out);
|
||||
|
@@ -9,6 +9,7 @@ extern "C" {
|
||||
#endif
|
||||
typedef void* IPoseEstimator;
|
||||
IPoseEstimator new_ultralight_estimator();
|
||||
IPoseEstimator new_movenet(const int model_type);
|
||||
int extract_pose_keypoints(IPoseEstimator d, const unsigned char* rgbdata,
|
||||
int img_width, int img_height,
|
||||
const Rect* rect, KeypointVector* keypoints);
|
||||
|
@@ -1,10 +1,15 @@
|
||||
#include "../estimator.h"
|
||||
#include "ultralight/ultralight.hpp"
|
||||
#include "movenet/movenet.hpp"
|
||||
|
||||
IPoseEstimator new_ultralight_estimator() {
|
||||
return new ovpose::UltralightEstimator();
|
||||
}
|
||||
|
||||
// C entry point: allocates an ovpose::MoveNet for the given model type and
// returns it as an opaque IPoseEstimator handle.
IPoseEstimator new_movenet(const int model_type) {
    ovpose::MoveNet* estimator = new ovpose::MoveNet(model_type);
    return estimator;
}
|
||||
|
||||
int extract_pose_keypoints(IPoseEstimator d, const unsigned char* rgbdata, int img_width, int img_height, const Rect* rect, KeypointVector* keypoints) {
|
||||
std::vector<ov::Keypoint> points;
|
||||
int ret = static_cast<ovpose::Estimator*>(d)->ExtractKeypoints(rgbdata, img_width, img_height, *rect, &points);
|
||||
@@ -24,4 +29,8 @@ namespace ovpose {
|
||||
Estimator* UltralightEstimatorFactory::CreateEstimator() {
|
||||
return new UltralightEstimator();
|
||||
}
|
||||
|
||||
// Allocates a new MoveNet estimator for model_type; the caller receives the
// raw pointer.
Estimator* MoveNetFactory::CreateEstimator(const int model_type) {
    Estimator* estimator = new MoveNet(model_type);
    return estimator;
}
|
||||
}
|
||||
|
@@ -24,5 +24,12 @@ public:
|
||||
~UltralightEstimatorFactory() {}
|
||||
Estimator* CreateEstimator();
|
||||
};
|
||||
|
||||
class MoveNetFactory: public EstimatorFactory {
|
||||
public:
|
||||
MoveNetFactory(const int model_type) {}
|
||||
~MoveNetFactory() {}
|
||||
Estimator* CreateEstimator(const int model_type);
|
||||
};
|
||||
}
|
||||
#endif // !_POSE_ESTIMATOR_H
|
||||
|
157
src/pose/estimator/movenet/movenet.cpp
Normal file
157
src/pose/estimator/movenet/movenet.cpp
Normal file
@@ -0,0 +1,157 @@
|
||||
#include "movenet.hpp"
|
||||
#include <string>
|
||||
#include <math.h>
|
||||
|
||||
#ifdef OV_VULKAN
|
||||
#include "gpu.h"
|
||||
#endif // OV_VULKAN
|
||||
|
||||
namespace ovpose {
|
||||
|
||||
// Configures the estimator for the chosen model: model_type 0 uses a 192px
// input with a 48x48 output grid, any other value a 256px input with a 64x64
// grid. Also fills dist_x / dist_y so that dist_x[i][j] == j and
// dist_y[i][j] == i for every grid cell.
MoveNet::MoveNet(const int model_type) : Estimator() {
    const bool lightning = (model_type == 0);
    target_size = lightning ? 192 : 256;
    kpt_scale = lightning ? 0.02083333395421505 : 0.015625;
    feature_size = lightning ? 48 : 64;

    // Every row of dist_x is the same 0..feature_size-1 ramp.
    std::vector<float> column_index(feature_size);
    for (int col = 0; col < feature_size; ++col) {
        column_index[col] = static_cast<float>(col);
    }

    for (int row = 0; row < feature_size; ++row) {
        dist_y.push_back(std::vector<float>(feature_size, static_cast<float>(row)));
        dist_x.push_back(column_index);
    }
}
|
||||
|
||||
int MoveNet::ExtractKeypoints(const unsigned char* rgbdata,
|
||||
int img_width, int img_height,
|
||||
const ov::Rect& rect, std::vector<ov::Keypoint>* keypoints) {
|
||||
keypoints->clear();
|
||||
int w = rect.width;
|
||||
int h = rect.height;
|
||||
float scale = 1.f;
|
||||
if (w > h)
|
||||
{
|
||||
scale = (float)target_size / w;
|
||||
w = target_size;
|
||||
h = h * scale;
|
||||
}
|
||||
else
|
||||
{
|
||||
scale = (float)target_size / h;
|
||||
h = target_size;
|
||||
w = w * scale;
|
||||
}
|
||||
|
||||
size_t total_size = rect.width * rect.height * 3 * sizeof(unsigned char);
|
||||
unsigned char* data = (unsigned char*)malloc(total_size);
|
||||
const unsigned char *start_ptr = rgbdata;
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel for num_threads(num_threads)
|
||||
#endif
|
||||
for(size_t i = 0; i < rect.height; ++i) {
|
||||
const unsigned char* srcCursor = start_ptr + ((i + rect.y) * img_width + rect.x) * 3;
|
||||
unsigned char* dstCursor = data + i * rect.width * 3;
|
||||
memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * rect.width);
|
||||
}
|
||||
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(data, ncnn::Mat::PIXEL_RGB, rect.width, rect.height, w, h);
|
||||
int wpad = target_size - w;
|
||||
int hpad = target_size - h;
|
||||
ncnn::Mat in_pad;
|
||||
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
|
||||
//数据预处理
|
||||
in_pad.substract_mean_normalize(mean_vals, norm_vals);
|
||||
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
|
||||
ex.input("input", in_pad);
|
||||
|
||||
ncnn::Mat regress, center, heatmap, offset;
|
||||
|
||||
ex.extract("regress", regress);
|
||||
ex.extract("offset", offset);
|
||||
ex.extract("heatmap", heatmap);
|
||||
ex.extract("center", center);
|
||||
|
||||
float* center_data = (float*)center.data;
|
||||
float* heatmap_data = (float*)heatmap.data;
|
||||
float* offset_data = (float*)offset.data;
|
||||
|
||||
// int top_index = 0;
|
||||
// float top_score = 0;
|
||||
|
||||
int top_index = int(ov::argmax(center_data, center_data+center.h));
|
||||
float top_score = *std::max_element(center_data, center_data + center.h);
|
||||
|
||||
int ct_y = (top_index / feature_size);
|
||||
int ct_x = top_index - ct_y * feature_size;
|
||||
|
||||
std::vector<float> y_regress(num_joints), x_regress(num_joints);
|
||||
float* regress_data = (float*)regress.channel(ct_y).row(ct_x);
|
||||
for (size_t i = 0; i < num_joints; i++)
|
||||
{
|
||||
y_regress[i] = regress_data[i] + (float)ct_y;
|
||||
x_regress[i] = regress_data[i + num_joints] + (float)ct_x;
|
||||
}
|
||||
|
||||
ncnn::Mat kpt_scores = ncnn::Mat(feature_size * feature_size, num_joints, sizeof(float));
|
||||
float* scores_data = (float*)kpt_scores.data;
|
||||
for (int i = 0; i < feature_size; i++)
|
||||
{
|
||||
for (int j = 0; j < feature_size; j++)
|
||||
{
|
||||
std::vector<float> score;
|
||||
for (int c = 0; c < num_joints; c++)
|
||||
{
|
||||
float y = (dist_y[i][j] - y_regress[c]) * (dist_y[i][j] - y_regress[c]);
|
||||
float x = (dist_x[i][j] - x_regress[c]) * (dist_x[i][j] - x_regress[c]);
|
||||
float dist_weight = sqrt(y + x) + 1.8;
|
||||
scores_data[c* feature_size * feature_size +i* feature_size +j] = heatmap_data[i * feature_size * num_joints + j * num_joints + c] / dist_weight;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::vector<int> kpts_ys, kpts_xs;
|
||||
for (int i = 0; i < num_joints; i++)
|
||||
{
|
||||
// top_index = 0;
|
||||
// top_score = 0;
|
||||
top_index = int(ov::argmax(scores_data + feature_size * feature_size *i, scores_data + feature_size * feature_size *(i+1)));
|
||||
top_score = *std::max_element(scores_data + feature_size * feature_size * i, scores_data + feature_size * feature_size * (i + 1));
|
||||
|
||||
int top_y = (top_index / feature_size);
|
||||
int top_x = top_index - top_y * feature_size;
|
||||
kpts_ys.push_back(top_y);
|
||||
kpts_xs.push_back(top_x);
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_joints; i++)
|
||||
{
|
||||
float kpt_offset_x = offset_data[kpts_ys[i] * feature_size * num_joints*2 + kpts_xs[i] * num_joints * 2 + i * 2];
|
||||
float kpt_offset_y = offset_data[kpts_ys[i] * feature_size * num_joints * 2 + kpts_xs[i] * num_joints * 2 + i * 2+1];
|
||||
|
||||
float x = (kpts_xs[i] + kpt_offset_y) * kpt_scale * target_size;
|
||||
float y = (kpts_ys[i] + kpt_offset_x) * kpt_scale * target_size;
|
||||
|
||||
ov::Keypoint kpt;
|
||||
kpt.p = ov::Point2f((x - ((float)wpad / 2)) / scale + rect.x, (y - ((float)hpad / 2)) / scale + rect.y);
|
||||
kpt.score = heatmap_data[kpts_ys[i] * feature_size * num_joints + kpts_xs[i] * num_joints + i];
|
||||
keypoints->push_back(kpt);
|
||||
|
||||
}
|
||||
|
||||
free(data);
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
31
src/pose/estimator/movenet/movenet.hpp
Normal file
31
src/pose/estimator/movenet/movenet.hpp
Normal file
@@ -0,0 +1,31 @@
|
||||
#ifndef _POSE_MOVENET_ESTIMATOR_H_
|
||||
#define _POSE_MOVENET_ESTIMATOR_H_
|
||||
|
||||
#include "../estimator.hpp"
|
||||
#include <vector>
|
||||
#include "net.h"
|
||||
|
||||
namespace ovpose {
|
||||
|
||||
class MoveNet: public Estimator {
|
||||
public:
|
||||
MoveNet(const int model_type); // 0: ligntning, 1: thunder
|
||||
int ExtractKeypoints(const unsigned char* rgbdata,
|
||||
int img_width, int img_height,
|
||||
const ov::Rect& rect,
|
||||
std::vector<ov::Keypoint>* keypoints);
|
||||
|
||||
private:
|
||||
int feature_size;
|
||||
float kpt_scale;
|
||||
int target_size;
|
||||
const int num_joints = 17;
|
||||
const float mean_vals[3] = {127.5f, 127.5f, 127.5f};
|
||||
const float norm_vals[3] = {1/ 127.5f, 1 / 127.5f, 1 / 127.5f};
|
||||
std::vector<std::vector<float>> dist_y, dist_x;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // !_POSE_MOVENET_ESTIMATOR_H_
|
||||
|
@@ -13,6 +13,7 @@ int UltralightEstimator::ExtractKeypoints(const unsigned char* rgbdata,
|
||||
keypoints->clear();
|
||||
int w = rect.width;
|
||||
int h = rect.height;
|
||||
|
||||
size_t total_size = w * h * 3 * sizeof(unsigned char);
|
||||
unsigned char* data = (unsigned char*)malloc(total_size);
|
||||
const unsigned char *start_ptr = rgbdata;
|
||||
@@ -29,6 +30,8 @@ int UltralightEstimator::ExtractKeypoints(const unsigned char* rgbdata,
|
||||
in.substract_mean_normalize(meanVals, normVals);
|
||||
|
||||
ncnn::Extractor ex = net_->create_extractor();
|
||||
ex.set_light_mode(light_mode_);
|
||||
ex.set_num_threads(num_threads);
|
||||
ex.input("data", in);
|
||||
ncnn::Mat out;
|
||||
ex.extract("hybridsequential0_conv7_fwd", out);
|
||||
|
Reference in New Issue
Block a user