diff --git a/README.md b/README.md index 2f20778..0813a9f 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM ## Features - face + - aligner (for face keypoints alignment) - detecter (for face location and keypoints detection) - mtcnn [Google Drive](https://drive.google.com/drive/folders/14ToHyDXZr4Ihuk8WYp1mVS7QnVxnzEjn?usp=sharing) - centerface [Google Drive](https://drive.google.com/drive/folders/1xMhO6aCnkkjt90Fh8BxVD_JHB3QJ2q-q?usp=sharing) @@ -38,6 +39,7 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM - openpose [Google Drive](https://drive.google.com/drive/folders/1Q2mq7dOE-eHsvu4BYpBaWVLkU5roKsm5?usp=sharing) - estimator (for pose estimation) - ultralight [Google Drive](https://drive.google.com/drive/folders/15b-I5HDyGe2WLb-TO85SJYmnYONvGOKh?usp=sharing) + - movenet [Google Drive](https://drive.google.com/drive/folders/14zgKk0tro1kjRrSTs0EAlEKrV8Q4XA34?usp=sharing) - hand - detector (for hand detect) - yolox [Google Drive](https://drive.google.com/drive/folders/1lNm5X6DJ1ZXVaqg54rXnRhvPfC5lAxlH?usp=sharing) diff --git a/go/common/estimator.go b/go/common/estimator.go index 0d4529a..78598d3 100644 --- a/go/common/estimator.go +++ b/go/common/estimator.go @@ -24,6 +24,11 @@ func SetEstimatorThreads(e Estimator, n int) { C.set_num_threads((C.IEstimator)(e.Pointer()), C.int(n)) } +// SetEstimatorLightMode set ncnn net opt.lightmode +func SetEstimatorLightMode(e Estimator, mode bool) { + C.set_light_mode((C.IEstimator)(e.Pointer()), C.bool(mode)) +} + // DestroyEstimator destory an Estimator func DestroyEstimator(e Estimator) { C.destroy_estimator((C.IEstimator)(e.Pointer())) diff --git a/go/examples/pose/main.go b/go/examples/pose/main.go index 073feff..bc02d53 100644 --- a/go/examples/pose/main.go +++ b/go/examples/pose/main.go @@ -35,6 +35,8 @@ func main() { common.SetEstimatorThreads(d, cpuCores) for mid, m := range []estimator.Estimator{ 
ultralightEstimator(modelPath), + moveNetEstimator(modelPath, estimator.MoveNetType_Lightning), + moveNetEstimator(modelPath, estimator.MoveNetType_Thunder), } { defer m.Destroy() common.SetEstimatorThreads(d, cpuCores) @@ -70,6 +72,19 @@ func ultralightEstimator(modelPath string) estimator.Estimator { return d } +func moveNetEstimator(modelPath string, modelType estimator.MoveNetType) estimator.Estimator { + if modelType == estimator.MoveNetType_Lightning { + modelPath = filepath.Join(modelPath, "movenet/lightning") + } else { + modelPath = filepath.Join(modelPath, "movenet/thunder") + } + d := estimator.NewMoveNet(modelType) + if err := d.LoadModel(modelPath); err != nil { + log.Fatalln(err) + } + return d +} + func detect(d detecter.Detecter, m estimator.Estimator, imgPath string, filename string, did int, mid int) { inPath := filepath.Join(imgPath, filename) imgSrc, err := loadImage(inPath) diff --git a/go/pose/estimator/movenet.go b/go/pose/estimator/movenet.go new file mode 100644 index 0000000..b6014e0 --- /dev/null +++ b/go/pose/estimator/movenet.go @@ -0,0 +1,55 @@ +package estimator + +/* +#include +#include +#include "openvision/pose/estimator.h" +*/ +import "C" +import ( + "unsafe" + + "github.com/bububa/openvision/go/common" +) + +// MoveNetType (lightning/thunder) +type MoveNetType = int + +const ( + // MoveNetType_Lightning lightning model + MoveNetType_Lightning MoveNetType = 0 + // MoveNetType_Thunder thunder model + MoveNetType_Thunder MoveNetType = 1 +) + +// MoveNet represents movenet estimator +type MoveNet struct { + d C.IPoseEstimator +} + +// NewMoveNet returns a new MoveNet +func NewMoveNet(modelType MoveNetType) *MoveNet { + return &MoveNet{ + d: C.new_movenet(C.int(modelType)), + } +} + +// Destroy free detecter +func (d *MoveNet) Destroy() { + common.DestroyEstimator(d) +} + +// Pointer implement Estimator interface +func (d *MoveNet) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) +} + +// LoadModel load model for detecter +func 
(d *MoveNet) LoadModel(modelPath string) error { + return common.EstimatorLoadModel(d, modelPath) +} + +// ExtractKeypoints implement Detecter interface +func (d *MoveNet) ExtractKeypoints(img *common.Image, rect common.Rectangle) ([]common.Keypoint, error) { + return ExtractKeypoints(d, img, rect) +} diff --git a/src/common/common.cpp b/src/common/common.cpp index 1accbec..6c1b6ee 100644 --- a/src/common/common.cpp +++ b/src/common/common.cpp @@ -51,6 +51,10 @@ void set_num_threads(IEstimator d, int n) { static_cast(d)->set_num_threads(n); } +void set_light_mode(IEstimator d, bool mode) { + static_cast(d)->set_light_mode(mode); +} + void FreePoint2fVector(Point2fVector* p) { if (p->points != NULL) { free(p->points); @@ -167,6 +171,13 @@ void Estimator::set_num_threads(int n) { } } +void Estimator::set_light_mode(bool mode) { + if (net_) { + net_->opt.lightmode = mode; + light_mode_ = mode; + } +} + int RatioAnchors(const Rect & anchor, const std::vector& ratios, std::vector* anchors, int threads_num) { diff --git a/src/common/common.h b/src/common/common.h index 5b6e58b..1786397 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -68,6 +68,7 @@ void set_omp_num_threads(int n); int load_model(IEstimator e, const char* root_path); void destroy_estimator(IEstimator e); void set_num_threads(IEstimator e, int n); +void set_light_mode(IEstimator e, bool mode); typedef struct Point2fVector { Point2f* points; diff --git a/src/common/common.hpp b/src/common/common.hpp index f94b1bb..a9af502 100644 --- a/src/common/common.hpp +++ b/src/common/common.hpp @@ -27,9 +27,11 @@ public: virtual ~Estimator(); virtual int LoadModel(const char* root_path); virtual void set_num_threads(int n); + virtual void set_light_mode(bool mode); protected: ncnn::Net* net_; bool initialized_ = false; + bool light_mode_ = true; }; // Wrapper for an individual cv::cvSize @@ -240,6 +242,11 @@ float sigmoid(float x); void EnlargeRect(const float& scale, Rect* rect); void RectifyRect(Rect* 
rect); +template <class ForwardIterator> +inline static size_t argmax(ForwardIterator first, ForwardIterator last) { + return std::distance(first, std::max_element(first, last)); +}; + } #endif // !_COMMON_H_ diff --git a/src/face/detecter/anticonv/anticonv.cpp b/src/face/detecter/anticonv/anticonv.cpp index 7bbc90c..7f47779 100644 --- a/src/face/detecter/anticonv/anticonv.cpp +++ b/src/face/detecter/anticonv/anticonv.cpp @@ -42,9 +42,12 @@ int AntiConv::DetectFace(const unsigned char* rgbdata, float factor_x = static_cast<float>(img_width) / inputSize_.width; float factor_y = static_cast<float>(img_height) / inputSize_.height; - ncnn::Extractor ex = net_->create_extractor(); ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, inputSize_.width, inputSize_.height); + + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); std::vector faces_tmp; diff --git a/src/face/detecter/centerface/centerface.cpp b/src/face/detecter/centerface/centerface.cpp index 6bc61ac..54fe10f 100644 --- a/src/face/detecter/centerface/centerface.cpp +++ b/src/face/detecter/centerface/centerface.cpp @@ -24,7 +24,10 @@ int CenterFace::DetectFace(const unsigned char* rgbdata, ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, img_width_new, img_height_new); + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("input.1", in); ncnn::Mat mat_heatmap, mat_scale, mat_offset, mat_landmark; ex.extract("537", mat_heatmap); diff --git a/src/face/detecter/mtcnn/mtcnn.cpp b/src/face/detecter/mtcnn/mtcnn.cpp index fe7d1e9..e713689 100644 --- a/src/face/detecter/mtcnn/mtcnn.cpp +++ b/src/face/detecter/mtcnn/mtcnn.cpp @@ -122,8 +122,9 @@ int Mtcnn::PDetect(const ncnn::Mat & img_in, ncnn::Mat img_resized; ncnn::resize_bilinear(img_in, img_resized, w, h); ncnn::Extractor ex = 
pnet_->create_extractor(); - //ex.set_num_threads(2); - ex.set_light_mode(true); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); + ex.input("data", img_resized); ncnn::Mat score_mat, location_mat; ex.extract("prob1", score_mat); @@ -177,8 +178,8 @@ int Mtcnn::RDetect(const ncnn::Mat & img_in, ncnn::copy_cut_border(img_in, img_face, face.y, img_in.h - face.br().y, face.x, img_in.w - face.br().x); ncnn::resize_bilinear(img_face, img_resized, 24, 24); ncnn::Extractor ex = rnet_->create_extractor(); - ex.set_light_mode(true); - // ex.set_num_threads(2); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", img_resized); ncnn::Mat score_mat, location_mat; ex.extract("prob1", score_mat); @@ -214,8 +215,8 @@ int Mtcnn::ODetect(const ncnn::Mat & img_in, ncnn::resize_bilinear(img_face, img_resized, 48, 48); ncnn::Extractor ex = onet_->create_extractor(); - ex.set_light_mode(true); - // ex.set_num_threads(2); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", img_resized); ncnn::Mat score_mat, location_mat, keypoints_mat; ex.extract("prob1", score_mat); diff --git a/src/face/detecter/retinaface/retinaface.cpp b/src/face/detecter/retinaface/retinaface.cpp index 50a03df..213f293 100644 --- a/src/face/detecter/retinaface/retinaface.cpp +++ b/src/face/detecter/retinaface/retinaface.cpp @@ -42,9 +42,12 @@ int RetinaFace::DetectFace(const unsigned char* rgbdata, float factor_x = static_cast(img_width) / inputSize_.width; float factor_y = static_cast(img_height) / inputSize_.height; - ncnn::Extractor ex = net_->create_extractor(); ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, inputSize_.width, inputSize_.height); + + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); std::vector faces_tmp; diff --git a/src/face/detecter/scrfd/scrfd.cpp 
b/src/face/detecter/scrfd/scrfd.cpp index 00a7931..fd521fe 100644 --- a/src/face/detecter/scrfd/scrfd.cpp +++ b/src/face/detecter/scrfd/scrfd.cpp @@ -128,6 +128,8 @@ int Scrfd::DetectFace(const unsigned char* rgbdata, in_pad.substract_mean_normalize(mean_vals, norm_vals); ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("input.1", in_pad); diff --git a/src/face/detecter/yoloface/yoloface.cpp b/src/face/detecter/yoloface/yoloface.cpp index 98f3b33..8f07af6 100644 --- a/src/face/detecter/yoloface/yoloface.cpp +++ b/src/face/detecter/yoloface/yoloface.cpp @@ -154,7 +154,8 @@ int YoloFace::DetectFace(const unsigned char* rgbdata, in_pad.substract_mean_normalize(0, norm_vals); ncnn::Extractor ex = net_->create_extractor(); - + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in_pad); std::vector proposals; diff --git a/src/face/hopenet/hopenet.cpp b/src/face/hopenet/hopenet.cpp index e773897..381e712 100644 --- a/src/face/hopenet/hopenet.cpp +++ b/src/face/hopenet/hopenet.cpp @@ -56,6 +56,8 @@ int Hopenet::Detect(const unsigned char* rgbdata, ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face, ncnn::Mat::PIXEL_RGB2GRAY, roi.width, roi.height, 48, 48); ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); ncnn::Mat output; diff --git a/src/face/landmarker/insightface/insightface.cpp b/src/face/landmarker/insightface/insightface.cpp index 96da4b1..cac54ed 100644 --- a/src/face/landmarker/insightface/insightface.cpp +++ b/src/face/landmarker/insightface/insightface.cpp @@ -40,11 +40,13 @@ int InsightfaceLandmarker::ExtractKeypoints(const unsigned char* rgbdata, unsigned char* dstCursor = img_face + i * face_enlarged.width * 3; memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * face_enlarged.width); } + ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face, + 
ncnn::Mat::PIXEL_RGB, face_enlarged.width, face_enlarged.height, 192, 192); // 4 do inference ncnn::Extractor ex = net_->create_extractor(); - ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face, - ncnn::Mat::PIXEL_RGB, face_enlarged.width, face_enlarged.height, 192, 192); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); ncnn::Mat out; ex.extract("fc1", out); diff --git a/src/face/landmarker/scrfd/scrfd.cpp b/src/face/landmarker/scrfd/scrfd.cpp index dcbc107..0f2b862 100644 --- a/src/face/landmarker/scrfd/scrfd.cpp +++ b/src/face/landmarker/scrfd/scrfd.cpp @@ -39,9 +39,12 @@ int ScrfdLandmarker::ExtractKeypoints(const unsigned char* rgbdata, memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * box.width); } - ncnn::Extractor ex = net_->create_extractor(); ncnn::Mat ncnn_in = ncnn::Mat::from_pixels_resize(img_face, ncnn::Mat::PIXEL_RGB, box.width, box.height, 192, 192); ncnn_in.substract_mean_normalize(means, norms); + + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("input.1",ncnn_in); ncnn::Mat ncnn_out; ex.extract("482",ncnn_out); diff --git a/src/face/landmarker/zqlandmarker/zqlandmarker.cpp b/src/face/landmarker/zqlandmarker/zqlandmarker.cpp index 3be01c3..ba74f5a 100644 --- a/src/face/landmarker/zqlandmarker/zqlandmarker.cpp +++ b/src/face/landmarker/zqlandmarker/zqlandmarker.cpp @@ -29,10 +29,13 @@ int ZQLandmarker::ExtractKeypoints(const unsigned char* rgbdata, unsigned char* dstCursor = img_face + i * face.width * 3; memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * face.width); } - ncnn::Extractor ex = net_->create_extractor(); ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face, ncnn::Mat::PIXEL_RGB, face.width, face.height, 112, 112); in.substract_mean_normalize(meanVals, normVals); + + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); 
ncnn::Mat out; ex.extract("bn6_3", out); diff --git a/src/face/recognizer/mobilefacenet/mobilefacenet.cpp b/src/face/recognizer/mobilefacenet/mobilefacenet.cpp index 51440d1..489fd11 100644 --- a/src/face/recognizer/mobilefacenet/mobilefacenet.cpp +++ b/src/face/recognizer/mobilefacenet/mobilefacenet.cpp @@ -31,11 +31,14 @@ int Mobilefacenet::ExtractFeature(const unsigned char* rgbdata, ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face, ncnn::Mat::PIXEL_RGB, face.width, face.height, 112, 112); - feature->resize(kFaceFeatureDim); ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); ncnn::Mat out; ex.extract("fc1", out); + + feature->resize(kFaceFeatureDim); for (int i = 0; i < kFaceFeatureDim; ++i) { feature->at(i) = out[i]; } diff --git a/src/hand/detecter/nanodet/nanodet.cpp b/src/hand/detecter/nanodet/nanodet.cpp index 9171910..2c48cd7 100644 --- a/src/hand/detecter/nanodet/nanodet.cpp +++ b/src/hand/detecter/nanodet/nanodet.cpp @@ -143,7 +143,8 @@ int Nanodet::Detect(const unsigned char* rgbdata, in_pad.substract_mean_normalize(mean_vals, norm_vals); ncnn::Extractor ex = net_->create_extractor(); - //__android_log_print(ANDROID_LOG_WARN, "ncnn","input w:%d,h:%d",in_pad.w,in_pad.h); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("input.1", in_pad); std::vector proposals; diff --git a/src/hand/detecter/yolox/yolox.cpp b/src/hand/detecter/yolox/yolox.cpp index 6021a0c..78e2ec7 100644 --- a/src/hand/detecter/yolox/yolox.cpp +++ b/src/hand/detecter/yolox/yolox.cpp @@ -97,6 +97,8 @@ int Yolox::Detect(const unsigned char* rgbdata, in_pad.substract_mean_normalize(mean_vals, norm_vals); ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("input", in_pad); ncnn::Mat out; ex.extract("output", out); diff --git a/src/hand/pose/handpose/handpose.cpp b/src/hand/pose/handpose/handpose.cpp 
index 2ddc6af..12613b6 100644 --- a/src/hand/pose/handpose/handpose.cpp +++ b/src/hand/pose/handpose/handpose.cpp @@ -32,10 +32,12 @@ int HandPose::Detect(const unsigned char* rgbdata, const float meanVals[3] = { 128.0f, 128.0f, 128.0f }; const float normVals[3] = { 0.00390625f, 0.00390625f, 0.00390625f }; ncnn_in.substract_mean_normalize(meanVals, normVals); - ncnn::Extractor ex1 = net_->create_extractor(); - ex1.input("input", ncnn_in); + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); + ex.input("input", ncnn_in); ncnn::Mat ncnn_out; - ex1.extract("output", ncnn_out); + ex.extract("output", ncnn_out); keypoints.resize(21); for (int c = 0; c < ncnn_out.c; c++) diff --git a/src/pose/detecter/openpose/openpose.cpp b/src/pose/detecter/openpose/openpose.cpp index d131789..9245a24 100644 --- a/src/pose/detecter/openpose/openpose.cpp +++ b/src/pose/detecter/openpose/openpose.cpp @@ -51,6 +51,8 @@ int OpenPose::Detect(const unsigned char* rgbdata, ncnn::Mat pafs; ncnn::Mat heatmaps; ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); ex.extract("stage_1_output_1_heatmaps", heatmaps); // or stage_0_output_1_heatmaps ex.extract("stage_1_output_0_pafs", pafs); // or stage_0_output_0_pafs diff --git a/src/pose/detecter/ultralight/ultralight.cpp b/src/pose/detecter/ultralight/ultralight.cpp index 7f9de64..1855255 100644 --- a/src/pose/detecter/ultralight/ultralight.cpp +++ b/src/pose/detecter/ultralight/ultralight.cpp @@ -22,6 +22,8 @@ int Ultralight::Detect(const unsigned char* rgbdata, in.substract_mean_normalize(mean_vals, norm_vals); ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); ncnn::Mat out; ex.extract("output", out); diff --git a/src/pose/estimator.h b/src/pose/estimator.h index a1e519d..63d5890 100644 --- 
a/src/pose/estimator.h +++ b/src/pose/estimator.h @@ -9,6 +9,7 @@ extern "C" { #endif typedef void* IPoseEstimator; IPoseEstimator new_ultralight_estimator(); + IPoseEstimator new_movenet(const int model_type); int extract_pose_keypoints(IPoseEstimator d, const unsigned char* rgbdata, int img_width, int img_height, const Rect* rect, KeypointVector* keypoints); diff --git a/src/pose/estimator/estimator.cpp b/src/pose/estimator/estimator.cpp index f759ef4..15cac73 100644 --- a/src/pose/estimator/estimator.cpp +++ b/src/pose/estimator/estimator.cpp @@ -1,10 +1,15 @@ #include "../estimator.h" #include "ultralight/ultralight.hpp" +#include "movenet/movenet.hpp" IPoseEstimator new_ultralight_estimator() { return new ovpose::UltralightEstimator(); } +IPoseEstimator new_movenet(const int model_type) { + return new ovpose::MoveNet(model_type); +} + int extract_pose_keypoints(IPoseEstimator d, const unsigned char* rgbdata, int img_width, int img_height, const Rect* rect, KeypointVector* keypoints) { std::vector points; int ret = static_cast(d)->ExtractKeypoints(rgbdata, img_width, img_height, *rect, &points); @@ -24,4 +29,8 @@ namespace ovpose { Estimator* UltralightEstimatorFactory::CreateEstimator() { return new UltralightEstimator(); } + +Estimator* MoveNetFactory::CreateEstimator(const int model_type) { + return new MoveNet(model_type); +} } diff --git a/src/pose/estimator/estimator.hpp b/src/pose/estimator/estimator.hpp index e113328..a2adb5b 100644 --- a/src/pose/estimator/estimator.hpp +++ b/src/pose/estimator/estimator.hpp @@ -24,5 +24,12 @@ public: ~UltralightEstimatorFactory() {} Estimator* CreateEstimator(); }; + +class MoveNetFactory: public EstimatorFactory { +public: + MoveNetFactory(const int model_type) {} + ~MoveNetFactory() {} + Estimator* CreateEstimator(const int model_type); +}; } #endif // !_POSE_ESTIMATOR_H diff --git a/src/pose/estimator/movenet/movenet.cpp b/src/pose/estimator/movenet/movenet.cpp new file mode 100644 index 0000000..c870666 --- 
/dev/null +++ b/src/pose/estimator/movenet/movenet.cpp @@ -0,0 +1,157 @@ +#include "movenet.hpp" +#include +#include + +#ifdef OV_VULKAN +#include "gpu.h" +#endif // OV_VULKAN + +namespace ovpose { + +MoveNet::MoveNet(const int model_type) : Estimator() { + if (model_type == 0) { + target_size = 192; + kpt_scale = 0.02083333395421505; + feature_size = 48; + } else { + target_size = 256; + kpt_scale = 0.015625; + feature_size = 64; + } + for (int i = 0; i < feature_size; i++) + { + std::vector x, y; + for (int j = 0; j < feature_size; j++) + { + x.push_back(j); + y.push_back(i); + } + dist_y.push_back(y); + dist_x.push_back(x); + } +} + +int MoveNet::ExtractKeypoints(const unsigned char* rgbdata, + int img_width, int img_height, + const ov::Rect& rect, std::vector* keypoints) { + keypoints->clear(); + int w = rect.width; + int h = rect.height; + float scale = 1.f; + if (w > h) + { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } + else + { + scale = (float)target_size / h; + h = target_size; + w = w * scale; + } + + size_t total_size = rect.width * rect.height * 3 * sizeof(unsigned char); + unsigned char* data = (unsigned char*)malloc(total_size); + const unsigned char *start_ptr = rgbdata; +#if defined(_OPENMP) +#pragma omp parallel for num_threads(num_threads) +#endif + for(size_t i = 0; i < rect.height; ++i) { + const unsigned char* srcCursor = start_ptr + ((i + rect.y) * img_width + rect.x) * 3; + unsigned char* dstCursor = data + i * rect.width * 3; + memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * rect.width); + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(data, ncnn::Mat::PIXEL_RGB, rect.width, rect.height, w, h); + int wpad = target_size - w; + int hpad = target_size - h; + ncnn::Mat in_pad; + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f); + //数据预处理 + in_pad.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = 
net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); + + ex.input("input", in_pad); + + ncnn::Mat regress, center, heatmap, offset; + + ex.extract("regress", regress); + ex.extract("offset", offset); + ex.extract("heatmap", heatmap); + ex.extract("center", center); + + float* center_data = (float*)center.data; + float* heatmap_data = (float*)heatmap.data; + float* offset_data = (float*)offset.data; + + // int top_index = 0; + // float top_score = 0; + + int top_index = int(ov::argmax(center_data, center_data+center.h)); + float top_score = *std::max_element(center_data, center_data + center.h); + + int ct_y = (top_index / feature_size); + int ct_x = top_index - ct_y * feature_size; + + std::vector y_regress(num_joints), x_regress(num_joints); + float* regress_data = (float*)regress.channel(ct_y).row(ct_x); + for (size_t i = 0; i < num_joints; i++) + { + y_regress[i] = regress_data[i] + (float)ct_y; + x_regress[i] = regress_data[i + num_joints] + (float)ct_x; + } + + ncnn::Mat kpt_scores = ncnn::Mat(feature_size * feature_size, num_joints, sizeof(float)); + float* scores_data = (float*)kpt_scores.data; + for (int i = 0; i < feature_size; i++) + { + for (int j = 0; j < feature_size; j++) + { + std::vector score; + for (int c = 0; c < num_joints; c++) + { + float y = (dist_y[i][j] - y_regress[c]) * (dist_y[i][j] - y_regress[c]); + float x = (dist_x[i][j] - x_regress[c]) * (dist_x[i][j] - x_regress[c]); + float dist_weight = sqrt(y + x) + 1.8; + scores_data[c* feature_size * feature_size +i* feature_size +j] = heatmap_data[i * feature_size * num_joints + j * num_joints + c] / dist_weight; + } + } + } + std::vector kpts_ys, kpts_xs; + for (int i = 0; i < num_joints; i++) + { + // top_index = 0; + // top_score = 0; + top_index = int(ov::argmax(scores_data + feature_size * feature_size *i, scores_data + feature_size * feature_size *(i+1))); + top_score = *std::max_element(scores_data + feature_size * feature_size * i, scores_data 
+ feature_size * feature_size * (i + 1)); + + int top_y = (top_index / feature_size); + int top_x = top_index - top_y * feature_size; + kpts_ys.push_back(top_y); + kpts_xs.push_back(top_x); + } + + for (int i = 0; i < num_joints; i++) + { + float kpt_offset_x = offset_data[kpts_ys[i] * feature_size * num_joints*2 + kpts_xs[i] * num_joints * 2 + i * 2]; + float kpt_offset_y = offset_data[kpts_ys[i] * feature_size * num_joints * 2 + kpts_xs[i] * num_joints * 2 + i * 2+1]; + + float x = (kpts_xs[i] + kpt_offset_y) * kpt_scale * target_size; + float y = (kpts_ys[i] + kpt_offset_x) * kpt_scale * target_size; + + ov::Keypoint kpt; + kpt.p = ov::Point2f((x - ((float)wpad / 2)) / scale + rect.x, (y - ((float)hpad / 2)) / scale + rect.y); + kpt.score = heatmap_data[kpts_ys[i] * feature_size * num_joints + kpts_xs[i] * num_joints + i]; + keypoints->push_back(kpt); + + } + + free(data); + return 0; +} + +} diff --git a/src/pose/estimator/movenet/movenet.hpp b/src/pose/estimator/movenet/movenet.hpp new file mode 100644 index 0000000..058561b --- /dev/null +++ b/src/pose/estimator/movenet/movenet.hpp @@ -0,0 +1,31 @@ +#ifndef _POSE_MOVENET_ESTIMATOR_H_ +#define _POSE_MOVENET_ESTIMATOR_H_ + +#include "../estimator.hpp" +#include <vector> +#include "net.h" + +namespace ovpose { + +class MoveNet: public Estimator { +public: + MoveNet(const int model_type); // 0: lightning, 1: thunder + int ExtractKeypoints(const unsigned char* rgbdata, + int img_width, int img_height, + const ov::Rect& rect, + std::vector<ov::Keypoint>* keypoints); + +private: + int feature_size; + float kpt_scale; + int target_size; + const int num_joints = 17; + const float mean_vals[3] = {127.5f, 127.5f, 127.5f}; + const float norm_vals[3] = {1/ 127.5f, 1 / 127.5f, 1 / 127.5f}; + std::vector<std::vector<float>> dist_y, dist_x; +}; + +} + +#endif // !_POSE_MOVENET_ESTIMATOR_H_ + diff --git a/src/pose/estimator/ultralight/ultralight.cpp b/src/pose/estimator/ultralight/ultralight.cpp index a1dbf8a..44b3680 100644 --- 
a/src/pose/estimator/ultralight/ultralight.cpp +++ b/src/pose/estimator/ultralight/ultralight.cpp @@ -13,6 +13,7 @@ int UltralightEstimator::ExtractKeypoints(const unsigned char* rgbdata, keypoints->clear(); int w = rect.width; int h = rect.height; + size_t total_size = w * h * 3 * sizeof(unsigned char); unsigned char* data = (unsigned char*)malloc(total_size); const unsigned char *start_ptr = rgbdata; @@ -29,6 +30,8 @@ int UltralightEstimator::ExtractKeypoints(const unsigned char* rgbdata, in.substract_mean_normalize(meanVals, normVals); ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); ncnn::Mat out; ex.extract("hybridsequential0_conv7_fwd", out);