diff --git a/README.md b/README.md index 2f20778..0813a9f 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM ## Features - face + - aligner (for face keypoints alignment) - detecter (for face location and keypoints detection) - mtcnn [Google Drive](https://drive.google.com/drive/folders/14ToHyDXZr4Ihuk8WYp1mVS7QnVxnzEjn?usp=sharing) - centerface [Google Drive](https://drive.google.com/drive/folders/1xMhO6aCnkkjt90Fh8BxVD_JHB3QJ2q-q?usp=sharing) @@ -38,6 +39,7 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM - openpose [Google Drive](https://drive.google.com/drive/folders/1Q2mq7dOE-eHsvu4BYpBaWVLkU5roKsm5?usp=sharing) - estimator (for pose estimation) - ultralight [Google Drive](https://drive.google.com/drive/folders/15b-I5HDyGe2WLb-TO85SJYmnYONvGOKh?usp=sharing) + - movenet [Google Drive](https://drive.google.com/drive/folders/14zgKk0tro1kjRrSTs0EAlEKrV8Q4XA34?usp=sharing) - hand - detector (for hand detect) - yolox [Google Drive](https://drive.google.com/drive/folders/1lNm5X6DJ1ZXVaqg54rXnRhvPfC5lAxlH?usp=sharing) diff --git a/go/common/estimator.go b/go/common/estimator.go index 0d4529a..78598d3 100644 --- a/go/common/estimator.go +++ b/go/common/estimator.go @@ -24,6 +24,11 @@ func SetEstimatorThreads(e Estimator, n int) { C.set_num_threads((C.IEstimator)(e.Pointer()), C.int(n)) } +// SetEstimatorLightMode set ncnn net opt.lightmode +func SetEstimatorLightMode(e Estimator, mode bool) { + C.set_light_mode((C.IEstimator)(e.Pointer()), C.bool(mode)) +} + // DestroyEstimator destory an Estimator func DestroyEstimator(e Estimator) { C.destroy_estimator((C.IEstimator)(e.Pointer())) diff --git a/go/examples/pose/main.go b/go/examples/pose/main.go index 073feff..bc02d53 100644 --- a/go/examples/pose/main.go +++ b/go/examples/pose/main.go @@ -35,6 +35,8 @@ func main() { common.SetEstimatorThreads(d, cpuCores) for mid, m := range []estimator.Estimator{ 
ultralightEstimator(modelPath), + moveNetEstimator(modelPath, estimator.MoveNetType_Lightning), + moveNetEstimator(modelPath, estimator.MoveNetType_Thunder), } { defer m.Destroy() common.SetEstimatorThreads(d, cpuCores) @@ -70,6 +72,19 @@ func ultralightEstimator(modelPath string) estimator.Estimator { return d } +func moveNetEstimator(modelPath string, modelType estimator.MoveNetType) estimator.Estimator { + if modelType == estimator.MoveNetType_Lightning { + modelPath = filepath.Join(modelPath, "movenet/lightning") + } else { + modelPath = filepath.Join(modelPath, "movenet/thunder") + } + d := estimator.NewMoveNet(modelType) + if err := d.LoadModel(modelPath); err != nil { + log.Fatalln(err) + } + return d +} + func detect(d detecter.Detecter, m estimator.Estimator, imgPath string, filename string, did int, mid int) { inPath := filepath.Join(imgPath, filename) imgSrc, err := loadImage(inPath) diff --git a/go/pose/estimator/movenet.go b/go/pose/estimator/movenet.go new file mode 100644 index 0000000..b6014e0 --- /dev/null +++ b/go/pose/estimator/movenet.go @@ -0,0 +1,55 @@ +package estimator + +/* +#include +#include +#include "openvision/pose/estimator.h" +*/ +import "C" +import ( + "unsafe" + + "github.com/bububa/openvision/go/common" +) + +// MoveNetType (lightning/thunder) +type MoveNetType = int + +const ( + // MoveNetType_Lightning lightning model + MoveNetType_Lightning MoveNetType = 0 + // MoveNetType_Thunder thunder model + MoveNetType_Thunder MoveNetType = 1 +) + +// MoveNet represents movenet estimator +type MoveNet struct { + d C.IPoseEstimator +} + +// NewMoveNet returns a new MoveNet +func NewMoveNet(modelType MoveNetType) *MoveNet { + return &MoveNet{ + d: C.new_movenet(C.int(modelType)), + } +} + +// Destroy free detecter +func (d *MoveNet) Destroy() { + common.DestroyEstimator(d) +} + +// Pointer implement Estimator interface +func (d *MoveNet) Pointer() unsafe.Pointer { + return unsafe.Pointer(d.d) +} + +// LoadModel load model for detecter +func 
(d *MoveNet) LoadModel(modelPath string) error { + return common.EstimatorLoadModel(d, modelPath) +} + +// ExtractKeypoints implement Detecter interface +func (d *MoveNet) ExtractKeypoints(img *common.Image, rect common.Rectangle) ([]common.Keypoint, error) { + return ExtractKeypoints(d, img, rect) +} diff --git a/src/common/common.cpp b/src/common/common.cpp index 1accbec..6c1b6ee 100644 --- a/src/common/common.cpp +++ b/src/common/common.cpp @@ -51,6 +51,10 @@ void set_num_threads(IEstimator d, int n) { static_cast(d)->set_num_threads(n); } +void set_light_mode(IEstimator d, bool mode) { + static_cast(d)->set_light_mode(mode); +} + void FreePoint2fVector(Point2fVector* p) { if (p->points != NULL) { free(p->points); @@ -167,6 +171,13 @@ void Estimator::set_num_threads(int n) { } } +void Estimator::set_light_mode(bool mode) { + if (net_) { + net_->opt.lightmode = mode; + light_mode_ = mode; + } +} + int RatioAnchors(const Rect & anchor, const std::vector& ratios, std::vector* anchors, int threads_num) { diff --git a/src/common/common.h b/src/common/common.h index 5b6e58b..1786397 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -68,6 +68,7 @@ void set_omp_num_threads(int n); int load_model(IEstimator e, const char* root_path); void destroy_estimator(IEstimator e); void set_num_threads(IEstimator e, int n); +void set_light_mode(IEstimator e, bool mode); typedef struct Point2fVector { Point2f* points; diff --git a/src/common/common.hpp b/src/common/common.hpp index f94b1bb..a9af502 100644 --- a/src/common/common.hpp +++ b/src/common/common.hpp @@ -27,9 +27,11 @@ public: virtual ~Estimator(); virtual int LoadModel(const char* root_path); virtual void set_num_threads(int n); + virtual void set_light_mode(bool mode); protected: ncnn::Net* net_; bool initialized_ = false; + bool light_mode_ = true; }; // Wrapper for an individual cv::cvSize @@ -240,6 +242,11 @@ float sigmoid(float x); void EnlargeRect(const float& scale, Rect* rect); void RectifyRect(Rect* 
rect); +template <class ForwardIterator> +inline static size_t argmax(ForwardIterator first, ForwardIterator last) { + return std::distance(first, std::max_element(first, last)); +}; + } #endif // !_COMMON_H_ diff --git a/src/face/detecter/anticonv/anticonv.cpp b/src/face/detecter/anticonv/anticonv.cpp index 7bbc90c..7f47779 100644 --- a/src/face/detecter/anticonv/anticonv.cpp +++ b/src/face/detecter/anticonv/anticonv.cpp @@ -42,9 +42,12 @@ int AntiConv::DetectFace(const unsigned char* rgbdata, float factor_x = static_cast<float>(img_width) / inputSize_.width; float factor_y = static_cast<float>(img_height) / inputSize_.height; - ncnn::Extractor ex = net_->create_extractor(); ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, inputSize_.width, inputSize_.height); + + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); std::vector faces_tmp; diff --git a/src/face/detecter/centerface/centerface.cpp b/src/face/detecter/centerface/centerface.cpp index 6bc61ac..54fe10f 100644 --- a/src/face/detecter/centerface/centerface.cpp +++ b/src/face/detecter/centerface/centerface.cpp @@ -24,7 +24,10 @@ int CenterFace::DetectFace(const unsigned char* rgbdata, ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, img_width_new, img_height_new); + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("input.1", in); ncnn::Mat mat_heatmap, mat_scale, mat_offset, mat_landmark; ex.extract("537", mat_heatmap); diff --git a/src/face/detecter/mtcnn/mtcnn.cpp b/src/face/detecter/mtcnn/mtcnn.cpp index fe7d1e9..e713689 100644 --- a/src/face/detecter/mtcnn/mtcnn.cpp +++ b/src/face/detecter/mtcnn/mtcnn.cpp @@ -122,8 +122,9 @@ int Mtcnn::PDetect(const ncnn::Mat & img_in, ncnn::Mat img_resized; ncnn::resize_bilinear(img_in, img_resized, w, h); ncnn::Extractor ex = 
pnet_->create_extractor(); - //ex.set_num_threads(2); - ex.set_light_mode(true); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); + ex.input("data", img_resized); ncnn::Mat score_mat, location_mat; ex.extract("prob1", score_mat); @@ -177,8 +178,8 @@ int Mtcnn::RDetect(const ncnn::Mat & img_in, ncnn::copy_cut_border(img_in, img_face, face.y, img_in.h - face.br().y, face.x, img_in.w - face.br().x); ncnn::resize_bilinear(img_face, img_resized, 24, 24); ncnn::Extractor ex = rnet_->create_extractor(); - ex.set_light_mode(true); - // ex.set_num_threads(2); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", img_resized); ncnn::Mat score_mat, location_mat; ex.extract("prob1", score_mat); @@ -214,8 +215,8 @@ int Mtcnn::ODetect(const ncnn::Mat & img_in, ncnn::resize_bilinear(img_face, img_resized, 48, 48); ncnn::Extractor ex = onet_->create_extractor(); - ex.set_light_mode(true); - // ex.set_num_threads(2); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", img_resized); ncnn::Mat score_mat, location_mat, keypoints_mat; ex.extract("prob1", score_mat); diff --git a/src/face/detecter/retinaface/retinaface.cpp b/src/face/detecter/retinaface/retinaface.cpp index 50a03df..213f293 100644 --- a/src/face/detecter/retinaface/retinaface.cpp +++ b/src/face/detecter/retinaface/retinaface.cpp @@ -42,9 +42,12 @@ int RetinaFace::DetectFace(const unsigned char* rgbdata, float factor_x = static_cast(img_width) / inputSize_.width; float factor_y = static_cast(img_height) / inputSize_.height; - ncnn::Extractor ex = net_->create_extractor(); ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, inputSize_.width, inputSize_.height); + + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); std::vector faces_tmp; diff --git a/src/face/detecter/scrfd/scrfd.cpp 
b/src/face/detecter/scrfd/scrfd.cpp index 00a7931..fd521fe 100644 --- a/src/face/detecter/scrfd/scrfd.cpp +++ b/src/face/detecter/scrfd/scrfd.cpp @@ -128,6 +128,8 @@ int Scrfd::DetectFace(const unsigned char* rgbdata, in_pad.substract_mean_normalize(mean_vals, norm_vals); ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("input.1", in_pad); diff --git a/src/face/detecter/yoloface/yoloface.cpp b/src/face/detecter/yoloface/yoloface.cpp index 98f3b33..8f07af6 100644 --- a/src/face/detecter/yoloface/yoloface.cpp +++ b/src/face/detecter/yoloface/yoloface.cpp @@ -154,7 +154,8 @@ int YoloFace::DetectFace(const unsigned char* rgbdata, in_pad.substract_mean_normalize(0, norm_vals); ncnn::Extractor ex = net_->create_extractor(); - + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in_pad); std::vector proposals; diff --git a/src/face/hopenet/hopenet.cpp b/src/face/hopenet/hopenet.cpp index e773897..381e712 100644 --- a/src/face/hopenet/hopenet.cpp +++ b/src/face/hopenet/hopenet.cpp @@ -56,6 +56,8 @@ int Hopenet::Detect(const unsigned char* rgbdata, ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face, ncnn::Mat::PIXEL_RGB2GRAY, roi.width, roi.height, 48, 48); ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); ncnn::Mat output; diff --git a/src/face/landmarker/insightface/insightface.cpp b/src/face/landmarker/insightface/insightface.cpp index 96da4b1..cac54ed 100644 --- a/src/face/landmarker/insightface/insightface.cpp +++ b/src/face/landmarker/insightface/insightface.cpp @@ -40,11 +40,13 @@ int InsightfaceLandmarker::ExtractKeypoints(const unsigned char* rgbdata, unsigned char* dstCursor = img_face + i * face_enlarged.width * 3; memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * face_enlarged.width); } + ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face, + 
ncnn::Mat::PIXEL_RGB, face_enlarged.width, face_enlarged.height, 192, 192); // 4 do inference ncnn::Extractor ex = net_->create_extractor(); - ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face, - ncnn::Mat::PIXEL_RGB, face_enlarged.width, face_enlarged.height, 192, 192); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); ncnn::Mat out; ex.extract("fc1", out); diff --git a/src/face/landmarker/scrfd/scrfd.cpp b/src/face/landmarker/scrfd/scrfd.cpp index dcbc107..0f2b862 100644 --- a/src/face/landmarker/scrfd/scrfd.cpp +++ b/src/face/landmarker/scrfd/scrfd.cpp @@ -39,9 +39,12 @@ int ScrfdLandmarker::ExtractKeypoints(const unsigned char* rgbdata, memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * box.width); } - ncnn::Extractor ex = net_->create_extractor(); ncnn::Mat ncnn_in = ncnn::Mat::from_pixels_resize(img_face, ncnn::Mat::PIXEL_RGB, box.width, box.height, 192, 192); ncnn_in.substract_mean_normalize(means, norms); + + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("input.1",ncnn_in); ncnn::Mat ncnn_out; ex.extract("482",ncnn_out); diff --git a/src/face/landmarker/zqlandmarker/zqlandmarker.cpp b/src/face/landmarker/zqlandmarker/zqlandmarker.cpp index 3be01c3..ba74f5a 100644 --- a/src/face/landmarker/zqlandmarker/zqlandmarker.cpp +++ b/src/face/landmarker/zqlandmarker/zqlandmarker.cpp @@ -29,10 +29,13 @@ int ZQLandmarker::ExtractKeypoints(const unsigned char* rgbdata, unsigned char* dstCursor = img_face + i * face.width * 3; memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * face.width); } - ncnn::Extractor ex = net_->create_extractor(); ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face, ncnn::Mat::PIXEL_RGB, face.width, face.height, 112, 112); in.substract_mean_normalize(meanVals, normVals); + + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); 
ncnn::Mat out; ex.extract("bn6_3", out); diff --git a/src/face/recognizer/mobilefacenet/mobilefacenet.cpp b/src/face/recognizer/mobilefacenet/mobilefacenet.cpp index 51440d1..489fd11 100644 --- a/src/face/recognizer/mobilefacenet/mobilefacenet.cpp +++ b/src/face/recognizer/mobilefacenet/mobilefacenet.cpp @@ -31,11 +31,14 @@ int Mobilefacenet::ExtractFeature(const unsigned char* rgbdata, ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face, ncnn::Mat::PIXEL_RGB, face.width, face.height, 112, 112); - feature->resize(kFaceFeatureDim); ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); ncnn::Mat out; ex.extract("fc1", out); + + feature->resize(kFaceFeatureDim); for (int i = 0; i < kFaceFeatureDim; ++i) { feature->at(i) = out[i]; } diff --git a/src/hand/detecter/nanodet/nanodet.cpp b/src/hand/detecter/nanodet/nanodet.cpp index 9171910..2c48cd7 100644 --- a/src/hand/detecter/nanodet/nanodet.cpp +++ b/src/hand/detecter/nanodet/nanodet.cpp @@ -143,7 +143,8 @@ int Nanodet::Detect(const unsigned char* rgbdata, in_pad.substract_mean_normalize(mean_vals, norm_vals); ncnn::Extractor ex = net_->create_extractor(); - //__android_log_print(ANDROID_LOG_WARN, "ncnn","input w:%d,h:%d",in_pad.w,in_pad.h); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("input.1", in_pad); std::vector proposals; diff --git a/src/hand/detecter/yolox/yolox.cpp b/src/hand/detecter/yolox/yolox.cpp index 6021a0c..78e2ec7 100644 --- a/src/hand/detecter/yolox/yolox.cpp +++ b/src/hand/detecter/yolox/yolox.cpp @@ -97,6 +97,8 @@ int Yolox::Detect(const unsigned char* rgbdata, in_pad.substract_mean_normalize(mean_vals, norm_vals); ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("input", in_pad); ncnn::Mat out; ex.extract("output", out); diff --git a/src/hand/pose/handpose/handpose.cpp b/src/hand/pose/handpose/handpose.cpp 
index 2ddc6af..12613b6 100644 --- a/src/hand/pose/handpose/handpose.cpp +++ b/src/hand/pose/handpose/handpose.cpp @@ -32,10 +32,12 @@ int HandPose::Detect(const unsigned char* rgbdata, const float meanVals[3] = { 128.0f, 128.0f, 128.0f }; const float normVals[3] = { 0.00390625f, 0.00390625f, 0.00390625f }; ncnn_in.substract_mean_normalize(meanVals, normVals); - ncnn::Extractor ex1 = net_->create_extractor(); - ex1.input("input", ncnn_in); + ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); + ex.input("input", ncnn_in); ncnn::Mat ncnn_out; - ex1.extract("output", ncnn_out); + ex.extract("output", ncnn_out); keypoints.resize(21); for (int c = 0; c < ncnn_out.c; c++) diff --git a/src/pose/detecter/openpose/openpose.cpp b/src/pose/detecter/openpose/openpose.cpp index d131789..9245a24 100644 --- a/src/pose/detecter/openpose/openpose.cpp +++ b/src/pose/detecter/openpose/openpose.cpp @@ -51,6 +51,8 @@ int OpenPose::Detect(const unsigned char* rgbdata, ncnn::Mat pafs; ncnn::Mat heatmaps; ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); ex.extract("stage_1_output_1_heatmaps", heatmaps); // or stage_0_output_1_heatmaps ex.extract("stage_1_output_0_pafs", pafs); // or stage_0_output_0_pafs diff --git a/src/pose/detecter/ultralight/ultralight.cpp b/src/pose/detecter/ultralight/ultralight.cpp index 7f9de64..1855255 100644 --- a/src/pose/detecter/ultralight/ultralight.cpp +++ b/src/pose/detecter/ultralight/ultralight.cpp @@ -22,6 +22,8 @@ int Ultralight::Detect(const unsigned char* rgbdata, in.substract_mean_normalize(mean_vals, norm_vals); ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); ncnn::Mat out; ex.extract("output", out); diff --git a/src/pose/estimator.h b/src/pose/estimator.h index a1e519d..63d5890 100644 --- 
a/src/pose/estimator.h +++ b/src/pose/estimator.h @@ -9,6 +9,7 @@ extern "C" { #endif typedef void* IPoseEstimator; IPoseEstimator new_ultralight_estimator(); + IPoseEstimator new_movenet(const int model_type); int extract_pose_keypoints(IPoseEstimator d, const unsigned char* rgbdata, int img_width, int img_height, const Rect* rect, KeypointVector* keypoints); diff --git a/src/pose/estimator/estimator.cpp b/src/pose/estimator/estimator.cpp index f759ef4..15cac73 100644 --- a/src/pose/estimator/estimator.cpp +++ b/src/pose/estimator/estimator.cpp @@ -1,10 +1,15 @@ #include "../estimator.h" #include "ultralight/ultralight.hpp" +#include "movenet/movenet.hpp" IPoseEstimator new_ultralight_estimator() { return new ovpose::UltralightEstimator(); } +IPoseEstimator new_movenet(const int model_type) { + return new ovpose::MoveNet(model_type); +} + int extract_pose_keypoints(IPoseEstimator d, const unsigned char* rgbdata, int img_width, int img_height, const Rect* rect, KeypointVector* keypoints) { std::vector points; int ret = static_cast(d)->ExtractKeypoints(rgbdata, img_width, img_height, *rect, &points); @@ -24,4 +29,8 @@ namespace ovpose { Estimator* UltralightEstimatorFactory::CreateEstimator() { return new UltralightEstimator(); } + +Estimator* MoveNetFactory::CreateEstimator(const int model_type) { + return new MoveNet(model_type); +} } diff --git a/src/pose/estimator/estimator.hpp b/src/pose/estimator/estimator.hpp index e113328..a2adb5b 100644 --- a/src/pose/estimator/estimator.hpp +++ b/src/pose/estimator/estimator.hpp @@ -24,5 +24,12 @@ public: ~UltralightEstimatorFactory() {} Estimator* CreateEstimator(); }; + +class MoveNetFactory: public EstimatorFactory { +public: + MoveNetFactory(const int model_type) {} + ~MoveNetFactory() {} + Estimator* CreateEstimator(const int model_type); +}; } #endif // !_POSE_ESTIMATOR_H diff --git a/src/pose/estimator/movenet/movenet.cpp b/src/pose/estimator/movenet/movenet.cpp new file mode 100644 index 0000000..c870666 --- 
/dev/null +++ b/src/pose/estimator/movenet/movenet.cpp @@ -0,0 +1,157 @@ +#include "movenet.hpp" +#include +#include + +#ifdef OV_VULKAN +#include "gpu.h" +#endif // OV_VULKAN + +namespace ovpose { + +MoveNet::MoveNet(const int model_type) : Estimator() { + if (model_type == 0) { + target_size = 192; + kpt_scale = 0.02083333395421505; + feature_size = 48; + } else { + target_size = 256; + kpt_scale = 0.015625; + feature_size = 64; + } + for (int i = 0; i < feature_size; i++) + { + std::vector x, y; + for (int j = 0; j < feature_size; j++) + { + x.push_back(j); + y.push_back(i); + } + dist_y.push_back(y); + dist_x.push_back(x); + } +} + +int MoveNet::ExtractKeypoints(const unsigned char* rgbdata, + int img_width, int img_height, + const ov::Rect& rect, std::vector* keypoints) { + keypoints->clear(); + int w = rect.width; + int h = rect.height; + float scale = 1.f; + if (w > h) + { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } + else + { + scale = (float)target_size / h; + h = target_size; + w = w * scale; + } + + size_t total_size = rect.width * rect.height * 3 * sizeof(unsigned char); + unsigned char* data = (unsigned char*)malloc(total_size); + const unsigned char *start_ptr = rgbdata; +#if defined(_OPENMP) +#pragma omp parallel for num_threads(num_threads) +#endif + for(size_t i = 0; i < rect.height; ++i) { + const unsigned char* srcCursor = start_ptr + ((i + rect.y) * img_width + rect.x) * 3; + unsigned char* dstCursor = data + i * rect.width * 3; + memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * rect.width); + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(data, ncnn::Mat::PIXEL_RGB, rect.width, rect.height, w, h); + int wpad = target_size - w; + int hpad = target_size - h; + ncnn::Mat in_pad; + ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f); + //数据预处理 + in_pad.substract_mean_normalize(mean_vals, norm_vals); + + ncnn::Extractor ex = 
net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); + + ex.input("input", in_pad); + + ncnn::Mat regress, center, heatmap, offset; + + ex.extract("regress", regress); + ex.extract("offset", offset); + ex.extract("heatmap", heatmap); + ex.extract("center", center); + + float* center_data = (float*)center.data; + float* heatmap_data = (float*)heatmap.data; + float* offset_data = (float*)offset.data; + + // int top_index = 0; + // float top_score = 0; + + int top_index = int(ov::argmax(center_data, center_data+center.h)); + float top_score = *std::max_element(center_data, center_data + center.h); + + int ct_y = (top_index / feature_size); + int ct_x = top_index - ct_y * feature_size; + + std::vector y_regress(num_joints), x_regress(num_joints); + float* regress_data = (float*)regress.channel(ct_y).row(ct_x); + for (size_t i = 0; i < num_joints; i++) + { + y_regress[i] = regress_data[i] + (float)ct_y; + x_regress[i] = regress_data[i + num_joints] + (float)ct_x; + } + + ncnn::Mat kpt_scores = ncnn::Mat(feature_size * feature_size, num_joints, sizeof(float)); + float* scores_data = (float*)kpt_scores.data; + for (int i = 0; i < feature_size; i++) + { + for (int j = 0; j < feature_size; j++) + { + std::vector score; + for (int c = 0; c < num_joints; c++) + { + float y = (dist_y[i][j] - y_regress[c]) * (dist_y[i][j] - y_regress[c]); + float x = (dist_x[i][j] - x_regress[c]) * (dist_x[i][j] - x_regress[c]); + float dist_weight = sqrt(y + x) + 1.8; + scores_data[c* feature_size * feature_size +i* feature_size +j] = heatmap_data[i * feature_size * num_joints + j * num_joints + c] / dist_weight; + } + } + } + std::vector kpts_ys, kpts_xs; + for (int i = 0; i < num_joints; i++) + { + // top_index = 0; + // top_score = 0; + top_index = int(ov::argmax(scores_data + feature_size * feature_size *i, scores_data + feature_size * feature_size *(i+1))); + top_score = *std::max_element(scores_data + feature_size * feature_size * i, scores_data 
+ feature_size * feature_size * (i + 1)); + + int top_y = (top_index / feature_size); + int top_x = top_index - top_y * feature_size; + kpts_ys.push_back(top_y); + kpts_xs.push_back(top_x); + } + + for (int i = 0; i < num_joints; i++) + { + float kpt_offset_x = offset_data[kpts_ys[i] * feature_size * num_joints*2 + kpts_xs[i] * num_joints * 2 + i * 2]; + float kpt_offset_y = offset_data[kpts_ys[i] * feature_size * num_joints * 2 + kpts_xs[i] * num_joints * 2 + i * 2+1]; + + float x = (kpts_xs[i] + kpt_offset_y) * kpt_scale * target_size; + float y = (kpts_ys[i] + kpt_offset_x) * kpt_scale * target_size; + + ov::Keypoint kpt; + kpt.p = ov::Point2f((x - ((float)wpad / 2)) / scale + rect.x, (y - ((float)hpad / 2)) / scale + rect.y); + kpt.score = heatmap_data[kpts_ys[i] * feature_size * num_joints + kpts_xs[i] * num_joints + i]; + keypoints->push_back(kpt); + + } + + free(data); + return 0; +} + +} diff --git a/src/pose/estimator/movenet/movenet.hpp b/src/pose/estimator/movenet/movenet.hpp new file mode 100644 index 0000000..058561b --- /dev/null +++ b/src/pose/estimator/movenet/movenet.hpp @@ -0,0 +1,31 @@ +#ifndef _POSE_MOVENET_ESTIMATOR_H_ +#define _POSE_MOVENET_ESTIMATOR_H_ + +#include "../estimator.hpp" +#include <vector> +#include "net.h" + +namespace ovpose { + +class MoveNet: public Estimator { +public: + MoveNet(const int model_type); // 0: lightning, 1: thunder + int ExtractKeypoints(const unsigned char* rgbdata, + int img_width, int img_height, + const ov::Rect& rect, + std::vector<ov::Keypoint>* keypoints); + +private: + int feature_size; + float kpt_scale; + int target_size; + const int num_joints = 17; + const float mean_vals[3] = {127.5f, 127.5f, 127.5f}; + const float norm_vals[3] = {1/ 127.5f, 1 / 127.5f, 1 / 127.5f}; + std::vector<std::vector<float>> dist_y, dist_x; +}; + +} + +#endif // !_POSE_MOVENET_ESTIMATOR_H_ + diff --git a/src/pose/estimator/ultralight/ultralight.cpp b/src/pose/estimator/ultralight/ultralight.cpp index a1dbf8a..44b3680 100644 --- 
a/src/pose/estimator/ultralight/ultralight.cpp +++ b/src/pose/estimator/ultralight/ultralight.cpp @@ -13,6 +13,7 @@ int UltralightEstimator::ExtractKeypoints(const unsigned char* rgbdata, keypoints->clear(); int w = rect.width; int h = rect.height; + size_t total_size = w * h * 3 * sizeof(unsigned char); unsigned char* data = (unsigned char*)malloc(total_size); const unsigned char *start_ptr = rgbdata; @@ -29,6 +30,8 @@ int UltralightEstimator::ExtractKeypoints(const unsigned char* rgbdata, in.substract_mean_normalize(meanVals, normVals); ncnn::Extractor ex = net_->create_extractor(); + ex.set_light_mode(light_mode_); + ex.set_num_threads(num_threads); ex.input("data", in); ncnn::Mat out; ex.extract("hybridsequential0_conv7_fwd", out);