Merge branch 'release/v1.0.0'

This commit is contained in:
Syd Xu
2021-11-11 14:35:25 +08:00
25 changed files with 1049 additions and 507 deletions

View File

@@ -43,6 +43,7 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM
- segmentor (for pose segmentation)
- deeplabv3plus [Google Drive](https://drive.google.com/drive/folders/1BjiK0IiVAfyX30OoyQzoe1YBzvSudghG?usp=sharing)
- erdnet [Google Drive](https://drive.google.com/drive/folders/1WkQqYT9D4oGL6Gubu0SOeqcZmrdZ5cNw?usp=sharing)
- rvm (RobustVideoMatting) [Google Drive](https://drive.google.com/drive/folders/1roV7L8Z3OIC4C7CKQcm_KJ_CPHeTJug5?usp=sharing)
- hand
- detector (for hand detection)
- yolox [Google Drive](https://drive.google.com/drive/folders/1lNm5X6DJ1ZXVaqg54rXnRhvPfC5lAxlH?usp=sharing)
@@ -51,6 +52,8 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM
- handnet [Google Drive](https://drive.google.com/drive/folders/1DsCGmiVaZobbMWRp5Oec8GbIpeg7CsNR?usp=sharing)
- styletransfer
- animegan2 [Google Drive](https://drive.google.com/drive/folders/1K6ZScENPHVbxupHkwl5WcpG8PPECtD8e?usp=sharing)
- tracker
- lighttrack [Google Drive](https://drive.google.com/drive/folders/16cxns_xzSOABHn6UcY1OXyf4MFcSSbEf?usp=sharing)
- Go binding (github.com/bububa/openvision/go)
## Reference

View File

@@ -55,6 +55,7 @@ make -j 4
- segmentor (for pose segmentation)
- deeplabv3plus [Google Drive](https://drive.google.com/drive/folders/1BjiK0IiVAfyX30OoyQzoe1YBzvSudghG?usp=sharing)
- erdnet [Google Drive](https://drive.google.com/drive/folders/1WkQqYT9D4oGL6Gubu0SOeqcZmrdZ5cNw?usp=sharing)
- rvm (RobustVideoMatting) [Google Drive](https://drive.google.com/drive/folders/1roV7L8Z3OIC4C7CKQcm_KJ_CPHeTJug5?usp=sharing)
- hand
- detector (for hand detection)
- yolox [Google Drive](https://drive.google.com/drive/folders/1lNm5X6DJ1ZXVaqg54rXnRhvPfC5lAxlH?usp=sharing)
@@ -63,3 +64,5 @@ make -j 4
- handnet [Google Drive](https://drive.google.com/drive/folders/1DsCGmiVaZobbMWRp5Oec8GbIpeg7CsNR?usp=sharing)
- styletransfer
- animegan2 [Google Drive](https://drive.google.com/drive/folders/1K6ZScENPHVbxupHkwl5WcpG8PPECtD8e?usp=sharing)
- tracker
- lighttrack [Google Drive](https://drive.google.com/drive/folders/16cxns_xzSOABHn6UcY1OXyf4MFcSSbEf?usp=sharing)

View File

@@ -58,6 +58,8 @@ func GoRect(c *C.Rect, w float64, h float64) Rectangle {
)
}
var ZR = Rectangle{}
// Point represents a 2D point
type Point struct {
X float64
@@ -69,6 +71,8 @@ func Pt(x, y float64) Point {
return Point{x, y}
}
var ZP = Point{}
// GoPoint2f converts C.Point2f to Point
func GoPoint2f(c *C.Point2f, w float64, h float64) Point {
return Pt(

View File

@@ -68,6 +68,12 @@ var (
Message: "detect pose failed",
}
}
TrackerError = func(code int) Error {
return Error{
Code: code,
Message: "object tracker error",
}
}
RealsrError = func(code int) Error {
return Error{
Code: code,

12
go/tracker/cgo.go Normal file
View File

@@ -0,0 +1,12 @@
//go:build !vulkan
// +build !vulkan
package tracker
/*
#cgo CXXFLAGS: --std=c++11 -fopenmp
#cgo CPPFLAGS: -I ${SRCDIR}/../../include -I /usr/local/include
#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision
#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../lib
*/
import "C"

11
go/tracker/cgo_vulkan.go Normal file
View File

@@ -0,0 +1,11 @@
// +build vulkan
package tracker
/*
#cgo CXXFLAGS: --std=c++11 -fopenmp
#cgo CPPFLAGS: -I ${SRCDIR}/../../include -I /usr/local/include
#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision -lglslang -lvulkan -lSPIRV -lOGLCompiler -lMachineIndependent -lGenericCodeGen -lOSDependent
#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../lib
*/
import "C"
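Note: the two cgo files above select link flags at build time via Go build tags. Building with `go build -tags vulkan` compiles cgo_vulkan.go and additionally links the Vulkan/glslang libraries, while a plain `go build` picks cgo.go and links only ncnn, OpenMP and openvision.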

2
go/tracker/doc.go Normal file
View File

@@ -0,0 +1,2 @@
// Package tracker includes object trackers
package tracker

45
go/tracker/lighttrack.go Normal file
View File

@@ -0,0 +1,45 @@
package tracker
/*
#include <stdlib.h>
#include <stdbool.h>
#include "openvision/tracker/tracker.h"
*/
import "C"
import (
"unsafe"
"github.com/bububa/openvision/go/common"
)
// LightTrack represents a LightTrack object tracker
type LightTrack struct {
d C.ITracker
}
// NewLightTrack returns a new LightTrack
func NewLightTrack() *LightTrack {
return &LightTrack{
d: C.new_light_track(),
}
}
// Destroy frees the tracker
func (d *LightTrack) Destroy() {
common.DestroyEstimator(d)
}
// Pointer implements the Estimator interface
func (d *LightTrack) Pointer() unsafe.Pointer {
return unsafe.Pointer(d.d)
}
// LoadModel loads the model for the tracker
func (d *LightTrack) LoadModel(modelPath string) error {
return common.EstimatorLoadModel(d, modelPath)
}
// Track implements the Tracker interface
func (d *LightTrack) Track(img *common.Image, rect common.Rectangle) (common.Rectangle, error) {
return Track(d, img, rect)
}

40
go/tracker/tracker.go Normal file
View File

@@ -0,0 +1,40 @@
package tracker
/*
#include <stdlib.h>
#include <stdbool.h>
#include "openvision/common/common.h"
#include "openvision/tracker/tracker.h"
*/
import "C"
import (
"unsafe"
openvision "github.com/bububa/openvision/go"
"github.com/bububa/openvision/go/common"
)
// Tracker represents the object tracker interface
type Tracker interface {
common.Estimator
Track(img *common.Image, rect common.Rectangle) (common.Rectangle, error)
}
// Track returns the tracked object rect
func Track(d Tracker, img *common.Image, rect common.Rectangle) (common.Rectangle, error) {
imgWidth := img.WidthF64()
imgHeight := img.HeightF64()
data := img.Bytes()
rectC := rect.CRect(imgWidth, imgHeight)
defer C.free(unsafe.Pointer(rectC))
errCode := C.track_object(
(C.ITracker)(d.Pointer()),
(*C.uchar)(unsafe.Pointer(&data[0])),
C.int(imgWidth),
C.int(imgHeight),
(*C.Rect)(unsafe.Pointer(rectC)))
if errCode != 0 {
return common.ZR, openvision.TrackerError(int(errCode))
}
return common.GoRect(rectC, imgWidth, imgHeight), nil
}
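A minimal usage sketch for the new Go tracker binding, assuming the LightTrack model files live under a path like ./models/lighttrack and that the caller already has a frame as a *common.Image and an initial common.Rectangle (how those are constructed is left to the common package and is not shown here):

package main

import (
    "log"

    "github.com/bububa/openvision/go/common"
    "github.com/bububa/openvision/go/tracker"
)

// trackOnce loads the LightTrack model, tracks the object given by roi in a
// single frame and returns the updated rectangle. Real video code would keep
// the tracker alive across frames instead of reloading the model each call;
// this is only a sketch, and the model path is an assumption.
func trackOnce(frame *common.Image, roi common.Rectangle, modelPath string) (common.Rectangle, error) {
    t := tracker.NewLightTrack()
    defer t.Destroy()
    if err := t.LoadModel(modelPath); err != nil {
        return common.ZR, err
    }
    return t.Track(frame, roi)
}

func main() {
    // Frame and ROI acquisition are elided; see the common package helpers.
    log.Println("tracker sketch; wire up frame/ROI loading before calling trackOnce")
}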

View File

@@ -71,6 +71,8 @@ target_include_directories(openvision
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose/segmentor>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/styletransfer>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/tracker>
)
#install(TARGETS openvision EXPORT openvision ARCHIVE DESTINATION ${LIBRARY_OUTPUT_PATH})
@@ -108,3 +110,8 @@ file(COPY
DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/styletransfer
)
file(COPY
${CMAKE_CURRENT_SOURCE_DIR}/tracker/tracker.h
DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/tracker
)

View File

@@ -7,94 +7,104 @@
#include "gpu.h"
#endif // OV_VULKAN
IHopenet new_hopenet() { return new ovface::Hopenet(); }
IHopenet new_hopenet() {
return new ovface::Hopenet();
}
int hopenet_detect(IHopenet h, const unsigned char* rgbdata, int img_width, int img_height, const Rect* roi, HeadPose* euler_angles) {
return static_cast<ovface::Hopenet*>(h)->Detect(rgbdata, img_width, img_height, *roi, static_cast<ovface::HeadPose*>(euler_angles));
int hopenet_detect(IHopenet h, const unsigned char *rgbdata, int img_width,
int img_height, const Rect *roi, HeadPose *euler_angles) {
return static_cast<ovface::Hopenet *>(h)->Detect(
rgbdata, img_width, img_height, *roi,
static_cast<ovface::HeadPose *>(euler_angles));
}
namespace ovface {
#define NEAR_0 1e-10
#define ODIM 66
#define ODIM 66
int Hopenet::LoadModel(const char* root_path) {
int ret = Estimator::LoadModel(root_path);
if (ret != 0) {
return ret;
}
for (uint i=1; i<67; i++) idx_tensor[i-1] = i;
return 0;
int Hopenet::LoadModel(const char *root_path) {
int ret = Estimator::LoadModel(root_path);
if (ret != 0) {
return ret;
}
for (uint i = 1; i < 67; i++)
idx_tensor[i - 1] = i;
return 0;
}
int Hopenet::Detect(const unsigned char* rgbdata,
int img_width, int img_height,
Rect roi, HeadPose* head_angles) {
float diff = fabs(roi.height-roi.width);
if (roi.height > roi.width) {
roi.x -= diff/2;
roi.width = roi.height;
} else if (roi.height < roi.width) {
roi.y -= diff/2;
roi.height = roi.width;
}
int Hopenet::Detect(const unsigned char *rgbdata, int img_width, int img_height,
Rect roi, HeadPose *head_angles) {
float diff = fabs(roi.height - roi.width);
if (roi.height > roi.width) {
roi.x -= diff / 2;
roi.width = roi.height;
} else if (roi.height < roi.width) {
roi.y -= diff / 2;
roi.height = roi.width;
}
size_t total_size = roi.width * roi.height * 3 * sizeof(unsigned char);
unsigned char* img_face = (unsigned char*)malloc(total_size);
const unsigned char *start_ptr = rgbdata;
#if defined(_OPENMP)
#pragma omp parallel for num_threads(num_threads)
#endif
for(size_t i = 0; i < roi.height; ++i) {
const unsigned char* srcCursor = start_ptr + ((i + roi.y) * img_width + roi.x) * 3;
unsigned char* dstCursor = img_face + i * roi.width * 3;
memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * roi.width);
}
// size_t total_size = roi.width * roi.height * 3 * sizeof(unsigned char);
// unsigned char* img_face = (unsigned char*)malloc(total_size);
// const unsigned char *start_ptr = rgbdata;
// #if defined(_OPENMP)
// #pragma omp parallel for num_threads(num_threads)
// #endif
// for(size_t i = 0; i < roi.height; ++i) {
// const unsigned char* srcCursor = start_ptr + ((i + roi.y) *
// img_width + roi.x) * 3; unsigned char* dstCursor = img_face + i *
// roi.width * 3; memcpy(dstCursor, srcCursor, sizeof(unsigned char) *
// 3 * roi.width);
// }
ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face, ncnn::Mat::PIXEL_RGB2GRAY, roi.width, roi.height, 48, 48);
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("data", in);
// ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face,
// ncnn::Mat::PIXEL_RGB2GRAY, roi.width, roi.height, 48, 48);
ncnn::Mat in = ncnn::Mat::from_pixels_roi_resize(
rgbdata, ncnn::Mat::PIXEL_RGB2GRAY, img_width, img_height, roi.x, roi.y,
roi.width, roi.height, 48, 48);
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("data", in);
ncnn::Mat output;
ex.extract("hybridsequential0_multitask0_dense0_fwd", output);
float* pred_pitch = output.range(0, ODIM);
float* pred_roll = output.range(ODIM, ODIM*2);
float* pred_yaw = output.range(ODIM*2, ODIM*3);
ncnn::Mat output;
ex.extract("hybridsequential0_multitask0_dense0_fwd", output);
float *pred_pitch = output.range(0, ODIM);
float *pred_roll = output.range(ODIM, ODIM * 2);
float *pred_yaw = output.range(ODIM * 2, ODIM * 3);
this->softmax(pred_pitch, ODIM);
this->softmax(pred_roll, ODIM);
this->softmax(pred_yaw, ODIM);
this->softmax(pred_pitch, ODIM);
this->softmax(pred_roll, ODIM);
this->softmax(pred_yaw, ODIM);
// printArray(pred_pitch, ODIM);
// printArray(pred_pitch, ODIM);
head_angles->pitch = this->getAngle(pred_pitch, ODIM);
head_angles->roll = this->getAngle(pred_roll, ODIM);
head_angles->yaw = this->getAngle(pred_yaw, ODIM);
head_angles->pitch = this->getAngle(pred_pitch, ODIM);
head_angles->roll = this->getAngle(pred_roll, ODIM);
head_angles->yaw = this->getAngle(pred_yaw, ODIM);
free(img_face);
// free(img_face);
return 0;
return 0;
}
void Hopenet::softmax(float* z, size_t el) {
double zmax = -INFINITY;
double zsum = 0;
for (size_t i = 0; i < el; i++) if (z[i] > zmax) zmax=z[i];
for (size_t i=0; i<el; i++) z[i] = std::exp(z[i]-zmax);
zsum = std::accumulate(z, z+el, 0.0);
for (size_t i=0; i<el; i++) z[i] = (z[i]/zsum)+NEAR_0;
void Hopenet::softmax(float *z, size_t el) {
double zmax = -INFINITY;
double zsum = 0;
for (size_t i = 0; i < el; i++)
if (z[i] > zmax)
zmax = z[i];
for (size_t i = 0; i < el; i++)
z[i] = std::exp(z[i] - zmax);
zsum = std::accumulate(z, z + el, 0.0);
for (size_t i = 0; i < el; i++)
z[i] = (z[i] / zsum) + NEAR_0;
}
double Hopenet::getAngle(float* prediction, size_t len) {
double expectation[len];
for (uint i=0; i<len; i++) expectation[i]=idx_tensor[i]*prediction[i];
double angle = std::accumulate(expectation, expectation+len, 0.0) * 3 - 99;
return angle;
double Hopenet::getAngle(float *prediction, size_t len) {
double expectation[len];
for (uint i = 0; i < len; i++)
expectation[i] = idx_tensor[i] * prediction[i];
double angle = std::accumulate(expectation, expectation + len, 0.0) * 3 - 99;
return angle;
}
}
} // namespace ovface
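For reference, getAngle above decodes each head angle as a softmax expectation over the ODIM = 66 bins mapped back to degrees: angle = 3 * sum_{i=1..66} i * p_i - 99, where p_i is the softmax probability of bin i (idx_tensor holds the bin indices 1..66).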

View File

@@ -7,59 +7,65 @@
namespace ovface {
int InsightfaceLandmarker::ExtractKeypoints(const unsigned char* rgbdata,
int img_width, int img_height,
const ov::Rect & face, std::vector<ov::Point2f>* keypoints) {
keypoints->clear();
if (!initialized_) {
return 10000;
}
int InsightfaceLandmarker::ExtractKeypoints(
const unsigned char *rgbdata, int img_width, int img_height,
const ov::Rect &face, std::vector<ov::Point2f> *keypoints) {
keypoints->clear();
if (!initialized_) {
return 10000;
}
if (rgbdata == 0){
return 10001;
}
if (rgbdata == 0) {
return 10001;
}
// 1 enlarge the face rect
ov::Rect face_enlarged = face;
const float enlarge_scale = 1.5f;
EnlargeRect(enlarge_scale, &face_enlarged);
// 1 enlarge the face rect
ov::Rect face_enlarged = face;
const float enlarge_scale = 1.5f;
EnlargeRect(enlarge_scale, &face_enlarged);
// 2 square the rect
RectifyRect(&face_enlarged);
face_enlarged = face_enlarged & Rect(0, 0, img_width, img_height);
// 2 square the rect
RectifyRect(&face_enlarged);
face_enlarged = face_enlarged & Rect(0, 0, img_width, img_height);
// 3 crop the face
size_t total_size = face_enlarged.width * face_enlarged.height * 3 * sizeof(unsigned char);
unsigned char* img_face = (unsigned char*)malloc(total_size);
const unsigned char *start_ptr = rgbdata;
#if defined(_OPENMP)
#pragma omp parallel for num_threads(num_threads)
#endif
for(size_t i = 0; i < face_enlarged.height; ++i) {
const unsigned char* srcCursor = start_ptr + ((i + face_enlarged.y) * img_width + face_enlarged.x) * 3;
unsigned char* dstCursor = img_face + i * face_enlarged.width * 3;
memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * face_enlarged.width);
}
ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face,
ncnn::Mat::PIXEL_RGB, face_enlarged.width, face_enlarged.height, 192, 192);
// 3 crop the face
// size_t total_size = face_enlarged.width * face_enlarged.height * 3 *
// sizeof(unsigned char); unsigned char* img_face = (unsigned
// char*)malloc(total_size); const unsigned char *start_ptr = rgbdata;
// #if defined(_OPENMP)
// #pragma omp parallel for num_threads(num_threads)
// #endif
// for(size_t i = 0; i < face_enlarged.height; ++i) {
// const unsigned char* srcCursor = start_ptr + ((i + face_enlarged.y)
// * img_width + face_enlarged.x) * 3; unsigned char* dstCursor =
// img_face + i * face_enlarged.width * 3; memcpy(dstCursor,
// srcCursor, sizeof(unsigned char) * 3 * face_enlarged.width);
// }
// ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face,
// ncnn::Mat::PIXEL_RGB, face_enlarged.width, face_enlarged.height, 192,
// 192);
ncnn::Mat in = ncnn::Mat::from_pixels_roi_resize(
rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, face_enlarged.x,
face_enlarged.y, face_enlarged.width, face_enlarged.height, 192, 192);
// 4 do inference
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("data", in);
ncnn::Mat out;
ex.extract("fc1", out);
// 4 do inference
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("data", in);
ncnn::Mat out;
ex.extract("fc1", out);
for (int i = 0; i < 106; ++i) {
float x = (out[2 * i] + 1.0f) * face_enlarged.width / 2 + face_enlarged.x;
float y = (out[2 * i + 1] + 1.0f) * face_enlarged.height / 2 + face_enlarged.y;
keypoints->push_back(Point2f(x, y));
}
for (int i = 0; i < 106; ++i) {
float x = (out[2 * i] + 1.0f) * face_enlarged.width / 2 + face_enlarged.x;
float y =
(out[2 * i + 1] + 1.0f) * face_enlarged.height / 2 + face_enlarged.y;
keypoints->push_back(Point2f(x, y));
}
free(img_face);
// free(img_face);
return 0;
return 0;
}
}
} // namespace ovface
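The final loop above maps the landmark network's outputs, normalized to [-1, 1] over the cropped face, back into image coordinates: x = (out[2i] + 1) * w / 2 + x0 and y = (out[2i+1] + 1) * h / 2 + y0, with (x0, y0, w, h) taken from face_enlarged.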

View File

@@ -6,58 +6,63 @@
namespace ovface {
int ScrfdLandmarker::ExtractKeypoints(const unsigned char* rgbdata,
int img_width, int img_height,
const ov::Rect & face, std::vector<ov::Point2f>* keypoints) {
keypoints->clear();
if (!initialized_) {
return 10000;
}
int ScrfdLandmarker::ExtractKeypoints(const unsigned char *rgbdata,
int img_width, int img_height,
const ov::Rect &face,
std::vector<ov::Point2f> *keypoints) {
keypoints->clear();
if (!initialized_) {
return 10000;
}
if (rgbdata == 0){
return 10001;
}
// 1 enlarge the face rect
ov::Rect box = face;
const float enlarge_scale = 1.5f;
EnlargeRect(enlarge_scale, &box);
if (rgbdata == 0) {
return 10001;
}
// 1 enlarge the face rect
ov::Rect box = face;
const float enlarge_scale = 1.5f;
EnlargeRect(enlarge_scale, &box);
// 2 square the rect
RectifyRect(&box);
// 2 square the rect
RectifyRect(&box);
box = box & Rect(0, 0, img_width, img_height);
box = box & Rect(0, 0, img_width, img_height);
size_t total_size = box.width * box.height * 3 * sizeof(unsigned char);
unsigned char* img_face = (unsigned char*)malloc(total_size);
const unsigned char *start_ptr = rgbdata;
#if defined(_OPENMP)
#pragma omp parallel for num_threads(num_threads)
#endif
for(size_t i = 0; i < box.height; ++i) {
const unsigned char* srcCursor = start_ptr + ((i + box.y) * img_width + box.x) * 3;
unsigned char* dstCursor = img_face + i * box.width * 3;
memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * box.width);
}
ncnn::Mat ncnn_in = ncnn::Mat::from_pixels_resize(img_face, ncnn::Mat::PIXEL_RGB, box.width, box.height, 192, 192);
ncnn_in.substract_mean_normalize(means, norms);
// size_t total_size = box.width * box.height * 3 * sizeof(unsigned char);
// unsigned char* img_face = (unsigned char*)malloc(total_size);
// const unsigned char *start_ptr = rgbdata;
// #if defined(_OPENMP)
// #pragma omp parallel for num_threads(num_threads)
// #endif
// for(size_t i = 0; i < box.height; ++i) {
// const unsigned char* srcCursor = start_ptr + ((i + box.y) *
// img_width + box.x) * 3; unsigned char* dstCursor = img_face + i *
// box.width * 3; memcpy(dstCursor, srcCursor, sizeof(unsigned char) *
// 3 * box.width);
// }
//
// ncnn::Mat ncnn_in = ncnn::Mat::from_pixels_resize(img_face,
// ncnn::Mat::PIXEL_RGB, box.width, box.height, 192, 192);
ncnn::Mat ncnn_in = ncnn::Mat::from_pixels_roi_resize(
rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, box.x, box.y,
box.width, box.height, 192, 192);
ncnn_in.substract_mean_normalize(means, norms);
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("input.1",ncnn_in);
ncnn::Mat ncnn_out;
ex.extract("482",ncnn_out);
float *scoredata = (float*)ncnn_out.data;
for(int i = 0; i < 468; i++)
{
ov::Point2f pt;
pt.x = scoredata[i*3]*box.width/192 + box.x;
pt.y = scoredata[i*3+1]*box.height/192 + box.y;
keypoints->push_back(pt);
}
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("input.1", ncnn_in);
ncnn::Mat ncnn_out;
ex.extract("482", ncnn_out);
float *scoredata = (float *)ncnn_out.data;
for (int i = 0; i < 468; i++) {
ov::Point2f pt;
pt.x = scoredata[i * 3] * box.width / 192 + box.x;
pt.y = scoredata[i * 3 + 1] * box.height / 192 + box.y;
keypoints->push_back(pt);
}
free(img_face);
return 0;
}
// free(img_face);
return 0;
}
} // namespace ovface

View File

@@ -6,49 +6,52 @@
namespace ovface {
int ZQLandmarker::ExtractKeypoints(const unsigned char* rgbdata,
int img_width, int img_height,
const ov::Rect & face, std::vector<ov::Point2f>* keypoints) {
keypoints->clear();
if (!initialized_) {
return 10000;
}
int ZQLandmarker::ExtractKeypoints(const unsigned char *rgbdata, int img_width,
int img_height, const ov::Rect &face,
std::vector<ov::Point2f> *keypoints) {
keypoints->clear();
if (!initialized_) {
return 10000;
}
if (rgbdata == 0){
return 10001;
}
if (rgbdata == 0) {
return 10001;
}
size_t total_size = face.width * face.height * 3 * sizeof(unsigned char);
unsigned char* img_face = (unsigned char*)malloc(total_size);
const unsigned char *start_ptr = rgbdata;
#if defined(_OPENMP)
#pragma omp parallel for num_threads(num_threads)
#endif
for(size_t i = 0; i < face.height; ++i) {
const unsigned char* srcCursor = start_ptr + ((i + face.y) * img_width + face.x) * 3;
unsigned char* dstCursor = img_face + i * face.width * 3;
memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * face.width);
}
ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face,
ncnn::Mat::PIXEL_RGB, face.width, face.height, 112, 112);
in.substract_mean_normalize(meanVals, normVals);
// size_t total_size = face.width * face.height * 3 * sizeof(unsigned
// char); unsigned char* img_face = (unsigned char*)malloc(total_size);
// const unsigned char *start_ptr = rgbdata;
// #if defined(_OPENMP)
// #pragma omp parallel for num_threads(num_threads)
// #endif
// for(size_t i = 0; i < face.height; ++i) {
// const unsigned char* srcCursor = start_ptr + ((i + face.y) *
// img_width + face.x) * 3; unsigned char* dstCursor = img_face + i *
// face.width * 3; memcpy(dstCursor, srcCursor, sizeof(unsigned char)
// * 3 * face.width);
// }
// ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face,
// ncnn::Mat::PIXEL_RGB, face.width, face.height, 112, 112);
ncnn::Mat in = ncnn::Mat::from_pixels_roi_resize(
rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, face.x, face.y,
face.width, face.height, 112, 112);
in.substract_mean_normalize(meanVals, normVals);
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("data", in);
ncnn::Mat out;
ex.extract("bn6_3", out);
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("data", in);
ncnn::Mat out;
ex.extract("bn6_3", out);
for (int i = 0; i < 106; ++i) {
float x = abs(out[2 * i] * face.width) + face.x;
float y = abs(out[2 * i + 1] * face.height) + face.y;
keypoints->push_back(Point2f(x, y));
}
for (int i = 0; i < 106; ++i) {
float x = abs(out[2 * i] * face.width) + face.x;
float y = abs(out[2 * i + 1] * face.height) + face.y;
keypoints->push_back(Point2f(x, y));
}
free(img_face);
return 0;
// free(img_face);
return 0;
}
}
} // namespace ovface

View File

@@ -6,47 +6,49 @@
namespace ovface {
int Mobilefacenet::ExtractFeature(const unsigned char* rgbdata,
int img_width, int img_height,
const ov::Rect & face, std::vector<float>* feature) {
feature->clear();
if (!initialized_) {
return 10000;
}
if (rgbdata == 0){
return 10001;
}
int Mobilefacenet::ExtractFeature(const unsigned char *rgbdata, int img_width,
int img_height, const ov::Rect &face,
std::vector<float> *feature) {
feature->clear();
if (!initialized_) {
return 10000;
}
if (rgbdata == 0) {
return 10001;
}
size_t total_size = face.width * face.height * 3 * sizeof(unsigned char);
unsigned char* img_face = (unsigned char*)malloc(total_size);
const unsigned char *start_ptr = rgbdata;
#if defined(_OPENMP)
#pragma omp parallel for num_threads(num_threads)
#endif
for(size_t i = 0; i < face.height; ++i) {
const unsigned char* srcCursor = start_ptr + ((i + face.y) * img_width + face.x) * 3;
unsigned char* dstCursor = img_face + i * face.width * 3;
memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * face.width);
}
// size_t total_size = face.width * face.height * 3 * sizeof(unsigned
// char); unsigned char* img_face = (unsigned char*)malloc(total_size);
// const unsigned char *start_ptr = rgbdata;
// #if defined(_OPENMP)
// #pragma omp parallel for num_threads(num_threads)
// #endif
// for(size_t i = 0; i < face.height; ++i) {
// const unsigned char* srcCursor = start_ptr + ((i + face.y) *
// img_width + face.x) * 3; unsigned char* dstCursor = img_face + i *
// face.width * 3; memcpy(dstCursor, srcCursor, sizeof(unsigned char)
// * 3 * face.width);
// }
ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face,
ncnn::Mat::PIXEL_RGB, face.width, face.height, 112, 112);
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("data", in);
ncnn::Mat out;
ex.extract("fc1", out);
// ncnn::Mat in = ncnn::Mat::from_pixels_resize(img_face,
// ncnn::Mat::PIXEL_RGB, face.width, face.height, 112, 112);
ncnn::Mat in = ncnn::Mat::from_pixels_roi_resize(
rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, face.x, face.y,
face.width, face.height, 112, 112);
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("data", in);
ncnn::Mat out;
ex.extract("fc1", out);
feature->resize(kFaceFeatureDim);
for (int i = 0; i < kFaceFeatureDim; ++i) {
feature->at(i) = out[i];
}
feature->resize(kFaceFeatureDim);
for (int i = 0; i < kFaceFeatureDim; ++i) {
feature->at(i) = out[i];
}
free(img_face);
return 0;
// free(img_face);
return 0;
}
}
} // namespace ovface

View File

@@ -7,51 +7,53 @@
namespace ovhand {
int HandPose::Detect(const unsigned char* rgbdata,
int img_width, int img_height,
const ov::Rect& rect,
std::vector<ov::Point2f>& keypoints) {
keypoints.clear();
if (!initialized_) {
return 10000;
}
if (rgbdata == 0){
return 10001;
}
int HandPose::Detect(const unsigned char *rgbdata, int img_width,
int img_height, const ov::Rect &rect,
std::vector<ov::Point2f> &keypoints) {
keypoints.clear();
if (!initialized_) {
return 10000;
}
if (rgbdata == 0) {
return 10001;
}
size_t total_size = rect.width * rect.height * 3 * sizeof(unsigned char);
unsigned char* crop_img = (unsigned char*)malloc(total_size);
const unsigned char *start_ptr = rgbdata;
for(size_t i = 0; i < rect.height; ++i) {
const unsigned char* srcCursor = start_ptr + ((i + rect.y) * img_width + rect.x) * 3;
unsigned char* dstCursor = crop_img + i * rect.width * 3;
memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * rect.width);
}
// size_t total_size = rect.width * rect.height * 3 * sizeof(unsigned char);
// unsigned char* crop_img = (unsigned char*)malloc(total_size);
// const unsigned char *start_ptr = rgbdata;
// for(size_t i = 0; i < rect.height; ++i) {
// const unsigned char* srcCursor = start_ptr + ((i + rect.y) * img_width
// + rect.x) * 3; unsigned char* dstCursor = crop_img + i * rect.width *
// 3; memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 *
// rect.width);
// }
ncnn::Mat ncnn_in = ncnn::Mat::from_pixels_resize(crop_img, ncnn::Mat::PIXEL_RGB, rect.width, rect.height, 224, 224);
const float meanVals[3] = { 128.0f, 128.0f, 128.0f };
const float normVals[3] = { 0.00390625f, 0.00390625f, 0.00390625f };
ncnn_in.substract_mean_normalize(meanVals, normVals);
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("input", ncnn_in);
ncnn::Mat ncnn_out;
ex.extract("output", ncnn_out);
keypoints.resize(21);
// ncnn::Mat ncnn_in = ncnn::Mat::from_pixels_resize(crop_img,
// ncnn::Mat::PIXEL_RGB, rect.width, rect.height, 224, 224);
ncnn::Mat ncnn_in = ncnn::Mat::from_pixels_roi_resize(
rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, rect.x, rect.y,
rect.width, rect.height, 224, 224);
const float meanVals[3] = {128.0f, 128.0f, 128.0f};
const float normVals[3] = {0.00390625f, 0.00390625f, 0.00390625f};
ncnn_in.substract_mean_normalize(meanVals, normVals);
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("input", ncnn_in);
ncnn::Mat ncnn_out;
ex.extract("output", ncnn_out);
keypoints.resize(21);
for (int c = 0; c < ncnn_out.c; c++)
{
ncnn::Mat data = ncnn_out.channel(c);
const float *ptr = data.row(0);
for (size_t j = 0; j < 21; j++)
{
float pt_x = ptr[j * 2] * rect.width;
float pt_y = ptr[j * 2 + 1] * rect.height;
keypoints[j] = ov::Point2f(pt_x + rect.x, pt_y + rect.y);
}
for (int c = 0; c < ncnn_out.c; c++) {
ncnn::Mat data = ncnn_out.channel(c);
const float *ptr = data.row(0);
for (size_t j = 0; j < 21; j++) {
float pt_x = ptr[j * 2] * rect.width;
float pt_y = ptr[j * 2 + 1] * rect.height;
keypoints[j] = ov::Point2f(pt_x + rect.x, pt_y + rect.y);
}
free(crop_img);
return 0;
}
}
// free(crop_img);
return 0;
}
} // namespace ovhand

View File

@@ -1,6 +1,6 @@
#include "movenet.hpp"
#include <string>
#include <math.h>
#include <string>
#ifdef OV_VULKAN
#include "gpu.h"
@@ -9,155 +9,162 @@
namespace ovpose {
MoveNet::MoveNet(int model_type) : Estimator() {
if (model_type == 0) {
target_size = 192;
kpt_scale = 0.02083333395421505;
feature_size = 48;
} else {
target_size = 256;
kpt_scale = 0.015625;
feature_size = 64;
}
for (int i = 0; i < feature_size; i++)
{
std::vector<float> x, y;
for (int j = 0; j < feature_size; j++)
{
x.push_back(j);
y.push_back(i);
}
dist_y.push_back(y);
dist_x.push_back(x);
if (model_type == 0) {
target_size = 192;
kpt_scale = 0.02083333395421505;
feature_size = 48;
} else {
target_size = 256;
kpt_scale = 0.015625;
feature_size = 64;
}
for (int i = 0; i < feature_size; i++) {
std::vector<float> x, y;
for (int j = 0; j < feature_size; j++) {
x.push_back(j);
y.push_back(i);
}
dist_y.push_back(y);
dist_x.push_back(x);
}
}
int MoveNet::ExtractKeypoints(const unsigned char* rgbdata,
int img_width, int img_height,
const ov::Rect& rect, std::vector<ov::Keypoint>* keypoints) {
if (!initialized_) {
return 10000;
}
if (rgbdata == 0){
return 10001;
}
keypoints->clear();
int w = rect.width;
int h = rect.height;
float scale = 1.f;
if (w > h)
{
scale = (float)target_size / w;
w = target_size;
h = h * scale;
}
else
{
scale = (float)target_size / h;
h = target_size;
w = w * scale;
int MoveNet::ExtractKeypoints(const unsigned char *rgbdata, int img_width,
int img_height, const ov::Rect &rect,
std::vector<ov::Keypoint> *keypoints) {
if (!initialized_) {
return 10000;
}
if (rgbdata == 0) {
return 10001;
}
keypoints->clear();
int w = rect.width;
int h = rect.height;
float scale = 1.f;
if (w > h) {
scale = (float)target_size / w;
w = target_size;
h = h * scale;
} else {
scale = (float)target_size / h;
h = target_size;
w = w * scale;
}
// size_t total_size = rect.width * rect.height * 3 * sizeof(unsigned
// char); unsigned char* data = (unsigned char*)malloc(total_size); const
// unsigned char *start_ptr = rgbdata;
// #if defined(_OPENMP)
// #pragma omp parallel for num_threads(num_threads)
// #endif
// for(size_t i = 0; i < rect.height; ++i) {
// const unsigned char* srcCursor = start_ptr + ((i + rect.y) *
// img_width + rect.x) * 3; unsigned char* dstCursor = data + i *
// rect.width * 3; memcpy(dstCursor, srcCursor, sizeof(unsigned char)
// * 3 * rect.width);
// }
// ncnn::Mat in = ncnn::Mat::from_pixels_resize(data,
// ncnn::Mat::PIXEL_RGB, rect.width, rect.height, w, h);
ncnn::Mat in = ncnn::Mat::from_pixels_roi_resize(
rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, rect.x, rect.y,
rect.width, rect.height, w, h);
int wpad = target_size - w;
int hpad = target_size - h;
ncnn::Mat in_pad;
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2,
wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
// data preprocessing
in_pad.substract_mean_normalize(mean_vals, norm_vals);
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("input", in_pad);
ncnn::Mat regress, center, heatmap, offset;
ex.extract("regress", regress);
ex.extract("offset", offset);
ex.extract("heatmap", heatmap);
ex.extract("center", center);
float *center_data = (float *)center.data;
float *heatmap_data = (float *)heatmap.data;
float *offset_data = (float *)offset.data;
// int top_index = 0;
// float top_score = 0;
int top_index = int(ov::argmax(center_data, center_data + center.h));
float top_score = *std::max_element(center_data, center_data + center.h);
int ct_y = (top_index / feature_size);
int ct_x = top_index - ct_y * feature_size;
std::vector<float> y_regress(num_joints), x_regress(num_joints);
float *regress_data = (float *)regress.channel(ct_y).row(ct_x);
for (size_t i = 0; i < num_joints; i++) {
y_regress[i] = regress_data[i] + (float)ct_y;
x_regress[i] = regress_data[i + num_joints] + (float)ct_x;
}
ncnn::Mat kpt_scores =
ncnn::Mat(feature_size * feature_size, num_joints, sizeof(float));
float *scores_data = (float *)kpt_scores.data;
for (int i = 0; i < feature_size; i++) {
for (int j = 0; j < feature_size; j++) {
std::vector<float> score;
for (int c = 0; c < num_joints; c++) {
float y = (dist_y[i][j] - y_regress[c]) * (dist_y[i][j] - y_regress[c]);
float x = (dist_x[i][j] - x_regress[c]) * (dist_x[i][j] - x_regress[c]);
float dist_weight = sqrt(y + x) + 1.8;
scores_data[c * feature_size * feature_size + i * feature_size + j] =
heatmap_data[i * feature_size * num_joints + j * num_joints + c] /
dist_weight;
}
}
}
std::vector<int> kpts_ys, kpts_xs;
for (int i = 0; i < num_joints; i++) {
// top_index = 0;
// top_score = 0;
top_index =
int(ov::argmax(scores_data + feature_size * feature_size * i,
scores_data + feature_size * feature_size * (i + 1)));
top_score =
*std::max_element(scores_data + feature_size * feature_size * i,
scores_data + feature_size * feature_size * (i + 1));
size_t total_size = rect.width * rect.height * 3 * sizeof(unsigned char);
unsigned char* data = (unsigned char*)malloc(total_size);
const unsigned char *start_ptr = rgbdata;
#if defined(_OPENMP)
#pragma omp parallel for num_threads(num_threads)
#endif
for(size_t i = 0; i < rect.height; ++i) {
const unsigned char* srcCursor = start_ptr + ((i + rect.y) * img_width + rect.x) * 3;
unsigned char* dstCursor = data + i * rect.width * 3;
memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * rect.width);
}
int top_y = (top_index / feature_size);
int top_x = top_index - top_y * feature_size;
kpts_ys.push_back(top_y);
kpts_xs.push_back(top_x);
}
ncnn::Mat in = ncnn::Mat::from_pixels_resize(data, ncnn::Mat::PIXEL_RGB, rect.width, rect.height, w, h);
int wpad = target_size - w;
int hpad = target_size - h;
ncnn::Mat in_pad;
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
// data preprocessing
in_pad.substract_mean_normalize(mean_vals, norm_vals);
for (int i = 0; i < num_joints; i++) {
float kpt_offset_x =
offset_data[kpts_ys[i] * feature_size * num_joints * 2 +
kpts_xs[i] * num_joints * 2 + i * 2];
float kpt_offset_y =
offset_data[kpts_ys[i] * feature_size * num_joints * 2 +
kpts_xs[i] * num_joints * 2 + i * 2 + 1];
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
float x = (kpts_xs[i] + kpt_offset_y) * kpt_scale * target_size;
float y = (kpts_ys[i] + kpt_offset_x) * kpt_scale * target_size;
ex.input("input", in_pad);
ov::Keypoint kpt;
kpt.p = ov::Point2f((x - ((float)wpad / 2)) / scale + rect.x,
(y - ((float)hpad / 2)) / scale + rect.y);
kpt.score = heatmap_data[kpts_ys[i] * feature_size * num_joints +
kpts_xs[i] * num_joints + i];
keypoints->push_back(kpt);
}
ncnn::Mat regress, center, heatmap, offset;
ex.extract("regress", regress);
ex.extract("offset", offset);
ex.extract("heatmap", heatmap);
ex.extract("center", center);
float* center_data = (float*)center.data;
float* heatmap_data = (float*)heatmap.data;
float* offset_data = (float*)offset.data;
// int top_index = 0;
// float top_score = 0;
int top_index = int(ov::argmax(center_data, center_data+center.h));
float top_score = *std::max_element(center_data, center_data + center.h);
int ct_y = (top_index / feature_size);
int ct_x = top_index - ct_y * feature_size;
std::vector<float> y_regress(num_joints), x_regress(num_joints);
float* regress_data = (float*)regress.channel(ct_y).row(ct_x);
for (size_t i = 0; i < num_joints; i++)
{
y_regress[i] = regress_data[i] + (float)ct_y;
x_regress[i] = regress_data[i + num_joints] + (float)ct_x;
}
ncnn::Mat kpt_scores = ncnn::Mat(feature_size * feature_size, num_joints, sizeof(float));
float* scores_data = (float*)kpt_scores.data;
for (int i = 0; i < feature_size; i++)
{
for (int j = 0; j < feature_size; j++)
{
std::vector<float> score;
for (int c = 0; c < num_joints; c++)
{
float y = (dist_y[i][j] - y_regress[c]) * (dist_y[i][j] - y_regress[c]);
float x = (dist_x[i][j] - x_regress[c]) * (dist_x[i][j] - x_regress[c]);
float dist_weight = sqrt(y + x) + 1.8;
scores_data[c* feature_size * feature_size +i* feature_size +j] = heatmap_data[i * feature_size * num_joints + j * num_joints + c] / dist_weight;
}
}
}
std::vector<int> kpts_ys, kpts_xs;
for (int i = 0; i < num_joints; i++)
{
// top_index = 0;
// top_score = 0;
top_index = int(ov::argmax(scores_data + feature_size * feature_size *i, scores_data + feature_size * feature_size *(i+1)));
top_score = *std::max_element(scores_data + feature_size * feature_size * i, scores_data + feature_size * feature_size * (i + 1));
int top_y = (top_index / feature_size);
int top_x = top_index - top_y * feature_size;
kpts_ys.push_back(top_y);
kpts_xs.push_back(top_x);
}
for (int i = 0; i < num_joints; i++)
{
float kpt_offset_x = offset_data[kpts_ys[i] * feature_size * num_joints*2 + kpts_xs[i] * num_joints * 2 + i * 2];
float kpt_offset_y = offset_data[kpts_ys[i] * feature_size * num_joints * 2 + kpts_xs[i] * num_joints * 2 + i * 2+1];
float x = (kpts_xs[i] + kpt_offset_y) * kpt_scale * target_size;
float y = (kpts_ys[i] + kpt_offset_x) * kpt_scale * target_size;
ov::Keypoint kpt;
kpt.p = ov::Point2f((x - ((float)wpad / 2)) / scale + rect.x, (y - ((float)hpad / 2)) / scale + rect.y);
kpt.score = heatmap_data[kpts_ys[i] * feature_size * num_joints + kpts_xs[i] * num_joints + i];
keypoints->push_back(kpt);
}
free(data);
return 0;
// free(data);
return 0;
}
}
} // namespace ovpose

View File

@@ -7,70 +7,71 @@
namespace ovpose {
int UltralightEstimator::ExtractKeypoints(const unsigned char* rgbdata,
int img_width, int img_height,
const ov::Rect& rect, std::vector<ov::Keypoint>* keypoints) {
if (!initialized_) {
return 10000;
}
if (rgbdata == 0){
return 10001;
}
keypoints->clear();
int w = rect.width;
int h = rect.height;
size_t total_size = w * h * 3 * sizeof(unsigned char);
unsigned char* data = (unsigned char*)malloc(total_size);
const unsigned char *start_ptr = rgbdata;
#if defined(_OPENMP)
#pragma omp parallel for num_threads(num_threads)
#endif
for(size_t i = 0; i < h; ++i) {
const unsigned char* srcCursor = start_ptr + ((i + rect.y) * img_width + rect.x) * 3;
unsigned char* dstCursor = data + i * w * 3;
memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * w);
}
ncnn::Mat in = ncnn::Mat::from_pixels_resize(data, ncnn::Mat::PIXEL_RGB, w, h, 192, 256);
// data preprocessing
in.substract_mean_normalize(meanVals, normVals);
int UltralightEstimator::ExtractKeypoints(
const unsigned char *rgbdata, int img_width, int img_height,
const ov::Rect &rect, std::vector<ov::Keypoint> *keypoints) {
if (!initialized_) {
return 10000;
}
if (rgbdata == 0) {
return 10001;
}
keypoints->clear();
// int w = rect.width;
// int h = rect.height;
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("data", in);
ncnn::Mat out;
ex.extract("hybridsequential0_conv7_fwd", out);
for (int p = 0; p < out.c; p++)
{
const ncnn::Mat m = out.channel(p);
// size_t total_size = w * h * 3 * sizeof(unsigned char);
// unsigned char* data = (unsigned char*)malloc(total_size);
// const unsigned char *start_ptr = rgbdata;
// #if defined(_OPENMP)
// #pragma omp parallel for num_threads(num_threads)
// #endif
// for(size_t i = 0; i < h; ++i) {
// const unsigned char* srcCursor = start_ptr + ((i + rect.y) *
// img_width + rect.x) * 3; unsigned char* dstCursor = data + i * w *
// 3; memcpy(dstCursor, srcCursor, sizeof(unsigned char) * 3 * w);
// }
// ncnn::Mat in = ncnn::Mat::from_pixels_resize(data,
// ncnn::Mat::PIXEL_RGB, w, h, 192, 256);
float max_prob = 0.f;
int max_x = 0;
int max_y = 0;
for (int y = 0; y < out.h; y++)
{
const float* ptr = m.row(y);
for (int x = 0; x < out.w; x++)
{
float prob = ptr[x];
if (prob > max_prob)
{
max_prob = prob;
max_x = x;
max_y = y;
}
}
ncnn::Mat in = ncnn::Mat::from_pixels_roi_resize(
rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, rect.x, rect.y,
rect.width, rect.height, 192, 256);
in.substract_mean_normalize(meanVals, normVals);
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ex.input("data", in);
ncnn::Mat out;
ex.extract("hybridsequential0_conv7_fwd", out);
for (int p = 0; p < out.c; p++) {
const ncnn::Mat m = out.channel(p);
float max_prob = 0.f;
int max_x = 0;
int max_y = 0;
for (int y = 0; y < out.h; y++) {
const float *ptr = m.row(y);
for (int x = 0; x < out.w; x++) {
float prob = ptr[x];
if (prob > max_prob) {
max_prob = prob;
max_x = x;
max_y = y;
}
ov::Keypoint keypoint;
keypoint.p = ov::Point2f(max_x * w / (float)out.w+rect.x, max_y * h / (float)out.h+rect.y);
keypoint.score = max_prob;
keypoints->push_back(keypoint);
}
}
free(data);
return 0;
ov::Keypoint keypoint;
keypoint.p = ov::Point2f(max_x * rect.width / (float)out.w + rect.x,
max_y * rect.height / (float)out.h + rect.y);
keypoint.score = max_prob;
keypoints->push_back(keypoint);
}
// free(data);
return 0;
}
}
} // namespace ovpose

View File

@@ -69,14 +69,7 @@ int RVM::Matting(const unsigned char *rgbdata, int img_width, int img_height,
for (int c = 0; c < 3; ++c) {
float *pImage = matting.channel(c);
for (int i = 0; i < target_width * target_height; i++) {
float alpha = pha[i];
if (alpha <= 0) {
alpha = 0;
} else if (alpha > 1) {
alpha = 1;
}
int value = 255 * alpha;
pImage[i] = value;
pImage[i] = pha[i] * 255;
}
}
@@ -132,6 +125,13 @@ int RVM::Merge(const unsigned char *rgbdata, int img_width, int img_height,
ex.input("r2i", r2i);
ex.input("r3i", r3i);
ex.input("r4i", r4i);
ncnn::Mat r1o, r2o, r3o, r4o;
ex.extract("r4o", r4o);
ex.extract("r3o", r3o);
ex.extract("r2o", r2o);
ex.extract("r1o", r1o);
ncnn::Mat fgr, pha;
ex.extract("fgr", fgr);
ex.extract("pha", pha);
@@ -140,18 +140,8 @@ int RVM::Merge(const unsigned char *rgbdata, int img_width, int img_height,
float *pImage = matting.channel(c);
for (int i = 0; i < target_width * target_height; i++) {
float alpha = pha[i];
if (alpha <= 0) {
alpha = 0;
} else if (alpha > 1) {
alpha = 1;
}
float frv = fgr.channel(c)[i];
if (frv < 0) {
frv = 0;
} else if (frv > 1) {
frv = 1;
}
float value = fgr.channel(c)[i] * 255 * alpha;
float value =
fgr.channel(c)[i] * 255 * alpha + bg.channel(c)[i] * (1 - alpha);
pImage[i] = value;
}
}
@@ -165,9 +155,12 @@ int RVM::Merge(const unsigned char *rgbdata, int img_width, int img_height,
out->data = (unsigned char *)malloc(outimg.total());
outimg.to_pixels(out->data, ncnn::Mat::PIXEL_RGB);
// 4. update context (needed for video detection.)
context_is_update = false; // init state.
// this->update_context(ex);
r1i.clone_from(r1o); // deepcopy
r2i.clone_from(r2o); // deepcopy
r3i.clone_from(r3o); // deepcopy
r4i.clone_from(r4o); // deepcopy
context_is_update = true;
return 0;
}
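The rewritten Merge path composites each channel with a standard alpha blend, pImage[i] = 255 * alpha * fgr + (1 - alpha) * bg, where alpha = pha[i], fgr = fgr.channel(c)[i] and bg = bg.channel(c)[i]. It also carries the recurrent states forward: r1i..r4i are deep-copied from the freshly extracted r1o..r4o via clone_from, so the next video frame starts from the updated context.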

View File

@@ -0,0 +1,299 @@
#include "lighttrack.hpp"
#include <math.h>
#ifdef OV_VULKAN
#include "gpu.h"
#endif // OV_VULKAN
namespace ovtracker {
static std::vector<float> sc_f(std::vector<float> w, std::vector<float> h,
float sz) {
std::vector<float> pad(16 * 16, 0);
std::vector<float> sz2;
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 16; j++) {
pad[i * 16 + j] = (w[i * 16 + j] + h[i * 16 + j]) * 0.5;
}
}
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 16; j++) {
float t = sqrt((w[i * 16 + j] + pad[i * 16 + j]) *
(h[i * 16 + j] + pad[i * 16 + j])) /
sz;
sz2.push_back(std::max(t, (float)1.0 / t));
}
}
return sz2;
}
static std::vector<float> rc_f(std::vector<float> w, std::vector<float> h,
ov::Point2f target_sz) {
float ratio = target_sz.x / target_sz.y;
std::vector<float> sz2;
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 16; j++) {
float t = ratio / (w[i * 16 + j] / h[i * 16 + j]);
sz2.push_back(std::max(t, (float)1.0 / t));
}
}
return sz2;
}
static float sz_whFun(ov::Point2f wh) {
float pad = (wh.x + wh.y) * 0.5;
float sz2 = (wh.x + pad) * (wh.y + pad);
return sqrt(sz2);
}
ncnn::Mat LightTrack::get_subwindow_tracking(ncnn::Mat im, ov::Point2f pos,
int model_sz, int original_sz) {
float c = (float)(original_sz + 1) / 2;
int context_xmin = round(pos.x - c);
int context_xmax = context_xmin + original_sz - 1;
int context_ymin = round(pos.y - c);
int context_ymax = context_ymin + original_sz - 1;
int left_pad = int(std::max(0, -context_xmin));
int top_pad = int(std::max(0, -context_ymin));
int right_pad = int(std::max(0, context_xmax - im.w + 1));
int bottom_pad = int(std::max(0, context_ymax - im.h + 1));
context_xmin += left_pad;
context_xmax += left_pad;
context_ymin += top_pad;
context_ymax += top_pad;
ncnn::Mat im_path_original;
if (top_pad > 0 || left_pad > 0 || right_pad > 0 || bottom_pad > 0) {
ncnn::Mat te_im =
ncnn::Mat(im.w + top_pad + bottom_pad, im.h + left_pad + right_pad, 3);
// te_im(cv::Rect(left_pad, top_pad, im.cols, im.rows)) = im;
ncnn::copy_make_border(im, te_im, top_pad, bottom_pad, left_pad, right_pad,
ncnn::BORDER_CONSTANT, 0.f);
// im_path_original = te_im(cv::Rect(context_xmin, context_ymin,
// context_xmax - context_xmin + 1, context_ymax - context_ymin + 1));
ncnn::copy_cut_border(te_im, im_path_original, context_ymin,
context_ymax + 1, context_xmin, context_xmax + 1);
} else {
// im_path_original = im(cv::Rect(context_xmin, context_ymin, context_xmax -
// context_xmin + 1, context_ymax - context_ymin + 1));
ncnn::copy_cut_border(im, im_path_original, context_ymin, context_ymax + 1,
context_xmin, context_xmax + 1);
}
ncnn::Mat im_path;
ncnn::resize_bicubic(im_path_original, im_path, model_sz, model_sz);
return im_path;
}
ncnn::Mat LightTrack::get_template(ncnn::Mat templateImage, ov::Point2f pos) {
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 16; j++) {
grid_to_search_x[i * 16 + j] = j * 16;
grid_to_search_y[i * 16 + j] = i * 16;
}
}
std::vector<float> hanning(16, 0);
for (int i = 0; i < 16; i++) {
float w = 0.5 - 0.5 * cos(2 * M_PI * i / 15);
hanning[i] = w;
}
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 16; j++) {
window[i * 16 + j] = hanning[i] * hanning[j];
}
}
float hc_z = target_sz.y + 0.5 * (target_sz.x + target_sz.y);
float wc_z = target_sz.x + 0.5 * (target_sz.x + target_sz.y);
float s_z = sqrt(wc_z * hc_z);
ncnn::Mat temp = get_subwindow_tracking(templateImage, pos, 127, s_z);
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ncnn::Mat ncnn_in1;
ncnn::resize_bicubic(temp, ncnn_in1, 127, 127);
ncnn_in1.substract_mean_normalize(mean_vals1, norm_vals1);
ex.input("input", ncnn_in1);
ncnn::Mat zf_;
ex.extract("745", zf_);
return zf_;
}
int LightTrack::init(const ov::Rect &rect, int screenWidth) {
if (!inited_) {
float scale = (double)temp.w / (double)screenWidth;
template_box.x = rect.x * scale;
template_box.y = rect.y * scale;
template_box.width = rect.width * scale;
template_box.height = rect.height * scale;
target_pos = ov::Point2f(template_box.x + (float)template_box.width / 2,
template_box.y + (float)template_box.height / 2);
target_sz.x = template_box.width;
target_sz.y = template_box.height;
zf = get_template(temp, target_pos);
inited_ = true;
}
return 0;
}
void LightTrack::update(ncnn::Mat x_crop, ncnn::Mat zf_,
ov::Point2f &target_pos_, ov::Point2f &target_sz_,
float scale_z, std::vector<float> &cls_score) {
target_sz_.x = target_sz_.x * scale_z;
target_sz_.y = target_sz_.y * scale_z;
ncnn::Extractor ex = net_->create_extractor();
ex.set_light_mode(light_mode_);
ex.set_num_threads(num_threads);
ncnn::Mat ncnn_in1 = x_crop;
ncnn_in1.substract_mean_normalize(mean_vals1, norm_vals1);
ex.input("input", ncnn_in1);
ex.input("temp", zf_);
ncnn::Mat cls, reg;
ex.extract("cls", cls);
ex.extract("reg", reg);
float *cls_data = (float *)cls.data;
cls_score.clear();
for (int i = 0; i < 16 * 16; i++) {
cls_score.push_back(ov::sigmoid(cls_data[i]));
}
std::vector<float> pred_x1(16 * 16, 0), pred_y1(16 * 16, 0),
pred_x2(16 * 16, 0), pred_y2(16 * 16, 0);
float *reg_data1 = reg.channel(0);
float *reg_data2 = reg.channel(1);
float *reg_data3 = reg.channel(2);
float *reg_data4 = reg.channel(3);
for (int j = 0; j < 16; j++) {
for (int k = 0; k < 16; k++) {
pred_x1[j * 16 + k] =
grid_to_search_x[j * 16 + k] - reg_data1[j * 16 + k];
pred_y1[j * 16 + k] =
grid_to_search_y[j * 16 + k] - reg_data2[j * 16 + k];
pred_x2[j * 16 + k] =
grid_to_search_x[j * 16 + k] + reg_data3[j * 16 + k];
pred_y2[j * 16 + k] =
grid_to_search_y[j * 16 + k] + reg_data4[j * 16 + k];
}
}
std::vector<float> w(16 * 16, 0), h(16 * 16, 0);
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 16; j++) {
w[i * 16 + j] = pred_x2[i * 16 + j] - pred_x1[i * 16 + j];
h[i * 16 + j] = pred_y2[i * 16 + j] - pred_y1[i * 16 + j];
}
}
float sz_wh = sz_whFun(target_sz_);
std::vector<float> s_c = sc_f(w, h, sz_wh);
std::vector<float> r_c = rc_f(w, h, target_sz_);
std::vector<float> penalty(16 * 16, 0);
for (int i = 0; i < 16 * 16; i++) {
penalty[i] = exp(-1 * (s_c[i] * r_c[i] - 1) * 0.062);
}
std::vector<float> pscore(16 * 16, 0);
int r_max = 0, c_max = 0;
float maxScore = 0;
for (int i = 0; i < 16 * 16; i++) {
pscore[i] = (penalty[i] * cls_score[i]) * (1 - 0.225) + window[i] * 0.225;
if (pscore[i] > maxScore) {
maxScore = pscore[i];
r_max = floor(i / 16);
c_max = ((float)i / 16 - r_max) * 16;
}
}
float predx1 = pred_x1[r_max * 16 + c_max];
float predy1 = pred_y1[r_max * 16 + c_max];
float predx2 = pred_x2[r_max * 16 + c_max];
float predy2 = pred_y2[r_max * 16 + c_max];
float pred_xs = (predx1 + predx2) / 2;
float pred_ys = (predy1 + predy2) / 2;
float pred_w = predx2 - predx1;
float pred_h = predy2 - predy1;
float diff_xs = pred_xs - 256.f / 2;
float diff_ys = pred_ys - 256.f / 2;
diff_xs = diff_xs / scale_z;
diff_ys = diff_ys / scale_z;
pred_w = pred_w / scale_z;
pred_h = pred_h / scale_z;
target_sz_.x = target_sz_.x / scale_z;
target_sz_.y = target_sz_.y / scale_z;
float lr =
penalty[r_max * 16 + c_max] * cls_score[r_max * 16 + c_max] * 0.765;
float res_xs = target_pos_.x + diff_xs;
float res_ys = target_pos_.y + diff_ys;
float res_w = pred_w * lr + (1 - lr) * target_sz_.x;
float res_h = pred_h * lr + (1 - lr) * target_sz_.y;
target_pos_.x = res_xs;
target_pos_.y = res_ys;
target_sz_.x = target_sz_.x * (1 - lr) + lr * res_w;
target_sz_.y = target_sz_.y * (1 - lr) + lr * res_h;
}
int LightTrack::Track(const unsigned char *rgbdata, int img_width,
int img_height, ov::Rect *rect) {
if (!initialized_) {
return 10000;
}
if (rgbdata == 0) {
return 10001;
}
init(*rect, img_width);
float hc_z = target_sz.y + 0.5 * (target_sz.x + target_sz.y);
float wc_z = target_sz.x + 0.5 * (target_sz.x + target_sz.y);
float s_z = sqrt(wc_z * hc_z);
float scale_z = 127. / s_z;
float d_search = (256. - 127.) / 2;
float pad = d_search / scale_z;
int s_x = round(s_z + 2 * pad);
ncnn::Mat img = ncnn::Mat::from_pixels(rgbdata, ncnn::Mat::PIXEL_RGB,
img_width, img_height);
ncnn::Mat x_crop = get_subwindow_tracking(img, target_pos, 256, s_x);
std::vector<float> cls_score;
update(x_crop, zf, target_pos, target_sz, scale_z, cls_score);
target_pos.x = std::max(0.f, std::min((float)img.w, target_pos.x));
target_pos.y = std::max(0.f, std::min((float)img.h, target_pos.y));
target_sz.x = std::max(10.f, std::min((float)img.w, target_sz.x));
target_sz.y = std::max(10.f, std::min((float)img.h, target_sz.y));
rect->x = target_pos.x - target_sz.x / 2;
rect->y = target_pos.y - target_sz.y / 2;
rect->width = target_sz.x;
rect->height = target_sz.y;
return 0;
}
} // namespace ovtracker
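In update() above, each of the 16 x 16 grid cells is scored as pscore[i] = 0.775 * penalty[i] * cls_score[i] + 0.225 * window[i], where penalty[i] = exp(-0.062 * (s_c[i] * r_c[i] - 1)) penalizes scale and aspect-ratio changes and window[i] is the 2D Hanning window over the search grid. The best-scoring cell gives the predicted box, and the target size is then smoothed with the learning rate lr = 0.765 * penalty * cls_score at that cell.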

View File

@@ -0,0 +1,37 @@
#ifndef _TRACKER_LIGHT_TRACK_H_
#define _TRACKER_LIGHT_TRACK_H_
#include "../tracker.hpp"
#include "net.h"
namespace ovtracker {
class LightTrack : public Tracker {
public:
int Track(const unsigned char *rgbdata, int img_width, int img_height,
ov::Rect *rect);
private:
ncnn::Mat zf;
ov::Rect template_box;
ncnn::Mat temp;
std::vector<float> grid_to_search_x;
std::vector<float> grid_to_search_y;
std::vector<float> window;
ov::Point2f target_sz;
ov::Point2f target_pos;
const float mean_vals1[3] = {123.675f, 116.28f, 103.53f};
const float norm_vals1[3] = {0.01712475f, 0.0175f, 0.01742919f};
bool inited_;
int init(const ov::Rect &rect, int screenWidth);
ncnn::Mat get_template(ncnn::Mat templateImage, ov::Point2f pos);
ncnn::Mat get_subwindow_tracking(ncnn::Mat im, ov::Point2f pos, int model_sz,
int original_sz);
void update(ncnn::Mat x_crop, ncnn::Mat zf_, ov::Point2f &target_pos_,
ov::Point2f &target_sz_, float scale_z,
std::vector<float> &cls_score);
};
} // namespace ovtracker
#endif // !_TRACKER_LIGHT_TRACK_H_

14
src/tracker/tracker.cpp Normal file
View File

@@ -0,0 +1,14 @@
#include "tracker.h"
#include "lighttrack/lighttrack.hpp"
ITracker new_light_track() { return new ovtracker::LightTrack(); }
int track_object(ITracker d, const unsigned char *rgbdata, int img_width,
int img_height, Rect *rect) {
int ret = static_cast<ovtracker::Tracker *>(d)->Track(rgbdata, img_width,
img_height, rect);
if (ret != 0) {
return ret;
}
return 0;
}

17
src/tracker/tracker.h Normal file
View File

@@ -0,0 +1,17 @@
#ifndef _TRACKER_C_H_
#define _TRACKER_C_H_
#include "../common/common.h"
#ifdef __cplusplus
#include "tracker.hpp"
extern "C" {
#endif
typedef void *ITracker;
ITracker new_light_track();
int track_object(ITracker d, const unsigned char *rgbdata, int img_width,
int img_height, Rect *rect);
#ifdef __cplusplus
}
#endif
#endif // !_TRACKER_C_H_

13
src/tracker/tracker.hpp Normal file
View File

@@ -0,0 +1,13 @@
#ifndef _TRACKER_H_
#define _TRACKER_H_
#include "../common/common.hpp"
namespace ovtracker {
class Tracker : public ov::Estimator {
public:
virtual int Track(const unsigned char *rgbdata, int img_width, int img_height,
ov::Rect *rect) = 0;
};
} // namespace ovtracker
#endif // !_TRACKER_H_