feat(pose): add pose segmentor

2025-10-06 14:16:52 +08:00 · 2021-11-05 14:06:55 +08:00
parent ede2cbdba6
commit 1997bf670e
21 changed files with 679 additions and 10 deletions
--- a/README.md
+++ b/README.md
@@ -40,6 +40,9 @@ cmake .. # optional -DNCNN_VULKAN=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COM
  - estimator (for pose estimation)
    - ultralight [Google Drive](https://drive.google.com/drive/folders/15b-I5HDyGe2WLb-TO85SJYmnYONvGOKh?usp=sharing)
    - movenet [Google Drive](https://drive.google.com/drive/folders/14zgKk0tro1kjRrSTs0EAlEKrV8Q4XA34?usp=sharing)
+  - segmentor (for pose segmentation)
+    - deeplabv3plus [Google Drive](https://drive.google.com/drive/folders/1BjiK0IiVAfyX30OoyQzoe1YBzvSudghG?usp=sharing)
+    - erdnet [Google Drive](https://drive.google.com/drive/folders/1WkQqYT9D4oGL6Gubu0SOeqcZmrdZ5cNw?usp=sharing)
 - hand
  - detector (for hand detect)
    - yolox [Google Drive](https://drive.google.com/drive/folders/1lNm5X6DJ1ZXVaqg54rXnRhvPfC5lAxlH?usp=sharing)
--- a/go/common/image.go
+++ b/go/common/image.go
@@ -92,7 +92,11 @@ func NewImageFromBytes(data []byte, w int, h int, channels int) (image.Image, er
 	for y := 0; y < h; y++ {
 		for x := 0; x < w; x++ {
 			pos := (y*w + x) * channels
-			img.SetRGBA(x, y, color.RGBA{uint8(data[pos]), uint8(data[pos+1]), uint8(data[pos+2]), 255})
+			var alpha byte = 255
+			if channels == 4 {
+				alpha = data[pos+3]
+			}
+			img.SetRGBA(x, y, color.RGBA{uint8(data[pos]), uint8(data[pos+1]), uint8(data[pos+2]), uint8(alpha)})
 		}
 	}
 	return img, nil
--- a/go/examples/poseseg/main.go
+++ b/go/examples/poseseg/main.go
@@ -0,0 +1,135 @@
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"image"
+	"image/jpeg"
+	"log"
+	"os"
+	"os/user"
+	"path/filepath"
+	"strings"
+
+	"github.com/bububa/openvision/go/common"
+	"github.com/bububa/openvision/go/pose/segmentor"
+)
+
+// main runs every pose segmentor of this example against the sample image,
+// producing one matting result and one background-merge result per model.
+func main() {
+	cwd, _ := os.Getwd()
+	dataDir := cleanPath(cwd, "~/go/src/github.com/bububa/openvision/data")
+	imagesDir := filepath.Join(dataDir, "./images")
+	modelsDir := filepath.Join(dataDir, "./models")
+	common.CreateGPUInstance()
+	defer common.DestroyGPUInstance()
+	bigCores := common.GetBigCPUCount()
+	common.SetOMPThreads(bigCores)
+	log.Printf("CPU big cores:%d\n", bigCores)
+	// Both models are constructed and loaded before the first inference runs.
+	segmentors := []segmentor.Segmentor{
+		deeplabv3(modelsDir),
+		erdnet(modelsDir),
+	}
+	for i := range segmentors {
+		s := segmentors[i]
+		// The deferred Destroy calls run when main returns, not per iteration.
+		defer s.Destroy()
+		common.SetEstimatorThreads(s, bigCores)
+		matting(s, imagesDir, "ultralight-pose2.jpg", i)
+		merge(s, imagesDir, "ultralight-pose2.jpg", "bg2.jpg", i)
+	}
+}
+
+// deeplabv3 creates a Deeplabv3plus segmentor and loads its model from the
+// "deeplabv3plus" directory under modelPath; a load failure ends the process.
+func deeplabv3(modelPath string) segmentor.Segmentor {
+	seg := segmentor.NewDeeplabv3plus()
+	err := seg.LoadModel(filepath.Join(modelPath, "deeplabv3plus"))
+	if err != nil {
+		log.Fatalln(err)
+	}
+	return seg
+}
+
+// erdnet creates an ERDNet segmentor and loads its model from the "erdnet"
+// directory under modelPath; a load failure ends the process.
+func erdnet(modelPath string) segmentor.Segmentor {
+	seg := segmentor.NewERDNet()
+	err := seg.LoadModel(filepath.Join(modelPath, "erdnet"))
+	if err != nil {
+		log.Fatalln(err)
+	}
+	return seg
+}
+
+// matting runs seg.Matting on imgPath/filename and stores the result as
+// results/poseseg-matting-<idx>-<filename> under imgPath.
+// Every failure (load, inference, save) terminates the process.
+func matting(seg segmentor.Segmentor, imgPath string, filename string, idx int) {
+	src, err := loadImage(filepath.Join(imgPath, filename))
+	if err != nil {
+		log.Fatalln("load image failed,", err)
+	}
+	mask, err := seg.Matting(common.NewImage(src))
+	if err != nil {
+		log.Fatalln(err)
+	}
+	dst := filepath.Join(imgPath, "./results", fmt.Sprintf("poseseg-matting-%d-%s", idx, filename))
+	if err = saveImage(mask, dst); err != nil {
+		log.Fatalln(err)
+	}
+}
+
+// merge runs seg.Merge with imgPath/filename as the foreground source and
+// imgPath/bgFilename as the background, storing the result as
+// results/poseseg-merge-<idx>-<filename> under imgPath.
+// Every failure (load, inference, save) terminates the process.
+func merge(seg segmentor.Segmentor, imgPath string, filename string, bgFilename string, idx int) {
+	fgLoaded, err := loadImage(filepath.Join(imgPath, filename))
+	if err != nil {
+		log.Fatalln("load image failed,", err)
+	}
+	fg := common.NewImage(fgLoaded)
+	bgLoaded, err := loadImage(filepath.Join(imgPath, bgFilename))
+	if err != nil {
+		log.Fatalln("load bg image failed,", err)
+	}
+	merged, err := seg.Merge(fg, common.NewImage(bgLoaded))
+	if err != nil {
+		log.Fatalln(err)
+	}
+	dst := filepath.Join(imgPath, "./results", fmt.Sprintf("poseseg-merge-%d-%s", idx, filename))
+	if err = saveImage(merged, dst); err != nil {
+		log.Fatalln(err)
+	}
+}
+
+// loadImage opens filePath and decodes it with image.Decode.
+// Only formats registered with the image package can be decoded; this file
+// imports image/jpeg, which registers JPEG.
+func loadImage(filePath string) (image.Image, error) {
+	f, openErr := os.Open(filePath)
+	if openErr != nil {
+		return nil, openErr
+	}
+	defer f.Close()
+	decoded, _, decodeErr := image.Decode(f)
+	if decodeErr != nil {
+		return nil, decodeErr
+	}
+	return decoded, nil
+}
+
+// saveImage encodes img as a JPEG with the default encoder options and writes
+// it to filePath, creating the file or truncating an existing one.
+// It returns the first error from encoding, creating, writing or closing, so a
+// short write or a failed close is never reported as success.
+func saveImage(img image.Image, filePath string) error {
+	buf := new(bytes.Buffer)
+	if err := jpeg.Encode(buf, img, nil); err != nil {
+		return err
+	}
+	fn, err := os.Create(filePath)
+	if err != nil {
+		return err
+	}
+	if _, err := fn.Write(buf.Bytes()); err != nil {
+		// The write error is the one worth reporting; still release the handle.
+		fn.Close()
+		return err
+	}
+	// Close explicitly: a failure to flush the data surfaces here.
+	return fn.Close()
+}
+
+// cleanPath resolves path to a usable location.
+//
+// "~" and paths starting with "~/" are expanded against the current user's
+// home directory; every other path is joined onto wd.
+// The home directory is only looked up when a tilde has to be expanded. If
+// user.Current fails, the HOME environment variable is used instead of
+// dereferencing a nil *user.User.
+func cleanPath(wd string, path string) string {
+	if path != "~" && !strings.HasPrefix(path, "~/") {
+		return filepath.Join(wd, path)
+	}
+	home := os.Getenv("HOME")
+	if usr, err := user.Current(); err == nil {
+		home = usr.HomeDir
+	}
+	if path == "~" {
+		return home
+	}
+	return filepath.Join(home, path[2:])
+}
--- a/go/pose/segmentor/cgo.go
+++ b/go/pose/segmentor/cgo.go
@@ -0,0 +1,11 @@
+// +build !vulkan
+
+package segmentor
+
+/*
+#cgo CXXFLAGS:   --std=c++11 -fopenmp
+#cgo CPPFLAGS:   -I ${SRCDIR}/../../../include -I /usr/local/include
+#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision
+#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib
+*/
+import "C"
--- a/go/pose/segmentor/cgo_vulkan.go
+++ b/go/pose/segmentor/cgo_vulkan.go
@@ -0,0 +1,11 @@
+// +build vulkan
+
+package segmentor
+
+/*
+#cgo CXXFLAGS:   --std=c++11 -fopenmp
+#cgo CPPFLAGS:   -I ${SRCDIR}/../../../include -I /usr/local/include
+#cgo LDFLAGS: -lstdc++ -lncnn -lomp -lopenvision -lglslang -lvulkan -lSPIRV -lOGLCompiler -lMachineIndependent -lGenericCodeGen -lOSDependent
+#cgo LDFLAGS: -L /usr/local/lib -L ${SRCDIR}/../../../lib
+*/
+import "C"
--- a/go/pose/segmentor/deeplabv3plus.go
+++ b/go/pose/segmentor/deeplabv3plus.go
@@ -0,0 +1,51 @@
+package segmentor
+
+/*
+#include <stdlib.h>
+#include <stdbool.h>
+#include "openvision/pose/segmentor.h"
+*/
+import "C"
+import (
+	"image"
+	"unsafe"
+
+	"github.com/bububa/openvision/go/common"
+)
+
+// Deeplabv3plus is the Go wrapper around the C deeplabv3plus pose segmentor.
+type Deeplabv3plus struct {
+	// d is the opaque C handle returned by new_deeplabv3plus_pose_segmentor.
+	d C.IPoseSegmentor
+}
+
+// NewDeeplabv3plus returns a Deeplabv3plus backed by a newly created C
+// segmentor. No model is loaded yet; call LoadModel before use.
+func NewDeeplabv3plus() *Deeplabv3plus {
+	return &Deeplabv3plus{
+		d: C.new_deeplabv3plus_pose_segmentor(),
+	}
+}
+
+// Destroy releases the segmentor by passing it to common.DestroyEstimator.
+func (d *Deeplabv3plus) Destroy() {
+	common.DestroyEstimator(d)
+}
+
+// Pointer returns the underlying C handle as an unsafe.Pointer.
+func (d *Deeplabv3plus) Pointer() unsafe.Pointer {
+	return unsafe.Pointer(d.d)
+}
+
+// LoadModel loads the segmentor model from modelPath via
+// common.EstimatorLoadModel.
+func (d *Deeplabv3plus) LoadModel(modelPath string) error {
+	return common.EstimatorLoadModel(d, modelPath)
+}
+
+// Matting implements Segmentor by delegating to the package-level Matting.
+func (d *Deeplabv3plus) Matting(img *common.Image) (image.Image, error) {
+	return Matting(d, img)
+}
+
+// Merge implements Segmentor by delegating to the package-level Merge.
+func (d *Deeplabv3plus) Merge(img *common.Image, bg *common.Image) (image.Image, error) {
+	return Merge(d, img, bg)
+}
--- a/go/pose/segmentor/doc.go
+++ b/go/pose/segmentor/doc.go
@@ -0,0 +1,2 @@
+// Package segmentor provides pose segmentation models (matting and background merge).
+package segmentor
--- a/go/pose/segmentor/erdnet.go
+++ b/go/pose/segmentor/erdnet.go
@@ -0,0 +1,51 @@
+package segmentor
+
+/*
+#include <stdlib.h>
+#include <stdbool.h>
+#include "openvision/pose/segmentor.h"
+*/
+import "C"
+import (
+	"image"
+	"unsafe"
+
+	"github.com/bububa/openvision/go/common"
+)
+
+// ERDNet is the Go wrapper around the C erdnet pose segmentor.
+type ERDNet struct {
+	// d is the opaque C handle returned by new_erdnet_pose_segmentor.
+	d C.IPoseSegmentor
+}
+
+// NewERDNet returns an ERDNet backed by a newly created C segmentor.
+// No model is loaded yet; call LoadModel before use.
+func NewERDNet() *ERDNet {
+	return &ERDNet{
+		d: C.new_erdnet_pose_segmentor(),
+	}
+}
+
+// Destroy releases the segmentor by passing it to common.DestroyEstimator.
+func (d *ERDNet) Destroy() {
+	common.DestroyEstimator(d)
+}
+
+// Pointer returns the underlying C handle as an unsafe.Pointer.
+func (d *ERDNet) Pointer() unsafe.Pointer {
+	return unsafe.Pointer(d.d)
+}
+
+// LoadModel loads the segmentor model from modelPath via
+// common.EstimatorLoadModel.
+func (d *ERDNet) LoadModel(modelPath string) error {
+	return common.EstimatorLoadModel(d, modelPath)
+}
+
+// Matting implements Segmentor by delegating to the package-level Matting.
+func (d *ERDNet) Matting(img *common.Image) (image.Image, error) {
+	return Matting(d, img)
+}
+
+// Merge implements Segmentor by delegating to the package-level Merge.
+func (d *ERDNet) Merge(img *common.Image, bg *common.Image) (image.Image, error) {
+	return Merge(d, img, bg)
+}
--- a/go/pose/segmentor/segmentor.go
+++ b/go/pose/segmentor/segmentor.go
@@ -0,0 +1,65 @@
+package segmentor
+
+/*
+#include <stdlib.h>
+#include <stdbool.h>
+#include "openvision/common/common.h"
+#include "openvision/pose/segmentor.h"
+*/
+import "C"
+import (
+	"image"
+	"unsafe"
+
+	openvision "github.com/bububa/openvision/go"
+	"github.com/bububa/openvision/go/common"
+)
+
+// Segmentor is the interface implemented by every pose segmentor.
+// It extends common.Estimator with the two segmentation operations.
+type Segmentor interface {
+	common.Estimator
+	// Matting returns the matting image computed for img.
+	Matting(img *common.Image) (image.Image, error)
+	// Merge returns img composited with bg according to the segmentation.
+	Merge(img *common.Image, bg *common.Image) (image.Image, error)
+}
+
+// Matting runs the C pose_segment_matting routine of d on img and converts the
+// C output image into a Go image.
+// A non-zero C status is returned as an openvision.DetectPoseError.
+// NOTE: &pixels[0] panics when img.Bytes() is empty.
+func Matting(d Segmentor, img *common.Image) (image.Image, error) {
+	w, h := img.WidthF64(), img.HeightF64()
+	pixels := img.Bytes()
+	result := common.NewCImage()
+	// The C image is released once it has been converted (or on error).
+	defer common.FreeCImage(result)
+	code := C.pose_segment_matting(
+		(C.IPoseSegmentor)(d.Pointer()),
+		(*C.uchar)(unsafe.Pointer(&pixels[0])),
+		C.int(w),
+		C.int(h),
+		(*C.Image)(unsafe.Pointer(result)))
+	if code == 0 {
+		return common.GoImage(result)
+	}
+	return nil, openvision.DetectPoseError(int(code))
+}
+
+// Merge runs the C pose_segment_merge routine of d with img as the foreground
+// source and bg as the background, and converts the C output image into a Go
+// image.
+// A non-zero C status is returned as an openvision.DetectPoseError.
+// NOTE: &fgPixels[0] and &bgPixels[0] panic when the respective image has no bytes.
+func Merge(d Segmentor, img *common.Image, bg *common.Image) (image.Image, error) {
+	fgW, fgH := img.WidthF64(), img.HeightF64()
+	fgPixels := img.Bytes()
+	bgW, bgH := bg.Width(), bg.Height()
+	bgPixels := bg.Bytes()
+	result := common.NewCImage()
+	// The C image is released once it has been converted (or on error).
+	defer common.FreeCImage(result)
+	code := C.pose_segment_merge(
+		(C.IPoseSegmentor)(d.Pointer()),
+		(*C.uchar)(unsafe.Pointer(&fgPixels[0])),
+		C.int(fgW), C.int(fgH),
+		(*C.uchar)(unsafe.Pointer(&bgPixels[0])),
+		C.int(bgW), C.int(bgH),
+		(*C.Image)(unsafe.Pointer(result)))
+	if code == 0 {
+		return common.GoImage(result)
+	}
+	return nil, openvision.DetectPoseError(int(code))
+}
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -68,6 +68,7 @@ target_include_directories(openvision
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose>
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose/detecter>
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose/estimator>
+        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pose/segmentor>
 )

 #install(TARGETS openvision EXPORT openvision ARCHIVE DESTINATION ${LIBRARY_OUTPUT_PATH})
@@ -96,6 +97,7 @@ file(COPY
 file(COPY
    ${CMAKE_CURRENT_SOURCE_DIR}/pose/detecter.h
    ${CMAKE_CURRENT_SOURCE_DIR}/pose/estimator.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/pose/segmentor.h
    DESTINATION ${INCLUDE_OUTPUT_PATH}/openvision/pose
 )

--- a/src/common/common.cpp
+++ b/src/common/common.cpp
@@ -130,6 +130,8 @@ Estimator::Estimator() : EstimatorBase() {
    if (num_threads > 0) {
        net_->opt.num_threads = num_threads;
    }
+    net_->opt.blob_allocator = &blob_allocator_;
+    net_->opt.workspace_allocator = &workspace_allocator_;
 #ifdef OV_VULKAN
    net_->opt.use_vulkan_compute = true;
 #endif // OV_VULKAN
@@ -139,6 +141,8 @@ Estimator::~Estimator() {
    if (net_) {
        net_->clear();
    }
+    workspace_allocator_.clear();
+    blob_allocator_.clear();
 }

 int Estimator::LoadModel(const char * root_path) {
--- a/src/common/common.hpp
+++ b/src/common/common.hpp
@@ -30,6 +30,8 @@ public:
    virtual void set_light_mode(bool mode);
 protected:
    ncnn::Net* net_;
+    ncnn::PoolAllocator workspace_allocator_;
+    ncnn::UnlockedPoolAllocator blob_allocator_;
    bool initialized_ = false;
    bool light_mode_ = true;
 };
--- a/src/pose/estimator/estimator.cpp
+++ b/src/pose/estimator/estimator.cpp
@@ -30,7 +30,4 @@ Estimator* UltralightEstimatorFactory::CreateEstimator() {
    return new UltralightEstimator();
 }

-Estimator* MoveNetFactory::CreateEstimator(const int model_type) {
-    return new MoveNet(model_type);
-}
 }
--- a/src/pose/estimator/estimator.hpp
+++ b/src/pose/estimator/estimator.hpp
@@ -25,11 +25,5 @@ public:
    Estimator* CreateEstimator();
 };

-class MoveNetFactory: public EstimatorFactory {
-public:
-    MoveNetFactory(const int model_type) {}
-    ~MoveNetFactory() {}
-    Estimator* CreateEstimator(const int model_type);
-};
 }
 #endif // !_POSE_ESTIMATOR_H
--- a/src/pose/segmentor.h
+++ b/src/pose/segmentor.h
@@ -0,0 +1,24 @@
+/* C API for the pose segmentors; usable from both C and C++ translation units. */
+#ifndef _POSE_SEGMENTOR_C_H_
+#define _POSE_SEGMENTOR_C_H_
+
+#include "../common/common.h"
+
+#ifdef __cplusplus
+#include "segmentor/segmentor.hpp"
+extern "C" {
+#endif
+    /* Opaque handle to a segmentor instance. */
+    typedef void* IPoseSegmentor;
+    /* Create a deeplabv3plus segmentor and return its handle. */
+    IPoseSegmentor new_deeplabv3plus_pose_segmentor();
+    /* Create an erdnet segmentor and return its handle. */
+    IPoseSegmentor new_erdnet_pose_segmentor();
+    /* Run matting on an img_width x img_height image given as rgbdata and
+     * store the result in out. Returns 0 on success, otherwise the
+     * segmentor's status code. */
+    int pose_segment_matting(IPoseSegmentor s, const unsigned char* rgbdata,
+        int img_width, int img_height,
+        Image* out);
+    /* Composite the image given as rgbdata with the background given as
+     * bgdata (bg_width x bg_height) and store the result in out.
+     * Returns 0 on success, otherwise the segmentor's status code. */
+    int pose_segment_merge(IPoseSegmentor s, const unsigned char* rgbdata,
+        int img_width, int img_height,
+        const unsigned char* bgdata,
+        int bg_width, int bg_height,
+        Image* out);
+#ifdef __cplusplus
+}
+#endif
+#endif // !_POSE_SEGMENTOR_C_H_
--- a/src/pose/segmentor/deeplabv3plus/deeplabv3plus.cpp
+++ b/src/pose/segmentor/deeplabv3plus/deeplabv3plus.cpp
@@ -0,0 +1,87 @@
+#include "deeplabv3plus.hpp"
+
+#ifdef OV_VULKAN
+#include "gpu.h"
+#endif // OV_VULKAN
+
+namespace ovpose {
+
+// Computes a two-level mask for the RGB image in rgbdata.
+//
+// The image is resized to target_size x target_size, normalized and run
+// through the network. Each mask pixel is 255 where output channel 1 scores
+// higher than channel 0 and 0 otherwise (presumably channel 1 is the person
+// class -- confirm against the model). The mask is resized back to
+// img_width x img_height and written to out as RGB bytes; out->data is
+// malloc'ed here.
+// Always returns 0.
+int Deeplabv3plusSegmentor::Matting(const unsigned char* rgbdata,
+    int img_width, int img_height,
+    Image* out) {
+
+    ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, target_size, target_size);
+    ncnn::Mat matting = ncnn::Mat(target_size, target_size, 3);
+
+    in.substract_mean_normalize(mean_vals, norm_vals);
+    ncnn::Extractor ex = net_->create_extractor();
+    ex.set_light_mode(light_mode_);
+    ex.set_num_threads(num_threads);
+
+    ex.input("input", in);
+    ncnn::Mat output;
+    ex.extract("output", output);
+
+    const float* pCls0 = output.channel(0);
+    const float* pCls1 = output.channel(1);
+
+    const int area = target_size * target_size;
+    for (int c = 0; c < 3; c++)
+    {
+        float* pImage = matting.channel(c);
+        for (int i = 0; i < area; i++){
+            // Write a literal 0 for the other class: matting is freshly
+            // allocated, so multiplying its current content by 0 would read
+            // uninitialized memory and can yield NaN.
+            pImage[i] = pCls0[i] < pCls1[i] ? 255.f : 0.f;
+        }
+    }
+    ncnn::Mat outimg;
+    ncnn::resize_bicubic(matting, outimg, img_width, img_height);
+    out->width = outimg.w;
+    out->height = outimg.h;
+    out->channels = outimg.c;
+    out->data = (unsigned char*)malloc(outimg.total());
+    outimg.to_pixels(out->data, ncnn::Mat::PIXEL_RGB);
+
+    return 0;
+}
+
+// Replaces the background of the RGB image in rgbdata with bgdata.
+//
+// Both images are resized to target_size x target_size. Every pixel where
+// output channel 0 scores at least as high as channel 1 is taken from the
+// background; all other pixels keep the source image. The composite is
+// resized to img_width x img_height and written to out as RGB bytes;
+// out->data is malloc'ed here.
+// Always returns 0.
+int Deeplabv3plusSegmentor::Merge(const unsigned char* rgbdata,
+    int img_width, int img_height,
+    const unsigned char*bgdata,
+    int bg_width, int bg_height,
+    Image* out) {
+
+    ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, target_size, target_size);
+    ncnn::Mat bg = ncnn::Mat::from_pixels_resize(bgdata, ncnn::Mat::PIXEL_RGB, bg_width, bg_height, target_size, target_size);
+    // Keep an un-normalized copy of the source pixels to composite into.
+    ncnn::Mat matting = in.clone();
+
+    in.substract_mean_normalize(mean_vals, norm_vals);
+    ncnn::Extractor ex = net_->create_extractor();
+    ex.set_light_mode(light_mode_);
+    ex.set_num_threads(num_threads);
+
+    ex.input("input", in);
+    ncnn::Mat output;
+    ex.extract("output", output);
+
+    const float* pCls0 = output.channel(0);
+    const float* pCls1 = output.channel(1);
+
+    const int area = target_size * target_size;
+    for (int c = 0; c < 3; c++)
+    {
+        float* pImage = matting.channel(c);
+        // Fetch the background channel once instead of once per pixel.
+        const float* pBg = bg.channel(c);
+        for (int i = 0; i < area; i++){
+            if (pCls0[i] >= pCls1[i]) {
+                pImage[i] = pBg[i];
+            }
+        }
+    }
+    ncnn::Mat outimg;
+    ncnn::resize_bicubic(matting, outimg, img_width, img_height);
+    out->width = outimg.w;
+    out->height = outimg.h;
+    out->channels = outimg.c;
+    out->data = (unsigned char*)malloc(outimg.total());
+    outimg.to_pixels(out->data, ncnn::Mat::PIXEL_RGB);
+
+    return 0;
+}
+}
--- a/src/pose/segmentor/deeplabv3plus/deeplabv3plus.hpp
+++ b/src/pose/segmentor/deeplabv3plus/deeplabv3plus.hpp
@@ -0,0 +1,26 @@
+#ifndef _POSE_SEGMENTOR_DEEPLABV3PLUS_H_
+#define _POSE_SEGMENTOR_DEEPLABV3PLUS_H_
+
+#include "../segmentor.hpp"
+#include "net.h"
+
+namespace ovpose {
+
+// Segmentor implementation backed by a deeplabv3plus network.
+class Deeplabv3plusSegmentor : public Segmentor  {
+public:
+    // Writes the segmentation mask for rgbdata to out.
+    virtual int Matting(const unsigned char* rgbdata,
+        int img_width, int img_height,
+        Image* out);
+    // Writes rgbdata composited over bgdata to out.
+    virtual int Merge(const unsigned char* rgbdata,
+        int img_width, int img_height,
+        const unsigned char* bgdata,
+        int bg_width, int bg_height,
+        Image* out);
+private:
+    // Width and height the input is resized to before inference.
+    const int target_size = 640;
+    // Per-channel values passed to substract_mean_normalize.
+    const float mean_vals[3] = {0.45734706f * 255.f, 0.43338275f * 255.f, 0.40058118f*255.f};
+    const float norm_vals[3] = {1/0.23965294/255.f, 1/0.23532275/255.f, 1/0.2398498/255.f};
+};
+
+}
+#endif // !_POSE_SEGMENTOR_DEEPLABV3PLUS_H_
--- a/src/pose/segmentor/erdnet/erdnet.cpp
+++ b/src/pose/segmentor/erdnet/erdnet.cpp
@@ -0,0 +1,87 @@
+#include "erdnet.hpp"
+
+#ifdef OV_VULKAN
+#include "gpu.h"
+#endif // OV_VULKAN
+
+namespace ovpose {
+
+// Computes a grayscale alpha matte for the RGB image in rgbdata.
+//
+// The image is resized to target_size x target_size, normalized and run
+// through the network. The "sigmoid_blob1" output is scaled by 255, clamped
+// to [0, 255] and copied into all three channels. The matte is then resized
+// to img_width x img_height and written to out as RGB bytes; out->data is
+// malloc'ed here.
+// Always returns 0.
+int ERDNetSegmentor::Matting(const unsigned char* rgbdata,
+    int img_width, int img_height,
+    Image* out) {
+
+    ncnn::Mat input = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, target_size, target_size);
+    ncnn::Mat matte = ncnn::Mat(target_size, target_size, 3);
+
+    input.substract_mean_normalize(mean_vals, norm_vals);
+    ncnn::Extractor extractor = net_->create_extractor();
+    extractor.set_light_mode(light_mode_);
+    extractor.set_num_threads(num_threads);
+
+    extractor.input("input_blob1", input);
+    ncnn::Mat output;
+    extractor.extract("sigmoid_blob1", output);
+
+    const int pixel_count = target_size*target_size;
+    for (int c = 0; c < 3; ++c) {
+        float* dst = matte.channel(c);
+        for (int i = 0; i < pixel_count; ++i) {
+            // Scale alpha to the byte range and clamp it to [0, 255].
+            dst[i] = std::max(std::min(255 * output[i], 255.f), 0.f);
+        }
+    }
+
+    ncnn::Mat resized;
+    ncnn::resize_bicubic(matte, resized, img_width, img_height);
+
+    out->width = resized.w;
+    out->height = resized.h;
+    out->channels = resized.c;
+    out->data = (unsigned char*)malloc(resized.total());
+    resized.to_pixels(out->data, ncnn::Mat::PIXEL_RGB);
+    return 0;
+}
+
+// Alpha-blends the RGB image in rgbdata over the background in bgdata.
+//
+// Both images are resized to target_size x target_size. Each pixel becomes
+// source * alpha + background * (1 - alpha), clamped to [0, 255], where alpha
+// comes from the "sigmoid_blob1" output. The composite is resized to
+// img_width x img_height and written to out as RGB bytes; out->data is
+// malloc'ed here.
+// Always returns 0.
+int ERDNetSegmentor::Merge(const unsigned char* rgbdata,
+    int img_width, int img_height,
+    const unsigned char* bgdata,
+    int bg_width, int bg_height,
+    Image* out) {
+
+    ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB, img_width, img_height, target_size, target_size);
+    ncnn::Mat bg = ncnn::Mat::from_pixels_resize(bgdata, ncnn::Mat::PIXEL_RGB, bg_width, bg_height, target_size, target_size);
+    // Keep an un-normalized copy of the source pixels to blend into.
+    ncnn::Mat matting = in.clone();
+
+    in.substract_mean_normalize(mean_vals, norm_vals);
+    ncnn::Extractor ex = net_->create_extractor();
+    ex.set_light_mode(light_mode_);
+    ex.set_num_threads(num_threads);
+
+    ex.input("input_blob1", in);
+    ncnn::Mat output;
+    ex.extract("sigmoid_blob1", output);
+
+    const int area = target_size * target_size;
+    for (int c = 0; c < 3; ++c) {
+        float* pImage = matting.channel(c);
+        // Fetch the background channel once instead of once per pixel.
+        const float* pBg = bg.channel(c);
+        for (int i = 0; i < area; i++) {
+            const float alpha = output[i];
+            float value = pImage[i] * alpha + pBg[i] * (1 - alpha);
+            value = std::max(std::min(value, 255.f), 0.f);
+            pImage[i] = value;
+        }
+    }
+
+    ncnn::Mat outimg;
+    ncnn::resize_bicubic(matting, outimg, img_width, img_height);
+
+    out->width = outimg.w;
+    out->height = outimg.h;
+    out->channels = outimg.c;
+    out->data = (unsigned char*)malloc(outimg.total());
+    outimg.to_pixels(out->data, ncnn::Mat::PIXEL_RGB);
+
+    return 0;
+}
+
+}
--- a/src/pose/segmentor/erdnet/erdnet.hpp
+++ b/src/pose/segmentor/erdnet/erdnet.hpp
@@ -0,0 +1,26 @@
+#ifndef _POSE_SEGMENTOR_ERDNET_H_
+#define _POSE_SEGMENTOR_ERDNET_H_
+
+#include "../segmentor.hpp"
+#include "net.h"
+
+namespace ovpose {
+
+// Segmentor implementation backed by an ERDNet network.
+class ERDNetSegmentor : public Segmentor  {
+public:
+    // Writes the alpha matte for rgbdata to out.
+    virtual int Matting(const unsigned char* rgbdata,
+        int img_width, int img_height,
+        Image* out);
+    // Writes rgbdata alpha-blended over bgdata to out.
+    virtual int Merge(const unsigned char* rgbdata,
+        int img_width, int img_height,
+        const unsigned char* bgdata,
+        int bg_width, int bg_height,
+        Image* out);
+private:
+    // Width and height the input is resized to before inference.
+    const int target_size = 256;
+    // Per-channel values passed to substract_mean_normalize.
+    const float mean_vals[3] = {104.f, 112.f, 121.f};
+    const float norm_vals[3] = {1.f/255.f, 1.f/255.f, 1.f/255.f};
+};
+
+}
+#endif // !_POSE_SEGMENTOR_ERDNET_H_
--- a/src/pose/segmentor/segmentor.cpp
+++ b/src/pose/segmentor/segmentor.cpp
@@ -0,0 +1,45 @@
+#include "../segmentor.h"
+#include "deeplabv3plus/deeplabv3plus.hpp"
+#include "erdnet/erdnet.hpp"
+
+// Allocates a Deeplabv3plusSegmentor with new and returns it as an opaque handle.
+IPoseSegmentor new_deeplabv3plus_pose_segmentor() {
+    return new ovpose::Deeplabv3plusSegmentor();
+}
+
+// Allocates an ERDNetSegmentor with new and returns it as an opaque handle.
+IPoseSegmentor new_erdnet_pose_segmentor() {
+    return new ovpose::ERDNetSegmentor();
+}
+
+// C entry point for matting: treats s as an ovpose::Segmentor and returns the
+// status code of its Matting call unchanged.
+int pose_segment_matting(IPoseSegmentor s, const unsigned char* rgbdata,
+    int img_width, int img_height,
+    Image* out) {
+    ovpose::Segmentor* segmentor = static_cast<ovpose::Segmentor*>(s);
+    return segmentor->Matting(rgbdata, img_width, img_height, out);
+}
+
+// C entry point for background merge: treats s as an ovpose::Segmentor and
+// returns the status code of its Merge call unchanged.
+int pose_segment_merge(IPoseSegmentor s, const unsigned char* rgbdata,
+    int img_width, int img_height,
+    const unsigned char* bgdata,
+    int bg_width, int bg_height,
+    Image* out) {
+    ovpose::Segmentor* segmentor = static_cast<ovpose::Segmentor*>(s);
+    return segmentor->Merge(rgbdata, img_width, img_height, bgdata, bg_width, bg_height, out);
+}
+
+namespace ovpose {
+
+// Returns a heap-allocated Deeplabv3plusSegmentor.
+Segmentor* Deeplabv3plusFactory::CreateSegmentor() {
+    return new Deeplabv3plusSegmentor();
+}
+
+// Returns a heap-allocated ERDNetSegmentor.
+Segmentor* ERDNetFactory::CreateSegmentor() {
+    return new ERDNetSegmentor();
+}
+
+}
--- a/src/pose/segmentor/segmentor.hpp
+++ b/src/pose/segmentor/segmentor.hpp
@@ -0,0 +1,42 @@
+#ifndef _POSE_SEGMENTOR_H_
+#define _POSE_SEGMENTOR_H_
+
+#include "../../common/common.h"
+
+namespace ovpose {
+
+// Abstract base of all pose segmentors; adds the two segmentation operations
+// to ov::Estimator.
+class Segmentor: public ov::Estimator {
+public:
+    // Computes the matting image for rgbdata (img_width x img_height) into out.
+    // Returns a status code; the implementations in this commit return 0.
+    virtual int Matting(const unsigned char* rgbdata,
+        int img_width, int img_height,
+        Image* out) = 0;
+    // Composites rgbdata with bgdata (bg_width x bg_height) into out.
+    // Returns a status code; the implementations in this commit return 0.
+    virtual int Merge(const unsigned char* rgbdata,
+        int img_width, int img_height,
+        const unsigned char* bgdata,
+        int bg_width, int bg_height,
+        Image* out) = 0;
+};
+
+// Abstract factory interface for creating Segmentor instances.
+class SegmentorFactory {
+public:
+    // Returns a new Segmentor; implementations allocate it with new.
+    virtual Segmentor* CreateSegmentor() = 0;
+    virtual ~SegmentorFactory() {};
+};
+
+// Factory whose CreateSegmentor is declared here and defined in segmentor.cpp.
+class Deeplabv3plusFactory: public SegmentorFactory{
+public:
+    Deeplabv3plusFactory() {}
+    ~Deeplabv3plusFactory() {}
+    // Creates the deeplabv3plus segmentor.
+    Segmentor* CreateSegmentor();
+};
+
+// Factory whose CreateSegmentor is declared here and defined in segmentor.cpp.
+class ERDNetFactory: public SegmentorFactory{
+public:
+    ERDNetFactory() {}
+    ~ERDNetFactory() {}
+    // Creates the ERDNet segmentor.
+    Segmentor* CreateSegmentor();
+};
+
+}
+
+#endif //!_POSE_SEGMENTOR_H_