Optimize ToI420 conversion by using sync.Pool (#473)

Added a sync.Pool to the I420 conversion to minimize the overhead of allocating new byte slices.
This commit is contained in:
Abdarrakhman Akhmetgali
2023-03-07 02:53:44 +06:00
committed by GitHub
parent d561715bf9
commit dbd37689e4
10 changed files with 73 additions and 20 deletions

View File

@@ -64,10 +64,11 @@ func (e *encoder) Read() ([]byte, func(), error) {
return nil, func() {}, io.EOF
}
img, _, err := e.r.Read()
img, release, err := e.r.Read()
if err != nil {
return nil, func() {}, err
}
defer release()
imgReal := img.(*image.YCbCr)
var y, cb, cr C.Slice
y.data = (*C.uchar)(&imgReal.Y[0])

View File

@@ -65,10 +65,11 @@ func (e *encoder) Read() ([]byte, func(), error) {
return nil, func() {}, io.EOF
}
img, _, err := e.r.Read()
img, release, err := e.r.Read()
if err != nil {
return nil, func() {}, err
}
defer release()
yuvImg := img.(*image.YCbCr)
bounds := yuvImg.Bounds()

View File

@@ -304,10 +304,11 @@ func (e *encoderVP8) Read() ([]byte, func(), error) {
return nil, func() {}, io.EOF
}
img, _, err := e.r.Read()
img, release, err := e.r.Read()
if err != nil {
return nil, func() {}, err
}
defer release()
yuvImg := img.(*image.YCbCr)
kf := e.frameCnt%e.params.KeyFrameInterval == 0

View File

@@ -293,10 +293,11 @@ func (e *encoderVP9) Read() ([]byte, func(), error) {
return nil, func() {}, io.EOF
}
img, _, err := e.r.Read()
img, release, err := e.r.Read()
if err != nil {
return nil, func() {}, err
}
defer release()
yuvImg := img.(*image.YCbCr)
kf := e.frameCnt%e.params.KeyFrameInterval == 0

View File

@@ -219,10 +219,11 @@ func (e *encoder) Read() ([]byte, func(), error) {
return nil, func() {}, io.EOF
}
img, _, err := e.r.Read()
img, release, err := e.r.Read()
if err != nil {
return nil, func() {}, err
}
defer release()
yuvImg := img.(*image.YCbCr)
bounds := yuvImg.Bounds()
height := C.int(bounds.Dy())

View File

@@ -102,10 +102,11 @@ func (e *encoder) Read() ([]byte, func(), error) {
return nil, func() {}, io.EOF
}
img, _, err := e.r.Read()
img, release, err := e.r.Read()
if err != nil {
return nil, func() {}, err
}
defer release()
yuvImg := img.(*image.YCbCr)
var rc C.int

View File

@@ -4,6 +4,7 @@ import (
"fmt"
"image"
"image/color"
"sync"
)
// imageToYCbCr converts src to *image.YCbCr and store it to dst
@@ -60,29 +61,61 @@ func imageToYCbCr(dst *image.YCbCr, src image.Image) {
}
}
// bytesPool stores byte slices to be reused.
// Its New function is intentionally not set, because the slice size
// must be chosen according to the subsample ratio.
var bytesPool sync.Pool
// ToI420 converts r to a new reader that will output images in I420 format
func ToI420(r Reader) Reader {
var yuvImg image.YCbCr
getSlice := func(cLen int) []uint8 {
// Retrieve slice from pool
dst, ok := bytesPool.Get().([]byte)
// Check that a byte slice was actually retrieved and that its capacity
// is large enough; otherwise allocate a new slice
if !ok || cap(dst) < 2*cLen {
// Allocating memory for Cb and Cr
dst = make([]byte, 2*cLen, 2*cLen)
}
return dst
}
return ReaderFunc(func() (image.Image, func(), error) {
img, _, err := r.Read()
if err != nil {
return nil, func() {}, err
}
var releaseFunc func() = func() {}
imageToYCbCr(&yuvImg, img)
// Convert pixel format to I420
switch yuvImg.SubsampleRatio {
case image.YCbCrSubsampleRatio420:
case image.YCbCrSubsampleRatio444:
yuvImg = i444ToI420(yuvImg)
cLen := yuvImg.CStride * yuvImg.Rect.Dy() / 4
dst := getSlice(cLen)
yuvImg = i444ToI420(yuvImg, dst)
releaseFunc = func() {
bytesPool.Put(dst)
}
case image.YCbCrSubsampleRatio422:
yuvImg = i422ToI420(yuvImg)
cLen := yuvImg.CStride * (yuvImg.Rect.Dy() / 2)
dst := getSlice(cLen)
yuvImg = i422ToI420(yuvImg, dst)
releaseFunc = func() {
bytesPool.Put(dst)
}
default:
return nil, func() {}, fmt.Errorf("unsupported pixel format: %s", yuvImg.SubsampleRatio)
return nil, releaseFunc, fmt.Errorf("unsupported pixel format: %s", yuvImg.SubsampleRatio)
}
return &yuvImg, func() {}, nil
return &yuvImg, releaseFunc, nil
})
}

View File

@@ -15,10 +15,13 @@ import "C"
// All functions switched at runtime must be declared also in convert_nocgo.go.
const hasCGOConvert = true
func i444ToI420(img image.YCbCr) image.YCbCr {
func i444ToI420(img image.YCbCr, dst []uint8) image.YCbCr {
h := img.Rect.Dy()
cLen := img.CStride * h / 4
cbDst, crDst := make([]uint8, cLen), make([]uint8, cLen)
// Divide preallocated memory to cbDst and crDst
// and truncate cap and len to cLen
cbDst, crDst := dst[:cLen:cLen], dst[cLen:]
crDst = crDst[:cLen:cLen]
C.i444ToI420CGO(
(*C.uchar)(&cbDst[0]), (*C.uchar)(&crDst[0]),
(*C.uchar)(&img.Cb[0]), (*C.uchar)(&img.Cr[0]),
@@ -31,10 +34,13 @@ func i444ToI420(img image.YCbCr) image.YCbCr {
return img
}
func i422ToI420(img image.YCbCr) image.YCbCr {
func i422ToI420(img image.YCbCr, dst []uint8) image.YCbCr {
h := img.Rect.Dy()
cLen := img.CStride * (h / 2)
cbDst, crDst := make([]uint8, cLen), make([]uint8, cLen)
// Divide preallocated memory to cbDst and crDst
// and truncate cap and len to cLen
cbDst, crDst := dst[:cLen:cLen], dst[cLen:]
crDst = crDst[:cLen:cLen]
C.i422ToI420CGO(
(*C.uchar)(&cbDst[0]), (*C.uchar)(&crDst[0]),
(*C.uchar)(&img.Cb[0]), (*C.uchar)(&img.Cr[0]),

View File

@@ -10,13 +10,16 @@ import (
const hasCGOConvert = false
func i444ToI420(img image.YCbCr) image.YCbCr {
func i444ToI420(img image.YCbCr, dst []uint8) image.YCbCr {
h := img.Rect.Dy()
addrSrc0 := 0
addrSrc1 := img.CStride
cLen := img.CStride * (h / 2)
addrDst := 0
cbDst, crDst := make([]uint8, cLen), make([]uint8, cLen)
// Divide preallocated memory to cbDst and crDst
// and truncate cap and len to cLen
cbDst, crDst := dst[:cLen:cLen], dst[cLen:]
crDst = crDst[:cLen:cLen]
for i := 0; i < h/2; i++ {
for j := 0; j < img.CStride/2; j++ {
@@ -40,11 +43,14 @@ func i444ToI420(img image.YCbCr) image.YCbCr {
return img
}
func i422ToI420(img image.YCbCr) image.YCbCr {
func i422ToI420(img image.YCbCr, dst []uint8) image.YCbCr {
h := img.Rect.Dy()
addrSrc := 0
cLen := img.CStride * (h / 2)
cbDst, crDst := make([]uint8, cLen), make([]uint8, cLen)
// Divide preallocated memory to cbDst and crDst
// and truncate cap and len to cLen
cbDst, crDst := dst[:cLen:cLen], dst[cLen:]
crDst = crDst[:cLen:cLen]
addrDst := 0
for i := 0; i < h/2; i++ {

View File

@@ -147,10 +147,11 @@ func TestToI420(t *testing.T) {
r := ToI420(ReaderFunc(func() (image.Image, func(), error) {
return c.src, func() {}, nil
}))
out, _, err := r.Read()
out, release, err := r.Read()
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
defer release()
if !reflect.DeepEqual(c.expected, out) {
t.Errorf("Expected output image:\n%v\ngot:\n%v", c.expected, out)
}
@@ -230,10 +231,11 @@ func BenchmarkToI420(b *testing.B) {
}))
for i := 0; i < b.N; i++ {
_, _, err := r.Read()
_, release, err := r.Read()
if err != nil {
b.Fatalf("Unexpected error: %v", err)
}
release()
}
})
}