From d3a0713c39d75c5756d6b3d7aaa9d2bb0f37c820 Mon Sep 17 00:00:00 2001
From: Atsushi Watanabe
Date: Mon, 17 Feb 2020 19:14:31 +0900
Subject: [PATCH] Add CGO version of I420 convert

Move the I444/I422 to I420 chroma downsampling loops out of ToI420 into
i444ToI420 and i422ToI420, and add C implementations of both that are
selected when the package is built with cgo.

convert.go refers to i444ToI420CGO and i422ToI420CGO unconditionally, so
builds without cgo get convert_nocgo.go, which defines both names as thin
wrappers around the pure Go converters. convert_cgo.c carries the same
cgo build constraint as convert_cgo.go so that it is left out of the
package when cgo is disabled, and it includes <stdint.h> for the
uint8_t/uint16_t types it uses.
---
 pkg/io/video/convert.go       | 109 ++++++++++++++++++++++-------------
 pkg/io/video/convert_cgo.c    |  62 ++++++++++++++++++++
 pkg/io/video/convert_cgo.go   |  49 ++++++++++++++++
 pkg/io/video/convert_nocgo.go |  20 ++++++
 pkg/io/video/convert_test.go  |  11 ++++
 5 files changed, 212 insertions(+), 39 deletions(-)
 create mode 100644 pkg/io/video/convert_cgo.c
 create mode 100644 pkg/io/video/convert_cgo.go
 create mode 100644 pkg/io/video/convert_nocgo.go

diff --git a/pkg/io/video/convert.go b/pkg/io/video/convert.go
index 427629e..70b9f15 100644
--- a/pkg/io/video/convert.go
+++ b/pkg/io/video/convert.go
@@ -6,6 +6,13 @@ import (
 	"image/color"
 )
 
+var (
+	// hasCGOConvert reports whether the C implementations of the I420
+	// converters are compiled in. It is set to true by init in
+	// convert_cgo.go and makes ToI420 pick the *CGO converters.
+	hasCGOConvert = false
+)
+
 // imageToYCbCr converts src to *image.YCbCr and store it to dst
 // Note: conversion can be lossy
 func imageToYCbCr(dst *image.YCbCr, src image.Image) {
@@ -70,8 +77,69 @@ func imageToYCbCr(dst *image.YCbCr, src image.Image) {
 	}
 }
 
+// i444ToI420 downsamples the chroma planes of a 4:4:4 image in place by
+// averaging each 2x2 block of Cb and Cr samples, then halves img.CStride and
+// truncates img.Cb and img.Cr to the new length. The Y plane and
+// img.SubsampleRatio are left untouched.
+func i444ToI420(img *image.YCbCr) {
+	h := img.Rect.Dy()
+	addrSrc0 := 0
+	addrSrc1 := img.CStride
+	addrDst := 0
+	for i := 0; i < h/2; i++ {
+		for j := 0; j < img.CStride/2; j++ {
+			cb := uint16(img.Cb[addrSrc0]) + uint16(img.Cb[addrSrc1]) +
+				uint16(img.Cb[addrSrc0+1]) + uint16(img.Cb[addrSrc1+1])
+			cr := uint16(img.Cr[addrSrc0]) + uint16(img.Cr[addrSrc1]) +
+				uint16(img.Cr[addrSrc0+1]) + uint16(img.Cr[addrSrc1+1])
+			img.Cb[addrDst] = uint8(cb / 4)
+			img.Cr[addrDst] = uint8(cr / 4)
+			addrSrc0 += 2
+			addrSrc1 += 2
+			addrDst++
+		}
+		addrSrc0 += img.CStride
+		addrSrc1 += img.CStride
+	}
+	img.CStride = img.CStride / 2
+	cLen := img.CStride * (h / 2)
+	img.Cb = img.Cb[:cLen]
+	img.Cr = img.Cr[:cLen]
+}
+
+// i422ToI420 downsamples the chroma planes of a 4:2:2 image in place by
+// averaging each vertical pair of Cb and Cr samples, then truncates img.Cb
+// and img.Cr to the new length. img.CStride is unchanged.
+func i422ToI420(img *image.YCbCr) {
+	h := img.Rect.Dy()
+	addrSrc := 0
+	addrDst := 0
+	for i := 0; i < h/2; i++ {
+		for j := 0; j < img.CStride; j++ {
+			cb := uint16(img.Cb[addrSrc]) + uint16(img.Cb[addrSrc+img.CStride])
+			cr := uint16(img.Cr[addrSrc]) + uint16(img.Cr[addrSrc+img.CStride])
+			img.Cb[addrDst] = uint8(cb / 2)
+			img.Cr[addrDst] = uint8(cr / 2)
+			addrDst++
+			addrSrc++
+		}
+		addrSrc += img.CStride
+	}
+	cLen := img.CStride * (h / 2)
+	img.Cb = img.Cb[:cLen]
+	img.Cr = img.Cr[:cLen]
+}
+
 // ToI420 converts r to a new reader that will output images in I420 format
 func ToI420(r Reader) Reader {
+	// Use the C converters when they are compiled in.
+	f444to420 := i444ToI420
+	f422to420 := i422ToI420
+	if hasCGOConvert {
+		f444to420 = i444ToI420CGO
+		f422to420 = i422ToI420CGO
+	}
+
 	var yuvImg image.YCbCr
 	return ReaderFunc(func() (image.Image, error) {
 		img, err := r.Read()
@@ -80,50 +148,13 @@ func ToI420(r Reader) Reader {
 		}
 
 		imageToYCbCr(&yuvImg, img)
-		h := yuvImg.Rect.Dy()
 
 		// Covert pixel format to I420
 		switch yuvImg.SubsampleRatio {
 		case image.YCbCrSubsampleRatio444:
-			addrSrc0 := 0
-			addrSrc1 := yuvImg.CStride
-			addrDst := 0
-			for i := 0; i < h/2; i++ {
-				for j := 0; j < yuvImg.CStride/2; j++ {
-					cb := uint16(yuvImg.Cb[addrSrc0]) + uint16(yuvImg.Cb[addrSrc1]) +
-						uint16(yuvImg.Cb[addrSrc0+1]) + uint16(yuvImg.Cb[addrSrc1+1])
-					cr := uint16(yuvImg.Cr[addrSrc0]) + uint16(yuvImg.Cr[addrSrc1]) +
-						uint16(yuvImg.Cr[addrSrc0+1]) + uint16(yuvImg.Cr[addrSrc1+1])
-					yuvImg.Cb[addrDst] = uint8(cb / 4)
-					yuvImg.Cr[addrDst] = uint8(cr / 4)
-					addrSrc0 += 2
-					addrSrc1 += 2
-					addrDst++
-				}
-				addrSrc0 += yuvImg.CStride
-				addrSrc1 += yuvImg.CStride
-			}
-			yuvImg.CStride = yuvImg.CStride / 2
-			cLen := yuvImg.CStride * (h / 2)
-			yuvImg.Cb = yuvImg.Cb[:cLen]
-			yuvImg.Cr = yuvImg.Cr[:cLen]
+			f444to420(&yuvImg)
 		case image.YCbCrSubsampleRatio422:
-			addrSrc := 0
-			addrDst := 0
-			for i := 0; i < h/2; i++ {
-				for j := 0; j < yuvImg.CStride; j++ {
-					cb := uint16(yuvImg.Cb[addrSrc]) + uint16(yuvImg.Cb[addrSrc+yuvImg.CStride])
-					cr := uint16(yuvImg.Cr[addrSrc]) + uint16(yuvImg.Cr[addrSrc+yuvImg.CStride])
-					yuvImg.Cb[addrDst] = uint8(cb / 2)
-					yuvImg.Cr[addrDst] = uint8(cr / 2)
-					addrDst++
-					addrSrc++
-				}
-				addrSrc += yuvImg.CStride
-			}
-			cLen := yuvImg.CStride * (h / 2)
-			yuvImg.Cb = yuvImg.Cb[:cLen]
-			yuvImg.Cr = yuvImg.Cr[:cLen]
+			f422to420(&yuvImg)
 		case image.YCbCrSubsampleRatio420:
 		default:
 			return nil, fmt.Errorf("unsupported pixel format: %s", yuvImg.SubsampleRatio)
diff --git a/pkg/io/video/convert_cgo.c b/pkg/io/video/convert_cgo.c
new file mode 100644
index 0000000..1ab9a46
--- /dev/null
+++ b/pkg/io/video/convert_cgo.c
@@ -0,0 +1,62 @@
+// +build cgo
+
+#include <stdint.h>
+
+#include "_cgo_export.h"
+
+// i444ToI420CGO averages each 2x2 block of the cb and cr planes in place.
+// stride and h are the chroma stride and image height passed by the Go wrapper.
+void i444ToI420CGO(
+    unsigned char* cb,
+    unsigned char* cr,
+    const int stride, const int h)
+{
+  int isrc0 = 0;
+  int isrc1 = stride;
+  int idst = 0;
+  for (int y = 0; y < h / 2; y++)
+  {
+    for (int x = 0; x < stride / 2; x++)
+    {
+      const uint8_t cb2 =
+          ((uint16_t)cb[isrc0] + (uint16_t)cb[isrc1] +
+           (uint16_t)cb[isrc0 + 1] + (uint16_t)cb[isrc1 + 1]) /
+          4;
+      const uint8_t cr2 =
+          ((uint16_t)cr[isrc0] + (uint16_t)cr[isrc1] +
+           (uint16_t)cr[isrc0 + 1] + (uint16_t)cr[isrc1 + 1]) /
+          4;
+      cb[idst] = cb2;
+      cr[idst] = cr2;
+      isrc0 += 2;
+      isrc1 += 2;
+      idst++;
+    }
+    isrc0 += stride;
+    isrc1 += stride;
+  }
+}
+
+// i422ToI420CGO averages each vertical pair of cb and cr samples in place.
+// stride and h are the chroma stride and image height passed by the Go wrapper.
+void i422ToI420CGO(
+    unsigned char* cb,
+    unsigned char* cr,
+    const int stride, const int h)
+{
+  int isrc = 0;
+  int idst = 0;
+  for (int y = 0; y < h / 2; y++)
+  {
+    for (int x = 0; x < stride; x++)
+    {
+      const uint8_t cb2 = ((uint16_t)cb[isrc] + (uint16_t)cb[isrc + stride]) / 2;
+      const uint8_t cr2 = ((uint16_t)cr[isrc] + (uint16_t)cr[isrc + stride]) / 2;
+      cb[idst] = cb2;
+      cr[idst] = cr2;
+      isrc++;
+      idst++;
+    }
+    isrc += stride;
+  }
+}
diff --git a/pkg/io/video/convert_cgo.go b/pkg/io/video/convert_cgo.go
new file mode 100644
index 0000000..030226a
--- /dev/null
+++ b/pkg/io/video/convert_cgo.go
@@ -0,0 +1,49 @@
+// +build cgo
+
+package video
+
+import (
+	"image"
+)
+
+// void i444ToI420CGO(
+//   unsigned char* cb, unsigned char* cr,
+//   const int stride, const int h);
+// void i422ToI420CGO(
+//   unsigned char* cb, unsigned char* cr,
+//   const int stride, const int h);
+import "C"
+
+// init marks the C converters as available so that ToI420 selects them.
+func init() {
+	hasCGOConvert = true
+}
+
+// i444ToI420CGO converts a 4:4:4 image to I420 in place using the C
+// implementation, then halves img.CStride and truncates the chroma planes.
+// img.Cb and img.Cr must be non-empty.
+func i444ToI420CGO(img *image.YCbCr) {
+	h := img.Rect.Dy()
+	C.i444ToI420CGO(
+		(*C.uchar)(&img.Cb[0]), (*C.uchar)(&img.Cr[0]),
+		C.int(img.CStride), C.int(h),
+	)
+	img.CStride = img.CStride / 2
+	cLen := img.CStride * (h / 2)
+	img.Cb = img.Cb[:cLen]
+	img.Cr = img.Cr[:cLen]
+}
+
+// i422ToI420CGO converts a 4:2:2 image to I420 in place using the C
+// implementation, then truncates the chroma planes.
+// img.Cb and img.Cr must be non-empty.
+func i422ToI420CGO(img *image.YCbCr) {
+	h := img.Rect.Dy()
+	C.i422ToI420CGO(
+		(*C.uchar)(&img.Cb[0]), (*C.uchar)(&img.Cr[0]),
+		C.int(img.CStride), C.int(h),
+	)
+	cLen := img.CStride * (h / 2)
+	img.Cb = img.Cb[:cLen]
+	img.Cr = img.Cr[:cLen]
+}
diff --git a/pkg/io/video/convert_nocgo.go b/pkg/io/video/convert_nocgo.go
new file mode 100644
--- /dev/null
+++ b/pkg/io/video/convert_nocgo.go
@@ -0,0 +1,20 @@
+// +build !cgo
+
+package video
+
+import (
+	"image"
+)
+
+// i444ToI420CGO keeps the package building when cgo is disabled.
+// hasCGOConvert stays false in that configuration, so ToI420 never selects
+// this function; it delegates to the pure Go converter to stay safe anyway.
+func i444ToI420CGO(img *image.YCbCr) {
+	i444ToI420(img)
+}
+
+// i422ToI420CGO is the cgo-less counterpart of the C-backed converter and
+// delegates to the pure Go implementation.
+func i422ToI420CGO(img *image.YCbCr) {
+	i422ToI420(img)
+}
diff --git a/pkg/io/video/convert_test.go b/pkg/io/video/convert_test.go
index 28f782f..b1ada2e 100644
--- a/pkg/io/video/convert_test.go
+++ b/pkg/io/video/convert_test.go
@@ -152,3 +152,14 @@ func BenchmarkToI420(b *testing.B) {
 		})
 	}
 }
+
+// BenchmarkToI420CGO reruns BenchmarkToI420 with the C converters disabled so
+// both backends can be compared. It is skipped when hasCGOConvert is false.
+func BenchmarkToI420CGO(b *testing.B) {
+	if !hasCGOConvert {
+		b.SkipNow()
+	}
+	hasCGOConvert = false
+	b.Run("NoCGO", BenchmarkToI420)
+	hasCGOConvert = true
+}