Perf improvements on stackblur algorithm

This commit is contained in:
esimov
2025-05-02 11:13:15 +03:00
parent aeab3fbb82
commit 8d0ec6281a
5 changed files with 155 additions and 83 deletions

View File

@@ -1,16 +1,18 @@
// Go implementation of StackBlur algorithm described here:
// Go implementation of the StackBlur algorithm
// http://incubator.quasimondo.com/processing/fast_blur_deluxe.php
package caire
import (
"errors"
"image"
"image/color"
)
// blurstack is a linked list containing the color value and a pointer to the next struct.
type blurstack struct {
// blurStack is a linked list containing the color value and a pointer to the next struct.
type blurStack struct {
r, g, b, a uint32
next *blurstack
next *blurStack
}
var mulTable = []uint32{
@@ -51,11 +53,86 @@ var shgTable = []uint32{
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
}
// StackBlur applies a blur filter to the provided image.
// The radius defines the bluring average.
func (c *Carver) StackBlur(img *image.NRGBA, radius uint32) *image.NRGBA {
var stackEnd, stackIn, stackOut *blurstack
var width, height = uint32(img.Bounds().Dx()), uint32(img.Bounds().Dy())
// Stackblur takes the source image and returns it's blurred version by applying the blur radius defined as parameter. The destination image must be a image.NRGBA.
func Stackblur(dst, src image.Image, radius uint32) error {
// Limit the maximum blur radius to 255 to avoid overflowing the multable.
if int(radius) >= len(mulTable) {
radius = uint32(len(mulTable) - 1)
}
if radius < 1 {
return errors.New("blur radius must be greater than 0")
}
img, ok := dst.(*image.NRGBA)
if !ok {
return errors.New("the destination image must be image.NRGBA")
}
process(img, src, radius)
return nil
}
func process(dst *image.NRGBA, src image.Image, radius uint32) {
srcBounds := src.Bounds()
srcMinX := srcBounds.Min.X
srcMinY := srcBounds.Min.Y
dstBounds := srcBounds.Sub(srcBounds.Min)
dstW := dstBounds.Dx()
dstH := dstBounds.Dy()
switch src0 := src.(type) {
case *image.NRGBA:
rowSize := srcBounds.Dx() * 4
for dstY := 0; dstY < dstH; dstY++ {
di := src0.PixOffset(0, dstY)
si := src0.PixOffset(srcMinX, srcMinY+dstY)
for dstX := 0; dstX < dstW; dstX++ {
copy(dst.Pix[di:di+rowSize], src0.Pix[si:si+rowSize])
}
}
case *image.YCbCr:
for dstY := 0; dstY < dstH; dstY++ {
di := dst.PixOffset(0, dstY)
for dstX := 0; dstX < dstW; dstX++ {
srcX := srcMinX + dstX
srcY := srcMinY + dstY
siy := src0.YOffset(srcX, srcY)
sic := src0.COffset(srcX, srcY)
r, g, b := color.YCbCrToRGB(src0.Y[siy], src0.Cb[sic], src0.Cr[sic])
dst.Pix[di+0] = r
dst.Pix[di+1] = g
dst.Pix[di+2] = b
dst.Pix[di+3] = 0xff
di += 4
}
}
default:
for dstY := 0; dstY < dstH; dstY++ {
di := dst.PixOffset(0, dstY)
for dstX := 0; dstX < dstW; dstX++ {
c := color.NRGBAModel.Convert(src.At(srcMinX+dstX, srcMinY+dstY)).(color.NRGBA)
dst.Pix[di+0] = c.R
dst.Pix[di+1] = c.G
dst.Pix[di+2] = c.B
dst.Pix[di+3] = c.A
di += 4
}
}
}
blurImage(dst, radius)
}
func blurImage(src *image.NRGBA, radius uint32) {
var (
stackEnd *blurStack
stackIn *blurStack
stackOut *blurStack
)
var width, height = uint32(src.Bounds().Dx()), uint32(src.Bounds().Dy())
var (
div, widthMinus1, heightMinus1, radiusPlus1, sumFactor uint32
x, y, i, p, yp, yi, yw,
@@ -65,26 +142,17 @@ func (c *Carver) StackBlur(img *image.NRGBA, radius uint32) *image.NRGBA {
pr, pg, pb, pa uint32
)
// Limit the maximum blur radius to 255, otherwise it overflows the multable length
// and will panic with and index out of range error.
if int(radius) >= len(mulTable) {
radius = uint32(len(mulTable) - 1)
}
if radius < 1 {
radius = 1
}
div = radius + radius + 1
widthMinus1 = width - 1
heightMinus1 = height - 1
radiusPlus1 = radius + 1
sumFactor = radiusPlus1 * (radiusPlus1 + 1) / 2
stackStart := new(blurstack)
stackStart := new(blurStack)
stack := stackStart
for i = 1; i < div; i++ {
stack.next = new(blurstack)
stack.next = new(blurStack)
stack = stack.next
if i == radiusPlus1 {
stackEnd = stack
@@ -98,10 +166,10 @@ func (c *Carver) StackBlur(img *image.NRGBA, radius uint32) *image.NRGBA {
for y = 0; y < height; y++ {
rInSum, gInSum, bInSum, aInSum, rSum, gSum, bSum, aSum = 0, 0, 0, 0, 0, 0, 0, 0
pr = uint32(img.Pix[yi])
pg = uint32(img.Pix[yi+1])
pb = uint32(img.Pix[yi+2])
pa = uint32(img.Pix[yi+3])
pr = uint32(src.Pix[yi])
pg = uint32(src.Pix[yi+1])
pb = uint32(src.Pix[yi+2])
pa = uint32(src.Pix[yi+3])
rOutSum = radiusPlus1 * pr
gOutSum = radiusPlus1 * pg
@@ -131,10 +199,10 @@ func (c *Carver) StackBlur(img *image.NRGBA, radius uint32) *image.NRGBA {
diff = i
}
p = yi + (diff << 2)
pr = uint32(img.Pix[p])
pg = uint32(img.Pix[p+1])
pb = uint32(img.Pix[p+2])
pa = uint32(img.Pix[p+3])
pr = uint32(src.Pix[p])
pg = uint32(src.Pix[p+1])
pb = uint32(src.Pix[p+2])
pa = uint32(src.Pix[p+3])
stack.r = pr
stack.g = pg
@@ -158,16 +226,16 @@ func (c *Carver) StackBlur(img *image.NRGBA, radius uint32) *image.NRGBA {
for x = 0; x < width; x++ {
pa = (aSum * mulSum) >> shgSum
img.Pix[yi+3] = uint8(pa)
src.Pix[yi+3] = uint8(pa)
if pa != 0 {
img.Pix[yi] = uint8((rSum * mulSum) >> shgSum)
img.Pix[yi+1] = uint8((gSum * mulSum) >> shgSum)
img.Pix[yi+2] = uint8((bSum * mulSum) >> shgSum)
src.Pix[yi] = uint8((rSum * mulSum) >> shgSum)
src.Pix[yi+1] = uint8((gSum * mulSum) >> shgSum)
src.Pix[yi+2] = uint8((bSum * mulSum) >> shgSum)
} else {
img.Pix[yi] = 0
img.Pix[yi+1] = 0
img.Pix[yi+2] = 0
src.Pix[yi] = 0
src.Pix[yi+1] = 0
src.Pix[yi+2] = 0
}
rSum -= rOutSum
@@ -187,10 +255,10 @@ func (c *Carver) StackBlur(img *image.NRGBA, radius uint32) *image.NRGBA {
}
p = (yw + p) << 2
stackIn.r = uint32(img.Pix[p])
stackIn.g = uint32(img.Pix[p+1])
stackIn.b = uint32(img.Pix[p+2])
stackIn.a = uint32(img.Pix[p+3])
stackIn.r = uint32(src.Pix[p])
stackIn.g = uint32(src.Pix[p+1])
stackIn.b = uint32(src.Pix[p+2])
stackIn.a = uint32(src.Pix[p+3])
rInSum += stackIn.r
gInSum += stackIn.g
@@ -230,10 +298,10 @@ func (c *Carver) StackBlur(img *image.NRGBA, radius uint32) *image.NRGBA {
rInSum, gInSum, bInSum, aInSum, rSum, gSum, bSum, aSum = 0, 0, 0, 0, 0, 0, 0, 0
yi = x << 2
pr = uint32(img.Pix[yi])
pg = uint32(img.Pix[yi+1])
pb = uint32(img.Pix[yi+2])
pa = uint32(img.Pix[yi+3])
pr = uint32(src.Pix[yi])
pg = uint32(src.Pix[yi+1])
pb = uint32(src.Pix[yi+2])
pa = uint32(src.Pix[yi+3])
rOutSum = radiusPlus1 * pr
gOutSum = radiusPlus1 * pg
@@ -259,10 +327,10 @@ func (c *Carver) StackBlur(img *image.NRGBA, radius uint32) *image.NRGBA {
for i = 1; i <= radius; i++ {
yi = (yp + x) << 2
pr = uint32(img.Pix[yi])
pg = uint32(img.Pix[yi+1])
pb = uint32(img.Pix[yi+2])
pa = uint32(img.Pix[yi+3])
pr = uint32(src.Pix[yi])
pg = uint32(src.Pix[yi+1])
pb = uint32(src.Pix[yi+2])
pa = uint32(src.Pix[yi+3])
stack.r = pr
stack.g = pg
@@ -293,16 +361,16 @@ func (c *Carver) StackBlur(img *image.NRGBA, radius uint32) *image.NRGBA {
for y = 0; y < height; y++ {
p = yi << 2
pa = (aSum * mulSum) >> shgSum
img.Pix[p+3] = uint8(pa)
src.Pix[p+3] = uint8(pa)
if pa > 0 {
img.Pix[p] = uint8((rSum * mulSum) >> shgSum)
img.Pix[p+1] = uint8((gSum * mulSum) >> shgSum)
img.Pix[p+2] = uint8((bSum * mulSum) >> shgSum)
src.Pix[p] = uint8((rSum * mulSum) >> shgSum)
src.Pix[p+1] = uint8((gSum * mulSum) >> shgSum)
src.Pix[p+2] = uint8((bSum * mulSum) >> shgSum)
} else {
img.Pix[p] = 0
img.Pix[p+1] = 0
img.Pix[p+2] = 0
src.Pix[p] = 0
src.Pix[p+1] = 0
src.Pix[p+2] = 0
}
rSum -= rOutSum
@@ -322,10 +390,10 @@ func (c *Carver) StackBlur(img *image.NRGBA, radius uint32) *image.NRGBA {
}
p = (x + (p * width)) << 2
stackIn.r = uint32(img.Pix[p])
stackIn.g = uint32(img.Pix[p+1])
stackIn.b = uint32(img.Pix[p+2])
stackIn.a = uint32(img.Pix[p+3])
stackIn.r = uint32(src.Pix[p])
stackIn.g = uint32(src.Pix[p+1])
stackIn.b = uint32(src.Pix[p+2])
stackIn.a = uint32(src.Pix[p+3])
rInSum += stackIn.r
gInSum += stackIn.g
@@ -359,5 +427,4 @@ func (c *Carver) StackBlur(img *image.NRGBA, radius uint32) *image.NRGBA {
yi += width
}
}
return img
}