ebiten: reduce boundary checks

Updates #3275
This commit is contained in:
Hajime Hoshi
2025-07-19 21:43:17 +09:00
parent 38b2e2c4ba
commit 86fd3e8d66
2 changed files with 41 additions and 36 deletions

View File

@@ -655,30 +655,34 @@ func (i *Image) DrawTriangles32(vertices []Vertex, indices []uint32, img *Image,
if options.ColorScaleMode == ColorScaleModeStraightAlpha { if options.ColorScaleMode == ColorScaleModeStraightAlpha {
// Avoid using `for i, v := range vertices` as adding `v` creates a copy from `vertices` unnecessarily on each loop (#3103). // Avoid using `for i, v := range vertices` as adding `v` creates a copy from `vertices` unnecessarily on each loop (#3103).
for i := range vertices { for i := range vertices {
// Create a temporary slice to reduce boundary checks.
vs := vs[i*graphics.VertexFloatCount : i*graphics.VertexFloatCount+8]
dx, dy := dst.adjustPositionF32(vertices[i].DstX, vertices[i].DstY) dx, dy := dst.adjustPositionF32(vertices[i].DstX, vertices[i].DstY)
vs[i*graphics.VertexFloatCount] = dx vs[0] = dx
vs[i*graphics.VertexFloatCount+1] = dy vs[1] = dy
sx, sy := img.adjustPositionF32(vertices[i].SrcX, vertices[i].SrcY) sx, sy := img.adjustPositionF32(vertices[i].SrcX, vertices[i].SrcY)
vs[i*graphics.VertexFloatCount+2] = sx vs[2] = sx
vs[i*graphics.VertexFloatCount+3] = sy vs[3] = sy
vs[i*graphics.VertexFloatCount+4] = vertices[i].ColorR * vertices[i].ColorA * cr vs[4] = vertices[i].ColorR * vertices[i].ColorA * cr
vs[i*graphics.VertexFloatCount+5] = vertices[i].ColorG * vertices[i].ColorA * cg vs[5] = vertices[i].ColorG * vertices[i].ColorA * cg
vs[i*graphics.VertexFloatCount+6] = vertices[i].ColorB * vertices[i].ColorA * cb vs[6] = vertices[i].ColorB * vertices[i].ColorA * cb
vs[i*graphics.VertexFloatCount+7] = vertices[i].ColorA * ca vs[7] = vertices[i].ColorA * ca
} }
} else { } else {
// See comment above (#3103). // See comment above (#3103).
for i := range vertices { for i := range vertices {
// Create a temporary slice to reduce boundary checks.
vs := vs[i*graphics.VertexFloatCount : i*graphics.VertexFloatCount+8]
dx, dy := dst.adjustPositionF32(vertices[i].DstX, vertices[i].DstY) dx, dy := dst.adjustPositionF32(vertices[i].DstX, vertices[i].DstY)
vs[i*graphics.VertexFloatCount] = dx vs[0] = dx
vs[i*graphics.VertexFloatCount+1] = dy vs[1] = dy
sx, sy := img.adjustPositionF32(vertices[i].SrcX, vertices[i].SrcY) sx, sy := img.adjustPositionF32(vertices[i].SrcX, vertices[i].SrcY)
vs[i*graphics.VertexFloatCount+2] = sx vs[2] = sx
vs[i*graphics.VertexFloatCount+3] = sy vs[3] = sy
vs[i*graphics.VertexFloatCount+4] = vertices[i].ColorR * cr vs[4] = vertices[i].ColorR * cr
vs[i*graphics.VertexFloatCount+5] = vertices[i].ColorG * cg vs[5] = vertices[i].ColorG * cg
vs[i*graphics.VertexFloatCount+6] = vertices[i].ColorB * cb vs[6] = vertices[i].ColorB * cb
vs[i*graphics.VertexFloatCount+7] = vertices[i].ColorA * ca vs[7] = vertices[i].ColorA * ca
} }
} }
@@ -879,23 +883,25 @@ func (i *Image) DrawTrianglesShader32(vertices []Vertex, indices []uint32, shade
src := options.Images[0] src := options.Images[0]
// Avoid using `for i, v := range vertices` as adding `v` creates a copy from `vertices` unnecessarily on each loop (#3103). // Avoid using `for i, v := range vertices` as adding `v` creates a copy from `vertices` unnecessarily on each loop (#3103).
for i := range vertices { for i := range vertices {
// Create a temporary slice to reduce boundary checks.
vs := vs[i*graphics.VertexFloatCount : i*graphics.VertexFloatCount+12]
dx, dy := dst.adjustPositionF32(vertices[i].DstX, vertices[i].DstY) dx, dy := dst.adjustPositionF32(vertices[i].DstX, vertices[i].DstY)
vs[i*graphics.VertexFloatCount] = dx vs[0] = dx
vs[i*graphics.VertexFloatCount+1] = dy vs[1] = dy
sx, sy := vertices[i].SrcX, vertices[i].SrcY sx, sy := vertices[i].SrcX, vertices[i].SrcY
if src != nil { if src != nil {
sx, sy = src.adjustPositionF32(sx, sy) sx, sy = src.adjustPositionF32(sx, sy)
} }
vs[i*graphics.VertexFloatCount+2] = sx vs[2] = sx
vs[i*graphics.VertexFloatCount+3] = sy vs[3] = sy
vs[i*graphics.VertexFloatCount+4] = vertices[i].ColorR vs[4] = vertices[i].ColorR
vs[i*graphics.VertexFloatCount+5] = vertices[i].ColorG vs[5] = vertices[i].ColorG
vs[i*graphics.VertexFloatCount+6] = vertices[i].ColorB vs[6] = vertices[i].ColorB
vs[i*graphics.VertexFloatCount+7] = vertices[i].ColorA vs[7] = vertices[i].ColorA
vs[i*graphics.VertexFloatCount+8] = vertices[i].Custom0 vs[8] = vertices[i].Custom0
vs[i*graphics.VertexFloatCount+9] = vertices[i].Custom1 vs[9] = vertices[i].Custom1
vs[i*graphics.VertexFloatCount+10] = vertices[i].Custom2 vs[10] = vertices[i].Custom2
vs[i*graphics.VertexFloatCount+11] = vertices[i].Custom3 vs[11] = vertices[i].Custom3
} }
var imgs [graphics.ShaderSrcImageCount]*ui.Image var imgs [graphics.ShaderSrcImageCount]*ui.Image

View File

@@ -42,11 +42,13 @@ func imageToBytes(img image.Image) []byte {
palette := make([]uint8, len(img.Palette)*4) palette := make([]uint8, len(img.Palette)*4)
for i, c := range img.Palette { for i, c := range img.Palette {
// Create a temporary slice to reduce boundary checks.
pl := palette[4*i : 4*i+4]
rgba := color.RGBAModel.Convert(c).(color.RGBA) rgba := color.RGBAModel.Convert(c).(color.RGBA)
palette[4*i] = rgba.R pl[0] = rgba.R
palette[4*i+1] = rgba.G pl[1] = rgba.G
palette[4*i+2] = rgba.B pl[2] = rgba.B
palette[4*i+3] = rgba.A pl[3] = rgba.A
} }
// Even if img is a subimage of another image, Pix starts with the 0-th index. // Even if img is a subimage of another image, Pix starts with the 0-th index.
idx0 := 0 idx0 := 0
@@ -55,10 +57,7 @@ func imageToBytes(img image.Image) []byte {
for j := 0; j < y1-y0; j++ { for j := 0; j < y1-y0; j++ {
for i := 0; i < x1-x0; i++ { for i := 0; i < x1-x0; i++ {
p := int(img.Pix[idx0]) p := int(img.Pix[idx0])
bs[idx1] = palette[4*p] copy(bs[idx1:idx1+4], palette[4*p:4*p+4])
bs[idx1+1] = palette[4*p+1]
bs[idx1+2] = palette[4*p+2]
bs[idx1+3] = palette[4*p+3]
idx0++ idx0++
idx1 += 4 idx1 += 4
} }