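Add a 16-byte alignment check on dst to the Add assembly routine; when dst is misaligned, one element is processed with scalar instructions before the packed loop.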

This commit is contained in:
Chad Kunde
2016-05-23 02:18:33 -07:00
parent d5bb447188
commit d7bd77f23f
2 changed files with 33 additions and 17 deletions

View File

@@ -9,15 +9,28 @@
// func Add(dst, s []float64)
TEXT ·Add(SB), NOSPLIT, $0
MOVQ dst_base+0(FP), DI
MOVQ dst_len+8(FP), DX
MOVQ dst_len+8(FP), CX
MOVQ s_base+24(FP), SI
CMPQ s_len+32(FP), DX
CMOVQLE s_len+32(FP), DX
CMPQ DX, $0
CMPQ s_len+32(FP), CX
CMOVQLE s_len+32(FP), CX
CMPQ CX, $0
JE add_end
XORQ AX, AX
CMPQ DX, $4
JL add_tail
MOVQ DI, BX
ANDQ $15, BX
JZ add_no_trim
MOVSD (DI)(AX*8), X0
ADDSD (SI)(AX*8), X0
MOVSD X0, (DI)(AX*8)
INCQ AX
DECQ CX
JE add_end
add_no_trim:
MOVQ CX, BX
ANDQ $3, BX
SHRQ $2, CX
JZ add_tail_start
add_loop:
MOVUPS (SI)(AX*8), X0
@@ -27,19 +40,19 @@ add_loop:
ADDPD 16(DI)(AX*8), X1
MOVUPS X1, 16(DI)(AX*8)
ADDQ $4, AX
SUBQ $4, DX
CMPQ DX, $4
JGE add_loop
CMPQ DX, $0
LOOPNE add_loop
CMPQ BX, $0
JE add_end
add_tail_start:
MOVQ BX, CX
add_tail:
MOVSD (DI)(AX*8), X0
ADDSD (SI)(AX*8), X0
MOVSD X0, (DI)(AX*8)
INCQ AX
DECQ DX
JNZ add_tail
LOOPNE add_tail
add_end:
RET

View File

@@ -32,6 +32,9 @@ func TestAdd(t *testing.T) {
{[]float64{0, 1, 2, 3, 4},
[]float64{-inf, 4, nan, 8, 9},
[]float64{-inf, 5, nan, 11, 13}},
{make([]float64, 50)[1:49],
make([]float64, 50)[1:49],
make([]float64, 50)[1:49]},
} {
Add(v.dst, v.src)
for i := range v.expect {
@@ -70,7 +73,7 @@ func TestCumSum(t *testing.T) {
}{
{[]float64{0}, []float64{1}, []float64{1}},
{[]float64{nan}, []float64{nan}, []float64{nan}},
{[]float64{0, 0, 0}, []float64{1, 2, 3, 4}, []float64{1, 3, 6}},
{[]float64{0, 0, 0}, []float64{1, 2, 3}, []float64{1, 3, 6}},
{[]float64{0, 0, 0, 0}, []float64{1, 2, 3}, []float64{1, 3, 6}},
{[]float64{0, 0, 0, 0}, []float64{1, 2, 3, 4}, []float64{1, 3, 6, 10}},
{[]float64{1, nan, nan, 1, 1},