mirror of
https://github.com/gonum/gonum.git
synced 2025-10-22 06:39:26 +08:00
Alignment check added to the Add routine.
This commit is contained in:
@@ -9,15 +9,28 @@
|
||||
// func Add(dst, s []float64)
|
||||
TEXT ·Add(SB), NOSPLIT, $0
|
||||
MOVQ dst_base+0(FP), DI
|
||||
MOVQ dst_len+8(FP), DX
|
||||
MOVQ dst_len+8(FP), CX
|
||||
MOVQ s_base+24(FP), SI
|
||||
CMPQ s_len+32(FP), DX
|
||||
CMOVQLE s_len+32(FP), DX
|
||||
CMPQ DX, $0
|
||||
CMPQ s_len+32(FP), CX
|
||||
CMOVQLE s_len+32(FP), CX
|
||||
CMPQ CX, $0
|
||||
JE add_end
|
||||
XORQ AX, AX
|
||||
CMPQ DX, $4
|
||||
JL add_tail
|
||||
MOVQ DI, BX
|
||||
ANDQ $15, BX
|
||||
JZ add_no_trim
|
||||
MOVSD (DI)(AX*8), X0
|
||||
ADDSD (SI)(AX*8), X0
|
||||
MOVSD X0, (DI)(AX*8)
|
||||
INCQ AX
|
||||
DECQ CX
|
||||
JE add_end
|
||||
|
||||
add_no_trim:
|
||||
MOVQ CX, BX
|
||||
ANDQ $3, BX
|
||||
SHRQ $2, CX
|
||||
JZ add_tail_start
|
||||
|
||||
add_loop:
|
||||
MOVUPS (SI)(AX*8), X0
|
||||
@@ -27,19 +40,19 @@ add_loop:
|
||||
ADDPD 16(DI)(AX*8), X1
|
||||
MOVUPS X1, 16(DI)(AX*8)
|
||||
ADDQ $4, AX
|
||||
SUBQ $4, DX
|
||||
CMPQ DX, $4
|
||||
JGE add_loop
|
||||
CMPQ DX, $0
|
||||
LOOPNE add_loop
|
||||
CMPQ BX, $0
|
||||
JE add_end
|
||||
|
||||
add_tail_start:
|
||||
MOVQ BX, CX
|
||||
|
||||
add_tail:
|
||||
MOVSD (DI)(AX*8), X0
|
||||
ADDSD (SI)(AX*8), X0
|
||||
MOVSD X0, (DI)(AX*8)
|
||||
INCQ AX
|
||||
DECQ DX
|
||||
JNZ add_tail
|
||||
MOVSD (DI)(AX*8), X0
|
||||
ADDSD (SI)(AX*8), X0
|
||||
MOVSD X0, (DI)(AX*8)
|
||||
INCQ AX
|
||||
LOOPNE add_tail
|
||||
|
||||
add_end:
|
||||
RET
|
||||
|
@@ -32,6 +32,9 @@ func TestAdd(t *testing.T) {
|
||||
{[]float64{0, 1, 2, 3, 4},
|
||||
[]float64{-inf, 4, nan, 8, 9},
|
||||
[]float64{-inf, 5, nan, 11, 13}},
|
||||
{make([]float64, 50)[1:49],
|
||||
make([]float64, 50)[1:49],
|
||||
make([]float64, 50)[1:49]},
|
||||
} {
|
||||
Add(v.dst, v.src)
|
||||
for i := range v.expect {
|
||||
@@ -70,7 +73,7 @@ func TestCumSum(t *testing.T) {
|
||||
}{
|
||||
{[]float64{0}, []float64{1}, []float64{1}},
|
||||
{[]float64{nan}, []float64{nan}, []float64{nan}},
|
||||
{[]float64{0, 0, 0}, []float64{1, 2, 3, 4}, []float64{1, 3, 6}},
|
||||
{[]float64{0, 0, 0}, []float64{1, 2, 3}, []float64{1, 3, 6}},
|
||||
{[]float64{0, 0, 0, 0}, []float64{1, 2, 3}, []float64{1, 3, 6}},
|
||||
{[]float64{0, 0, 0, 0}, []float64{1, 2, 3, 4}, []float64{1, 3, 6, 10}},
|
||||
{[]float64{1, nan, nan, 1, 1},
|
||||
|
Reference in New Issue
Block a user