mirror of
https://github.com/gonum/gonum.git
synced 2025-10-23 15:13:31 +08:00
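Alignment check added to the Add routine: when the dst base address is not 16-byte aligned, one element is added with scalar instructions before the 4-element packed loop runs.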
This commit is contained in:
@@ -9,15 +9,28 @@
|
|||||||
// func Add(dst, s []float64)
|
// func Add(dst, s []float64)
|
||||||
TEXT ·Add(SB), NOSPLIT, $0
|
TEXT ·Add(SB), NOSPLIT, $0
|
||||||
MOVQ dst_base+0(FP), DI
|
MOVQ dst_base+0(FP), DI
|
||||||
MOVQ dst_len+8(FP), DX
|
MOVQ dst_len+8(FP), CX
|
||||||
MOVQ s_base+24(FP), SI
|
MOVQ s_base+24(FP), SI
|
||||||
CMPQ s_len+32(FP), DX
|
CMPQ s_len+32(FP), CX
|
||||||
CMOVQLE s_len+32(FP), DX
|
CMOVQLE s_len+32(FP), CX
|
||||||
CMPQ DX, $0
|
CMPQ CX, $0
|
||||||
JE add_end
|
JE add_end
|
||||||
XORQ AX, AX
|
XORQ AX, AX
|
||||||
CMPQ DX, $4
|
MOVQ DI, BX
|
||||||
JL add_tail
|
ANDQ $15, BX
|
||||||
|
JZ add_no_trim
|
||||||
|
MOVSD (DI)(AX*8), X0
|
||||||
|
ADDSD (SI)(AX*8), X0
|
||||||
|
MOVSD X0, (DI)(AX*8)
|
||||||
|
INCQ AX
|
||||||
|
DECQ CX
|
||||||
|
JE add_end
|
||||||
|
|
||||||
|
add_no_trim:
|
||||||
|
MOVQ CX, BX
|
||||||
|
ANDQ $3, BX
|
||||||
|
SHRQ $2, CX
|
||||||
|
JZ add_tail_start
|
||||||
|
|
||||||
add_loop:
|
add_loop:
|
||||||
MOVUPS (SI)(AX*8), X0
|
MOVUPS (SI)(AX*8), X0
|
||||||
@@ -27,19 +40,19 @@ add_loop:
|
|||||||
ADDPD 16(DI)(AX*8), X1
|
ADDPD 16(DI)(AX*8), X1
|
||||||
MOVUPS X1, 16(DI)(AX*8)
|
MOVUPS X1, 16(DI)(AX*8)
|
||||||
ADDQ $4, AX
|
ADDQ $4, AX
|
||||||
SUBQ $4, DX
|
LOOPNE add_loop
|
||||||
CMPQ DX, $4
|
CMPQ BX, $0
|
||||||
JGE add_loop
|
|
||||||
CMPQ DX, $0
|
|
||||||
JE add_end
|
JE add_end
|
||||||
|
|
||||||
|
add_tail_start:
|
||||||
|
MOVQ BX, CX
|
||||||
|
|
||||||
add_tail:
|
add_tail:
|
||||||
MOVSD (DI)(AX*8), X0
|
MOVSD (DI)(AX*8), X0
|
||||||
ADDSD (SI)(AX*8), X0
|
ADDSD (SI)(AX*8), X0
|
||||||
MOVSD X0, (DI)(AX*8)
|
MOVSD X0, (DI)(AX*8)
|
||||||
INCQ AX
|
INCQ AX
|
||||||
DECQ DX
|
LOOPNE add_tail
|
||||||
JNZ add_tail
|
|
||||||
|
|
||||||
add_end:
|
add_end:
|
||||||
RET
|
RET
|
||||||
|
@@ -32,6 +32,9 @@ func TestAdd(t *testing.T) {
|
|||||||
{[]float64{0, 1, 2, 3, 4},
|
{[]float64{0, 1, 2, 3, 4},
|
||||||
[]float64{-inf, 4, nan, 8, 9},
|
[]float64{-inf, 4, nan, 8, 9},
|
||||||
[]float64{-inf, 5, nan, 11, 13}},
|
[]float64{-inf, 5, nan, 11, 13}},
|
||||||
|
{make([]float64, 50)[1:49],
|
||||||
|
make([]float64, 50)[1:49],
|
||||||
|
make([]float64, 50)[1:49]},
|
||||||
} {
|
} {
|
||||||
Add(v.dst, v.src)
|
Add(v.dst, v.src)
|
||||||
for i := range v.expect {
|
for i := range v.expect {
|
||||||
@@ -70,7 +73,7 @@ func TestCumSum(t *testing.T) {
|
|||||||
}{
|
}{
|
||||||
{[]float64{0}, []float64{1}, []float64{1}},
|
{[]float64{0}, []float64{1}, []float64{1}},
|
||||||
{[]float64{nan}, []float64{nan}, []float64{nan}},
|
{[]float64{nan}, []float64{nan}, []float64{nan}},
|
||||||
{[]float64{0, 0, 0}, []float64{1, 2, 3, 4}, []float64{1, 3, 6}},
|
{[]float64{0, 0, 0}, []float64{1, 2, 3}, []float64{1, 3, 6}},
|
||||||
{[]float64{0, 0, 0, 0}, []float64{1, 2, 3}, []float64{1, 3, 6}},
|
{[]float64{0, 0, 0, 0}, []float64{1, 2, 3}, []float64{1, 3, 6}},
|
||||||
{[]float64{0, 0, 0, 0}, []float64{1, 2, 3, 4}, []float64{1, 3, 6, 10}},
|
{[]float64{0, 0, 0, 0}, []float64{1, 2, 3, 4}, []float64{1, 3, 6, 10}},
|
||||||
{[]float64{1, nan, nan, 1, 1},
|
{[]float64{1, nan, nan, 1, 1},
|
||||||
|
Reference in New Issue
Block a user