mirror of
https://github.com/gonum/gonum.git
synced 2025-10-15 11:40:45 +08:00

This is a synonym for noasm since gccgo (and gollvm) do not use plan9 assembly, so we just prevent those compilers from looking at asm files.
54 lines
1.5 KiB
ArmAsm
54 lines
1.5 KiB
ArmAsm
// Copyright ©2016 The Gonum Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// +build !noasm,!gccgo,!safe
|
|
|
|
#include "textflag.h"
|
|
|
|
// func AddConst(alpha float64, x []float64)
//
// AddConst adds alpha to every element of x in place, i.e.
//
//	for i := range x { x[i] += alpha }
//
// The bulk of the slice is processed eight elements per iteration using
// packed SSE2 adds; the remaining len(x) % 8 elements are processed one at
// a time with scalar adds.
//
// Register use:
//	SI     = &x[0]
//	AX     = i, index of the next element to process
//	CX     = len(x), then the number of 8-element blocks left
//	BX     = len(x) % 8, the number of tail elements left
//	X4, X5 = { alpha, alpha }
//	X0-X3  = scratch
TEXT ·AddConst(SB), NOSPLIT, $0
	MOVQ   x_base+8(FP), SI   // SI = &x[0]
	MOVQ   x_len+16(FP), CX   // CX = len(x)
	TESTQ  CX, CX             // if len(x) == 0 { return }
	JZ     ac_end
	MOVSD  alpha+0(FP), X4    // X4 = { alpha, alpha }
	SHUFPD $0, X4, X4         //   (copy the low lane into both lanes)
	MOVUPS X4, X5             // X5 = X4
	XORQ   AX, AX             // i = 0
	MOVQ   CX, BX
	ANDQ   $7, BX             // BX = len(x) % 8
	SHRQ   $3, CX             // CX = len(x) / 8
	JZ     ac_tail            // if CX == 0 { goto ac_tail }; here BX == len(x) > 0

ac_loop: // Loop unrolled 8x: do {
	MOVUPS (SI)(AX*8), X0     // X0..X3 = x[i:i+8], two float64 per register
	MOVUPS 16(SI)(AX*8), X1
	MOVUPS 32(SI)(AX*8), X2
	MOVUPS 48(SI)(AX*8), X3
	ADDPD  X4, X0             // X0..X3 += { alpha, alpha }
	ADDPD  X5, X1
	ADDPD  X4, X2
	ADDPD  X5, X3
	MOVUPS X0, (SI)(AX*8)     // x[i:i+8] = X0..X3
	MOVUPS X1, 16(SI)(AX*8)
	MOVUPS X2, 32(SI)(AX*8)
	MOVUPS X3, 48(SI)(AX*8)
	ADDQ   $8, AX             // i += 8
	DECQ   CX                 // DECQ/JNZ instead of the legacy LOOP instruction
	JNZ    ac_loop            // } while --CX != 0
	TESTQ  BX, BX             // if BX == 0 { return }
	JZ     ac_end

ac_tail: // Scalar tail, BX > 0 on entry: do {
	MOVSD (SI)(AX*8), X0      // X0 = x[i]
	ADDSD X4, X0              // X0 += alpha
	MOVSD X0, (SI)(AX*8)      // x[i] = X0
	INCQ  AX                  // i++
	DECQ  BX
	JNZ   ac_tail             // } while --BX != 0

ac_end:
	RET
|