mirror of
https://github.com/gonum/gonum.git
synced 2025-09-26 19:21:17 +08:00
translate netlib l2norm algorithm to asm and remove branches
Netlib algorithm reduces overflow while calculating the l2norm of a vector. Translated to asm while reducing branches in NaN and Inf checks. Overflow protection is equivalent to the Netlib standard implementation.
This commit is contained in:
@@ -36,7 +36,6 @@ func areSlicesSame(t *testing.T, truth, comp []float64, str string) {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
if !ok {
|
||||
t.Errorf(str+". Expected %v, returned %v", truth, comp)
|
||||
@@ -96,7 +95,6 @@ func TestAddTo(t *testing.T) {
|
||||
if !Panics(func() { AddTo(make([]float64, 3), make([]float64, 3), make([]float64, 2)) }) {
|
||||
t.Errorf("Did not panic with length mismatch")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestAddConst(t *testing.T) {
|
||||
@@ -208,7 +206,6 @@ func TestCumProd(t *testing.T) {
|
||||
truth = []float64{}
|
||||
CumProd(emptyReceiver, emptyReceiver)
|
||||
areSlicesEqual(t, truth, emptyReceiver, "Wrong cumprod returned with empty receiver")
|
||||
|
||||
}
|
||||
|
||||
func TestCumSum(t *testing.T) {
|
||||
@@ -231,7 +228,6 @@ func TestCumSum(t *testing.T) {
|
||||
truth = []float64{}
|
||||
CumSum(emptyReceiver, emptyReceiver)
|
||||
areSlicesEqual(t, truth, emptyReceiver, "Wrong cumsum returned with empty receiver")
|
||||
|
||||
}
|
||||
|
||||
func TestDistance(t *testing.T) {
|
||||
@@ -270,7 +266,6 @@ func TestDistance(t *testing.T) {
|
||||
if !Panics(func() { Distance([]float64{}, norms, 1) }) {
|
||||
t.Errorf("Did not panic with unequal lengths")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestDiv(t *testing.T) {
|
||||
@@ -398,7 +393,7 @@ func TestEqualFunc(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestEqualsRelative(t *testing.T) {
|
||||
var equalityTests = []struct {
|
||||
equalityTests := []struct {
|
||||
a, b float64
|
||||
tol float64
|
||||
equal bool
|
||||
@@ -518,7 +513,6 @@ func TestEqualsULP(t *testing.T) {
|
||||
if EqualWithinULP(1, math.NaN(), 10) {
|
||||
t.Errorf("NaN returned as equal")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestEqualLengths(t *testing.T) {
|
||||
@@ -699,7 +693,6 @@ func TestLogSumExp(t *testing.T) {
|
||||
if math.Abs(val-truth) > EqTolerance {
|
||||
t.Errorf("Wrong logsumexp for values with negative infinity")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestMaxAndIdx(t *testing.T) {
|
||||
@@ -1572,7 +1565,6 @@ func TestWithin(t *testing.T) {
|
||||
t.Errorf("Case %v: Idx mismatch. Want: %v, got: %v", i, test.idx, idx)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func randomSlice(l int) []float64 {
|
||||
|
@@ -6,33 +6,6 @@ package f64
|
||||
|
||||
import "math"
|
||||
|
||||
// L2NormUnitary is the level 2 norm of x.
|
||||
func L2NormUnitary(x []float64) (sum float64) {
|
||||
var scale float64
|
||||
sumSquares := 1.0
|
||||
for _, v := range x {
|
||||
if v == 0 {
|
||||
continue
|
||||
}
|
||||
absxi := math.Abs(v)
|
||||
if math.IsNaN(absxi) {
|
||||
return math.NaN()
|
||||
}
|
||||
if scale < absxi {
|
||||
s := scale / absxi
|
||||
sumSquares = 1 + sumSquares*s*s
|
||||
scale = absxi
|
||||
} else {
|
||||
s := absxi / scale
|
||||
sumSquares += s * s
|
||||
}
|
||||
}
|
||||
if math.IsInf(scale, 1) {
|
||||
return math.Inf(1)
|
||||
}
|
||||
return scale * math.Sqrt(sumSquares)
|
||||
}
|
||||
|
||||
// L2NormInc is the level 2 norm of x.
|
||||
func L2NormInc(x []float64, n, incX uintptr) (sum float64) {
|
||||
var scale float64
|
||||
|
109
internal/asm/f64/l2norm_amd64.s
Normal file
109
internal/asm/f64/l2norm_amd64.s
Normal file
@@ -0,0 +1,109 @@
|
||||
// Copyright ©2019 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !noasm,!appengine,!safe
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define SUMSQ X0
|
||||
#define ABSX X1
|
||||
#define SCALE X2
|
||||
#define ZERO X3
|
||||
#define TMP X4
|
||||
#define ABSMASK X5
|
||||
#define INF X7
|
||||
#define INFMASK X11
|
||||
#define NANMASK X12
|
||||
#define IDX AX
|
||||
#define LEN SI
|
||||
#define X_ DI
|
||||
|
||||
#define ABSMASK_DATA l2nrodata<>+0(SB)
|
||||
#define INF_DATA l2nrodata<>+8(SB)
|
||||
#define NAN_DATA l2nrodata<>+16(SB)
|
||||
// AbsMask
|
||||
DATA l2nrodata<>+0(SB)/8, $0x7FFFFFFFFFFFFFFF
|
||||
// Inf
|
||||
DATA l2nrodata<>+8(SB)/8, $0x7FF0000000000000
|
||||
// NaN
|
||||
DATA l2nrodata<>+16(SB)/8, $0xFFF8000000000000
|
||||
GLOBL l2nrodata<>+0(SB), RODATA, $24
|
||||
|
||||
// L2NormUnitary returns the L2-norm of x.
|
||||
// func L2NormUnitary(x []float64) (sum float64)
|
||||
TEXT ·L2NormUnitary(SB), NOSPLIT, $0
|
||||
MOVQ x_len+8(FP), LEN
|
||||
MOVQ x_base+0(FP), X_
|
||||
PXOR ZERO, ZERO
|
||||
CMPQ LEN, $0 // if len== 0 {return 0}
|
||||
JZ retZero
|
||||
|
||||
PXOR INFMASK, INFMASK
|
||||
PXOR NANMASK, NANMASK
|
||||
MOVSD $1.0, SUMSQ // ssq = 1
|
||||
XORPS SCALE, SCALE
|
||||
MOVSD l2nrodata<>+0(SB), ABSMASK
|
||||
MOVSD l2nrodata<>+8(SB), INF
|
||||
XORQ IDX, IDX // idx == 0
|
||||
|
||||
initZero: // for ;x[i]==0; i++ {}
|
||||
// Skip all leading zeros, to avoid divide by zero NaN
|
||||
MOVSD (X_)(IDX*8), ABSX // absxi = x[i]
|
||||
UCOMISD ABSX, ZERO
|
||||
JP retNaN // if isNaN(x[i]) { return NaN }
|
||||
JNE loop // if x[i] != 0 { goto loop }
|
||||
INCQ IDX
|
||||
CMPQ IDX, LEN
|
||||
JE retZero // if ++i == len(x) { return 0 }
|
||||
JMP initZero
|
||||
|
||||
loop:
|
||||
MOVSD (X_)(IDX*8), ABSX // absxi = x[i]
|
||||
MOVUPS ABSX, TMP
|
||||
CMPSD ABSX, TMP, $3
|
||||
ORPD TMP, NANMASK // NANMASK = NANMASK | IsNaN(absxi)
|
||||
MOVSD INF, TMP
|
||||
ANDPD ABSMASK, ABSX // absxi == Abs(absxi)
|
||||
CMPSD ABSX, TMP, $0
|
||||
ORPD TMP, INFMASK // INFMASK = INFMASK | IsInf(absxi)
|
||||
UCOMISD SCALE, ABSX
|
||||
JA adjScale // IF SCALE > ABSXI { goto adjScale }
|
||||
|
||||
DIVSD SCALE, ABSX // absxi = scale / absxi
|
||||
MULSD ABSX, ABSX // absxi *= absxi
|
||||
ADDSD ABSX, SUMSQ // sumsq += absxi
|
||||
INCQ IDX // ++i
|
||||
CMPQ IDX, LEN
|
||||
JNE loop // if i < len(x) { continue }
|
||||
JMP retSum // if i == len(x) { goto retSum }
|
||||
|
||||
adjScale: // Scale > Absxi
|
||||
DIVSD ABSX, SCALE // tmp = absxi / scale
|
||||
MULSD SCALE, SUMSQ // sumsq *= tmp
|
||||
MULSD SCALE, SUMSQ // sumsq *= tmp
|
||||
ADDSD $1.0, SUMSQ // sumsq += 1
|
||||
MOVUPS ABSX, SCALE // scale = absxi
|
||||
INCQ IDX // ++i
|
||||
CMPQ IDX, LEN
|
||||
JNE loop // if i < len(x) { continue }
|
||||
|
||||
retSum: // Calculate return value
|
||||
SQRTSD SUMSQ, SUMSQ // sumsq = sqrt(sumsq)
|
||||
MULSD SCALE, SUMSQ // sumsq += scale
|
||||
MOVQ SUMSQ, R10 // tmp = sumsq
|
||||
UCOMISD ZERO, INFMASK
|
||||
CMOVQPS INF_DATA, R10 // if INFMASK { tmp = INF }
|
||||
UCOMISD ZERO, NANMASK
|
||||
CMOVQPS NAN_DATA, R10 // if NANMASK { tmp = NaN }
|
||||
MOVQ R10, sum+24(FP) // return tmp
|
||||
RET
|
||||
|
||||
retZero:
|
||||
MOVSD ZERO, sum+24(FP) // return 0
|
||||
RET
|
||||
|
||||
retNaN:
|
||||
MOVSD NAN_DATA, TMP // return NaN
|
||||
MOVSD TMP, sum+24(FP)
|
||||
RET
|
36
internal/asm/f64/l2norm_noasm.go
Normal file
36
internal/asm/f64/l2norm_noasm.go
Normal file
@@ -0,0 +1,36 @@
|
||||
// Copyright ©2019 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !amd64 noasm appengine safe
|
||||
|
||||
package f64
|
||||
|
||||
import "math"
|
||||
|
||||
// L2NormUnitary returns the L2-norm of x.
|
||||
func L2NormUnitary(x []float64) (sum float64) {
|
||||
var scale float64
|
||||
sumSquares := 1.0
|
||||
for _, v := range x {
|
||||
if v == 0 {
|
||||
continue
|
||||
}
|
||||
absxi := math.Abs(v)
|
||||
if math.IsNaN(absxi) {
|
||||
return math.NaN()
|
||||
}
|
||||
if scale < absxi {
|
||||
s := scale / absxi
|
||||
sumSquares = 1 + sumSquares*s*s
|
||||
scale = absxi
|
||||
} else {
|
||||
s := absxi / scale
|
||||
sumSquares += s * s
|
||||
}
|
||||
}
|
||||
if math.IsInf(scale, 1) {
|
||||
return math.Inf(1)
|
||||
}
|
||||
return scale * math.Sqrt(sumSquares)
|
||||
}
|
@@ -4,7 +4,20 @@
|
||||
|
||||
package f64
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// nanwith copied from floats package
|
||||
func nanwith(payload uint64) float64 {
|
||||
const (
|
||||
nanBits = 0x7ff8000000000000
|
||||
nanMask = 0xfff8000000000000
|
||||
)
|
||||
return math.Float64frombits(nanBits | (payload &^ nanMask))
|
||||
}
|
||||
|
||||
func TestL2NormUnitary(t *testing.T) {
|
||||
var src_gd float64 = 1
|
||||
@@ -17,6 +30,7 @@ func TestL2NormUnitary(t *testing.T) {
|
||||
{want: 3.7416573867739413, x: []float64{1, 2, 3}},
|
||||
{want: 3.7416573867739413, x: []float64{-1, -2, -3}},
|
||||
{want: nan, x: []float64{nan}},
|
||||
{want: nan, x: []float64{1, inf, 3, nanwith(25), 5}},
|
||||
{want: 17.88854381999832, x: []float64{8, -8, 8, -8, 8}},
|
||||
{want: 2.23606797749979, x: []float64{0, 1, 0, -1, 0, 1, 0, -1, 0, 1}},
|
||||
} {
|
||||
@@ -87,3 +101,52 @@ func TestL2DistanceUnitary(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkL2NormNetlib(b *testing.B) {
|
||||
netlib := func(x []float64) (sum float64) {
|
||||
var scale float64
|
||||
sumSquares := 1.0
|
||||
for _, v := range x {
|
||||
if v == 0 {
|
||||
continue
|
||||
}
|
||||
absxi := math.Abs(v)
|
||||
if math.IsNaN(absxi) {
|
||||
return math.NaN()
|
||||
}
|
||||
if scale < absxi {
|
||||
s := scale / absxi
|
||||
sumSquares = 1 + sumSquares*s*s
|
||||
scale = absxi
|
||||
} else {
|
||||
s := absxi / scale
|
||||
sumSquares += s * s
|
||||
}
|
||||
}
|
||||
if math.IsInf(scale, 1) {
|
||||
return math.Inf(1)
|
||||
}
|
||||
return scale * math.Sqrt(sumSquares)
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
f func(x []float64) float64
|
||||
}{
|
||||
{"L2NormUnitaryNetlib", netlib},
|
||||
{"L2NormUnitary", L2NormUnitary},
|
||||
}
|
||||
x[0] = randomSlice(1, 1)[0] // replace the leading zero (edge case)
|
||||
for _, test := range tests {
|
||||
for _, ln := range []uintptr{1, 3, 10, 30, 1e2, 3e2, 1e3, 3e3, 1e4, 3e4, 1e5} {
|
||||
b.Run(fmt.Sprintf("%s-%d", test.name, ln), func(b *testing.B) {
|
||||
b.SetBytes(int64(64 * ln))
|
||||
x := x[:ln]
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
test.f(x)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -170,3 +170,29 @@ func ScalIncTo(dst []float64, incDst uintptr, alpha float64, x []float64, n, inc
|
||||
// sum += x[i]
|
||||
// }
|
||||
func Sum(x []float64) float64
|
||||
|
||||
// L2NormUnitary returns the L2-norm of x.
|
||||
// var scale float64
|
||||
// sumSquares := 1.0
|
||||
// for _, v := range x {
|
||||
// if v == 0 {
|
||||
// continue
|
||||
// }
|
||||
// absxi := math.Abs(v)
|
||||
// if math.IsNaN(absxi) {
|
||||
// return math.NaN()
|
||||
// }
|
||||
// if scale < absxi {
|
||||
// s := scale / absxi
|
||||
// sumSquares = 1 + sumSquares*s*s
|
||||
// scale = absxi
|
||||
// } else {
|
||||
// s := absxi / scale
|
||||
// sumSquares += s * s
|
||||
// }
|
||||
// if math.IsInf(scale, 1) {
|
||||
// return math.Inf(1)
|
||||
// }
|
||||
// }
|
||||
// return scale * math.Sqrt(sumSquares)
|
||||
func L2NormUnitary(x []float64) (sum float64)
|
||||
|
Reference in New Issue
Block a user