translate netlib l2norm algorithm to asm and remove branches

Netlib algorithm reduces overflow while calculating the l2norm of a
vector.

Translated to asm while reducing branches in NaN and Inf checks.
Overflow protection is equivalent to the Netlib standard implementation.
This commit is contained in:
Chad Kunde
2019-11-03 21:11:46 +08:00
committed by Chad Kunde
parent 90165046ad
commit 4e1ef9c972
6 changed files with 236 additions and 37 deletions

View File

@@ -36,7 +36,6 @@ func areSlicesSame(t *testing.T, truth, comp []float64, str string) {
break
}
}
}
if !ok {
t.Errorf(str+". Expected %v, returned %v", truth, comp)
@@ -96,7 +95,6 @@ func TestAddTo(t *testing.T) {
if !Panics(func() { AddTo(make([]float64, 3), make([]float64, 3), make([]float64, 2)) }) {
t.Errorf("Did not panic with length mismatch")
}
}
func TestAddConst(t *testing.T) {
@@ -208,7 +206,6 @@ func TestCumProd(t *testing.T) {
truth = []float64{}
CumProd(emptyReceiver, emptyReceiver)
areSlicesEqual(t, truth, emptyReceiver, "Wrong cumprod returned with empty receiver")
}
func TestCumSum(t *testing.T) {
@@ -231,7 +228,6 @@ func TestCumSum(t *testing.T) {
truth = []float64{}
CumSum(emptyReceiver, emptyReceiver)
areSlicesEqual(t, truth, emptyReceiver, "Wrong cumsum returned with empty receiver")
}
func TestDistance(t *testing.T) {
@@ -270,7 +266,6 @@ func TestDistance(t *testing.T) {
if !Panics(func() { Distance([]float64{}, norms, 1) }) {
t.Errorf("Did not panic with unequal lengths")
}
}
func TestDiv(t *testing.T) {
@@ -398,7 +393,7 @@ func TestEqualFunc(t *testing.T) {
}
func TestEqualsRelative(t *testing.T) {
var equalityTests = []struct {
equalityTests := []struct {
a, b float64
tol float64
equal bool
@@ -518,7 +513,6 @@ func TestEqualsULP(t *testing.T) {
if EqualWithinULP(1, math.NaN(), 10) {
t.Errorf("NaN returned as equal")
}
}
func TestEqualLengths(t *testing.T) {
@@ -699,7 +693,6 @@ func TestLogSumExp(t *testing.T) {
if math.Abs(val-truth) > EqTolerance {
t.Errorf("Wrong logsumexp for values with negative infinity")
}
}
func TestMaxAndIdx(t *testing.T) {
@@ -1572,7 +1565,6 @@ func TestWithin(t *testing.T) {
t.Errorf("Case %v: Idx mismatch. Want: %v, got: %v", i, test.idx, idx)
}
}
}
func randomSlice(l int) []float64 {

View File

@@ -6,33 +6,6 @@ package f64
import "math"
// L2NormUnitary is the level 2 norm of x.
func L2NormUnitary(x []float64) (sum float64) {
var scale float64
sumSquares := 1.0
for _, v := range x {
if v == 0 {
continue
}
absxi := math.Abs(v)
if math.IsNaN(absxi) {
return math.NaN()
}
if scale < absxi {
s := scale / absxi
sumSquares = 1 + sumSquares*s*s
scale = absxi
} else {
s := absxi / scale
sumSquares += s * s
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(sumSquares)
}
// L2NormInc is the level 2 norm of x.
func L2NormInc(x []float64, n, incX uintptr) (sum float64) {
var scale float64

View File

@@ -0,0 +1,109 @@
// Copyright ©2019 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !noasm,!appengine,!safe
#include "textflag.h"
#define SUMSQ X0
#define ABSX X1
#define SCALE X2
#define ZERO X3
#define TMP X4
#define ABSMASK X5
#define INF X7
#define INFMASK X11
#define NANMASK X12
#define IDX AX
#define LEN SI
#define X_ DI
#define ABSMASK_DATA l2nrodata<>+0(SB)
#define INF_DATA l2nrodata<>+8(SB)
#define NAN_DATA l2nrodata<>+16(SB)
// AbsMask
DATA l2nrodata<>+0(SB)/8, $0x7FFFFFFFFFFFFFFF
// Inf
DATA l2nrodata<>+8(SB)/8, $0x7FF0000000000000
// NaN
DATA l2nrodata<>+16(SB)/8, $0xFFF8000000000000
GLOBL l2nrodata<>+0(SB), RODATA, $24
// L2NormUnitary returns the L2-norm of x.
// func L2NormUnitary(x []float64) (sum float64)
TEXT ·L2NormUnitary(SB), NOSPLIT, $0
MOVQ x_len+8(FP), LEN
MOVQ x_base+0(FP), X_
PXOR ZERO, ZERO
CMPQ LEN, $0 // if len== 0 {return 0}
JZ retZero
PXOR INFMASK, INFMASK
PXOR NANMASK, NANMASK
MOVSD $1.0, SUMSQ // ssq = 1
XORPS SCALE, SCALE
MOVSD l2nrodata<>+0(SB), ABSMASK
MOVSD l2nrodata<>+8(SB), INF
XORQ IDX, IDX // idx == 0
initZero: // for ;x[i]==0; i++ {}
// Skip all leading zeros, to avoid divide by zero NaN
MOVSD (X_)(IDX*8), ABSX // absxi = x[i]
UCOMISD ABSX, ZERO
JP retNaN // if isNaN(x[i]) { return NaN }
JNE loop // if x[i] != 0 { goto loop }
INCQ IDX
CMPQ IDX, LEN
JE retZero // if ++i == len(x) { return 0 }
JMP initZero
loop:
MOVSD (X_)(IDX*8), ABSX // absxi = x[i]
MOVUPS ABSX, TMP
CMPSD ABSX, TMP, $3
ORPD TMP, NANMASK // NANMASK = NANMASK | IsNaN(absxi)
MOVSD INF, TMP
ANDPD ABSMASK, ABSX // absxi == Abs(absxi)
CMPSD ABSX, TMP, $0
ORPD TMP, INFMASK // INFMASK = INFMASK | IsInf(absxi)
UCOMISD SCALE, ABSX
JA adjScale // IF SCALE > ABSXI { goto adjScale }
DIVSD SCALE, ABSX // absxi = scale / absxi
MULSD ABSX, ABSX // absxi *= absxi
ADDSD ABSX, SUMSQ // sumsq += absxi
INCQ IDX // ++i
CMPQ IDX, LEN
JNE loop // if i < len(x) { continue }
JMP retSum // if i == len(x) { goto retSum }
adjScale: // Scale > Absxi
DIVSD ABSX, SCALE // tmp = absxi / scale
MULSD SCALE, SUMSQ // sumsq *= tmp
MULSD SCALE, SUMSQ // sumsq *= tmp
ADDSD $1.0, SUMSQ // sumsq += 1
MOVUPS ABSX, SCALE // scale = absxi
INCQ IDX // ++i
CMPQ IDX, LEN
JNE loop // if i < len(x) { continue }
retSum: // Calculate return value
SQRTSD SUMSQ, SUMSQ // sumsq = sqrt(sumsq)
MULSD SCALE, SUMSQ // sumsq += scale
MOVQ SUMSQ, R10 // tmp = sumsq
UCOMISD ZERO, INFMASK
CMOVQPS INF_DATA, R10 // if INFMASK { tmp = INF }
UCOMISD ZERO, NANMASK
CMOVQPS NAN_DATA, R10 // if NANMASK { tmp = NaN }
MOVQ R10, sum+24(FP) // return tmp
RET
retZero:
MOVSD ZERO, sum+24(FP) // return 0
RET
retNaN:
MOVSD NAN_DATA, TMP // return NaN
MOVSD TMP, sum+24(FP)
RET

View File

@@ -0,0 +1,36 @@
// Copyright ©2019 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64 noasm appengine safe
package f64
import "math"
// L2NormUnitary returns the L2-norm of x.
func L2NormUnitary(x []float64) (sum float64) {
var scale float64
sumSquares := 1.0
for _, v := range x {
if v == 0 {
continue
}
absxi := math.Abs(v)
if math.IsNaN(absxi) {
return math.NaN()
}
if scale < absxi {
s := scale / absxi
sumSquares = 1 + sumSquares*s*s
scale = absxi
} else {
s := absxi / scale
sumSquares += s * s
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(sumSquares)
}

View File

@@ -4,7 +4,20 @@
package f64
import "testing"
import (
"fmt"
"math"
"testing"
)
// nanwith copied from floats package
func nanwith(payload uint64) float64 {
const (
nanBits = 0x7ff8000000000000
nanMask = 0xfff8000000000000
)
return math.Float64frombits(nanBits | (payload &^ nanMask))
}
func TestL2NormUnitary(t *testing.T) {
var src_gd float64 = 1
@@ -17,6 +30,7 @@ func TestL2NormUnitary(t *testing.T) {
{want: 3.7416573867739413, x: []float64{1, 2, 3}},
{want: 3.7416573867739413, x: []float64{-1, -2, -3}},
{want: nan, x: []float64{nan}},
{want: nan, x: []float64{1, inf, 3, nanwith(25), 5}},
{want: 17.88854381999832, x: []float64{8, -8, 8, -8, 8}},
{want: 2.23606797749979, x: []float64{0, 1, 0, -1, 0, 1, 0, -1, 0, 1}},
} {
@@ -87,3 +101,52 @@ func TestL2DistanceUnitary(t *testing.T) {
}
}
}
func BenchmarkL2NormNetlib(b *testing.B) {
netlib := func(x []float64) (sum float64) {
var scale float64
sumSquares := 1.0
for _, v := range x {
if v == 0 {
continue
}
absxi := math.Abs(v)
if math.IsNaN(absxi) {
return math.NaN()
}
if scale < absxi {
s := scale / absxi
sumSquares = 1 + sumSquares*s*s
scale = absxi
} else {
s := absxi / scale
sumSquares += s * s
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(sumSquares)
}
tests := []struct {
name string
f func(x []float64) float64
}{
{"L2NormUnitaryNetlib", netlib},
{"L2NormUnitary", L2NormUnitary},
}
x[0] = randomSlice(1, 1)[0] // replace the leading zero (edge case)
for _, test := range tests {
for _, ln := range []uintptr{1, 3, 10, 30, 1e2, 3e2, 1e3, 3e3, 1e4, 3e4, 1e5} {
b.Run(fmt.Sprintf("%s-%d", test.name, ln), func(b *testing.B) {
b.SetBytes(int64(64 * ln))
x := x[:ln]
b.ResetTimer()
for i := 0; i < b.N; i++ {
test.f(x)
}
})
}
}
}

View File

@@ -170,3 +170,29 @@ func ScalIncTo(dst []float64, incDst uintptr, alpha float64, x []float64, n, inc
// sum += x[i]
// }
func Sum(x []float64) float64
// L2NormUnitary returns the L2-norm of x.
// var scale float64
// sumSquares := 1.0
// for _, v := range x {
// if v == 0 {
// continue
// }
// absxi := math.Abs(v)
// if math.IsNaN(absxi) {
// return math.NaN()
// }
// if scale < absxi {
// s := scale / absxi
// sumSquares = 1 + sumSquares*s*s
// scale = absxi
// } else {
// s := absxi / scale
// sumSquares += s * s
// }
// if math.IsInf(scale, 1) {
// return math.Inf(1)
// }
// }
// return scale * math.Sqrt(sumSquares)
func L2NormUnitary(x []float64) (sum float64)