mirror of
https://github.com/gonum/gonum.git
synced 2025-09-27 03:26:04 +08:00
translate netlib l2norm algorithm to asm and remove branches
Netlib algorithm reduces overflow while calculating the l2norm of a vector. Translated to asm while reducing branches in NaN and Inf checks. Overflow protection is equivalent to the Netlib standard implementation.
This commit is contained in:
@@ -36,7 +36,6 @@ func areSlicesSame(t *testing.T, truth, comp []float64, str string) {
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
if !ok {
|
if !ok {
|
||||||
t.Errorf(str+". Expected %v, returned %v", truth, comp)
|
t.Errorf(str+". Expected %v, returned %v", truth, comp)
|
||||||
@@ -96,7 +95,6 @@ func TestAddTo(t *testing.T) {
|
|||||||
if !Panics(func() { AddTo(make([]float64, 3), make([]float64, 3), make([]float64, 2)) }) {
|
if !Panics(func() { AddTo(make([]float64, 3), make([]float64, 3), make([]float64, 2)) }) {
|
||||||
t.Errorf("Did not panic with length mismatch")
|
t.Errorf("Did not panic with length mismatch")
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAddConst(t *testing.T) {
|
func TestAddConst(t *testing.T) {
|
||||||
@@ -208,7 +206,6 @@ func TestCumProd(t *testing.T) {
|
|||||||
truth = []float64{}
|
truth = []float64{}
|
||||||
CumProd(emptyReceiver, emptyReceiver)
|
CumProd(emptyReceiver, emptyReceiver)
|
||||||
areSlicesEqual(t, truth, emptyReceiver, "Wrong cumprod returned with empty receiver")
|
areSlicesEqual(t, truth, emptyReceiver, "Wrong cumprod returned with empty receiver")
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCumSum(t *testing.T) {
|
func TestCumSum(t *testing.T) {
|
||||||
@@ -231,7 +228,6 @@ func TestCumSum(t *testing.T) {
|
|||||||
truth = []float64{}
|
truth = []float64{}
|
||||||
CumSum(emptyReceiver, emptyReceiver)
|
CumSum(emptyReceiver, emptyReceiver)
|
||||||
areSlicesEqual(t, truth, emptyReceiver, "Wrong cumsum returned with empty receiver")
|
areSlicesEqual(t, truth, emptyReceiver, "Wrong cumsum returned with empty receiver")
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestDistance(t *testing.T) {
|
func TestDistance(t *testing.T) {
|
||||||
@@ -270,7 +266,6 @@ func TestDistance(t *testing.T) {
|
|||||||
if !Panics(func() { Distance([]float64{}, norms, 1) }) {
|
if !Panics(func() { Distance([]float64{}, norms, 1) }) {
|
||||||
t.Errorf("Did not panic with unequal lengths")
|
t.Errorf("Did not panic with unequal lengths")
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestDiv(t *testing.T) {
|
func TestDiv(t *testing.T) {
|
||||||
@@ -398,7 +393,7 @@ func TestEqualFunc(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestEqualsRelative(t *testing.T) {
|
func TestEqualsRelative(t *testing.T) {
|
||||||
var equalityTests = []struct {
|
equalityTests := []struct {
|
||||||
a, b float64
|
a, b float64
|
||||||
tol float64
|
tol float64
|
||||||
equal bool
|
equal bool
|
||||||
@@ -518,7 +513,6 @@ func TestEqualsULP(t *testing.T) {
|
|||||||
if EqualWithinULP(1, math.NaN(), 10) {
|
if EqualWithinULP(1, math.NaN(), 10) {
|
||||||
t.Errorf("NaN returned as equal")
|
t.Errorf("NaN returned as equal")
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestEqualLengths(t *testing.T) {
|
func TestEqualLengths(t *testing.T) {
|
||||||
@@ -699,7 +693,6 @@ func TestLogSumExp(t *testing.T) {
|
|||||||
if math.Abs(val-truth) > EqTolerance {
|
if math.Abs(val-truth) > EqTolerance {
|
||||||
t.Errorf("Wrong logsumexp for values with negative infinity")
|
t.Errorf("Wrong logsumexp for values with negative infinity")
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMaxAndIdx(t *testing.T) {
|
func TestMaxAndIdx(t *testing.T) {
|
||||||
@@ -1572,7 +1565,6 @@ func TestWithin(t *testing.T) {
|
|||||||
t.Errorf("Case %v: Idx mismatch. Want: %v, got: %v", i, test.idx, idx)
|
t.Errorf("Case %v: Idx mismatch. Want: %v, got: %v", i, test.idx, idx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func randomSlice(l int) []float64 {
|
func randomSlice(l int) []float64 {
|
||||||
|
@@ -6,33 +6,6 @@ package f64
|
|||||||
|
|
||||||
import "math"
|
import "math"
|
||||||
|
|
||||||
// L2NormUnitary is the level 2 norm of x.
|
|
||||||
func L2NormUnitary(x []float64) (sum float64) {
|
|
||||||
var scale float64
|
|
||||||
sumSquares := 1.0
|
|
||||||
for _, v := range x {
|
|
||||||
if v == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
absxi := math.Abs(v)
|
|
||||||
if math.IsNaN(absxi) {
|
|
||||||
return math.NaN()
|
|
||||||
}
|
|
||||||
if scale < absxi {
|
|
||||||
s := scale / absxi
|
|
||||||
sumSquares = 1 + sumSquares*s*s
|
|
||||||
scale = absxi
|
|
||||||
} else {
|
|
||||||
s := absxi / scale
|
|
||||||
sumSquares += s * s
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if math.IsInf(scale, 1) {
|
|
||||||
return math.Inf(1)
|
|
||||||
}
|
|
||||||
return scale * math.Sqrt(sumSquares)
|
|
||||||
}
|
|
||||||
|
|
||||||
// L2NormInc is the level 2 norm of x.
|
// L2NormInc is the level 2 norm of x.
|
||||||
func L2NormInc(x []float64, n, incX uintptr) (sum float64) {
|
func L2NormInc(x []float64, n, incX uintptr) (sum float64) {
|
||||||
var scale float64
|
var scale float64
|
||||||
|
109
internal/asm/f64/l2norm_amd64.s
Normal file
109
internal/asm/f64/l2norm_amd64.s
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
// Copyright ©2019 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// +build !noasm,!appengine,!safe
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
#define SUMSQ X0
|
||||||
|
#define ABSX X1
|
||||||
|
#define SCALE X2
|
||||||
|
#define ZERO X3
|
||||||
|
#define TMP X4
|
||||||
|
#define ABSMASK X5
|
||||||
|
#define INF X7
|
||||||
|
#define INFMASK X11
|
||||||
|
#define NANMASK X12
|
||||||
|
#define IDX AX
|
||||||
|
#define LEN SI
|
||||||
|
#define X_ DI
|
||||||
|
|
||||||
|
#define ABSMASK_DATA l2nrodata<>+0(SB)
|
||||||
|
#define INF_DATA l2nrodata<>+8(SB)
|
||||||
|
#define NAN_DATA l2nrodata<>+16(SB)
|
||||||
|
// AbsMask
|
||||||
|
DATA l2nrodata<>+0(SB)/8, $0x7FFFFFFFFFFFFFFF
|
||||||
|
// Inf
|
||||||
|
DATA l2nrodata<>+8(SB)/8, $0x7FF0000000000000
|
||||||
|
// NaN
|
||||||
|
DATA l2nrodata<>+16(SB)/8, $0xFFF8000000000000
|
||||||
|
GLOBL l2nrodata<>+0(SB), RODATA, $24
|
||||||
|
|
||||||
|
// L2NormUnitary returns the L2-norm of x.
|
||||||
|
// func L2NormUnitary(x []float64) (sum float64)
|
||||||
|
TEXT ·L2NormUnitary(SB), NOSPLIT, $0
|
||||||
|
MOVQ x_len+8(FP), LEN
|
||||||
|
MOVQ x_base+0(FP), X_
|
||||||
|
PXOR ZERO, ZERO
|
||||||
|
CMPQ LEN, $0 // if len== 0 {return 0}
|
||||||
|
JZ retZero
|
||||||
|
|
||||||
|
PXOR INFMASK, INFMASK
|
||||||
|
PXOR NANMASK, NANMASK
|
||||||
|
MOVSD $1.0, SUMSQ // ssq = 1
|
||||||
|
XORPS SCALE, SCALE
|
||||||
|
MOVSD l2nrodata<>+0(SB), ABSMASK
|
||||||
|
MOVSD l2nrodata<>+8(SB), INF
|
||||||
|
XORQ IDX, IDX // idx == 0
|
||||||
|
|
||||||
|
initZero: // for ;x[i]==0; i++ {}
|
||||||
|
// Skip all leading zeros, to avoid divide by zero NaN
|
||||||
|
MOVSD (X_)(IDX*8), ABSX // absxi = x[i]
|
||||||
|
UCOMISD ABSX, ZERO
|
||||||
|
JP retNaN // if isNaN(x[i]) { return NaN }
|
||||||
|
JNE loop // if x[i] != 0 { goto loop }
|
||||||
|
INCQ IDX
|
||||||
|
CMPQ IDX, LEN
|
||||||
|
JE retZero // if ++i == len(x) { return 0 }
|
||||||
|
JMP initZero
|
||||||
|
|
||||||
|
loop:
|
||||||
|
MOVSD (X_)(IDX*8), ABSX // absxi = x[i]
|
||||||
|
MOVUPS ABSX, TMP
|
||||||
|
CMPSD ABSX, TMP, $3
|
||||||
|
ORPD TMP, NANMASK // NANMASK = NANMASK | IsNaN(absxi)
|
||||||
|
MOVSD INF, TMP
|
||||||
|
ANDPD ABSMASK, ABSX // absxi == Abs(absxi)
|
||||||
|
CMPSD ABSX, TMP, $0
|
||||||
|
ORPD TMP, INFMASK // INFMASK = INFMASK | IsInf(absxi)
|
||||||
|
UCOMISD SCALE, ABSX
|
||||||
|
JA adjScale // IF SCALE > ABSXI { goto adjScale }
|
||||||
|
|
||||||
|
DIVSD SCALE, ABSX // absxi = scale / absxi
|
||||||
|
MULSD ABSX, ABSX // absxi *= absxi
|
||||||
|
ADDSD ABSX, SUMSQ // sumsq += absxi
|
||||||
|
INCQ IDX // ++i
|
||||||
|
CMPQ IDX, LEN
|
||||||
|
JNE loop // if i < len(x) { continue }
|
||||||
|
JMP retSum // if i == len(x) { goto retSum }
|
||||||
|
|
||||||
|
adjScale: // Scale > Absxi
|
||||||
|
DIVSD ABSX, SCALE // tmp = absxi / scale
|
||||||
|
MULSD SCALE, SUMSQ // sumsq *= tmp
|
||||||
|
MULSD SCALE, SUMSQ // sumsq *= tmp
|
||||||
|
ADDSD $1.0, SUMSQ // sumsq += 1
|
||||||
|
MOVUPS ABSX, SCALE // scale = absxi
|
||||||
|
INCQ IDX // ++i
|
||||||
|
CMPQ IDX, LEN
|
||||||
|
JNE loop // if i < len(x) { continue }
|
||||||
|
|
||||||
|
retSum: // Calculate return value
|
||||||
|
SQRTSD SUMSQ, SUMSQ // sumsq = sqrt(sumsq)
|
||||||
|
MULSD SCALE, SUMSQ // sumsq += scale
|
||||||
|
MOVQ SUMSQ, R10 // tmp = sumsq
|
||||||
|
UCOMISD ZERO, INFMASK
|
||||||
|
CMOVQPS INF_DATA, R10 // if INFMASK { tmp = INF }
|
||||||
|
UCOMISD ZERO, NANMASK
|
||||||
|
CMOVQPS NAN_DATA, R10 // if NANMASK { tmp = NaN }
|
||||||
|
MOVQ R10, sum+24(FP) // return tmp
|
||||||
|
RET
|
||||||
|
|
||||||
|
retZero:
|
||||||
|
MOVSD ZERO, sum+24(FP) // return 0
|
||||||
|
RET
|
||||||
|
|
||||||
|
retNaN:
|
||||||
|
MOVSD NAN_DATA, TMP // return NaN
|
||||||
|
MOVSD TMP, sum+24(FP)
|
||||||
|
RET
|
36
internal/asm/f64/l2norm_noasm.go
Normal file
36
internal/asm/f64/l2norm_noasm.go
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
// Copyright ©2019 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// +build !amd64 noasm appengine safe
|
||||||
|
|
||||||
|
package f64
|
||||||
|
|
||||||
|
import "math"
|
||||||
|
|
||||||
|
// L2NormUnitary returns the L2-norm of x.
|
||||||
|
func L2NormUnitary(x []float64) (sum float64) {
|
||||||
|
var scale float64
|
||||||
|
sumSquares := 1.0
|
||||||
|
for _, v := range x {
|
||||||
|
if v == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
absxi := math.Abs(v)
|
||||||
|
if math.IsNaN(absxi) {
|
||||||
|
return math.NaN()
|
||||||
|
}
|
||||||
|
if scale < absxi {
|
||||||
|
s := scale / absxi
|
||||||
|
sumSquares = 1 + sumSquares*s*s
|
||||||
|
scale = absxi
|
||||||
|
} else {
|
||||||
|
s := absxi / scale
|
||||||
|
sumSquares += s * s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if math.IsInf(scale, 1) {
|
||||||
|
return math.Inf(1)
|
||||||
|
}
|
||||||
|
return scale * math.Sqrt(sumSquares)
|
||||||
|
}
|
@@ -4,7 +4,20 @@
|
|||||||
|
|
||||||
package f64
|
package f64
|
||||||
|
|
||||||
import "testing"
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// nanwith copied from floats package
|
||||||
|
func nanwith(payload uint64) float64 {
|
||||||
|
const (
|
||||||
|
nanBits = 0x7ff8000000000000
|
||||||
|
nanMask = 0xfff8000000000000
|
||||||
|
)
|
||||||
|
return math.Float64frombits(nanBits | (payload &^ nanMask))
|
||||||
|
}
|
||||||
|
|
||||||
func TestL2NormUnitary(t *testing.T) {
|
func TestL2NormUnitary(t *testing.T) {
|
||||||
var src_gd float64 = 1
|
var src_gd float64 = 1
|
||||||
@@ -17,6 +30,7 @@ func TestL2NormUnitary(t *testing.T) {
|
|||||||
{want: 3.7416573867739413, x: []float64{1, 2, 3}},
|
{want: 3.7416573867739413, x: []float64{1, 2, 3}},
|
||||||
{want: 3.7416573867739413, x: []float64{-1, -2, -3}},
|
{want: 3.7416573867739413, x: []float64{-1, -2, -3}},
|
||||||
{want: nan, x: []float64{nan}},
|
{want: nan, x: []float64{nan}},
|
||||||
|
{want: nan, x: []float64{1, inf, 3, nanwith(25), 5}},
|
||||||
{want: 17.88854381999832, x: []float64{8, -8, 8, -8, 8}},
|
{want: 17.88854381999832, x: []float64{8, -8, 8, -8, 8}},
|
||||||
{want: 2.23606797749979, x: []float64{0, 1, 0, -1, 0, 1, 0, -1, 0, 1}},
|
{want: 2.23606797749979, x: []float64{0, 1, 0, -1, 0, 1, 0, -1, 0, 1}},
|
||||||
} {
|
} {
|
||||||
@@ -87,3 +101,52 @@ func TestL2DistanceUnitary(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkL2NormNetlib(b *testing.B) {
|
||||||
|
netlib := func(x []float64) (sum float64) {
|
||||||
|
var scale float64
|
||||||
|
sumSquares := 1.0
|
||||||
|
for _, v := range x {
|
||||||
|
if v == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
absxi := math.Abs(v)
|
||||||
|
if math.IsNaN(absxi) {
|
||||||
|
return math.NaN()
|
||||||
|
}
|
||||||
|
if scale < absxi {
|
||||||
|
s := scale / absxi
|
||||||
|
sumSquares = 1 + sumSquares*s*s
|
||||||
|
scale = absxi
|
||||||
|
} else {
|
||||||
|
s := absxi / scale
|
||||||
|
sumSquares += s * s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if math.IsInf(scale, 1) {
|
||||||
|
return math.Inf(1)
|
||||||
|
}
|
||||||
|
return scale * math.Sqrt(sumSquares)
|
||||||
|
}
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
f func(x []float64) float64
|
||||||
|
}{
|
||||||
|
{"L2NormUnitaryNetlib", netlib},
|
||||||
|
{"L2NormUnitary", L2NormUnitary},
|
||||||
|
}
|
||||||
|
x[0] = randomSlice(1, 1)[0] // replace the leading zero (edge case)
|
||||||
|
for _, test := range tests {
|
||||||
|
for _, ln := range []uintptr{1, 3, 10, 30, 1e2, 3e2, 1e3, 3e3, 1e4, 3e4, 1e5} {
|
||||||
|
b.Run(fmt.Sprintf("%s-%d", test.name, ln), func(b *testing.B) {
|
||||||
|
b.SetBytes(int64(64 * ln))
|
||||||
|
x := x[:ln]
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
test.f(x)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -170,3 +170,29 @@ func ScalIncTo(dst []float64, incDst uintptr, alpha float64, x []float64, n, inc
|
|||||||
// sum += x[i]
|
// sum += x[i]
|
||||||
// }
|
// }
|
||||||
func Sum(x []float64) float64
|
func Sum(x []float64) float64
|
||||||
|
|
||||||
|
// L2NormUnitary returns the L2-norm of x.
|
||||||
|
// var scale float64
|
||||||
|
// sumSquares := 1.0
|
||||||
|
// for _, v := range x {
|
||||||
|
// if v == 0 {
|
||||||
|
// continue
|
||||||
|
// }
|
||||||
|
// absxi := math.Abs(v)
|
||||||
|
// if math.IsNaN(absxi) {
|
||||||
|
// return math.NaN()
|
||||||
|
// }
|
||||||
|
// if scale < absxi {
|
||||||
|
// s := scale / absxi
|
||||||
|
// sumSquares = 1 + sumSquares*s*s
|
||||||
|
// scale = absxi
|
||||||
|
// } else {
|
||||||
|
// s := absxi / scale
|
||||||
|
// sumSquares += s * s
|
||||||
|
// }
|
||||||
|
// if math.IsInf(scale, 1) {
|
||||||
|
// return math.Inf(1)
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// return scale * math.Sqrt(sumSquares)
|
||||||
|
func L2NormUnitary(x []float64) (sum float64)
|
||||||
|
Reference in New Issue
Block a user