internal/asm,blas,floats: move level 2 norm to asm

This allows sharing of the blas implementation with floats and opens the
possibility of an assembly implementation of this function.
This commit is contained in:
Dan Kortschak
2019-10-18 11:42:47 +10:30
parent 0732d350bf
commit e2ba7f0950
9 changed files with 264 additions and 94 deletions

View File

@@ -39,52 +39,10 @@ func (Implementation) Snrm2(n int, x []float32, incX int) float32 {
}
panic(nLT0)
}
var (
scale float32 = 0
sumSquares float32 = 1
)
if incX == 1 {
x = x[:n]
for _, v := range x {
if v == 0 {
continue
return f32.L2NormUnitary(x[:n])
}
absxi := math.Abs(v)
if math.IsNaN(absxi) {
return math.NaN()
}
if scale < absxi {
sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
scale = absxi
} else {
sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(sumSquares)
}
for ix := 0; ix < n*incX; ix += incX {
val := x[ix]
if val == 0 {
continue
}
absxi := math.Abs(val)
if math.IsNaN(absxi) {
return math.NaN()
}
if scale < absxi {
sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
scale = absxi
} else {
sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(sumSquares)
return f32.L2NormInc(x, uintptr(n), uintptr(incX))
}
// Sasum computes the sum of the absolute values of the elements of x.

View File

@@ -35,52 +35,10 @@ func (Implementation) Dnrm2(n int, x []float64, incX int) float64 {
}
panic(nLT0)
}
var (
scale float64 = 0
sumSquares float64 = 1
)
if incX == 1 {
x = x[:n]
for _, v := range x {
if v == 0 {
continue
return f64.L2NormUnitary(x[:n])
}
absxi := math.Abs(v)
if math.IsNaN(absxi) {
return math.NaN()
}
if scale < absxi {
sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
scale = absxi
} else {
sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(sumSquares)
}
for ix := 0; ix < n*incX; ix += incX {
val := x[ix]
if val == 0 {
continue
}
absxi := math.Abs(val)
if math.IsNaN(absxi) {
return math.NaN()
}
if scale < absxi {
sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
scale = absxi
} else {
sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(sumSquares)
return f64.L2NormInc(x, uintptr(n), uintptr(incX))
}
// Dasum computes the sum of the absolute values of the elements of x.

View File

@@ -24,6 +24,8 @@ cat level1float64.go \
| gofmt -r 'f64.AxpyInc -> f32.AxpyInc' \
| gofmt -r 'f64.AxpyUnitary -> f32.AxpyUnitary' \
| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
| gofmt -r 'f64.L2NormInc -> f32.L2NormInc' \
| gofmt -r 'f64.L2NormUnitary -> f32.L2NormUnitary' \
| gofmt -r 'f64.ScalInc -> f32.ScalInc' \
| gofmt -r 'f64.ScalUnitary -> f32.ScalUnitary' \
\

View File

@@ -648,11 +648,7 @@ func Norm(s []float64, L float64) float64 {
return 0
}
if L == 2 {
twoNorm := math.Abs(s[0])
for i := 1; i < len(s); i++ {
twoNorm = math.Hypot(twoNorm, s[i])
}
return twoNorm
return f64.L2NormUnitary(s)
}
var norm float64
if L == 1 {

View File

@@ -261,7 +261,7 @@ func TestDistance(t *testing.T) {
copy(tmp, test.s)
Sub(tmp, test.t)
norm := Norm(tmp, L)
if dist != norm { // Use equality because they should be identical
if !EqualWithinAbsOrRel(dist, norm, 1e-15, 1e-15) {
t.Errorf("Distance does not match norm for case %v, %v. Expected %v, Found %v.", i, j, norm, dist)
}
}
@@ -1753,3 +1753,15 @@ func BenchmarkScaleSmall(b *testing.B) { benchmarkScale(b, Small) }
func BenchmarkScaleMedium(b *testing.B) { benchmarkScale(b, Medium) }
func BenchmarkScaleLarge(b *testing.B) { benchmarkScale(b, Large) }
func BenchmarkScaleHuge(b *testing.B) { benchmarkScale(b, Huge) }
func benchmarkNorm2(b *testing.B, size int) {
s := randomSlice(size)
b.ResetTimer()
for i := 0; i < b.N; i++ {
Norm(s, 2)
}
}
func BenchmarkNorm2Small(b *testing.B) { benchmarkNorm2(b, Small) }
func BenchmarkNorm2Medium(b *testing.B) { benchmarkNorm2(b, Medium) }
func BenchmarkNorm2Large(b *testing.B) { benchmarkNorm2(b, Large) }
func BenchmarkNorm2Huge(b *testing.B) { benchmarkNorm2(b, Huge) }

View File

@@ -0,0 +1,62 @@
// Copyright ©2019 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package f32
import "gonum.org/v1/gonum/internal/math32"
// L2NormUnitary is the level 2 norm of x.
func L2NormUnitary(x []float32) (sum float32) {
var scale float32
var sumSquares float32 = 1
for _, v := range x {
if v == 0 {
continue
}
absxi := math32.Abs(v)
if math32.IsNaN(absxi) {
return math32.NaN()
}
if scale < absxi {
s := scale / absxi
sumSquares = 1 + sumSquares*s*s
scale = absxi
} else {
s := absxi / scale
sumSquares += s * s
}
}
if math32.IsInf(scale, 1) {
return math32.Inf(1)
}
return scale * math32.Sqrt(sumSquares)
}
// L2NormInc is the level 2 norm of x.
func L2NormInc(x []float32, n, incX uintptr) (sum float32) {
var scale float32
var sumSquares float32 = 1
for ix := uintptr(0); ix < n*incX; ix += incX {
val := x[ix]
if val == 0 {
continue
}
absxi := math32.Abs(val)
if math32.IsNaN(absxi) {
return math32.NaN()
}
if scale < absxi {
s := scale / absxi
sumSquares = 1 + sumSquares*s*s
scale = absxi
} else {
s := absxi / scale
sumSquares += s * s
}
}
if math32.IsInf(scale, 1) {
return math32.Inf(1)
}
return scale * math32.Sqrt(sumSquares)
}

View File

@@ -0,0 +1,60 @@
// Copyright ©2019 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package f32
import "testing"
func TestL2NormUnitary(t *testing.T) {
var src_gd float32 = 1
for j, v := range []struct {
want float32
x []float32
}{
{want: 0, x: []float32{}},
{want: 2, x: []float32{2}},
{want: 3.7416573867739413, x: []float32{1, 2, 3}},
{want: 3.7416573867739413, x: []float32{-1, -2, -3}},
{want: nan, x: []float32{nan}},
{want: 17.88854381999832, x: []float32{8, -8, 8, -8, 8}},
{want: 2.23606797749979, x: []float32{0, 1, 0, -1, 0, 1, 0, -1, 0, 1}},
} {
g_ln := 4 + j%2
v.x = guardVector(v.x, src_gd, g_ln)
src := v.x[g_ln : len(v.x)-g_ln]
ret := L2NormUnitary(src)
if !within(ret, v.want) {
t.Errorf("Test %d L2Norm error Got: %f Expected: %f", j, ret, v.want)
}
if !isValidGuard(v.x, src_gd, g_ln) {
t.Errorf("Test %d Guard violated in src vector %v %v", j, v.x[:g_ln], v.x[len(v.x)-g_ln:])
}
}
}
func TestL2NormInc(t *testing.T) {
var src_gd float32 = 1
for j, v := range []struct {
inc int
want float32
x []float32
}{
{inc: 2, want: 0, x: []float32{}},
{inc: 3, want: 2, x: []float32{2}},
{inc: 10, want: 3.7416573867739413, x: []float32{1, 2, 3}},
{inc: 5, want: 3.7416573867739413, x: []float32{-1, -2, -3}},
{inc: 3, want: nan, x: []float32{nan}},
{inc: 15, want: 17.88854381999832, x: []float32{8, -8, 8, -8, 8}},
{inc: 1, want: 2.23606797749979, x: []float32{0, 1, 0, -1, 0, 1, 0, -1, 0, 1}},
} {
g_ln, ln := 4+j%2, len(v.x)
v.x = guardIncVector(v.x, src_gd, v.inc, g_ln)
src := v.x[g_ln : len(v.x)-g_ln]
ret := L2NormInc(src, uintptr(ln), uintptr(v.inc))
if !within(ret, v.want) {
t.Errorf("Test %d L2NormInc error Got: %f Expected: %f", j, ret, v.want)
}
checkValidIncGuard(t, v.x, src_gd, v.inc, g_ln)
}
}

View File

@@ -0,0 +1,62 @@
// Copyright ©2019 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package f64
import "math"
// L2NormUnitary is the level 2 norm of x.
func L2NormUnitary(x []float64) (sum float64) {
var scale float64
sumSquares := 1.0
for _, v := range x {
if v == 0 {
continue
}
absxi := math.Abs(v)
if math.IsNaN(absxi) {
return math.NaN()
}
if scale < absxi {
s := scale / absxi
sumSquares = 1 + sumSquares*s*s
scale = absxi
} else {
s := absxi / scale
sumSquares += s * s
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(sumSquares)
}
// L2NormInc is the level 2 norm of x.
func L2NormInc(x []float64, n, incX uintptr) (sum float64) {
var scale float64
sumSquares := 1.0
for ix := uintptr(0); ix < n*incX; ix += incX {
val := x[ix]
if val == 0 {
continue
}
absxi := math.Abs(val)
if math.IsNaN(absxi) {
return math.NaN()
}
if scale < absxi {
s := scale / absxi
sumSquares = 1 + sumSquares*s*s
scale = absxi
} else {
s := absxi / scale
sumSquares += s * s
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(sumSquares)
}

View File

@@ -0,0 +1,60 @@
// Copyright ©2019 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package f64
import "testing"
func TestL2NormUnitary(t *testing.T) {
var src_gd float64 = 1
for j, v := range []struct {
want float64
x []float64
}{
{want: 0, x: []float64{}},
{want: 2, x: []float64{2}},
{want: 3.7416573867739413, x: []float64{1, 2, 3}},
{want: 3.7416573867739413, x: []float64{-1, -2, -3}},
{want: nan, x: []float64{nan}},
{want: 17.88854381999832, x: []float64{8, -8, 8, -8, 8}},
{want: 2.23606797749979, x: []float64{0, 1, 0, -1, 0, 1, 0, -1, 0, 1}},
} {
g_ln := 4 + j%2
v.x = guardVector(v.x, src_gd, g_ln)
src := v.x[g_ln : len(v.x)-g_ln]
ret := L2NormUnitary(src)
if !within(ret, v.want) {
t.Errorf("Test %d L2Norm error Got: %f Expected: %f", j, ret, v.want)
}
if !isValidGuard(v.x, src_gd, g_ln) {
t.Errorf("Test %d Guard violated in src vector %v %v", j, v.x[:g_ln], v.x[len(v.x)-g_ln:])
}
}
}
func TestL2NormInc(t *testing.T) {
var src_gd float64 = 1
for j, v := range []struct {
inc int
want float64
x []float64
}{
{inc: 2, want: 0, x: []float64{}},
{inc: 3, want: 2, x: []float64{2}},
{inc: 10, want: 3.7416573867739413, x: []float64{1, 2, 3}},
{inc: 5, want: 3.7416573867739413, x: []float64{-1, -2, -3}},
{inc: 3, want: nan, x: []float64{nan}},
{inc: 15, want: 17.88854381999832, x: []float64{8, -8, 8, -8, 8}},
{inc: 1, want: 2.23606797749979, x: []float64{0, 1, 0, -1, 0, 1, 0, -1, 0, 1}},
} {
g_ln, ln := 4+j%2, len(v.x)
v.x = guardIncVector(v.x, src_gd, v.inc, g_ln)
src := v.x[g_ln : len(v.x)-g_ln]
ret := L2NormInc(src, uintptr(ln), uintptr(v.inc))
if !within(ret, v.want) {
t.Errorf("Test %d L2NormInc error Got: %f Expected: %f", j, ret, v.want)
}
checkValidIncGuard(t, v.x, src_gd, v.inc, g_ln)
}
}