internal/asm,blas,floats: move level 2 norm to asm

This allows sharing of the blas implementation with floats and opens the possibility of an assembly implementation of this function.
2025-10-23 15:13:31 +08:00 · 2019-10-18 11:42:47 +10:30
parent 0732d350bf
commit e2ba7f0950
9 changed files with 264 additions and 94 deletions
--- a/blas/gonum/level1float32.go
+++ b/blas/gonum/level1float32.go
@@ -39,52 +39,10 @@ func (Implementation) Snrm2(n int, x []float32, incX int) float32 {
 		}
 		panic(nLT0)
 	}
 	var (
 		scale      float32 = 0
 		sumSquares float32 = 1
 	)
 	if incX == 1 {
-		x = x[:n]
+		return f32.L2NormUnitary(x[:n])
 		for _, v := range x {
 			if v == 0 {
 				continue
 	}
-			absxi := math.Abs(v)
+	return f32.L2NormInc(x, uintptr(n), uintptr(incX))
 			if math.IsNaN(absxi) {
 				return math.NaN()
 			}
 			if scale < absxi {
 				sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
 				scale = absxi
 			} else {
 				sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
 			}
 		}
 		if math.IsInf(scale, 1) {
 			return math.Inf(1)
 		}
 		return scale * math.Sqrt(sumSquares)
 	}
 	for ix := 0; ix < n*incX; ix += incX {
 		val := x[ix]
 		if val == 0 {
 			continue
 		}
 		absxi := math.Abs(val)
 		if math.IsNaN(absxi) {
 			return math.NaN()
 		}
 		if scale < absxi {
 			sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
 			scale = absxi
 		} else {
 			sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
 		}
 	}
 	if math.IsInf(scale, 1) {
 		return math.Inf(1)
 	}
 	return scale * math.Sqrt(sumSquares)
 }
 // Sasum computes the sum of the absolute values of the elements of x.
--- a/blas/gonum/level1float64.go
+++ b/blas/gonum/level1float64.go
@@ -35,52 +35,10 @@ func (Implementation) Dnrm2(n int, x []float64, incX int) float64 {
 		}
 		panic(nLT0)
 	}
 	var (
 		scale      float64 = 0
 		sumSquares float64 = 1
 	)
 	if incX == 1 {
-		x = x[:n]
+		return f64.L2NormUnitary(x[:n])
 		for _, v := range x {
 			if v == 0 {
 				continue
 	}
-			absxi := math.Abs(v)
+	return f64.L2NormInc(x, uintptr(n), uintptr(incX))
 			if math.IsNaN(absxi) {
 				return math.NaN()
 			}
 			if scale < absxi {
 				sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
 				scale = absxi
 			} else {
 				sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
 			}
 		}
 		if math.IsInf(scale, 1) {
 			return math.Inf(1)
 		}
 		return scale * math.Sqrt(sumSquares)
 	}
 	for ix := 0; ix < n*incX; ix += incX {
 		val := x[ix]
 		if val == 0 {
 			continue
 		}
 		absxi := math.Abs(val)
 		if math.IsNaN(absxi) {
 			return math.NaN()
 		}
 		if scale < absxi {
 			sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
 			scale = absxi
 		} else {
 			sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
 		}
 	}
 	if math.IsInf(scale, 1) {
 		return math.Inf(1)
 	}
 	return scale * math.Sqrt(sumSquares)
 }
 // Dasum computes the sum of the absolute values of the elements of x.
--- a/blas/gonum/single_precision.bash
+++ b/blas/gonum/single_precision.bash
@@ -24,6 +24,8 @@ cat level1float64.go \
 | gofmt -r 'f64.AxpyInc -> f32.AxpyInc' \
 | gofmt -r 'f64.AxpyUnitary -> f32.AxpyUnitary' \
 | gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
 | gofmt -r 'f64.L2NormInc -> f32.L2NormInc' \
 | gofmt -r 'f64.L2NormUnitary -> f32.L2NormUnitary' \
 | gofmt -r 'f64.ScalInc -> f32.ScalInc' \
 | gofmt -r 'f64.ScalUnitary -> f32.ScalUnitary' \
 \
--- a/floats/floats.go
+++ b/floats/floats.go
@@ -648,11 +648,7 @@ func Norm(s []float64, L float64) float64 {
 		return 0
 	}
 	if L == 2 {
-		twoNorm := math.Abs(s[0])
+		return f64.L2NormUnitary(s)
 		for i := 1; i < len(s); i++ {
 			twoNorm = math.Hypot(twoNorm, s[i])
 		}
 		return twoNorm
 	}
 	var norm float64
 	if L == 1 {
--- a/floats/floats_test.go
+++ b/floats/floats_test.go
@@ -261,7 +261,7 @@ func TestDistance(t *testing.T) {
 			copy(tmp, test.s)
 			Sub(tmp, test.t)
 			norm := Norm(tmp, L)
-			if dist != norm { // Use equality because they should be identical
+			if !EqualWithinAbsOrRel(dist, norm, 1e-15, 1e-15) {
 				t.Errorf("Distance does not match norm for case %v, %v. Expected %v, Found %v.", i, j, norm, dist)
 			}
 		}
@@ -1753,3 +1753,15 @@ func BenchmarkScaleSmall(b *testing.B)  { benchmarkScale(b, Small) }
 func BenchmarkScaleMedium(b *testing.B) { benchmarkScale(b, Medium) }
 func BenchmarkScaleLarge(b *testing.B)  { benchmarkScale(b, Large) }
 func BenchmarkScaleHuge(b *testing.B)   { benchmarkScale(b, Huge) }
 // benchmarkNorm2 times repeated calls of Norm with L = 2 on the slice
 // returned by randomSlice(size). The slice is built before the timer is
 // reset so its construction is excluded from the measurement.
 func benchmarkNorm2(b *testing.B, size int) {
 	data := randomSlice(size)
 	b.ResetTimer()
 	for iter := 0; iter < b.N; iter++ {
 		Norm(data, 2)
 	}
 }
 // BenchmarkNorm2Small runs benchmarkNorm2 with the Small size.
 func BenchmarkNorm2Small(b *testing.B) {
 	benchmarkNorm2(b, Small)
 }
 // BenchmarkNorm2Medium runs benchmarkNorm2 with the Medium size.
 func BenchmarkNorm2Medium(b *testing.B) {
 	benchmarkNorm2(b, Medium)
 }
 // BenchmarkNorm2Large runs benchmarkNorm2 with the Large size.
 func BenchmarkNorm2Large(b *testing.B) {
 	benchmarkNorm2(b, Large)
 }
 // BenchmarkNorm2Huge runs benchmarkNorm2 with the Huge size.
 func BenchmarkNorm2Huge(b *testing.B) {
 	benchmarkNorm2(b, Huge)
 }
--- a/internal/asm/f32/l2norm.go
+++ b/internal/asm/f32/l2norm.go
@@ -0,0 +1,62 @@
 // Copyright ©2019 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 package f32
 import "gonum.org/v1/gonum/internal/math32"
 // L2NormUnitary returns the Euclidean (L2) norm of the elements of x,
 // that is the square root of the sum of their squares.
 //
 // The sum is accumulated relative to the largest magnitude seen so far
 // rather than by squaring the elements directly. An empty x gives 0.
 // If a NaN element is encountered the result is NaN; otherwise, if any
 // element is infinite the result is +Inf.
 func L2NormUnitary(x []float32) (sum float32) {
 	var largest float32
 	ssq := float32(1)
 	for i := 0; i < len(x); i++ {
 		if x[i] == 0 {
 			// Zero elements contribute nothing to the norm.
 			continue
 		}
 		mag := math32.Abs(x[i])
 		switch {
 		case math32.IsNaN(mag):
 			return math32.NaN()
 		case largest < mag:
 			// New largest magnitude: rescale the running sum to it.
 			r := largest / mag
 			ssq = 1 + ssq*r*r
 			largest = mag
 		default:
 			r := mag / largest
 			ssq += r * r
 		}
 	}
 	if math32.IsInf(largest, 1) {
 		// ssq may be NaN here (Inf/Inf), so return the infinity explicitly.
 		return math32.Inf(1)
 	}
 	return largest * math32.Sqrt(ssq)
 }
 // L2NormInc returns the Euclidean (L2) norm of the elements of x found at
 // indices 0, incX, 2*incX, ... that are below n*incX.
 //
 // The sum of squares is accumulated relative to the largest magnitude
 // seen so far. If no element is visited the result is 0. If a NaN element
 // is encountered the result is NaN; otherwise, if any visited element is
 // infinite the result is +Inf.
 func L2NormInc(x []float32, n, incX uintptr) (sum float32) {
 	var largest float32
 	ssq := float32(1)
 	limit := n * incX
 	for i := uintptr(0); i < limit; i += incX {
 		if x[i] == 0 {
 			// Zero elements contribute nothing to the norm.
 			continue
 		}
 		mag := math32.Abs(x[i])
 		switch {
 		case math32.IsNaN(mag):
 			return math32.NaN()
 		case largest < mag:
 			// New largest magnitude: rescale the running sum to it.
 			r := largest / mag
 			ssq = 1 + ssq*r*r
 			largest = mag
 		default:
 			r := mag / largest
 			ssq += r * r
 		}
 	}
 	if math32.IsInf(largest, 1) {
 		// ssq may be NaN here (Inf/Inf), so return the infinity explicitly.
 		return math32.Inf(1)
 	}
 	return largest * math32.Sqrt(ssq)
 }
--- a/internal/asm/f32/l2norm_test.go
+++ b/internal/asm/f32/l2norm_test.go
@@ -0,0 +1,60 @@
 // Copyright ©2019 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 package f32
 import "testing"
 // TestL2NormUnitary checks L2NormUnitary against a table of expected norms.
 // Each input is passed through guardVector with a guard length that
 // alternates between 4 and 5, and isValidGuard is consulted afterwards.
 func TestL2NormUnitary(t *testing.T) {
 	guardVal := float32(1)
 	tests := []struct {
 		want float32
 		x    []float32
 	}{
 		{want: 0, x: []float32{}},
 		{want: 2, x: []float32{2}},
 		{want: 3.7416573867739413, x: []float32{1, 2, 3}},
 		{want: 3.7416573867739413, x: []float32{-1, -2, -3}},
 		{want: nan, x: []float32{nan}},
 		{want: 17.88854381999832, x: []float32{8, -8, 8, -8, 8}},
 		{want: 2.23606797749979, x: []float32{0, 1, 0, -1, 0, 1, 0, -1, 0, 1}},
 	}
 	for i, test := range tests {
 		guardLen := 4 + i%2
 		guarded := guardVector(test.x, guardVal, guardLen)
 		got := L2NormUnitary(guarded[guardLen : len(guarded)-guardLen])
 		if !within(got, test.want) {
 			t.Errorf("Test %d L2Norm error Got: %f Expected: %f", i, got, test.want)
 		}
 		if !isValidGuard(guarded, guardVal, guardLen) {
 			t.Errorf("Test %d Guard violated in src vector %v %v", i, guarded[:guardLen], guarded[len(guarded)-guardLen:])
 		}
 	}
 }
 // TestL2NormInc checks L2NormInc against a table of expected norms for a
 // range of increments. Each input is passed through guardIncVector with a
 // guard length that alternates between 4 and 5, and checkValidIncGuard is
 // called on the result afterwards.
 func TestL2NormInc(t *testing.T) {
 	guardVal := float32(1)
 	tests := []struct {
 		inc  int
 		want float32
 		x    []float32
 	}{
 		{inc: 2, want: 0, x: []float32{}},
 		{inc: 3, want: 2, x: []float32{2}},
 		{inc: 10, want: 3.7416573867739413, x: []float32{1, 2, 3}},
 		{inc: 5, want: 3.7416573867739413, x: []float32{-1, -2, -3}},
 		{inc: 3, want: nan, x: []float32{nan}},
 		{inc: 15, want: 17.88854381999832, x: []float32{8, -8, 8, -8, 8}},
 		{inc: 1, want: 2.23606797749979, x: []float32{0, 1, 0, -1, 0, 1, 0, -1, 0, 1}},
 	}
 	for i, test := range tests {
 		guardLen := 4 + i%2
 		// The element count must be taken before the input is expanded.
 		count := len(test.x)
 		guarded := guardIncVector(test.x, guardVal, test.inc, guardLen)
 		inner := guarded[guardLen : len(guarded)-guardLen]
 		got := L2NormInc(inner, uintptr(count), uintptr(test.inc))
 		if !within(got, test.want) {
 			t.Errorf("Test %d L2NormInc error Got: %f Expected: %f", i, got, test.want)
 		}
 		checkValidIncGuard(t, guarded, guardVal, test.inc, guardLen)
 	}
 }
--- a/internal/asm/f64/l2norm.go
+++ b/internal/asm/f64/l2norm.go
@@ -0,0 +1,62 @@
 // Copyright ©2019 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 package f64
 import "math"
 // L2NormUnitary returns the Euclidean (L2) norm of the elements of x,
 // that is the square root of the sum of their squares.
 //
 // The sum is accumulated relative to the largest magnitude seen so far
 // rather than by squaring the elements directly. An empty x gives 0.
 // If a NaN element is encountered the result is NaN; otherwise, if any
 // element is infinite the result is +Inf.
 func L2NormUnitary(x []float64) (sum float64) {
 	var (
 		largest float64
 		ssq     = 1.0
 	)
 	for i := 0; i < len(x); i++ {
 		if x[i] == 0 {
 			// Zero elements contribute nothing to the norm.
 			continue
 		}
 		mag := math.Abs(x[i])
 		switch {
 		case math.IsNaN(mag):
 			return math.NaN()
 		case largest < mag:
 			// New largest magnitude: rescale the running sum to it.
 			r := largest / mag
 			ssq = 1 + ssq*r*r
 			largest = mag
 		default:
 			r := mag / largest
 			ssq += r * r
 		}
 	}
 	if math.IsInf(largest, 1) {
 		// ssq may be NaN here (Inf/Inf), so return the infinity explicitly.
 		return math.Inf(1)
 	}
 	return largest * math.Sqrt(ssq)
 }
 // L2NormInc returns the Euclidean (L2) norm of the elements of x found at
 // indices 0, incX, 2*incX, ... that are below n*incX.
 //
 // The sum of squares is accumulated relative to the largest magnitude
 // seen so far. If no element is visited the result is 0. If a NaN element
 // is encountered the result is NaN; otherwise, if any visited element is
 // infinite the result is +Inf.
 func L2NormInc(x []float64, n, incX uintptr) (sum float64) {
 	var (
 		largest float64
 		ssq     = 1.0
 	)
 	limit := n * incX
 	for i := uintptr(0); i < limit; i += incX {
 		if x[i] == 0 {
 			// Zero elements contribute nothing to the norm.
 			continue
 		}
 		mag := math.Abs(x[i])
 		switch {
 		case math.IsNaN(mag):
 			return math.NaN()
 		case largest < mag:
 			// New largest magnitude: rescale the running sum to it.
 			r := largest / mag
 			ssq = 1 + ssq*r*r
 			largest = mag
 		default:
 			r := mag / largest
 			ssq += r * r
 		}
 	}
 	if math.IsInf(largest, 1) {
 		// ssq may be NaN here (Inf/Inf), so return the infinity explicitly.
 		return math.Inf(1)
 	}
 	return largest * math.Sqrt(ssq)
 }
--- a/internal/asm/f64/l2norm_test.go
+++ b/internal/asm/f64/l2norm_test.go
@@ -0,0 +1,60 @@
 // Copyright ©2019 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 package f64
 import "testing"
 // TestL2NormUnitary checks L2NormUnitary against a table of expected norms.
 // Each input is passed through guardVector with a guard length that
 // alternates between 4 and 5, and isValidGuard is consulted afterwards.
 func TestL2NormUnitary(t *testing.T) {
 	guardVal := float64(1)
 	tests := []struct {
 		want float64
 		x    []float64
 	}{
 		{want: 0, x: []float64{}},
 		{want: 2, x: []float64{2}},
 		{want: 3.7416573867739413, x: []float64{1, 2, 3}},
 		{want: 3.7416573867739413, x: []float64{-1, -2, -3}},
 		{want: nan, x: []float64{nan}},
 		{want: 17.88854381999832, x: []float64{8, -8, 8, -8, 8}},
 		{want: 2.23606797749979, x: []float64{0, 1, 0, -1, 0, 1, 0, -1, 0, 1}},
 	}
 	for i, test := range tests {
 		guardLen := 4 + i%2
 		guarded := guardVector(test.x, guardVal, guardLen)
 		got := L2NormUnitary(guarded[guardLen : len(guarded)-guardLen])
 		if !within(got, test.want) {
 			t.Errorf("Test %d L2Norm error Got: %f Expected: %f", i, got, test.want)
 		}
 		if !isValidGuard(guarded, guardVal, guardLen) {
 			t.Errorf("Test %d Guard violated in src vector %v %v", i, guarded[:guardLen], guarded[len(guarded)-guardLen:])
 		}
 	}
 }
 // TestL2NormInc checks L2NormInc against a table of expected norms for a
 // range of increments. Each input is passed through guardIncVector with a
 // guard length that alternates between 4 and 5, and checkValidIncGuard is
 // called on the result afterwards.
 func TestL2NormInc(t *testing.T) {
 	guardVal := float64(1)
 	tests := []struct {
 		inc  int
 		want float64
 		x    []float64
 	}{
 		{inc: 2, want: 0, x: []float64{}},
 		{inc: 3, want: 2, x: []float64{2}},
 		{inc: 10, want: 3.7416573867739413, x: []float64{1, 2, 3}},
 		{inc: 5, want: 3.7416573867739413, x: []float64{-1, -2, -3}},
 		{inc: 3, want: nan, x: []float64{nan}},
 		{inc: 15, want: 17.88854381999832, x: []float64{8, -8, 8, -8, 8}},
 		{inc: 1, want: 2.23606797749979, x: []float64{0, 1, 0, -1, 0, 1, 0, -1, 0, 1}},
 	}
 	for i, test := range tests {
 		guardLen := 4 + i%2
 		// The element count must be taken before the input is expanded.
 		count := len(test.x)
 		guarded := guardIncVector(test.x, guardVal, test.inc, guardLen)
 		inner := guarded[guardLen : len(guarded)-guardLen]
 		got := L2NormInc(inner, uintptr(count), uintptr(test.inc))
 		if !within(got, test.want) {
 			t.Errorf("Test %d L2NormInc error Got: %f Expected: %f", i, got, test.want)
 		}
 		checkValidIncGuard(t, guarded, guardVal, test.inc, guardLen)
 	}
 }