internal/asm,blas,floats: move level 2 norm to asm

This allows sharing of the blas implementation with floats and opens the possibility of an assembly implementation of this function.
2025-10-22 22:59:24 +08:00 · 2019-10-18 11:42:47 +10:30
parent 0732d350bf
commit e2ba7f0950
9 changed files with 264 additions and 94 deletions
--- a/blas/gonum/level1float32.go
+++ b/blas/gonum/level1float32.go
@@ -39,52 +39,10 @@ func (Implementation) Snrm2(n int, x []float32, incX int) float32 {
 		}
 		panic(nLT0)
 	}
-	var (
-		scale      float32 = 0
-		sumSquares float32 = 1
-	)
 	if incX == 1 {
-		x = x[:n]
-		for _, v := range x {
-			if v == 0 {
-				continue
+		return f32.L2NormUnitary(x[:n])
 	}
-			absxi := math.Abs(v)
-			if math.IsNaN(absxi) {
-				return math.NaN()
-			}
-			if scale < absxi {
-				sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
-				scale = absxi
-			} else {
-				sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
-			}
-		}
-		if math.IsInf(scale, 1) {
-			return math.Inf(1)
-		}
-		return scale * math.Sqrt(sumSquares)
-	}
-	for ix := 0; ix < n*incX; ix += incX {
-		val := x[ix]
-		if val == 0 {
-			continue
-		}
-		absxi := math.Abs(val)
-		if math.IsNaN(absxi) {
-			return math.NaN()
-		}
-		if scale < absxi {
-			sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
-			scale = absxi
-		} else {
-			sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
-		}
-	}
-	if math.IsInf(scale, 1) {
-		return math.Inf(1)
-	}
-	return scale * math.Sqrt(sumSquares)
+	return f32.L2NormInc(x, uintptr(n), uintptr(incX))
 }

 // Sasum computes the sum of the absolute values of the elements of x.
--- a/blas/gonum/level1float64.go
+++ b/blas/gonum/level1float64.go
@@ -35,52 +35,10 @@ func (Implementation) Dnrm2(n int, x []float64, incX int) float64 {
 		}
 		panic(nLT0)
 	}
-	var (
-		scale      float64 = 0
-		sumSquares float64 = 1
-	)
 	if incX == 1 {
-		x = x[:n]
-		for _, v := range x {
-			if v == 0 {
-				continue
+		return f64.L2NormUnitary(x[:n])
 	}
-			absxi := math.Abs(v)
-			if math.IsNaN(absxi) {
-				return math.NaN()
-			}
-			if scale < absxi {
-				sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
-				scale = absxi
-			} else {
-				sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
-			}
-		}
-		if math.IsInf(scale, 1) {
-			return math.Inf(1)
-		}
-		return scale * math.Sqrt(sumSquares)
-	}
-	for ix := 0; ix < n*incX; ix += incX {
-		val := x[ix]
-		if val == 0 {
-			continue
-		}
-		absxi := math.Abs(val)
-		if math.IsNaN(absxi) {
-			return math.NaN()
-		}
-		if scale < absxi {
-			sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
-			scale = absxi
-		} else {
-			sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
-		}
-	}
-	if math.IsInf(scale, 1) {
-		return math.Inf(1)
-	}
-	return scale * math.Sqrt(sumSquares)
+	return f64.L2NormInc(x, uintptr(n), uintptr(incX))
 }

 // Dasum computes the sum of the absolute values of the elements of x.
--- a/blas/gonum/single_precision.bash
+++ b/blas/gonum/single_precision.bash
@@ -24,6 +24,8 @@ cat level1float64.go \
 | gofmt -r 'f64.AxpyInc -> f32.AxpyInc' \
 | gofmt -r 'f64.AxpyUnitary -> f32.AxpyUnitary' \
 | gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
+| gofmt -r 'f64.L2NormInc -> f32.L2NormInc' \
+| gofmt -r 'f64.L2NormUnitary -> f32.L2NormUnitary' \
 | gofmt -r 'f64.ScalInc -> f32.ScalInc' \
 | gofmt -r 'f64.ScalUnitary -> f32.ScalUnitary' \
 \
--- a/floats/floats.go
+++ b/floats/floats.go
@@ -648,11 +648,7 @@ func Norm(s []float64, L float64) float64 {
 		return 0
 	}
 	if L == 2 {
-		twoNorm := math.Abs(s[0])
-		for i := 1; i < len(s); i++ {
-			twoNorm = math.Hypot(twoNorm, s[i])
-		}
-		return twoNorm
+		return f64.L2NormUnitary(s)
 	}
 	var norm float64
 	if L == 1 {
--- a/floats/floats_test.go
+++ b/floats/floats_test.go
@@ -261,7 +261,7 @@ func TestDistance(t *testing.T) {
 			copy(tmp, test.s)
 			Sub(tmp, test.t)
 			norm := Norm(tmp, L)
-			if dist != norm { // Use equality because they should be identical
+			if !EqualWithinAbsOrRel(dist, norm, 1e-15, 1e-15) {
 				t.Errorf("Distance does not match norm for case %v, %v. Expected %v, Found %v.", i, j, norm, dist)
 			}
 		}
@@ -1753,3 +1753,15 @@ func BenchmarkScaleSmall(b *testing.B)  { benchmarkScale(b, Small) }
 func BenchmarkScaleMedium(b *testing.B) { benchmarkScale(b, Medium) }
 func BenchmarkScaleLarge(b *testing.B)  { benchmarkScale(b, Large) }
 func BenchmarkScaleHuge(b *testing.B)   { benchmarkScale(b, Huge) }
+
+func benchmarkNorm2(b *testing.B, size int) {
+	s := randomSlice(size)
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		Norm(s, 2)
+	}
+}
+func BenchmarkNorm2Small(b *testing.B)  { benchmarkNorm2(b, Small) }
+func BenchmarkNorm2Medium(b *testing.B) { benchmarkNorm2(b, Medium) }
+func BenchmarkNorm2Large(b *testing.B)  { benchmarkNorm2(b, Large) }
+func BenchmarkNorm2Huge(b *testing.B)   { benchmarkNorm2(b, Huge) }
--- a/internal/asm/f32/l2norm.go
+++ b/internal/asm/f32/l2norm.go
@@ -0,0 +1,62 @@
+// Copyright ©2019 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package f32
+
+import "gonum.org/v1/gonum/internal/math32"
+
+// L2NormUnitary is the level 2 norm of x.
+func L2NormUnitary(x []float32) (sum float32) {
+	var scale float32
+	var sumSquares float32 = 1
+	for _, v := range x {
+		if v == 0 {
+			continue
+		}
+		absxi := math32.Abs(v)
+		if math32.IsNaN(absxi) {
+			return math32.NaN()
+		}
+		if scale < absxi {
+			s := scale / absxi
+			sumSquares = 1 + sumSquares*s*s
+			scale = absxi
+		} else {
+			s := absxi / scale
+			sumSquares += s * s
+		}
+	}
+	if math32.IsInf(scale, 1) {
+		return math32.Inf(1)
+	}
+	return scale * math32.Sqrt(sumSquares)
+}
+
+// L2NormInc is the level 2 norm of x.
+func L2NormInc(x []float32, n, incX uintptr) (sum float32) {
+	var scale float32
+	var sumSquares float32 = 1
+	for ix := uintptr(0); ix < n*incX; ix += incX {
+		val := x[ix]
+		if val == 0 {
+			continue
+		}
+		absxi := math32.Abs(val)
+		if math32.IsNaN(absxi) {
+			return math32.NaN()
+		}
+		if scale < absxi {
+			s := scale / absxi
+			sumSquares = 1 + sumSquares*s*s
+			scale = absxi
+		} else {
+			s := absxi / scale
+			sumSquares += s * s
+		}
+	}
+	if math32.IsInf(scale, 1) {
+		return math32.Inf(1)
+	}
+	return scale * math32.Sqrt(sumSquares)
+}
--- a/internal/asm/f32/l2norm_test.go
+++ b/internal/asm/f32/l2norm_test.go
@@ -0,0 +1,60 @@
+// Copyright ©2019 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package f32
+
+import "testing"
+
+func TestL2NormUnitary(t *testing.T) {
+	var src_gd float32 = 1
+	for j, v := range []struct {
+		want float32
+		x    []float32
+	}{
+		{want: 0, x: []float32{}},
+		{want: 2, x: []float32{2}},
+		{want: 3.7416573867739413, x: []float32{1, 2, 3}},
+		{want: 3.7416573867739413, x: []float32{-1, -2, -3}},
+		{want: nan, x: []float32{nan}},
+		{want: 17.88854381999832, x: []float32{8, -8, 8, -8, 8}},
+		{want: 2.23606797749979, x: []float32{0, 1, 0, -1, 0, 1, 0, -1, 0, 1}},
+	} {
+		g_ln := 4 + j%2
+		v.x = guardVector(v.x, src_gd, g_ln)
+		src := v.x[g_ln : len(v.x)-g_ln]
+		ret := L2NormUnitary(src)
+		if !within(ret, v.want) {
+			t.Errorf("Test %d L2Norm error Got: %f Expected: %f", j, ret, v.want)
+		}
+		if !isValidGuard(v.x, src_gd, g_ln) {
+			t.Errorf("Test %d Guard violated in src vector %v %v", j, v.x[:g_ln], v.x[len(v.x)-g_ln:])
+		}
+	}
+}
+
+func TestL2NormInc(t *testing.T) {
+	var src_gd float32 = 1
+	for j, v := range []struct {
+		inc  int
+		want float32
+		x    []float32
+	}{
+		{inc: 2, want: 0, x: []float32{}},
+		{inc: 3, want: 2, x: []float32{2}},
+		{inc: 10, want: 3.7416573867739413, x: []float32{1, 2, 3}},
+		{inc: 5, want: 3.7416573867739413, x: []float32{-1, -2, -3}},
+		{inc: 3, want: nan, x: []float32{nan}},
+		{inc: 15, want: 17.88854381999832, x: []float32{8, -8, 8, -8, 8}},
+		{inc: 1, want: 2.23606797749979, x: []float32{0, 1, 0, -1, 0, 1, 0, -1, 0, 1}},
+	} {
+		g_ln, ln := 4+j%2, len(v.x)
+		v.x = guardIncVector(v.x, src_gd, v.inc, g_ln)
+		src := v.x[g_ln : len(v.x)-g_ln]
+		ret := L2NormInc(src, uintptr(ln), uintptr(v.inc))
+		if !within(ret, v.want) {
+			t.Errorf("Test %d L2NormInc error Got: %f Expected: %f", j, ret, v.want)
+		}
+		checkValidIncGuard(t, v.x, src_gd, v.inc, g_ln)
+	}
+}
--- a/internal/asm/f64/l2norm.go
+++ b/internal/asm/f64/l2norm.go
@@ -0,0 +1,62 @@
+// Copyright ©2019 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package f64
+
+import "math"
+
+// L2NormUnitary is the level 2 norm of x.
+func L2NormUnitary(x []float64) (sum float64) {
+	var scale float64
+	sumSquares := 1.0
+	for _, v := range x {
+		if v == 0 {
+			continue
+		}
+		absxi := math.Abs(v)
+		if math.IsNaN(absxi) {
+			return math.NaN()
+		}
+		if scale < absxi {
+			s := scale / absxi
+			sumSquares = 1 + sumSquares*s*s
+			scale = absxi
+		} else {
+			s := absxi / scale
+			sumSquares += s * s
+		}
+	}
+	if math.IsInf(scale, 1) {
+		return math.Inf(1)
+	}
+	return scale * math.Sqrt(sumSquares)
+}
+
+// L2NormInc is the level 2 norm of x.
+func L2NormInc(x []float64, n, incX uintptr) (sum float64) {
+	var scale float64
+	sumSquares := 1.0
+	for ix := uintptr(0); ix < n*incX; ix += incX {
+		val := x[ix]
+		if val == 0 {
+			continue
+		}
+		absxi := math.Abs(val)
+		if math.IsNaN(absxi) {
+			return math.NaN()
+		}
+		if scale < absxi {
+			s := scale / absxi
+			sumSquares = 1 + sumSquares*s*s
+			scale = absxi
+		} else {
+			s := absxi / scale
+			sumSquares += s * s
+		}
+	}
+	if math.IsInf(scale, 1) {
+		return math.Inf(1)
+	}
+	return scale * math.Sqrt(sumSquares)
+}
--- a/internal/asm/f64/l2norm_test.go
+++ b/internal/asm/f64/l2norm_test.go
@@ -0,0 +1,60 @@
+// Copyright ©2019 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package f64
+
+import "testing"
+
+func TestL2NormUnitary(t *testing.T) {
+	var src_gd float64 = 1
+	for j, v := range []struct {
+		want float64
+		x    []float64
+	}{
+		{want: 0, x: []float64{}},
+		{want: 2, x: []float64{2}},
+		{want: 3.7416573867739413, x: []float64{1, 2, 3}},
+		{want: 3.7416573867739413, x: []float64{-1, -2, -3}},
+		{want: nan, x: []float64{nan}},
+		{want: 17.88854381999832, x: []float64{8, -8, 8, -8, 8}},
+		{want: 2.23606797749979, x: []float64{0, 1, 0, -1, 0, 1, 0, -1, 0, 1}},
+	} {
+		g_ln := 4 + j%2
+		v.x = guardVector(v.x, src_gd, g_ln)
+		src := v.x[g_ln : len(v.x)-g_ln]
+		ret := L2NormUnitary(src)
+		if !within(ret, v.want) {
+			t.Errorf("Test %d L2Norm error Got: %f Expected: %f", j, ret, v.want)
+		}
+		if !isValidGuard(v.x, src_gd, g_ln) {
+			t.Errorf("Test %d Guard violated in src vector %v %v", j, v.x[:g_ln], v.x[len(v.x)-g_ln:])
+		}
+	}
+}
+
+func TestL2NormInc(t *testing.T) {
+	var src_gd float64 = 1
+	for j, v := range []struct {
+		inc  int
+		want float64
+		x    []float64
+	}{
+		{inc: 2, want: 0, x: []float64{}},
+		{inc: 3, want: 2, x: []float64{2}},
+		{inc: 10, want: 3.7416573867739413, x: []float64{1, 2, 3}},
+		{inc: 5, want: 3.7416573867739413, x: []float64{-1, -2, -3}},
+		{inc: 3, want: nan, x: []float64{nan}},
+		{inc: 15, want: 17.88854381999832, x: []float64{8, -8, 8, -8, 8}},
+		{inc: 1, want: 2.23606797749979, x: []float64{0, 1, 0, -1, 0, 1, 0, -1, 0, 1}},
+	} {
+		g_ln, ln := 4+j%2, len(v.x)
+		v.x = guardIncVector(v.x, src_gd, v.inc, g_ln)
+		src := v.x[g_ln : len(v.x)-g_ln]
+		ret := L2NormInc(src, uintptr(ln), uintptr(v.inc))
+		if !within(ret, v.want) {
+			t.Errorf("Test %d L2NormInc error Got: %f Expected: %f", j, ret, v.want)
+		}
+		checkValidIncGuard(t, v.x, src_gd, v.inc, g_ln)
+	}
+}