mirror of
				https://github.com/gonum/gonum.git
				synced 2025-10-31 02:26:59 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			269 lines
		
	
	
		
			9.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			269 lines
		
	
	
		
			9.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright ©2017 The Gonum Authors. All rights reserved.
 | |
| // Use of this source code is governed by a BSD-style
 | |
| // license that can be found in the LICENSE file.
 | |
| 
 | |
| package f32_test
 | |
| 
 | |
| import (
 | |
| 	"fmt"
 | |
| 	"testing"
 | |
| 
 | |
| 	. "gonum.org/v1/gonum/internal/asm/f32"
 | |
| )
 | |
| 
 | |
| var gerTests = []struct {
 | |
| 	x, y, a []float32
 | |
| 	want    []float32
 | |
| }{ // m x n ( kernels executed )
 | |
| 	{ // 1 x 1 (1x1)
 | |
| 		x:    []float32{2},
 | |
| 		y:    []float32{4.4},
 | |
| 		a:    []float32{10},
 | |
| 		want: []float32{18.8},
 | |
| 	},
 | |
| 	{ // 3 x 2 ( 2x2, 1x2 )
 | |
| 		x: []float32{-2, -3, 0},
 | |
| 		y: []float32{-1.1, 5},
 | |
| 		a: []float32{
 | |
| 			1.3, 2.4,
 | |
| 			2.6, 2.8,
 | |
| 			-1.3, -4.3,
 | |
| 		},
 | |
| 		want: []float32{3.5, -7.6, 5.9, -12.2, -1.3, -4.3},
 | |
| 	},
 | |
| 	{ // 3 x 3 ( 2x2, 2x1, 1x2, 1x1 )
 | |
| 		x: []float32{-2, 7, 12},
 | |
| 		y: []float32{-1.1, 0, 6},
 | |
| 		a: []float32{
 | |
| 			1.3, 2.4, 3.5,
 | |
| 			2.6, 2.8, 3.3,
 | |
| 			-1.3, -4.3, -9.7,
 | |
| 		},
 | |
| 		want: []float32{3.5, 2.4, -8.5, -5.1, 2.8, 45.3, -14.5, -4.3, 62.3},
 | |
| 	},
 | |
| 	{ // 5 x 3 ( 4x2, 4x1, 1x2, 1x1 )
 | |
| 		x: []float32{-2, -3, 0, 1, 2},
 | |
| 		y: []float32{-1.1, 5, 0},
 | |
| 		a: []float32{
 | |
| 			1.3, 2.4, 3.5,
 | |
| 			2.6, 2.8, 3.3,
 | |
| 			-1.3, -4.3, -9.7,
 | |
| 			8, 9, -10,
 | |
| 			-12, -14, -6,
 | |
| 		},
 | |
| 		want: []float32{3.5, -7.6, 3.5, 5.9, -12.2, 3.3, -1.3, -4.3, -9.7, 6.9, 14, -10, -14.2, -4, -6},
 | |
| 	},
 | |
| 	{ // 3 x 6 ( 2x4, 2x2, 1x4, 1x2 )
 | |
| 		x: []float32{-2, -3, 0},
 | |
| 		y: []float32{-1.1, 5, 0, 9, 19, 22},
 | |
| 		a: []float32{
 | |
| 			1.3, 2.4, 3.5, 4.8, 1.11, -9,
 | |
| 			2.6, 2.8, 3.3, -3.4, 6.2, -8.7,
 | |
| 			-1.3, -4.3, -9.7, -3.1, 8.9, 8.9,
 | |
| 		},
 | |
| 		want: []float32{3.5, -7.6, 3.5, -13.2, -36.89, -53, 5.9, -12.2, 3.3, -30.4, -50.8, -74.7, -1.3, -4.3, -9.7, -3.1, 8.9, 8.9},
 | |
| 	},
 | |
| 	{ // 5 x 5 ( 4x4, 4x1, 1x4, 1x1)
 | |
| 		x: []float32{-2, 0, 2, 0, 7},
 | |
| 		y: []float32{-1.1, 8, 7, 3, 5},
 | |
| 		a: []float32{
 | |
| 			1.3, 2.4, 3.5, 2.2, 8.3,
 | |
| 			2.6, 2.8, 3.3, 4.4, -1.5,
 | |
| 			-1.3, -4.3, -9.7, -8.8, 6.2,
 | |
| 			8, 9, -10, -11, 12,
 | |
| 			-12, -14, -6, -2, 4,
 | |
| 		},
 | |
| 		want: []float32{
 | |
| 			3.5, -13.6, -10.5, -3.8, -1.7,
 | |
| 			2.6, 2.8, 3.3, 4.4, -1.5,
 | |
| 			-3.5, 11.7, 4.3, -2.8, 16.2,
 | |
| 			8, 9, -10, -11, 12,
 | |
| 			-19.700000000000003, 42, 43, 19, 39,
 | |
| 		},
 | |
| 	},
 | |
| 	{ // 7 x 7 ( 4x4, 4x2, 4x1, 2x4, 2x2, 2x1, 1x4, 1x2, 1x1 ) < nan test >
 | |
| 		x: []float32{-2, 8, 9, -3, -1.2, 5, 4.5},
 | |
| 		y: []float32{-1.1, nan, 19, 11, -9.22, 7, 3.3},
 | |
| 		a: []float32{
 | |
| 			1.3, 2.4, 3.5, 4.8, 1.11, -9, 2.2,
 | |
| 			2.6, 2.8, 3.3, -3.4, 6.2, -8.7, 5.1,
 | |
| 			-1.3, -4.3, -9.7, -3.1, 8.9, 8.9, 8,
 | |
| 			5, -2.5, 1.8, -3.6, 2.8, 4.9, 7,
 | |
| 			-1.3, -4.3, -9.7, -3.1, 8.9, 8.9, 8,
 | |
| 			2.6, 2.8, 3.3, -3.4, 6.2, -8.7, 5.1,
 | |
| 			1.3, 2.4, 3.5, 4.8, 1.11, -9, 2.2,
 | |
| 		},
 | |
| 		want: []float32{
 | |
| 			3.5, nan, -34.5, -17.2, 19.55, -23, -4.4,
 | |
| 			-6.2, nan, 155.3, 84.6, -67.56, 47.3, 31.5,
 | |
| 			-11.2, nan, 161.3, 95.9, -74.08, 71.9, 37.7,
 | |
| 			8.3, nan, -55.2, -36.6, 30.46, -16.1, -2.9,
 | |
| 			0.02, nan, -32.5, -16.3, 19.964, 0.5, 4.04,
 | |
| 			-2.9, nan, 98.3, 51.6, -39.9, 26.3, 21.6,
 | |
| 			-3.65, nan, 89, 54.3, -40.38, 22.5, 17.05,
 | |
| 		},
 | |
| 	},
 | |
| 	{ // 15 x 15 ( 4x8 4x4, 4x2, 4x1, 2x8, 2x4, 2x2, 2x1, 1x8, 1x4, 1x2, 1x1 ) < nan test >
 | |
| 		x: []float32{6.2, -5, 88.68, 43.4, -30.5, -40.2, 19.9, 3, 19.9, -40.2, -30.5, 43.4, 88.68, -5, 6.2},
 | |
| 		y: []float32{1.5, 21.7, -28.7, -11.9, 18.1, 3.1, 21, 8, 21, 3.1, 18.1, -11.9, -28.7, 21.7, 1.5},
 | |
| 		a: []float32{
 | |
| 			-20.5, 17.1, -8.4, -23.8, 3.9, 7.7, 6.25, 2.9, -0.29, 25.6, -9.4, 36.5, 9.7, 2.3, 4.1,
 | |
| 			-34.1, 10.3, 4.5, -42.05, 9.4, 4, 19.2, 9.8, -32.7, 4.1, 4.4, -22.5, -7.8, 3.6, -24.5,
 | |
| 			21.7, 8.6, -13.82, 3.05, -2.29, 39.4, -40, 7.9, -2.5, -7.7, 18.1, -25.5, -18.5, 43.2, 2.1,
 | |
| 			-20.5, 17.1, -8.4, -23.8, 3.9, 7.7, 6.25, 2.9, -0.29, 25.6, -9.4, 36.5, 9.7, 2.3, 4.1,
 | |
| 			-34.1, 10.3, 4.5, -42.05, 9.4, 4, 19.2, 9.8, -32.7, 4.1, 4.4, -22.5, -7.8, 3.6, -24.5,
 | |
| 			21.7, 8.6, -13.82, 3.05, -2.29, 39.4, -40, 7.9, -2.5, -7.7, 18.1, -25.5, -18.5, 43.2, 2.1,
 | |
| 			21.7, 8.6, -13.82, 3.05, -2.29, 39.4, -40, 7.9, -2.5, -7.7, 18.1, -25.5, -18.5, 43.2, 2.1,
 | |
| 			-34.1, 10.3, 4.5, -42.05, 9.4, 4, 19.2, 9.8, -32.7, 4.1, 4.4, -22.5, -7.8, 3.6, -24.5,
 | |
| 			-20.5, 17.1, -8.4, -23.8, 3.9, 7.7, 6.25, 2.9, -0.29, 25.6, -9.4, 36.5, 9.7, 2.3, 4.1,
 | |
| 			21.7, 8.6, -13.82, 3.05, -2.29, 39.4, -40, 7.9, -2.5, -7.7, 18.1, -25.5, -18.5, 43.2, 2.1,
 | |
| 			-34.1, 10.3, 4.5, -42.05, 9.4, 4, 19.2, 9.8, -32.7, 4.1, 4.4, -22.5, -7.8, 3.6, -24.5,
 | |
| 			-20.5, 17.1, -8.4, -23.8, 3.9, 7.7, 6.25, 2.9, -0.29, 25.6, -9.4, 36.5, 9.7, 2.3, 4.1,
 | |
| 			-20.5, 17.1, -8.4, -23.8, 3.9, 7.7, 6.25, 2.9, -0.29, 25.6, -9.4, 36.5, 9.7, 2.3, 4.1,
 | |
| 			21.7, 8.6, -13.82, 3.05, -2.29, 39.4, -40, 7.9, -2.5, -7.7, 18.1, -25.5, -18.5, 43.2, 2.1,
 | |
| 			-34.1, 10.3, 4.5, -42.05, 9.4, 4, 19.2, 9.8, -32.7, 4.1, 4.4, -22.5, -7.8, 3.6, -24.5,
 | |
| 		},
 | |
| 		want: []float32{
 | |
| 			-11.200001, 151.64, -186.34, -97.58, 116.12, 26.919998, 136.45, 52.5, 129.91, 44.82, 102.82, -37.28, -168.24, 136.84, 13.4,
 | |
| 			-41.6, -98.2, 148, 17.45, -81.1, -11.5, -85.8, -30.2, -137.7, -11.4, -86.1, 37, 135.7, -104.9, -32,
 | |
| 			154.72, 1932.956, -2558.936, -1052.242, 1602.818, 314.30798, 1822.28, 717.34, 1859.78, 267.20798, 1623.208, -1080.792, -2563.616, 1967.556, 135.12001,
 | |
| 			44.600006, 958.88007, -1253.9801, -540.26, 789.44006, 142.23999, 917.65, 350.1, 911.11005, 160.14, 776.14, -479.96002, -1235.8801, 944.0801, 69.200005,
 | |
| 			-79.85, -651.55005, 879.85004, 320.9, -542.64996, -90.549995, -621.3, -234.2, -673.2, -90.45, -547.64996, 340.44998, 867.55005, -658.25006, -70.25,
 | |
| 			-38.600002, -863.74005, 1139.9202, 481.43, -729.91003, -85.21999, -884.2, -313.7, -846.7, -132.31999, -709.5201, 452.88, 1135.2401, -829.14, -58.200005,
 | |
| 			51.55, 440.43002, -584.95, -233.75998, 357.9, 101.09, 377.9, 167.09999, 415.4, 53.989998, 378.29, -262.31, -589.63, 475.03003, 31.949999,
 | |
| 			-29.599998, 75.40001, -81.600006, -77.75, 63.700005, 13.299999, 82.2, 33.8, 30.3, 13.4, 58.700005, -58.199997, -93.90001, 68.700005, -20,
 | |
| 			9.349998, 448.93002, -579.53, -260.61, 364.09, 69.39, 424.15, 162.09999, 417.61, 87.29, 350.79, -200.30998, -561.43, 434.13, 33.949997,
 | |
| 			-38.600002, -863.74005, 1139.9202, 481.43, -729.91003, -85.21999, -884.2, -313.7, -846.7, -132.31999, -709.5201, 452.88, 1135.2401, -829.14, -58.200005,
 | |
| 			-79.85, -651.55005, 879.85004, 320.9, -542.64996, -90.549995, -621.3, -234.2, -673.2, -90.45, -547.64996, 340.44998, 867.55005, -658.25006, -70.25,
 | |
| 			44.600006, 958.88007, -1253.9801, -540.26, 789.44006, 142.23999, 917.65, 350.1, 911.11005, 160.14, 776.14, -479.96002, -1235.8801, 944.0801, 69.200005,
 | |
| 			112.520004, 1941.456, -2553.5159, -1079.092, 1609.008, 282.608, 1868.53, 712.34, 1861.99, 300.508, 1595.708, -1018.792, -2535.416, 1926.6561, 137.12001,
 | |
| 			14.200001, -99.9, 129.68, 62.55, -92.79, 23.900002, -145, -32.1, -107.5, -23.2, -72.4, 34, 125, -65.3, -5.4,
 | |
| 			-24.8, 144.84, -173.44, -115.83, 121.62, 23.22, 149.4, 59.399998, 97.5, 23.32, 116.62, -96.28, -185.74, 138.14, -15.200001,
 | |
| 		},
 | |
| 	},
 | |
| }
 | |
| 
 | |
| func TestGer(t *testing.T) {
 | |
| 	const (
 | |
| 		tol = 1e-6
 | |
| 
 | |
| 		xGdVal, yGdVal, aGdVal = -0.5, 1.5, 10
 | |
| 		gdLn                   = 4
 | |
| 	)
 | |
| 	for i, test := range gerTests {
 | |
| 		m, n := len(test.x), len(test.y)
 | |
| 		for _, align := range align2 {
 | |
| 			prefix := fmt.Sprintf("Test %v (%vx%v) align(x:%v,y:%v,a:%v)",
 | |
| 				i, m, n, align.x, align.y, align.x^align.y)
 | |
| 			xgLn, ygLn, agLn := gdLn+align.x, gdLn+align.y, gdLn+align.x^align.y
 | |
| 			xg, yg := guardVector(test.x, xGdVal, xgLn), guardVector(test.y, yGdVal, ygLn)
 | |
| 			x, y := xg[xgLn:len(xg)-xgLn], yg[ygLn:len(yg)-ygLn]
 | |
| 			ag := guardVector(test.a, aGdVal, agLn)
 | |
| 			a := ag[agLn : len(ag)-agLn]
 | |
| 
 | |
| 			var alpha float32 = 1.0
 | |
| 			Ger(uintptr(m), uintptr(n), alpha, x, 1, y, 1, a, uintptr(n))
 | |
| 			for i := range test.want {
 | |
| 				if !sameApprox(a[i], test.want[i], tol) {
 | |
| 					t.Errorf(msgVal, prefix, i, a[i], test.want[i])
 | |
| 					return
 | |
| 				}
 | |
| 			}
 | |
| 			if !isValidGuard(xg, xGdVal, xgLn) {
 | |
| 				t.Errorf(msgGuard, prefix, "x", xg[:xgLn], xg[len(xg)-xgLn:])
 | |
| 			}
 | |
| 			if !isValidGuard(yg, yGdVal, ygLn) {
 | |
| 				t.Errorf(msgGuard, prefix, "y", yg[:ygLn], yg[len(yg)-ygLn:])
 | |
| 			}
 | |
| 			if !isValidGuard(ag, aGdVal, agLn) {
 | |
| 				t.Errorf(msgGuard, prefix, "a", ag[:agLn], ag[len(ag)-agLn:])
 | |
| 				t.Errorf(msgReadOnly, prefix, "x")
 | |
| 			}
 | |
| 			if !sameStrided(test.y, y, 1) {
 | |
| 				t.Errorf(msgReadOnly, prefix, "y")
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		for _, inc := range newIncSet(1, 2) {
 | |
| 			prefix := fmt.Sprintf("Test %v (%vx%v) inc(x:%v,y:%v)", i, m, n, inc.x, inc.y)
 | |
| 			xg := guardIncVector(test.x, xGdVal, inc.x, gdLn)
 | |
| 			yg := guardIncVector(test.y, yGdVal, inc.y, gdLn)
 | |
| 			x, y := xg[gdLn:len(xg)-gdLn], yg[gdLn:len(yg)-gdLn]
 | |
| 			ag := guardVector(test.a, aGdVal, gdLn)
 | |
| 			a := ag[gdLn : len(ag)-gdLn]
 | |
| 
 | |
| 			var alpha float32 = 3.5
 | |
| 			Ger(uintptr(m), uintptr(n), alpha,
 | |
| 				x, uintptr(inc.x),
 | |
| 				y, uintptr(inc.y),
 | |
| 				a, uintptr(n))
 | |
| 			for i := range test.want {
 | |
| 				want := alpha*test.x[i/n]*test.y[i%n] + test.a[i]
 | |
| 				if !sameApprox(a[i], want, tol) {
 | |
| 					t.Errorf(msgVal, prefix, i, a[i], want)
 | |
| 				}
 | |
| 			}
 | |
| 			checkValidIncGuard(t, xg, xGdVal, inc.x, gdLn)
 | |
| 			checkValidIncGuard(t, yg, yGdVal, inc.y, gdLn)
 | |
| 			if !isValidGuard(ag, aGdVal, gdLn) {
 | |
| 				t.Errorf(msgGuard, prefix, "a", ag[:gdLn], ag[len(ag)-gdLn:])
 | |
| 			}
 | |
| 			if !sameStrided(test.x, x, inc.x) {
 | |
| 				t.Errorf(msgReadOnly, prefix, "x")
 | |
| 			}
 | |
| 			if !sameStrided(test.y, y, inc.y) {
 | |
| 				t.Errorf(msgReadOnly, prefix, "y")
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func BenchmarkGer(t *testing.B) {
 | |
| 	const alpha = 3
 | |
| 	for _, dims := range newIncSet(3, 10, 30, 100, 300, 1e3, 3e3, 1e4) {
 | |
| 		m, n := dims.x, dims.y
 | |
| 		if m/n >= 100 || n/m >= 100 {
 | |
| 			continue
 | |
| 		}
 | |
| 		for _, inc := range newIncSet(1, 3, 4, 10) {
 | |
| 			t.Run(fmt.Sprintf("Dger %dx%d (%d %d)", m, n, inc.x, inc.y), func(b *testing.B) {
 | |
| 				x, y, a := gerData(m, n, inc.x, inc.y)
 | |
| 				b.ResetTimer()
 | |
| 				for i := 0; i < b.N; i++ {
 | |
| 					Ger(uintptr(m), uintptr(n), alpha,
 | |
| 						x, uintptr(inc.x),
 | |
| 						y, uintptr(inc.y),
 | |
| 						a, uintptr(n))
 | |
| 				}
 | |
| 			})
 | |
| 
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
// gerData allocates benchmark inputs for an m x n Ger call: x has m*incX
// elements, y has n*incY elements and a has m*n elements. Every element of
// each slice is set to its own index converted to float32.
func gerData(m, n, incX, incY int) (x, y, a []float32) {
	// ramp returns a slice of the given length holding 0, 1, 2, ...
	ramp := func(length int) []float32 {
		s := make([]float32, length)
		for idx := range s {
			s[idx] = float32(idx)
		}
		return s
	}
	x = ramp(m * incX)
	y = ramp(n * incY)
	a = ramp(m * n)
	return x, y, a
}
 | 
