lapack/gonum: rewrite Dlartg to use safe scaling from https://doi.org/10.1145/3061665

2025-10-20 13:55:20 +08:00 · 2021-03-16 21:54:37 +01:00
parent 13102a112c
commit 7a88995ca8
7 changed files with 102 additions and 118 deletions
--- a/lapack/gonum/dlartg.go
+++ b/lapack/gonum/dlartg.go
@@ -9,72 +9,50 @@ import "math"
 // Dlartg generates a plane rotation so that
 //  [ cs sn] * [f] = [r]
 //  [-sn cs]   [g] = [0]
-// This is a more accurate version of BLAS drotg, with the other differences that
-// if g = 0, then cs = 1 and sn = 0, and if f = 0 and g != 0, then cs = 0 and sn = 1.
-// If abs(f) > abs(g), cs will be positive.
+// where cs*cs + sn*sn = 1.
+//
+// This is a more accurate version of BLAS Drotg, with the other differences
+// that
+//  - if g = 0, then cs = 1 and sn = 0
+//  - if f = 0 and g != 0, then cs = 0 and sn = 1
+//  - r takes the sign of f and so cs is always non-negative
 //
 // Dlartg is an internal routine. It is exported for testing purposes.
 func (impl Implementation) Dlartg(f, g float64) (cs, sn, r float64) {
-	safmn2 := math.Pow(dlamchB, math.Trunc(math.Log(dlamchS/dlamchE)/math.Log(dlamchB)/2))
-	safmx2 := 1 / safmn2
-	if g == 0 {
+	// Implementation based on Supplemental Material to:
+	// Edward Anderson. 2017. Algorithm 978: Safe Scaling in the Level 1 BLAS.
+	// ACM Trans. Math. Softw. 44, 1, Article 12 (July 2017), 28 pages.
+	// DOI: https://doi.org/10.1145/3061665
+	const safmin = dlamchS
+	const safmax = 1 / safmin
+	f1 := math.Abs(f)
+	g1 := math.Abs(g)
+	switch {
+	case g == 0:
 		cs = 1
 		sn = 0
 		r = f
-		return cs, sn, r
-	}
-	if f == 0 {
+	case f == 0:
 		cs = 0
-		sn = 1
-		r = g
-		return cs, sn, r
-	}
-	f1 := f
-	g1 := g
-	scale := math.Max(math.Abs(f1), math.Abs(g1))
-	if scale >= safmx2 {
-		var count int
-		for {
-			count++
-			f1 *= safmn2
-			g1 *= safmn2
-			scale = math.Max(math.Abs(f1), math.Abs(g1))
-			if scale < safmx2 || count == 20 {
-				break
-			}
-		}
-		r = math.Sqrt(f1*f1 + g1*g1)
-		cs = f1 / r
-		sn = g1 / r
-		for i := 0; i < count; i++ {
-			r *= safmx2
-		}
-	} else if scale <= safmn2 {
-		var count int
-		for {
-			count++
-			f1 *= safmx2
-			g1 *= safmx2
-			scale = math.Max(math.Abs(f1), math.Abs(g1))
-			if scale >= safmn2 || count == 20 {
-				break
-			}
-		}
-		r = math.Sqrt(f1*f1 + g1*g1)
-		cs = f1 / r
-		sn = g1 / r
-		for i := 0; i < count; i++ {
-			r *= safmn2
-		}
-	} else {
-		r = math.Sqrt(f1*f1 + g1*g1)
-		cs = f1 / r
-		sn = g1 / r
-	}
-	if math.Abs(f) > math.Abs(g) && cs < 0 {
-		cs *= -1
-		sn *= -1
-		r *= -1
+		sn = math.Copysign(1, g)
+		r = g1
+	case drtmin < f1 && f1 < drtmax && drtmin < g1 && g1 < drtmax:
+		d := math.Sqrt(f*f + g*g)
+		p := 1 / d
+		cs = f1 * p
+		sn = g * math.Copysign(p, f)
+		r = math.Copysign(d, f)
+	default:
+		maxfg := math.Max(f1, g1)
+		u := math.Min(math.Max(safmin, maxfg), safmax)
+		uu := 1 / u
+		fs := f * uu
+		gs := g * uu
+		d := math.Sqrt(fs*fs + gs*gs)
+		p := 1 / d
+		cs = math.Abs(fs) * p
+		sn = gs * math.Copysign(p, f)
+		r = math.Copysign(d, f) * u
 	}
 	return cs, sn, r
 }
--- a/lapack/gonum/lapack.go
+++ b/lapack/gonum/lapack.go
@@ -48,4 +48,10 @@ const (
 	// 1/dlamchS does not overflow, or also the smallest normal number.
 	// For IEEE this is 2^{-1022}.
 	dlamchS = 0x1p-1022
+
+	// (rtmin,rtmax) is a range of well-scaled numbers whose square
+	// or sum of squares is also safe.
+	// drtmin is sqrt(dlamchS/dlamchP)
+	drtmin = 0x1p-485
+	drtmax = 1 / drtmin
 )
--- a/mat/eigen_test.go
+++ b/mat/eigen_test.go
@@ -50,10 +50,10 @@ func TestEigen(t *testing.T) {
 			}),
 			values: []complex128{1, 0.7300317046114154, -0.1400158523057075 + 0.452854925738716i, -0.1400158523057075 - 0.452854925738716i},
 			left: NewCDense(4, 4, []complex128{
-				0.5, -0.3135167160788314, 0.0205812178013689 - 0.0045809393001271i, 0.0205812178013689 + 0.0045809393001271i,
-				0.5, 0.7842199280224774, -0.3755102695419336 + 0.2924634904103882i, -0.3755102695419336 - 0.2924634904103882i,
-				0.5, 0.3320220078078358, -0.1605261632278496 - 0.3881393645202528i, -0.1605261632278496 + 0.3881393645202528i,
-				0.5, 0.4200806584012395, 0.7723935249234153, 0.7723935249234153,
+				0.5, -0.3135167160788313, -0.02058121780136903 + 0.004580939300127051i, -0.02058121780136903 - 0.004580939300127051i,
+				0.5, 0.7842199280224781, 0.37551026954193356 - 0.2924634904103879i, 0.37551026954193356 + 0.2924634904103879i,
+				0.5, 0.33202200780783525, 0.16052616322784943 + 0.3881393645202527i, 0.16052616322784943 - 0.3881393645202527i,
+				0.5, 0.42008065840123954, -0.7723935249234155, -0.7723935249234155,
 			}),
 			right: NewCDense(4, 4, []complex128{
 				0.9476399565969628, -0.8637347682162745, -0.2688989440320280 - 0.1282234938321029i, -0.2688989440320280 + 0.1282234938321029i,
--- a/mat/gsvd_example_test.go
+++ b/mat/gsvd_example_test.go
@@ -76,9 +76,9 @@ func ExampleGSVD() {
 	//
 	// Common basis vectors
 	//
-	// 	Qᵀ = ⎡ -8172.4084   -4524.2933    4813.9616⎤
-	// 	     ⎢ 22581.8020   12397.1070  -16364.8933⎥
-	// 	     ⎣ -8910.8462  -10902.1488   15762.8719⎦
+	// 	Qᵀ = ⎡ 14508.5881    4524.2933   -4813.9616⎤
+	// 	     ⎢ 15562.9323   12397.1070  -16364.8933⎥
+	// 	     ⎣-14262.7217  -10902.1488   15762.8719⎦
 	//
 	// Significance:
 	// 	eigenvar_0: +0.7807
--- a/stat/cca_example_test.go
+++ b/stat/cca_example_test.go
@@ -138,29 +138,29 @@ func ExampleCC() {
 	//
 	// ccors = [0.9451 0.6787 0.5714 0.2010]
 	//
-	// pVecs = ⎡-0.2574   0.0158   0.2122  -0.0946⎤
-	//         ⎢-0.4837   0.3837   0.1474   0.6597⎥
-	//         ⎢-0.0801   0.3494   0.3287  -0.2862⎥
-	//         ⎢ 0.1278  -0.7337   0.4851   0.2248⎥
-	//         ⎢-0.6969  -0.4342  -0.3603   0.0291⎥
-	//         ⎢-0.0991   0.0503   0.6384   0.1022⎥
-	//         ⎣ 0.4260   0.0323  -0.2290   0.6419⎦
+	// pVecs = ⎡-0.2574  -0.0158  -0.2122  -0.0946⎤
+	//         ⎢-0.4837  -0.3837  -0.1474   0.6597⎥
+	//         ⎢-0.0801  -0.3494  -0.3287  -0.2862⎥
+	//         ⎢ 0.1278   0.7337  -0.4851   0.2248⎥
+	//         ⎢-0.6969   0.4342   0.3603   0.0291⎥
+	//         ⎢-0.0991  -0.0503  -0.6384   0.1022⎥
+	//         ⎣ 0.4260  -0.0323   0.2290   0.6419⎦
 	//
-	// qVecs = ⎡ 0.0182  -0.1583  -0.0067  -0.9872⎤
-	//         ⎢-0.2348   0.9483  -0.1462  -0.1554⎥
-	//         ⎢-0.9701  -0.2406  -0.0252   0.0209⎥
-	//         ⎣ 0.0593  -0.1330  -0.9889   0.0291⎦
+	// qVecs = ⎡ 0.0182   0.1583   0.0067  -0.9872⎤
+	//         ⎢-0.2348  -0.9483   0.1462  -0.1554⎥
+	//         ⎢-0.9701   0.2406   0.0252   0.0209⎥
+	//         ⎣ 0.0593   0.1330   0.9889   0.0291⎦
 	//
-	// phiVs = ⎡-0.0027   0.0093   0.0490  -0.0155⎤
-	//         ⎢-0.0429  -0.0242   0.0361   0.1839⎥
-	//         ⎢-1.2248   5.6031   5.8094  -4.7927⎥
-	//         ⎢-0.0044  -0.3424   0.4470   0.1150⎥
-	//         ⎢-0.0742  -0.1193  -0.1116   0.0022⎥
-	//         ⎢-0.0233   0.1046   0.3853  -0.0161⎥
-	//         ⎣ 0.0001   0.0005  -0.0030   0.0082⎦
+	// phiVs = ⎡-0.0027  -0.0093  -0.0490  -0.0155⎤
+	//         ⎢-0.0429   0.0242  -0.0361   0.1839⎥
+	//         ⎢-1.2248  -5.6031  -5.8094  -4.7927⎥
+	//         ⎢-0.0044   0.3424  -0.4470   0.1150⎥
+	//         ⎢-0.0742   0.1193   0.1116   0.0022⎥
+	//         ⎢-0.0233  -0.1046  -0.3853  -0.0161⎥
+	//         ⎣ 0.0001  -0.0005   0.0030   0.0082⎦
 	//
-	// psiVs = ⎡ 0.0302  -0.3002   0.0878  -1.9583⎤
-	//         ⎢-0.0065   0.0392  -0.0118  -0.0061⎥
-	//         ⎢-0.0052  -0.0046  -0.0023   0.0008⎥
-	//         ⎣ 0.0020   0.0037  -0.1293   0.1038⎦
+	// psiVs = ⎡ 0.0302   0.3002  -0.0878  -1.9583⎤
+	//         ⎢-0.0065  -0.0392   0.0118  -0.0061⎥
+	//         ⎢-0.0052   0.0046   0.0023   0.0008⎥
+	//         ⎣ 0.0020  -0.0037   0.1293   0.1038⎦
 }
--- a/stat/cca_test.go
+++ b/stat/cca_test.go
@@ -117,34 +117,34 @@ tests:
 			ydata:     bostonData.Slice(0, 506, 7, 11),
 			wantCorrs: []float64{0.9451239443886021, 0.6786622733370654, 0.5714338361583764, 0.2009739704710440},
 			wantpVecs: mat.NewDense(7, 4, []float64{
-				-0.2574391924541903, 0.0158477516621194, 0.2122169934631024, -0.0945733803894706,
-				-0.4836594430018478, 0.3837101908138468, 0.1474448317415911, 0.6597324886718275,
-				-0.0800776365873296, 0.3493556742809252, 0.3287336458109373, -0.2862040444334655,
-				0.1277586360386374, -0.7337427663667596, 0.4851134819037011, 0.2247964865970192,
-				-0.6969432006136684, -0.4341748776002893, -0.3602872887636357, 0.0290661608626292,
-				-0.0990903250057199, 0.0503411215453873, 0.6384330631742202, 0.1022367136218303,
-				0.4260459963765036, 0.0323334351308141, -0.2289527516030810, 0.6419232947608805,
+				-0.2574391924541896, -0.015847751662118038, -0.21221699346310258, -0.09457338038947205,
+				-0.48365944300184865, -0.3837101908138455, -0.14744483174159395, 0.6597324886718278,
+				-0.08007763658732961, -0.34935567428092285, -0.3287336458109394, -0.2862040444334662,
+				0.127758636038638, 0.7337427663667616, -0.4851134819036985, 0.22479648659701942,
+				-0.6969432006136685, 0.43417487760028844, 0.360287288763638, 0.029066160862628414,
+				-0.0990903250057202, -0.05034112154538474, -0.6384330631742202, 0.10223671362182897,
+				0.42604599637650303, -0.032333435130815824, 0.22895275160308087, 0.6419232947608798,
 			}),
 			wantqVecs: mat.NewDense(4, 4, []float64{
-				0.0181660502363264, -0.1583489460479038, -0.0066723577642883, -0.9871935400650649,
-				-0.2347699045986119, 0.9483314614936594, -0.1462420505631345, -0.1554470767919033,
-				-0.9700704038477141, -0.2406071741000039, -0.0251838984227037, 0.0209134074358349,
-				0.0593000682318482, -0.1330460003097728, -0.9889057151969489, 0.0291161494720761,
+				0.018166050236326788, 0.1583489460479047, 0.006672357764289544, -0.9871935400650647,
+				-0.23476990459861324, -0.9483314614936598, 0.14624205056313114, -0.1554470767919039,
+				-0.9700704038477144, 0.24060717410000537, 0.025183898422704167, 0.020913407435834964,
+				0.05930006823184807, 0.13304600030976868, 0.9889057151969495, 0.029116149472076858,
 			}),
 			wantphiVs: mat.NewDense(7, 4, []float64{
-				-0.0027462234108197, 0.0093444513500898, 0.0489643932714296, -0.0154967189805819,
-				-0.0428564455279537, -0.0241708702119420, 0.0360723472093996, 0.1838983230588095,
-				-1.2248435648802380, 5.6030921364723980, 5.8094144583797025, -4.7926812190419676,
-				-0.0043684825094649, -0.3424101164977618, 0.4469961215717917, 0.1150161814353696,
-				-0.0741534069521954, -0.1193135794923700, -0.1115518305471460, 0.0021638758323088,
-				-0.0233270323101624, 0.1046330818178399, 0.3853045975077387, -0.0160927870102877,
-				0.0001293051387859, 0.0004540746921446, -0.0030296315865440, 0.0081895477974654,
+				-0.002746223410819314, -0.009344451350088911, -0.04896439327142919, -0.015496718980582016,
+				-0.042856445527953785, 0.024170870211944927, -0.036072347209397136, 0.18389832305881182,
+				-1.2248435648802678, -5.603092136472504, -5.809414458379886, -4.792681219042103,
+				-0.00436848250946508, 0.34241011649776265, -0.4469961215717922, 0.11501618143536857,
+				-0.07415340695219563, 0.11931357949236807, 0.1115518305471455, 0.002163875832307984,
+				-0.023327032310162924, -0.1046330818178401, -0.38530459750774165, -0.016092787010290065,
+				0.00012930513878583387, -0.0004540746921447011, 0.0030296315865439264, 0.008189547797465318,
 			}),
 			wantpsiVs: mat.NewDense(4, 4, []float64{
-				0.0301593362017375, -0.3002219289647127, 0.0878217377593682, -1.9583226531517062,
-				-0.0065483104073892, 0.0392212086716247, -0.0117570776209991, -0.0061113064481860,
-				-0.0052075523350125, -0.0045770200452960, -0.0022762313289592, 0.0008441873006821,
-				0.0020111735096327, 0.0037352799829930, -0.1292578071621794, 0.1037709056329765,
+				0.030159336201738367, 0.3002219289647159, -0.08782173775936601, -1.9583226531517122,
+				-0.00654831040738931, -0.03922120867162458, 0.011757077620998818, -0.006111306448187141,
+				-0.0052075523350125505, 0.004577020045295936, 0.0022762313289591976, 0.0008441873006823151,
+				0.0020111735096325924, -0.0037352799829939247, 0.12925780716217938, 0.10377090563297825,
 			}),
 			epsilon: 1e-12,
 		},
--- a/stat/pca_test.go
+++ b/stat/pca_test.go
@@ -52,10 +52,10 @@ tests:
 				4.9, 3.1, 1.5, 0.1,
 			}),
 			wantVecs: mat.NewDense(4, 4, []float64{
-				-0.6681110197952722, 0.7064764857539533, -0.14026590216895132, -0.18666578956412125,
-				-0.7166344774801547, -0.6427036135482664, -0.135650285905254, 0.23444848208629923,
-				-0.164411275166307, 0.11898477441068218, 0.9136367900709548, 0.35224901970831746,
-				-0.11415613655453069, -0.2714141920887426, 0.35664028439226514, -0.8866286823515034,
+				-0.6681110197952723, 0.706476485753953, 0.14026590216895127, 0.18666578956412122,
+				-0.7166344774801547, -0.6427036135482663, 0.13565028590525394, -0.23444848208629923,
+				-0.16441127516630702, 0.11898477441068217, -0.9136367900709544, -0.35224901970831735,
+				-0.11415613655453066, -0.27141419208874273, -0.3566402843922649, 0.8866286823515032,
 			}),
 			wantVars: []float64{0.1665786313282786, 0.02065509475412993, 0.007944620317765855, 0.0019327647109368329},
 			epsilon:  1e-12,
@@ -118,10 +118,10 @@ tests:
 			}),
 			weights: []float64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
 			wantVecs: mat.NewDense(4, 4, []float64{
-				-0.6681110197952722, 0.7064764857539533, -0.14026590216895132, -0.18666578956412125,
-				-0.7166344774801547, -0.6427036135482664, -0.135650285905254, 0.23444848208629923,
-				-0.164411275166307, 0.11898477441068218, 0.9136367900709548, 0.35224901970831746,
-				-0.11415613655453069, -0.2714141920887426, 0.35664028439226514, -0.8866286823515034,
+				-0.6681110197952723, 0.706476485753953, 0.14026590216895127, 0.18666578956412122,
+				-0.7166344774801547, -0.6427036135482663, 0.13565028590525394, -0.23444848208629923,
+				-0.16441127516630702, 0.11898477441068217, -0.9136367900709544, -0.35224901970831735,
+				-0.11415613655453066, -0.27141419208874273, -0.3566402843922649, 0.8866286823515032,
 			}),
 			wantVars: []float64{0.1665786313282786, 0.02065509475412993, 0.007944620317765855, 0.0019327647109368329},
 			epsilon:  1e-12,