lapack/gonum: clean up Dlatdf (#1739)

2025-10-05 23:26:52 +08:00 · 2021-11-17 07:59:35 +01:00
parent 030e69b6a7
commit 897ca24e8b
3 changed files with 115 additions and 109 deletions
--- a/lapack/gonum/dlatdf.go
+++ b/lapack/gonum/dlatdf.go
@@ -11,64 +11,52 @@ import (
 	"gonum.org/v1/gonum/lapack"
 )

-// Dlatdf uses the LU factorization of the n×n matrix Z computed by Dgetc2 and
-// computes a contribution to the reciprocal Dif-estimate by solving
-//  Z * x = b for x
-// and choosing the rhs b such that the norm of x is as large as possible.
-// On entry rhs = b holds the contribution from earlier solved sub-systems, and on return rhs = x.
+// Dlatdf computes a contribution to the reciprocal Dif-estimate by solving
+//  Z * x = h - f
+// and choosing the vector h such that the norm of x is as large as possible.
 //
-// The factorization of Z returned by Dgetc2 has the form
-//  Z = P*L*U*Q,
-// where P and Q are permutation matrices. L is lower triangular with
-// unit diagonal elements and U is upper triangular.
+// The n×n matrix Z is represented by its LU factorization as computed by Dgetc2
+// and has the form
+//  Z = P * L * U * Q
+// where P and Q are permutation matrices, L is lower triangular with unit
+// diagonal elements and U is upper triangular.
 //
-// It must hold that n <= len(ipiv), n <= len(jpiv). On exit the pivot indices where row i has
-// been interchanged with ipiv[i] and column j interchanged with column jpiv[j].
+// job specifies the heuristic method for computing the contribution.
 //
-// rhs must have n accesible elements.
+// If job is lapack.LocalLookAhead, all entries of h are chosen as either +1 or
+// -1.
 //
-// if ijob==2
-//  First compute an approximative null-vector e of Z using DGECON,
-//  e is normalized and solve for Zx = +-e - f with the sign giving the greater value
-//  of 2-norm(x). About 5 times as expensive as Default.
-// if ijob!=2
-//  Local look ahead strategy where all entries of the rhs.
-//  b is chosen as either +1 or -1 (Default).
+// If job is lapack.NormalizedNullVector, an approximate null-vector e of Z is
+// computed using Dgecon and normalized. h is chosen as ±e with the sign giving
+// the greater value of 2-norm(x). This strategy is about 5 times as expensive
+// as LocalLookAhead.
 //
-// rdsum is the sum of squares of computed contributions to the Dif-estimate
-// under computation by Dtgsyl, where the scaling factor rdscal (see below)
-// has been factored out.
+// On entry, rhs holds the contribution f from earlier solved sub-systems. On
+// return, rhs holds the solution x.
 //
-// rdscal is the scaling factor used to prevent overflow in rdsum.
+// ipiv and jpiv contain the pivot indices as returned by Dgetc2: row i of the
+// matrix has been interchanged with row ipiv[i] and column j of the matrix has
+// been interchanged with column jpiv[j].
 //
-// NOTE: rdscal and rdsum only makes sense when Dtgsy2 is called by Dtgsyl.
+// n must be at most 8, ipiv and jpiv must have length n, and rhs must have
+// length at least n, otherwise Dlatdf will panic.
 //
-// scal is rdscal updated w.r.t. the current contributions in rdsum.
-// if trans = 'T'
-//  scal == rdscal
-//
-// sum is the sum of squares updated with the contributions from the current sub-system.
-// If trans = 'T'
-//  sum = rdsum
-//
-// Dlatdf implicitly expects z matrix to be at most an 8×8 matrix. This is fulfilled by it's only caller, Dtgsy2.
+// rdsum and rdscal represent the sum of squares of computed contributions to
+// the Dif-estimate from earlier solved sub-systems. rdscal is the scaling
+// factor used to prevent overflow in rdsum. Dlatdf returns this sum of squares
+// updated with the contributions from the current sub-system.
 //
 // Dlatdf is an internal routine. It is exported for testing purposes.
-func (impl Implementation) Dlatdf(ijob, n int, z []float64, ldz int, rhs []float64, rdsum, rdscal float64, ipiv, jpiv []int) (sum, scale float64) {
+func (impl Implementation) Dlatdf(job lapack.MaximizeNormXJob, n int, z []float64, ldz int, rhs []float64, rdsum, rdscal float64, ipiv, jpiv []int) (scale, sum float64) {
 	switch {
+	case job != lapack.LocalLookAhead && job != lapack.NormalizedNullVector:
+		panic(badMaximizeNormXJob)
 	case n < 0:
 		panic(nLT0)
+	case n > 8:
+		panic("lapack: n > 8")
 	case ldz < max(1, n):
 		panic(badLdZ)
-	case len(rhs) < n:
-		panic(shortRHS)
-	case len(ipiv) < n:
-		panic(badLenIpiv)
-	case len(jpiv) < n:
-		panic(badLenJpiv)
-	case n > 8:
-		// Dlatdf expects z to be less than 8 in dimension.
-		panic("lapack: Dlatdf expects z to be at most 8×8")
 	}

 	// Quick return if possible.
@@ -76,40 +64,51 @@ func (impl Implementation) Dlatdf(ijob, n int, z []float64, ldz int, rhs []float
 		return
 	}

-	if len(z) < (n-1)*ldz+n {
+	switch {
+	case len(z) < (n-1)*ldz+n:
 		panic(shortZ)
+	case len(rhs) < n:
+		panic(shortRHS)
+	case len(ipiv) != n:
+		panic(badLenIpiv)
+	case len(jpiv) != n:
+		panic(badLenJpiv)
 	}

+	const maxdim = 8
+	var (
+		xps   [maxdim]float64
+		xms   [maxdim]float64
+		work  [4 * maxdim]float64
+		iwork [maxdim]int
+	)
 	bi := blas64.Implementation()
-	var temp float64
-	xp := make([]float64, n)
-	if ijob == 2 {
-		// Compute approximate nullvector xm of z when ijob == 2.
-		xm := make([]float64, n)
-		work := make([]float64, 4*n)
-		impl.Dgecon(lapack.MaxRowSum, n, z, ldz, 1, work, make([]int, n)) // iwork is unused.
+	xp := xps[:n]
+	xm := xms[:n]
+	if job == lapack.NormalizedNullVector {
+		// Compute approximate nullvector xm of Z.
+		_ = impl.Dgecon(lapack.MaxRowSum, n, z, ldz, 1, work[:], iwork[:])
+		// This relies on undocumented content in work[n:2*n] stored by Dgecon.
 		bi.Dcopy(n, work[n:], 1, xm, 1)

 		// Compute rhs.
 		impl.Dlaswp(1, xm, 1, 0, n-2, ipiv[:n-1], -1)
-		temp = 1 / math.Sqrt(bi.Ddot(n, xm, 1, xm, 1))
-		bi.Dscal(n, temp, xm, 1)
+		tmp := 1 / bi.Dnrm2(n, xm, 1)
+		bi.Dscal(n, tmp, xm, 1)
 		bi.Dcopy(n, xm, 1, xp, 1)
-		bi.Daxpy(n, 1.0, rhs, 1, xp, 1)
+		bi.Daxpy(n, 1, rhs, 1, xp, 1)
 		bi.Daxpy(n, -1.0, xm, 1, rhs, 1)
-		impl.Dgesc2(n, z, ldz, rhs, ipiv, jpiv)
-		impl.Dgesc2(n, z, ldz, xp, ipiv, jpiv)
+		_ = impl.Dgesc2(n, z, ldz, rhs, ipiv, jpiv)
+		_ = impl.Dgesc2(n, z, ldz, xp, ipiv, jpiv)
 		if bi.Dasum(n, xp, 1) > bi.Dasum(n, rhs, 1) {
 			bi.Dcopy(n, xp, 1, rhs, 1)
 		}

-		// Compute sum of squares.
-		rdscal, rdsum = impl.Dlassq(n, rhs, 1, rdscal, rdsum)
-		return rdsum, rdscal
+		// Compute and return the updated sum of squares.
+		return impl.Dlassq(n, rhs, 1, rdscal, rdsum)
 	}

 	// Apply permutations ipiv to rhs
-	// If ijob != 2 uses look-ahead strategy.
 	impl.Dlaswp(1, rhs, 1, 0, n-2, ipiv[:n-1], 1)

 	// Solve for L-part choosing rhs either to +1 or -1.
@@ -118,8 +117,8 @@ func (impl Implementation) Dlatdf(ijob, n int, z []float64, ldz int, rhs []float
 		bp := rhs[j] + 1
 		bm := rhs[j] - 1

-		// Look-ahead for L-part rhs[0:n-2] = + or -1, splus and
-		// smin computed more efficiently than in Bsolve [1].
+		// Look-ahead for L-part rhs[0:n-2] = +1 or -1, splus and sminu computed
+		// more efficiently than in https://doi.org/10.1109/9.29404.
 		splus := 1 + bi.Ddot(n-j-1, z[(j+1)*ldz+j:], ldz, z[(j+1)*ldz+j:], ldz)
 		sminu := bi.Ddot(n-j-1, z[(j+1)*ldz+j:], ldz, rhs[j+1:], 1)
 		splus *= rhs[j]
@@ -129,36 +128,33 @@ func (impl Implementation) Dlatdf(ijob, n int, z []float64, ldz int, rhs []float
 		case sminu > splus:
 			rhs[j] = bm
 		default:
-			// In this case the updating sums are equal and we can
-			// choose rsh[j] +1 or -1. The first time this happens
-			// we choose -1, thereafter +1. This is a simple way to
-			// get good estimates of matrices like Byers well-known
-			// example (see [1]). (Not done in Bsolve.)
+			// In this case the updating sums are equal and we can choose rsh[j]
+			// +1 or -1. The first time this happens we choose -1, thereafter
+			// +1. This is a simple way to get good estimates of matrices like
+			// Byers well-known example (see https://doi.org/10.1109/9.29404).
 			rhs[j] += pmone
-			pmone = 1.0
+			pmone = 1
 		}

 		// Compute remaining rhs.
-		temp = -rhs[j]
-		bi.Daxpy(n-j-1, temp, z[(j+1)*ldz+j:], ldz, rhs[j+1:], 1)
+		bi.Daxpy(n-j-1, -rhs[j], z[(j+1)*ldz+j:], ldz, rhs[j+1:], 1)
 	}

-	// Solve for U-part, look-ahead for rhs[n-1] = +-1. This is not done
-	// in Bsolve and will hopefully give us a better estimate because
-	// any ill-conditioning of the original matrix is transferred to U
-	// and not to L. U[n-1,n-1] is an approximation to sigma_min(LU).
+	// Solve for U-part, look-ahead for rhs[n-1] = ±1. This is not done in
+	// Bsolve and will hopefully give us a better estimate because any
+	// ill-conditioning of the original matrix is transferred to U and not to L.
+	// U[n-1,n-1] is an approximation to sigma_min(LU).
 	bi.Dcopy(n-1, rhs, 1, xp, 1)
 	xp[n-1] = rhs[n-1] + 1
 	rhs[n-1] -= 1
-	splus := 0.0
-	sminu := 0.0
+	var splus, sminu float64
 	for i := n - 1; i >= 0; i-- {
-		temp = 1 / z[i*ldz+i]
-		xp[i] *= temp
-		rhs[i] *= temp
+		tmp := 1 / z[i*ldz+i]
+		xp[i] *= tmp
+		rhs[i] *= tmp
 		for k := i + 1; k < n; k++ {
-			xp[i] -= float64(xp[k] * (z[i*ldz+k] * temp))
-			rhs[i] -= float64(rhs[k] * (z[i*ldz+k] * temp))
+			xp[i] -= xp[k] * (z[i*ldz+k] * tmp)
+			rhs[i] -= rhs[k] * (z[i*ldz+k] * tmp)
 		}
 		splus += math.Abs(xp[i])
 		sminu += math.Abs(rhs[i])
@@ -169,7 +165,7 @@ func (impl Implementation) Dlatdf(ijob, n int, z []float64, ldz int, rhs []float

 	// Apply the permutations jpiv to the computed solution (rhs).
 	impl.Dlaswp(1, rhs, 1, 0, n-2, jpiv[:n-1], -1)
-	// Compute the sum of squares.
-	rdscal, rdsum = impl.Dlassq(n, rhs, 1, rdscal, rdsum)
-	return rdsum, rdscal
+
+	// Compute and return the updated sum of squares.
+	return impl.Dlassq(n, rhs, 1, rdscal, rdsum)
 }
--- a/lapack/gonum/errors.go
+++ b/lapack/gonum/errors.go
@@ -19,6 +19,7 @@ const (
 	badGenOrtho         = "lapack: bad GenOrtho"
 	badLeftEVJob        = "lapack: bad LeftEVJob"
 	badMatrixType       = "lapack: bad MatrixType"
+	badMaximizeNormXJob = "lapack: bad MaximizeNormXJob"
 	badNorm             = "lapack: bad Norm"
 	badPivot            = "lapack: bad Pivot"
 	badRightEVJob       = "lapack: bad RightEVJob"
--- a/lapack/lapack.go
+++ b/lapack/lapack.go
@@ -215,3 +215,12 @@ const (
 	EVAllMulQ  EVHowMany = 'B' // Compute all right and/or left eigenvectors multiplied by an input matrix.
 	EVSelected EVHowMany = 'S' // Compute selected right and/or left eigenvectors.
 )
+
+// MaximizeNormX specifies the heuristic method for computing a contribution to
+// the reciprocal Dif-estimate in Dlatdf.
+type MaximizeNormXJob byte
+
+const (
+	LocalLookAhead       MaximizeNormXJob = 0 // Solve Z*x=h-f where h is a vector of ±1.
+	NormalizedNullVector MaximizeNormXJob = 2 // Compute an approximate null-vector e of Z, normalize e and solve Z*x=±e-f.
+)