diff --git a/lapack/gonum/dlatdf.go b/lapack/gonum/dlatdf.go index 6b01fb15..d6d99e80 100644 --- a/lapack/gonum/dlatdf.go +++ b/lapack/gonum/dlatdf.go @@ -11,64 +11,52 @@ import ( "gonum.org/v1/gonum/lapack" ) -// Dlatdf uses the LU factorization of the n×n matrix Z computed by Dgetc2 and -// computes a contribution to the reciprocal Dif-estimate by solving -// Z * x = b for x -// and choosing the rhs b such that the norm of x is as large as possible. -// On entry rhs = b holds the contribution from earlier solved sub-systems, and on return rhs = x. +// Dlatdf computes a contribution to the reciprocal Dif-estimate by solving +// Z * x = h - f +// and choosing the vector h such that the norm of x is as large as possible. // -// The factorization of Z returned by Dgetc2 has the form -// Z = P*L*U*Q, -// where P and Q are permutation matrices. L is lower triangular with -// unit diagonal elements and U is upper triangular. +// The n×n matrix Z is represented by its LU factorization as computed by Dgetc2 +// and has the form +// Z = P * L * U * Q +// where P and Q are permutation matrices, L is lower triangular with unit +// diagonal elements and U is upper triangular. // -// It must hold that n <= len(ipiv), n <= len(jpiv). On exit the pivot indices where row i has -// been interchanged with ipiv[i] and column j interchanged with column jpiv[j]. +// job specifies the heuristic method for computing the contribution. // -// rhs must have n accesible elements. +// If job is lapack.LocalLookAhead, all entries of h are chosen as either +1 or +// -1. // -// if ijob==2 -// First compute an approximative null-vector e of Z using DGECON, -// e is normalized and solve for Zx = +-e - f with the sign giving the greater value -// of 2-norm(x). About 5 times as expensive as Default. -// if ijob!=2 -// Local look ahead strategy where all entries of the rhs. -// b is chosen as either +1 or -1 (Default). +// If job is lapack.NormalizedNullVector, an approximate null-vector e of Z is +// computed using Dgecon and normalized. h is chosen as ±e with the sign giving +// the greater value of 2-norm(x). This strategy is about 5 times as expensive +// as LocalLookAhead. // -// rdsum is the sum of squares of computed contributions to the Dif-estimate -// under computation by Dtgsyl, where the scaling factor rdscal (see below) -// has been factored out. +// On entry, rhs holds the contribution f from earlier solved sub-systems. On +// return, rhs holds the solution x. // -// rdscal is the scaling factor used to prevent overflow in rdsum. +// ipiv and jpiv contain the pivot indices as returned by Dgetc2: row i of the +// matrix has been interchanged with row ipiv[i] and column j of the matrix has +// been interchanged with column jpiv[j]. // -// NOTE: rdscal and rdsum only makes sense when Dtgsy2 is called by Dtgsyl. +// n must be at most 8, ipiv and jpiv must have length n, and rhs must have +// length at least n, otherwise Dlatdf will panic. // -// scal is rdscal updated w.r.t. the current contributions in rdsum. -// if trans = 'T' -// scal == rdscal -// -// sum is the sum of squares updated with the contributions from the current sub-system. -// If trans = 'T' -// sum = rdsum -// -// Dlatdf implicitly expects z matrix to be at most an 8×8 matrix. This is fulfilled by it's only caller, Dtgsy2. +// rdsum and rdscal represent the sum of squares of computed contributions to +// the Dif-estimate from earlier solved sub-systems. rdscal is the scaling +// factor used to prevent overflow in rdsum. Dlatdf returns this sum of squares +// updated with the contributions from the current sub-system. // // Dlatdf is an internal routine. It is exported for testing purposes. -func (impl Implementation) Dlatdf(ijob, n int, z []float64, ldz int, rhs []float64, rdsum, rdscal float64, ipiv, jpiv []int) (sum, scale float64) { +func (impl Implementation) Dlatdf(job lapack.MaximizeNormXJob, n int, z []float64, ldz int, rhs []float64, rdsum, rdscal float64, ipiv, jpiv []int) (scale, sum float64) { switch { + case job != lapack.LocalLookAhead && job != lapack.NormalizedNullVector: + panic(badMaximizeNormXJob) case n < 0: panic(nLT0) + case n > 8: + panic("lapack: n > 8") case ldz < max(1, n): panic(badLdZ) - case len(rhs) < n: - panic(shortRHS) - case len(ipiv) < n: - panic(badLenIpiv) - case len(jpiv) < n: - panic(badLenJpiv) - case n > 8: - // Dlatdf expects z to be less than 8 in dimension. - panic("lapack: Dlatdf expects z to be at most 8×8") } // Quick return if possible. @@ -76,40 +64,51 @@ func (impl Implementation) Dlatdf(ijob, n int, z []float64, ldz int, rhs []float return } - if len(z) < (n-1)*ldz+n { + switch { + case len(z) < (n-1)*ldz+n: panic(shortZ) + case len(rhs) < n: + panic(shortRHS) + case len(ipiv) != n: + panic(badLenIpiv) + case len(jpiv) != n: + panic(badLenJpiv) } + const maxdim = 8 + var ( + xps [maxdim]float64 + xms [maxdim]float64 + work [4 * maxdim]float64 + iwork [maxdim]int + ) bi := blas64.Implementation() - var temp float64 - xp := make([]float64, n) - if ijob == 2 { - // Compute approximate nullvector xm of z when ijob == 2. - xm := make([]float64, n) - work := make([]float64, 4*n) - impl.Dgecon(lapack.MaxRowSum, n, z, ldz, 1, work, make([]int, n)) // iwork is unused. + xp := xps[:n] + xm := xms[:n] + if job == lapack.NormalizedNullVector { + // Compute approximate nullvector xm of Z. + _ = impl.Dgecon(lapack.MaxRowSum, n, z, ldz, 1, work[:], iwork[:]) + // This relies on undocumented content in work[n:2*n] stored by Dgecon. bi.Dcopy(n, work[n:], 1, xm, 1) // Compute rhs. impl.Dlaswp(1, xm, 1, 0, n-2, ipiv[:n-1], -1) - temp = 1 / math.Sqrt(bi.Ddot(n, xm, 1, xm, 1)) - bi.Dscal(n, temp, xm, 1) + tmp := 1 / bi.Dnrm2(n, xm, 1) + bi.Dscal(n, tmp, xm, 1) bi.Dcopy(n, xm, 1, xp, 1) - bi.Daxpy(n, 1.0, rhs, 1, xp, 1) + bi.Daxpy(n, 1, rhs, 1, xp, 1) bi.Daxpy(n, -1.0, xm, 1, rhs, 1) - impl.Dgesc2(n, z, ldz, rhs, ipiv, jpiv) - impl.Dgesc2(n, z, ldz, xp, ipiv, jpiv) + _ = impl.Dgesc2(n, z, ldz, rhs, ipiv, jpiv) + _ = impl.Dgesc2(n, z, ldz, xp, ipiv, jpiv) if bi.Dasum(n, xp, 1) > bi.Dasum(n, rhs, 1) { bi.Dcopy(n, xp, 1, rhs, 1) } - // Compute sum of squares. - rdscal, rdsum = impl.Dlassq(n, rhs, 1, rdscal, rdsum) - return rdsum, rdscal + // Compute and return the updated sum of squares. + return impl.Dlassq(n, rhs, 1, rdscal, rdsum) } // Apply permutations ipiv to rhs - // If ijob != 2 uses look-ahead strategy. impl.Dlaswp(1, rhs, 1, 0, n-2, ipiv[:n-1], 1) // Solve for L-part choosing rhs either to +1 or -1. @@ -118,8 +117,8 @@ func (impl Implementation) Dlatdf(ijob, n int, z []float64, ldz int, rhs []float bp := rhs[j] + 1 bm := rhs[j] - 1 - // Look-ahead for L-part rhs[0:n-2] = + or -1, splus and - // smin computed more efficiently than in Bsolve [1]. + // Look-ahead for L-part rhs[0:n-2] = +1 or -1, splus and sminu computed + // more efficiently than in https://doi.org/10.1109/9.29404. splus := 1 + bi.Ddot(n-j-1, z[(j+1)*ldz+j:], ldz, z[(j+1)*ldz+j:], ldz) sminu := bi.Ddot(n-j-1, z[(j+1)*ldz+j:], ldz, rhs[j+1:], 1) splus *= rhs[j] @@ -129,36 +128,33 @@ func (impl Implementation) Dlatdf(ijob, n int, z []float64, ldz int, rhs []float case sminu > splus: rhs[j] = bm default: - // In this case the updating sums are equal and we can - // choose rsh[j] +1 or -1. The first time this happens - // we choose -1, thereafter +1. This is a simple way to - // get good estimates of matrices like Byers well-known - // example (see [1]). (Not done in Bsolve.) + // In this case the updating sums are equal and we can choose rsh[j] + // +1 or -1. The first time this happens we choose -1, thereafter + // +1. This is a simple way to get good estimates of matrices like + // Byers well-known example (see https://doi.org/10.1109/9.29404). rhs[j] += pmone - pmone = 1.0 + pmone = 1 } // Compute remaining rhs. - temp = -rhs[j] - bi.Daxpy(n-j-1, temp, z[(j+1)*ldz+j:], ldz, rhs[j+1:], 1) + bi.Daxpy(n-j-1, -rhs[j], z[(j+1)*ldz+j:], ldz, rhs[j+1:], 1) } - // Solve for U-part, look-ahead for rhs[n-1] = +-1. This is not done - // in Bsolve and will hopefully give us a better estimate because - // any ill-conditioning of the original matrix is transferred to U - // and not to L. U[n-1,n-1] is an approximation to sigma_min(LU). + // Solve for U-part, look-ahead for rhs[n-1] = ±1. This is not done in + // Bsolve and will hopefully give us a better estimate because any + // ill-conditioning of the original matrix is transferred to U and not to L. + // U[n-1,n-1] is an approximation to sigma_min(LU). bi.Dcopy(n-1, rhs, 1, xp, 1) xp[n-1] = rhs[n-1] + 1 rhs[n-1] -= 1 - splus := 0.0 - sminu := 0.0 + var splus, sminu float64 for i := n - 1; i >= 0; i-- { - temp = 1 / z[i*ldz+i] - xp[i] *= temp - rhs[i] *= temp + tmp := 1 / z[i*ldz+i] + xp[i] *= tmp + rhs[i] *= tmp for k := i + 1; k < n; k++ { - xp[i] -= float64(xp[k] * (z[i*ldz+k] * temp)) - rhs[i] -= float64(rhs[k] * (z[i*ldz+k] * temp)) + xp[i] -= xp[k] * (z[i*ldz+k] * tmp) + rhs[i] -= rhs[k] * (z[i*ldz+k] * tmp) } splus += math.Abs(xp[i]) sminu += math.Abs(rhs[i]) @@ -169,7 +165,7 @@ func (impl Implementation) Dlatdf(ijob, n int, z []float64, ldz int, rhs []float // Apply the permutations jpiv to the computed solution (rhs). impl.Dlaswp(1, rhs, 1, 0, n-2, jpiv[:n-1], -1) - // Compute the sum of squares. - rdscal, rdsum = impl.Dlassq(n, rhs, 1, rdscal, rdsum) - return rdsum, rdscal + + // Compute and return the updated sum of squares. + return impl.Dlassq(n, rhs, 1, rdscal, rdsum) } diff --git a/lapack/gonum/errors.go b/lapack/gonum/errors.go index 406a8c10..47652841 100644 --- a/lapack/gonum/errors.go +++ b/lapack/gonum/errors.go @@ -7,31 +7,32 @@ package gonum // This list is duplicated in netlib/lapack/netlib. Keep in sync. const ( // Panic strings for bad enumeration values. - badApplyOrtho = "lapack: bad ApplyOrtho" - badBalanceJob = "lapack: bad BalanceJob" - badDiag = "lapack: bad Diag" - badDirect = "lapack: bad Direct" - badEVComp = "lapack: bad EVComp" - badEVHowMany = "lapack: bad EVHowMany" - badEVJob = "lapack: bad EVJob" - badEVSide = "lapack: bad EVSide" - badGSVDJob = "lapack: bad GSVDJob" - badGenOrtho = "lapack: bad GenOrtho" - badLeftEVJob = "lapack: bad LeftEVJob" - badMatrixType = "lapack: bad MatrixType" - badNorm = "lapack: bad Norm" - badPivot = "lapack: bad Pivot" - badRightEVJob = "lapack: bad RightEVJob" - badSVDJob = "lapack: bad SVDJob" - badSchurComp = "lapack: bad SchurComp" - badSchurJob = "lapack: bad SchurJob" - badSide = "lapack: bad Side" - badSort = "lapack: bad Sort" - badStoreV = "lapack: bad StoreV" - badTrans = "lapack: bad Trans" - badUpdateSchurComp = "lapack: bad UpdateSchurComp" - badUplo = "lapack: bad Uplo" - bothSVDOver = "lapack: both jobU and jobVT are lapack.SVDOverwrite" + badApplyOrtho = "lapack: bad ApplyOrtho" + badBalanceJob = "lapack: bad BalanceJob" + badDiag = "lapack: bad Diag" + badDirect = "lapack: bad Direct" + badEVComp = "lapack: bad EVComp" + badEVHowMany = "lapack: bad EVHowMany" + badEVJob = "lapack: bad EVJob" + badEVSide = "lapack: bad EVSide" + badGSVDJob = "lapack: bad GSVDJob" + badGenOrtho = "lapack: bad GenOrtho" + badLeftEVJob = "lapack: bad LeftEVJob" + badMatrixType = "lapack: bad MatrixType" + badMaximizeNormXJob = "lapack: bad MaximizeNormXJob" + badNorm = "lapack: bad Norm" + badPivot = "lapack: bad Pivot" + badRightEVJob = "lapack: bad RightEVJob" + badSVDJob = "lapack: bad SVDJob" + badSchurComp = "lapack: bad SchurComp" + badSchurJob = "lapack: bad SchurJob" + badSide = "lapack: bad Side" + badSort = "lapack: bad Sort" + badStoreV = "lapack: bad StoreV" + badTrans = "lapack: bad Trans" + badUpdateSchurComp = "lapack: bad UpdateSchurComp" + badUplo = "lapack: bad Uplo" + bothSVDOver = "lapack: both jobU and jobVT are lapack.SVDOverwrite" // Panic strings for bad numerical and string values. badIfst = "lapack: ifst out of range" diff --git a/lapack/lapack.go b/lapack/lapack.go index 947cbe45..4b6cb9f5 100644 --- a/lapack/lapack.go +++ b/lapack/lapack.go @@ -215,3 +215,12 @@ const ( EVAllMulQ EVHowMany = 'B' // Compute all right and/or left eigenvectors multiplied by an input matrix. EVSelected EVHowMany = 'S' // Compute selected right and/or left eigenvectors. ) + +// MaximizeNormX specifies the heuristic method for computing a contribution to +// the reciprocal Dif-estimate in Dlatdf. +type MaximizeNormXJob byte + +const ( + LocalLookAhead MaximizeNormXJob = 0 // Solve Z*x=h-f where h is a vector of ±1. + NormalizedNullVector MaximizeNormXJob = 2 // Compute an approximate null-vector e of Z, normalize e and solve Z*x=±e-f. +)