From 295bfca71ff005698ffd1d90261a2a8cf9183eb3 Mon Sep 17 00:00:00 2001 From: soypat Date: Sat, 25 Sep 2021 16:34:30 -0300 Subject: [PATCH] lapack: add Dlatdf --- lapack/gonum/dlatdf.go | 175 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 lapack/gonum/dlatdf.go diff --git a/lapack/gonum/dlatdf.go b/lapack/gonum/dlatdf.go new file mode 100644 index 00000000..6b01fb15 --- /dev/null +++ b/lapack/gonum/dlatdf.go @@ -0,0 +1,175 @@ +// Copyright ©2021 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "math" + + "gonum.org/v1/gonum/blas/blas64" + "gonum.org/v1/gonum/lapack" +) + +// Dlatdf uses the LU factorization of the n×n matrix Z computed by Dgetc2 and +// computes a contribution to the reciprocal Dif-estimate by solving +// Z * x = b for x +// and choosing the rhs b such that the norm of x is as large as possible. +// On entry rhs = b holds the contribution from earlier solved sub-systems, and on return rhs = x. +// +// The factorization of Z returned by Dgetc2 has the form +// Z = P*L*U*Q, +// where P and Q are permutation matrices. L is lower triangular with +// unit diagonal elements and U is upper triangular. +// +// It must hold that n <= len(ipiv), n <= len(jpiv). On exit the pivot indices where row i has +// been interchanged with ipiv[i] and column j interchanged with column jpiv[j]. +// +// rhs must have n accesible elements. +// +// if ijob==2 +// First compute an approximative null-vector e of Z using DGECON, +// e is normalized and solve for Zx = +-e - f with the sign giving the greater value +// of 2-norm(x). About 5 times as expensive as Default. +// if ijob!=2 +// Local look ahead strategy where all entries of the rhs. +// b is chosen as either +1 or -1 (Default). +// +// rdsum is the sum of squares of computed contributions to the Dif-estimate +// under computation by Dtgsyl, where the scaling factor rdscal (see below) +// has been factored out. +// +// rdscal is the scaling factor used to prevent overflow in rdsum. +// +// NOTE: rdscal and rdsum only makes sense when Dtgsy2 is called by Dtgsyl. +// +// scal is rdscal updated w.r.t. the current contributions in rdsum. +// if trans = 'T' +// scal == rdscal +// +// sum is the sum of squares updated with the contributions from the current sub-system. +// If trans = 'T' +// sum = rdsum +// +// Dlatdf implicitly expects z matrix to be at most an 8×8 matrix. This is fulfilled by it's only caller, Dtgsy2. +// +// Dlatdf is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlatdf(ijob, n int, z []float64, ldz int, rhs []float64, rdsum, rdscal float64, ipiv, jpiv []int) (sum, scale float64) { + switch { + case n < 0: + panic(nLT0) + case ldz < max(1, n): + panic(badLdZ) + case len(rhs) < n: + panic(shortRHS) + case len(ipiv) < n: + panic(badLenIpiv) + case len(jpiv) < n: + panic(badLenJpiv) + case n > 8: + // Dlatdf expects z to be less than 8 in dimension. + panic("lapack: Dlatdf expects z to be at most 8×8") + } + + // Quick return if possible. + if n == 0 { + return + } + + if len(z) < (n-1)*ldz+n { + panic(shortZ) + } + + bi := blas64.Implementation() + var temp float64 + xp := make([]float64, n) + if ijob == 2 { + // Compute approximate nullvector xm of z when ijob == 2. + xm := make([]float64, n) + work := make([]float64, 4*n) + impl.Dgecon(lapack.MaxRowSum, n, z, ldz, 1, work, make([]int, n)) // iwork is unused. + bi.Dcopy(n, work[n:], 1, xm, 1) + + // Compute rhs. + impl.Dlaswp(1, xm, 1, 0, n-2, ipiv[:n-1], -1) + temp = 1 / math.Sqrt(bi.Ddot(n, xm, 1, xm, 1)) + bi.Dscal(n, temp, xm, 1) + bi.Dcopy(n, xm, 1, xp, 1) + bi.Daxpy(n, 1.0, rhs, 1, xp, 1) + bi.Daxpy(n, -1.0, xm, 1, rhs, 1) + impl.Dgesc2(n, z, ldz, rhs, ipiv, jpiv) + impl.Dgesc2(n, z, ldz, xp, ipiv, jpiv) + if bi.Dasum(n, xp, 1) > bi.Dasum(n, rhs, 1) { + bi.Dcopy(n, xp, 1, rhs, 1) + } + + // Compute sum of squares. + rdscal, rdsum = impl.Dlassq(n, rhs, 1, rdscal, rdsum) + return rdsum, rdscal + } + + // Apply permutations ipiv to rhs + // If ijob != 2 uses look-ahead strategy. + impl.Dlaswp(1, rhs, 1, 0, n-2, ipiv[:n-1], 1) + + // Solve for L-part choosing rhs either to +1 or -1. + pmone := -1.0 + for j := 0; j < n-2; j++ { + bp := rhs[j] + 1 + bm := rhs[j] - 1 + + // Look-ahead for L-part rhs[0:n-2] = + or -1, splus and + // smin computed more efficiently than in Bsolve [1]. + splus := 1 + bi.Ddot(n-j-1, z[(j+1)*ldz+j:], ldz, z[(j+1)*ldz+j:], ldz) + sminu := bi.Ddot(n-j-1, z[(j+1)*ldz+j:], ldz, rhs[j+1:], 1) + splus *= rhs[j] + switch { + case splus > sminu: + rhs[j] = bp + case sminu > splus: + rhs[j] = bm + default: + // In this case the updating sums are equal and we can + // choose rsh[j] +1 or -1. The first time this happens + // we choose -1, thereafter +1. This is a simple way to + // get good estimates of matrices like Byers well-known + // example (see [1]). (Not done in Bsolve.) + rhs[j] += pmone + pmone = 1.0 + } + + // Compute remaining rhs. + temp = -rhs[j] + bi.Daxpy(n-j-1, temp, z[(j+1)*ldz+j:], ldz, rhs[j+1:], 1) + } + + // Solve for U-part, look-ahead for rhs[n-1] = +-1. This is not done + // in Bsolve and will hopefully give us a better estimate because + // any ill-conditioning of the original matrix is transferred to U + // and not to L. U[n-1,n-1] is an approximation to sigma_min(LU). + bi.Dcopy(n-1, rhs, 1, xp, 1) + xp[n-1] = rhs[n-1] + 1 + rhs[n-1] -= 1 + splus := 0.0 + sminu := 0.0 + for i := n - 1; i >= 0; i-- { + temp = 1 / z[i*ldz+i] + xp[i] *= temp + rhs[i] *= temp + for k := i + 1; k < n; k++ { + xp[i] -= float64(xp[k] * (z[i*ldz+k] * temp)) + rhs[i] -= float64(rhs[k] * (z[i*ldz+k] * temp)) + } + splus += math.Abs(xp[i]) + sminu += math.Abs(rhs[i]) + } + if splus > sminu { + bi.Dcopy(n, xp, 1, rhs, 1) + } + + // Apply the permutations jpiv to the computed solution (rhs). + impl.Dlaswp(1, rhs, 1, 0, n-2, jpiv[:n-1], -1) + // Compute the sum of squares. + rdscal, rdsum = impl.Dlassq(n, rhs, 1, rdscal, rdsum) + return rdsum, rdscal +}