diff --git a/cgo/lapack.go b/cgo/lapack.go index a876aec6..5035901f 100644 --- a/cgo/lapack.go +++ b/cgo/lapack.go @@ -14,6 +14,7 @@ import ( // Copied from lapack/native. Keep in sync. const ( absIncNotOne = "lapack: increment not one or negative one" + badDiag = "lapack: bad diag" badDirect = "lapack: bad direct" badIpiv = "lapack: insufficient permutation length" badLdA = "lapack: index of a out of range" diff --git a/native/dlacn2.go b/native/dlacn2.go new file mode 100644 index 00000000..04184a4b --- /dev/null +++ b/native/dlacn2.go @@ -0,0 +1,119 @@ +package native + +import ( + "math" + + "github.com/gonum/blas/blas64" +) + +// Dlacn2 estimates the 1-norm of an n×n matrix A using sequential updates with +// matrix-vector products provided externally. +// +// Dlacn2 is called sequentially. In between calls, x should be overwritten by +// A * X if kase == 1 +// A^T * X if kase == 2 +// all other prameters should be unchanged during sequential calls, and the updated +// values of est and kase should be used. On the final return (when kase is returned +// as 0), V = A * W, where est = norm(V) / norm(W). +// +// isign, v, and x must all have length n and will panic otherwise. isave is used +// for temporary storage. +func (impl Implementation) Dlacn2(n int, v, x []float64, isgn []int, est float64, kase int, isave [3]int) (float64, int) { + checkVector(n, x, 1) + checkVector(n, v, 1) + if len(isgn) < n { + panic("lapack: insufficient isgn length") + } + if isave[0] < 1 || isave[0] > 5 { + panic("lapack: bad isave value") + } + itmax := 5 + bi := blas64.Implementation() + if kase == 0 { + for i := 0; i < n; i++ { + x[i] = 1 / float64(n) + } + kase = 1 + isave[0] = 1 + return est, kase + } + switch isave[0] { + default: + panic("unknown case") + case 1: + if n == 1 { + v[0] = x[0] + est = math.Abs(v[0]) + kase = 0 + return est, kase + } + est = bi.Dasum(n, x, 1) + for i := 0; i < n; i++ { + x[i] = math.Copysign(1, x[i]) + isgn[i] = int(x[i]) + } + kase = 2 + isave[0] = 2 + return est, kase + case 2: + isave[1] = bi.Idamax(n, x, 1) + isave[2] = 2 + for i := 0; i < n; i++ { + x[i] = 0 + } + x[isave[1]] = 1 + kase = 1 + isave[0] = 3 + return est, kase + case 3: + bi.Dcopy(n, x, 1, v, 1) + estold := est + est = bi.Dasum(n, v, 1) + sameSigns := true + for i := 0; i < n; i++ { + if int(math.Copysign(1, x[i])) != isgn[i] { + sameSigns = false + break + } + } + if !sameSigns && est > estold { + for i := 0; i < n; i++ { + x[i] = math.Copysign(1, x[i]) + isgn[i] = int(x[i]) + } + kase = 2 + isave[0] = 4 + return est, kase + } + case 4: + jlast := isave[1] + isave[1] = bi.Idamax(n, x, 1) + if x[jlast] != math.Abs(x[isave[1]]) && isave[2] < itmax { + isave[2] += 1 + for i := 0; i < n; i++ { + x[i] = 0 + } + x[isave[1]] = 1 + kase = 1 + isave[0] = 3 + return est, kase + } + case 5: + tmp := 2 * (bi.Dasum(n, x, 1)) / float64(3*n) + if tmp > est { + bi.Dcopy(n, x, 1, v, 1) + est = tmp + } + kase = 0 + return est, kase + } + // Iteration complete. Final stage + altsgn := 1.0 + for i := 0; i < n; i++ { + x[i] = altsgn * (1 + float64(i)/float64(n-1)) + altsgn *= -1 + } + kase = 1 + isave[0] = 5 + return est, kase +} diff --git a/native/dlatrs.go b/native/dlatrs.go new file mode 100644 index 00000000..283a5d3a --- /dev/null +++ b/native/dlatrs.go @@ -0,0 +1,334 @@ +package native + +import ( + "math" + + "github.com/gonum/blas" + "github.com/gonum/blas/blas64" +) + +// Dlatrs solves a triangular system of equations scaled to prevent overflow. It +// solves +// A * x = scale * b if trans == blas.NoTrans +// A^T * x = scale * b if trans == blas.Trans +// where the scale s is set for numeric stability. +// +// A is an n×n triangular matrix. On entry, the slice x contains the values of +// of b, and on exit it contains the solution vector x. +// +// If normin == true, cnorm is an input and cnorm[j] contains the norm of the off-diagonal +// part of the j^th column of A. If trans == blas.NoTrans, cnorm[j] must be greater +// than or equal to the infinity norm, and greater than or equal to the one-norm +// otherwise. If normin == false, then cnorm is treated as an output, and is set +// to contain the 1-norm of the off-diagonal part of the j^th column of A. +func (impl Implementation) Dlatrs(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, normin bool, n int, a []float64, lda int, x []float64, cnorm []float64) (scale float64) { + if uplo != blas.Upper && uplo != blas.Lower { + panic(badUplo) + } + if trans != blas.Trans && trans != blas.NoTrans { + panic(badTrans) + } + if diag != blas.Unit && diag != blas.NonUnit { + panic(badDiag) + } + upper := uplo == blas.Upper + noTrans := trans == blas.NoTrans + nonUnit := diag == blas.NonUnit + + if n < 0 { + panic(nLT0) + } + checkMatrix(n, n, a, lda) + checkVector(n, x, 1) + checkVector(n, cnorm, 1) + + if n == 0 { + return + } + scale = 1 + bi := blas64.Implementation() + if !normin { + if upper { + for j := 0; j < n; j++ { + cnorm[j] = bi.Dasum(j, a[j:], lda) + } + } else { + for j := 0; j < n-1; j++ { + cnorm[j] = bi.Dasum(n-j-1, a[(j+1)*lda+j:], lda) + } + cnorm[n-1] = 0 + } + } + // Scale the column norms by tscal if the maximum element in cnorm is greater than bignum. + imax := bi.Idamax(n, cnorm, 1) + tmax := cnorm[imax] + var tscal float64 + if tmax <= bignum { + tscal = 1 + } else { + tscal = 1 / (smlnum * tmax) + bi.Dscal(n, tscal, cnorm, 1) + } + + // Compute a bound on the computed solution vector to see if bi.Dtrsv can be used. + j := bi.Idamax(n, x, 1) + xmax := math.Abs(x[j]) + xbnd := xmax + var grow float64 + var jfirst, jlast, jinc int + if noTrans { + if upper { + jfirst = n - 1 + jlast = 0 + jinc = -1 + } else { + jfirst = 0 + jlast = n - 1 + jinc = 1 + } + // Compute the growth in A * x = b. + if tscal != 1 { + grow = 0 + goto Finish + } + if nonUnit { + grow = 1 / math.Max(xbnd, smlnum) + xbnd = grow + for j := jfirst; j != jlast; j += jinc { + if grow <= smlnum { + goto Finish + } + tjj := math.Abs(a[j*lda+j]) + xbnd = math.Min(xbnd, math.Min(1, tjj)*grow) + if tjj+cnorm[j] >= smlnum { + grow *= tjj / (tjj + cnorm[j]) + } else { + grow = 0 + } + } + grow = xbnd + } else { + grow = math.Min(1, 1/math.Max(xbnd, smlnum)) + for j := jfirst; j != jlast; j += jinc { + if grow <= smlnum { + goto Finish + } + grow *= 1 / (1 + cnorm[j]) + } + } + } else { + if upper { + jfirst = 0 + jlast = n - 1 + jinc = 1 + } else { + jfirst = n - 1 + jlast = 0 + jinc = -1 + } + if tscal != 1 { + grow = 0 + goto Finish + } + if nonUnit { + grow = 1 / (math.Max(xbnd, smlnum)) + xbnd = grow + for j := jfirst; j != jlast; j += jinc { + if grow <= smlnum { + goto Finish + } + xj := 1 + cnorm[j] + grow = math.Min(grow, xbnd/xj) + tjj := math.Abs(a[j*lda+j]) + if xj > tjj { + xbnd *= tjj / xj + } + } + grow = math.Min(grow, xbnd) + } else { + grow = math.Min(1, 1/math.Max(xbnd, smlnum)) + for j := jfirst; j != jlast; j += jinc { + if grow <= smlnum { + goto Finish + } + xj := 1 + cnorm[j] + grow /= xj + } + } + } + +Finish: + if grow*tscal > smlnum { + bi.Dtrsv(uplo, trans, diag, n, a, lda, x, 1) + // TODO(btracey): check if this else is everything + } else { + if xmax > bignum { + scale = bignum / xmax + bi.Dscal(n, scale, x, 1) + xmax = bignum + } + if noTrans { + for j := jfirst; j != jlast; j += jinc { + xj := math.Abs(x[j]) + var tjjs float64 + if nonUnit { + tjjs = a[j*lda+j] * tscal + } else { + tjjs = tscal + if tscal == 1 { + break + } + } + tjj := math.Abs(tjjs) + if tjj > smlnum { + if tjj < 1 { + if xj > tjj*bignum { + rec := 1 / xj + bi.Dscal(n, rec, x, 1) + scale *= rec + xmax *= rec + } + } + x[j] /= tjjs + xj = math.Abs(x[j]) + } else if tjj > 0 { + if xj > tjj*bignum { + rec := (tjj * bignum) / xj + if cnorm[j] > 1 { + rec /= cnorm[j] + } + bi.Dscal(n, rec, x, 1) + scale *= rec + xmax *= rec + } + x[j] /= tjjs + xj = math.Abs(x[j]) + } else { + for i := 0; i < n; i++ { + x[i] = 0 + } + x[j] = 1 + xj = 1 + scale = 0 + xmax = 0 + } + if xj > 1 { + rec := 1 / xj + if cnorm[j] > (bignum-xmax)*rec { + rec *= 0.5 + bi.Dscal(n, rec, x, 1) + scale *= rec + } + } else if xj*cnorm[j] > bignum-xmax { + bi.Dscal(n, 0.5, x, 1) + scale *= 0.5 + } + if upper { + if j > 0 { + bi.Daxpy(j, -x[j]*tscal, a[j:], lda, x, 1) + i := bi.Idamax(j, x, 1) + xmax = math.Abs(x[i]) + } + } else { + if j < n-1 { + bi.Daxpy(n-j-1, -x[j]*tscal, a[(j+1)*lda+j:], lda, x[j+1:], 1) + i := j + bi.Idamax(n-j-1, x[j+1:], 1) + xmax = math.Abs(x[i]) + } + } + } + } else { + for j := jfirst; j != jlast; j += jinc { + xj := math.Abs(x[j]) + uscal := tscal + rec := 1 / math.Max(xmax, 1) + var tjjs float64 + if cnorm[j] > (bignum-xj)*rec { + rec *= 0.5 + if nonUnit { + tjjs = a[j*lda+j] * tscal + } else { + tjjs = tscal + } + tjj := math.Abs(tjjs) + if tjj > 1 { + rec = math.Min(1, rec*tjj) + uscal /= tjjs + } + if rec < 1 { + bi.Dscal(n, rec, x, 1) + scale *= rec + xmax *= rec + } + } + var sumj float64 + if uscal == 1 { + if upper { + sumj = bi.Ddot(j, a[j:], lda, x, 1) + } else if j < n-1 { + sumj = bi.Ddot(n-j-1, a[(j+1)*lda+j:], lda, x[j+1:], 1) + } + } else { + if upper { + for i := 0; i < j; i++ { + sumj += (a[i*lda+j] * uscal) * x[i] + } + } else if j < n { + for i := j + 1; i < n; i++ { + sumj += (a[i*lda+j] * uscal) * x[i] + } + } + } + if uscal == tscal { + x[j] -= sumj + xj := math.Abs(x[j]) + var tjjs float64 + if nonUnit { + tjjs = a[j*lda+j] * tscal + } else { + tjjs = tscal + if tscal == 1 { + goto Out2 + } + } + tjj := math.Abs(tjjs) + if tjj > smlnum { + if tjj < 1 { + if xj > tjj*bignum { + rec = 1 / xj + bi.Dscal(n, rec, x, 1) + scale *= rec + xmax *= rec + } + } + x[j] /= tjjs + } else if tjj > 0 { + if xj > tjj*bignum { + rec = (tjj * bignum) / xj + bi.Dscal(n, rec, x, 1) + scale *= rec + xmax *= rec + } + x[j] /= tjjs + } else { + for i := 0; i < n; i++ { + x[i] = 0 + } + x[j] = 1 + scale = 0 + xmax = 0 + } + } else { + x[j] = x[j]/tjjs - sumj + } + Out2: + xmax = math.Max(xmax, math.Abs(x[j])) + } + } + scale /= tscal + } + if tscal != 1 { + bi.Dscal(n, 1/tscal, cnorm, 1) + } + return scale +} diff --git a/native/general.go b/native/general.go index 8a3e3962..3ada2e79 100644 --- a/native/general.go +++ b/native/general.go @@ -20,6 +20,7 @@ var _ lapack.Float64 = Implementation{} // This list is duplicated in lapack/cgo. Keep in sync. const ( absIncNotOne = "lapack: increment not one or negative one" + badDiag = "lapack: bad diag" badDirect = "lapack: bad direct" badIpiv = "lapack: insufficient permutation length" badLdA = "lapack: index of a out of range" @@ -74,10 +75,6 @@ func max(a, b int) int { return b } -// dlamch is a function in fortran, but since go forces IEEE-754 these are all -// fixed values. Probably a way to get them as constants. -// TODO(btracey): Is there a better way to find the smallest number such that 1+E > 1 - var ( // dlamchE is the machine epsilon. For IEEE this is 2^-53. dlamchE = math.Float64frombits(0x3ca0000000000000) @@ -89,4 +86,7 @@ var ( // not overflow. The Netlib code for calculating this number is not correct -- // it overflows. Found by trial and error, it is equal to (1/math.MaxFloat64) * (1+ 6*eps) dlamchS = math.Float64frombits(0x4000000000001) + + smlnum = dlamchS / dlamchP + bignum = 1 / smlnum )