Pull from master

2025-10-20 21:59:25 +08:00 · 2015-09-05 13:55:27 -06:00
parent 8debf09945 a76c841cc2
commit cceacb08d0
4 changed files with 458 additions and 4 deletions
--- a/cgo/lapack.go
+++ b/cgo/lapack.go
@@ -14,6 +14,7 @@ import (
 // Copied from lapack/native. Keep in sync.
 const (
 	absIncNotOne  = "lapack: increment not one or negative one"
+	badDiag       = "lapack: bad diag"
 	badDirect     = "lapack: bad direct"
 	badIpiv       = "lapack: insufficient permutation length"
 	badLdA        = "lapack: index of a out of range"
--- a/native/dlacn2.go
+++ b/native/dlacn2.go
@@ -0,0 +1,119 @@
+package native
+
+import (
+	"math"
+
+	"github.com/gonum/blas/blas64"
+)
+
+// Dlacn2 estimates the 1-norm of an n×n matrix A using sequential updates with
+// matrix-vector products provided externally.
+//
+// Dlacn2 is called sequentially. In between calls, x should be overwritten by
+//  A * X if kase == 1
+//  A^T * X if kase == 2
+// all other prameters should be unchanged during sequential calls, and the updated
+// values of est and kase should be used. On the final return (when kase is returned
+// as 0), V = A * W, where est = norm(V) / norm(W).
+//
+// isign, v, and x must all have length n and will panic otherwise. isave is used
+// for temporary storage.
+func (impl Implementation) Dlacn2(n int, v, x []float64, isgn []int, est float64, kase int, isave [3]int) (float64, int) {
+	checkVector(n, x, 1)
+	checkVector(n, v, 1)
+	if len(isgn) < n {
+		panic("lapack: insufficient isgn length")
+	}
+	if isave[0] < 1 || isave[0] > 5 {
+		panic("lapack: bad isave value")
+	}
+	itmax := 5
+	bi := blas64.Implementation()
+	if kase == 0 {
+		for i := 0; i < n; i++ {
+			x[i] = 1 / float64(n)
+		}
+		kase = 1
+		isave[0] = 1
+		return est, kase
+	}
+	switch isave[0] {
+	default:
+		panic("unknown case")
+	case 1:
+		if n == 1 {
+			v[0] = x[0]
+			est = math.Abs(v[0])
+			kase = 0
+			return est, kase
+		}
+		est = bi.Dasum(n, x, 1)
+		for i := 0; i < n; i++ {
+			x[i] = math.Copysign(1, x[i])
+			isgn[i] = int(x[i])
+		}
+		kase = 2
+		isave[0] = 2
+		return est, kase
+	case 2:
+		isave[1] = bi.Idamax(n, x, 1)
+		isave[2] = 2
+		for i := 0; i < n; i++ {
+			x[i] = 0
+		}
+		x[isave[1]] = 1
+		kase = 1
+		isave[0] = 3
+		return est, kase
+	case 3:
+		bi.Dcopy(n, x, 1, v, 1)
+		estold := est
+		est = bi.Dasum(n, v, 1)
+		sameSigns := true
+		for i := 0; i < n; i++ {
+			if int(math.Copysign(1, x[i])) != isgn[i] {
+				sameSigns = false
+				break
+			}
+		}
+		if !sameSigns && est > estold {
+			for i := 0; i < n; i++ {
+				x[i] = math.Copysign(1, x[i])
+				isgn[i] = int(x[i])
+			}
+			kase = 2
+			isave[0] = 4
+			return est, kase
+		}
+	case 4:
+		jlast := isave[1]
+		isave[1] = bi.Idamax(n, x, 1)
+		if x[jlast] != math.Abs(x[isave[1]]) && isave[2] < itmax {
+			isave[2] += 1
+			for i := 0; i < n; i++ {
+				x[i] = 0
+			}
+			x[isave[1]] = 1
+			kase = 1
+			isave[0] = 3
+			return est, kase
+		}
+	case 5:
+		tmp := 2 * (bi.Dasum(n, x, 1)) / float64(3*n)
+		if tmp > est {
+			bi.Dcopy(n, x, 1, v, 1)
+			est = tmp
+		}
+		kase = 0
+		return est, kase
+	}
+	// Iteration complete. Final stage
+	altsgn := 1.0
+	for i := 0; i < n; i++ {
+		x[i] = altsgn * (1 + float64(i)/float64(n-1))
+		altsgn *= -1
+	}
+	kase = 1
+	isave[0] = 5
+	return est, kase
+}
--- a/native/dlatrs.go
+++ b/native/dlatrs.go
@@ -0,0 +1,334 @@
+package native
+
+import (
+	"math"
+
+	"github.com/gonum/blas"
+	"github.com/gonum/blas/blas64"
+)
+
+// Dlatrs solves a triangular system of equations scaled to prevent overflow. It
+// solves
+//  A * x = scale * b if trans == blas.NoTrans
+//  A^T * x = scale * b if trans == blas.Trans
+// where the scale s is set for numeric stability.
+//
+// A is an n×n triangular matrix. On entry, the slice x contains the values of
+// of b, and on exit it contains the solution vector x.
+//
+// If normin == true, cnorm is an input and cnorm[j] contains the norm of the off-diagonal
+// part of the j^th column of A. If trans == blas.NoTrans, cnorm[j] must be greater
+// than or equal to the infinity norm, and greater than or equal to the one-norm
+// otherwise. If normin == false, then cnorm is treated as an output, and is set
+// to contain the 1-norm of the off-diagonal part of the j^th column of A.
+func (impl Implementation) Dlatrs(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, normin bool, n int, a []float64, lda int, x []float64, cnorm []float64) (scale float64) {
+	if uplo != blas.Upper && uplo != blas.Lower {
+		panic(badUplo)
+	}
+	if trans != blas.Trans && trans != blas.NoTrans {
+		panic(badTrans)
+	}
+	if diag != blas.Unit && diag != blas.NonUnit {
+		panic(badDiag)
+	}
+	upper := uplo == blas.Upper
+	noTrans := trans == blas.NoTrans
+	nonUnit := diag == blas.NonUnit
+
+	if n < 0 {
+		panic(nLT0)
+	}
+	checkMatrix(n, n, a, lda)
+	checkVector(n, x, 1)
+	checkVector(n, cnorm, 1)
+
+	if n == 0 {
+		return
+	}
+	scale = 1
+	bi := blas64.Implementation()
+	if !normin {
+		if upper {
+			for j := 0; j < n; j++ {
+				cnorm[j] = bi.Dasum(j, a[j:], lda)
+			}
+		} else {
+			for j := 0; j < n-1; j++ {
+				cnorm[j] = bi.Dasum(n-j-1, a[(j+1)*lda+j:], lda)
+			}
+			cnorm[n-1] = 0
+		}
+	}
+	// Scale the column norms by tscal if the maximum element in cnorm is greater than bignum.
+	imax := bi.Idamax(n, cnorm, 1)
+	tmax := cnorm[imax]
+	var tscal float64
+	if tmax <= bignum {
+		tscal = 1
+	} else {
+		tscal = 1 / (smlnum * tmax)
+		bi.Dscal(n, tscal, cnorm, 1)
+	}
+
+	// Compute a bound on the computed solution vector to see if bi.Dtrsv can be used.
+	j := bi.Idamax(n, x, 1)
+	xmax := math.Abs(x[j])
+	xbnd := xmax
+	var grow float64
+	var jfirst, jlast, jinc int
+	if noTrans {
+		if upper {
+			jfirst = n - 1
+			jlast = 0
+			jinc = -1
+		} else {
+			jfirst = 0
+			jlast = n - 1
+			jinc = 1
+		}
+		// Compute the growth in A * x = b.
+		if tscal != 1 {
+			grow = 0
+			goto Finish
+		}
+		if nonUnit {
+			grow = 1 / math.Max(xbnd, smlnum)
+			xbnd = grow
+			for j := jfirst; j != jlast; j += jinc {
+				if grow <= smlnum {
+					goto Finish
+				}
+				tjj := math.Abs(a[j*lda+j])
+				xbnd = math.Min(xbnd, math.Min(1, tjj)*grow)
+				if tjj+cnorm[j] >= smlnum {
+					grow *= tjj / (tjj + cnorm[j])
+				} else {
+					grow = 0
+				}
+			}
+			grow = xbnd
+		} else {
+			grow = math.Min(1, 1/math.Max(xbnd, smlnum))
+			for j := jfirst; j != jlast; j += jinc {
+				if grow <= smlnum {
+					goto Finish
+				}
+				grow *= 1 / (1 + cnorm[j])
+			}
+		}
+	} else {
+		if upper {
+			jfirst = 0
+			jlast = n - 1
+			jinc = 1
+		} else {
+			jfirst = n - 1
+			jlast = 0
+			jinc = -1
+		}
+		if tscal != 1 {
+			grow = 0
+			goto Finish
+		}
+		if nonUnit {
+			grow = 1 / (math.Max(xbnd, smlnum))
+			xbnd = grow
+			for j := jfirst; j != jlast; j += jinc {
+				if grow <= smlnum {
+					goto Finish
+				}
+				xj := 1 + cnorm[j]
+				grow = math.Min(grow, xbnd/xj)
+				tjj := math.Abs(a[j*lda+j])
+				if xj > tjj {
+					xbnd *= tjj / xj
+				}
+			}
+			grow = math.Min(grow, xbnd)
+		} else {
+			grow = math.Min(1, 1/math.Max(xbnd, smlnum))
+			for j := jfirst; j != jlast; j += jinc {
+				if grow <= smlnum {
+					goto Finish
+				}
+				xj := 1 + cnorm[j]
+				grow /= xj
+			}
+		}
+	}
+
+Finish:
+	if grow*tscal > smlnum {
+		bi.Dtrsv(uplo, trans, diag, n, a, lda, x, 1)
+		// TODO(btracey): check if this else is everything
+	} else {
+		if xmax > bignum {
+			scale = bignum / xmax
+			bi.Dscal(n, scale, x, 1)
+			xmax = bignum
+		}
+		if noTrans {
+			for j := jfirst; j != jlast; j += jinc {
+				xj := math.Abs(x[j])
+				var tjjs float64
+				if nonUnit {
+					tjjs = a[j*lda+j] * tscal
+				} else {
+					tjjs = tscal
+					if tscal == 1 {
+						break
+					}
+				}
+				tjj := math.Abs(tjjs)
+				if tjj > smlnum {
+					if tjj < 1 {
+						if xj > tjj*bignum {
+							rec := 1 / xj
+							bi.Dscal(n, rec, x, 1)
+							scale *= rec
+							xmax *= rec
+						}
+					}
+					x[j] /= tjjs
+					xj = math.Abs(x[j])
+				} else if tjj > 0 {
+					if xj > tjj*bignum {
+						rec := (tjj * bignum) / xj
+						if cnorm[j] > 1 {
+							rec /= cnorm[j]
+						}
+						bi.Dscal(n, rec, x, 1)
+						scale *= rec
+						xmax *= rec
+					}
+					x[j] /= tjjs
+					xj = math.Abs(x[j])
+				} else {
+					for i := 0; i < n; i++ {
+						x[i] = 0
+					}
+					x[j] = 1
+					xj = 1
+					scale = 0
+					xmax = 0
+				}
+				if xj > 1 {
+					rec := 1 / xj
+					if cnorm[j] > (bignum-xmax)*rec {
+						rec *= 0.5
+						bi.Dscal(n, rec, x, 1)
+						scale *= rec
+					}
+				} else if xj*cnorm[j] > bignum-xmax {
+					bi.Dscal(n, 0.5, x, 1)
+					scale *= 0.5
+				}
+				if upper {
+					if j > 0 {
+						bi.Daxpy(j, -x[j]*tscal, a[j:], lda, x, 1)
+						i := bi.Idamax(j, x, 1)
+						xmax = math.Abs(x[i])
+					}
+				} else {
+					if j < n-1 {
+						bi.Daxpy(n-j-1, -x[j]*tscal, a[(j+1)*lda+j:], lda, x[j+1:], 1)
+						i := j + bi.Idamax(n-j-1, x[j+1:], 1)
+						xmax = math.Abs(x[i])
+					}
+				}
+			}
+		} else {
+			for j := jfirst; j != jlast; j += jinc {
+				xj := math.Abs(x[j])
+				uscal := tscal
+				rec := 1 / math.Max(xmax, 1)
+				var tjjs float64
+				if cnorm[j] > (bignum-xj)*rec {
+					rec *= 0.5
+					if nonUnit {
+						tjjs = a[j*lda+j] * tscal
+					} else {
+						tjjs = tscal
+					}
+					tjj := math.Abs(tjjs)
+					if tjj > 1 {
+						rec = math.Min(1, rec*tjj)
+						uscal /= tjjs
+					}
+					if rec < 1 {
+						bi.Dscal(n, rec, x, 1)
+						scale *= rec
+						xmax *= rec
+					}
+				}
+				var sumj float64
+				if uscal == 1 {
+					if upper {
+						sumj = bi.Ddot(j, a[j:], lda, x, 1)
+					} else if j < n-1 {
+						sumj = bi.Ddot(n-j-1, a[(j+1)*lda+j:], lda, x[j+1:], 1)
+					}
+				} else {
+					if upper {
+						for i := 0; i < j; i++ {
+							sumj += (a[i*lda+j] * uscal) * x[i]
+						}
+					} else if j < n {
+						for i := j + 1; i < n; i++ {
+							sumj += (a[i*lda+j] * uscal) * x[i]
+						}
+					}
+				}
+				if uscal == tscal {
+					x[j] -= sumj
+					xj := math.Abs(x[j])
+					var tjjs float64
+					if nonUnit {
+						tjjs = a[j*lda+j] * tscal
+					} else {
+						tjjs = tscal
+						if tscal == 1 {
+							goto Out2
+						}
+					}
+					tjj := math.Abs(tjjs)
+					if tjj > smlnum {
+						if tjj < 1 {
+							if xj > tjj*bignum {
+								rec = 1 / xj
+								bi.Dscal(n, rec, x, 1)
+								scale *= rec
+								xmax *= rec
+							}
+						}
+						x[j] /= tjjs
+					} else if tjj > 0 {
+						if xj > tjj*bignum {
+							rec = (tjj * bignum) / xj
+							bi.Dscal(n, rec, x, 1)
+							scale *= rec
+							xmax *= rec
+						}
+						x[j] /= tjjs
+					} else {
+						for i := 0; i < n; i++ {
+							x[i] = 0
+						}
+						x[j] = 1
+						scale = 0
+						xmax = 0
+					}
+				} else {
+					x[j] = x[j]/tjjs - sumj
+				}
+			Out2:
+				xmax = math.Max(xmax, math.Abs(x[j]))
+			}
+		}
+		scale /= tscal
+	}
+	if tscal != 1 {
+		bi.Dscal(n, 1/tscal, cnorm, 1)
+	}
+	return scale
+}
--- a/native/general.go
+++ b/native/general.go
@@ -20,6 +20,7 @@ var _ lapack.Float64 = Implementation{}
 // This list is duplicated in lapack/cgo. Keep in sync.
 const (
 	absIncNotOne  = "lapack: increment not one or negative one"
+	badDiag       = "lapack: bad diag"
 	badDirect     = "lapack: bad direct"
 	badIpiv       = "lapack: insufficient permutation length"
 	badLdA        = "lapack: index of a out of range"
@@ -74,10 +75,6 @@ func max(a, b int) int {
 	return b
 }

-// dlamch is a function in fortran, but since go forces IEEE-754 these are all
-// fixed values. Probably a way to get them as constants.
-// TODO(btracey): Is there a better way to find the smallest number such that 1+E > 1
-
 var (
 	// dlamchE is the machine epsilon. For IEEE this is 2^-53.
 	dlamchE = math.Float64frombits(0x3ca0000000000000)
@@ -89,4 +86,7 @@ var (
 	// not overflow. The Netlib code for calculating this number is not correct --
 	// it overflows. Found by trial and error, it is equal to (1/math.MaxFloat64) * (1+ 6*eps)
 	dlamchS = math.Float64frombits(0x4000000000001)
+
+	smlnum = dlamchS / dlamchP
+	bignum = 1 / smlnum
 )