Add Dbdsqr and test

2025-10-20 21:59:25 +08:00 · 2015-12-11 16:31:29 -08:00
parent ed772a7bca
commit 1211255fc4
8 changed files with 784 additions and 4 deletions
--- a/cgo/lapack.go
+++ b/cgo/lapack.go
@@ -172,6 +172,76 @@ func (impl Implementation) Dpotrf(ul blas.Uplo, n int, a []float64, lda int) (ok
 	return clapack.Dpotrf(ul, n, a, lda)
 }

+// Dbdsqr performs a singular value decomposition of a real n×n bidiagonal matrix.
+//
+// The SVD of the bidiagonal matrix B is
+//  B = Q * S * P^T
+// where S is a diagonal matrix of singular values, Q is an orthogonal matrix of
+// left singular vectors, and P is an orthogonal matrix of right singular vectors.
+//
+// Q and P are only computed if requested. If left singular vectors are requested,
+// this routine returns U * Q instead of Q, and if right singular vectors are
+// requested P^T * VT is returned instead of P^T.
+//
+// Frequently Dbdsqr is used in conjuction with Dgebrd which reduces a general
+// matrix A into bidiagonal form. In this case, the SVD of A is
+//  A = (U * Q) * S * (P^T * VT)
+//
+// This routine may also compute Q^T * C.
+//
+// d and e contain the elements of the bidiagonal matrix b. d must have length at
+// least n, and e must have length at least n-1. Dbdsqr will panic if there is
+// insufficient length. On exit, D contains the singular values of B in decreasing
+// order.
+//
+// VT is a matrix of size n×ncvt whose elements are stored in vt. The elements
+// of vt are modified to contain P^T * VT on exit. VT is not used if ncvt == 0.
+//
+// U is a matrix of size nru×n whose elements are stored in u. The elements
+// of u are modified to contain U * Q on exit. U is not used if nru == 0.
+//
+// C is a matrix of size n×ncc whose elements are stored in c. The elements
+// of c are modified to contain Q^T * C on exit. C is not used if ncc == 0.
+//
+// work contains temporary storage and must have length at least 4*n. Dbdsqr
+// will panic if there is insufficient working memory.
+//
+// Dbdsqr returns whether the decomposition was successful.
+func (impl Implementation) Dbdsqr(uplo blas.Uplo, n, ncvt, nru, ncc int, d, e, vt []float64, ldvt int, u []float64, ldu int, c []float64, ldc int, work []float64) (ok bool) {
+	if uplo != blas.Upper && uplo != blas.Lower {
+		panic(badUplo)
+	}
+	if ncvt != 0 {
+		checkMatrix(n, ncvt, vt, ldvt)
+	}
+	if nru != 0 {
+		checkMatrix(nru, n, u, ldu)
+	}
+	if ncc != 0 {
+		checkMatrix(n, ncc, c, ldc)
+	}
+	if len(d) < n {
+		panic(badD)
+	}
+	if len(e) < n-1 {
+		panic(badE)
+	}
+	if len(work) < 4*n {
+		panic(badWork)
+	}
+	// An address must be passed to cgo. If lengths are zero, allocate a slice.
+	if len(vt) == 0 {
+		vt = make([]float64, 1)
+	}
+	if len(u) == 0 {
+		vt = make([]float64, 1)
+	}
+	if len(c) == 0 {
+		c = make([]float64, 1)
+	}
+	return clapack.Dbdsqr(uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c, ldc)
+}
+
 // Dgebrd reduces a general m×n matrix A to upper or lower bidiagonal form B by
 // an orthogonal transformation:
 //  Q^T * A * P = B.
--- a/cgo/lapack_test.go
+++ b/cgo/lapack_test.go
@@ -20,6 +20,10 @@ type blockedTranslate struct {
 	Implementation
 }

+func TestDbdsqr(t *testing.T) {
+	testlapack.DbdsqrTest(t, impl)
+}
+
 func (bl blockedTranslate) Dgebd2(m, n int, a []float64, lda int, d, e, tauQ, tauP, work []float64) {
 	impl.Dgebrd(m, n, a, lda, d, e, tauQ, tauP, work, len(work))
 }
--- a/native/dbdsqr.go
+++ b/native/dbdsqr.go
@@ -0,0 +1,485 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import (
+	"math"
+
+	"github.com/gonum/blas"
+	"github.com/gonum/blas/blas64"
+	"github.com/gonum/lapack"
+)
+
+// Dbdsqr performs a singular value decomposition of a real n×n bidiagonal matrix.
+//
+// The SVD of the bidiagonal matrix B is
+//  B = Q * S * P^T
+// where S is a diagonal matrix of singular values, Q is an orthogonal matrix of
+// left singular vectors, and P is an orthogonal matrix of right singular vectors.
+//
+// Q and P are only computed if requested. If left singular vectors are requested,
+// this routine returns U * Q instead of Q, and if right singular vectors are
+// requested P^T * VT is returned instead of P^T.
+//
+// Frequently Dbdsqr is used in conjuction with Dgebrd which reduces a general
+// matrix A into bidiagonal form. In this case, the SVD of A is
+//  A = (U * Q) * S * (P^T * VT)
+//
+// This routine may also compute Q^T * C.
+//
+// d and e contain the elements of the bidiagonal matrix b. d must have length at
+// least n, and e must have length at least n-1. Dbdsqr will panic if there is
+// insufficient length. On exit, D contains the singular values of B in decreasing
+// order.
+//
+// VT is a matrix of size n×ncvt whose elements are stored in vt. The elements
+// of vt are modified to contain P^T * VT on exit. VT is not used if ncvt == 0.
+//
+// U is a matrix of size nru×n whose elements are stored in u. The elements
+// of u are modified to contain U * Q on exit. U is not used if nru == 0.
+//
+// C is a matrix of size n×ncc whose elements are stored in c. The elements
+// of c are modified to contain Q^T * C on exit. C is not used if ncc == 0.
+//
+// work contains temporary storage and must have length at least 4*n. Dbdsqr
+// will panic if there is insufficient working memory.
+//
+// Dbdsqr returns whether the decomposition was successful.
+func (impl Implementation) Dbdsqr(uplo blas.Uplo, n, ncvt, nru, ncc int, d, e, vt []float64, ldvt int, u []float64, ldu int, c []float64, ldc int, work []float64) (ok bool) {
+	if uplo != blas.Upper && uplo != blas.Lower {
+		panic(badUplo)
+	}
+	if ncvt != 0 {
+		checkMatrix(n, ncvt, vt, ldvt)
+	}
+	if nru != 0 {
+		checkMatrix(nru, n, u, ldu)
+	}
+	if ncc != 0 {
+		checkMatrix(n, ncc, c, ldc)
+	}
+	if len(d) < n {
+		panic(badD)
+	}
+	if len(e) < n-1 {
+		panic(badE)
+	}
+	if len(work) < 4*n {
+		panic(badWork)
+	}
+	var info int
+	bi := blas64.Implementation()
+	const (
+		maxIter = 6
+	)
+	if n == 0 {
+		return true
+	}
+	if n != 1 {
+		// If the singular vectors do not need to be computed, use qd algorithm.
+		if !(ncvt > 0 || nru > 0 || ncc > 0) {
+			info = impl.Dlasq1(n, d, e, work)
+			// If info is 2 dqds didn't finish, and so try to.
+			if info != 2 {
+				return info == 0
+			}
+			info = 0
+		}
+		nm1 := n - 1
+		nm12 := nm1 + nm1
+		nm13 := nm12 + nm1
+		idir := 0
+
+		eps := dlamchE
+		unfl := dlamchS
+		lower := uplo == blas.Lower
+		var cs, sn, r float64
+		if lower {
+			for i := 0; i < n-1; i++ {
+				cs, sn, r = impl.Dlartg(d[i], e[i])
+				d[i] = r
+				e[i] = sn * d[i+1]
+				d[i+1] *= cs
+				work[i] = cs
+				work[nm1+i] = sn
+			}
+			if nru > 0 {
+				impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, n, work, work[n-1:], u, ldu)
+			}
+			if ncc > 0 {
+				impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, n, ncc, work, work[n-1:], c, ldc)
+			}
+		}
+		// Compute singular values to a relative accuracy of tol. If tol is negative
+		// the values will be computed to an absolute accuracy of math.Abs(tol) * norm(b)
+		tolmul := math.Max(10, math.Min(100, eps*(-1.0/8)))
+		tol := tolmul * eps
+		var smax float64
+		for i := 0; i < n; i++ {
+			smax = math.Max(smax, math.Abs(d[i]))
+		}
+		for i := 0; i < n-1; i++ {
+			smax = math.Max(smax, math.Abs(e[i]))
+		}
+		var sminl float64
+		var thresh float64
+		if tol >= 0 {
+			sminoa := math.Abs(d[0])
+			if sminoa != 0 {
+				mu := sminoa
+				for i := 1; i < n; i++ {
+					mu = math.Abs(d[i]) * (mu / (mu + math.Abs(e[i-1])))
+					sminoa = math.Min(sminoa, mu)
+					if sminoa == 0 {
+						break
+					}
+				}
+			}
+			sminoa = sminoa / math.Sqrt(float64(n))
+			thresh = math.Max(tol*sminoa, float64(maxIter*n*n)*unfl)
+		} else {
+			thresh = math.Max(math.Abs(tol)*smax, float64(maxIter*n*n)*unfl)
+		}
+		// Prepare for the main iteration loop for the singular values.
+		maxIt := maxIter * n * n
+		iter := 0
+		oldl2 := -1
+		oldm := -1
+		// m points to the last element of unconverged part of matrix.
+		m := n
+
+	Outer:
+		for m > 1 {
+			if iter > maxIt {
+				info = 0
+				for i := 0; i < n-1; i++ {
+					if e[i] != 0 {
+						info++
+					}
+				}
+				return info == 0
+			}
+			// Find diagonal block of matrix to work on.
+			if tol < 0 && math.Abs(d[m-1]) <= thresh {
+				d[m-1] = 0
+			}
+			smax = math.Abs(d[m-1])
+			smin := smax
+			var l2 int
+			var broke bool
+			for l3 := 0; l3 < m-1; l3++ {
+				l2 = m - l3 - 2
+				abss := math.Abs(d[l2])
+				abse := math.Abs(e[l2])
+				if tol < 0 && abss <= thresh {
+					d[l2] = 0
+				}
+				if abse <= thresh {
+					broke = true
+					break
+				}
+				smin = math.Min(smin, abss)
+				smax = math.Max(math.Max(smax, abss), abse)
+			}
+			if broke {
+				e[l2] = 0
+				if l2 == m-2 {
+					// Convergence of bottom singular value, return to top.
+					m--
+					continue
+				}
+				l2++
+			} else {
+				l2 = 0
+			}
+			// e[ll] through e[m-2] are nonzero, e[ll-1] is zero
+			if l2 == m-2 {
+				// Handle 2×2 block separately.
+				var sinr, cosr, sinl, cosl float64
+				d[m-1], d[m-2], sinr, cosr, sinl, cosl = impl.Dlasv2(d[m-2], e[m-2], d[m-1])
+				e[m-2] = 0
+				if ncvt > 0 {
+					bi.Drot(ncvt, vt[(m-2)*ldvt:], 1, vt[(m-1)*ldvt:], 1, cosr, sinr)
+				}
+				if nru > 0 {
+					bi.Drot(nru, u[m-2:], ldu, u[m-1:], ldu, cosl, sinl)
+				}
+				if ncc > 0 {
+					bi.Drot(ncc, c[(m-2)*ldc:], 1, c[(m-1)*ldc:], 1, cosl, sinl)
+				}
+				m -= 2
+				continue
+			}
+			// If working on a new submatrix, choose shift direction from larger end
+			// diagonal element toward smaller.
+			if l2 > oldm-1 || m-1 < oldl2 {
+				if math.Abs(d[l2]) >= math.Abs(d[m-1]) {
+					idir = 1
+				} else {
+					idir = 2
+				}
+			}
+			// Apply convergence tests.
+			// TODO(btracey): There is a lot of similar looking code here. See
+			// if there is a better way to de-duplicate.
+			if idir == 1 {
+				// Run convergence test in forward direction.
+				// First apply standard test to bottom of matrix.
+				if math.Abs(e[m-2]) <= math.Abs(tol)*math.Abs(d[m-1]) || (tol < 0 && math.Abs(e[m-2]) <= thresh) {
+					e[m-2] = 0
+					continue
+				}
+				if tol >= 0 {
+					// If relative accuracy desired, apply convergence criterion forward.
+					mu := math.Abs(d[l2])
+					sminl = mu
+					for l3 := l2; l3 < m-1; l3++ {
+						if math.Abs(e[l3]) <= tol*mu {
+							e[l3] = 0
+							continue Outer
+						}
+						mu = math.Abs(d[l3+1]) * (mu / (mu + math.Abs(e[l3])))
+						sminl = math.Min(sminl, mu)
+					}
+				}
+			} else {
+				// Run convergence test in backward direction.
+				// First apply standard test to top of matrix.
+				if math.Abs(e[l2]) <= math.Abs(tol)*math.Abs(d[l2]) || (tol < 0 && math.Abs(e[l2]) <= thresh) {
+					e[l2] = 0
+					continue
+				}
+				if tol >= 0 {
+					// If relative accuracy desired, apply convergence criterion backward.
+					mu := math.Abs(d[m-1])
+					sminl = mu
+					for l3 := m - 2; l3 >= l2; l3-- {
+						if math.Abs(e[l3]) <= tol*mu {
+							e[l3] = 0
+							continue Outer
+						}
+						mu = math.Abs(d[l3]) * (mu / (mu + math.Abs(e[l3])))
+						sminl = math.Min(sminl, mu)
+					}
+				}
+			}
+			oldl2 = l2
+			oldm = m
+			// Compute shift. First, test if shifting would ruin relative accuracy,
+			// and if so set the shift to zero.
+			var shift float64
+			if tol >= 0 && float64(n)*tol*(sminl/smax) <= math.Max(eps, (1.0/100)*tol) {
+				shift = 0
+			} else {
+				var sl2 float64
+				if idir == 1 {
+					sl2 = math.Abs(d[l2])
+					shift, _ = impl.Dlas2(d[m-2], e[m-2], d[m-1])
+				} else {
+					sl2 = math.Abs(d[m-1])
+					shift, _ = impl.Dlas2(d[l2], e[l2], d[l2+1])
+				}
+				// Test if shift is negligible
+				if sl2 > 0 {
+					if (shift/sl2)*(shift/sl2) < eps {
+						shift = 0
+					}
+				}
+			}
+			iter += m - l2 + 1
+			// If no shift, do simplified QR iteration.
+			if shift == 0 {
+				if idir == 1 {
+					cs := 1.0
+					oldcs := 1.0
+					var sn, r, oldsn float64
+					for i := l2; i < m-1; i++ {
+						cs, sn, r = impl.Dlartg(d[i]*cs, e[i])
+						if i > l2 {
+							e[i-1] = oldsn * r
+						}
+						oldcs, oldsn, d[i] = impl.Dlartg(oldcs*r, d[i+1]*sn)
+						work[i-l2] = cs
+						work[i-l2+nm1] = sn
+						work[i-l2+nm12] = oldcs
+						work[i-l2+nm13] = oldsn
+					}
+					h := d[m-1] * cs
+					d[m-1] = h * oldcs
+					e[m-2] = h * oldsn
+					if ncvt > 0 {
+						impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncvt, work, work[n-1:], vt[l2*ldvt:], ldvt)
+					}
+					if nru > 0 {
+						impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, m-l2, work[nm12:], work[nm13:], u[l2:], ldu)
+					}
+					if ncc > 0 {
+						impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncc, work[nm12:], work[nm13:], c[l2*ldc:], ldc)
+					}
+					if math.Abs(e[m-2]) < thresh {
+						e[m-2] = 0
+					}
+				} else {
+					cs := 1.0
+					oldcs := 1.0
+					var sn, r, oldsn float64
+					for i := m - 1; i >= l2+1; i-- {
+						cs, sn, r = impl.Dlartg(d[i]*cs, e[i-1])
+						if i < m-1 {
+							e[i] = oldsn * r
+						}
+						oldcs, oldsn, d[i] = impl.Dlartg(oldcs*r, d[i-1]*sn)
+						work[i-l2-1] = cs
+						work[i-l2+nm1-1] = -sn
+						work[i-l2+nm12-1] = oldcs
+						work[i-l2+nm13-1] = -oldsn
+					}
+					h := d[l2] * cs
+					d[l2] = h * oldcs
+					e[l2] = h * oldsn
+					if ncvt > 0 {
+						impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncvt, work[nm12:], work[nm13:], vt[l2*ldvt:], ldvt)
+					}
+					if nru > 0 {
+						impl.Dlasr(blas.Right, lapack.Variable, lapack.Backward, nru, m-l2, work, work[n-1:], u[l2:], ldu)
+					}
+					if ncc > 0 {
+						impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncc, work, work[n-1:], c[l2*ldc:], ldc)
+					}
+					if math.Abs(e[l2]) <= thresh {
+						e[l2] = 0
+					}
+				}
+			} else {
+				// Use nonzero shift.
+				if idir == 1 {
+					// Chase bulge from top to bottom. Save cosines and sines for
+					// later singular vector updates.
+					f := (math.Abs(d[l2]) - shift) * (math.Copysign(1, d[l2]) + shift/d[l2])
+					g := e[l2]
+					var cosl, sinl float64
+					for i := l2; i < m-1; i++ {
+						cosr, sinr, r := impl.Dlartg(f, g)
+						if i > l2 {
+							e[i-1] = r
+						}
+						f = cosr*d[i] + sinr*e[i]
+						e[i] = cosr*e[i] - sinr*d[i]
+						g = sinr * d[i+1]
+						d[i+1] *= cosr
+						cosl, sinl, r = impl.Dlartg(f, g)
+						d[i] = r
+						f = cosl*e[i] + sinl*d[i+1]
+						d[i+1] = cosl*d[i+1] - sinl*e[i]
+						if i < m-2 {
+							g = sinl * e[i+1]
+							e[i+1] = cosl * e[i+1]
+						}
+						work[i-l2] = cosr
+						work[i-l2+nm1] = sinr
+						work[i-l2+nm12] = cosl
+						work[i-l2+nm13] = sinl
+					}
+					e[m-2] = f
+					if ncvt > 0 {
+						impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncvt, work, work[n-1:], vt[l2*ldvt:], ldvt)
+					}
+					if nru > 0 {
+						impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, m-l2, work[nm12:], work[nm13:], u[l2:], ldu)
+					}
+					if ncc > 0 {
+						impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncc, work[nm12:], work[nm13:], c[l2*ldc:], ldc)
+					}
+					if math.Abs(e[m-2]) <= thresh {
+						e[m-2] = 0
+					}
+				} else {
+					// Chase bulge from top to bottom. Save cosines and sines for
+					// later singular vector updates.
+					f := (math.Abs(d[m-1]) - shift) * (math.Copysign(1, d[m-1]) + shift/d[m-1])
+					g := e[m-2]
+					for i := m - 1; i > l2; i-- {
+						cosr, sinr, r := impl.Dlartg(f, g)
+						if i < m-1 {
+							e[i] = r
+						}
+						f = cosr*d[i] + sinr*e[i-1]
+						e[i-1] = cosr*e[i-1] - sinr*d[i]
+						g = sinr * d[i-1]
+						d[i-1] *= cosr
+						cosl, sinl, r := impl.Dlartg(f, g)
+						d[i] = r
+						f = cosl*e[i-1] + sinl*d[i-1]
+						d[i-1] = cosl*d[i-1] - sinl*e[i-1]
+						if i > l2+1 {
+							g = sinl * e[i-2]
+							e[i-2] *= cosl
+						}
+						work[i-l2-1] = cosr
+						work[i-l2+nm1-1] = -sinr
+						work[i-l2+nm12-1] = cosl
+						work[i-l2+nm13-1] = -sinl
+					}
+					e[l2] = f
+					if math.Abs(e[l2]) <= thresh {
+						e[l2] = 0
+					}
+					if ncvt > 0 {
+						impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncvt, work[nm12:], work[nm13:], vt[l2*ldvt:], ldvt)
+					}
+					if nru > 0 {
+						impl.Dlasr(blas.Right, lapack.Variable, lapack.Backward, nru, m-l2, work, work[n-1:], u[l2:], ldu)
+					}
+					if ncc > 0 {
+						impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncc, work, work[n-1:], c[l2*ldc:], ldc)
+					}
+				}
+			}
+		}
+	}
+
+	// All singular values converged, make them positive.
+	for i := 0; i < n; i++ {
+		if d[i] < 0 {
+			d[i] *= -1
+			if ncvt > 0 {
+				bi.Dscal(ncvt, -1, vt[i*ldvt:], 1)
+			}
+		}
+	}
+
+	// Sort the singular values in decreasing order.
+	for i := 0; i < n-1; i++ {
+		isub := 0
+		smin := d[0]
+		for j := 1; j < n-i; j++ {
+			if d[j] <= smin {
+				isub = j
+				smin = d[j]
+			}
+		}
+		if isub != n-i {
+			// Swap singular values and vectors.
+			d[isub] = d[n-i-1]
+			d[n-i-1] = smin
+			if ncvt > 0 {
+				bi.Dswap(ncvt, vt[isub*ldvt:], 1, vt[(n-i-1)*ldvt:], 1)
+			}
+			if nru > 0 {
+				bi.Dswap(nru, u[isub:], ldu, u[n-i-1:], ldu)
+			}
+			if ncc > 0 {
+				bi.Dswap(ncc, c[isub*ldc:], 1, c[(n-i-1)*ldc:], 1)
+			}
+		}
+	}
+	info = 0
+	for i := 0; i < n-1; i++ {
+		if e[i] != 0 {
+			info++
+		}
+	}
+	return info == 0
+}
--- a/native/lapack_test.go
+++ b/native/lapack_test.go
@@ -12,6 +12,10 @@ import (

 var impl = Implementation{}

+func TestDbdsqr(t *testing.T) {
+	testlapack.DbdsqrTest(t, impl)
+}
+
 func TestDgebd2(t *testing.T) {
 	testlapack.Dgebd2Test(t, impl)
 }
--- a/testlapack/dbdsqr.go
+++ b/testlapack/dbdsqr.go
@@ -0,0 +1,195 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testlapack
+
+import (
+	"fmt"
+	"math/rand"
+	"sort"
+	"testing"
+
+	"github.com/gonum/blas"
+	"github.com/gonum/blas/blas64"
+	"github.com/gonum/floats"
+)
+
+type Dbdsqrer interface {
+	Dbdsqr(uplo blas.Uplo, n, ncvt, nru, ncc int, d, e, vt []float64, ldvt int, u []float64, ldu int, c []float64, ldc int, work []float64) (ok bool)
+}
+
+func DbdsqrTest(t *testing.T, impl Dbdsqrer) {
+	bi := blas64.Implementation()
+	_ = bi
+	for _, uplo := range []blas.Uplo{blas.Upper, blas.Lower} {
+		for _, test := range []struct {
+			n, ncvt, nru, ncc, ldvt, ldu, ldc int
+		}{
+			{5, 5, 5, 5, 0, 0, 0},
+			{10, 10, 10, 10, 0, 0, 0},
+			{10, 11, 12, 13, 0, 0, 0},
+			{20, 13, 12, 11, 0, 0, 0},
+
+			{5, 5, 5, 5, 6, 7, 8},
+			{10, 10, 10, 10, 30, 40, 50},
+			{10, 12, 11, 13, 30, 40, 50},
+			{20, 12, 13, 11, 30, 40, 50},
+		} {
+			for cas := 0; cas < 100; cas++ {
+				n := test.n
+				ncvt := test.ncvt
+				nru := test.nru
+				ncc := test.ncc
+				ldvt := test.ldvt
+				ldu := test.ldu
+				ldc := test.ldc
+				if ldvt == 0 {
+					ldvt = ncvt
+				}
+				if ldu == 0 {
+					ldu = n
+				}
+				if ldc == 0 {
+					ldc = ncc
+				}
+
+				d := make([]float64, n)
+				for i := range d {
+					d[i] = rand.NormFloat64()
+				}
+				e := make([]float64, n-1)
+				for i := range e {
+					e[i] = rand.NormFloat64()
+				}
+				dCopy := make([]float64, len(d))
+				copy(dCopy, d)
+				eCopy := make([]float64, len(e))
+				copy(eCopy, e)
+				work := make([]float64, 4*n)
+				for i := range work {
+					work[i] = rand.NormFloat64()
+				}
+
+				// First test the decomposition of the bidiagonal matrix. Set
+				// pt and u equal to I with the correct size. At the result
+				// of Dbdsqr, p and u  will contain the data of P^T and Q, which
+				// will be used in the next step to test the multiplication
+				// with Q and VT.
+
+				q := make([]float64, n*n)
+				ldq := n
+				pt := make([]float64, n*n)
+				ldpt := n
+				for i := 0; i < n; i++ {
+					q[i*ldq+i] = 1
+				}
+				for i := 0; i < n; i++ {
+					pt[i*ldpt+i] = 1
+				}
+
+				ok := impl.Dbdsqr(uplo, n, n, n, 0, d, e, pt, ldpt, q, ldq, nil, 0, work)
+
+				isUpper := uplo == blas.Upper
+				errStr := fmt.Sprintf("isUpper = %v, n = %v, ncvt = %v, nru = %v, ncc = %v", isUpper, n, ncvt, nru, ncc)
+				if !ok {
+					t.Errorf("Unexpected Dbdsqr failure: %s", errStr)
+				}
+
+				bMat := constructBidiagonal(uplo, n, dCopy, eCopy)
+				sMat := constructBidiagonal(uplo, n, d, e)
+
+				tmp := blas64.General{
+					Rows:   n,
+					Cols:   n,
+					Stride: n,
+					Data:   make([]float64, n*n),
+				}
+				ansMat := blas64.General{
+					Rows:   n,
+					Cols:   n,
+					Stride: n,
+					Data:   make([]float64, n*n),
+				}
+
+				bi.Dgemm(blas.NoTrans, blas.NoTrans, n, n, n, 1, q, ldq, sMat.Data, sMat.Stride, 0, tmp.Data, tmp.Stride)
+				bi.Dgemm(blas.NoTrans, blas.NoTrans, n, n, n, 1, tmp.Data, tmp.Stride, pt, ldpt, 0, ansMat.Data, ansMat.Stride)
+
+				same := true
+				for i := 0; i < n; i++ {
+					for j := 0; j < n; j++ {
+						if !floats.EqualWithinAbsOrRel(ansMat.Data[i*ansMat.Stride+j], bMat.Data[i*bMat.Stride+j], 1e-8, 1e-8) {
+							same = false
+						}
+					}
+				}
+				if !same {
+					t.Errorf("Bidiagonal mismatch. %s", errStr)
+				}
+				if !sort.IsSorted(sort.Reverse(sort.Float64Slice(d))) {
+					t.Errorf("D is not sorted. %s", errStr)
+				}
+
+				// The above computed the real P and Q. Now input data for V^T,
+				// U, and C to check that the multiplications happen properly.
+				dAns := make([]float64, len(d))
+				copy(dAns, d)
+				eAns := make([]float64, len(e))
+				copy(eAns, e)
+
+				u := make([]float64, nru*ldu)
+				for i := range u {
+					u[i] = rand.NormFloat64()
+				}
+				uCopy := make([]float64, len(u))
+				copy(uCopy, u)
+				vt := make([]float64, n*ldvt)
+				for i := range vt {
+					vt[i] = rand.NormFloat64()
+				}
+				vtCopy := make([]float64, len(vt))
+				copy(vtCopy, vt)
+				c := make([]float64, n*ldc)
+				for i := range c {
+					c[i] = rand.NormFloat64()
+				}
+				cCopy := make([]float64, len(c))
+				copy(cCopy, c)
+
+				// Reset input data
+				copy(d, dCopy)
+				copy(e, eCopy)
+				impl.Dbdsqr(uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c, ldc, work)
+
+				// Check result.
+				if !floats.EqualApprox(d, dAns, 1e-14) {
+					t.Errorf("D mismatch second time. %s", errStr)
+				}
+				if !floats.EqualApprox(e, eAns, 1e-14) {
+					t.Errorf("E mismatch second time. %s", errStr)
+				}
+				ans := make([]float64, len(vtCopy))
+				copy(ans, vtCopy)
+				ldans := ldvt
+				bi.Dgemm(blas.NoTrans, blas.NoTrans, n, ncvt, n, 1, pt, ldpt, vtCopy, ldvt, 0, ans, ldans)
+				if !floats.EqualApprox(ans, vt, 1e-10) {
+					t.Errorf("Vt result mismatch. %s", errStr)
+				}
+				ans = make([]float64, len(uCopy))
+				copy(ans, uCopy)
+				ldans = ldu
+				bi.Dgemm(blas.NoTrans, blas.NoTrans, nru, n, n, 1, uCopy, ldu, q, ldq, 0, ans, ldans)
+				if !floats.EqualApprox(ans, u, 1e-10) {
+					t.Errorf("U result mismatch. %s", errStr)
+				}
+				ans = make([]float64, len(cCopy))
+				copy(ans, cCopy)
+				ldans = ldc
+				bi.Dgemm(blas.Trans, blas.NoTrans, n, ncc, n, 1, q, ldq, cCopy, ldc, 0, ans, ldans)
+				if !floats.EqualApprox(ans, c, 1e-10) {
+					t.Errorf("C result mismatch. %s", errStr)
+				}
+			}
+		}
+	}
+}
--- a/testlapack/dgetri.go
+++ b/testlapack/dgetri.go
@@ -67,11 +67,11 @@ func DgetriTest(t *testing.T, impl Dgetrier) {
 				if i == j {
 					// This tolerance is so high because computing matrix inverses
 					// is very unstable.
-					if math.Abs(ans[i*lda+j]-1) > 2e-2 {
+					if math.Abs(ans[i*lda+j]-1) > 5e-2 {
 						isEye = false
 					}
 				} else {
-					if math.Abs(ans[i*lda+j]) > 2e-2 {
+					if math.Abs(ans[i*lda+j]) > 5e-2 {
 						isEye = false
 					}
 				}
--- a/testlapack/dgetrs.go
+++ b/testlapack/dgetrs.go
@@ -34,11 +34,11 @@ func DgetrsTest(t *testing.T, impl Dgetrser) {

 			{300, 300, 0, 0, 1e-8},
 			{300, 500, 0, 0, 1e-8},
-			{500, 300, 0, 0, 1e-8},
+			{500, 300, 0, 0, 1e-6},

 			{300, 300, 700, 600, 1e-8},
 			{300, 500, 700, 600, 1e-8},
-			{500, 300, 700, 600, 1e-8},
+			{500, 300, 700, 600, 1e-6},
 		} {
 			n := test.n
 			nrhs := test.nrhs
--- a/testlapack/general.go
+++ b/testlapack/general.go
@@ -97,6 +97,28 @@ func extractVMat(m, n int, a []float64, lda int, direct lapack.Direct, store lap
 	}
 }

+// constructBidiagonal constructs a bidiagonal matrix with the given diagonal
+// and off-diagonal elements.
+func constructBidiagonal(uplo blas.Uplo, n int, d, e []float64) blas64.General {
+	bMat := blas64.General{
+		Rows:   n,
+		Cols:   n,
+		Stride: n,
+		Data:   make([]float64, n*n),
+	}
+
+	for i := 0; i < n-1; i++ {
+		bMat.Data[i*bMat.Stride+i] = d[i]
+		if uplo == blas.Upper {
+			bMat.Data[i*bMat.Stride+i+1] = e[i]
+		} else {
+			bMat.Data[(i+1)*bMat.Stride+i] = e[i]
+		}
+	}
+	bMat.Data[(n-1)*bMat.Stride+n-1] = d[n-1]
+	return bMat
+}
+
 // constructVMat transforms the v matrix based on the storage.
 func constructVMat(vMat blas64.General, store lapack.StoreV, direct lapack.Direct) blas64.General {
 	m := vMat.Rows