Files
gonum/lapack/native/dbdsqr.go
2017-05-23 00:03:03 -06:00

489 lines
13 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
import (
"math"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/blas/blas64"
"gonum.org/v1/gonum/lapack"
)
// Dbdsqr performs a singular value decomposition of a real n×n bidiagonal matrix.
//
// The SVD of the bidiagonal matrix B is
// B = Q * S * P^T
// where S is a diagonal matrix of singular values, Q is an orthogonal matrix of
// left singular vectors, and P is an orthogonal matrix of right singular vectors.
//
// Q and P are only computed if requested. If left singular vectors are requested,
// this routine returns U * Q instead of Q, and if right singular vectors are
// requested P^T * VT is returned instead of P^T.
//
// Frequently Dbdsqr is used in conjunction with Dgebrd which reduces a general
// matrix A into bidiagonal form. In this case, the SVD of A is
// A = (U * Q) * S * (P^T * VT)
//
// This routine may also compute Q^T * C.
//
// d and e contain the elements of the bidiagonal matrix b. d must have length at
// least n, and e must have length at least n-1. Dbdsqr will panic if there is
// insufficient length. On exit, D contains the singular values of B in decreasing
// order.
//
// VT is a matrix of size n×ncvt whose elements are stored in vt. The elements
// of vt are modified to contain P^T * VT on exit. VT is not used if ncvt == 0.
//
// U is a matrix of size nru×n whose elements are stored in u. The elements
// of u are modified to contain U * Q on exit. U is not used if nru == 0.
//
// C is a matrix of size n×ncc whose elements are stored in c. The elements
// of c are modified to contain Q^T * C on exit. C is not used if ncc == 0.
//
// work contains temporary storage and must have length at least 4*n. Dbdsqr
// will panic if there is insufficient working memory.
//
// Dbdsqr returns whether the decomposition was successful.
//
// Dbdsqr is an internal routine. It is exported for testing purposes.
func (impl Implementation) Dbdsqr(uplo blas.Uplo, n, ncvt, nru, ncc int, d, e, vt []float64, ldvt int, u []float64, ldu int, c []float64, ldc int, work []float64) (ok bool) {
if uplo != blas.Upper && uplo != blas.Lower {
panic(badUplo)
}
if ncvt != 0 {
checkMatrix(n, ncvt, vt, ldvt)
}
if nru != 0 {
checkMatrix(nru, n, u, ldu)
}
if ncc != 0 {
checkMatrix(n, ncc, c, ldc)
}
if len(d) < n {
panic(badD)
}
if len(e) < n-1 {
panic(badE)
}
if len(work) < 4*n {
panic(badWork)
}
var info int
bi := blas64.Implementation()
const (
maxIter = 6
)
if n == 0 {
return true
}
if n != 1 {
// If the singular vectors do not need to be computed, use qd algorithm.
if !(ncvt > 0 || nru > 0 || ncc > 0) {
info = impl.Dlasq1(n, d, e, work)
// If info is 2 dqds didn't finish, and so try to.
if info != 2 {
return info == 0
}
info = 0
}
nm1 := n - 1
nm12 := nm1 + nm1
nm13 := nm12 + nm1
idir := 0
eps := dlamchE
unfl := dlamchS
lower := uplo == blas.Lower
var cs, sn, r float64
if lower {
for i := 0; i < n-1; i++ {
cs, sn, r = impl.Dlartg(d[i], e[i])
d[i] = r
e[i] = sn * d[i+1]
d[i+1] *= cs
work[i] = cs
work[nm1+i] = sn
}
if nru > 0 {
impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, n, work, work[n-1:], u, ldu)
}
if ncc > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, n, ncc, work, work[n-1:], c, ldc)
}
}
// Compute singular values to a relative accuracy of tol. If tol is negative
// the values will be computed to an absolute accuracy of math.Abs(tol) * norm(b)
tolmul := math.Max(10, math.Min(100, math.Pow(eps, -1.0/8)))
tol := tolmul * eps
var smax float64
for i := 0; i < n; i++ {
smax = math.Max(smax, math.Abs(d[i]))
}
for i := 0; i < n-1; i++ {
smax = math.Max(smax, math.Abs(e[i]))
}
var sminl float64
var thresh float64
if tol >= 0 {
sminoa := math.Abs(d[0])
if sminoa != 0 {
mu := sminoa
for i := 1; i < n; i++ {
mu = math.Abs(d[i]) * (mu / (mu + math.Abs(e[i-1])))
sminoa = math.Min(sminoa, mu)
if sminoa == 0 {
break
}
}
}
sminoa = sminoa / math.Sqrt(float64(n))
thresh = math.Max(tol*sminoa, float64(maxIter*n*n)*unfl)
} else {
thresh = math.Max(math.Abs(tol)*smax, float64(maxIter*n*n)*unfl)
}
// Prepare for the main iteration loop for the singular values.
maxIt := maxIter * n * n
iter := 0
oldl2 := -1
oldm := -1
// m points to the last element of unconverged part of matrix.
m := n
Outer:
for m > 1 {
if iter > maxIt {
info = 0
for i := 0; i < n-1; i++ {
if e[i] != 0 {
info++
}
}
return info == 0
}
// Find diagonal block of matrix to work on.
if tol < 0 && math.Abs(d[m-1]) <= thresh {
d[m-1] = 0
}
smax = math.Abs(d[m-1])
smin := smax
var l2 int
var broke bool
for l3 := 0; l3 < m-1; l3++ {
l2 = m - l3 - 2
abss := math.Abs(d[l2])
abse := math.Abs(e[l2])
if tol < 0 && abss <= thresh {
d[l2] = 0
}
if abse <= thresh {
broke = true
break
}
smin = math.Min(smin, abss)
smax = math.Max(math.Max(smax, abss), abse)
}
if broke {
e[l2] = 0
if l2 == m-2 {
// Convergence of bottom singular value, return to top.
m--
continue
}
l2++
} else {
l2 = 0
}
// e[ll] through e[m-2] are nonzero, e[ll-1] is zero
if l2 == m-2 {
// Handle 2×2 block separately.
var sinr, cosr, sinl, cosl float64
d[m-1], d[m-2], sinr, cosr, sinl, cosl = impl.Dlasv2(d[m-2], e[m-2], d[m-1])
e[m-2] = 0
if ncvt > 0 {
bi.Drot(ncvt, vt[(m-2)*ldvt:], 1, vt[(m-1)*ldvt:], 1, cosr, sinr)
}
if nru > 0 {
bi.Drot(nru, u[m-2:], ldu, u[m-1:], ldu, cosl, sinl)
}
if ncc > 0 {
bi.Drot(ncc, c[(m-2)*ldc:], 1, c[(m-1)*ldc:], 1, cosl, sinl)
}
m -= 2
continue
}
// If working on a new submatrix, choose shift direction from larger end
// diagonal element toward smaller.
if l2 > oldm-1 || m-1 < oldl2 {
if math.Abs(d[l2]) >= math.Abs(d[m-1]) {
idir = 1
} else {
idir = 2
}
}
// Apply convergence tests.
// TODO(btracey): There is a lot of similar looking code here. See
// if there is a better way to de-duplicate.
if idir == 1 {
// Run convergence test in forward direction.
// First apply standard test to bottom of matrix.
if math.Abs(e[m-2]) <= math.Abs(tol)*math.Abs(d[m-1]) || (tol < 0 && math.Abs(e[m-2]) <= thresh) {
e[m-2] = 0
continue
}
if tol >= 0 {
// If relative accuracy desired, apply convergence criterion forward.
mu := math.Abs(d[l2])
sminl = mu
for l3 := l2; l3 < m-1; l3++ {
if math.Abs(e[l3]) <= tol*mu {
e[l3] = 0
continue Outer
}
mu = math.Abs(d[l3+1]) * (mu / (mu + math.Abs(e[l3])))
sminl = math.Min(sminl, mu)
}
}
} else {
// Run convergence test in backward direction.
// First apply standard test to top of matrix.
if math.Abs(e[l2]) <= math.Abs(tol)*math.Abs(d[l2]) || (tol < 0 && math.Abs(e[l2]) <= thresh) {
e[l2] = 0
continue
}
if tol >= 0 {
// If relative accuracy desired, apply convergence criterion backward.
mu := math.Abs(d[m-1])
sminl = mu
for l3 := m - 2; l3 >= l2; l3-- {
if math.Abs(e[l3]) <= tol*mu {
e[l3] = 0
continue Outer
}
mu = math.Abs(d[l3]) * (mu / (mu + math.Abs(e[l3])))
sminl = math.Min(sminl, mu)
}
}
}
oldl2 = l2
oldm = m
// Compute shift. First, test if shifting would ruin relative accuracy,
// and if so set the shift to zero.
var shift float64
if tol >= 0 && float64(n)*tol*(sminl/smax) <= math.Max(eps, (1.0/100)*tol) {
shift = 0
} else {
var sl2 float64
if idir == 1 {
sl2 = math.Abs(d[l2])
shift, _ = impl.Dlas2(d[m-2], e[m-2], d[m-1])
} else {
sl2 = math.Abs(d[m-1])
shift, _ = impl.Dlas2(d[l2], e[l2], d[l2+1])
}
// Test if shift is negligible
if sl2 > 0 {
if (shift/sl2)*(shift/sl2) < eps {
shift = 0
}
}
}
iter += m - l2 + 1
// If no shift, do simplified QR iteration.
if shift == 0 {
if idir == 1 {
cs := 1.0
oldcs := 1.0
var sn, r, oldsn float64
for i := l2; i < m-1; i++ {
cs, sn, r = impl.Dlartg(d[i]*cs, e[i])
if i > l2 {
e[i-1] = oldsn * r
}
oldcs, oldsn, d[i] = impl.Dlartg(oldcs*r, d[i+1]*sn)
work[i-l2] = cs
work[i-l2+nm1] = sn
work[i-l2+nm12] = oldcs
work[i-l2+nm13] = oldsn
}
h := d[m-1] * cs
d[m-1] = h * oldcs
e[m-2] = h * oldsn
if ncvt > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncvt, work, work[n-1:], vt[l2*ldvt:], ldvt)
}
if nru > 0 {
impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, m-l2, work[nm12:], work[nm13:], u[l2:], ldu)
}
if ncc > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncc, work[nm12:], work[nm13:], c[l2*ldc:], ldc)
}
if math.Abs(e[m-2]) < thresh {
e[m-2] = 0
}
} else {
cs := 1.0
oldcs := 1.0
var sn, r, oldsn float64
for i := m - 1; i >= l2+1; i-- {
cs, sn, r = impl.Dlartg(d[i]*cs, e[i-1])
if i < m-1 {
e[i] = oldsn * r
}
oldcs, oldsn, d[i] = impl.Dlartg(oldcs*r, d[i-1]*sn)
work[i-l2-1] = cs
work[i-l2+nm1-1] = -sn
work[i-l2+nm12-1] = oldcs
work[i-l2+nm13-1] = -oldsn
}
h := d[l2] * cs
d[l2] = h * oldcs
e[l2] = h * oldsn
if ncvt > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncvt, work[nm12:], work[nm13:], vt[l2*ldvt:], ldvt)
}
if nru > 0 {
impl.Dlasr(blas.Right, lapack.Variable, lapack.Backward, nru, m-l2, work, work[n-1:], u[l2:], ldu)
}
if ncc > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncc, work, work[n-1:], c[l2*ldc:], ldc)
}
if math.Abs(e[l2]) <= thresh {
e[l2] = 0
}
}
} else {
// Use nonzero shift.
if idir == 1 {
// Chase bulge from top to bottom. Save cosines and sines for
// later singular vector updates.
f := (math.Abs(d[l2]) - shift) * (math.Copysign(1, d[l2]) + shift/d[l2])
g := e[l2]
var cosl, sinl float64
for i := l2; i < m-1; i++ {
cosr, sinr, r := impl.Dlartg(f, g)
if i > l2 {
e[i-1] = r
}
f = cosr*d[i] + sinr*e[i]
e[i] = cosr*e[i] - sinr*d[i]
g = sinr * d[i+1]
d[i+1] *= cosr
cosl, sinl, r = impl.Dlartg(f, g)
d[i] = r
f = cosl*e[i] + sinl*d[i+1]
d[i+1] = cosl*d[i+1] - sinl*e[i]
if i < m-2 {
g = sinl * e[i+1]
e[i+1] = cosl * e[i+1]
}
work[i-l2] = cosr
work[i-l2+nm1] = sinr
work[i-l2+nm12] = cosl
work[i-l2+nm13] = sinl
}
e[m-2] = f
if ncvt > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncvt, work, work[n-1:], vt[l2*ldvt:], ldvt)
}
if nru > 0 {
impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, m-l2, work[nm12:], work[nm13:], u[l2:], ldu)
}
if ncc > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncc, work[nm12:], work[nm13:], c[l2*ldc:], ldc)
}
if math.Abs(e[m-2]) <= thresh {
e[m-2] = 0
}
} else {
// Chase bulge from top to bottom. Save cosines and sines for
// later singular vector updates.
f := (math.Abs(d[m-1]) - shift) * (math.Copysign(1, d[m-1]) + shift/d[m-1])
g := e[m-2]
for i := m - 1; i > l2; i-- {
cosr, sinr, r := impl.Dlartg(f, g)
if i < m-1 {
e[i] = r
}
f = cosr*d[i] + sinr*e[i-1]
e[i-1] = cosr*e[i-1] - sinr*d[i]
g = sinr * d[i-1]
d[i-1] *= cosr
cosl, sinl, r := impl.Dlartg(f, g)
d[i] = r
f = cosl*e[i-1] + sinl*d[i-1]
d[i-1] = cosl*d[i-1] - sinl*e[i-1]
if i > l2+1 {
g = sinl * e[i-2]
e[i-2] *= cosl
}
work[i-l2-1] = cosr
work[i-l2+nm1-1] = -sinr
work[i-l2+nm12-1] = cosl
work[i-l2+nm13-1] = -sinl
}
e[l2] = f
if math.Abs(e[l2]) <= thresh {
e[l2] = 0
}
if ncvt > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncvt, work[nm12:], work[nm13:], vt[l2*ldvt:], ldvt)
}
if nru > 0 {
impl.Dlasr(blas.Right, lapack.Variable, lapack.Backward, nru, m-l2, work, work[n-1:], u[l2:], ldu)
}
if ncc > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncc, work, work[n-1:], c[l2*ldc:], ldc)
}
}
}
}
}
// All singular values converged, make them positive.
for i := 0; i < n; i++ {
if d[i] < 0 {
d[i] *= -1
if ncvt > 0 {
bi.Dscal(ncvt, -1, vt[i*ldvt:], 1)
}
}
}
// Sort the singular values in decreasing order.
for i := 0; i < n-1; i++ {
isub := 0
smin := d[0]
for j := 1; j < n-i; j++ {
if d[j] <= smin {
isub = j
smin = d[j]
}
}
if isub != n-i {
// Swap singular values and vectors.
d[isub] = d[n-i-1]
d[n-i-1] = smin
if ncvt > 0 {
bi.Dswap(ncvt, vt[isub*ldvt:], 1, vt[(n-i-1)*ldvt:], 1)
}
if nru > 0 {
bi.Dswap(nru, u[isub:], ldu, u[n-i-1:], ldu)
}
if ncc > 0 {
bi.Dswap(ncc, c[isub*ldc:], 1, c[(n-i-1)*ldc:], 1)
}
}
}
info = 0
for i := 0; i < n-1; i++ {
if e[i] != 0 {
info++
}
}
return info == 0
}