remove orig

This commit is contained in:
btracey
2016-01-06 21:38:42 -07:00
parent 7016112fd0
commit 909081d32b
13 changed files with 1059 additions and 1473 deletions

View File

@@ -498,6 +498,8 @@ func (impl Implementation) Dgels(trans blas.Transpose, m, n, nrhs int, a []float
return clapack.Dgels(trans, m, n, nrhs, a, lda, b, ldb) return clapack.Dgels(trans, m, n, nrhs, a, lda, b, ldb)
} }
const noSVDO = "dgesvd: not coded for overwrite"
// Dgesvd computes the singular value decomposition of the input matrix A. // Dgesvd computes the singular value decomposition of the input matrix A.
// //
// The singular value decomposition is // The singular value decomposition is
@@ -509,12 +511,12 @@ func (impl Implementation) Dgels(trans blas.Transpose, m, n, nrhs int, a []float
// //
// jobU and jobVT are options for computing the singular vectors. The behavior // jobU and jobVT are options for computing the singular vectors. The behavior
// is as follows // is as follows
// jobU == lapack.SVDAll All M columns of U are returned in u // jobU == lapack.SVDAll All m columns of U are returned in u
// jobU == lapack.SVDInPlace The first min(m,n) columns are returned in u // jobU == lapack.SVDInPlace The first min(m,n) columns are returned in u
// jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a // jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a
// jobU == lapack.SVDNone The columns of U are not computed. // jobU == lapack.SVDNone The columns of U are not computed.
// The behavior is the same for jobVT and the rows of V^T. At most one of jobU // The behavior is the same for jobVT and the rows of V^T. At most one of jobU
// and jobVT can equal lapack.SVDOverwrite. // and jobVT can equal lapack.SVDOverwrite, and Dgesvd will panic otherwise.
// //
// On entry, a contains the data for the m×n matrix A. During the call to Dgesvd // On entry, a contains the data for the m×n matrix A. During the call to Dgesvd
// the data is overwritten. On exit, A contains the appropriate singular vectors // the data is overwritten. On exit, A contains the appropriate singular vectors
@@ -529,12 +531,12 @@ func (impl Implementation) Dgels(trans blas.Transpose, m, n, nrhs int, a []float
// not used. // not used.
// //
// vt contains the left singular vectors on exit, stored rowwise. If // vt contains the left singular vectors on exit, stored rowwise. If
// jobV == lapack.SVDAll, vt is of size n×m. If jobV == lapack.SVDInPlace vt is // jobV == lapack.SVDAll, vt is of size n×m. If jobVT == lapack.SVDInPlace vt is
// of size min(m,n)×n. If jobU == lapack.SVDOverwrite or lapack.SVDNone, vt is // of size min(m,n)×n. If jobVT == lapack.SVDOverwrite or lapack.SVDNone, vt is
// not used. // not used.
// //
// The C interface does not support providing temporary storage. To provide compatibility // The C interface does not support providing temporary storage. To provide compatibility
// with native, lwork == -1 will not run Dgeqrf but will instead write the minimum // with native, lwork == -1 will not run Dgesvd but will instead write the minimum
// work necessary to work[0]. If len(work) < lwork, Dgeqrf will panic. // work necessary to work[0]. If len(work) < lwork, Dgeqrf will panic.
// //
// Dgesvd returns whether the decomposition successfully completed. // Dgesvd returns whether the decomposition successfully completed.
@@ -551,13 +553,13 @@ func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float
checkMatrix(min(m, n), n, vt, ldvt) checkMatrix(min(m, n), n, vt, ldvt)
} }
if jobU == lapack.SVDOverwrite && jobVT == lapack.SVDOverwrite { if jobU == lapack.SVDOverwrite && jobVT == lapack.SVDOverwrite {
panic("lapack: both jobU and jobVT are lapack.SVDOverwrite") panic(noSVDO)
} }
if len(s) < min(m, n) { if len(s) < min(m, n) {
panic(badS) panic(badS)
} }
if jobU != lapack.SVDAll || jobVT != lapack.SVDAll { if jobU == lapack.SVDOverwrite || jobVT == lapack.SVDOverwrite {
panic("lapack: SVD only coded for SVDAll job inputs") panic("lapack: SVD not coded to overwrite original matrix")
} }
minWork := max(5*min(m, n), 3*min(m, n)+max(m, n)) minWork := max(5*min(m, n), 3*min(m, n)+max(m, n))
if lwork != -1 { if lwork != -1 {
@@ -572,7 +574,7 @@ func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float
work[0] = float64(minWork) work[0] = float64(minWork)
return true return true
} }
return clapack.Dgesvd(byte(jobU), byte(jobVT), m, n, a, lda, s, u, ldu, vt, ldvt, work) return clapack.Dgesvd(lapack.Job(jobU), lapack.Job(jobVT), m, n, a, lda, s, u, ldu, vt, ldvt, work[1:])
} }
// Dgetf2 computes the LU decomposition of the m×n matrix A. // Dgetf2 computes the LU decomposition of the m×n matrix A.

View File

@@ -88,6 +88,10 @@ func TestDgeqrf(t *testing.T) {
testlapack.DgeqrfTest(t, impl) testlapack.DgeqrfTest(t, impl)
} }
func TestDgesvd(t *testing.T) {
testlapack.DgesvdTest(t, impl)
}
func TestDgetf2(t *testing.T) { func TestDgetf2(t *testing.T) {
testlapack.Dgetf2Test(t, impl) testlapack.Dgetf2Test(t, impl)
} }

View File

@@ -104,7 +104,7 @@ type SVDJob byte
const ( const (
SVDAll SVDJob = 'A' // Compute all singular vectors SVDAll SVDJob = 'A' // Compute all singular vectors
SVDInPlace = 'S' // Compute the first singular vectors and store in provided storage. SVDInPlace = 'S' // Compute the first singular vectors and store them in provided storage.
SVDOverwrite = 'O' // Compute the singular vectors and store in input matrix SVDOverwrite = 'O' // Compute the singular vectors and store them in input matrix
SVDNone = 'N' // Do not compute singular vectors SVDNone = 'N' // Do not compute singular vectors
) )

View File

@@ -1,497 +0,0 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
import (
"math"
"github.com/gonum/blas"
"github.com/gonum/blas/blas64"
"github.com/gonum/lapack"
)
// Dbdsqr performs a singular value decomposition of a real n×n bidiagonal matrix.
//
// The SVD of the bidiagonal matrix B is
// B = Q * S * P^T
// where S is a diagonal matrix of singular values, Q is an orthogonal matrix of
// left singular vectors, and P is an orthogonal matrix of right singular vectors.
//
// Q and P are only computed if requested. If left singular vectors are requested,
// this routine returns U * Q instead of Q, and if right singular vectors are
// requested P^T * VT is returned instead of P^T.
//
// Frequently Dbdsqr is used in conjuction with Dgebrd which reduces a general
// matrix A into bidiagonal form. In this case, the SVD of A is
// A = (U * Q) * S * (P^T * VT)
//
// This routine may also compute Q^T * C.
//
// d and e contain the elements of the bidiagonal matrix b. d must have length at
// least n, and e must have length at least n-1. Dbdsqr will panic if there is
// insufficient length. On exit, D contains the singular values of B in decreasing
// order.
//
// VT is a matrix of size n×ncvt whose elements are stored in vt. The elements
// of vt are modified to contain P^T * VT on exit. VT is not used if ncvt == 0.
//
// U is a matrix of size nru×n whose elements are stored in u. The elements
// of u are modified to contain U * Q on exit. U is not used if nru == 0.
//
// C is a matrix of size n×ncc whose elements are stored in c. The elements
// of c are modified to contain Q^T * C on exit. C is not used if ncc == 0.
//
// work contains temporary storage and must have length at least 4*n. Dbdsqr
// will panic if there is insufficient working memory.
//
// Dbdsqr returns whether the decomposition was successful.
func (impl Implementation) Dbdsqr(uplo blas.Uplo, n, ncvt, nru, ncc int, d, e, vt []float64, ldvt int, u []float64, ldu int, c []float64, ldc int, work []float64) (ok bool) {
if uplo != blas.Upper && uplo != blas.Lower {
panic(badUplo)
}
if ncvt != 0 {
checkMatrix(n, ncvt, vt, ldvt)
}
if nru != 0 {
checkMatrix(nru, n, u, ldu)
}
if ncc != 0 {
checkMatrix(n, ncc, c, ldc)
}
if len(d) < n {
panic(badD)
}
if len(e) < n-1 {
panic(badE)
}
if len(work) < 4*n {
panic(badWork)
}
var info int
bi := blas64.Implementation()
const (
maxIter = 6
)
if n == 0 {
return true
}
if n != 1 {
// If the singular vectors do not need to be computed, use qd algorithm.
if !(ncvt > 0 || nru > 0 || ncc > 0) {
info = impl.Dlasq1(n, d, e, work)
// If info is 2 dqds didn't finish, and so try to.
if info != 2 {
return info == 0
}
info = 0
}
nm1 := n - 1
nm12 := nm1 + nm1
nm13 := nm12 + nm1
idir := 0
eps := dlamchE
unfl := dlamchS
lower := uplo == blas.Lower
var cs, sn, r float64
if lower {
for i := 0; i < n-1; i++ {
cs, sn, r = impl.Dlartg(d[i], e[i])
d[i] = r
e[i] = sn * d[i+1]
d[i+1] *= cs
work[i] = cs
work[nm1+i] = sn
}
if nru > 0 {
impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, n, work, work[n-1:], u, ldu)
}
if ncc > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, n, ncc, work, work[n-1:], c, ldc)
}
}
// Compute singular values to a relative accuracy of tol. If tol is negative
// the values will be computed to an absolute accuracy of math.Abs(tol) * norm(b)
tolmul := math.Max(10, math.Min(100, eps*(-1.0/8)))
tol := tolmul * eps
var smax float64
for i := 0; i < n; i++ {
smax = math.Max(smax, math.Abs(d[i]))
}
for i := 0; i < n-1; i++ {
smax = math.Max(smax, math.Abs(e[i]))
}
var sminl float64
var thresh float64
if tol >= 0 {
sminoa := math.Abs(d[0])
if sminoa != 0 {
mu := sminoa
for i := 1; i < n; i++ {
mu = math.Abs(d[i]) * (mu / (mu + math.Abs(e[i-1])))
sminoa = math.Min(sminoa, mu)
if sminoa == 0 {
break
}
}
}
sminoa = sminoa / math.Sqrt(float64(n))
thresh = math.Max(tol*sminoa, float64(maxIter*n*n)*unfl)
} else {
thresh = math.Max(math.Abs(tol)*smax, float64(maxIter*n*n)*unfl)
}
// Prepare for the main iteration loop for the singular values.
maxIt := maxIter * n * n
iter := 0
oldl2 := -1
oldm := -1
// m points to the last element of unconverged part of matrix.
m := n
Outer:
for m > 1 {
if iter > maxIt {
info = 0
for i := 0; i < n-1; i++ {
if e[i] != 0 {
info++
}
}
return info == 0
}
// Find diagonal block of matrix to work on.
if tol < 0 && math.Abs(d[m-1]) <= thresh {
d[m-1] = 0
}
smax = math.Abs(d[m-1])
smin := smax
var l2 int
var broke bool
for l3 := 0; l3 < m-1; l3++ {
l2 = m - l3 - 2
abss := math.Abs(d[l2])
abse := math.Abs(e[l2])
if tol < 0 && abss <= thresh {
d[l2] = 0
}
if abse <= thresh {
broke = true
e[l2] = 0
if l2 == m-2 {
// Convergence of bottom singular value, return to top.
m--
continue Outer
}
l2++
break
}
smin = math.Min(smin, abss)
smax = math.Max(math.Max(smax, abss), abse)
}
<<<<<<< HEAD
if broke {
e[l2] = 0
if l2 == m-2 {
// Convergence of bottom singular value, return to top.
m--
continue
}
l2++
} else {
=======
if !broke {
>>>>>>> 618d4f1... simplify loop condition
l2 = 0
}
// e[ll] through e[m-2] are nonzero, e[ll-1] is zero
if l2 == m-2 {
// Handle 2×2 block separately.
var sinr, cosr, sinl, cosl float64
d[m-1], d[m-2], sinr, cosr, sinl, cosl = impl.Dlasv2(d[m-2], e[m-2], d[m-1])
e[m-2] = 0
if ncvt > 0 {
bi.Drot(ncvt, vt[(m-2)*ldvt:], 1, vt[(m-1)*ldvt:], 1, cosr, sinr)
}
if nru > 0 {
bi.Drot(nru, u[m-2:], ldu, u[m-1:], ldu, cosl, sinl)
}
if ncc > 0 {
bi.Drot(ncc, c[(m-2)*ldc:], 1, c[(m-1)*ldc:], 1, cosl, sinl)
}
m -= 2
continue
}
// If working on a new submatrix, choose shift direction from larger end
// diagonal element toward smaller.
if l2 > oldm-1 || m-1 < oldl2 {
if math.Abs(d[l2]) >= math.Abs(d[m-1]) {
idir = 1
} else {
idir = 2
}
}
// Apply convergence tests.
// TODO(btracey): There is a lot of similar looking code here. See
// if there is a better way to de-duplicate.
if idir == 1 {
// Run convergence test in forward direction.
// First apply standard test to bottom of matrix.
if math.Abs(e[m-2]) <= math.Abs(tol)*math.Abs(d[m-1]) || (tol < 0 && math.Abs(e[m-2]) <= thresh) {
e[m-2] = 0
continue
}
if tol >= 0 {
// If relative accuracy desired, apply convergence criterion forward.
mu := math.Abs(d[l2])
sminl = mu
for l3 := l2; l3 < m-1; l3++ {
if math.Abs(e[l3]) <= tol*mu {
e[l3] = 0
continue Outer
}
mu = math.Abs(d[l3+1]) * (mu / (mu + math.Abs(e[l3])))
sminl = math.Min(sminl, mu)
}
}
} else {
// Run convergence test in backward direction.
// First apply standard test to top of matrix.
if math.Abs(e[l2]) <= math.Abs(tol)*math.Abs(d[l2]) || (tol < 0 && math.Abs(e[l2]) <= thresh) {
e[l2] = 0
continue
}
if tol >= 0 {
// If relative accuracy desired, apply convergence criterion backward.
mu := math.Abs(d[m-1])
sminl = mu
for l3 := m - 2; l3 >= l2; l3-- {
if math.Abs(e[l3]) <= tol*mu {
e[l3] = 0
continue Outer
}
mu = math.Abs(d[l3]) * (mu / (mu + math.Abs(e[l3])))
sminl = math.Min(sminl, mu)
}
}
}
oldl2 = l2
oldm = m
// Compute shift. First, test if shifting would ruin relative accuracy,
// and if so set the shift to zero.
var shift float64
if tol >= 0 && float64(n)*tol*(sminl/smax) <= math.Max(eps, (1.0/100)*tol) {
shift = 0
} else {
var sl2 float64
if idir == 1 {
sl2 = math.Abs(d[l2])
shift, _ = impl.Dlas2(d[m-2], e[m-2], d[m-1])
} else {
sl2 = math.Abs(d[m-1])
shift, _ = impl.Dlas2(d[l2], e[l2], d[l2+1])
}
// Test if shift is negligible
if sl2 > 0 {
if (shift/sl2)*(shift/sl2) < eps {
shift = 0
}
}
}
iter += m - l2 + 1
// If no shift, do simplified QR iteration.
if shift == 0 {
if idir == 1 {
cs := 1.0
oldcs := 1.0
var sn, r, oldsn float64
for i := l2; i < m-1; i++ {
cs, sn, r = impl.Dlartg(d[i]*cs, e[i])
if i > l2 {
e[i-1] = oldsn * r
}
oldcs, oldsn, d[i] = impl.Dlartg(oldcs*r, d[i+1]*sn)
work[i-l2] = cs
work[i-l2+nm1] = sn
work[i-l2+nm12] = oldcs
work[i-l2+nm13] = oldsn
}
h := d[m-1] * cs
d[m-1] = h * oldcs
e[m-2] = h * oldsn
if ncvt > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncvt, work, work[n-1:], vt[l2*ldvt:], ldvt)
}
if nru > 0 {
impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, m-l2, work[nm12:], work[nm13:], u[l2:], ldu)
}
if ncc > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncc, work[nm12:], work[nm13:], c[l2*ldc:], ldc)
}
if math.Abs(e[m-2]) < thresh {
e[m-2] = 0
}
} else {
cs := 1.0
oldcs := 1.0
var sn, r, oldsn float64
for i := m - 1; i >= l2+1; i-- {
cs, sn, r = impl.Dlartg(d[i]*cs, e[i-1])
if i < m-1 {
e[i] = oldsn * r
}
oldcs, oldsn, d[i] = impl.Dlartg(oldcs*r, d[i-1]*sn)
work[i-l2-1] = cs
work[i-l2+nm1-1] = -sn
work[i-l2+nm12-1] = oldcs
work[i-l2+nm13-1] = -oldsn
}
h := d[l2] * cs
d[l2] = h * oldcs
e[l2] = h * oldsn
if ncvt > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncvt, work[nm12:], work[nm13:], vt[l2*ldvt:], ldvt)
}
if nru > 0 {
impl.Dlasr(blas.Right, lapack.Variable, lapack.Backward, nru, m-l2, work, work[n-1:], u[l2:], ldu)
}
if ncc > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncc, work, work[n-1:], c[l2*ldc:], ldc)
}
if math.Abs(e[l2]) <= thresh {
e[l2] = 0
}
}
} else {
// Use nonzero shift.
if idir == 1 {
// Chase bulge from top to bottom. Save cosines and sines for
// later singular vector updates.
f := (math.Abs(d[l2]) - shift) * (math.Copysign(1, d[l2]) + shift/d[l2])
g := e[l2]
var cosl, sinl float64
for i := l2; i < m-1; i++ {
cosr, sinr, r := impl.Dlartg(f, g)
if i > l2 {
e[i-1] = r
}
f = cosr*d[i] + sinr*e[i]
e[i] = cosr*e[i] - sinr*d[i]
g = sinr * d[i+1]
d[i+1] *= cosr
cosl, sinl, r = impl.Dlartg(f, g)
d[i] = r
f = cosl*e[i] + sinl*d[i+1]
d[i+1] = cosl*d[i+1] - sinl*e[i]
if i < m-2 {
g = sinl * e[i+1]
e[i+1] = cosl * e[i+1]
}
work[i-l2] = cosr
work[i-l2+nm1] = sinr
work[i-l2+nm12] = cosl
work[i-l2+nm13] = sinl
}
e[m-2] = f
if ncvt > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncvt, work, work[n-1:], vt[l2*ldvt:], ldvt)
}
if nru > 0 {
impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, m-l2, work[nm12:], work[nm13:], u[l2:], ldu)
}
if ncc > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncc, work[nm12:], work[nm13:], c[l2*ldc:], ldc)
}
if math.Abs(e[m-2]) <= thresh {
e[m-2] = 0
}
} else {
// Chase bulge from top to bottom. Save cosines and sines for
// later singular vector updates.
f := (math.Abs(d[m-1]) - shift) * (math.Copysign(1, d[m-1]) + shift/d[m-1])
g := e[m-2]
for i := m - 1; i > l2; i-- {
cosr, sinr, r := impl.Dlartg(f, g)
if i < m-1 {
e[i] = r
}
f = cosr*d[i] + sinr*e[i-1]
e[i-1] = cosr*e[i-1] - sinr*d[i]
g = sinr * d[i-1]
d[i-1] *= cosr
cosl, sinl, r := impl.Dlartg(f, g)
d[i] = r
f = cosl*e[i-1] + sinl*d[i-1]
d[i-1] = cosl*d[i-1] - sinl*e[i-1]
if i > l2+1 {
g = sinl * e[i-2]
e[i-2] *= cosl
}
work[i-l2-1] = cosr
work[i-l2+nm1-1] = -sinr
work[i-l2+nm12-1] = cosl
work[i-l2+nm13-1] = -sinl
}
e[l2] = f
if math.Abs(e[l2]) <= thresh {
e[l2] = 0
}
if ncvt > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncvt, work[nm12:], work[nm13:], vt[l2*ldvt:], ldvt)
}
if nru > 0 {
impl.Dlasr(blas.Right, lapack.Variable, lapack.Backward, nru, m-l2, work, work[n-1:], u[l2:], ldu)
}
if ncc > 0 {
impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncc, work, work[n-1:], c[l2*ldc:], ldc)
}
}
}
}
}
// All singular values converged, make them positive.
for i := 0; i < n; i++ {
if d[i] < 0 {
d[i] *= -1
if ncvt > 0 {
bi.Dscal(ncvt, -1, vt[i*ldvt:], 1)
}
}
}
// Sort the singular values in decreasing order.
for i := 0; i < n-1; i++ {
isub := 0
smin := d[0]
for j := 1; j < n-i; j++ {
if d[j] <= smin {
isub = j
smin = d[j]
}
}
if isub != n-i {
// Swap singular values and vectors.
d[isub] = d[n-i-1]
d[n-i-1] = smin
if ncvt > 0 {
bi.Dswap(ncvt, vt[isub*ldvt:], 1, vt[(n-i-1)*ldvt:], 1)
}
if nru > 0 {
bi.Dswap(nru, u[isub:], ldu, u[n-i-1:], ldu)
}
if ncc > 0 {
bi.Dswap(ncc, c[isub*ldc:], 1, c[(n-i-1)*ldc:], 1)
}
}
}
info = 0
for i := 0; i < n-1; i++ {
if e[i] != 0 {
info++
}
}
return info == 0
}

View File

@@ -86,7 +86,7 @@ func (impl Implementation) Dgebrd(m, n int, a []float64, lda int, d, e, tauQ, ta
if lwork >= (m+n)*nbmin { if lwork >= (m+n)*nbmin {
nb = lwork / (m + n) nb = lwork / (m + n)
} else { } else {
nb = 1 nb = minmn
nx = minmn nx = minmn
} }
} }

File diff suppressed because it is too large Load Diff

View File

@@ -1,721 +0,0 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
import (
"math"
"github.com/gonum/blas"
"github.com/gonum/blas/blas64"
"github.com/gonum/lapack"
)
// Dgesvd computes the singular value decomposition of the input matrix A.
//
// Only coded for jobU == lapack.SVDAll and jobVT == lapack.SVDAll.
//
// The singular value decomposition is
// A = U * Sigma * V^T
// where Sigma is an m×n diagonal matrix containing the singular values of A,
// U is an m×m orthogonal matrix and V is an n×n orthogonal matrix. The first
// min(m,n) columns of U and V are the left and right singular vectors of A
// respectively.
//
// jobU and jobVT are options for computing the singular vectors. The behavior
// is as follows
// jobU == lapack.SVDAll All M columns of U are returned in u
// jobU == lapack.SVDInPlace The first min(m,n) columns are returned in u
// jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a
// jobU == lapack.SVDNone The columns of U are not computed.
// The behavior is the same for jobVT and the rows of V^T. At most one of jobU
// and jobVT can equal lapack.SVDOverwrite.
//
// On entry, a contains the data for the m×n matrix A. During the call to Dgesvd
// the data is overwritten. On exit, A contains the appropriate singular vectors
// if either job is lapack.SVDOverwrite.
//
// s is a slice of length at least min(m,n) and on exit contains the singular
// values in decreasing order.
//
// u contains the left singular vectors on exit, stored columnwise. If
// jobU == lapack.SVDAll, u is of size m×m. If jobU == lapack.SVDInPlace u is
// of size m×min(m,n). If jobU == lapack.SVDOverwrite or lapack.SVDNone, u is
// not used.
//
// vt contains the left singular vectors on exit, stored rowwise. If
// jobV == lapack.SVDAll, vt is of size n×m. If jobV == lapack.SVDInPlace vt is
// of size min(m,n)×n. If jobU == lapack.SVDOverwrite or lapack.SVDNone, vt is
// not used.
//
// work is a slice for storing temporary memory, and lwork is the usable size of
// the slice. lwork must be at least max(5*min(m,n), 3*min(m,n)+max(m,n)).
// If lwork == -1, instead of performing Dgesvd, the optimal work length will be
// stored into work[0]. Dgesvd will panic if the working memory has insufficient
// storage.
//
// Dgesvd returns whether the decomposition successfully completed.
func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, lwork int) (ok bool) {
checkMatrix(m, n, a, lda)
if jobU == lapack.SVDAll {
checkMatrix(m, m, u, ldu)
} else if jobU == lapack.SVDInPlace {
checkMatrix(m, min(m, n), u, ldu)
}
if jobVT == lapack.SVDAll {
checkMatrix(n, n, vt, ldvt)
} else if jobVT == lapack.SVDInPlace {
checkMatrix(min(m, n), n, vt, ldvt)
}
if jobU == lapack.SVDOverwrite && jobVT == lapack.SVDOverwrite {
panic("lapack: both jobU and jobVT are lapack.SVDOverwrite")
}
if len(s) < min(m, n) {
panic(badS)
}
if jobU != lapack.SVDAll || jobVT != lapack.SVDAll {
panic("lapack: SVD only coded for SVDAll job inputs")
}
minWork := max(5*min(m, n), 3*min(m, n)+max(m, n))
if lwork != -1 {
if len(work) < lwork {
panic(badWork)
}
if lwork < minWork {
panic(badWork)
}
}
if m == 0 || n == 0 {
return true
}
minmn := min(m, n)
wantua := jobU == lapack.SVDAll
wantus := jobU == lapack.SVDInPlace
wantuas := wantua || wantus
wantuo := jobU == lapack.SVDOverwrite
wantun := jobU == lapack.None
wantva := jobVT == lapack.SVDAll
wantvs := jobVT == lapack.SVDInPlace
wantvas := wantva || wantvs
wantvo := jobVT == lapack.SVDOverwrite
wantvn := jobVT == lapack.None
bi := blas64.Implementation()
var mnthr int
// The netlib implementation checks has this at only length 1. Our implementation
// checks all input sizes before examining the l == -1 case.
dum := make([]float64, m*n)
// Compute optimal space for subroutines.
maxwrk := 1
opts := string(jobU) + string(jobVT)
var wrkbl, bdspac int
if m >= n {
mnthr = impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0)
bdspac = 5 * n
impl.Dgeqrf(m, n, a, lda, dum, dum, -1)
lwork_dgeqrf := int(dum[0])
impl.Dorgqr(m, n, n, a, lda, dum, dum, -1)
lwork_dorgqr_n := int(dum[0])
impl.Dorgqr(m, m, n, a, lda, dum, dum, -1)
lwork_dorgqr_m := int(dum[0])
impl.Dgebrd(n, n, a, lda, s, dum, dum, dum, dum, -1)
lwork_dgebrd := int(dum[0])
impl.Dorgbr(lapack.ApplyP, n, n, n, a, lda, dum, dum, -1)
lwork_dorgbr_p := int(dum[0])
impl.Dorgbr(lapack.ApplyQ, n, n, n, a, lda, dum, dum, -1)
lwork_dorgbr_q := int(dum[0])
if m >= mnthr {
// m >> n
if wantun {
// Path 1
maxwrk = n + lwork_dgeqrf
maxwrk = max(maxwrk, 3*n+lwork_dgebrd)
if wantvo || wantvas {
maxWork = max(maxwrk, 3*n+lwork_dorgbr_p)
}
maxwrk = max(maxwrk, bdspac)
} else if wantuo && wantvn {
// Path 2
wrkbl = n + lwork_dgeqrf
wrkbl = max(wrkbl, n+lwork_dorgqr_n)
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
wrkbl = max(wrkbl, bdspac)
maxwrk = max(n*n+wrkbl, n*n+m*n+n)
} else if wantuo && wantvs {
// Path 3
// or lapack.All
wrkbl = n + lwork_dgeqrf
wrkbl = max(wrkbl, n+lwork_dorgqr_n)
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
wrkbl = max(wrkbl, bdspac)
maxwrk = max(n*n+wrkbl, n*n+m*n+n)
} else if wantus && wantvn {
// Path 4
wrkbl = n + lwork_dgeqrf
wrkbl = max(wrkbl, n+lwork_dorgqr_n)
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
wrkbl = max(wrkbl, bdspac)
maxwrk = n*n + wrkbl
} else if wantus && wantvo {
// Path 5
wrkbl = n + lwork_dgeqrf
wrkbl = max(wrkbl, n+lwork_dorgqr_n)
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
wrkbl = max(wrkbl, bdspac)
maxwrk = 2*n*n + wrkbl
} else if wantus && wantvas {
// Path 6
wrkbl = n + lwork_dgeqrf
wrkbl = max(wrkbl, n+lwork_dorgqr_n)
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
wrkbl = max(wrkbl, bdspac)
maxwrk = n*n + wrkbl
} else if wantua && wantvn {
// Path 7
wrkbl = n + lwork_dgeqrf
wrkbl = max(wrkbl, n+lwork_dorgqr_m)
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
wrkbl = max(wrkbl, bdspac)
maxwrk = n*n + wrkbl
} else if wantua && wantvo {
// Path 8
wrkbl = n + lwork_dgeqrf
wrkbl = max(wrkbl, n+lwork_dorgqr_m)
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
wrkbl = max(wrkbl, bdspac)
maxwrk = 2*n*n + wrkbl
} else if wantua && wantvas {
// Path 9
wrkbl = n + lwork_dgeqrf
wrkbl = max(wrkbl, n+lwork_dorgqr_m)
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
wrkbl = max(wrkbl, bdspac)
maxwrk = n*n + wrkbl
}
} else {
// Path 10: m > n
impl.Dgebrd(m, n, a, lda, s, dum, dum, dum, dum, -1)
lwork_dgebrd := int(dum[0])
maxwrk = 3*n + lwork_dgebrd
if wantus || wantuo {
impl.Dorgbr(lapack.ApplyQ, m, n, n, a, lda, dum, dum, -1)
lwork_dorgbr_q = int(dum[0])
maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
}
if wantua {
impl.Dorgbr(lapack.ApplyQ, m, m, n, a, lda, dum, dum, -1)
lwork_dorgbr_q := int(dum[0])
maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
}
if !wantvn {
maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
}
maxwrk = max(maxwrk, bdspac)
}
} else {
mnthr = impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0)
bdspac = 5 * m
impl.Dgelqf(m, n, a, lda, dum, dum, -1)
lwork_dgelqf := int(dum[0])
impl.Dorglq(n, n, m, dum, n, dum, dum, -1)
lwork_dorglq_n := int(dum[0])
impl.Dorglq(m, n, m, a, lda, dum, dum, -1)
lwork_dorglq_m := int(dum[0])
impl.Dgebrd(m, m, a, lda, s, dum, dum, dum, dum, -1)
lwork_dgebrd := int(dum[0])
impl.Dorgbr(lapack.ApplyP, m, m, m, a, n, dum, dum, -1)
lwork_dorgbr_p := int(dum[0])
impl.Dorgbr(lapack.ApplyQ, m, m, m, a, n, dum, dum, -1)
lwork_dorgbr_q := int(dum[0])
if n >= mnthr {
// n >> m
if wantvn {
// Path 1t
maxwrk = m + lwork_dgelqf
maxwrk = max(maxwrk, 3*m+lwork_dgebrd)
if wntuo.OR.wntuas {
maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
}
maxwrk = max(maxwrk, bdspac)
} else if wantvo && wantun {
// Path 2t
wrkbl = m + lwork_dgelqf
wrkbl = max(wrkbl, m+lwork_dorglq_m)
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
wrkbl = max(wrkbl, bdspac)
maxwrk = max(m*m+wrkbl, m*m+m*n+m)
} else if wantvo && wantuas {
// Path 3t
wrkbl = m + lwork_dgelqf
wrkbl = max(wrkbl, m+lwork_dorglq_m)
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
wrkbl = max(wrkbl, bdspac)
maxwrk = max(m*m+wrkbl, m*m+m*n+m)
} else if wantvs && wantun {
// Path 4t
wrkbl = m + lwork_dgelqf
wrkbl = max(wrkbl, m+lwork_dorglq_m)
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
wrkbl = max(wrkbl, bdspac)
maxwrk = m*m + wrkbl
} else if wantvs && wantuo {
// Path 5t
wrkbl = m + lwork_dgelqf
wrkbl = max(wrkbl, m+lwork_dorglq_m)
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
wrkbl = max(wrkbl, bdspac)
maxwrk = 2*m*m + wrkbl
} else if wantvs && wantuas {
// Path 6t
wrkbl = m + lwork_dgelqf
wrkbl = max(wrkbl, m+lwork_dorglq_m)
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
wrkbl = max(wrkbl, bdspac)
maxwrk = m*m + wrkbl
} else if wantva && wantun {
// Path 7t
wrkbl = m + lwork_dgelqf
wrkbl = max(wrkbl, m+lwork_dorglq_n)
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
wrkbl = max(wrkbl, bdspac)
maxwrk = m*m + wrkbl
} else if wantva && wantuo {
// Path 8t
wrkbl = m + lwork_dgelqf
wrkbl = max(wrkbl, m+lwork_dorglq_n)
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
wrkbl = max(wrkbl, bdspac)
maxwrk = 2*m*m + wrkbl
} else if wantva && wantuas {
// Path 9t
wrkbl = m + lwork_dgelqf
wrkbl = max(wrkbl, m+lwork_dorglq_n)
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
wrkbl = max(wrkbl, bdspac)
maxwrk = m*m + wrkbl
}
} else {
// Path 10t, n > m
impl.Dgebrd(m, n, a, lda, s, dum, dum, dum, dum, -1)
lwork_dgebrd = int(dum[0])
maxwrk := 3*m + lwork_dgebrd
if wantvs || wantvo {
impl.Dorgbr(lapack.ApplyP, m, n, m, a, n, dum, dum, -1)
lwork_dorgbr_p = int(dum[0])
maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
}
if wantva {
impl.Dorgbr(lapack.ApplyP, n, n, m, a, n, dum, dum, -1)
lwork_dorgbr_p = int(dum[0])
maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
}
if !wantun {
maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
}
maxwrk = max(maxwrk, bdspac)
}
}
maxwrk = max(maxwrk, minWork)
work[0] = maxwrk
if lwork == -1 {
return true
}
// Perform decomposition.
eps := dlamchE
smlnum := math.Sqrt(dlamchS) / eps
bignum := 1 / smlnum
// Scale A if max element outside range [smlnum, bignum]
anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, dum)
iscl := 0
if anrm > 0 && anrm < smlnum {
iscl = 1
impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda)
} else if anrm > bignum {
iscl = 1
impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda)
}
var ie int
if m >= n {
// If A has sufficiently more rows than columns, use the QR decomposition.
if m >= mnthr {
if wantun {
// Path 1
<<<<<<< HEAD
itau = 1
iwo
}
}
}
=======
itau := 0
iwork := itau + n
// Compute A = Q * R
impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
// Zero out below R
impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
ie = 0
itauq := ie + n
itaup := itauq + n
iwork = itaup + n
// Bidiagonalize R in A
impl.Dgebrd(n, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
ncvt := 0
if wantvo || wantvas {
// Generate P^T.
impl.Dorgbr(lapack.ApplyP, n, n, n, a, lda, work[itaup:], work[iwork:], lwork-iwork)
ncvt = n
}
iwork = ie + n
// Perform bidiagonal QR iteration computing right singular vectors
// of A in A if desired.
ok = impl.Dbdsqr(blas.Upper, n, ncvt, 0, 0, s, work[ie:], a, lda, dum, 1, dum, 1, work[iwork:])
// If right singular vectors desired in VT, copy them there.
if wantvas {
impl.Dlacpy(blas.All, n, n, a, lda, vt, ldvt)
}
} else if wantuo && wantvn {
// Path 2.
/*
if lwork >= n*n+max(4*n, bdspac) {
// Sufficient workspace for a fast algorithm
ir := 1
var ldworku, ldworkr int
if lwork >= max(workbl, lda*n+n)+lda*n {
// work(iu) and work(ir) are n×lda
ldworku = lda
ldworkr = lda
} else if lwork >= max(workbl, lda*n+n)+n*n {
// work(iu) is ldwrku×n
}
itau := ir + ldworkr*n
iwork := itau + n
// Compute A = Q*R
impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
// Copy R to work(ir) and zero out below it.
impl.Dlacpy(blas.Upper, n, n, a, lda, b, ldb)
}
*/
panic("not implemented")
} else if wantua {
if wantvas {
// Path 9
if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
// Sufficient workspace for a fast algorithm
iu := 0
var ldworku int
if lwork >= wrkbl+lda*n {
ldworku = lda
} else {
ldworku = n
}
itau := iu + ldworku*n
iwork := itau + n
// Compute A = Q * R, copying result to U
impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
// Generate Q in U
impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
// Copy R from A to VT, zeroing out below it.
impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
if n > 1 {
impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
}
ie = itau
itauq := ie + n
itaup := itauq + n
iwork = itaup + n
// Bidiagonalize R in VT
impl.Dgebrd(n, n, vt, ldvt, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
// Multiply Q in U by left bidiagonalizing vectors in VT
impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
// Generate right bidiagonalizing vectors in VT
impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
iwork = ie + n
// Perform bidiagonal QR iteration, computing left singular
// vectors of A in U and computing right singular vectors
// of A in VT.
ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:], vt, ldvt, u, ldu, dum, 1, work[iwork:])
}
}
}
} else {
// Path 10.
ie = 0
itauq := ie + n
itaup := itauq + n
iwork := itaup + n
// Bidiagonalize A
impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
if wantuas {
// Left singular vectors are desired in U. Copy result to U and
// generate left biadiagonalizing vectors in U.
impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
var ncu int
if wantus {
ncu = n
}
if wantua {
ncu = m
}
impl.Dorgbr(lapack.ApplyQ, m, ncu, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
}
if wantvas {
// Right singular vectors are desired in VT. Copy result to VT and
// generate left biadiagonalizing vectors in VT.
impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
}
if wantuo {
panic("not implemented")
}
if wantvo {
panic("not implemented")
}
iwork = ie + n
var nru, ncvt int
if wantuas || wantuo {
nru = m
}
if wantun {
nru = 0
}
if wantvas || wantvo {
ncvt = n
}
if wantvn {
ncvt = 0
}
if !wantuo && !wantvo {
// Perform bidiagonal QR iteration, if desired, computing left
// singular vectors in U and right singular vectors in VT.
ok = impl.Dbdsqr(blas.Upper, n, ncvt, nru, 0, s, work[ie:], vt, ldvt, u, ldu, dum, 1, work[iwork:])
} else {
panic("not implemented")
}
}
} else {
// A has more columns than rows. If A has sufficiently more columns than
// rows, first reduce using the LQ decomposition.
if n >= mnthr {
if wantva {
if wantuas {
// Path 9t
if lwork >= m*m+max(max(m+n, 4*m), bdspac) {
// Sufficient workspace for a fast algorithm.
iu := 0
var ldworku int
if lwork >= wrkbl+lda*m {
ldworku = lda
} else {
ldworku = m
}
itau := iu + ldworku*m
iwork := itau + m
// Generate A = L * Q copying result to VT
impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
// Generate Q in VT
impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
// Copy L to work[iu], zeroing out above it.
impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+ldworku:], ldworku)
ie = itau
itauq := ie + m
itaup := itauq + m
iwork = itaup + m
// Bidiagonalize L in work[iu], copying result to U
impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
// Generate right bidiagonalizing vectors in work[iu]
impl.Dorgbr(lapack.ApplyP, m, m, m, work[iu:], ldworku, work[itaup:], work[iwork:], lwork-iwork)
// Generate left bidiagonalizing vectors in U.
impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
iwork = ie + m
// Perform bidiagonal QR iteration, computing left singular
// vectors of L in U and computing right singular vectors
// of L in work[iu]
ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:], work[iu:], ldworku, u, ldu, dum, 1, work[iwork:])
// Multiply right singular vectors of L in work[iu:]
// Q in VT, storing result in A.
bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1, work[iu:], ldworku, vt, ldvt, 0, a, lda)
// Copy right singular vectors of A from A to VT
impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
} else {
// Insufficient workspace for a fast algorithm.
itau := 0
iwork := itau + m
// Compute A = L * Q, copying result to VT
impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
// Generate Q in VT
impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
// Copy L to U, zeroing out above it.
impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
ie = itau
itauq := ie + m
itaup := itauq + m
iwork = itaup + m
// Bidiagonalize L in U
impl.Dgebrd(m, m, u, ldu, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
// Multiply right bidiagonalizing vectors in U by Q in VT.
impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m, u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
// Generate left bidiagonalizing vectors in U
impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
iwork = ie + m
// Perform bidiagonal QR iteration, computing left singular
// vectors of A in U and computing right singular vectors
// of A in VT.
ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:], vt, ldvt, u, ldu, dum, 1, work[iwork:])
}
} else {
panic("not implemented")
}
}
} else {
// Path 10t
ie = 0
itauq := ie + m
itaup := itauq + m
iwork := itaup + m
// Bidiagonalize A
impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
if wantuas {
// If left singular vectors desired in U, copy result to U and
// generate left bidiagonalizing vectors in U.
impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
impl.Dorgbr(lapack.ApplyQ, m, m, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
}
if wantvas {
// If right singular vectors desired in VT, copy result to VT
// and generate right bidiagonalizing vectors in VT.
impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
var nrvt int
if wantva {
nrvt = n
} else {
nrvt = m
}
impl.Dorgbr(lapack.ApplyP, nrvt, n, m, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
}
if wantuo {
panic("not implemented")
}
if wantvo {
panic("not implemented")
}
iwork = ie + m
var nru, ncvt int
if wantuas || wantuo {
nru = m
}
if wantvas || wantvo {
ncvt = n
}
if !wantvo && !wantvo {
// Perform bidiagonal QR iteration, if desired, computing left
// singular vectors in U and computing right singular vectors in
// VT.
ok = impl.Dbdsqr(blas.Lower, m, ncvt, nru, 0, s, work[ie:], vt, ldvt, u, ldu, dum, 1, work[iwork:])
} else {
panic("not implemented")
}
}
}
minmn := min(m, n)
if !ok {
if ie > 1 {
for i := 0; i < minmn-1; i++ {
work[i+1] = work[i+ie]
}
}
if ie < 1 {
for i := minmn - 2; i >= 0; i-- {
work[i+1] = work[i+ie]
}
}
}
// Undo scaling if necessary
if iscl == 1 {
if anrm > bignum {
impl.Dlascl(lapack.General, 0, 0, bignum, anrm, minmn, 1, s, minmn)
}
if ok && anrm > bignum {
impl.Dlascl(lapack.General, 0, 0, bignum, anrm, minmn-1, 1, work[minmn:], minmn)
}
if anrm < smlnum {
impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, minmn, 1, s, minmn)
}
if ok && anrm < smlnum {
impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, minmn-1, 1, work[minmn:], minmn)
}
}
work[0] = float64(maxwrk)
return ok
>>>>>>> fd73640... Add a partial implementation of Dgesvd and test
}

View File

@@ -24,11 +24,10 @@ func (impl Implementation) Dlacpy(uplo blas.Uplo, m, n int, a []float64, lda int
case blas.Lower: case blas.Lower:
for i := 0; i < m; i++ { for i := 0; i < m; i++ {
for j := 0; j < min(i, n); j++ { for j := 0; j < min(i+1, n); j++ {
b[i*ldb+j] = a[i*lda+j] b[i*ldb+j] = a[i*lda+j]
} }
} }
case blas.All: case blas.All:
for i := 0; i < m; i++ { for i := 0; i < m; i++ {
for j := 0; j < n; j++ { for j := 0; j < n; j++ {

View File

@@ -20,7 +20,7 @@ func (impl Implementation) Dlaset(uplo blas.Uplo, m, n int, alpha, beta float64,
} }
} else if uplo == blas.Lower { } else if uplo == blas.Lower {
for i := 0; i < m; i++ { for i := 0; i < m; i++ {
for j := 0; j < i; j++ { for j := 0; j < min(i+1, n); j++ {
a[i*lda+j] = alpha a[i*lda+j] = alpha
} }
} }

View File

@@ -355,7 +355,7 @@ func (Implementation) Ilaenv(ispec int, s string, opts string, n1, n2, n3, n4 in
return 2 return 2
case 6: case 6:
// Used by xGELSS and xGESVD // Used by xGELSS and xGESVD
return min(n1, n2) * 1e6 return int(float64(min(n1, n2)) * 1.6)
case 7: case 7:
// Not used // Not used
return 1 return 1

View File

@@ -35,6 +35,8 @@ func DbdsqrTest(t *testing.T, impl Dbdsqrer) {
{10, 10, 10, 10, 30, 40, 50}, {10, 10, 10, 10, 30, 40, 50},
{10, 12, 11, 13, 30, 40, 50}, {10, 12, 11, 13, 30, 40, 50},
{20, 12, 13, 11, 30, 40, 50}, {20, 12, 13, 11, 30, 40, 50},
{130, 130, 130, 500, 900, 900, 500},
} { } {
for cas := 0; cas < 100; cas++ { for cas := 0; cas < 100; cas++ {
n := test.n n := test.n

View File

@@ -46,8 +46,9 @@ func DgebrdTest(t *testing.T, impl Dgebrder) {
for i := range a { for i := range a {
a[i] = rand.NormFloat64() a[i] = rand.NormFloat64()
} }
d := make([]float64, minmn) d := make([]float64, minmn)
e := make([]float64, minmn) e := make([]float64, minmn-1)
tauP := make([]float64, minmn) tauP := make([]float64, minmn)
tauQ := make([]float64, minmn) tauQ := make([]float64, minmn)
work := make([]float64, max(m, n)) work := make([]float64, max(m, n))
@@ -78,6 +79,21 @@ func DgebrdTest(t *testing.T, impl Dgebrder) {
impl.Dgebrd(m, n, a, lda, d, e, tauQ, tauP, work, lwork) impl.Dgebrd(m, n, a, lda, d, e, tauQ, tauP, work, lwork)
work = make([]float64, int(work[0])) work = make([]float64, int(work[0]))
lwork = len(work) lwork = len(work)
for i := range work {
work[i] = math.NaN()
}
for i := range d {
d[i] = math.NaN()
}
for i := range e {
e[i] = math.NaN()
}
for i := range tauQ {
tauQ[i] = math.NaN()
}
for i := range tauP {
tauP[i] = math.NaN()
}
impl.Dgebrd(m, n, a, lda, d, e, tauQ, tauP, work, lwork) impl.Dgebrd(m, n, a, lda, d, e, tauQ, tauP, work, lwork)
// Test answers // Test answers

View File

@@ -25,19 +25,37 @@ func DgesvdTest(t *testing.T, impl Dgesvder) {
// TODO(btracey): Add tests for m > mnthr and n > mnthr when other SVD // TODO(btracey): Add tests for m > mnthr and n > mnthr when other SVD
// conditions are implemented. Right now mnthr is 5,000,000 which is too // conditions are implemented. Right now mnthr is 5,000,000 which is too
// large to create a square matrix of that size. // large to create a square matrix of that size.
for _, jobU := range []lapack.SVDJob{lapack.SVDAll} {
for _, jobVT := range []lapack.SVDJob{lapack.SVDAll} {
for _, test := range []struct { for _, test := range []struct {
m, n, lda, ldu, ldvt int m, n, lda, ldu, ldvt int
}{ }{
{5, 5, 0, 0, 0}, {5, 5, 0, 0, 0},
{5, 7, 0, 0, 0}, {5, 6, 0, 0, 0},
{7, 5, 0, 0, 0}, {6, 5, 0, 0, 0},
{5, 9, 0, 0, 0},
{9, 5, 0, 0, 0},
{5, 5, 10, 11, 12}, {5, 5, 10, 11, 12},
{5, 7, 10, 11, 12}, {5, 6, 10, 11, 12},
{7, 5, 10, 11, 12}, {6, 5, 10, 11, 12},
{5, 5, 10, 11, 12},
{5, 9, 10, 11, 12},
{9, 5, 10, 11, 12},
{300, 300, 0, 0, 0},
{300, 400, 0, 0, 0},
{400, 300, 0, 0, 0},
{300, 600, 0, 0, 0},
{600, 300, 0, 0, 0},
{300, 300, 400, 450, 460},
{300, 400, 500, 550, 560},
{400, 300, 550, 550, 560},
{300, 600, 700, 750, 760},
{600, 300, 700, 750, 760},
} { } {
jobU := lapack.SVDAll
jobVT := lapack.SVDAll
m := test.m m := test.m
n := test.n n := test.n
lda := test.lda lda := test.lda
@@ -68,6 +86,10 @@ func DgesvdTest(t *testing.T, impl Dgesvder) {
vt[i] = rand.NormFloat64() vt[i] = rand.NormFloat64()
} }
uAllOrig := make([]float64, len(u))
copy(uAllOrig, u)
vtAllOrig := make([]float64, len(vt))
copy(vtAllOrig, vt)
aCopy := make([]float64, len(a)) aCopy := make([]float64, len(a))
copy(aCopy, a) copy(aCopy, a)
@@ -76,10 +98,114 @@ func DgesvdTest(t *testing.T, impl Dgesvder) {
work := make([]float64, 1) work := make([]float64, 1)
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, u, ldu, vt, ldvt, work, -1) impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, u, ldu, vt, ldvt, work, -1)
if !floats.Equal(a, aCopy) {
t.Errorf("a changed during call to get work length")
}
work = make([]float64, int(work[0])) work = make([]float64, int(work[0]))
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, u, ldu, vt, ldvt, work, len(work)) impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, u, ldu, vt, ldvt, work, len(work))
// Test the decomposition errStr := fmt.Sprintf("m = %v, n = %v, lda = %v, ldu = %v, ldv = %v", m, n, lda, ldu, ldvt)
svdCheck(t, false, errStr, m, n, s, a, u, ldu, vt, ldvt, aCopy, lda)
svdCheckPartial(t, impl, lapack.SVDAll, errStr, uAllOrig, vtAllOrig, aCopy, m, n, a, lda, s, u, ldu, vt, ldvt, work, false)
// Test InPlace
jobU = lapack.SVDInPlace
jobVT = lapack.SVDInPlace
copy(a, aCopy)
copy(u, uAllOrig)
copy(vt, vtAllOrig)
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, u, ldu, vt, ldvt, work, len(work))
svdCheck(t, true, errStr, m, n, s, a, u, ldu, vt, ldvt, aCopy, lda)
svdCheckPartial(t, impl, lapack.SVDInPlace, errStr, uAllOrig, vtAllOrig, aCopy, m, n, a, lda, s, u, ldu, vt, ldvt, work, false)
}
}
// svdCheckPartial checks that the singular values and vectors are computed when
// not all of them are computed.
func svdCheckPartial(t *testing.T, impl Dgesvder, job lapack.SVDJob, errStr string, uAllOrig, vtAllOrig, aCopy []float64, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, shortWork bool) {
jobU := job
jobVT := job
// Compare the singular values when computed with {SVDNone, SVDNone.}
sCopy := make([]float64, len(s))
copy(sCopy, s)
copy(a, aCopy)
for i := range s {
s[i] = rand.Float64()
}
tmp1 := make([]float64, 1)
tmp2 := make([]float64, 1)
jobU = lapack.SVDNone
jobVT = lapack.SVDNone
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, tmp1, ldu, tmp2, ldvt, work, -1)
work = make([]float64, int(work[0]))
lwork := len(work)
if shortWork {
lwork--
}
ok := impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, tmp1, ldu, tmp2, ldvt, work, lwork)
if !ok {
t.Errorf("Dgesvd did not complete successfully")
}
if !floats.EqualApprox(s, sCopy, 1e-10) {
t.Errorf("Singular value mismatch when singular vectors not computed: %s", errStr)
}
// Check that the singular vectors are correctly computed when the other
// is none.
uAll := make([]float64, len(u))
copy(uAll, u)
vtAll := make([]float64, len(vt))
copy(vtAll, vt)
// Copy the original vectors so the data outside the matrix bounds is the same.
copy(u, uAllOrig)
copy(vt, vtAllOrig)
jobU = job
jobVT = lapack.SVDNone
copy(a, aCopy)
for i := range s {
s[i] = rand.Float64()
}
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, u, ldu, tmp2, ldvt, work, -1)
work = make([]float64, int(work[0]))
lwork = len(work)
if shortWork {
lwork--
}
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, u, ldu, tmp2, ldvt, work, len(work))
if !floats.EqualApprox(uAll, u, 1e-10) {
t.Errorf("U mismatch when VT is not computed: %s", errStr)
}
if !floats.EqualApprox(s, sCopy, 1e-10) {
t.Errorf("Singular value mismatch when U computed VT not")
}
jobU = lapack.SVDNone
jobVT = job
copy(a, aCopy)
for i := range s {
s[i] = rand.Float64()
}
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, tmp1, ldu, vt, ldvt, work, -1)
work = make([]float64, int(work[0]))
lwork = len(work)
if shortWork {
lwork--
}
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, tmp1, ldu, vt, ldvt, work, len(work))
if !floats.EqualApprox(vtAll, vt, 1e-10) {
t.Errorf("VT mismatch when U is not computed: %s", errStr)
}
if !floats.EqualApprox(s, sCopy, 1e-10) {
t.Errorf("Singular value mismatch when VT computed U not")
}
}
// svdCheck checks that the singular value decomposition correctly multiplies back
// to the original matrix.
func svdCheck(t *testing.T, thin bool, errStr string, m, n int, s, a, u []float64, ldu int, vt []float64, ldvt int, aCopy []float64, lda int) {
sigma := blas64.General{ sigma := blas64.General{
Rows: m, Rows: m,
Cols: n, Cols: n,
@@ -102,6 +228,12 @@ func DgesvdTest(t *testing.T, impl Dgesvder) {
Stride: ldvt, Stride: ldvt,
Data: vt, Data: vt,
} }
if thin {
sigma.Rows = min(m, n)
sigma.Cols = min(m, n)
uMat.Cols = min(m, n)
vTMat.Rows = min(m, n)
}
tmp := blas64.General{ tmp := blas64.General{
Rows: m, Rows: m,
@@ -120,12 +252,12 @@ func DgesvdTest(t *testing.T, impl Dgesvder) {
blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, uMat, sigma, 0, tmp) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, uMat, sigma, 0, tmp)
blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, tmp, vTMat, 0, ans) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, tmp, vTMat, 0, ans)
errStr := fmt.Sprintf("jobU = %v, jobVT = %v, m = %v, n = %v, lda = %v, ldu = %v, ldv = %v", jobU, jobVT, m, n, lda, ldu, ldvt)
if !floats.EqualApprox(ans.Data, aCopy, 1e-8) { if !floats.EqualApprox(ans.Data, aCopy, 1e-8) {
t.Errorf("Decomposition mismatch %s", errStr) t.Errorf("Decomposition mismatch. Trim = %v, %s", thin, errStr)
} }
// Check that U and V are orthogonal if !thin {
// Check that U and V are orthogonal.
for i := 0; i < uMat.Rows; i++ { for i := 0; i < uMat.Rows; i++ {
for j := i + 1; j < uMat.Rows; j++ { for j := i + 1; j < uMat.Rows; j++ {
dot := blas64.Dot(uMat.Cols, dot := blas64.Dot(uMat.Cols,
@@ -149,6 +281,5 @@ func DgesvdTest(t *testing.T, impl Dgesvder) {
} }
} }
} }
}
}
} }