mirror of
https://github.com/gonum/gonum.git
synced 2025-10-20 21:59:25 +08:00
remove orig
This commit is contained in:
@@ -498,6 +498,8 @@ func (impl Implementation) Dgels(trans blas.Transpose, m, n, nrhs int, a []float
|
|||||||
return clapack.Dgels(trans, m, n, nrhs, a, lda, b, ldb)
|
return clapack.Dgels(trans, m, n, nrhs, a, lda, b, ldb)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const noSVDO = "dgesvd: not coded for overwrite"
|
||||||
|
|
||||||
// Dgesvd computes the singular value decomposition of the input matrix A.
|
// Dgesvd computes the singular value decomposition of the input matrix A.
|
||||||
//
|
//
|
||||||
// The singular value decomposition is
|
// The singular value decomposition is
|
||||||
@@ -509,12 +511,12 @@ func (impl Implementation) Dgels(trans blas.Transpose, m, n, nrhs int, a []float
|
|||||||
//
|
//
|
||||||
// jobU and jobVT are options for computing the singular vectors. The behavior
|
// jobU and jobVT are options for computing the singular vectors. The behavior
|
||||||
// is as follows
|
// is as follows
|
||||||
// jobU == lapack.SVDAll All M columns of U are returned in u
|
// jobU == lapack.SVDAll All m columns of U are returned in u
|
||||||
// jobU == lapack.SVDInPlace The first min(m,n) columns are returned in u
|
// jobU == lapack.SVDInPlace The first min(m,n) columns are returned in u
|
||||||
// jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a
|
// jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a
|
||||||
// jobU == lapack.SVDNone The columns of U are not computed.
|
// jobU == lapack.SVDNone The columns of U are not computed.
|
||||||
// The behavior is the same for jobVT and the rows of V^T. At most one of jobU
|
// The behavior is the same for jobVT and the rows of V^T. At most one of jobU
|
||||||
// and jobVT can equal lapack.SVDOverwrite.
|
// and jobVT can equal lapack.SVDOverwrite, and Dgesvd will panic otherwise.
|
||||||
//
|
//
|
||||||
// On entry, a contains the data for the m×n matrix A. During the call to Dgesvd
|
// On entry, a contains the data for the m×n matrix A. During the call to Dgesvd
|
||||||
// the data is overwritten. On exit, A contains the appropriate singular vectors
|
// the data is overwritten. On exit, A contains the appropriate singular vectors
|
||||||
@@ -529,12 +531,12 @@ func (impl Implementation) Dgels(trans blas.Transpose, m, n, nrhs int, a []float
|
|||||||
// not used.
|
// not used.
|
||||||
//
|
//
|
||||||
// vt contains the left singular vectors on exit, stored rowwise. If
|
// vt contains the left singular vectors on exit, stored rowwise. If
|
||||||
// jobV == lapack.SVDAll, vt is of size n×m. If jobV == lapack.SVDInPlace vt is
|
// jobV == lapack.SVDAll, vt is of size n×m. If jobVT == lapack.SVDInPlace vt is
|
||||||
// of size min(m,n)×n. If jobU == lapack.SVDOverwrite or lapack.SVDNone, vt is
|
// of size min(m,n)×n. If jobVT == lapack.SVDOverwrite or lapack.SVDNone, vt is
|
||||||
// not used.
|
// not used.
|
||||||
//
|
//
|
||||||
// The C interface does not support providing temporary storage. To provide compatibility
|
// The C interface does not support providing temporary storage. To provide compatibility
|
||||||
// with native, lwork == -1 will not run Dgeqrf but will instead write the minimum
|
// with native, lwork == -1 will not run Dgesvd but will instead write the minimum
|
||||||
// work necessary to work[0]. If len(work) < lwork, Dgeqrf will panic.
|
// work necessary to work[0]. If len(work) < lwork, Dgeqrf will panic.
|
||||||
//
|
//
|
||||||
// Dgesvd returns whether the decomposition successfully completed.
|
// Dgesvd returns whether the decomposition successfully completed.
|
||||||
@@ -551,13 +553,13 @@ func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float
|
|||||||
checkMatrix(min(m, n), n, vt, ldvt)
|
checkMatrix(min(m, n), n, vt, ldvt)
|
||||||
}
|
}
|
||||||
if jobU == lapack.SVDOverwrite && jobVT == lapack.SVDOverwrite {
|
if jobU == lapack.SVDOverwrite && jobVT == lapack.SVDOverwrite {
|
||||||
panic("lapack: both jobU and jobVT are lapack.SVDOverwrite")
|
panic(noSVDO)
|
||||||
}
|
}
|
||||||
if len(s) < min(m, n) {
|
if len(s) < min(m, n) {
|
||||||
panic(badS)
|
panic(badS)
|
||||||
}
|
}
|
||||||
if jobU != lapack.SVDAll || jobVT != lapack.SVDAll {
|
if jobU == lapack.SVDOverwrite || jobVT == lapack.SVDOverwrite {
|
||||||
panic("lapack: SVD only coded for SVDAll job inputs")
|
panic("lapack: SVD not coded to overwrite original matrix")
|
||||||
}
|
}
|
||||||
minWork := max(5*min(m, n), 3*min(m, n)+max(m, n))
|
minWork := max(5*min(m, n), 3*min(m, n)+max(m, n))
|
||||||
if lwork != -1 {
|
if lwork != -1 {
|
||||||
@@ -572,7 +574,7 @@ func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float
|
|||||||
work[0] = float64(minWork)
|
work[0] = float64(minWork)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return clapack.Dgesvd(byte(jobU), byte(jobVT), m, n, a, lda, s, u, ldu, vt, ldvt, work)
|
return clapack.Dgesvd(lapack.Job(jobU), lapack.Job(jobVT), m, n, a, lda, s, u, ldu, vt, ldvt, work[1:])
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dgetf2 computes the LU decomposition of the m×n matrix A.
|
// Dgetf2 computes the LU decomposition of the m×n matrix A.
|
||||||
|
@@ -88,6 +88,10 @@ func TestDgeqrf(t *testing.T) {
|
|||||||
testlapack.DgeqrfTest(t, impl)
|
testlapack.DgeqrfTest(t, impl)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDgesvd(t *testing.T) {
|
||||||
|
testlapack.DgesvdTest(t, impl)
|
||||||
|
}
|
||||||
|
|
||||||
func TestDgetf2(t *testing.T) {
|
func TestDgetf2(t *testing.T) {
|
||||||
testlapack.Dgetf2Test(t, impl)
|
testlapack.Dgetf2Test(t, impl)
|
||||||
}
|
}
|
||||||
|
@@ -104,7 +104,7 @@ type SVDJob byte
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
SVDAll SVDJob = 'A' // Compute all singular vectors
|
SVDAll SVDJob = 'A' // Compute all singular vectors
|
||||||
SVDInPlace = 'S' // Compute the first singular vectors and store in provided storage.
|
SVDInPlace = 'S' // Compute the first singular vectors and store them in provided storage.
|
||||||
SVDOverwrite = 'O' // Compute the singular vectors and store in input matrix
|
SVDOverwrite = 'O' // Compute the singular vectors and store them in input matrix
|
||||||
SVDNone = 'N' // Do not compute singular vectors
|
SVDNone = 'N' // Do not compute singular vectors
|
||||||
)
|
)
|
||||||
|
@@ -1,497 +0,0 @@
|
|||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package native
|
|
||||||
|
|
||||||
import (
|
|
||||||
"math"
|
|
||||||
|
|
||||||
"github.com/gonum/blas"
|
|
||||||
"github.com/gonum/blas/blas64"
|
|
||||||
"github.com/gonum/lapack"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Dbdsqr performs a singular value decomposition of a real n×n bidiagonal matrix.
|
|
||||||
//
|
|
||||||
// The SVD of the bidiagonal matrix B is
|
|
||||||
// B = Q * S * P^T
|
|
||||||
// where S is a diagonal matrix of singular values, Q is an orthogonal matrix of
|
|
||||||
// left singular vectors, and P is an orthogonal matrix of right singular vectors.
|
|
||||||
//
|
|
||||||
// Q and P are only computed if requested. If left singular vectors are requested,
|
|
||||||
// this routine returns U * Q instead of Q, and if right singular vectors are
|
|
||||||
// requested P^T * VT is returned instead of P^T.
|
|
||||||
//
|
|
||||||
// Frequently Dbdsqr is used in conjuction with Dgebrd which reduces a general
|
|
||||||
// matrix A into bidiagonal form. In this case, the SVD of A is
|
|
||||||
// A = (U * Q) * S * (P^T * VT)
|
|
||||||
//
|
|
||||||
// This routine may also compute Q^T * C.
|
|
||||||
//
|
|
||||||
// d and e contain the elements of the bidiagonal matrix b. d must have length at
|
|
||||||
// least n, and e must have length at least n-1. Dbdsqr will panic if there is
|
|
||||||
// insufficient length. On exit, D contains the singular values of B in decreasing
|
|
||||||
// order.
|
|
||||||
//
|
|
||||||
// VT is a matrix of size n×ncvt whose elements are stored in vt. The elements
|
|
||||||
// of vt are modified to contain P^T * VT on exit. VT is not used if ncvt == 0.
|
|
||||||
//
|
|
||||||
// U is a matrix of size nru×n whose elements are stored in u. The elements
|
|
||||||
// of u are modified to contain U * Q on exit. U is not used if nru == 0.
|
|
||||||
//
|
|
||||||
// C is a matrix of size n×ncc whose elements are stored in c. The elements
|
|
||||||
// of c are modified to contain Q^T * C on exit. C is not used if ncc == 0.
|
|
||||||
//
|
|
||||||
// work contains temporary storage and must have length at least 4*n. Dbdsqr
|
|
||||||
// will panic if there is insufficient working memory.
|
|
||||||
//
|
|
||||||
// Dbdsqr returns whether the decomposition was successful.
|
|
||||||
func (impl Implementation) Dbdsqr(uplo blas.Uplo, n, ncvt, nru, ncc int, d, e, vt []float64, ldvt int, u []float64, ldu int, c []float64, ldc int, work []float64) (ok bool) {
|
|
||||||
if uplo != blas.Upper && uplo != blas.Lower {
|
|
||||||
panic(badUplo)
|
|
||||||
}
|
|
||||||
if ncvt != 0 {
|
|
||||||
checkMatrix(n, ncvt, vt, ldvt)
|
|
||||||
}
|
|
||||||
if nru != 0 {
|
|
||||||
checkMatrix(nru, n, u, ldu)
|
|
||||||
}
|
|
||||||
if ncc != 0 {
|
|
||||||
checkMatrix(n, ncc, c, ldc)
|
|
||||||
}
|
|
||||||
if len(d) < n {
|
|
||||||
panic(badD)
|
|
||||||
}
|
|
||||||
if len(e) < n-1 {
|
|
||||||
panic(badE)
|
|
||||||
}
|
|
||||||
if len(work) < 4*n {
|
|
||||||
panic(badWork)
|
|
||||||
}
|
|
||||||
var info int
|
|
||||||
bi := blas64.Implementation()
|
|
||||||
const (
|
|
||||||
maxIter = 6
|
|
||||||
)
|
|
||||||
if n == 0 {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
if n != 1 {
|
|
||||||
// If the singular vectors do not need to be computed, use qd algorithm.
|
|
||||||
if !(ncvt > 0 || nru > 0 || ncc > 0) {
|
|
||||||
info = impl.Dlasq1(n, d, e, work)
|
|
||||||
// If info is 2 dqds didn't finish, and so try to.
|
|
||||||
if info != 2 {
|
|
||||||
return info == 0
|
|
||||||
}
|
|
||||||
info = 0
|
|
||||||
}
|
|
||||||
nm1 := n - 1
|
|
||||||
nm12 := nm1 + nm1
|
|
||||||
nm13 := nm12 + nm1
|
|
||||||
idir := 0
|
|
||||||
|
|
||||||
eps := dlamchE
|
|
||||||
unfl := dlamchS
|
|
||||||
lower := uplo == blas.Lower
|
|
||||||
var cs, sn, r float64
|
|
||||||
if lower {
|
|
||||||
for i := 0; i < n-1; i++ {
|
|
||||||
cs, sn, r = impl.Dlartg(d[i], e[i])
|
|
||||||
d[i] = r
|
|
||||||
e[i] = sn * d[i+1]
|
|
||||||
d[i+1] *= cs
|
|
||||||
work[i] = cs
|
|
||||||
work[nm1+i] = sn
|
|
||||||
}
|
|
||||||
if nru > 0 {
|
|
||||||
impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, n, work, work[n-1:], u, ldu)
|
|
||||||
}
|
|
||||||
if ncc > 0 {
|
|
||||||
impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, n, ncc, work, work[n-1:], c, ldc)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Compute singular values to a relative accuracy of tol. If tol is negative
|
|
||||||
// the values will be computed to an absolute accuracy of math.Abs(tol) * norm(b)
|
|
||||||
tolmul := math.Max(10, math.Min(100, eps*(-1.0/8)))
|
|
||||||
tol := tolmul * eps
|
|
||||||
var smax float64
|
|
||||||
for i := 0; i < n; i++ {
|
|
||||||
smax = math.Max(smax, math.Abs(d[i]))
|
|
||||||
}
|
|
||||||
for i := 0; i < n-1; i++ {
|
|
||||||
smax = math.Max(smax, math.Abs(e[i]))
|
|
||||||
}
|
|
||||||
|
|
||||||
var sminl float64
|
|
||||||
var thresh float64
|
|
||||||
if tol >= 0 {
|
|
||||||
sminoa := math.Abs(d[0])
|
|
||||||
if sminoa != 0 {
|
|
||||||
mu := sminoa
|
|
||||||
for i := 1; i < n; i++ {
|
|
||||||
mu = math.Abs(d[i]) * (mu / (mu + math.Abs(e[i-1])))
|
|
||||||
sminoa = math.Min(sminoa, mu)
|
|
||||||
if sminoa == 0 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sminoa = sminoa / math.Sqrt(float64(n))
|
|
||||||
thresh = math.Max(tol*sminoa, float64(maxIter*n*n)*unfl)
|
|
||||||
} else {
|
|
||||||
thresh = math.Max(math.Abs(tol)*smax, float64(maxIter*n*n)*unfl)
|
|
||||||
}
|
|
||||||
// Prepare for the main iteration loop for the singular values.
|
|
||||||
maxIt := maxIter * n * n
|
|
||||||
iter := 0
|
|
||||||
oldl2 := -1
|
|
||||||
oldm := -1
|
|
||||||
// m points to the last element of unconverged part of matrix.
|
|
||||||
m := n
|
|
||||||
|
|
||||||
Outer:
|
|
||||||
for m > 1 {
|
|
||||||
if iter > maxIt {
|
|
||||||
info = 0
|
|
||||||
for i := 0; i < n-1; i++ {
|
|
||||||
if e[i] != 0 {
|
|
||||||
info++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return info == 0
|
|
||||||
}
|
|
||||||
// Find diagonal block of matrix to work on.
|
|
||||||
if tol < 0 && math.Abs(d[m-1]) <= thresh {
|
|
||||||
d[m-1] = 0
|
|
||||||
}
|
|
||||||
smax = math.Abs(d[m-1])
|
|
||||||
smin := smax
|
|
||||||
var l2 int
|
|
||||||
var broke bool
|
|
||||||
for l3 := 0; l3 < m-1; l3++ {
|
|
||||||
l2 = m - l3 - 2
|
|
||||||
abss := math.Abs(d[l2])
|
|
||||||
abse := math.Abs(e[l2])
|
|
||||||
if tol < 0 && abss <= thresh {
|
|
||||||
d[l2] = 0
|
|
||||||
}
|
|
||||||
if abse <= thresh {
|
|
||||||
broke = true
|
|
||||||
e[l2] = 0
|
|
||||||
if l2 == m-2 {
|
|
||||||
// Convergence of bottom singular value, return to top.
|
|
||||||
m--
|
|
||||||
continue Outer
|
|
||||||
}
|
|
||||||
l2++
|
|
||||||
break
|
|
||||||
}
|
|
||||||
smin = math.Min(smin, abss)
|
|
||||||
smax = math.Max(math.Max(smax, abss), abse)
|
|
||||||
}
|
|
||||||
<<<<<<< HEAD
|
|
||||||
if broke {
|
|
||||||
e[l2] = 0
|
|
||||||
if l2 == m-2 {
|
|
||||||
// Convergence of bottom singular value, return to top.
|
|
||||||
m--
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
l2++
|
|
||||||
} else {
|
|
||||||
=======
|
|
||||||
if !broke {
|
|
||||||
>>>>>>> 618d4f1... simplify loop condition
|
|
||||||
l2 = 0
|
|
||||||
}
|
|
||||||
// e[ll] through e[m-2] are nonzero, e[ll-1] is zero
|
|
||||||
if l2 == m-2 {
|
|
||||||
// Handle 2×2 block separately.
|
|
||||||
var sinr, cosr, sinl, cosl float64
|
|
||||||
d[m-1], d[m-2], sinr, cosr, sinl, cosl = impl.Dlasv2(d[m-2], e[m-2], d[m-1])
|
|
||||||
e[m-2] = 0
|
|
||||||
if ncvt > 0 {
|
|
||||||
bi.Drot(ncvt, vt[(m-2)*ldvt:], 1, vt[(m-1)*ldvt:], 1, cosr, sinr)
|
|
||||||
}
|
|
||||||
if nru > 0 {
|
|
||||||
bi.Drot(nru, u[m-2:], ldu, u[m-1:], ldu, cosl, sinl)
|
|
||||||
}
|
|
||||||
if ncc > 0 {
|
|
||||||
bi.Drot(ncc, c[(m-2)*ldc:], 1, c[(m-1)*ldc:], 1, cosl, sinl)
|
|
||||||
}
|
|
||||||
m -= 2
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// If working on a new submatrix, choose shift direction from larger end
|
|
||||||
// diagonal element toward smaller.
|
|
||||||
if l2 > oldm-1 || m-1 < oldl2 {
|
|
||||||
if math.Abs(d[l2]) >= math.Abs(d[m-1]) {
|
|
||||||
idir = 1
|
|
||||||
} else {
|
|
||||||
idir = 2
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Apply convergence tests.
|
|
||||||
// TODO(btracey): There is a lot of similar looking code here. See
|
|
||||||
// if there is a better way to de-duplicate.
|
|
||||||
if idir == 1 {
|
|
||||||
// Run convergence test in forward direction.
|
|
||||||
// First apply standard test to bottom of matrix.
|
|
||||||
if math.Abs(e[m-2]) <= math.Abs(tol)*math.Abs(d[m-1]) || (tol < 0 && math.Abs(e[m-2]) <= thresh) {
|
|
||||||
e[m-2] = 0
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if tol >= 0 {
|
|
||||||
// If relative accuracy desired, apply convergence criterion forward.
|
|
||||||
mu := math.Abs(d[l2])
|
|
||||||
sminl = mu
|
|
||||||
for l3 := l2; l3 < m-1; l3++ {
|
|
||||||
if math.Abs(e[l3]) <= tol*mu {
|
|
||||||
e[l3] = 0
|
|
||||||
continue Outer
|
|
||||||
}
|
|
||||||
mu = math.Abs(d[l3+1]) * (mu / (mu + math.Abs(e[l3])))
|
|
||||||
sminl = math.Min(sminl, mu)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Run convergence test in backward direction.
|
|
||||||
// First apply standard test to top of matrix.
|
|
||||||
if math.Abs(e[l2]) <= math.Abs(tol)*math.Abs(d[l2]) || (tol < 0 && math.Abs(e[l2]) <= thresh) {
|
|
||||||
e[l2] = 0
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if tol >= 0 {
|
|
||||||
// If relative accuracy desired, apply convergence criterion backward.
|
|
||||||
mu := math.Abs(d[m-1])
|
|
||||||
sminl = mu
|
|
||||||
for l3 := m - 2; l3 >= l2; l3-- {
|
|
||||||
if math.Abs(e[l3]) <= tol*mu {
|
|
||||||
e[l3] = 0
|
|
||||||
continue Outer
|
|
||||||
}
|
|
||||||
mu = math.Abs(d[l3]) * (mu / (mu + math.Abs(e[l3])))
|
|
||||||
sminl = math.Min(sminl, mu)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
oldl2 = l2
|
|
||||||
oldm = m
|
|
||||||
// Compute shift. First, test if shifting would ruin relative accuracy,
|
|
||||||
// and if so set the shift to zero.
|
|
||||||
var shift float64
|
|
||||||
if tol >= 0 && float64(n)*tol*(sminl/smax) <= math.Max(eps, (1.0/100)*tol) {
|
|
||||||
shift = 0
|
|
||||||
} else {
|
|
||||||
var sl2 float64
|
|
||||||
if idir == 1 {
|
|
||||||
sl2 = math.Abs(d[l2])
|
|
||||||
shift, _ = impl.Dlas2(d[m-2], e[m-2], d[m-1])
|
|
||||||
} else {
|
|
||||||
sl2 = math.Abs(d[m-1])
|
|
||||||
shift, _ = impl.Dlas2(d[l2], e[l2], d[l2+1])
|
|
||||||
}
|
|
||||||
// Test if shift is negligible
|
|
||||||
if sl2 > 0 {
|
|
||||||
if (shift/sl2)*(shift/sl2) < eps {
|
|
||||||
shift = 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
iter += m - l2 + 1
|
|
||||||
// If no shift, do simplified QR iteration.
|
|
||||||
if shift == 0 {
|
|
||||||
if idir == 1 {
|
|
||||||
cs := 1.0
|
|
||||||
oldcs := 1.0
|
|
||||||
var sn, r, oldsn float64
|
|
||||||
for i := l2; i < m-1; i++ {
|
|
||||||
cs, sn, r = impl.Dlartg(d[i]*cs, e[i])
|
|
||||||
if i > l2 {
|
|
||||||
e[i-1] = oldsn * r
|
|
||||||
}
|
|
||||||
oldcs, oldsn, d[i] = impl.Dlartg(oldcs*r, d[i+1]*sn)
|
|
||||||
work[i-l2] = cs
|
|
||||||
work[i-l2+nm1] = sn
|
|
||||||
work[i-l2+nm12] = oldcs
|
|
||||||
work[i-l2+nm13] = oldsn
|
|
||||||
}
|
|
||||||
h := d[m-1] * cs
|
|
||||||
d[m-1] = h * oldcs
|
|
||||||
e[m-2] = h * oldsn
|
|
||||||
if ncvt > 0 {
|
|
||||||
impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncvt, work, work[n-1:], vt[l2*ldvt:], ldvt)
|
|
||||||
}
|
|
||||||
if nru > 0 {
|
|
||||||
impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, m-l2, work[nm12:], work[nm13:], u[l2:], ldu)
|
|
||||||
}
|
|
||||||
if ncc > 0 {
|
|
||||||
impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncc, work[nm12:], work[nm13:], c[l2*ldc:], ldc)
|
|
||||||
}
|
|
||||||
if math.Abs(e[m-2]) < thresh {
|
|
||||||
e[m-2] = 0
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
cs := 1.0
|
|
||||||
oldcs := 1.0
|
|
||||||
var sn, r, oldsn float64
|
|
||||||
for i := m - 1; i >= l2+1; i-- {
|
|
||||||
cs, sn, r = impl.Dlartg(d[i]*cs, e[i-1])
|
|
||||||
if i < m-1 {
|
|
||||||
e[i] = oldsn * r
|
|
||||||
}
|
|
||||||
oldcs, oldsn, d[i] = impl.Dlartg(oldcs*r, d[i-1]*sn)
|
|
||||||
work[i-l2-1] = cs
|
|
||||||
work[i-l2+nm1-1] = -sn
|
|
||||||
work[i-l2+nm12-1] = oldcs
|
|
||||||
work[i-l2+nm13-1] = -oldsn
|
|
||||||
}
|
|
||||||
h := d[l2] * cs
|
|
||||||
d[l2] = h * oldcs
|
|
||||||
e[l2] = h * oldsn
|
|
||||||
if ncvt > 0 {
|
|
||||||
impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncvt, work[nm12:], work[nm13:], vt[l2*ldvt:], ldvt)
|
|
||||||
}
|
|
||||||
if nru > 0 {
|
|
||||||
impl.Dlasr(blas.Right, lapack.Variable, lapack.Backward, nru, m-l2, work, work[n-1:], u[l2:], ldu)
|
|
||||||
}
|
|
||||||
if ncc > 0 {
|
|
||||||
impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncc, work, work[n-1:], c[l2*ldc:], ldc)
|
|
||||||
}
|
|
||||||
if math.Abs(e[l2]) <= thresh {
|
|
||||||
e[l2] = 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Use nonzero shift.
|
|
||||||
if idir == 1 {
|
|
||||||
// Chase bulge from top to bottom. Save cosines and sines for
|
|
||||||
// later singular vector updates.
|
|
||||||
f := (math.Abs(d[l2]) - shift) * (math.Copysign(1, d[l2]) + shift/d[l2])
|
|
||||||
g := e[l2]
|
|
||||||
var cosl, sinl float64
|
|
||||||
for i := l2; i < m-1; i++ {
|
|
||||||
cosr, sinr, r := impl.Dlartg(f, g)
|
|
||||||
if i > l2 {
|
|
||||||
e[i-1] = r
|
|
||||||
}
|
|
||||||
f = cosr*d[i] + sinr*e[i]
|
|
||||||
e[i] = cosr*e[i] - sinr*d[i]
|
|
||||||
g = sinr * d[i+1]
|
|
||||||
d[i+1] *= cosr
|
|
||||||
cosl, sinl, r = impl.Dlartg(f, g)
|
|
||||||
d[i] = r
|
|
||||||
f = cosl*e[i] + sinl*d[i+1]
|
|
||||||
d[i+1] = cosl*d[i+1] - sinl*e[i]
|
|
||||||
if i < m-2 {
|
|
||||||
g = sinl * e[i+1]
|
|
||||||
e[i+1] = cosl * e[i+1]
|
|
||||||
}
|
|
||||||
work[i-l2] = cosr
|
|
||||||
work[i-l2+nm1] = sinr
|
|
||||||
work[i-l2+nm12] = cosl
|
|
||||||
work[i-l2+nm13] = sinl
|
|
||||||
}
|
|
||||||
e[m-2] = f
|
|
||||||
if ncvt > 0 {
|
|
||||||
impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncvt, work, work[n-1:], vt[l2*ldvt:], ldvt)
|
|
||||||
}
|
|
||||||
if nru > 0 {
|
|
||||||
impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, m-l2, work[nm12:], work[nm13:], u[l2:], ldu)
|
|
||||||
}
|
|
||||||
if ncc > 0 {
|
|
||||||
impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncc, work[nm12:], work[nm13:], c[l2*ldc:], ldc)
|
|
||||||
}
|
|
||||||
if math.Abs(e[m-2]) <= thresh {
|
|
||||||
e[m-2] = 0
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Chase bulge from top to bottom. Save cosines and sines for
|
|
||||||
// later singular vector updates.
|
|
||||||
f := (math.Abs(d[m-1]) - shift) * (math.Copysign(1, d[m-1]) + shift/d[m-1])
|
|
||||||
g := e[m-2]
|
|
||||||
for i := m - 1; i > l2; i-- {
|
|
||||||
cosr, sinr, r := impl.Dlartg(f, g)
|
|
||||||
if i < m-1 {
|
|
||||||
e[i] = r
|
|
||||||
}
|
|
||||||
f = cosr*d[i] + sinr*e[i-1]
|
|
||||||
e[i-1] = cosr*e[i-1] - sinr*d[i]
|
|
||||||
g = sinr * d[i-1]
|
|
||||||
d[i-1] *= cosr
|
|
||||||
cosl, sinl, r := impl.Dlartg(f, g)
|
|
||||||
d[i] = r
|
|
||||||
f = cosl*e[i-1] + sinl*d[i-1]
|
|
||||||
d[i-1] = cosl*d[i-1] - sinl*e[i-1]
|
|
||||||
if i > l2+1 {
|
|
||||||
g = sinl * e[i-2]
|
|
||||||
e[i-2] *= cosl
|
|
||||||
}
|
|
||||||
work[i-l2-1] = cosr
|
|
||||||
work[i-l2+nm1-1] = -sinr
|
|
||||||
work[i-l2+nm12-1] = cosl
|
|
||||||
work[i-l2+nm13-1] = -sinl
|
|
||||||
}
|
|
||||||
e[l2] = f
|
|
||||||
if math.Abs(e[l2]) <= thresh {
|
|
||||||
e[l2] = 0
|
|
||||||
}
|
|
||||||
if ncvt > 0 {
|
|
||||||
impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncvt, work[nm12:], work[nm13:], vt[l2*ldvt:], ldvt)
|
|
||||||
}
|
|
||||||
if nru > 0 {
|
|
||||||
impl.Dlasr(blas.Right, lapack.Variable, lapack.Backward, nru, m-l2, work, work[n-1:], u[l2:], ldu)
|
|
||||||
}
|
|
||||||
if ncc > 0 {
|
|
||||||
impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncc, work, work[n-1:], c[l2*ldc:], ldc)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// All singular values converged, make them positive.
|
|
||||||
for i := 0; i < n; i++ {
|
|
||||||
if d[i] < 0 {
|
|
||||||
d[i] *= -1
|
|
||||||
if ncvt > 0 {
|
|
||||||
bi.Dscal(ncvt, -1, vt[i*ldvt:], 1)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sort the singular values in decreasing order.
|
|
||||||
for i := 0; i < n-1; i++ {
|
|
||||||
isub := 0
|
|
||||||
smin := d[0]
|
|
||||||
for j := 1; j < n-i; j++ {
|
|
||||||
if d[j] <= smin {
|
|
||||||
isub = j
|
|
||||||
smin = d[j]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if isub != n-i {
|
|
||||||
// Swap singular values and vectors.
|
|
||||||
d[isub] = d[n-i-1]
|
|
||||||
d[n-i-1] = smin
|
|
||||||
if ncvt > 0 {
|
|
||||||
bi.Dswap(ncvt, vt[isub*ldvt:], 1, vt[(n-i-1)*ldvt:], 1)
|
|
||||||
}
|
|
||||||
if nru > 0 {
|
|
||||||
bi.Dswap(nru, u[isub:], ldu, u[n-i-1:], ldu)
|
|
||||||
}
|
|
||||||
if ncc > 0 {
|
|
||||||
bi.Dswap(ncc, c[isub*ldc:], 1, c[(n-i-1)*ldc:], 1)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
info = 0
|
|
||||||
for i := 0; i < n-1; i++ {
|
|
||||||
if e[i] != 0 {
|
|
||||||
info++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return info == 0
|
|
||||||
}
|
|
@@ -86,7 +86,7 @@ func (impl Implementation) Dgebrd(m, n int, a []float64, lda int, d, e, tauQ, ta
|
|||||||
if lwork >= (m+n)*nbmin {
|
if lwork >= (m+n)*nbmin {
|
||||||
nb = lwork / (m + n)
|
nb = lwork / (m + n)
|
||||||
} else {
|
} else {
|
||||||
nb = 1
|
nb = minmn
|
||||||
nx = minmn
|
nx = minmn
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
876
native/dgesvd.go
876
native/dgesvd.go
File diff suppressed because it is too large
Load Diff
@@ -1,721 +0,0 @@
|
|||||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package native
|
|
||||||
|
|
||||||
import (
|
|
||||||
"math"
|
|
||||||
|
|
||||||
"github.com/gonum/blas"
|
|
||||||
"github.com/gonum/blas/blas64"
|
|
||||||
"github.com/gonum/lapack"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Dgesvd computes the singular value decomposition of the input matrix A.
|
|
||||||
//
|
|
||||||
// Only coded for jobU == lapack.SVDAll and jobVT == lapack.SVDAll.
|
|
||||||
//
|
|
||||||
// The singular value decomposition is
|
|
||||||
// A = U * Sigma * V^T
|
|
||||||
// where Sigma is an m×n diagonal matrix containing the singular values of A,
|
|
||||||
// U is an m×m orthogonal matrix and V is an n×n orthogonal matrix. The first
|
|
||||||
// min(m,n) columns of U and V are the left and right singular vectors of A
|
|
||||||
// respectively.
|
|
||||||
//
|
|
||||||
// jobU and jobVT are options for computing the singular vectors. The behavior
|
|
||||||
// is as follows
|
|
||||||
// jobU == lapack.SVDAll All M columns of U are returned in u
|
|
||||||
// jobU == lapack.SVDInPlace The first min(m,n) columns are returned in u
|
|
||||||
// jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a
|
|
||||||
// jobU == lapack.SVDNone The columns of U are not computed.
|
|
||||||
// The behavior is the same for jobVT and the rows of V^T. At most one of jobU
|
|
||||||
// and jobVT can equal lapack.SVDOverwrite.
|
|
||||||
//
|
|
||||||
// On entry, a contains the data for the m×n matrix A. During the call to Dgesvd
|
|
||||||
// the data is overwritten. On exit, A contains the appropriate singular vectors
|
|
||||||
// if either job is lapack.SVDOverwrite.
|
|
||||||
//
|
|
||||||
// s is a slice of length at least min(m,n) and on exit contains the singular
|
|
||||||
// values in decreasing order.
|
|
||||||
//
|
|
||||||
// u contains the left singular vectors on exit, stored columnwise. If
|
|
||||||
// jobU == lapack.SVDAll, u is of size m×m. If jobU == lapack.SVDInPlace u is
|
|
||||||
// of size m×min(m,n). If jobU == lapack.SVDOverwrite or lapack.SVDNone, u is
|
|
||||||
// not used.
|
|
||||||
//
|
|
||||||
// vt contains the left singular vectors on exit, stored rowwise. If
|
|
||||||
// jobV == lapack.SVDAll, vt is of size n×m. If jobV == lapack.SVDInPlace vt is
|
|
||||||
// of size min(m,n)×n. If jobU == lapack.SVDOverwrite or lapack.SVDNone, vt is
|
|
||||||
// not used.
|
|
||||||
//
|
|
||||||
// work is a slice for storing temporary memory, and lwork is the usable size of
|
|
||||||
// the slice. lwork must be at least max(5*min(m,n), 3*min(m,n)+max(m,n)).
|
|
||||||
// If lwork == -1, instead of performing Dgesvd, the optimal work length will be
|
|
||||||
// stored into work[0]. Dgesvd will panic if the working memory has insufficient
|
|
||||||
// storage.
|
|
||||||
//
|
|
||||||
// Dgesvd returns whether the decomposition successfully completed.
|
|
||||||
func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, lwork int) (ok bool) {
|
|
||||||
checkMatrix(m, n, a, lda)
|
|
||||||
if jobU == lapack.SVDAll {
|
|
||||||
checkMatrix(m, m, u, ldu)
|
|
||||||
} else if jobU == lapack.SVDInPlace {
|
|
||||||
checkMatrix(m, min(m, n), u, ldu)
|
|
||||||
}
|
|
||||||
if jobVT == lapack.SVDAll {
|
|
||||||
checkMatrix(n, n, vt, ldvt)
|
|
||||||
} else if jobVT == lapack.SVDInPlace {
|
|
||||||
checkMatrix(min(m, n), n, vt, ldvt)
|
|
||||||
}
|
|
||||||
if jobU == lapack.SVDOverwrite && jobVT == lapack.SVDOverwrite {
|
|
||||||
panic("lapack: both jobU and jobVT are lapack.SVDOverwrite")
|
|
||||||
}
|
|
||||||
if len(s) < min(m, n) {
|
|
||||||
panic(badS)
|
|
||||||
}
|
|
||||||
if jobU != lapack.SVDAll || jobVT != lapack.SVDAll {
|
|
||||||
panic("lapack: SVD only coded for SVDAll job inputs")
|
|
||||||
}
|
|
||||||
minWork := max(5*min(m, n), 3*min(m, n)+max(m, n))
|
|
||||||
if lwork != -1 {
|
|
||||||
if len(work) < lwork {
|
|
||||||
panic(badWork)
|
|
||||||
}
|
|
||||||
if lwork < minWork {
|
|
||||||
panic(badWork)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if m == 0 || n == 0 {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
minmn := min(m, n)
|
|
||||||
|
|
||||||
wantua := jobU == lapack.SVDAll
|
|
||||||
wantus := jobU == lapack.SVDInPlace
|
|
||||||
wantuas := wantua || wantus
|
|
||||||
wantuo := jobU == lapack.SVDOverwrite
|
|
||||||
wantun := jobU == lapack.None
|
|
||||||
|
|
||||||
wantva := jobVT == lapack.SVDAll
|
|
||||||
wantvs := jobVT == lapack.SVDInPlace
|
|
||||||
wantvas := wantva || wantvs
|
|
||||||
wantvo := jobVT == lapack.SVDOverwrite
|
|
||||||
wantvn := jobVT == lapack.None
|
|
||||||
|
|
||||||
bi := blas64.Implementation()
|
|
||||||
var mnthr int
|
|
||||||
|
|
||||||
// The netlib implementation checks has this at only length 1. Our implementation
|
|
||||||
// checks all input sizes before examining the l == -1 case.
|
|
||||||
dum := make([]float64, m*n)
|
|
||||||
|
|
||||||
// Compute optimal space for subroutines.
|
|
||||||
maxwrk := 1
|
|
||||||
opts := string(jobU) + string(jobVT)
|
|
||||||
var wrkbl, bdspac int
|
|
||||||
if m >= n {
|
|
||||||
mnthr = impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0)
|
|
||||||
bdspac = 5 * n
|
|
||||||
impl.Dgeqrf(m, n, a, lda, dum, dum, -1)
|
|
||||||
lwork_dgeqrf := int(dum[0])
|
|
||||||
impl.Dorgqr(m, n, n, a, lda, dum, dum, -1)
|
|
||||||
lwork_dorgqr_n := int(dum[0])
|
|
||||||
impl.Dorgqr(m, m, n, a, lda, dum, dum, -1)
|
|
||||||
lwork_dorgqr_m := int(dum[0])
|
|
||||||
impl.Dgebrd(n, n, a, lda, s, dum, dum, dum, dum, -1)
|
|
||||||
lwork_dgebrd := int(dum[0])
|
|
||||||
impl.Dorgbr(lapack.ApplyP, n, n, n, a, lda, dum, dum, -1)
|
|
||||||
lwork_dorgbr_p := int(dum[0])
|
|
||||||
impl.Dorgbr(lapack.ApplyQ, n, n, n, a, lda, dum, dum, -1)
|
|
||||||
lwork_dorgbr_q := int(dum[0])
|
|
||||||
|
|
||||||
if m >= mnthr {
|
|
||||||
// m >> n
|
|
||||||
if wantun {
|
|
||||||
// Path 1
|
|
||||||
maxwrk = n + lwork_dgeqrf
|
|
||||||
maxwrk = max(maxwrk, 3*n+lwork_dgebrd)
|
|
||||||
if wantvo || wantvas {
|
|
||||||
maxWork = max(maxwrk, 3*n+lwork_dorgbr_p)
|
|
||||||
}
|
|
||||||
maxwrk = max(maxwrk, bdspac)
|
|
||||||
} else if wantuo && wantvn {
|
|
||||||
// Path 2
|
|
||||||
wrkbl = n + lwork_dgeqrf
|
|
||||||
wrkbl = max(wrkbl, n+lwork_dorgqr_n)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
|
|
||||||
wrkbl = max(wrkbl, bdspac)
|
|
||||||
maxwrk = max(n*n+wrkbl, n*n+m*n+n)
|
|
||||||
} else if wantuo && wantvs {
|
|
||||||
// Path 3
|
|
||||||
// or lapack.All
|
|
||||||
wrkbl = n + lwork_dgeqrf
|
|
||||||
wrkbl = max(wrkbl, n+lwork_dorgqr_n)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
|
|
||||||
wrkbl = max(wrkbl, bdspac)
|
|
||||||
maxwrk = max(n*n+wrkbl, n*n+m*n+n)
|
|
||||||
} else if wantus && wantvn {
|
|
||||||
// Path 4
|
|
||||||
wrkbl = n + lwork_dgeqrf
|
|
||||||
wrkbl = max(wrkbl, n+lwork_dorgqr_n)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
|
|
||||||
wrkbl = max(wrkbl, bdspac)
|
|
||||||
maxwrk = n*n + wrkbl
|
|
||||||
} else if wantus && wantvo {
|
|
||||||
// Path 5
|
|
||||||
wrkbl = n + lwork_dgeqrf
|
|
||||||
wrkbl = max(wrkbl, n+lwork_dorgqr_n)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
|
|
||||||
wrkbl = max(wrkbl, bdspac)
|
|
||||||
maxwrk = 2*n*n + wrkbl
|
|
||||||
} else if wantus && wantvas {
|
|
||||||
// Path 6
|
|
||||||
wrkbl = n + lwork_dgeqrf
|
|
||||||
wrkbl = max(wrkbl, n+lwork_dorgqr_n)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
|
|
||||||
wrkbl = max(wrkbl, bdspac)
|
|
||||||
maxwrk = n*n + wrkbl
|
|
||||||
} else if wantua && wantvn {
|
|
||||||
// Path 7
|
|
||||||
wrkbl = n + lwork_dgeqrf
|
|
||||||
wrkbl = max(wrkbl, n+lwork_dorgqr_m)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
|
|
||||||
wrkbl = max(wrkbl, bdspac)
|
|
||||||
maxwrk = n*n + wrkbl
|
|
||||||
} else if wantua && wantvo {
|
|
||||||
// Path 8
|
|
||||||
wrkbl = n + lwork_dgeqrf
|
|
||||||
wrkbl = max(wrkbl, n+lwork_dorgqr_m)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
|
|
||||||
wrkbl = max(wrkbl, bdspac)
|
|
||||||
maxwrk = 2*n*n + wrkbl
|
|
||||||
} else if wantua && wantvas {
|
|
||||||
// Path 9
|
|
||||||
wrkbl = n + lwork_dgeqrf
|
|
||||||
wrkbl = max(wrkbl, n+lwork_dorgqr_m)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
|
|
||||||
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
|
|
||||||
wrkbl = max(wrkbl, bdspac)
|
|
||||||
maxwrk = n*n + wrkbl
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Path 10: m > n
|
|
||||||
impl.Dgebrd(m, n, a, lda, s, dum, dum, dum, dum, -1)
|
|
||||||
lwork_dgebrd := int(dum[0])
|
|
||||||
maxwrk = 3*n + lwork_dgebrd
|
|
||||||
if wantus || wantuo {
|
|
||||||
impl.Dorgbr(lapack.ApplyQ, m, n, n, a, lda, dum, dum, -1)
|
|
||||||
lwork_dorgbr_q = int(dum[0])
|
|
||||||
maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
|
|
||||||
}
|
|
||||||
if wantua {
|
|
||||||
impl.Dorgbr(lapack.ApplyQ, m, m, n, a, lda, dum, dum, -1)
|
|
||||||
lwork_dorgbr_q := int(dum[0])
|
|
||||||
maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
|
|
||||||
}
|
|
||||||
if !wantvn {
|
|
||||||
maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
|
|
||||||
}
|
|
||||||
maxwrk = max(maxwrk, bdspac)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
mnthr = impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0)
|
|
||||||
bdspac = 5 * m
|
|
||||||
impl.Dgelqf(m, n, a, lda, dum, dum, -1)
|
|
||||||
lwork_dgelqf := int(dum[0])
|
|
||||||
impl.Dorglq(n, n, m, dum, n, dum, dum, -1)
|
|
||||||
lwork_dorglq_n := int(dum[0])
|
|
||||||
impl.Dorglq(m, n, m, a, lda, dum, dum, -1)
|
|
||||||
lwork_dorglq_m := int(dum[0])
|
|
||||||
impl.Dgebrd(m, m, a, lda, s, dum, dum, dum, dum, -1)
|
|
||||||
lwork_dgebrd := int(dum[0])
|
|
||||||
impl.Dorgbr(lapack.ApplyP, m, m, m, a, n, dum, dum, -1)
|
|
||||||
lwork_dorgbr_p := int(dum[0])
|
|
||||||
impl.Dorgbr(lapack.ApplyQ, m, m, m, a, n, dum, dum, -1)
|
|
||||||
lwork_dorgbr_q := int(dum[0])
|
|
||||||
if n >= mnthr {
|
|
||||||
// n >> m
|
|
||||||
if wantvn {
|
|
||||||
// Path 1t
|
|
||||||
maxwrk = m + lwork_dgelqf
|
|
||||||
maxwrk = max(maxwrk, 3*m+lwork_dgebrd)
|
|
||||||
if wntuo.OR.wntuas {
|
|
||||||
maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
|
|
||||||
}
|
|
||||||
maxwrk = max(maxwrk, bdspac)
|
|
||||||
} else if wantvo && wantun {
|
|
||||||
// Path 2t
|
|
||||||
wrkbl = m + lwork_dgelqf
|
|
||||||
wrkbl = max(wrkbl, m+lwork_dorglq_m)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
|
|
||||||
wrkbl = max(wrkbl, bdspac)
|
|
||||||
maxwrk = max(m*m+wrkbl, m*m+m*n+m)
|
|
||||||
} else if wantvo && wantuas {
|
|
||||||
// Path 3t
|
|
||||||
wrkbl = m + lwork_dgelqf
|
|
||||||
wrkbl = max(wrkbl, m+lwork_dorglq_m)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
|
|
||||||
wrkbl = max(wrkbl, bdspac)
|
|
||||||
maxwrk = max(m*m+wrkbl, m*m+m*n+m)
|
|
||||||
} else if wantvs && wantun {
|
|
||||||
// Path 4t
|
|
||||||
wrkbl = m + lwork_dgelqf
|
|
||||||
wrkbl = max(wrkbl, m+lwork_dorglq_m)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
|
|
||||||
wrkbl = max(wrkbl, bdspac)
|
|
||||||
maxwrk = m*m + wrkbl
|
|
||||||
} else if wantvs && wantuo {
|
|
||||||
// Path 5t
|
|
||||||
wrkbl = m + lwork_dgelqf
|
|
||||||
wrkbl = max(wrkbl, m+lwork_dorglq_m)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
|
|
||||||
wrkbl = max(wrkbl, bdspac)
|
|
||||||
maxwrk = 2*m*m + wrkbl
|
|
||||||
} else if wantvs && wantuas {
|
|
||||||
// Path 6t
|
|
||||||
wrkbl = m + lwork_dgelqf
|
|
||||||
wrkbl = max(wrkbl, m+lwork_dorglq_m)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
|
|
||||||
wrkbl = max(wrkbl, bdspac)
|
|
||||||
maxwrk = m*m + wrkbl
|
|
||||||
} else if wantva && wantun {
|
|
||||||
// Path 7t
|
|
||||||
wrkbl = m + lwork_dgelqf
|
|
||||||
wrkbl = max(wrkbl, m+lwork_dorglq_n)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
|
|
||||||
wrkbl = max(wrkbl, bdspac)
|
|
||||||
maxwrk = m*m + wrkbl
|
|
||||||
} else if wantva && wantuo {
|
|
||||||
// Path 8t
|
|
||||||
wrkbl = m + lwork_dgelqf
|
|
||||||
wrkbl = max(wrkbl, m+lwork_dorglq_n)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
|
|
||||||
wrkbl = max(wrkbl, bdspac)
|
|
||||||
maxwrk = 2*m*m + wrkbl
|
|
||||||
} else if wantva && wantuas {
|
|
||||||
// Path 9t
|
|
||||||
wrkbl = m + lwork_dgelqf
|
|
||||||
wrkbl = max(wrkbl, m+lwork_dorglq_n)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
|
|
||||||
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
|
|
||||||
wrkbl = max(wrkbl, bdspac)
|
|
||||||
maxwrk = m*m + wrkbl
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Path 10t, n > m
|
|
||||||
impl.Dgebrd(m, n, a, lda, s, dum, dum, dum, dum, -1)
|
|
||||||
lwork_dgebrd = int(dum[0])
|
|
||||||
maxwrk := 3*m + lwork_dgebrd
|
|
||||||
if wantvs || wantvo {
|
|
||||||
impl.Dorgbr(lapack.ApplyP, m, n, m, a, n, dum, dum, -1)
|
|
||||||
lwork_dorgbr_p = int(dum[0])
|
|
||||||
maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
|
|
||||||
}
|
|
||||||
if wantva {
|
|
||||||
impl.Dorgbr(lapack.ApplyP, n, n, m, a, n, dum, dum, -1)
|
|
||||||
lwork_dorgbr_p = int(dum[0])
|
|
||||||
maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
|
|
||||||
}
|
|
||||||
if !wantun {
|
|
||||||
maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
|
|
||||||
}
|
|
||||||
maxwrk = max(maxwrk, bdspac)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
maxwrk = max(maxwrk, minWork)
|
|
||||||
work[0] = maxwrk
|
|
||||||
if lwork == -1 {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// Perform decomposition.
|
|
||||||
eps := dlamchE
|
|
||||||
smlnum := math.Sqrt(dlamchS) / eps
|
|
||||||
bignum := 1 / smlnum
|
|
||||||
|
|
||||||
// Scale A if max element outside range [smlnum, bignum]
|
|
||||||
anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, dum)
|
|
||||||
iscl := 0
|
|
||||||
if anrm > 0 && anrm < smlnum {
|
|
||||||
iscl = 1
|
|
||||||
impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda)
|
|
||||||
} else if anrm > bignum {
|
|
||||||
iscl = 1
|
|
||||||
impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda)
|
|
||||||
}
|
|
||||||
|
|
||||||
var ie int
|
|
||||||
if m >= n {
|
|
||||||
// If A has sufficiently more rows than columns, use the QR decomposition.
|
|
||||||
if m >= mnthr {
|
|
||||||
if wantun {
|
|
||||||
// Path 1
|
|
||||||
<<<<<<< HEAD
|
|
||||||
itau = 1
|
|
||||||
iwo
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
=======
|
|
||||||
itau := 0
|
|
||||||
iwork := itau + n
|
|
||||||
|
|
||||||
// Compute A = Q * R
|
|
||||||
impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
|
|
||||||
// Zero out below R
|
|
||||||
impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
|
|
||||||
ie = 0
|
|
||||||
itauq := ie + n
|
|
||||||
itaup := itauq + n
|
|
||||||
iwork = itaup + n
|
|
||||||
// Bidiagonalize R in A
|
|
||||||
impl.Dgebrd(n, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
|
|
||||||
ncvt := 0
|
|
||||||
if wantvo || wantvas {
|
|
||||||
// Generate P^T.
|
|
||||||
impl.Dorgbr(lapack.ApplyP, n, n, n, a, lda, work[itaup:], work[iwork:], lwork-iwork)
|
|
||||||
ncvt = n
|
|
||||||
}
|
|
||||||
iwork = ie + n
|
|
||||||
|
|
||||||
// Perform bidiagonal QR iteration computing right singular vectors
|
|
||||||
// of A in A if desired.
|
|
||||||
ok = impl.Dbdsqr(blas.Upper, n, ncvt, 0, 0, s, work[ie:], a, lda, dum, 1, dum, 1, work[iwork:])
|
|
||||||
|
|
||||||
// If right singular vectors desired in VT, copy them there.
|
|
||||||
if wantvas {
|
|
||||||
impl.Dlacpy(blas.All, n, n, a, lda, vt, ldvt)
|
|
||||||
}
|
|
||||||
} else if wantuo && wantvn {
|
|
||||||
// Path 2.
|
|
||||||
/*
|
|
||||||
if lwork >= n*n+max(4*n, bdspac) {
|
|
||||||
// Sufficient workspace for a fast algorithm
|
|
||||||
ir := 1
|
|
||||||
var ldworku, ldworkr int
|
|
||||||
if lwork >= max(workbl, lda*n+n)+lda*n {
|
|
||||||
// work(iu) and work(ir) are n×lda
|
|
||||||
ldworku = lda
|
|
||||||
ldworkr = lda
|
|
||||||
} else if lwork >= max(workbl, lda*n+n)+n*n {
|
|
||||||
// work(iu) is ldwrku×n
|
|
||||||
}
|
|
||||||
itau := ir + ldworkr*n
|
|
||||||
iwork := itau + n
|
|
||||||
|
|
||||||
// Compute A = Q*R
|
|
||||||
impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
|
|
||||||
|
|
||||||
// Copy R to work(ir) and zero out below it.
|
|
||||||
impl.Dlacpy(blas.Upper, n, n, a, lda, b, ldb)
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
panic("not implemented")
|
|
||||||
} else if wantua {
|
|
||||||
if wantvas {
|
|
||||||
// Path 9
|
|
||||||
if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
|
|
||||||
// Sufficient workspace for a fast algorithm
|
|
||||||
iu := 0
|
|
||||||
var ldworku int
|
|
||||||
if lwork >= wrkbl+lda*n {
|
|
||||||
ldworku = lda
|
|
||||||
} else {
|
|
||||||
ldworku = n
|
|
||||||
}
|
|
||||||
itau := iu + ldworku*n
|
|
||||||
iwork := itau + n
|
|
||||||
|
|
||||||
// Compute A = Q * R, copying result to U
|
|
||||||
impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
|
|
||||||
impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
|
|
||||||
|
|
||||||
// Generate Q in U
|
|
||||||
impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
|
|
||||||
// Copy R from A to VT, zeroing out below it.
|
|
||||||
impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
|
|
||||||
if n > 1 {
|
|
||||||
impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
|
|
||||||
}
|
|
||||||
ie = itau
|
|
||||||
itauq := ie + n
|
|
||||||
itaup := itauq + n
|
|
||||||
iwork = itaup + n
|
|
||||||
|
|
||||||
// Bidiagonalize R in VT
|
|
||||||
impl.Dgebrd(n, n, vt, ldvt, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
|
|
||||||
|
|
||||||
// Multiply Q in U by left bidiagonalizing vectors in VT
|
|
||||||
impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
|
|
||||||
|
|
||||||
// Generate right bidiagonalizing vectors in VT
|
|
||||||
impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
|
|
||||||
iwork = ie + n
|
|
||||||
|
|
||||||
// Perform bidiagonal QR iteration, computing left singular
|
|
||||||
// vectors of A in U and computing right singular vectors
|
|
||||||
// of A in VT.
|
|
||||||
ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:], vt, ldvt, u, ldu, dum, 1, work[iwork:])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Path 10.
|
|
||||||
ie = 0
|
|
||||||
itauq := ie + n
|
|
||||||
itaup := itauq + n
|
|
||||||
iwork := itaup + n
|
|
||||||
|
|
||||||
// Bidiagonalize A
|
|
||||||
impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
|
|
||||||
if wantuas {
|
|
||||||
// Left singular vectors are desired in U. Copy result to U and
|
|
||||||
// generate left biadiagonalizing vectors in U.
|
|
||||||
impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
|
|
||||||
var ncu int
|
|
||||||
if wantus {
|
|
||||||
ncu = n
|
|
||||||
}
|
|
||||||
if wantua {
|
|
||||||
ncu = m
|
|
||||||
}
|
|
||||||
impl.Dorgbr(lapack.ApplyQ, m, ncu, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
|
|
||||||
}
|
|
||||||
if wantvas {
|
|
||||||
// Right singular vectors are desired in VT. Copy result to VT and
|
|
||||||
// generate left biadiagonalizing vectors in VT.
|
|
||||||
impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
|
|
||||||
impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
|
|
||||||
}
|
|
||||||
if wantuo {
|
|
||||||
panic("not implemented")
|
|
||||||
}
|
|
||||||
if wantvo {
|
|
||||||
panic("not implemented")
|
|
||||||
}
|
|
||||||
iwork = ie + n
|
|
||||||
var nru, ncvt int
|
|
||||||
if wantuas || wantuo {
|
|
||||||
nru = m
|
|
||||||
}
|
|
||||||
if wantun {
|
|
||||||
nru = 0
|
|
||||||
}
|
|
||||||
if wantvas || wantvo {
|
|
||||||
ncvt = n
|
|
||||||
}
|
|
||||||
if wantvn {
|
|
||||||
ncvt = 0
|
|
||||||
}
|
|
||||||
if !wantuo && !wantvo {
|
|
||||||
// Perform bidiagonal QR iteration, if desired, computing left
|
|
||||||
// singular vectors in U and right singular vectors in VT.
|
|
||||||
ok = impl.Dbdsqr(blas.Upper, n, ncvt, nru, 0, s, work[ie:], vt, ldvt, u, ldu, dum, 1, work[iwork:])
|
|
||||||
} else {
|
|
||||||
panic("not implemented")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// A has more columns than rows. If A has sufficiently more columns than
|
|
||||||
// rows, first reduce using the LQ decomposition.
|
|
||||||
if n >= mnthr {
|
|
||||||
if wantva {
|
|
||||||
if wantuas {
|
|
||||||
// Path 9t
|
|
||||||
if lwork >= m*m+max(max(m+n, 4*m), bdspac) {
|
|
||||||
// Sufficient workspace for a fast algorithm.
|
|
||||||
iu := 0
|
|
||||||
var ldworku int
|
|
||||||
if lwork >= wrkbl+lda*m {
|
|
||||||
ldworku = lda
|
|
||||||
} else {
|
|
||||||
ldworku = m
|
|
||||||
}
|
|
||||||
itau := iu + ldworku*m
|
|
||||||
iwork := itau + m
|
|
||||||
|
|
||||||
// Generate A = L * Q copying result to VT
|
|
||||||
impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
|
|
||||||
impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
|
|
||||||
|
|
||||||
// Generate Q in VT
|
|
||||||
impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
|
|
||||||
|
|
||||||
// Copy L to work[iu], zeroing out above it.
|
|
||||||
impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
|
|
||||||
impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+ldworku:], ldworku)
|
|
||||||
ie = itau
|
|
||||||
itauq := ie + m
|
|
||||||
itaup := itauq + m
|
|
||||||
iwork = itaup + m
|
|
||||||
|
|
||||||
// Bidiagonalize L in work[iu], copying result to U
|
|
||||||
impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
|
|
||||||
impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
|
|
||||||
|
|
||||||
// Generate right bidiagonalizing vectors in work[iu]
|
|
||||||
impl.Dorgbr(lapack.ApplyP, m, m, m, work[iu:], ldworku, work[itaup:], work[iwork:], lwork-iwork)
|
|
||||||
|
|
||||||
// Generate left bidiagonalizing vectors in U.
|
|
||||||
impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
|
|
||||||
iwork = ie + m
|
|
||||||
|
|
||||||
// Perform bidiagonal QR iteration, computing left singular
|
|
||||||
// vectors of L in U and computing right singular vectors
|
|
||||||
// of L in work[iu]
|
|
||||||
ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:], work[iu:], ldworku, u, ldu, dum, 1, work[iwork:])
|
|
||||||
|
|
||||||
// Multiply right singular vectors of L in work[iu:]
|
|
||||||
// Q in VT, storing result in A.
|
|
||||||
bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1, work[iu:], ldworku, vt, ldvt, 0, a, lda)
|
|
||||||
// Copy right singular vectors of A from A to VT
|
|
||||||
impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
|
|
||||||
} else {
|
|
||||||
// Insufficient workspace for a fast algorithm.
|
|
||||||
itau := 0
|
|
||||||
iwork := itau + m
|
|
||||||
|
|
||||||
// Compute A = L * Q, copying result to VT
|
|
||||||
impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
|
|
||||||
impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
|
|
||||||
|
|
||||||
// Generate Q in VT
|
|
||||||
impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
|
|
||||||
|
|
||||||
// Copy L to U, zeroing out above it.
|
|
||||||
impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
|
|
||||||
impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
|
|
||||||
|
|
||||||
ie = itau
|
|
||||||
itauq := ie + m
|
|
||||||
itaup := itauq + m
|
|
||||||
iwork = itaup + m
|
|
||||||
|
|
||||||
// Bidiagonalize L in U
|
|
||||||
impl.Dgebrd(m, m, u, ldu, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
|
|
||||||
|
|
||||||
// Multiply right bidiagonalizing vectors in U by Q in VT.
|
|
||||||
impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m, u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
|
|
||||||
|
|
||||||
// Generate left bidiagonalizing vectors in U
|
|
||||||
impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
|
|
||||||
iwork = ie + m
|
|
||||||
|
|
||||||
// Perform bidiagonal QR iteration, computing left singular
|
|
||||||
// vectors of A in U and computing right singular vectors
|
|
||||||
// of A in VT.
|
|
||||||
ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:], vt, ldvt, u, ldu, dum, 1, work[iwork:])
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
panic("not implemented")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Path 10t
|
|
||||||
ie = 0
|
|
||||||
itauq := ie + m
|
|
||||||
itaup := itauq + m
|
|
||||||
iwork := itaup + m
|
|
||||||
|
|
||||||
// Bidiagonalize A
|
|
||||||
impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
|
|
||||||
if wantuas {
|
|
||||||
// If left singular vectors desired in U, copy result to U and
|
|
||||||
// generate left bidiagonalizing vectors in U.
|
|
||||||
impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
|
|
||||||
impl.Dorgbr(lapack.ApplyQ, m, m, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
|
|
||||||
}
|
|
||||||
if wantvas {
|
|
||||||
// If right singular vectors desired in VT, copy result to VT
|
|
||||||
// and generate right bidiagonalizing vectors in VT.
|
|
||||||
impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
|
|
||||||
var nrvt int
|
|
||||||
if wantva {
|
|
||||||
nrvt = n
|
|
||||||
} else {
|
|
||||||
nrvt = m
|
|
||||||
}
|
|
||||||
impl.Dorgbr(lapack.ApplyP, nrvt, n, m, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
|
|
||||||
}
|
|
||||||
if wantuo {
|
|
||||||
panic("not implemented")
|
|
||||||
}
|
|
||||||
if wantvo {
|
|
||||||
panic("not implemented")
|
|
||||||
}
|
|
||||||
iwork = ie + m
|
|
||||||
var nru, ncvt int
|
|
||||||
if wantuas || wantuo {
|
|
||||||
nru = m
|
|
||||||
}
|
|
||||||
if wantvas || wantvo {
|
|
||||||
ncvt = n
|
|
||||||
}
|
|
||||||
if !wantvo && !wantvo {
|
|
||||||
// Perform bidiagonal QR iteration, if desired, computing left
|
|
||||||
// singular vectors in U and computing right singular vectors in
|
|
||||||
// VT.
|
|
||||||
ok = impl.Dbdsqr(blas.Lower, m, ncvt, nru, 0, s, work[ie:], vt, ldvt, u, ldu, dum, 1, work[iwork:])
|
|
||||||
} else {
|
|
||||||
panic("not implemented")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
minmn := min(m, n)
|
|
||||||
if !ok {
|
|
||||||
if ie > 1 {
|
|
||||||
for i := 0; i < minmn-1; i++ {
|
|
||||||
work[i+1] = work[i+ie]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ie < 1 {
|
|
||||||
for i := minmn - 2; i >= 0; i-- {
|
|
||||||
work[i+1] = work[i+ie]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Undo scaling if necessary
|
|
||||||
if iscl == 1 {
|
|
||||||
if anrm > bignum {
|
|
||||||
impl.Dlascl(lapack.General, 0, 0, bignum, anrm, minmn, 1, s, minmn)
|
|
||||||
}
|
|
||||||
if ok && anrm > bignum {
|
|
||||||
impl.Dlascl(lapack.General, 0, 0, bignum, anrm, minmn-1, 1, work[minmn:], minmn)
|
|
||||||
}
|
|
||||||
if anrm < smlnum {
|
|
||||||
impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, minmn, 1, s, minmn)
|
|
||||||
}
|
|
||||||
if ok && anrm < smlnum {
|
|
||||||
impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, minmn-1, 1, work[minmn:], minmn)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
work[0] = float64(maxwrk)
|
|
||||||
return ok
|
|
||||||
>>>>>>> fd73640... Add a partial implementation of Dgesvd and test
|
|
||||||
}
|
|
@@ -24,11 +24,10 @@ func (impl Implementation) Dlacpy(uplo blas.Uplo, m, n int, a []float64, lda int
|
|||||||
|
|
||||||
case blas.Lower:
|
case blas.Lower:
|
||||||
for i := 0; i < m; i++ {
|
for i := 0; i < m; i++ {
|
||||||
for j := 0; j < min(i, n); j++ {
|
for j := 0; j < min(i+1, n); j++ {
|
||||||
b[i*ldb+j] = a[i*lda+j]
|
b[i*ldb+j] = a[i*lda+j]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
case blas.All:
|
case blas.All:
|
||||||
for i := 0; i < m; i++ {
|
for i := 0; i < m; i++ {
|
||||||
for j := 0; j < n; j++ {
|
for j := 0; j < n; j++ {
|
||||||
|
@@ -20,7 +20,7 @@ func (impl Implementation) Dlaset(uplo blas.Uplo, m, n int, alpha, beta float64,
|
|||||||
}
|
}
|
||||||
} else if uplo == blas.Lower {
|
} else if uplo == blas.Lower {
|
||||||
for i := 0; i < m; i++ {
|
for i := 0; i < m; i++ {
|
||||||
for j := 0; j < i; j++ {
|
for j := 0; j < min(i+1, n); j++ {
|
||||||
a[i*lda+j] = alpha
|
a[i*lda+j] = alpha
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -355,7 +355,7 @@ func (Implementation) Ilaenv(ispec int, s string, opts string, n1, n2, n3, n4 in
|
|||||||
return 2
|
return 2
|
||||||
case 6:
|
case 6:
|
||||||
// Used by xGELSS and xGESVD
|
// Used by xGELSS and xGESVD
|
||||||
return min(n1, n2) * 1e6
|
return int(float64(min(n1, n2)) * 1.6)
|
||||||
case 7:
|
case 7:
|
||||||
// Not used
|
// Not used
|
||||||
return 1
|
return 1
|
||||||
|
@@ -35,6 +35,8 @@ func DbdsqrTest(t *testing.T, impl Dbdsqrer) {
|
|||||||
{10, 10, 10, 10, 30, 40, 50},
|
{10, 10, 10, 10, 30, 40, 50},
|
||||||
{10, 12, 11, 13, 30, 40, 50},
|
{10, 12, 11, 13, 30, 40, 50},
|
||||||
{20, 12, 13, 11, 30, 40, 50},
|
{20, 12, 13, 11, 30, 40, 50},
|
||||||
|
|
||||||
|
{130, 130, 130, 500, 900, 900, 500},
|
||||||
} {
|
} {
|
||||||
for cas := 0; cas < 100; cas++ {
|
for cas := 0; cas < 100; cas++ {
|
||||||
n := test.n
|
n := test.n
|
||||||
|
@@ -46,8 +46,9 @@ func DgebrdTest(t *testing.T, impl Dgebrder) {
|
|||||||
for i := range a {
|
for i := range a {
|
||||||
a[i] = rand.NormFloat64()
|
a[i] = rand.NormFloat64()
|
||||||
}
|
}
|
||||||
|
|
||||||
d := make([]float64, minmn)
|
d := make([]float64, minmn)
|
||||||
e := make([]float64, minmn)
|
e := make([]float64, minmn-1)
|
||||||
tauP := make([]float64, minmn)
|
tauP := make([]float64, minmn)
|
||||||
tauQ := make([]float64, minmn)
|
tauQ := make([]float64, minmn)
|
||||||
work := make([]float64, max(m, n))
|
work := make([]float64, max(m, n))
|
||||||
@@ -78,6 +79,21 @@ func DgebrdTest(t *testing.T, impl Dgebrder) {
|
|||||||
impl.Dgebrd(m, n, a, lda, d, e, tauQ, tauP, work, lwork)
|
impl.Dgebrd(m, n, a, lda, d, e, tauQ, tauP, work, lwork)
|
||||||
work = make([]float64, int(work[0]))
|
work = make([]float64, int(work[0]))
|
||||||
lwork = len(work)
|
lwork = len(work)
|
||||||
|
for i := range work {
|
||||||
|
work[i] = math.NaN()
|
||||||
|
}
|
||||||
|
for i := range d {
|
||||||
|
d[i] = math.NaN()
|
||||||
|
}
|
||||||
|
for i := range e {
|
||||||
|
e[i] = math.NaN()
|
||||||
|
}
|
||||||
|
for i := range tauQ {
|
||||||
|
tauQ[i] = math.NaN()
|
||||||
|
}
|
||||||
|
for i := range tauP {
|
||||||
|
tauP[i] = math.NaN()
|
||||||
|
}
|
||||||
impl.Dgebrd(m, n, a, lda, d, e, tauQ, tauP, work, lwork)
|
impl.Dgebrd(m, n, a, lda, d, e, tauQ, tauP, work, lwork)
|
||||||
|
|
||||||
// Test answers
|
// Test answers
|
||||||
|
@@ -25,19 +25,37 @@ func DgesvdTest(t *testing.T, impl Dgesvder) {
|
|||||||
// TODO(btracey): Add tests for m > mnthr and n > mnthr when other SVD
|
// TODO(btracey): Add tests for m > mnthr and n > mnthr when other SVD
|
||||||
// conditions are implemented. Right now mnthr is 5,000,000 which is too
|
// conditions are implemented. Right now mnthr is 5,000,000 which is too
|
||||||
// large to create a square matrix of that size.
|
// large to create a square matrix of that size.
|
||||||
for _, jobU := range []lapack.SVDJob{lapack.SVDAll} {
|
|
||||||
for _, jobVT := range []lapack.SVDJob{lapack.SVDAll} {
|
|
||||||
for _, test := range []struct {
|
for _, test := range []struct {
|
||||||
m, n, lda, ldu, ldvt int
|
m, n, lda, ldu, ldvt int
|
||||||
}{
|
}{
|
||||||
{5, 5, 0, 0, 0},
|
{5, 5, 0, 0, 0},
|
||||||
{5, 7, 0, 0, 0},
|
{5, 6, 0, 0, 0},
|
||||||
{7, 5, 0, 0, 0},
|
{6, 5, 0, 0, 0},
|
||||||
|
{5, 9, 0, 0, 0},
|
||||||
|
{9, 5, 0, 0, 0},
|
||||||
|
|
||||||
{5, 5, 10, 11, 12},
|
{5, 5, 10, 11, 12},
|
||||||
{5, 7, 10, 11, 12},
|
{5, 6, 10, 11, 12},
|
||||||
{7, 5, 10, 11, 12},
|
{6, 5, 10, 11, 12},
|
||||||
|
{5, 5, 10, 11, 12},
|
||||||
|
{5, 9, 10, 11, 12},
|
||||||
|
{9, 5, 10, 11, 12},
|
||||||
|
|
||||||
|
{300, 300, 0, 0, 0},
|
||||||
|
{300, 400, 0, 0, 0},
|
||||||
|
{400, 300, 0, 0, 0},
|
||||||
|
{300, 600, 0, 0, 0},
|
||||||
|
{600, 300, 0, 0, 0},
|
||||||
|
|
||||||
|
{300, 300, 400, 450, 460},
|
||||||
|
{300, 400, 500, 550, 560},
|
||||||
|
{400, 300, 550, 550, 560},
|
||||||
|
{300, 600, 700, 750, 760},
|
||||||
|
{600, 300, 700, 750, 760},
|
||||||
} {
|
} {
|
||||||
|
jobU := lapack.SVDAll
|
||||||
|
jobVT := lapack.SVDAll
|
||||||
|
|
||||||
m := test.m
|
m := test.m
|
||||||
n := test.n
|
n := test.n
|
||||||
lda := test.lda
|
lda := test.lda
|
||||||
@@ -68,6 +86,10 @@ func DgesvdTest(t *testing.T, impl Dgesvder) {
|
|||||||
vt[i] = rand.NormFloat64()
|
vt[i] = rand.NormFloat64()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uAllOrig := make([]float64, len(u))
|
||||||
|
copy(uAllOrig, u)
|
||||||
|
vtAllOrig := make([]float64, len(vt))
|
||||||
|
copy(vtAllOrig, vt)
|
||||||
aCopy := make([]float64, len(a))
|
aCopy := make([]float64, len(a))
|
||||||
copy(aCopy, a)
|
copy(aCopy, a)
|
||||||
|
|
||||||
@@ -76,10 +98,114 @@ func DgesvdTest(t *testing.T, impl Dgesvder) {
|
|||||||
work := make([]float64, 1)
|
work := make([]float64, 1)
|
||||||
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, u, ldu, vt, ldvt, work, -1)
|
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, u, ldu, vt, ldvt, work, -1)
|
||||||
|
|
||||||
|
if !floats.Equal(a, aCopy) {
|
||||||
|
t.Errorf("a changed during call to get work length")
|
||||||
|
}
|
||||||
|
|
||||||
work = make([]float64, int(work[0]))
|
work = make([]float64, int(work[0]))
|
||||||
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, u, ldu, vt, ldvt, work, len(work))
|
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, u, ldu, vt, ldvt, work, len(work))
|
||||||
|
|
||||||
// Test the decomposition
|
errStr := fmt.Sprintf("m = %v, n = %v, lda = %v, ldu = %v, ldv = %v", m, n, lda, ldu, ldvt)
|
||||||
|
svdCheck(t, false, errStr, m, n, s, a, u, ldu, vt, ldvt, aCopy, lda)
|
||||||
|
svdCheckPartial(t, impl, lapack.SVDAll, errStr, uAllOrig, vtAllOrig, aCopy, m, n, a, lda, s, u, ldu, vt, ldvt, work, false)
|
||||||
|
|
||||||
|
// Test InPlace
|
||||||
|
jobU = lapack.SVDInPlace
|
||||||
|
jobVT = lapack.SVDInPlace
|
||||||
|
copy(a, aCopy)
|
||||||
|
copy(u, uAllOrig)
|
||||||
|
copy(vt, vtAllOrig)
|
||||||
|
|
||||||
|
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, u, ldu, vt, ldvt, work, len(work))
|
||||||
|
svdCheck(t, true, errStr, m, n, s, a, u, ldu, vt, ldvt, aCopy, lda)
|
||||||
|
svdCheckPartial(t, impl, lapack.SVDInPlace, errStr, uAllOrig, vtAllOrig, aCopy, m, n, a, lda, s, u, ldu, vt, ldvt, work, false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// svdCheckPartial checks that the singular values and vectors are computed when
|
||||||
|
// not all of them are computed.
|
||||||
|
func svdCheckPartial(t *testing.T, impl Dgesvder, job lapack.SVDJob, errStr string, uAllOrig, vtAllOrig, aCopy []float64, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, shortWork bool) {
|
||||||
|
jobU := job
|
||||||
|
jobVT := job
|
||||||
|
// Compare the singular values when computed with {SVDNone, SVDNone.}
|
||||||
|
sCopy := make([]float64, len(s))
|
||||||
|
copy(sCopy, s)
|
||||||
|
copy(a, aCopy)
|
||||||
|
for i := range s {
|
||||||
|
s[i] = rand.Float64()
|
||||||
|
}
|
||||||
|
tmp1 := make([]float64, 1)
|
||||||
|
tmp2 := make([]float64, 1)
|
||||||
|
jobU = lapack.SVDNone
|
||||||
|
jobVT = lapack.SVDNone
|
||||||
|
|
||||||
|
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, tmp1, ldu, tmp2, ldvt, work, -1)
|
||||||
|
work = make([]float64, int(work[0]))
|
||||||
|
lwork := len(work)
|
||||||
|
if shortWork {
|
||||||
|
lwork--
|
||||||
|
}
|
||||||
|
ok := impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, tmp1, ldu, tmp2, ldvt, work, lwork)
|
||||||
|
if !ok {
|
||||||
|
t.Errorf("Dgesvd did not complete successfully")
|
||||||
|
}
|
||||||
|
if !floats.EqualApprox(s, sCopy, 1e-10) {
|
||||||
|
t.Errorf("Singular value mismatch when singular vectors not computed: %s", errStr)
|
||||||
|
}
|
||||||
|
// Check that the singular vectors are correctly computed when the other
|
||||||
|
// is none.
|
||||||
|
uAll := make([]float64, len(u))
|
||||||
|
copy(uAll, u)
|
||||||
|
vtAll := make([]float64, len(vt))
|
||||||
|
copy(vtAll, vt)
|
||||||
|
|
||||||
|
// Copy the original vectors so the data outside the matrix bounds is the same.
|
||||||
|
copy(u, uAllOrig)
|
||||||
|
copy(vt, vtAllOrig)
|
||||||
|
|
||||||
|
jobU = job
|
||||||
|
jobVT = lapack.SVDNone
|
||||||
|
copy(a, aCopy)
|
||||||
|
for i := range s {
|
||||||
|
s[i] = rand.Float64()
|
||||||
|
}
|
||||||
|
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, u, ldu, tmp2, ldvt, work, -1)
|
||||||
|
work = make([]float64, int(work[0]))
|
||||||
|
lwork = len(work)
|
||||||
|
if shortWork {
|
||||||
|
lwork--
|
||||||
|
}
|
||||||
|
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, u, ldu, tmp2, ldvt, work, len(work))
|
||||||
|
if !floats.EqualApprox(uAll, u, 1e-10) {
|
||||||
|
t.Errorf("U mismatch when VT is not computed: %s", errStr)
|
||||||
|
}
|
||||||
|
if !floats.EqualApprox(s, sCopy, 1e-10) {
|
||||||
|
t.Errorf("Singular value mismatch when U computed VT not")
|
||||||
|
}
|
||||||
|
jobU = lapack.SVDNone
|
||||||
|
jobVT = job
|
||||||
|
copy(a, aCopy)
|
||||||
|
for i := range s {
|
||||||
|
s[i] = rand.Float64()
|
||||||
|
}
|
||||||
|
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, tmp1, ldu, vt, ldvt, work, -1)
|
||||||
|
work = make([]float64, int(work[0]))
|
||||||
|
lwork = len(work)
|
||||||
|
if shortWork {
|
||||||
|
lwork--
|
||||||
|
}
|
||||||
|
impl.Dgesvd(jobU, jobVT, m, n, a, lda, s, tmp1, ldu, vt, ldvt, work, len(work))
|
||||||
|
if !floats.EqualApprox(vtAll, vt, 1e-10) {
|
||||||
|
t.Errorf("VT mismatch when U is not computed: %s", errStr)
|
||||||
|
}
|
||||||
|
if !floats.EqualApprox(s, sCopy, 1e-10) {
|
||||||
|
t.Errorf("Singular value mismatch when VT computed U not")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// svdCheck checks that the singular value decomposition correctly multiplies back
|
||||||
|
// to the original matrix.
|
||||||
|
func svdCheck(t *testing.T, thin bool, errStr string, m, n int, s, a, u []float64, ldu int, vt []float64, ldvt int, aCopy []float64, lda int) {
|
||||||
sigma := blas64.General{
|
sigma := blas64.General{
|
||||||
Rows: m,
|
Rows: m,
|
||||||
Cols: n,
|
Cols: n,
|
||||||
@@ -102,6 +228,12 @@ func DgesvdTest(t *testing.T, impl Dgesvder) {
|
|||||||
Stride: ldvt,
|
Stride: ldvt,
|
||||||
Data: vt,
|
Data: vt,
|
||||||
}
|
}
|
||||||
|
if thin {
|
||||||
|
sigma.Rows = min(m, n)
|
||||||
|
sigma.Cols = min(m, n)
|
||||||
|
uMat.Cols = min(m, n)
|
||||||
|
vTMat.Rows = min(m, n)
|
||||||
|
}
|
||||||
|
|
||||||
tmp := blas64.General{
|
tmp := blas64.General{
|
||||||
Rows: m,
|
Rows: m,
|
||||||
@@ -120,12 +252,12 @@ func DgesvdTest(t *testing.T, impl Dgesvder) {
|
|||||||
blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, uMat, sigma, 0, tmp)
|
blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, uMat, sigma, 0, tmp)
|
||||||
blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, tmp, vTMat, 0, ans)
|
blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, tmp, vTMat, 0, ans)
|
||||||
|
|
||||||
errStr := fmt.Sprintf("jobU = %v, jobVT = %v, m = %v, n = %v, lda = %v, ldu = %v, ldv = %v", jobU, jobVT, m, n, lda, ldu, ldvt)
|
|
||||||
if !floats.EqualApprox(ans.Data, aCopy, 1e-8) {
|
if !floats.EqualApprox(ans.Data, aCopy, 1e-8) {
|
||||||
t.Errorf("Decomposition mismatch %s", errStr)
|
t.Errorf("Decomposition mismatch. Trim = %v, %s", thin, errStr)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check that U and V are orthogonal
|
if !thin {
|
||||||
|
// Check that U and V are orthogonal.
|
||||||
for i := 0; i < uMat.Rows; i++ {
|
for i := 0; i < uMat.Rows; i++ {
|
||||||
for j := i + 1; j < uMat.Rows; j++ {
|
for j := i + 1; j < uMat.Rows; j++ {
|
||||||
dot := blas64.Dot(uMat.Cols,
|
dot := blas64.Dot(uMat.Cols,
|
||||||
@@ -149,6 +281,5 @@ func DgesvdTest(t *testing.T, impl Dgesvder) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user