Files
gonum/lapack/native/dgesvd.go
2017-05-23 00:03:03 -06:00

1357 lines
43 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
import (
"math"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/blas/blas64"
"gonum.org/v1/gonum/lapack"
)
// noSVDO is the panic message used by Dgesvd when jobU or jobVT is
// lapack.SVDOverwrite, a mode this implementation does not support.
const noSVDO = "dgesvd: not coded for overwrite"
// Dgesvd computes the singular value decomposition of the input matrix A.
//
// The singular value decomposition is
//  A = U * Sigma * V^T
// where Sigma is an m×n diagonal matrix containing the singular values of A,
// U is an m×m orthogonal matrix and V is an n×n orthogonal matrix. The first
// min(m,n) columns of U and V are the left and right singular vectors of A
// respectively.
//
// jobU and jobVT are options for computing the singular vectors. The behavior
// is as follows
//  jobU == lapack.SVDAll       All m columns of U are returned in u
//  jobU == lapack.SVDInPlace   The first min(m,n) columns of U are returned in u
//  jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a
//  jobU == lapack.SVDNone      The columns of U are not computed.
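// The behavior is the same for jobVT and the rows of V^T. At most one of jobU
// and jobVT can equal lapack.SVDOverwrite, and Dgesvd will panic otherwise.
// Note that lapack.SVDOverwrite is not yet implemented: Dgesvd currently panics
// if either jobU or jobVT is lapack.SVDOverwrite.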
//
// On entry, a contains the data for the m×n matrix A. During the call to Dgesvd
// the data is overwritten. On exit, A contains the appropriate singular vectors
// if either job is lapack.SVDOverwrite.
//
// s is a slice of length at least min(m,n) and on exit contains the singular
// values in decreasing order.
//
// u contains the left singular vectors on exit, stored column-wise. If
// jobU == lapack.SVDAll, u is of size m×m. If jobU == lapack.SVDInPlace u is
// of size m×min(m,n). If jobU == lapack.SVDOverwrite or lapack.SVDNone, u is
// not used.
//
// vt contains the right singular vectors on exit, stored row-wise. If
// jobVT == lapack.SVDAll, vt is of size n×n. If jobVT == lapack.SVDInPlace vt is
// of size min(m,n)×n. If jobVT == lapack.SVDOverwrite or lapack.SVDNone, vt is
// not used.
//
// work is a slice for storing temporary memory, and lwork is the usable size of
// the slice. lwork must be at least max(5*min(m,n), 3*min(m,n)+max(m,n)).
// If lwork == -1, instead of performing Dgesvd, the optimal work length will be
// stored into work[0]. Dgesvd will panic if the working memory has insufficient
// storage.
//
// Dgesvd returns whether the decomposition successfully completed.
func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, lwork int) (ok bool) {
minmn := min(m, n)
checkMatrix(m, n, a, lda)
if jobU == lapack.SVDAll {
checkMatrix(m, m, u, ldu)
} else if jobU == lapack.SVDInPlace {
checkMatrix(m, minmn, u, ldu)
}
if jobVT == lapack.SVDAll {
checkMatrix(n, n, vt, ldvt)
} else if jobVT == lapack.SVDInPlace {
checkMatrix(minmn, n, vt, ldvt)
}
if jobU == lapack.SVDOverwrite && jobVT == lapack.SVDOverwrite {
panic("lapack: both jobU and jobVT are lapack.SVDOverwrite")
}
if len(s) < minmn {
panic(badS)
}
if jobU == lapack.SVDOverwrite || jobVT == lapack.SVDOverwrite {
panic(noSVDO)
}
if m == 0 || n == 0 {
return true
}
wantua := jobU == lapack.SVDAll
wantus := jobU == lapack.SVDInPlace
wantuas := wantua || wantus
wantuo := jobU == lapack.SVDOverwrite
wantun := jobU == lapack.None
wantva := jobVT == lapack.SVDAll
wantvs := jobVT == lapack.SVDInPlace
wantvas := wantva || wantvs
wantvo := jobVT == lapack.SVDOverwrite
wantvn := jobVT == lapack.None
bi := blas64.Implementation()
var mnthr int
// Compute optimal space for subroutines.
maxwrk := 1
opts := string(jobU) + string(jobVT)
var wrkbl, bdspac int
if m >= n {
mnthr = impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0)
bdspac = 5 * n
impl.Dgeqrf(m, n, a, lda, nil, work, -1)
lwork_dgeqrf := int(work[0])
impl.Dorgqr(m, n, n, a, lda, nil, work, -1)
lwork_dorgqr_n := int(work[0])
impl.Dorgqr(m, m, n, a, lda, nil, work, -1)
lwork_dorgqr_m := int(work[0])
impl.Dgebrd(n, n, a, lda, s, nil, nil, nil, work, -1)
lwork_dgebrd := int(work[0])
impl.Dorgbr(lapack.ApplyP, n, n, n, a, lda, nil, work, -1)
lwork_dorgbr_p := int(work[0])
impl.Dorgbr(lapack.ApplyQ, n, n, n, a, lda, nil, work, -1)
lwork_dorgbr_q := int(work[0])
if m >= mnthr {
// m >> n
if wantun {
// Path 1
maxwrk = n + lwork_dgeqrf
maxwrk = max(maxwrk, 3*n+lwork_dgebrd)
if wantvo || wantvas {
maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
}
maxwrk = max(maxwrk, bdspac)
} else if wantuo && wantvn {
// Path 2
wrkbl = n + lwork_dgeqrf
wrkbl = max(wrkbl, n+lwork_dorgqr_n)
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
wrkbl = max(wrkbl, bdspac)
maxwrk = max(n*n+wrkbl, n*n+m*n+n)
} else if wantuo && wantvs {
// Path 3
wrkbl = n + lwork_dgeqrf
wrkbl = max(wrkbl, n+lwork_dorgqr_n)
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
wrkbl = max(wrkbl, bdspac)
maxwrk = max(n*n+wrkbl, n*n+m*n+n)
} else if wantus && wantvn {
// Path 4
wrkbl = n + lwork_dgeqrf
wrkbl = max(wrkbl, n+lwork_dorgqr_n)
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
wrkbl = max(wrkbl, bdspac)
maxwrk = n*n + wrkbl
} else if wantus && wantvo {
// Path 5
wrkbl = n + lwork_dgeqrf
wrkbl = max(wrkbl, n+lwork_dorgqr_n)
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
wrkbl = max(wrkbl, bdspac)
maxwrk = 2*n*n + wrkbl
} else if wantus && wantvas {
// Path 6
wrkbl = n + lwork_dgeqrf
wrkbl = max(wrkbl, n+lwork_dorgqr_n)
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
wrkbl = max(wrkbl, bdspac)
maxwrk = n*n + wrkbl
} else if wantua && wantvn {
// Path 7
wrkbl = n + lwork_dgeqrf
wrkbl = max(wrkbl, n+lwork_dorgqr_m)
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
wrkbl = max(wrkbl, bdspac)
maxwrk = n*n + wrkbl
} else if wantua && wantvo {
// Path 8
wrkbl = n + lwork_dgeqrf
wrkbl = max(wrkbl, n+lwork_dorgqr_m)
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
wrkbl = max(wrkbl, bdspac)
maxwrk = 2*n*n + wrkbl
} else if wantua && wantvas {
// Path 9
wrkbl = n + lwork_dgeqrf
wrkbl = max(wrkbl, n+lwork_dorgqr_m)
wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
wrkbl = max(wrkbl, bdspac)
maxwrk = n*n + wrkbl
}
} else {
// Path 10: m > n
impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1)
lwork_dgebrd := int(work[0])
maxwrk = 3*n + lwork_dgebrd
if wantus || wantuo {
impl.Dorgbr(lapack.ApplyQ, m, n, n, a, lda, nil, work, -1)
lwork_dorgbr_q = int(work[0])
maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
}
if wantua {
impl.Dorgbr(lapack.ApplyQ, m, m, n, a, lda, nil, work, -1)
lwork_dorgbr_q := int(work[0])
maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
}
if !wantvn {
maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
}
maxwrk = max(maxwrk, bdspac)
}
} else {
mnthr = impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0)
bdspac = 5 * m
impl.Dgelqf(m, n, a, lda, nil, work, -1)
lwork_dgelqf := int(work[0])
impl.Dorglq(n, n, m, nil, n, nil, work, -1)
lwork_dorglq_n := int(work[0])
impl.Dorglq(m, n, m, a, lda, nil, work, -1)
lwork_dorglq_m := int(work[0])
impl.Dgebrd(m, m, a, lda, s, nil, nil, nil, work, -1)
lwork_dgebrd := int(work[0])
impl.Dorgbr(lapack.ApplyP, m, m, m, a, n, nil, work, -1)
lwork_dorgbr_p := int(work[0])
impl.Dorgbr(lapack.ApplyQ, m, m, m, a, n, nil, work, -1)
lwork_dorgbr_q := int(work[0])
if n >= mnthr {
// n >> m
if wantvn {
// Path 1t
maxwrk = m + lwork_dgelqf
maxwrk = max(maxwrk, 3*m+lwork_dgebrd)
if wantuo || wantuas {
maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
}
maxwrk = max(maxwrk, bdspac)
} else if wantvo && wantun {
// Path 2t
wrkbl = m + lwork_dgelqf
wrkbl = max(wrkbl, m+lwork_dorglq_m)
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
wrkbl = max(wrkbl, bdspac)
maxwrk = max(m*m+wrkbl, m*m+m*n+m)
} else if wantvo && wantuas {
// Path 3t
wrkbl = m + lwork_dgelqf
wrkbl = max(wrkbl, m+lwork_dorglq_m)
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
wrkbl = max(wrkbl, bdspac)
maxwrk = max(m*m+wrkbl, m*m+m*n+m)
} else if wantvs && wantun {
// Path 4t
wrkbl = m + lwork_dgelqf
wrkbl = max(wrkbl, m+lwork_dorglq_m)
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
wrkbl = max(wrkbl, bdspac)
maxwrk = m*m + wrkbl
} else if wantvs && wantuo {
// Path 5t
wrkbl = m + lwork_dgelqf
wrkbl = max(wrkbl, m+lwork_dorglq_m)
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
wrkbl = max(wrkbl, bdspac)
maxwrk = 2*m*m + wrkbl
} else if wantvs && wantuas {
// Path 6t
wrkbl = m + lwork_dgelqf
wrkbl = max(wrkbl, m+lwork_dorglq_m)
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
wrkbl = max(wrkbl, bdspac)
maxwrk = m*m + wrkbl
} else if wantva && wantun {
// Path 7t
wrkbl = m + lwork_dgelqf
wrkbl = max(wrkbl, m+lwork_dorglq_n)
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
wrkbl = max(wrkbl, bdspac)
maxwrk = m*m + wrkbl
} else if wantva && wantuo {
// Path 8t
wrkbl = m + lwork_dgelqf
wrkbl = max(wrkbl, m+lwork_dorglq_n)
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
wrkbl = max(wrkbl, bdspac)
maxwrk = 2*m*m + wrkbl
} else if wantva && wantuas {
// Path 9t
wrkbl = m + lwork_dgelqf
wrkbl = max(wrkbl, m+lwork_dorglq_n)
wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
wrkbl = max(wrkbl, bdspac)
maxwrk = m*m + wrkbl
}
} else {
// Path 10t, n > m
impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1)
lwork_dgebrd = int(work[0])
maxwrk := 3*m + lwork_dgebrd
if wantvs || wantvo {
impl.Dorgbr(lapack.ApplyP, m, n, m, a, n, nil, work, -1)
lwork_dorgbr_p = int(work[0])
maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
}
if wantva {
impl.Dorgbr(lapack.ApplyP, n, n, m, a, n, nil, work, -1)
lwork_dorgbr_p = int(work[0])
maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
}
if !wantun {
maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
}
maxwrk = max(maxwrk, bdspac)
}
}
minWork := max(1, 5*minmn)
if !((wantun && m >= mnthr) || (wantvn && n >= mnthr)) {
minWork = max(minWork, 3*minmn+max(m, n))
}
if lwork != -1 {
if len(work) < lwork {
panic(badWork)
}
if lwork < minWork {
panic(badWork)
}
}
if m == 0 || n == 0 {
return true
}
maxwrk = max(maxwrk, minWork)
work[0] = float64(maxwrk)
if lwork == -1 {
return true
}
// Perform decomposition.
eps := dlamchE
smlnum := math.Sqrt(dlamchS) / eps
bignum := 1 / smlnum
// Scale A if max element outside range [smlnum, bignum].
anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, nil)
var iscl bool
if anrm > 0 && anrm < smlnum {
iscl = true
impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda)
} else if anrm > bignum {
iscl = true
impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda)
}
var ie int
if m >= n {
// If A has sufficiently more rows than columns, use the QR decomposition.
if m >= mnthr {
// m >> n
if wantun {
// Path 1.
itau := 0
iwork := itau + n
// Compute A = Q * R.
impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
// Zero out below R.
impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
ie = 0
itauq := ie + n
itaup := itauq + n
iwork = itaup + n
// Bidiagonalize R in A.
impl.Dgebrd(n, n, a, lda, s, work[ie:], work[itauq:],
work[itaup:], work[iwork:], lwork-iwork)
ncvt := 0
if wantvo || wantvas {
// Generate P^T.
impl.Dorgbr(lapack.ApplyP, n, n, n, a, lda, work[itaup:],
work[iwork:], lwork-iwork)
ncvt = n
}
iwork = ie + n
// Perform bidiagonal QR iteration computing right singular vectors
// of A in A if desired.
ok = impl.Dbdsqr(blas.Upper, n, ncvt, 0, 0, s, work[ie:],
a, lda, work, 1, work, 1, work[iwork:])
// If right singular vectors desired in VT, copy them there.
if wantvas {
impl.Dlacpy(blas.All, n, n, a, lda, vt, ldvt)
}
} else if wantuo && wantvn {
// Path 2
panic(noSVDO)
} else if wantuo && wantvas {
// Path 3
panic(noSVDO)
} else if wantus {
if wantvn {
// Path 4
if lwork >= n*n+max(4*n, bdspac) {
// Sufficient workspace for a fast algorithm.
ir := 0
var ldworkr int
if lwork >= wrkbl+lda*n {
ldworkr = lda
} else {
ldworkr = n
}
itau := ir + ldworkr*n
iwork := itau + n
// Compute A = Q * R.
impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
// Copy R to work[ir:], zeroing out below it.
impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr)
impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr)
// Generate Q in A.
impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
ie := itau
itauq := ie + n
itaup := itauq + n
iwork = itaup + n
// Bidiagonalize R in work[ir:].
impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:],
work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
// Generate left vectors bidiagonalizing R in work[ir:].
impl.Dorgbr(lapack.ApplyQ, n, n, n, work[ir:], ldworkr,
work[itauq:], work[iwork:], lwork-iwork)
iwork = ie + n
// Perform bidiagonal QR iteration, computing left singular
// vectors of R in work[ir:].
ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1,
work[ir:], ldworkr, work, 1, work[iwork:])
// Multiply Q in A by left singular vectors of R in
// work[ir:], storing result in U.
bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda,
work[ir:], ldworkr, 0, u, ldu)
} else {
// Insufficient workspace for a fast algorithm.
itau := 0
iwork := itau + n
// Compute A = Q*R, copying result to U.
impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
// Generate Q in U.
impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
ie := itau
itauq := ie + n
itaup := itauq + n
iwork = itaup + n
// Zero out below R in A.
impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
// Bidiagonalize R in A.
impl.Dgebrd(n, n, a, lda, s, work[ie:],
work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
// Multiply Q in U by left vectors bidiagonalizing R.
impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
iwork = ie + n
// Perform bidiagonal QR iteration, computing left
// singular vectors of A in U.
ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:], work, 1,
u, ldu, work, 1, work[iwork:])
}
} else if wantvo {
// Path 5
panic(noSVDO)
} else if wantvas {
// Path 6
if lwork >= n*n+max(4*n, bdspac) {
// Sufficient workspace for a fast algorithm.
iu := 0
var ldworku int
if lwork >= wrkbl+lda*n {
ldworku = lda
} else {
ldworku = n
}
itau := iu + ldworku*n
iwork := itau + n
// Compute A = Q * R.
impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
// Copy R to work[iu:], zeroing out below it.
impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku)
impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku)
// Generate Q in A.
impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
ie := itau
itauq := ie + n
itaup := itauq + n
iwork = itaup + n
// Bidiagonalize R in work[iu:], copying result to VT.
impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:],
work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt)
// Generate left bidiagonalizing vectors in work[iu:].
impl.Dorgbr(lapack.ApplyQ, n, n, n, work[iu:], ldworku,
work[itauq:], work[iwork:], lwork-iwork)
// Generate right bidiagonalizing vectors in VT.
impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
work[itaup:], work[iwork:], lwork-iwork)
iwork = ie + n
// Perform bidiagonal QR iteration, computing left singular
// vectors of R in work[iu:], and computing right singular
// vectors of R in VT.
ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:],
vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:])
// Multiply Q in A by left singular vectors of R in
// work[iu:], storing result in U.
bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda,
work[iu:], ldworku, 0, u, ldu)
} else {
// Insufficient workspace for a fast algorithm.
itau := 0
iwork := itau + n
// Compute A = Q * R, copying result to U.
impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
// Generate Q in U.
impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
// Copy R to VT, zeroing out below it.
impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
ie := itau
itauq := ie + n
itaup := itauq + n
iwork = itaup + n
// Bidiagonalize R in VT.
impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
// Multiply Q in U by left bidiagonalizing vectors in VT.
impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
// Generate right bidiagonalizing vectors in VT.
impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
work[itaup:], work[iwork:], lwork-iwork)
iwork = ie + n
// Perform bidiagonal QR iteration, computing left singular
// vectors of A in U and computing right singular vectors
// of A in VT.
ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
vt, ldvt, u, ldu, work, 1, work[iwork:])
}
}
} else if wantua {
if wantvn {
// Path 7
if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
// Sufficient workspace for a fast algorithm.
ir := 0
var ldworkr int
if lwork >= wrkbl+lda*n {
ldworkr = lda
} else {
ldworkr = n
}
itau := ir + ldworkr*n
iwork := itau + n
// Compute A = Q*R, copying result to U.
impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
// Copy R to work[ir:], zeroing out below it.
impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr)
impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr)
// Generate Q in U.
impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
ie := itau
itauq := ie + n
itaup := itauq + n
iwork = itaup + n
// Bidiagonalize R in work[ir:].
impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:],
work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
// Generate left bidiagonalizing vectors in work[ir:].
impl.Dorgbr(lapack.ApplyQ, n, n, n, work[ir:], ldworkr,
work[itauq:], work[iwork:], lwork-iwork)
iwork = ie + n
// Perform bidiagonal QR iteration, computing left singular
// vectors of R in work[ir:].
ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1,
work[ir:], ldworkr, work, 1, work[iwork:])
// Multiply Q in U by left singular vectors of R in
// work[ir:], storing result in A.
bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, u, ldu,
work[ir:], ldworkr, 0, a, lda)
// Copy left singular vectors of A from A to U.
impl.Dlacpy(blas.All, m, n, a, lda, u, ldu)
} else {
// Insufficient workspace for a fast algorithm.
itau := 0
iwork := itau + n
// Compute A = Q*R, copying result to U.
impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
// Generate Q in U.
impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
ie := itau
itauq := ie + n
itaup := itauq + n
iwork = itaup + n
// Zero out below R in A.
impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
// Bidiagonalize R in A.
impl.Dgebrd(n, n, a, lda, s, work[ie:],
work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
// Multiply Q in U by left bidiagonalizing vectors in A.
impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
iwork = ie + n
// Perform bidiagonal QR iteration, computing left
// singular vectors of A in U.
ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:],
work, 1, u, ldu, work, 1, work[iwork:])
}
} else if wantvo {
// Path 8.
panic(noSVDO)
} else if wantvas {
// Path 9.
if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
// Sufficient workspace for a fast algorithm.
iu := 0
var ldworku int
if lwork >= wrkbl+lda*n {
ldworku = lda
} else {
ldworku = n
}
itau := iu + ldworku*n
iwork := itau + n
// Compute A = Q * R, copying result to U.
impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
// Generate Q in U.
impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
// Copy R to work[iu:], zeroing out below it.
impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku)
impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku)
ie = itau
itauq := ie + n
itaup := itauq + n
iwork = itaup + n
// Bidiagonalize R in work[iu:], copying result to VT.
impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:],
work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt)
// Generate left bidiagonalizing vectors in work[iu:].
impl.Dorgbr(lapack.ApplyQ, n, n, n, work[iu:], ldworku,
work[itauq:], work[iwork:], lwork-iwork)
// Generate right bidiagonalizing vectors in VT.
impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
work[itaup:], work[iwork:], lwork-iwork)
iwork = ie + n
// Perform bidiagonal QR iteration, computing left singular
// vectors of R in work[iu:] and computing right
// singular vectors of R in VT.
ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:],
vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:])
// Multiply Q in U by left singular vectors of R in
// work[iu:], storing result in A.
bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1,
u, ldu, work[iu:], ldworku, 0, a, lda)
// Copy left singular vectors of A from A to U.
impl.Dlacpy(blas.All, m, n, a, lda, u, ldu)
/*
// Bidiagonalize R in VT.
impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
// Multiply Q in U by left bidiagonalizing vectors in VT.
impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans,
m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
// Generate right bidiagonalizing vectors in VT.
impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
work[itaup:], work[iwork:], lwork-iwork)
iwork = ie + n
// Perform bidiagonal QR iteration, computing left singular
// vectors of A in U and computing right singular vectors
// of A in VT.
ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
vt, ldvt, u, ldu, work, 1, work[iwork:])
*/
} else {
// Insufficient workspace for a fast algorithm.
itau := 0
iwork := itau + n
// Compute A = Q*R, copying result to U.
impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
// Generate Q in U.
impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
// Copy R from A to VT, zeroing out below it.
impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
ie := itau
itauq := ie + n
itaup := itauq + n
iwork = itaup + n
// Bidiagonalize R in VT.
impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
// Multiply Q in U by left bidiagonalizing vectors in VT.
impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans,
m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
// Generate right bidiagonalizing vectors in VT.
impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
work[itaup:], work[iwork:], lwork-iwork)
iwork = ie + n
// Perform bidiagonal QR iteration, computing left singular
// vectors of A in U and computing right singular vectors
// of A in VT.
impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
vt, ldvt, u, ldu, work, 1, work[iwork:])
}
}
}
} else {
// Path 10.
// M at least N, but not much larger.
ie = 0
itauq := ie + n
itaup := itauq + n
iwork := itaup + n
// Bidiagonalize A.
impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:],
work[itaup:], work[iwork:], lwork-iwork)
if wantuas {
// Left singular vectors are desired in U. Copy result to U and
// generate left bidiagonalizing vectors in U.
impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
var ncu int
if wantus {
ncu = n
}
if wantua {
ncu = m
}
impl.Dorgbr(lapack.ApplyQ, m, ncu, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
}
if wantvas {
// Right singular vectors are desired in VT. Copy result to VT and
// generate right bidiagonalizing vectors in VT.
impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
}
if wantuo {
panic(noSVDO)
}
if wantvo {
panic(noSVDO)
}
iwork = ie + n
var nru, ncvt int
if wantuas || wantuo {
nru = m
}
if wantun {
nru = 0
}
if wantvas || wantvo {
ncvt = n
}
if wantvn {
ncvt = 0
}
if !wantuo && !wantvo {
// Perform bidiagonal QR iteration, if desired, computing left
// singular vectors in U and right singular vectors in VT.
ok = impl.Dbdsqr(blas.Upper, n, ncvt, nru, 0, s, work[ie:],
vt, ldvt, u, ldu, work, 1, work[iwork:])
} else {
// There will be two branches when the implementation is complete.
panic(noSVDO)
}
}
} else {
// A has more columns than rows. If A has sufficiently more columns than
// rows, first reduce using the LQ decomposition.
if n >= mnthr {
// n >> m.
if wantvn {
// Path 1t.
itau := 0
iwork := itau + m
// Compute A = L*Q.
impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
// Zero out above L.
impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
ie := 0
itauq := ie + m
itaup := itauq + m
iwork = itaup + m
// Bidiagonalize L in A.
impl.Dgebrd(m, m, a, lda, s, work[ie:itauq],
work[itauq:itaup], work[itaup:iwork], work[iwork:], lwork-iwork)
if wantuo || wantuas {
impl.Dorgbr(lapack.ApplyQ, m, m, m, a, lda,
work[itauq:], work[iwork:], lwork-iwork)
}
iwork = ie + m
nru := 0
if wantuo || wantuas {
nru = m
}
// Perform bidiagonal QR iteration, computing left singular vectors
// of A in A if desired.
ok = impl.Dbdsqr(blas.Upper, m, 0, nru, 0, s, work[ie:],
work, 1, a, lda, work, 1, work[iwork:])
// If left singular vectors desired in U, copy them there.
if wantuas {
impl.Dlacpy(blas.All, m, m, a, lda, u, ldu)
}
} else if wantvo && wantun {
// Path 2t.
panic(noSVDO)
} else if wantvo && wantuas {
// Path 3t.
panic(noSVDO)
} else if wantvs {
if wantun {
// Path 4t.
if lwork >= m*m+max(4*m, bdspac) {
// Sufficient workspace for a fast algorithm.
ir := 0
var ldworkr int
if lwork >= wrkbl+lda*m {
ldworkr = lda
} else {
ldworkr = m
}
itau := ir + ldworkr*m
iwork := itau + m
// Compute A = L*Q.
impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
// Copy L to work[ir:], zeroing out above it.
impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr)
impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr)
// Generate Q in A.
impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork)
ie := itau
itauq := ie + m
itaup := itauq + m
iwork = itaup + m
// Bidiagonalize L in work[ir:].
impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:],
work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
// Generate right vectors bidiagonalizing L in work[ir:].
impl.Dorgbr(lapack.ApplyP, m, m, m, work[ir:], ldworkr,
work[itaup:], work[iwork:], lwork-iwork)
iwork = ie + m
// Perform bidiagonal QR iteration, computing right singular
// vectors of L in work[ir:].
ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:],
work[ir:], ldworkr, work, 1, work, 1, work[iwork:])
// Multiply right singular vectors of L in work[ir:] by
// Q in A, storing result in VT.
bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
work[ir:], ldworkr, a, lda, 0, vt, ldvt)
} else {
// Insufficient workspace for a fast algorithm.
itau := 0
iwork := itau + m
// Compute A = L*Q.
impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
// Copy result to VT.
impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
// Generate Q in VT.
impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
ie := itau
itauq := ie + m
itaup := itauq + m
iwork = itaup + m
// Zero out above L in A.
impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
// Bidiagonalize L in A.
impl.Dgebrd(m, m, a, lda, s, work[ie:],
work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
// Multiply right vectors bidiagonalizing L by Q in VT.
impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
iwork = ie + m
// Perform bidiagonal QR iteration, computing right
// singular vectors of A in VT.
ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:],
vt, ldvt, work, 1, work, 1, work[iwork:])
}
} else if wantuo {
// Path 5t.
panic(noSVDO)
} else if wantuas {
// Path 6t.
if lwork >= m*m+max(4*m, bdspac) {
// Sufficient workspace for a fast algorithm.
iu := 0
var ldworku int
if lwork >= wrkbl+lda*m {
ldworku = lda
} else {
ldworku = m
}
itau := iu + ldworku*m
iwork := itau + m
// Compute A = L*Q.
impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
// Copy L to work[iu:], zeroing out above it.
impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku)
// Generate Q in A.
impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork)
ie := itau
itauq := ie + m
itaup := itauq + m
iwork = itaup + m
// Bidiagonalize L in work[iu:], copying result to U.
impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:],
work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
// Generate right bidiagonalizing vectors in work[iu:].
impl.Dorgbr(lapack.ApplyP, m, m, m, work[iu:], ldworku,
work[itaup:], work[iwork:], lwork-iwork)
// Generate left bidiagonalizing vectors in U.
impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
iwork = ie + m
// Perform bidiagonal QR iteration, computing left singular
// vectors of L in U and computing right singular vectors of
// L in work[iu:].
ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
work[iu:], ldworku, u, ldu, work, 1, work[iwork:])
// Multiply right singular vectors of L in work[iu:] by
// Q in A, storing result in VT.
bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
work[iu:], ldworku, a, lda, 0, vt, ldvt)
} else {
// Insufficient workspace for a fast algorithm.
itau := 0
iwork := itau + m
// Compute A = L*Q, copying result to VT.
impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
// Generate Q in VT.
impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
// Copy L to U, zeroing out above it.
impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
ie := itau
itauq := ie + m
itaup := itauq + m
iwork = itaup + m
// Bidiagonalize L in U.
impl.Dgebrd(m, m, u, ldu, s, work[ie:],
work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
// Multiply right bidiagonalizing vectors in U by Q in VT.
impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
// Generate left bidiagonalizing vectors in U.
impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
iwork = ie + m
// Perform bidiagonal QR iteration, computing left singular
// vectors of A in U and computing right singular vectors
// of A in VT.
impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:], vt, ldvt,
u, ldu, work, 1, work[iwork:])
}
}
} else if wantva {
if wantun {
// Path 7t.
if lwork >= m*m+max(max(n+m, 4*m), bdspac) {
// Sufficient workspace for a fast algorithm.
ir := 0
var ldworkr int
if lwork >= wrkbl+lda*m {
ldworkr = lda
} else {
ldworkr = m
}
itau := ir + ldworkr*m
iwork := itau + m
// Compute A = L*Q, copying result to VT.
impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
// Copy L to work[ir:], zeroing out above it.
impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr)
impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr)
// Generate Q in VT.
impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
ie := itau
itauq := ie + m
itaup := itauq + m
iwork = itaup + m
// Bidiagonalize L in work[ir:].
impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:],
work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
// Generate right bidiagonalizing vectors in work[ir:].
impl.Dorgbr(lapack.ApplyP, m, m, m, work[ir:], ldworkr,
work[itaup:], work[iwork:], lwork-iwork)
iwork = ie + m
// Perform bidiagonal QR iteration, computing right
// singular vectors of L in work[ir:].
ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:],
work[ir:], ldworkr, work, 1, work, 1, work[iwork:])
// Multiply right singular vectors of L in work[ir:] by
// Q in VT, storing result in A.
bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
work[ir:], ldworkr, vt, ldvt, 0, a, lda)
// Copy right singular vectors of A from A to VT.
impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
} else {
// Insufficient workspace for a fast algorithm.
itau := 0
iwork := itau + m
// Compute A = L * Q, copying result to VT.
impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
// Generate Q in VT.
impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
ie := itau
itauq := ie + m
itaup := itauq + m
iwork = itaup + m
// Zero out above L in A.
impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
// Bidiagonalize L in A.
impl.Dgebrd(m, m, a, lda, s, work[ie:], work[itauq:],
work[itaup:], work[iwork:], lwork-iwork)
// Multiply right bidiagonalizing vectors in A by Q in VT.
impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
iwork = ie + m
// Perform bidiagonal QR iteration, computing right singular
// vectors of A in VT.
ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:],
vt, ldvt, work, 1, work, 1, work[iwork:])
}
} else if wantuo {
panic(noSVDO)
} else if wantuas {
// Path 9t.
if lwork >= m*m+max(max(m+n, 4*m), bdspac) {
// Sufficient workspace for a fast algorithm.
iu := 0
var ldworku int
if lwork >= wrkbl+lda*m {
ldworku = lda
} else {
ldworku = m
}
itau := iu + ldworku*m
iwork := itau + m
// Compute A = L * Q, copying result to VT.
impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
// Generate Q in VT.
impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
// Copy L to work[iu:], zeroing out above it.
impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku)
ie = itau
itauq := ie + m
itaup := itauq + m
iwork = itaup + m
// Bidiagonalize L in work[iu:], copying result to U.
impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:],
work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
// Generate right bidiagonalizing vectors in work[iu:].
impl.Dorgbr(lapack.ApplyP, m, m, m, work[iu:], ldworku,
work[itaup:], work[iwork:], lwork-iwork)
// Generate left bidiagonalizing vectors in U.
impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
iwork = ie + m
// Perform bidiagonal QR iteration, computing left singular
// vectors of L in U and computing right singular vectors
// of L in work[iu:].
ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
work[iu:], ldworku, u, ldu, work, 1, work[iwork:])
// Multiply right singular vectors of L in work[iu:] by
// Q in VT, storing result in A.
bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
work[iu:], ldworku, vt, ldvt, 0, a, lda)
// Copy right singular vectors of A from A to VT.
impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
} else {
// Insufficient workspace for a fast algorithm.
itau := 0
iwork := itau + m
// Compute A = L * Q, copying result to VT.
impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
// Generate Q in VT.
impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
// Copy L to U, zeroing out above it.
impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
ie = itau
itauq := ie + m
itaup := itauq + m
iwork = itaup + m
// Bidiagonalize L in U.
impl.Dgebrd(m, m, u, ldu, s, work[ie:], work[itauq:],
work[itaup:], work[iwork:], lwork-iwork)
// Multiply right bidiagonalizing vectors in U by Q in VT.
impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
// Generate left bidiagonalizing vectors in U.
impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
iwork = ie + m
// Perform bidiagonal QR iteration, computing left singular
// vectors of A in U and computing right singular vectors
// of A in VT.
ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:],
vt, ldvt, u, ldu, work, 1, work[iwork:])
}
}
}
} else {
// Path 10t.
// N at least M, but not much larger.
ie = 0
itauq := ie + m
itaup := itauq + m
iwork := itaup + m
// Bidiagonalize A.
impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
if wantuas {
// If left singular vectors desired in U, copy result to U and
// generate left bidiagonalizing vectors in U.
impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
impl.Dorgbr(lapack.ApplyQ, m, m, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
}
if wantvas {
// If right singular vectors desired in VT, copy result to VT
// and generate right bidiagonalizing vectors in VT.
impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
var nrvt int
if wantva {
nrvt = n
} else {
nrvt = m
}
impl.Dorgbr(lapack.ApplyP, nrvt, n, m, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
}
if wantuo {
panic(noSVDO)
}
if wantvo {
panic(noSVDO)
}
iwork = ie + m
var nru, ncvt int
if wantuas || wantuo {
nru = m
}
if wantvas || wantvo {
ncvt = n
}
if !wantuo && !wantvo {
// Perform bidiagonal QR iteration, if desired, computing left
// singular vectors in U and computing right singular vectors in
// VT.
ok = impl.Dbdsqr(blas.Lower, m, ncvt, nru, 0, s, work[ie:],
vt, ldvt, u, ldu, work, 1, work[iwork:])
} else {
// There will be two branches when the implementation is complete.
panic(noSVDO)
}
}
}
if !ok {
if ie > 1 {
for i := 0; i < minmn-1; i++ {
work[i+1] = work[i+ie]
}
}
if ie < 1 {
for i := minmn - 2; i >= 0; i-- {
work[i+1] = work[i+ie]
}
}
}
// Undo scaling if necessary.
if iscl {
if anrm > bignum {
impl.Dlascl(lapack.General, 0, 0, bignum, anrm, minmn, 1, s, minmn)
}
if !ok && anrm > bignum {
impl.Dlascl(lapack.General, 0, 0, bignum, anrm, minmn-1, 1, work[minmn:], minmn)
}
if anrm < smlnum {
impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, minmn, 1, s, minmn)
}
if !ok && anrm < smlnum {
impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, minmn-1, 1, work[minmn:], minmn)
}
}
work[0] = float64(maxwrk)
return ok
}