mirror of
				https://github.com/gonum/gonum.git
				synced 2025-10-26 08:40:26 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			1362 lines
		
	
	
		
			43 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			1362 lines
		
	
	
		
			43 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright ©2015 The gonum Authors. All rights reserved.
 | ||
| // Use of this source code is governed by a BSD-style
 | ||
| // license that can be found in the LICENSE file.
 | ||
| 
 | ||
| package native
 | ||
| 
 | ||
| import (
 | ||
| 	"math"
 | ||
| 
 | ||
| 	"github.com/gonum/blas"
 | ||
| 	"github.com/gonum/blas/blas64"
 | ||
| 	"github.com/gonum/lapack"
 | ||
| )
 | ||
| 
 | ||
| const noSVDO = "dgesvd: not coded for overwrite"
 | ||
| 
 | ||
| // Dgesvd computes the singular value decomposition of the input matrix A.
 | ||
| //
 | ||
| // The singular value decomposition is
 | ||
| //  A = U * Sigma * V^T
 | ||
| // where Sigma is an m×n diagonal matrix containing the singular values of A,
 | ||
| // U is an m×m orthogonal matrix and V is an n×n orthogonal matrix. The first
 | ||
| // min(m,n) columns of U and V are the left and right singular vectors of A
 | ||
| // respectively.
 | ||
| //
 | ||
| // jobU and jobVT are options for computing the singular vectors. The behavior
 | ||
| // is as follows
 | ||
| //  jobU == lapack.SVDAll       All m columns of U are returned in u
 | ||
| //  jobU == lapack.SVDInPlace   The first min(m,n) columns are returned in u
 | ||
| //  jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a
 | ||
| //  jobU == lapack.SVDNone      The columns of U are not computed.
 | ||
| // The behavior is the same for jobVT and the rows of V^T. At most one of jobU
 | ||
| // and jobVT can equal lapack.SVDOverwrite, and Dgesvd will panic otherwise.
 | ||
| //
 | ||
| // On entry, a contains the data for the m×n matrix A. During the call to Dgesvd
 | ||
| // the data is overwritten. On exit, A contains the appropriate singular vectors
 | ||
| // if either job is lapack.SVDOverwrite.
 | ||
| //
 | ||
| // s is a slice of length at least min(m,n) and on exit contains the singular
 | ||
| // values in decreasing order.
 | ||
| //
 | ||
| // u contains the left singular vectors on exit, stored column-wise. If
 | ||
| // jobU == lapack.SVDAll, u is of size m×m. If jobU == lapack.SVDInPlace u is
 | ||
| // of size m×min(m,n). If jobU == lapack.SVDOverwrite or lapack.SVDNone, u is
 | ||
| // not used.
 | ||
| //
 | ||
| // vt contains the left singular vectors on exit, stored row-wise. If
 | ||
| // jobV == lapack.SVDAll, vt is of size n×m. If jobVT == lapack.SVDInPlace vt is
 | ||
| // of size min(m,n)×n. If jobVT == lapack.SVDOverwrite or lapack.SVDNone, vt is
 | ||
| // not used.
 | ||
| //
 | ||
| // work is a slice for storing temporary memory, and lwork is the usable size of
 | ||
| // the slice. lwork must be at least max(5*min(m,n), 3*min(m,n)+max(m,n)).
 | ||
| // If lwork == -1, instead of performing Dgesvd, the optimal work length will be
 | ||
| // stored into work[0]. Dgesvd will panic if the working memory has insufficient
 | ||
| // storage.
 | ||
| //
 | ||
| // Dgesvd returns whether the decomposition successfully completed.
 | ||
| func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, lwork int) (ok bool) {
 | ||
| 	minmn := min(m, n)
 | ||
| 	checkMatrix(m, n, a, lda)
 | ||
| 	if jobU == lapack.SVDAll {
 | ||
| 		checkMatrix(m, m, u, ldu)
 | ||
| 	} else if jobU == lapack.SVDInPlace {
 | ||
| 		checkMatrix(m, minmn, u, ldu)
 | ||
| 	}
 | ||
| 	if jobVT == lapack.SVDAll {
 | ||
| 		checkMatrix(n, n, vt, ldvt)
 | ||
| 	} else if jobVT == lapack.SVDInPlace {
 | ||
| 		checkMatrix(minmn, n, vt, ldvt)
 | ||
| 	}
 | ||
| 	if jobU == lapack.SVDOverwrite && jobVT == lapack.SVDOverwrite {
 | ||
| 		panic("lapack: both jobU and jobVT are lapack.SVDOverwrite")
 | ||
| 	}
 | ||
| 	if len(s) < minmn {
 | ||
| 		panic(badS)
 | ||
| 	}
 | ||
| 	if jobU == lapack.SVDOverwrite || jobVT == lapack.SVDOverwrite {
 | ||
| 		panic(noSVDO)
 | ||
| 	}
 | ||
| 	if m == 0 || n == 0 {
 | ||
| 		return true
 | ||
| 	}
 | ||
| 
 | ||
| 	wantua := jobU == lapack.SVDAll
 | ||
| 	wantus := jobU == lapack.SVDInPlace
 | ||
| 	wantuas := wantua || wantus
 | ||
| 	wantuo := jobU == lapack.SVDOverwrite
 | ||
| 	wantun := jobU == lapack.None
 | ||
| 
 | ||
| 	wantva := jobVT == lapack.SVDAll
 | ||
| 	wantvs := jobVT == lapack.SVDInPlace
 | ||
| 	wantvas := wantva || wantvs
 | ||
| 	wantvo := jobVT == lapack.SVDOverwrite
 | ||
| 	wantvn := jobVT == lapack.None
 | ||
| 
 | ||
| 	bi := blas64.Implementation()
 | ||
| 	var mnthr int
 | ||
| 
 | ||
| 	// TODO(btracey): The netlib implementation checks have this at only length 1.
 | ||
| 	// Our implementation checks all input sizes before examining the l == -1 case.
 | ||
| 	// Fix the failing cases to reduce the needed memory here.
 | ||
| 	dum := make([]float64, m*n)
 | ||
| 
 | ||
| 	// Compute optimal space for subroutines.
 | ||
| 	maxwrk := 1
 | ||
| 	opts := string(jobU) + string(jobVT)
 | ||
| 	var wrkbl, bdspac int
 | ||
| 	if m >= n {
 | ||
| 		mnthr = impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0)
 | ||
| 		bdspac = 5 * n
 | ||
| 		impl.Dgeqrf(m, n, a, lda, dum, dum, -1)
 | ||
| 		lwork_dgeqrf := int(dum[0])
 | ||
| 		impl.Dorgqr(m, n, n, a, lda, dum, dum, -1)
 | ||
| 		lwork_dorgqr_n := int(dum[0])
 | ||
| 		impl.Dorgqr(m, m, n, a, lda, dum, dum, -1)
 | ||
| 		lwork_dorgqr_m := int(dum[0])
 | ||
| 		impl.Dgebrd(n, n, a, lda, s, dum, dum, dum, dum, -1)
 | ||
| 		lwork_dgebrd := int(dum[0])
 | ||
| 		impl.Dorgbr(lapack.ApplyP, n, n, n, a, lda, dum, dum, -1)
 | ||
| 		lwork_dorgbr_p := int(dum[0])
 | ||
| 		impl.Dorgbr(lapack.ApplyQ, n, n, n, a, lda, dum, dum, -1)
 | ||
| 		lwork_dorgbr_q := int(dum[0])
 | ||
| 
 | ||
| 		if m >= mnthr {
 | ||
| 			// m >> n
 | ||
| 			if wantun {
 | ||
| 				// Path 1
 | ||
| 				maxwrk = n + lwork_dgeqrf
 | ||
| 				maxwrk = max(maxwrk, 3*n+lwork_dgebrd)
 | ||
| 				if wantvo || wantvas {
 | ||
| 					maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
 | ||
| 				}
 | ||
| 				maxwrk = max(maxwrk, bdspac)
 | ||
| 			} else if wantuo && wantvn {
 | ||
| 				// Path 2
 | ||
| 				wrkbl = n + lwork_dgeqrf
 | ||
| 				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
 | ||
| 				wrkbl = max(wrkbl, bdspac)
 | ||
| 				maxwrk = max(n*n+wrkbl, n*n+m*n+n)
 | ||
| 			} else if wantuo && wantvs {
 | ||
| 				// Path 3
 | ||
| 				wrkbl = n + lwork_dgeqrf
 | ||
| 				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
 | ||
| 				wrkbl = max(wrkbl, bdspac)
 | ||
| 				maxwrk = max(n*n+wrkbl, n*n+m*n+n)
 | ||
| 			} else if wantus && wantvn {
 | ||
| 				// Path 4
 | ||
| 				wrkbl = n + lwork_dgeqrf
 | ||
| 				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
 | ||
| 				wrkbl = max(wrkbl, bdspac)
 | ||
| 				maxwrk = n*n + wrkbl
 | ||
| 			} else if wantus && wantvo {
 | ||
| 				// Path 5
 | ||
| 				wrkbl = n + lwork_dgeqrf
 | ||
| 				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
 | ||
| 				wrkbl = max(wrkbl, bdspac)
 | ||
| 				maxwrk = 2*n*n + wrkbl
 | ||
| 			} else if wantus && wantvas {
 | ||
| 				// Path 6
 | ||
| 				wrkbl = n + lwork_dgeqrf
 | ||
| 				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
 | ||
| 				wrkbl = max(wrkbl, bdspac)
 | ||
| 				maxwrk = n*n + wrkbl
 | ||
| 			} else if wantua && wantvn {
 | ||
| 				// Path 7
 | ||
| 				wrkbl = n + lwork_dgeqrf
 | ||
| 				wrkbl = max(wrkbl, n+lwork_dorgqr_m)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
 | ||
| 				wrkbl = max(wrkbl, bdspac)
 | ||
| 				maxwrk = n*n + wrkbl
 | ||
| 			} else if wantua && wantvo {
 | ||
| 				// Path 8
 | ||
| 				wrkbl = n + lwork_dgeqrf
 | ||
| 				wrkbl = max(wrkbl, n+lwork_dorgqr_m)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
 | ||
| 				wrkbl = max(wrkbl, bdspac)
 | ||
| 				maxwrk = 2*n*n + wrkbl
 | ||
| 			} else if wantua && wantvas {
 | ||
| 				// Path 9
 | ||
| 				wrkbl = n + lwork_dgeqrf
 | ||
| 				wrkbl = max(wrkbl, n+lwork_dorgqr_m)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
 | ||
| 				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
 | ||
| 				wrkbl = max(wrkbl, bdspac)
 | ||
| 				maxwrk = n*n + wrkbl
 | ||
| 			}
 | ||
| 		} else {
 | ||
| 			// Path 10: m > n
 | ||
| 			impl.Dgebrd(m, n, a, lda, s, dum, dum, dum, dum, -1)
 | ||
| 			lwork_dgebrd := int(dum[0])
 | ||
| 			maxwrk = 3*n + lwork_dgebrd
 | ||
| 			if wantus || wantuo {
 | ||
| 				impl.Dorgbr(lapack.ApplyQ, m, n, n, a, lda, dum, dum, -1)
 | ||
| 				lwork_dorgbr_q = int(dum[0])
 | ||
| 				maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
 | ||
| 			}
 | ||
| 			if wantua {
 | ||
| 				impl.Dorgbr(lapack.ApplyQ, m, m, n, a, lda, dum, dum, -1)
 | ||
| 				lwork_dorgbr_q := int(dum[0])
 | ||
| 				maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
 | ||
| 			}
 | ||
| 			if !wantvn {
 | ||
| 				maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
 | ||
| 			}
 | ||
| 			maxwrk = max(maxwrk, bdspac)
 | ||
| 		}
 | ||
| 	} else {
 | ||
| 		mnthr = impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0)
 | ||
| 
 | ||
| 		bdspac = 5 * m
 | ||
| 		impl.Dgelqf(m, n, a, lda, dum, dum, -1)
 | ||
| 		lwork_dgelqf := int(dum[0])
 | ||
| 		impl.Dorglq(n, n, m, dum, n, dum, dum, -1)
 | ||
| 		lwork_dorglq_n := int(dum[0])
 | ||
| 		impl.Dorglq(m, n, m, a, lda, dum, dum, -1)
 | ||
| 		lwork_dorglq_m := int(dum[0])
 | ||
| 		impl.Dgebrd(m, m, a, lda, s, dum, dum, dum, dum, -1)
 | ||
| 		lwork_dgebrd := int(dum[0])
 | ||
| 		impl.Dorgbr(lapack.ApplyP, m, m, m, a, n, dum, dum, -1)
 | ||
| 		lwork_dorgbr_p := int(dum[0])
 | ||
| 		impl.Dorgbr(lapack.ApplyQ, m, m, m, a, n, dum, dum, -1)
 | ||
| 		lwork_dorgbr_q := int(dum[0])
 | ||
| 		if n >= mnthr {
 | ||
| 			// n >> m
 | ||
| 			if wantvn {
 | ||
| 				// Path 1t
 | ||
| 				maxwrk = m + lwork_dgelqf
 | ||
| 				maxwrk = max(maxwrk, 3*m+lwork_dgebrd)
 | ||
| 				if wantuo || wantuas {
 | ||
| 					maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
 | ||
| 				}
 | ||
| 				maxwrk = max(maxwrk, bdspac)
 | ||
| 			} else if wantvo && wantun {
 | ||
| 				// Path 2t
 | ||
| 				wrkbl = m + lwork_dgelqf
 | ||
| 				wrkbl = max(wrkbl, m+lwork_dorglq_m)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
 | ||
| 				wrkbl = max(wrkbl, bdspac)
 | ||
| 				maxwrk = max(m*m+wrkbl, m*m+m*n+m)
 | ||
| 			} else if wantvo && wantuas {
 | ||
| 				// Path 3t
 | ||
| 				wrkbl = m + lwork_dgelqf
 | ||
| 				wrkbl = max(wrkbl, m+lwork_dorglq_m)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
 | ||
| 				wrkbl = max(wrkbl, bdspac)
 | ||
| 				maxwrk = max(m*m+wrkbl, m*m+m*n+m)
 | ||
| 			} else if wantvs && wantun {
 | ||
| 				// Path 4t
 | ||
| 				wrkbl = m + lwork_dgelqf
 | ||
| 				wrkbl = max(wrkbl, m+lwork_dorglq_m)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
 | ||
| 				wrkbl = max(wrkbl, bdspac)
 | ||
| 				maxwrk = m*m + wrkbl
 | ||
| 			} else if wantvs && wantuo {
 | ||
| 				// Path 5t
 | ||
| 				wrkbl = m + lwork_dgelqf
 | ||
| 				wrkbl = max(wrkbl, m+lwork_dorglq_m)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
 | ||
| 				wrkbl = max(wrkbl, bdspac)
 | ||
| 				maxwrk = 2*m*m + wrkbl
 | ||
| 			} else if wantvs && wantuas {
 | ||
| 				// Path 6t
 | ||
| 				wrkbl = m + lwork_dgelqf
 | ||
| 				wrkbl = max(wrkbl, m+lwork_dorglq_m)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
 | ||
| 				wrkbl = max(wrkbl, bdspac)
 | ||
| 				maxwrk = m*m + wrkbl
 | ||
| 			} else if wantva && wantun {
 | ||
| 				// Path 7t
 | ||
| 				wrkbl = m + lwork_dgelqf
 | ||
| 				wrkbl = max(wrkbl, m+lwork_dorglq_n)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
 | ||
| 				wrkbl = max(wrkbl, bdspac)
 | ||
| 				maxwrk = m*m + wrkbl
 | ||
| 			} else if wantva && wantuo {
 | ||
| 				// Path 8t
 | ||
| 				wrkbl = m + lwork_dgelqf
 | ||
| 				wrkbl = max(wrkbl, m+lwork_dorglq_n)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
 | ||
| 				wrkbl = max(wrkbl, bdspac)
 | ||
| 				maxwrk = 2*m*m + wrkbl
 | ||
| 			} else if wantva && wantuas {
 | ||
| 				// Path 9t
 | ||
| 				wrkbl = m + lwork_dgelqf
 | ||
| 				wrkbl = max(wrkbl, m+lwork_dorglq_n)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
 | ||
| 				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
 | ||
| 				wrkbl = max(wrkbl, bdspac)
 | ||
| 				maxwrk = m*m + wrkbl
 | ||
| 			}
 | ||
| 		} else {
 | ||
| 			// Path 10t, n > m
 | ||
| 			impl.Dgebrd(m, n, a, lda, s, dum, dum, dum, dum, -1)
 | ||
| 			lwork_dgebrd = int(dum[0])
 | ||
| 			maxwrk := 3*m + lwork_dgebrd
 | ||
| 			if wantvs || wantvo {
 | ||
| 				impl.Dorgbr(lapack.ApplyP, m, n, m, a, n, dum, dum, -1)
 | ||
| 				lwork_dorgbr_p = int(dum[0])
 | ||
| 				maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
 | ||
| 			}
 | ||
| 			if wantva {
 | ||
| 				impl.Dorgbr(lapack.ApplyP, n, n, m, a, n, dum, dum, -1)
 | ||
| 				lwork_dorgbr_p = int(dum[0])
 | ||
| 				maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
 | ||
| 			}
 | ||
| 			if !wantun {
 | ||
| 				maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
 | ||
| 			}
 | ||
| 			maxwrk = max(maxwrk, bdspac)
 | ||
| 		}
 | ||
| 	}
 | ||
| 
 | ||
| 	minWork := max(1, 5*minmn)
 | ||
| 	if !((wantun && m >= mnthr) || (wantvn && n >= mnthr)) {
 | ||
| 		minWork = max(minWork, 3*minmn+max(m, n))
 | ||
| 	}
 | ||
| 
 | ||
| 	if lwork != -1 {
 | ||
| 		if len(work) < lwork {
 | ||
| 			panic(badWork)
 | ||
| 		}
 | ||
| 		if lwork < minWork {
 | ||
| 			panic(badWork)
 | ||
| 		}
 | ||
| 	}
 | ||
| 	if m == 0 || n == 0 {
 | ||
| 		return true
 | ||
| 	}
 | ||
| 
 | ||
| 	maxwrk = max(maxwrk, minWork)
 | ||
| 	work[0] = float64(maxwrk)
 | ||
| 	if lwork == -1 {
 | ||
| 		return true
 | ||
| 	}
 | ||
| 
 | ||
| 	// Perform decomposition.
 | ||
| 	eps := dlamchE
 | ||
| 	smlnum := math.Sqrt(dlamchS) / eps
 | ||
| 	bignum := 1 / smlnum
 | ||
| 
 | ||
| 	// Scale A if max element outside range [smlnum, bignum].
 | ||
| 	anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, dum)
 | ||
| 	var iscl bool
 | ||
| 	if anrm > 0 && anrm < smlnum {
 | ||
| 		iscl = true
 | ||
| 		impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda)
 | ||
| 	} else if anrm > bignum {
 | ||
| 		iscl = true
 | ||
| 		impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda)
 | ||
| 	}
 | ||
| 
 | ||
| 	var ie int
 | ||
| 	if m >= n {
 | ||
| 		// If A has sufficiently more rows than columns, use the QR decomposition.
 | ||
| 		if m >= mnthr {
 | ||
| 			// m >> n
 | ||
| 			if wantun {
 | ||
| 				// Path 1.
 | ||
| 				itau := 0
 | ||
| 				iwork := itau + n
 | ||
| 
 | ||
| 				// Compute A = Q * R.
 | ||
| 				impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 				// Zero out below R.
 | ||
| 				impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
 | ||
| 				ie = 0
 | ||
| 				itauq := ie + n
 | ||
| 				itaup := itauq + n
 | ||
| 				iwork = itaup + n
 | ||
| 				// Bidiagonalize R in A.
 | ||
| 				impl.Dgebrd(n, n, a, lda, s, work[ie:], work[itauq:],
 | ||
| 					work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 				ncvt := 0
 | ||
| 				if wantvo || wantvas {
 | ||
| 					// Generate P^T.
 | ||
| 					impl.Dorgbr(lapack.ApplyP, n, n, n, a, lda, work[itaup:],
 | ||
| 						work[iwork:], lwork-iwork)
 | ||
| 					ncvt = n
 | ||
| 				}
 | ||
| 				iwork = ie + n
 | ||
| 
 | ||
| 				// Perform bidiagonal QR iteration computing right singular vectors
 | ||
| 				// of A in A if desired.
 | ||
| 				ok = impl.Dbdsqr(blas.Upper, n, ncvt, 0, 0, s, work[ie:],
 | ||
| 					a, lda, dum, 1, dum, 1, work[iwork:])
 | ||
| 
 | ||
| 				// If right singular vectors desired in VT, copy them there.
 | ||
| 				if wantvas {
 | ||
| 					impl.Dlacpy(blas.All, n, n, a, lda, vt, ldvt)
 | ||
| 				}
 | ||
| 			} else if wantuo && wantvn {
 | ||
| 				// Path 2
 | ||
| 				panic(noSVDO)
 | ||
| 			} else if wantuo && wantvas {
 | ||
| 				// Path 3
 | ||
| 				panic(noSVDO)
 | ||
| 			} else if wantus {
 | ||
| 				if wantvn {
 | ||
| 					// Path 4
 | ||
| 					if lwork >= n*n+max(4*n, bdspac) {
 | ||
| 						// Sufficient workspace for a fast algorithm.
 | ||
| 						ir := 0
 | ||
| 						var ldworkr int
 | ||
| 						if lwork >= wrkbl+lda*n {
 | ||
| 							ldworkr = lda
 | ||
| 						} else {
 | ||
| 							ldworkr = n
 | ||
| 						}
 | ||
| 						itau := ir + ldworkr*n
 | ||
| 						iwork := itau + n
 | ||
| 						// Compute A = Q * R.
 | ||
| 						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Copy R to work[ir:], zeroing out below it.
 | ||
| 						impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr)
 | ||
| 						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr)
 | ||
| 
 | ||
| 						// Generate Q in A.
 | ||
| 						impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						ie := itau
 | ||
| 						itauq := ie + n
 | ||
| 						itaup := itauq + n
 | ||
| 						iwork = itaup + n
 | ||
| 
 | ||
| 						// Bidiagonalize R in work[ir:].
 | ||
| 						impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:],
 | ||
| 							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Generate left vectors bidiagonalizing R in work[ir:].
 | ||
| 						impl.Dorgbr(lapack.ApplyQ, n, n, n, work[ir:], ldworkr,
 | ||
| 							work[itauq:], work[iwork:], lwork-iwork)
 | ||
| 						iwork = ie + n
 | ||
| 
 | ||
| 						// Perform bidiagonal QR iteration, compuing left singular
 | ||
| 						// vectors of R in work[ir:].
 | ||
| 						ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], dum, 1,
 | ||
| 							work[ir:], ldworkr, dum, 1, work[iwork:])
 | ||
| 
 | ||
| 						// Multiply Q in A by left singular vectors of R in
 | ||
| 						// work[ir:], storing result in U.
 | ||
| 						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda,
 | ||
| 							work[ir:], ldworkr, 0, u, ldu)
 | ||
| 					} else {
 | ||
| 						// Insufficient workspace for a fast algorithm.
 | ||
| 						itau := 0
 | ||
| 						iwork := itau + n
 | ||
| 
 | ||
| 						// Compute A = Q*R, copying result to U.
 | ||
| 						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
 | ||
| 
 | ||
| 						// Generate Q in U.
 | ||
| 						impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						ie := itau
 | ||
| 						itauq := ie + n
 | ||
| 						itaup := itauq + n
 | ||
| 						iwork = itaup + n
 | ||
| 
 | ||
| 						// Zero out below R in A.
 | ||
| 						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
 | ||
| 
 | ||
| 						// Bidiagonalize R in A.
 | ||
| 						impl.Dgebrd(n, n, a, lda, s, work[ie:],
 | ||
| 							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Multiply Q in U by left vectors bidiagonalizing R.
 | ||
| 						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
 | ||
| 							a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
 | ||
| 						iwork = ie + n
 | ||
| 
 | ||
| 						// Perform bidiagonal QR iteration, computing left
 | ||
| 						// singular vectors of A in U.
 | ||
| 						ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:], dum, 1,
 | ||
| 							u, ldu, dum, 1, work[iwork:])
 | ||
| 					}
 | ||
| 				} else if wantvo {
 | ||
| 					// Path 5
 | ||
| 					panic(noSVDO)
 | ||
| 				} else if wantvas {
 | ||
| 					// Path 6
 | ||
| 					if lwork >= n*n+max(4*n, bdspac) {
 | ||
| 						// Sufficient workspace for a fast algorithm.
 | ||
| 						iu := 0
 | ||
| 						var ldworku int
 | ||
| 						if lwork >= wrkbl+lda*n {
 | ||
| 							ldworku = lda
 | ||
| 						} else {
 | ||
| 							ldworku = n
 | ||
| 						}
 | ||
| 						itau := iu + ldworku*n
 | ||
| 						iwork := itau + n
 | ||
| 
 | ||
| 						// Compute A = Q * R.
 | ||
| 						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						// Copy R to work[iu:], zeroing out below it.
 | ||
| 						impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku)
 | ||
| 						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku)
 | ||
| 
 | ||
| 						// Generate Q in A.
 | ||
| 						impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						ie := itau
 | ||
| 						itauq := ie + n
 | ||
| 						itaup := itauq + n
 | ||
| 						iwork = itaup + n
 | ||
| 
 | ||
| 						// Bidiagonalize R in work[iu:], copying result to VT.
 | ||
| 						impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:],
 | ||
| 							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 						impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt)
 | ||
| 
 | ||
| 						// Generate left bidiagonalizing vectors in work[iu:].
 | ||
| 						impl.Dorgbr(lapack.ApplyQ, n, n, n, work[iu:], ldworku,
 | ||
| 							work[itauq:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Generate right bidiagonalizing vectors in VT.
 | ||
| 						impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
 | ||
| 							work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 						iwork = ie + n
 | ||
| 
 | ||
| 						// Perform bidiagonal QR iteration, computing left singular
 | ||
| 						// vectors of R in work[iu:], and computing right singular
 | ||
| 						// vectors of R in VT.
 | ||
| 						ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:],
 | ||
| 							vt, ldvt, work[iu:], ldworku, dum, 1, work[iwork:])
 | ||
| 
 | ||
| 						// Multiply Q in A by left singular vectors of R in
 | ||
| 						// work[iu:], storing result in U.
 | ||
| 						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda,
 | ||
| 							work[iu:], ldworku, 0, u, ldu)
 | ||
| 					} else {
 | ||
| 						// Insufficient workspace for a fast algorithm.
 | ||
| 						itau := 0
 | ||
| 						iwork := itau + n
 | ||
| 
 | ||
| 						// Compute A = Q * R, copying result to U.
 | ||
| 						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
 | ||
| 
 | ||
| 						// Generate Q in U.
 | ||
| 						impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Copy R to VT, zeroing out below it.
 | ||
| 						impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
 | ||
| 						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
 | ||
| 
 | ||
| 						ie := itau
 | ||
| 						itauq := ie + n
 | ||
| 						itaup := itauq + n
 | ||
| 						iwork = itaup + n
 | ||
| 
 | ||
| 						// Bidiagonalize R in VT.
 | ||
| 						impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
 | ||
| 							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Multiply Q in U by left bidiagonalizing vectors in VT.
 | ||
| 						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
 | ||
| 							vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Generate right bidiagonalizing vectors in VT.
 | ||
| 						impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
 | ||
| 							work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 						iwork = ie + n
 | ||
| 
 | ||
| 						// Perform bidiagonal QR iteration, computing left singular
 | ||
| 						// vectors of A in U and computing right singular vectors
 | ||
| 						// of A in VT.
 | ||
| 						ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
 | ||
| 							vt, ldvt, u, ldu, dum, 1, work[iwork:])
 | ||
| 					}
 | ||
| 				}
 | ||
| 			} else if wantua {
 | ||
| 				if wantvn {
 | ||
| 					// Path 7
 | ||
| 					if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
 | ||
| 						// Sufficient workspace for a fast algorithm.
 | ||
| 						ir := 0
 | ||
| 						var ldworkr int
 | ||
| 						if lwork >= wrkbl+lda*n {
 | ||
| 							ldworkr = lda
 | ||
| 						} else {
 | ||
| 							ldworkr = n
 | ||
| 						}
 | ||
| 						itau := ir + ldworkr*n
 | ||
| 						iwork := itau + n
 | ||
| 
 | ||
| 						// Compute A = Q*R, copying result to U.
 | ||
| 						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
 | ||
| 
 | ||
| 						// Copy R to work[ir:], zeroing out below it.
 | ||
| 						impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr)
 | ||
| 						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr)
 | ||
| 
 | ||
| 						// Generate Q in U.
 | ||
| 						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						ie := itau
 | ||
| 						itauq := ie + n
 | ||
| 						itaup := itauq + n
 | ||
| 						iwork = itaup + n
 | ||
| 
 | ||
| 						// Bidiagonalize R in work[ir:].
 | ||
| 						impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:],
 | ||
| 							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Generate left bidiagonalizing vectors in work[ir:].
 | ||
| 						impl.Dorgbr(lapack.ApplyQ, n, n, n, work[ir:], ldworkr,
 | ||
| 							work[itauq:], work[iwork:], lwork-iwork)
 | ||
| 						iwork = ie + n
 | ||
| 
 | ||
| 						// Perform bidiagonal QR iteration, computing left singular
 | ||
| 						// vectors of R in work[ir:].
 | ||
| 						ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], dum, 1,
 | ||
| 							work[ir:], ldworkr, dum, 1, work[iwork:])
 | ||
| 
 | ||
| 						// Multiply Q in U by left singular vectors of R in
 | ||
| 						// work[ir:], storing result in A.
 | ||
| 						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, u, ldu,
 | ||
| 							work[ir:], ldworkr, 0, a, lda)
 | ||
| 
 | ||
| 						// Copy left singular vectors of A from A to U.
 | ||
| 						impl.Dlacpy(blas.All, m, n, a, lda, u, ldu)
 | ||
| 					} else {
 | ||
| 						// Insufficient workspace for a fast algorithm.
 | ||
| 						itau := 0
 | ||
| 						iwork := itau + n
 | ||
| 
 | ||
| 						// Compute A = Q*R, copying result to U.
 | ||
| 						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
 | ||
| 
 | ||
| 						// Generate Q in U.
 | ||
| 						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						ie := itau
 | ||
| 						itauq := ie + n
 | ||
| 						itaup := itauq + n
 | ||
| 						iwork = itaup + n
 | ||
| 
 | ||
| 						// Zero out below R in A.
 | ||
| 						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
 | ||
| 
 | ||
| 						// Bidiagonalize R in A.
 | ||
| 						impl.Dgebrd(n, n, a, lda, s, work[ie:],
 | ||
| 							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Multiply Q in U by left bidiagonalizing vectors in A.
 | ||
| 						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
 | ||
| 							a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
 | ||
| 						iwork = ie + n
 | ||
| 
 | ||
| 						// Perform bidiagonal QR iteration, computing left
 | ||
| 						// singular vectors of A in U.
 | ||
| 						ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:],
 | ||
| 							dum, 1, u, ldu, dum, 1, work[iwork:])
 | ||
| 					}
 | ||
| 				} else if wantvo {
 | ||
| 					// Path 8.
 | ||
| 					panic(noSVDO)
 | ||
| 				} else if wantvas {
 | ||
| 					// Path 9.
 | ||
| 					if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
 | ||
| 						// Sufficient workspace for a fast algorithm.
 | ||
| 						iu := 0
 | ||
| 						var ldworku int
 | ||
| 						if lwork >= wrkbl+lda*n {
 | ||
| 							ldworku = lda
 | ||
| 						} else {
 | ||
| 							ldworku = n
 | ||
| 						}
 | ||
| 						itau := iu + ldworku*n
 | ||
| 						iwork := itau + n
 | ||
| 
 | ||
| 						// Compute A = Q * R, copying result to U.
 | ||
| 						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
 | ||
| 
 | ||
| 						// Generate Q in U.
 | ||
| 						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Copy R to work[iu:], zeroing out below it.
 | ||
| 						impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku)
 | ||
| 						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku)
 | ||
| 
 | ||
| 						ie = itau
 | ||
| 						itauq := ie + n
 | ||
| 						itaup := itauq + n
 | ||
| 						iwork = itaup + n
 | ||
| 
 | ||
| 						// Bidiagonalize R in work[iu:], copying result to VT.
 | ||
| 						impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:],
 | ||
| 							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 						impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt)
 | ||
| 
 | ||
| 						// Generate left bidiagonalizing vectors in work[iu:].
 | ||
| 						impl.Dorgbr(lapack.ApplyQ, n, n, n, work[iu:], ldworku,
 | ||
| 							work[itauq:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Generate right bidiagonalizing vectors in VT.
 | ||
| 						impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
 | ||
| 							work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 						iwork = ie + n
 | ||
| 
 | ||
| 						// Perform bidiagonal QR iteration, computing left singular
 | ||
| 						// vectors of R in work[iu:] and computing right
 | ||
| 						// singular vectors of R in VT.
 | ||
| 						ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:],
 | ||
| 							vt, ldvt, work[iu:], ldworku, dum, 1, work[iwork:])
 | ||
| 
 | ||
| 						// Multiply Q in U by left singular vectors of R in
 | ||
| 						// work[iu:], storing result in A.
 | ||
| 						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1,
 | ||
| 							u, ldu, work[iu:], ldworku, 0, a, lda)
 | ||
| 
 | ||
| 						// Copy left singular vectors of A from A to U.
 | ||
| 						impl.Dlacpy(blas.All, m, n, a, lda, u, ldu)
 | ||
| 
 | ||
| 						/*
 | ||
| 							// Bidiagonalize R in VT.
 | ||
| 							impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
 | ||
| 								work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 							// Multiply Q in U by left bidiagonalizing vectors in VT.
 | ||
| 							impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans,
 | ||
| 								m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 							// Generate right bidiagonalizing vectors in VT.
 | ||
| 							impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
 | ||
| 								work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 							iwork = ie + n
 | ||
| 
 | ||
| 							// Perform bidiagonal QR iteration, computing left singular
 | ||
| 							// vectors of A in U and computing right singular vectors
 | ||
| 							// of A in VT.
 | ||
| 							ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
 | ||
| 								vt, ldvt, u, ldu, dum, 1, work[iwork:])
 | ||
| 						*/
 | ||
| 					} else {
 | ||
| 						// Insufficient workspace for a fast algorithm.
 | ||
| 						itau := 0
 | ||
| 						iwork := itau + n
 | ||
| 
 | ||
| 						// Compute A = Q*R, copying result to U.
 | ||
| 						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
 | ||
| 
 | ||
| 						// Generate Q in U.
 | ||
| 						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Copy R from A to VT, zeroing out below it.
 | ||
| 						impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
 | ||
| 						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
 | ||
| 
 | ||
| 						ie := itau
 | ||
| 						itauq := ie + n
 | ||
| 						itaup := itauq + n
 | ||
| 						iwork = itaup + n
 | ||
| 
 | ||
| 						// Bidiagonalize R in VT.
 | ||
| 						impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
 | ||
| 							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Multiply Q in U by left bidiagonalizing vectors in VT.
 | ||
| 						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans,
 | ||
| 							m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Generate right bidiagonizing vectors in VT.
 | ||
| 						impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
 | ||
| 							work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 						iwork = ie + n
 | ||
| 
 | ||
| 						// Perform bidiagonal QR iteration, computing left singular
 | ||
| 						// vectors of A in U and computing right singular vectors
 | ||
| 						// of A in VT.
 | ||
| 						impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
 | ||
| 							vt, ldvt, u, ldu, dum, 1, work[iwork:])
 | ||
| 					}
 | ||
| 				}
 | ||
| 			}
 | ||
| 		} else {
 | ||
| 			// Path 10.
 | ||
| 			// M at least N, but not much larger.
 | ||
| 			ie = 0
 | ||
| 			itauq := ie + n
 | ||
| 			itaup := itauq + n
 | ||
| 			iwork := itaup + n
 | ||
| 
 | ||
| 			// Bidiagonalize A.
 | ||
| 			impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:],
 | ||
| 				work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 			if wantuas {
 | ||
| 				// Left singular vectors are desired in U. Copy result to U and
 | ||
| 				// generate left biadiagonalizing vectors in U.
 | ||
| 				impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
 | ||
| 				var ncu int
 | ||
| 				if wantus {
 | ||
| 					ncu = n
 | ||
| 				}
 | ||
| 				if wantua {
 | ||
| 					ncu = m
 | ||
| 				}
 | ||
| 				impl.Dorgbr(lapack.ApplyQ, m, ncu, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
 | ||
| 			}
 | ||
| 			if wantvas {
 | ||
| 				// Right singular vectors are desired in VT. Copy result to VT and
 | ||
| 				// generate left biadiagonalizing vectors in VT.
 | ||
| 				impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
 | ||
| 				impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 			}
 | ||
| 			if wantuo {
 | ||
| 				panic(noSVDO)
 | ||
| 			}
 | ||
| 			if wantvo {
 | ||
| 				panic(noSVDO)
 | ||
| 			}
 | ||
| 			iwork = ie + n
 | ||
| 			var nru, ncvt int
 | ||
| 			if wantuas || wantuo {
 | ||
| 				nru = m
 | ||
| 			}
 | ||
| 			if wantun {
 | ||
| 				nru = 0
 | ||
| 			}
 | ||
| 			if wantvas || wantvo {
 | ||
| 				ncvt = n
 | ||
| 			}
 | ||
| 			if wantvn {
 | ||
| 				ncvt = 0
 | ||
| 			}
 | ||
| 			if !wantuo && !wantvo {
 | ||
| 				// Perform bidiagonal QR iteration, if desired, computing left
 | ||
| 				// singular vectors in U and right singular vectors in VT.
 | ||
| 				ok = impl.Dbdsqr(blas.Upper, n, ncvt, nru, 0, s, work[ie:],
 | ||
| 					vt, ldvt, u, ldu, dum, 1, work[iwork:])
 | ||
| 			} else {
 | ||
| 				// There will be two branches when the implementation is complete.
 | ||
| 				panic(noSVDO)
 | ||
| 			}
 | ||
| 		}
 | ||
| 	} else {
 | ||
| 		// A has more columns than rows. If A has sufficiently more columns than
 | ||
| 		// rows, first reduce using the LQ decomposition.
 | ||
| 		if n >= mnthr {
 | ||
| 			// n >> m.
 | ||
| 			if wantvn {
 | ||
| 				// Path 1t.
 | ||
| 				itau := 0
 | ||
| 				iwork := itau + m
 | ||
| 
 | ||
| 				// Compute A = L*Q.
 | ||
| 				impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 				// Zero out above L.
 | ||
| 				impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
 | ||
| 				ie := 0
 | ||
| 				itauq := ie + m
 | ||
| 				itaup := itauq + m
 | ||
| 				iwork = itaup + m
 | ||
| 
 | ||
| 				// Bidiagonalize L in A.
 | ||
| 				impl.Dgebrd(m, m, a, lda, s, work[ie:itauq],
 | ||
| 					work[itauq:itaup], work[itaup:iwork], work[iwork:], lwork-iwork)
 | ||
| 				if wantuo || wantuas {
 | ||
| 					impl.Dorgbr(lapack.ApplyQ, m, m, m, a, lda,
 | ||
| 						work[itauq:], work[iwork:], lwork-iwork)
 | ||
| 				}
 | ||
| 				iwork = ie + m
 | ||
| 				nru := 0
 | ||
| 				if wantuo || wantuas {
 | ||
| 					nru = m
 | ||
| 				}
 | ||
| 
 | ||
| 				// Perform bidiagonal QR iteration, computing left singular vectors
 | ||
| 				// of A in A if desired.
 | ||
| 				ok = impl.Dbdsqr(blas.Upper, m, 0, nru, 0, s, work[ie:],
 | ||
| 					dum, 1, a, lda, dum, 1, work[iwork:])
 | ||
| 
 | ||
| 				// If left singular vectors desired in U, copy them there.
 | ||
| 				if wantuas {
 | ||
| 					impl.Dlacpy(blas.All, m, m, a, lda, u, ldu)
 | ||
| 				}
 | ||
| 			} else if wantvo && wantun {
 | ||
| 				// Path 2t.
 | ||
| 				panic(noSVDO)
 | ||
| 			} else if wantvo && wantuas {
 | ||
| 				// Path 3t.
 | ||
| 				panic(noSVDO)
 | ||
| 			} else if wantvs {
 | ||
| 				if wantun {
 | ||
| 					// Path 4t.
 | ||
| 					if lwork >= m*m+max(4*m, bdspac) {
 | ||
| 						// Sufficient workspace for a fast algorithm.
 | ||
| 						ir := 0
 | ||
| 						var ldworkr int
 | ||
| 						if lwork >= wrkbl+lda*m {
 | ||
| 							ldworkr = lda
 | ||
| 						} else {
 | ||
| 							ldworkr = m
 | ||
| 						}
 | ||
| 						itau := ir + ldworkr*m
 | ||
| 						iwork := itau + m
 | ||
| 
 | ||
| 						// Compute A = L*Q.
 | ||
| 						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Copy L to work[ir:], zeroing out above it.
 | ||
| 						impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr)
 | ||
| 						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr)
 | ||
| 
 | ||
| 						// Generate Q in A.
 | ||
| 						impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						ie := itau
 | ||
| 						itauq := ie + m
 | ||
| 						itaup := itauq + m
 | ||
| 						iwork = itaup + m
 | ||
| 
 | ||
| 						// Bidiagonalize L in work[ir:].
 | ||
| 						impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:],
 | ||
| 							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Generate right vectors bidiagonalizing L in work[ir:].
 | ||
| 						impl.Dorgbr(lapack.ApplyP, m, m, m, work[ir:], ldworkr,
 | ||
| 							work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 						iwork = ie + m
 | ||
| 
 | ||
| 						// Perform bidiagonal QR iteration, computing right singular
 | ||
| 						// vectors of L in work[ir:].
 | ||
| 						ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:],
 | ||
| 							work[ir:], ldworkr, dum, 1, dum, 1, work[iwork:])
 | ||
| 
 | ||
| 						// Multiply right singular vectors of L in work[ir:] by
 | ||
| 						// Q in A, storing result in VT.
 | ||
| 						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
 | ||
| 							work[ir:], ldworkr, a, lda, 0, vt, ldvt)
 | ||
| 					} else {
 | ||
| 						// Insufficient workspace for a fast algorithm.
 | ||
| 						itau := 0
 | ||
| 						iwork := itau + m
 | ||
| 
 | ||
| 						// Compute A = L*Q.
 | ||
| 						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Copy result to VT.
 | ||
| 						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
 | ||
| 
 | ||
| 						// Generate Q in VT.
 | ||
| 						impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						ie := itau
 | ||
| 						itauq := ie + m
 | ||
| 						itaup := itauq + m
 | ||
| 						iwork = itaup + m
 | ||
| 
 | ||
| 						// Zero out above L in A.
 | ||
| 						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
 | ||
| 
 | ||
| 						// Bidiagonalize L in A.
 | ||
| 						impl.Dgebrd(m, m, a, lda, s, work[ie:],
 | ||
| 							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Multiply right vectors bidiagonalizing L by Q in VT.
 | ||
| 						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
 | ||
| 							a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
 | ||
| 						iwork = ie + m
 | ||
| 
 | ||
| 						// Perform bidiagonal QR iteration, computing right
 | ||
| 						// singular vectors of A in VT.
 | ||
| 						ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:],
 | ||
| 							vt, ldvt, dum, 1, dum, 1, work[iwork:])
 | ||
| 					}
 | ||
| 				} else if wantuo {
 | ||
| 					// Path 5t.
 | ||
| 					panic(noSVDO)
 | ||
| 				} else if wantuas {
 | ||
| 					// Path 6t.
 | ||
| 					if lwork >= m*m+max(4*m, bdspac) {
 | ||
| 						// Sufficient workspace for a fast algorithm.
 | ||
| 						iu := 0
 | ||
| 						var ldworku int
 | ||
| 						if lwork >= wrkbl+lda*m {
 | ||
| 							ldworku = lda
 | ||
| 						} else {
 | ||
| 							ldworku = m
 | ||
| 						}
 | ||
| 						itau := iu + ldworku*m
 | ||
| 						iwork := itau + m
 | ||
| 
 | ||
| 						// Compute A = L*Q.
 | ||
| 						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Copy L to work[iu:], zeroing out above it.
 | ||
| 						impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
 | ||
| 						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku)
 | ||
| 
 | ||
| 						// Generate Q in A.
 | ||
| 						impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						ie := itau
 | ||
| 						itauq := ie + m
 | ||
| 						itaup := itauq + m
 | ||
| 						iwork = itaup + m
 | ||
| 
 | ||
| 						// Bidiagonalize L in work[iu:], copying result to U.
 | ||
| 						impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:],
 | ||
| 							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 						impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
 | ||
| 
 | ||
| 						// Generate right bidiagionalizing vectors in work[iu:].
 | ||
| 						impl.Dorgbr(lapack.ApplyP, m, m, m, work[iu:], ldworku,
 | ||
| 							work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Generate left bidiagonalizing vectors in U.
 | ||
| 						impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
 | ||
| 						iwork = ie + m
 | ||
| 
 | ||
| 						// Perform bidiagonal QR iteration, computing left singular
 | ||
| 						// vectors of L in U and computing right singular vectors of
 | ||
| 						// L in work[iu:].
 | ||
| 						ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
 | ||
| 							work[iu:], ldworku, u, ldu, dum, 1, work[iwork:])
 | ||
| 
 | ||
| 						// Multiply right singular vectors of L in work[iu:] by
 | ||
| 						// Q in A, storing result in VT.
 | ||
| 						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
 | ||
| 							work[iu:], ldworku, a, lda, 0, vt, ldvt)
 | ||
| 					} else {
 | ||
| 						// Insufficient workspace for a fast algorithm.
 | ||
| 						itau := 0
 | ||
| 						iwork := itau + m
 | ||
| 
 | ||
| 						// Compute A = L*Q, copying result to VT.
 | ||
| 						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
 | ||
| 
 | ||
| 						// Generate Q in VT.
 | ||
| 						impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Copy L to U, zeroing out above it.
 | ||
| 						impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
 | ||
| 						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
 | ||
| 
 | ||
| 						ie := itau
 | ||
| 						itauq := ie + m
 | ||
| 						itaup := itauq + m
 | ||
| 						iwork = itaup + m
 | ||
| 
 | ||
| 						// Bidiagonalize L in U.
 | ||
| 						impl.Dgebrd(m, m, u, ldu, s, work[ie:],
 | ||
| 							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Multiply right bidiagonalizing vectors in U by Q in VT.
 | ||
| 						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
 | ||
| 							u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Generate left bidiagonalizing vectors in U.
 | ||
| 						impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
 | ||
| 						iwork = ie + m
 | ||
| 
 | ||
| 						// Perform bidiagonal QR iteration, computing left singular
 | ||
| 						// vectors of A in U and computing right singular vectors
 | ||
| 						// of A in VT.
 | ||
| 						impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:], vt, ldvt,
 | ||
| 							u, ldu, dum, 1, work[iwork:])
 | ||
| 					}
 | ||
| 				}
 | ||
| 			} else if wantva {
 | ||
| 				if wantun {
 | ||
| 					// Path 7t.
 | ||
| 					if lwork >= m*m+max(max(n+m, 4*m), bdspac) {
 | ||
| 						// Sufficient workspace for a fast algorithm.
 | ||
| 						ir := 0
 | ||
| 						var ldworkr int
 | ||
| 						if lwork >= wrkbl+lda*m {
 | ||
| 							ldworkr = lda
 | ||
| 						} else {
 | ||
| 							ldworkr = m
 | ||
| 						}
 | ||
| 						itau := ir + ldworkr*m
 | ||
| 						iwork := itau + m
 | ||
| 
 | ||
| 						// Compute A = L*Q, copying result to VT.
 | ||
| 						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
 | ||
| 
 | ||
| 						// Copy L to work[ir:], zeroing out above it.
 | ||
| 						impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr)
 | ||
| 						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr)
 | ||
| 
 | ||
| 						// Generate Q in VT.
 | ||
| 						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						ie := itau
 | ||
| 						itauq := ie + m
 | ||
| 						itaup := itauq + m
 | ||
| 						iwork = itaup + m
 | ||
| 
 | ||
| 						// Bidiagonalize L in work[ir:].
 | ||
| 						impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:],
 | ||
| 							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Generate right bidiagonalizing vectors in work[ir:].
 | ||
| 						impl.Dorgbr(lapack.ApplyP, m, m, m, work[ir:], ldworkr,
 | ||
| 							work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 						iwork = ie + m
 | ||
| 
 | ||
| 						// Perform bidiagonal QR iteration, computing right
 | ||
| 						// singular vectors of L in work[ir:].
 | ||
| 						ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:],
 | ||
| 							work[ir:], ldworkr, dum, 1, dum, 1, work[iwork:])
 | ||
| 
 | ||
| 						// Multiply right singular vectors of L in work[ir:] by
 | ||
| 						// Q in VT, storing result in A.
 | ||
| 						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
 | ||
| 							work[ir:], ldworkr, vt, ldvt, 0, a, lda)
 | ||
| 
 | ||
| 						// Copy right singular vectors of A from A to VT.
 | ||
| 						impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
 | ||
| 					} else {
 | ||
| 						// Insufficient workspace for a fast algorithm.
 | ||
| 						itau := 0
 | ||
| 						iwork := itau + m
 | ||
| 						// Compute A = L * Q, copying result to VT.
 | ||
| 						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
 | ||
| 
 | ||
| 						// Generate Q in VT.
 | ||
| 						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						ie := itau
 | ||
| 						itauq := ie + m
 | ||
| 						itaup := itauq + m
 | ||
| 						iwork = itaup + m
 | ||
| 
 | ||
| 						// Zero out above L in A.
 | ||
| 						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
 | ||
| 
 | ||
| 						// Bidiagonalize L in A.
 | ||
| 						impl.Dgebrd(m, m, a, lda, s, work[ie:], work[itauq:],
 | ||
| 							work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Multiply right bidiagonalizing vectors in A by Q in VT.
 | ||
| 						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
 | ||
| 							a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
 | ||
| 						iwork = ie + m
 | ||
| 
 | ||
| 						// Perform bidiagonal QR iteration, computing right singular
 | ||
| 						// vectors of A in VT.
 | ||
| 						ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:],
 | ||
| 							vt, ldvt, dum, 1, dum, 1, work[iwork:])
 | ||
| 					}
 | ||
| 				} else if wantuo {
 | ||
| 					panic(noSVDO)
 | ||
| 				} else if wantuas {
 | ||
| 					// Path 9t.
 | ||
| 					if lwork >= m*m+max(max(m+n, 4*m), bdspac) {
 | ||
| 						// Sufficient workspace for a fast algorithm.
 | ||
| 						iu := 0
 | ||
| 
 | ||
| 						var ldworku int
 | ||
| 						if lwork >= wrkbl+lda*m {
 | ||
| 							ldworku = lda
 | ||
| 						} else {
 | ||
| 							ldworku = m
 | ||
| 						}
 | ||
| 						itau := iu + ldworku*m
 | ||
| 						iwork := itau + m
 | ||
| 
 | ||
| 						// Generate A = L * Q copying result to VT.
 | ||
| 						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
 | ||
| 
 | ||
| 						// Generate Q in VT.
 | ||
| 						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Copy L to work[iu:], zeroing out above it.
 | ||
| 						impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
 | ||
| 						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku)
 | ||
| 						ie = itau
 | ||
| 						itauq := ie + m
 | ||
| 						itaup := itauq + m
 | ||
| 						iwork = itaup + m
 | ||
| 
 | ||
| 						// Bidiagonalize L in work[iu:], copying result to U.
 | ||
| 						impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:],
 | ||
| 							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 						impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
 | ||
| 
 | ||
| 						// Generate right bidiagonalizing vectors in work[iu:].
 | ||
| 						impl.Dorgbr(lapack.ApplyP, m, m, m, work[iu:], ldworku,
 | ||
| 							work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Generate left bidiagonalizing vectors in U.
 | ||
| 						impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
 | ||
| 						iwork = ie + m
 | ||
| 
 | ||
| 						// Perform bidiagonal QR iteration, computing left singular
 | ||
| 						// vectors of L in U and computing right singular vectors
 | ||
| 						// of L in work[iu:].
 | ||
| 						ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
 | ||
| 							work[iu:], ldworku, u, ldu, dum, 1, work[iwork:])
 | ||
| 
 | ||
| 						// Multiply right singular vectors of L in work[iu:]
 | ||
| 						// Q in VT, storing result in A.
 | ||
| 						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
 | ||
| 							work[iu:], ldworku, vt, ldvt, 0, a, lda)
 | ||
| 
 | ||
| 						// Copy right singular vectors of A from A to VT.
 | ||
| 						impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
 | ||
| 					} else {
 | ||
| 						// Insufficient workspace for a fast algorithm.
 | ||
| 						itau := 0
 | ||
| 						iwork := itau + m
 | ||
| 
 | ||
| 						// Compute A = L * Q, copying result to VT.
 | ||
| 						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
 | ||
| 
 | ||
| 						// Generate Q in VT.
 | ||
| 						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Copy L to U, zeroing out above it.
 | ||
| 						impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
 | ||
| 						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
 | ||
| 
 | ||
| 						ie = itau
 | ||
| 						itauq := ie + m
 | ||
| 						itaup := itauq + m
 | ||
| 						iwork = itaup + m
 | ||
| 
 | ||
| 						// Bidiagonalize L in U.
 | ||
| 						impl.Dgebrd(m, m, u, ldu, s, work[ie:], work[itauq:],
 | ||
| 							work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Multiply right bidiagonalizing vectors in U by Q in VT.
 | ||
| 						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
 | ||
| 							u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
 | ||
| 
 | ||
| 						// Generate left bidiagonalizing vectors in U.
 | ||
| 						impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
 | ||
| 						iwork = ie + m
 | ||
| 
 | ||
| 						// Perform bidiagonal QR iteration, computing left singular
 | ||
| 						// vectors of A in U and computing right singular vectors
 | ||
| 						// of A in VT.
 | ||
| 						ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:],
 | ||
| 							vt, ldvt, u, ldu, dum, 1, work[iwork:])
 | ||
| 					}
 | ||
| 				}
 | ||
| 			}
 | ||
| 		} else {
 | ||
| 			// Path 10t.
 | ||
| 			// N at least M, but not much larger.
 | ||
| 			ie = 0
 | ||
| 			itauq := ie + m
 | ||
| 			itaup := itauq + m
 | ||
| 			iwork := itaup + m
 | ||
| 
 | ||
| 			// Bidiagonalize A.
 | ||
| 			impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 			if wantuas {
 | ||
| 				// If left singular vectors desired in U, copy result to U and
 | ||
| 				// generate left bidiagonalizing vectors in U.
 | ||
| 				impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
 | ||
| 				impl.Dorgbr(lapack.ApplyQ, m, m, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
 | ||
| 			}
 | ||
| 			if wantvas {
 | ||
| 				// If right singular vectors desired in VT, copy result to VT
 | ||
| 				// and generate right bidiagonalizing vectors in VT.
 | ||
| 				impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
 | ||
| 				var nrvt int
 | ||
| 				if wantva {
 | ||
| 					nrvt = n
 | ||
| 				} else {
 | ||
| 					nrvt = m
 | ||
| 				}
 | ||
| 				impl.Dorgbr(lapack.ApplyP, nrvt, n, m, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
 | ||
| 			}
 | ||
| 			if wantuo {
 | ||
| 				panic(noSVDO)
 | ||
| 			}
 | ||
| 			if wantvo {
 | ||
| 				panic(noSVDO)
 | ||
| 			}
 | ||
| 			iwork = ie + m
 | ||
| 			var nru, ncvt int
 | ||
| 			if wantuas || wantuo {
 | ||
| 				nru = m
 | ||
| 			}
 | ||
| 			if wantvas || wantvo {
 | ||
| 				ncvt = n
 | ||
| 			}
 | ||
| 			if !wantuo && !wantvo {
 | ||
| 				// Perform bidiagonal QR iteration, if desired, computing left
 | ||
| 				// singular vectors in U and computing right singular vectors in
 | ||
| 				// VT.
 | ||
| 				ok = impl.Dbdsqr(blas.Lower, m, ncvt, nru, 0, s, work[ie:],
 | ||
| 					vt, ldvt, u, ldu, dum, 1, work[iwork:])
 | ||
| 			} else {
 | ||
| 				// There will be two branches when the implementation is complete.
 | ||
| 				panic(noSVDO)
 | ||
| 			}
 | ||
| 		}
 | ||
| 	}
 | ||
| 	if !ok {
 | ||
| 		if ie > 1 {
 | ||
| 			for i := 0; i < minmn-1; i++ {
 | ||
| 				work[i+1] = work[i+ie]
 | ||
| 			}
 | ||
| 		}
 | ||
| 		if ie < 1 {
 | ||
| 			for i := minmn - 2; i >= 0; i-- {
 | ||
| 				work[i+1] = work[i+ie]
 | ||
| 			}
 | ||
| 		}
 | ||
| 	}
 | ||
| 	// Undo scaling if necessary.
 | ||
| 	if iscl {
 | ||
| 		if anrm > bignum {
 | ||
| 			impl.Dlascl(lapack.General, 0, 0, bignum, anrm, minmn, 1, s, minmn)
 | ||
| 		}
 | ||
| 		if !ok && anrm > bignum {
 | ||
| 			impl.Dlascl(lapack.General, 0, 0, bignum, anrm, minmn-1, 1, work[minmn:], minmn)
 | ||
| 		}
 | ||
| 		if anrm < smlnum {
 | ||
| 			impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, minmn, 1, s, minmn)
 | ||
| 		}
 | ||
| 		if !ok && anrm < smlnum {
 | ||
| 			impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, minmn-1, 1, work[minmn:], minmn)
 | ||
| 		}
 | ||
| 	}
 | ||
| 	work[0] = float64(maxwrk)
 | ||
| 	return ok
 | ||
| }
 | 
