mirror of
https://github.com/gonum/gonum.git
synced 2025-10-28 09:31:40 +08:00
so that it is possible to pass all slices as nil to those functions. A workspace query should depend only on the input sizes, the matrices and vectors must not be accessed anyway or may not be available at the call site. Moreover, often the reference LAPACK uses arbitrary placeholders for arrays and relies on the fact that no checking is done.
383 lines
11 KiB
Go
383 lines
11 KiB
Go
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||
// Use of this source code is governed by a BSD-style
|
||
// license that can be found in the LICENSE file.
|
||
|
||
package native
|
||
|
||
import (
|
||
"math"
|
||
|
||
"github.com/gonum/blas"
|
||
"github.com/gonum/blas/blas64"
|
||
"github.com/gonum/lapack"
|
||
)
|
||
|
||
// Dlaqr2 performs the orthogonal similarity transformation of an n×n upper
|
||
// Hessenberg matrix to detect and deflate fully converged eigenvalues from a
|
||
// trailing principal submatrix using aggressive early deflation [1].
|
||
//
|
||
// On return, H will be overwritten by a new Hessenberg matrix that is a
|
||
// perturbation of an orthogonal similarity transformation of H. It is hoped
|
||
// that on output H will have many zero subdiagonal entries.
|
||
//
|
||
// If wantt is true, the matrix H will be fully updated so that the
|
||
// quasi-triangular Schur factor can be computed. If wantt is false, then only
|
||
// enough of H will be updated to preserve the eigenvalues.
|
||
//
|
||
// If wantz is true, the orthogonal similarity transformation will be
|
||
// accumulated into Z[iloz:ihiz+1,ktop:kbot+1], otherwise Z is not referenced.
|
||
//
|
||
// ktop and kbot determine a block [ktop:kbot+1,ktop:kbot+1] along the diagonal
|
||
// of H. It must hold that
|
||
// 0 <= ilo <= ihi < n, if n > 0,
|
||
// ilo == 0 and ihi == -1, if n == 0,
|
||
// and the block must be isolated, that is, it must hold that
|
||
// ktop == 0 or H[ktop,ktop-1] == 0,
|
||
// kbot == n-1 or H[kbot+1,kbot] == 0,
|
||
// otherwise Dlaqr2 will panic.
|
||
//
|
||
// nw is the deflation window size. It must hold that
|
||
// 0 <= nw <= kbot-ktop+1,
|
||
// otherwise Dlaqr2 will panic.
|
||
//
|
||
// iloz and ihiz specify the rows of the n×n matrix Z to which transformations
|
||
// will be applied if wantz is true. It must hold that
|
||
// 0 <= iloz <= ktop, and kbot <= ihiz < n,
|
||
// otherwise Dlaqr2 will panic.
|
||
//
|
||
// sr and si must have length kbot+1, otherwise Dlaqr2 will panic.
|
||
//
|
||
// v and ldv represent an nw×nw work matrix.
|
||
// t and ldt represent an nw×nh work matrix, and nh must be at least nw.
|
||
// wv and ldwv represent an nv×nw work matrix.
|
||
//
|
||
// work must have length at least lwork and lwork must be at least max(1,2*nw),
|
||
// otherwise Dlaqr2 will panic. Larger values of lwork may result in greater
|
||
// efficiency. On return, work[0] will contain the optimal value of lwork.
|
||
//
|
||
// If lwork is -1, instead of performing Dlaqr2, the function only estimates the
|
||
// optimal workspace size and stores it into work[0]. Neither h nor z are
|
||
// accessed.
|
||
//
|
||
// On return, ns and nd will contain respectively the number of unconverged
|
||
// (i.e., approximate) eigenvalues and converged eigenvalues that are stored in
|
||
// sr and si.
|
||
//
|
||
// On return, the real and imaginary parts of approximate eigenvalues that may
|
||
// be used for shifts will be stored respectively in sr[kbot-nd-ns+1:kbot-nd+1]
|
||
// and si[kbot-nd-ns+1:kbot-nd+1].
|
||
//
|
||
// On return, the real and imaginary parts of converged eigenvalues will be
|
||
// stored respectively in sr[kbot-nd+1:kbot+1] and si[kbot-nd+1:kbot+1].
|
||
//
|
||
// References:
|
||
// [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
|
||
// Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973
|
||
// URL: http://dx.doi.org/10.1137/S0895479801384585
|
||
//
|
||
func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int) (ns, nd int) {
|
||
switch {
|
||
case ktop < 0 || max(0, n-1) < ktop:
|
||
panic("lapack: invalid value of ktop")
|
||
case kbot < min(ktop, n-1) || n <= kbot:
|
||
panic("lapack: invalid value of kbot")
|
||
case nw < 0 || kbot-ktop+1 < nw:
|
||
panic("lapack: invalid value of nw")
|
||
case nh < nw:
|
||
panic("lapack: invalid value of nh")
|
||
case lwork < max(1, 2*nw) && lwork != -1:
|
||
panic(badWork)
|
||
case len(work) < lwork:
|
||
panic(shortWork)
|
||
}
|
||
if wantz {
|
||
switch {
|
||
case iloz < 0 || ktop < iloz:
|
||
panic("lapack: invalid value of iloz")
|
||
case ihiz < kbot || n <= ihiz:
|
||
panic("lapack: invalid value of ihiz")
|
||
}
|
||
}
|
||
|
||
// LAPACK code does not enforce the documented behavior
|
||
// nw <= kbot-ktop+1
|
||
// but we do (we panic above).
|
||
jw := nw
|
||
lwkopt := max(1, 2*nw)
|
||
if jw > 2 {
|
||
// Workspace query call to Dgehrd.
|
||
impl.Dgehrd(jw, 0, jw-2, t, ldt, nil, work, -1)
|
||
lwk1 := int(work[0])
|
||
// Workspace query call to Dormhr.
|
||
impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, t, ldt, work, v, ldv, work, -1)
|
||
lwk2 := int(work[0])
|
||
// Optimal workspace.
|
||
lwkopt = jw + max(lwk1, lwk2)
|
||
}
|
||
|
||
// Quick return in case of workspace query.
|
||
if lwork == -1 {
|
||
work[0] = float64(lwkopt)
|
||
return 0, 0
|
||
}
|
||
|
||
// Check input slices only after workspace query has been handled.
|
||
checkMatrix(n, n, h, ldh)
|
||
checkMatrix(nw, nw, v, ldv)
|
||
checkMatrix(nw, nh, t, ldt)
|
||
checkMatrix(nv, nw, wv, ldwv)
|
||
if wantz {
|
||
checkMatrix(n, n, z, ldz)
|
||
}
|
||
switch {
|
||
case ktop > 0 && h[ktop*ldh+ktop-1] != 0:
|
||
panic("lapack: block not isolated")
|
||
case kbot+1 < n && h[(kbot+1)*ldh+kbot] != 0:
|
||
panic("lapack: block not isolated")
|
||
case len(sr) != kbot+1:
|
||
panic("lapack: bad length of sr")
|
||
case len(si) != kbot+1:
|
||
panic("lapack: bad length of si")
|
||
}
|
||
|
||
if nw == 0 {
|
||
return 0, 0
|
||
}
|
||
|
||
// Machine constants.
|
||
ulp := dlamchP
|
||
smlnum := float64(n) / ulp * dlamchS
|
||
|
||
// Setup deflation window.
|
||
var s float64
|
||
kwtop := kbot - jw + 1
|
||
if kwtop != ktop {
|
||
s = h[kwtop*ldh+kwtop-1]
|
||
}
|
||
if kwtop == kbot {
|
||
// 1×1 deflation window.
|
||
sr[kwtop] = h[kwtop*ldh+kwtop]
|
||
si[kwtop] = 0
|
||
ns = 1
|
||
nd = 0
|
||
if math.Abs(s) <= math.Max(smlnum, ulp*math.Abs(h[kwtop*ldh+kwtop])) {
|
||
ns = 0
|
||
nd = 1
|
||
if kwtop > ktop {
|
||
h[kwtop*ldh+kwtop-1] = 0
|
||
}
|
||
}
|
||
work[0] = 1
|
||
return ns, nd
|
||
}
|
||
|
||
// Convert to spike-triangular form. In case of a rare QR failure, this
|
||
// routine continues to do aggressive early deflation using that part of
|
||
// the deflation window that converged using infqr here and there to
|
||
// keep track.
|
||
impl.Dlacpy(blas.Upper, jw, jw, h[kwtop*ldh+kwtop:], ldh, t, ldt)
|
||
bi := blas64.Implementation()
|
||
bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1)
|
||
impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv)
|
||
infqr := impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
|
||
// Note that ilo == 0 which conveniently coincides with the success
|
||
// value of infqr, that is, infqr as an index always points to the first
|
||
// converged eigenvalue.
|
||
|
||
// Dtrexc needs a clean margin near the diagonal.
|
||
for j := 0; j < jw-3; j++ {
|
||
t[(j+2)*ldt+j] = 0
|
||
t[(j+3)*ldt+j] = 0
|
||
}
|
||
if jw >= 3 {
|
||
t[(jw-1)*ldt+jw-3] = 0
|
||
}
|
||
|
||
ns = jw
|
||
ilst := infqr
|
||
// Deflation detection loop.
|
||
for ilst < ns {
|
||
bulge := false
|
||
if ns >= 2 {
|
||
bulge = t[(ns-1)*ldt+ns-2] != 0
|
||
}
|
||
if !bulge {
|
||
// Real eigenvalue.
|
||
abst := math.Abs(t[(ns-1)*ldt+ns-1])
|
||
if abst == 0 {
|
||
abst = math.Abs(s)
|
||
}
|
||
if math.Abs(s*v[ns-1]) <= math.Max(smlnum, ulp*abst) {
|
||
// Deflatable.
|
||
ns--
|
||
} else {
|
||
// Undeflatable, move it up out of the way.
|
||
// Dtrexc can not fail in this case.
|
||
_, ilst, _ = impl.Dtrexc(lapack.EigDecomp, jw, t, ldt, v, ldv, ns-1, ilst, work)
|
||
ilst++
|
||
}
|
||
continue
|
||
}
|
||
// Complex conjugate pair.
|
||
abst := math.Abs(t[(ns-1)*ldt+ns-1]) + math.Sqrt(math.Abs(t[(ns-1)*ldt+ns-2]))*math.Sqrt(math.Abs(t[(ns-2)*ldt+ns-1]))
|
||
if abst == 0 {
|
||
abst = math.Abs(s)
|
||
}
|
||
if math.Max(math.Abs(s*v[ns-1]), math.Abs(s*v[ns-2])) <= math.Max(smlnum, ulp*abst) {
|
||
// Deflatable.
|
||
ns -= 2
|
||
} else {
|
||
// Undeflatable, move them up out of the way.
|
||
// Dtrexc does the right thing with ilst in case of a
|
||
// rare exchange failure.
|
||
_, ilst, _ = impl.Dtrexc(lapack.EigDecomp, jw, t, ldt, v, ldv, ns-1, ilst, work)
|
||
ilst += 2
|
||
}
|
||
}
|
||
|
||
// Return to Hessenberg form.
|
||
if ns == 0 {
|
||
s = 0
|
||
}
|
||
if ns < jw {
|
||
// Sorting diagonal blocks of T improves accuracy for graded
|
||
// matrices. Bubble sort deals well with exchange failures.
|
||
sorted := false
|
||
i := ns
|
||
for !sorted {
|
||
sorted = true
|
||
kend := i - 1
|
||
i = infqr
|
||
var k int
|
||
if i == ns-1 || t[(i+1)*ldt+i] == 0 {
|
||
k = i + 1
|
||
} else {
|
||
k = i + 2
|
||
}
|
||
for k <= kend {
|
||
var evi float64
|
||
if k == i+1 {
|
||
evi = math.Abs(t[i*ldt+i])
|
||
} else {
|
||
evi = math.Abs(t[i*ldt+i]) + math.Sqrt(math.Abs(t[(i+1)*ldt+i]))*math.Sqrt(math.Abs(t[i*ldt+i+1]))
|
||
}
|
||
|
||
var evk float64
|
||
if k == kend || t[(k+1)*ldt+k] == 0 {
|
||
evk = math.Abs(t[k*ldt+k])
|
||
} else {
|
||
evk = math.Abs(t[k*ldt+k]) + math.Sqrt(math.Abs(t[(k+1)*ldt+k]))*math.Sqrt(math.Abs(t[k*ldt+k+1]))
|
||
}
|
||
|
||
if evi >= evk {
|
||
i = k
|
||
} else {
|
||
sorted = false
|
||
_, ilst, ok := impl.Dtrexc(lapack.EigDecomp, jw, t, ldt, v, ldv, i, k, work)
|
||
if ok {
|
||
i = ilst
|
||
} else {
|
||
i = k
|
||
}
|
||
}
|
||
if i == kend || t[(i+1)*ldt+i] == 0 {
|
||
k = i + 1
|
||
} else {
|
||
k = i + 2
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// Restore shift/eigenvalue array from T.
|
||
for i := jw - 1; i >= infqr; {
|
||
if i == infqr || t[i*ldt+i-1] == 0 {
|
||
sr[kwtop+i] = t[i*ldt+i]
|
||
si[kwtop+i] = 0
|
||
i--
|
||
continue
|
||
}
|
||
aa := t[(i-1)*ldt+i-1]
|
||
bb := t[(i-1)*ldt+i]
|
||
cc := t[i*ldt+i-1]
|
||
dd := t[i*ldt+i]
|
||
_, _, _, _, sr[kwtop+i-1], si[kwtop+i-1], sr[kwtop+i], si[kwtop+i], _, _ = impl.Dlanv2(aa, bb, cc, dd)
|
||
i -= 2
|
||
}
|
||
|
||
if ns < jw || s == 0 {
|
||
if ns > 1 && s != 0 {
|
||
// Reflect spike back into lower triangle.
|
||
bi.Dcopy(ns, v[:ns], 1, work[:ns], 1)
|
||
_, tau := impl.Dlarfg(ns, work[0], work[1:ns], 1)
|
||
work[0] = 1
|
||
impl.Dlaset(blas.Lower, jw-2, jw-2, 0, 0, t[2*ldt:], ldt)
|
||
impl.Dlarf(blas.Left, ns, jw, work[:ns], 1, tau, t, ldt, work[jw:])
|
||
impl.Dlarf(blas.Right, ns, ns, work[:ns], 1, tau, t, ldt, work[jw:])
|
||
impl.Dlarf(blas.Right, jw, ns, work[:ns], 1, tau, v, ldv, work[jw:])
|
||
impl.Dgehrd(jw, 0, ns-1, t, ldt, work[:jw-1], work[jw:], lwork-jw)
|
||
}
|
||
|
||
// Copy updated reduced window into place.
|
||
if kwtop > 0 {
|
||
h[kwtop*ldh+kwtop-1] = s * v[0]
|
||
}
|
||
impl.Dlacpy(blas.Upper, jw, jw, t, ldt, h[kwtop*ldh+kwtop:], ldh)
|
||
bi.Dcopy(jw-1, t[ldt:], ldt+1, h[(kwtop+1)*ldh+kwtop:], ldh+1)
|
||
|
||
// Accumulate orthogonal matrix in order to update H and Z, if
|
||
// requested.
|
||
if ns > 1 && s != 0 {
|
||
// work[:ns-1] contains the elementary reflectors stored
|
||
// by a call to Dgehrd above.
|
||
impl.Dormhr(blas.Right, blas.NoTrans, jw, ns, 0, ns-1,
|
||
t, ldt, work[:ns-1], v, ldv, work[jw:], lwork-jw)
|
||
}
|
||
|
||
// Update vertical slab in H.
|
||
var ltop int
|
||
if !wantt {
|
||
ltop = ktop
|
||
}
|
||
for krow := ltop; krow < kwtop; krow += nv {
|
||
kln := min(nv, kwtop-krow)
|
||
bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
|
||
1, h[krow*ldh+kwtop:], ldh, v, ldv,
|
||
0, wv, ldwv)
|
||
impl.Dlacpy(blas.All, kln, jw, wv, ldwv, h[krow*ldh+kwtop:], ldh)
|
||
}
|
||
|
||
// Update horizontal slab in H.
|
||
if wantt {
|
||
for kcol := kbot + 1; kcol < n; kcol += nh {
|
||
kln := min(nh, n-kcol)
|
||
bi.Dgemm(blas.Trans, blas.NoTrans, jw, kln, jw,
|
||
1, v, ldv, h[kwtop*ldh+kcol:], ldh,
|
||
0, t, ldt)
|
||
impl.Dlacpy(blas.All, jw, kln, t, ldt, h[kwtop*ldh+kcol:], ldh)
|
||
}
|
||
}
|
||
|
||
// Update vertical slab in Z.
|
||
if wantz {
|
||
for krow := iloz; krow <= ihiz; krow += nv {
|
||
kln := min(nv, ihiz-krow+1)
|
||
bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
|
||
1, z[krow*ldz+kwtop:], ldz, v, ldv,
|
||
0, wv, ldwv)
|
||
impl.Dlacpy(blas.All, kln, jw, wv, ldwv, z[krow*ldz+kwtop:], ldz)
|
||
}
|
||
}
|
||
}
|
||
|
||
// The number of deflations.
|
||
nd = jw - ns
|
||
// Shifts are converged eigenvalues that could not be deflated.
|
||
// Subtracting infqr from the spike length takes care of the case of a
|
||
// rare QR failure while calculating eigenvalues of the deflation
|
||
// window.
|
||
ns -= infqr
|
||
work[0] = float64(lwkopt)
|
||
return ns, nd
|
||
}
|