mirror of
https://github.com/gonum/gonum.git
synced 2025-10-05 07:06:54 +08:00
476 lines
16 KiB
Go
476 lines
16 KiB
Go
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||
// Use of this source code is governed by a BSD-style
|
||
// license that can be found in the LICENSE file.
|
||
|
||
package native
|
||
|
||
import (
|
||
"math"
|
||
|
||
"gonum.org/v1/gonum/blas"
|
||
)
|
||
|
||
// Dlaqr04 computes the eigenvalues of a block of an n×n upper Hessenberg matrix
|
||
// H, and optionally the matrices T and Z from the Schur decomposition
|
||
// H = Z T Z^T
|
||
// where T is an upper quasi-triangular matrix (the Schur form), and Z is the
|
||
// orthogonal matrix of Schur vectors.
|
||
//
|
||
// wantt indicates whether the full Schur form T is required. If wantt is false,
|
||
// then only enough of H will be updated to preserve the eigenvalues.
|
||
//
|
||
// wantz indicates whether the n×n matrix of Schur vectors Z is required. If it
|
||
// is true, the orthogonal similarity transformation will be accumulated into
|
||
// Z[iloz:ihiz+1,ilo:ihi+1], otherwise Z will not be referenced.
|
||
//
|
||
// ilo and ihi determine the block of H on which Dlaqr04 operates. It must hold that
|
||
// 0 <= ilo <= ihi < n, if n > 0,
|
||
// ilo == 0 and ihi == -1, if n == 0,
|
||
// and the block must be isolated, that is,
|
||
// ilo == 0 or H[ilo,ilo-1] == 0,
|
||
// ihi == n-1 or H[ihi+1,ihi] == 0,
|
||
// otherwise Dlaqr04 will panic.
|
||
//
|
||
// wr and wi must have length ihi+1.
|
||
//
|
||
// iloz and ihiz specify the rows of Z to which transformations will be applied
|
||
// if wantz is true. It must hold that
|
||
// 0 <= iloz <= ilo, and ihi <= ihiz < n,
|
||
// otherwise Dlaqr04 will panic.
|
||
//
|
||
// work must have length at least lwork and lwork must be
|
||
// lwork >= 1, if n <= 11,
|
||
// lwork >= n, if n > 11,
|
||
// otherwise Dlaqr04 will panic. lwork as large as 6*n may be required for
|
||
// optimal performance. On return, work[0] will contain the optimal value of
|
||
// lwork.
|
||
//
|
||
// If lwork is -1, instead of performing Dlaqr04, the function only estimates the
|
||
// optimal workspace size and stores it into work[0]. Neither h nor z are
|
||
// accessed.
|
||
//
|
||
// recur is the non-negative recursion depth. For recur > 0, Dlaqr04 behaves
|
||
// as DLAQR0, for recur == 0 it behaves as DLAQR4.
|
||
//
|
||
// unconverged indicates whether Dlaqr04 computed all the eigenvalues of H[ilo:ihi+1,ilo:ihi+1].
|
||
//
|
||
// If unconverged is zero and wantt is true, H will contain on return the upper
|
||
// quasi-triangular matrix T from the Schur decomposition. 2×2 diagonal blocks
|
||
// (corresponding to complex conjugate pairs of eigenvalues) will be returned in
|
||
// standard form, with H[i,i] == H[i+1,i+1] and H[i+1,i]*H[i,i+1] < 0.
|
||
//
|
||
// If unconverged is zero and if wantt is false, the contents of h on return is
|
||
// unspecified.
|
||
//
|
||
// If unconverged is zero, all the eigenvalues have been computed and their real
|
||
// and imaginary parts will be stored on return in wr[ilo:ihi+1] and
|
||
// wi[ilo:ihi+1], respectively. If two eigenvalues are computed as a complex
|
||
// conjugate pair, they are stored in consecutive elements of wr and wi, say the
|
||
// i-th and (i+1)th, with wi[i] > 0 and wi[i+1] < 0. If wantt is true, then the
|
||
// eigenvalues are stored in the same order as on the diagonal of the Schur form
|
||
// returned in H, with wr[i] = H[i,i] and, if H[i:i+2,i:i+2] is a 2×2 diagonal
|
||
// block, wi[i] = sqrt(-H[i+1,i]*H[i,i+1]) and wi[i+1] = -wi[i].
|
||
//
|
||
// If unconverged is positive, some eigenvalues have not converged, and
|
||
// wr[unconverged:ihi+1] and wi[unconverged:ihi+1] will contain those
|
||
// eigenvalues which have been successfully computed. Failures are rare.
|
||
//
|
||
// If unconverged is positive and wantt is true, then on return
|
||
// (initial H)*U = U*(final H), (*)
|
||
// where U is an orthogonal matrix. The final H is upper Hessenberg and
|
||
// H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular.
|
||
//
|
||
// If unconverged is positive and wantt is false, on return the remaining
|
||
// unconverged eigenvalues are the eigenvalues of the upper Hessenberg matrix
|
||
// H[ilo:unconverged,ilo:unconverged].
|
||
//
|
||
// If unconverged is positive and wantz is true, then on return
|
||
// (final Z) = (initial Z)*U,
|
||
// where U is the orthogonal matrix in (*) regardless of the value of wantt.
|
||
//
|
||
// References:
|
||
// [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part I:
|
||
// Maintaining Well-Focused Shifts and Level 3 Performance. SIAM J. Matrix
|
||
// Anal. Appl. 23(4) (2002), pp. 929—947
|
||
// URL: http://dx.doi.org/10.1137/S0895479801384573
|
||
// [2] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
|
||
// Aggressive Early Deflation. SIAM J. Matrix Anal. Appl. 23(4) (2002), pp. 948—973
|
||
// URL: http://dx.doi.org/10.1137/S0895479801384585
|
||
//
|
||
// Dlaqr04 is an internal routine. It is exported for testing purposes.
|
||
func (impl Implementation) Dlaqr04(wantt, wantz bool, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, iloz, ihiz int, z []float64, ldz int, work []float64, lwork int, recur int) (unconverged int) {
|
||
const (
|
||
// Matrices of order ntiny or smaller must be processed by
|
||
// Dlahqr because of insufficient subdiagonal scratch space.
|
||
// This is a hard limit.
|
||
ntiny = 11
|
||
// Exceptional deflation windows: try to cure rare slow
|
||
// convergence by varying the size of the deflation window after
|
||
// kexnw iterations.
|
||
kexnw = 5
|
||
// Exceptional shifts: try to cure rare slow convergence with
|
||
// ad-hoc exceptional shifts every kexsh iterations.
|
||
kexsh = 6
|
||
|
||
// See https://github.com/gonum/lapack/pull/151#discussion_r68162802
|
||
// and the surrounding discussion for an explanation where these
|
||
// constants come from.
|
||
// TODO(vladimir-ch): Similar constants for exceptional shifts
|
||
// are used also in dlahqr.go. The first constant is different
|
||
// there, it is equal to 3. Why? And does it matter?
|
||
wilk1 = 0.75
|
||
wilk2 = -0.4375
|
||
)
|
||
|
||
switch {
|
||
case ilo < 0 || max(0, n-1) < ilo:
|
||
panic(badIlo)
|
||
case ihi < min(ilo, n-1) || n <= ihi:
|
||
panic(badIhi)
|
||
case lwork < 1 && n <= ntiny && lwork != -1:
|
||
panic(badWork)
|
||
// TODO(vladimir-ch): Enable if and when we figure out what the minimum
|
||
// necessary lwork value is. Dlaqr04 says that the minimum is n which
|
||
// clashes with Dlaqr23's opinion about optimal work when nw <= 2
|
||
// (independent of n).
|
||
// case lwork < n && n > ntiny && lwork != -1:
|
||
// panic(badWork)
|
||
case len(work) < lwork:
|
||
panic(shortWork)
|
||
case recur < 0:
|
||
panic("lapack: recur is negative")
|
||
}
|
||
if wantz {
|
||
if iloz < 0 || ilo < iloz {
|
||
panic("lapack: invalid value of iloz")
|
||
}
|
||
if ihiz < ihi || n <= ihiz {
|
||
panic("lapack: invalid value of ihiz")
|
||
}
|
||
}
|
||
if lwork != -1 {
|
||
checkMatrix(n, n, h, ldh)
|
||
if wantz {
|
||
checkMatrix(n, n, z, ldz)
|
||
}
|
||
switch {
|
||
case ilo > 0 && h[ilo*ldh+ilo-1] != 0:
|
||
panic("lapack: block not isolated")
|
||
case ihi+1 < n && h[(ihi+1)*ldh+ihi] != 0:
|
||
panic("lapack: block not isolated")
|
||
case len(wr) != ihi+1:
|
||
panic("lapack: bad length of wr")
|
||
case len(wi) != ihi+1:
|
||
panic("lapack: bad length of wi")
|
||
}
|
||
}
|
||
|
||
// Quick return.
|
||
if n == 0 {
|
||
work[0] = 1
|
||
return 0
|
||
}
|
||
|
||
if n <= ntiny {
|
||
// Tiny matrices must use Dlahqr.
|
||
work[0] = 1
|
||
if lwork == -1 {
|
||
return 0
|
||
}
|
||
return impl.Dlahqr(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, iloz, ihiz, z, ldz)
|
||
}
|
||
|
||
// Use small bulge multi-shift QR with aggressive early deflation on
|
||
// larger-than-tiny matrices.
|
||
var jbcmpz string
|
||
if wantt {
|
||
jbcmpz = "S"
|
||
} else {
|
||
jbcmpz = "E"
|
||
}
|
||
if wantz {
|
||
jbcmpz += "V"
|
||
} else {
|
||
jbcmpz += "N"
|
||
}
|
||
|
||
var fname string
|
||
if recur > 0 {
|
||
fname = "DLAQR0"
|
||
} else {
|
||
fname = "DLAQR4"
|
||
}
|
||
// nwr is the recommended deflation window size. n is greater than 11,
|
||
// so there is enough subdiagonal workspace for nwr >= 2 as required.
|
||
// (In fact, there is enough subdiagonal space for nwr >= 3.)
|
||
// TODO(vladimir-ch): If there is enough space for nwr >= 3, should we
|
||
// use it?
|
||
nwr := impl.Ilaenv(13, fname, jbcmpz, n, ilo, ihi, lwork)
|
||
nwr = max(2, nwr)
|
||
nwr = min(ihi-ilo+1, min((n-1)/3, nwr))
|
||
|
||
// nsr is the recommended number of simultaneous shifts. n is greater
|
||
// than 11, so there is enough subdiagonal workspace for nsr to be even
|
||
// and greater than or equal to two as required.
|
||
nsr := impl.Ilaenv(15, fname, jbcmpz, n, ilo, ihi, lwork)
|
||
nsr = min(nsr, min((n+6)/9, ihi-ilo))
|
||
nsr = max(2, nsr&^1)
|
||
|
||
// Workspace query call to Dlaqr23.
|
||
impl.Dlaqr23(wantt, wantz, n, ilo, ihi, nwr+1, nil, 0, iloz, ihiz, nil, 0,
|
||
nil, nil, nil, 0, n, nil, 0, n, nil, 0, work, -1, recur)
|
||
// Optimal workspace is max(Dlaqr5, Dlaqr23).
|
||
lwkopt := max(3*nsr/2, int(work[0]))
|
||
// Quick return in case of workspace query.
|
||
if lwork == -1 {
|
||
work[0] = float64(lwkopt)
|
||
return 0
|
||
}
|
||
|
||
// Dlahqr/Dlaqr04 crossover point.
|
||
nmin := impl.Ilaenv(12, fname, jbcmpz, n, ilo, ihi, lwork)
|
||
nmin = max(ntiny, nmin)
|
||
|
||
// Nibble determines when to skip a multi-shift QR sweep (Dlaqr5).
|
||
nibble := impl.Ilaenv(14, fname, jbcmpz, n, ilo, ihi, lwork)
|
||
nibble = max(0, nibble)
|
||
|
||
// Computation mode of far-from-diagonal orthogonal updates in Dlaqr5.
|
||
kacc22 := impl.Ilaenv(16, fname, jbcmpz, n, ilo, ihi, lwork)
|
||
kacc22 = max(0, min(kacc22, 2))
|
||
|
||
// nwmax is the largest possible deflation window for which there is
|
||
// sufficient workspace.
|
||
nwmax := min((n-1)/3, lwork/2)
|
||
nw := nwmax // Start with maximum deflation window size.
|
||
|
||
// nsmax is the largest number of simultaneous shifts for which there is
|
||
// sufficient workspace.
|
||
nsmax := min((n+6)/9, 2*lwork/3) &^ 1
|
||
|
||
ndfl := 1 // Number of iterations since last deflation.
|
||
ndec := 0 // Deflation window size decrement.
|
||
|
||
// Main loop.
|
||
var (
|
||
itmax = max(30, 2*kexsh) * max(10, (ihi-ilo+1))
|
||
it = 0
|
||
)
|
||
for kbot := ihi; kbot >= ilo; {
|
||
if it == itmax {
|
||
unconverged = kbot + 1
|
||
break
|
||
}
|
||
it++
|
||
|
||
// Locate active block.
|
||
ktop := ilo
|
||
for k := kbot; k >= ilo+1; k-- {
|
||
if h[k*ldh+k-1] == 0 {
|
||
ktop = k
|
||
break
|
||
}
|
||
}
|
||
|
||
// Select deflation window size nw.
|
||
//
|
||
// Typical Case:
|
||
// If possible and advisable, nibble the entire active block.
|
||
// If not, use size min(nwr,nwmax) or min(nwr+1,nwmax)
|
||
// depending upon which has the smaller corresponding
|
||
// subdiagonal entry (a heuristic).
|
||
//
|
||
// Exceptional Case:
|
||
// If there have been no deflations in kexnw or more
|
||
// iterations, then vary the deflation window size. At first,
|
||
// because larger windows are, in general, more powerful than
|
||
// smaller ones, rapidly increase the window to the maximum
|
||
// possible. Then, gradually reduce the window size.
|
||
nh := kbot - ktop + 1
|
||
nwupbd := min(nh, nwmax)
|
||
if ndfl < kexnw {
|
||
nw = min(nwupbd, nwr)
|
||
} else {
|
||
nw = min(nwupbd, 2*nw)
|
||
}
|
||
if nw < nwmax {
|
||
if nw >= nh-1 {
|
||
nw = nh
|
||
} else {
|
||
kwtop := kbot - nw + 1
|
||
if math.Abs(h[kwtop*ldh+kwtop-1]) > math.Abs(h[(kwtop-1)*ldh+kwtop-2]) {
|
||
nw++
|
||
}
|
||
}
|
||
}
|
||
if ndfl < kexnw {
|
||
ndec = -1
|
||
} else if ndec >= 0 || nw >= nwupbd {
|
||
ndec++
|
||
if nw-ndec < 2 {
|
||
ndec = 0
|
||
}
|
||
nw -= ndec
|
||
}
|
||
|
||
// Split workspace under the subdiagonal of H into:
|
||
// - an nw×nw work array V in the lower left-hand corner,
|
||
// - an nw×nhv horizontal work array along the bottom edge (nhv
|
||
// must be at least nw but more is better),
|
||
// - an nve×nw vertical work array along the left-hand-edge
|
||
// (nhv can be any positive integer but more is better).
|
||
kv := n - nw
|
||
kt := nw
|
||
kwv := nw + 1
|
||
nhv := n - kwv - kt
|
||
// Aggressive early deflation.
|
||
ls, ld := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw,
|
||
h, ldh, iloz, ihiz, z, ldz, wr[:kbot+1], wi[:kbot+1],
|
||
h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork, recur)
|
||
|
||
// Adjust kbot accounting for new deflations.
|
||
kbot -= ld
|
||
// ks points to the shifts.
|
||
ks := kbot - ls + 1
|
||
|
||
// Skip an expensive QR sweep if there is a (partly heuristic)
|
||
// reason to expect that many eigenvalues will deflate without
|
||
// it. Here, the QR sweep is skipped if many eigenvalues have
|
||
// just been deflated or if the remaining active block is small.
|
||
if ld > 0 && (100*ld > nw*nibble || kbot-ktop+1 <= min(nmin, nwmax)) {
|
||
// ld is positive, note progress.
|
||
ndfl = 1
|
||
continue
|
||
}
|
||
|
||
// ns is the nominal number of simultaneous shifts. This may be
|
||
// lowered (slightly) if Dlaqr23 did not provide that many
|
||
// shifts.
|
||
ns := min(min(nsmax, nsr), max(2, kbot-ktop)) &^ 1
|
||
|
||
// If there have been no deflations in a multiple of kexsh
|
||
// iterations, then try exceptional shifts. Otherwise use shifts
|
||
// provided by Dlaqr23 above or from the eigenvalues of a
|
||
// trailing principal submatrix.
|
||
if ndfl%kexsh == 0 {
|
||
ks = kbot - ns + 1
|
||
for i := kbot; i > max(ks, ktop+1); i -= 2 {
|
||
ss := math.Abs(h[i*ldh+i-1]) + math.Abs(h[(i-1)*ldh+i-2])
|
||
aa := wilk1*ss + h[i*ldh+i]
|
||
_, _, _, _, wr[i-1], wi[i-1], wr[i], wi[i], _, _ =
|
||
impl.Dlanv2(aa, ss, wilk2*ss, aa)
|
||
}
|
||
if ks == ktop {
|
||
wr[ks+1] = h[(ks+1)*ldh+ks+1]
|
||
wi[ks+1] = 0
|
||
wr[ks] = wr[ks+1]
|
||
wi[ks] = wi[ks+1]
|
||
}
|
||
} else {
|
||
// If we got ns/2 or fewer shifts, use Dlahqr or recur
|
||
// into Dlaqr04 on a trailing principal submatrix to get
|
||
// more. Since ns <= nsmax <=(n+6)/9, there is enough
|
||
// space below the subdiagonal to fit an ns×ns scratch
|
||
// array.
|
||
if kbot-ks+1 <= ns/2 {
|
||
ks = kbot - ns + 1
|
||
kt = n - ns
|
||
impl.Dlacpy(blas.All, ns, ns, h[ks*ldh+ks:], ldh, h[kt*ldh:], ldh)
|
||
if ns > nmin && recur > 0 {
|
||
ks += impl.Dlaqr04(false, false, ns, 1, ns-1, h[kt*ldh:], ldh,
|
||
wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0, work, lwork, recur-1)
|
||
} else {
|
||
ks += impl.Dlahqr(false, false, ns, 0, ns-1, h[kt*ldh:], ldh,
|
||
wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0)
|
||
}
|
||
// In case of a rare QR failure use eigenvalues
|
||
// of the trailing 2×2 principal submatrix.
|
||
if ks >= kbot {
|
||
aa := h[(kbot-1)*ldh+kbot-1]
|
||
bb := h[(kbot-1)*ldh+kbot]
|
||
cc := h[kbot*ldh+kbot-1]
|
||
dd := h[kbot*ldh+kbot]
|
||
_, _, _, _, wr[kbot-1], wi[kbot-1], wr[kbot], wi[kbot], _, _ =
|
||
impl.Dlanv2(aa, bb, cc, dd)
|
||
ks = kbot - 1
|
||
}
|
||
}
|
||
|
||
if kbot-ks+1 > ns {
|
||
// Sorting the shifts helps a little. Bubble
|
||
// sort keeps complex conjugate pairs together.
|
||
sorted := false
|
||
for k := kbot; k > ks; k-- {
|
||
if sorted {
|
||
break
|
||
}
|
||
sorted = true
|
||
for i := ks; i < k; i++ {
|
||
if math.Abs(wr[i])+math.Abs(wi[i]) >= math.Abs(wr[i+1])+math.Abs(wi[i+1]) {
|
||
continue
|
||
}
|
||
sorted = false
|
||
wr[i], wr[i+1] = wr[i+1], wr[i]
|
||
wi[i], wi[i+1] = wi[i+1], wi[i]
|
||
}
|
||
}
|
||
}
|
||
|
||
// Shuffle shifts into pairs of real shifts and pairs of
|
||
// complex conjugate shifts using the fact that complex
|
||
// conjugate shifts are already adjacent to one another.
|
||
// TODO(vladimir-ch): The shuffling here could probably
|
||
// be removed but I'm not sure right now and it's safer
|
||
// to leave it.
|
||
for i := kbot; i > ks+1; i -= 2 {
|
||
if wi[i] == -wi[i-1] {
|
||
continue
|
||
}
|
||
wr[i], wr[i-1], wr[i-2] = wr[i-1], wr[i-2], wr[i]
|
||
wi[i], wi[i-1], wi[i-2] = wi[i-1], wi[i-2], wi[i]
|
||
}
|
||
}
|
||
|
||
// If there are only two shifts and both are real, then use only one.
|
||
if kbot-ks+1 == 2 && wi[kbot] == 0 {
|
||
if math.Abs(wr[kbot]-h[kbot*ldh+kbot]) < math.Abs(wr[kbot-1]-h[kbot*ldh+kbot]) {
|
||
wr[kbot-1] = wr[kbot]
|
||
} else {
|
||
wr[kbot] = wr[kbot-1]
|
||
}
|
||
}
|
||
|
||
// Use up to ns of the the smallest magnitude shifts. If there
|
||
// aren't ns shifts available, then use them all, possibly
|
||
// dropping one to make the number of shifts even.
|
||
ns = min(ns, kbot-ks+1) &^ 1
|
||
ks = kbot - ns + 1
|
||
|
||
// Split workspace under the subdiagonal into:
|
||
// - a kdu×kdu work array U in the lower left-hand-corner,
|
||
// - a kdu×nhv horizontal work array WH along the bottom edge
|
||
// (nhv must be at least kdu but more is better),
|
||
// - an nhv×kdu vertical work array WV along the left-hand-edge
|
||
// (nhv must be at least kdu but more is better).
|
||
kdu := 3*ns - 3
|
||
ku := n - kdu
|
||
kwh := kdu
|
||
kwv = kdu + 3
|
||
nhv = n - kwv - kdu
|
||
// Small-bulge multi-shift QR sweep.
|
||
impl.Dlaqr5(wantt, wantz, kacc22, n, ktop, kbot, ns,
|
||
wr[ks:ks+ns], wi[ks:ks+ns], h, ldh, iloz, ihiz, z, ldz,
|
||
work, 3, h[ku*ldh:], ldh, nhv, h[kwv*ldh:], ldh, nhv, h[ku*ldh+kwh:], ldh)
|
||
|
||
// Note progress (or the lack of it).
|
||
if ld > 0 {
|
||
ndfl = 1
|
||
} else {
|
||
ndfl++
|
||
}
|
||
}
|
||
|
||
work[0] = float64(lwkopt)
|
||
return unconverged
|
||
}
|