Files
gonum/native/dlaqr2.go
Vladimir Chalupecky b125291e21 native: check input slices to Dlaqr2, Dgehrd and Dormhr only after workspace query
so that it is possible to pass all slices as nil to those functions. A workspace
query should depend only on the input sizes, the matrices and vectors must
not be accessed anyway or may not be available at the call site. Moreover, often
the reference LAPACK uses arbitrary placeholders for arrays and relies on the
fact that no checking is done.
2016-08-20 11:05:19 +09:00

383 lines
11 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
import (
"math"
"github.com/gonum/blas"
"github.com/gonum/blas/blas64"
"github.com/gonum/lapack"
)
// Dlaqr2 performs the orthogonal similarity transformation of an n×n upper
// Hessenberg matrix to detect and deflate fully converged eigenvalues from a
// trailing principal submatrix using aggressive early deflation [1].
//
// On return, H will be overwritten by a new Hessenberg matrix that is a
// perturbation of an orthogonal similarity transformation of H. It is hoped
// that on output H will have many zero subdiagonal entries.
//
// If wantt is true, the matrix H will be fully updated so that the
// quasi-triangular Schur factor can be computed. If wantt is false, then only
// enough of H will be updated to preserve the eigenvalues.
//
// If wantz is true, the orthogonal similarity transformation will be
// accumulated into Z[iloz:ihiz+1,ktop:kbot+1], otherwise Z is not referenced.
//
// ktop and kbot determine a block [ktop:kbot+1,ktop:kbot+1] along the diagonal
// of H. It must hold that
//  0 <= ktop <= kbot < n,     if n > 0,
//  ktop == 0 and kbot == -1,  if n == 0,
// and the block must be isolated, that is, it must hold that
//  ktop == 0   or H[ktop,ktop-1] == 0,
//  kbot == n-1 or H[kbot+1,kbot] == 0,
// otherwise Dlaqr2 will panic.
//
// nw is the deflation window size. It must hold that
//  0 <= nw <= kbot-ktop+1,
// otherwise Dlaqr2 will panic.
//
// iloz and ihiz specify the rows of the n×n matrix Z to which transformations
// will be applied if wantz is true. It must hold that
//  0 <= iloz <= ktop,  and  kbot <= ihiz < n,
// otherwise Dlaqr2 will panic.
//
// sr and si must have length kbot+1, otherwise Dlaqr2 will panic.
//
// v and ldv represent an nw×nw work matrix.
// t and ldt represent an nw×nh work matrix, and nh must be at least nw.
// wv and ldwv represent an nv×nw work matrix.
//
// work must have length at least max(1,lwork) and lwork must be at least
// max(1,2*nw) or be equal to -1, otherwise Dlaqr2 will panic. Larger values of
// lwork may result in greater efficiency. On return, work[0] will contain the
// optimal value of lwork.
//
// If lwork is -1, instead of performing Dlaqr2, the function only estimates the
// optimal workspace size and stores it into work[0]. Neither h nor z are
// accessed.
//
// On return, ns and nd will contain respectively the number of unconverged
// (i.e., approximate) eigenvalues and converged eigenvalues that are stored in
// sr and si.
//
// On return, the real and imaginary parts of approximate eigenvalues that may
// be used for shifts will be stored respectively in sr[kbot-nd-ns+1:kbot-nd+1]
// and si[kbot-nd-ns+1:kbot-nd+1].
//
// On return, the real and imaginary parts of converged eigenvalues will be
// stored respectively in sr[kbot-nd+1:kbot+1] and si[kbot-nd+1:kbot+1].
//
// References:
//  [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
//      Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973
//      URL: http://dx.doi.org/10.1137/S0895479801384585
func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int) (ns, nd int) {
	switch {
	case ktop < 0 || max(0, n-1) < ktop:
		panic("lapack: invalid value of ktop")
	case kbot < min(ktop, n-1) || n <= kbot:
		panic("lapack: invalid value of kbot")
	case nw < 0 || kbot-ktop+1 < nw:
		panic("lapack: invalid value of nw")
	case nh < nw:
		panic("lapack: invalid value of nh")
	case lwork < max(1, 2*nw) && lwork != -1:
		panic(badWork)
	case len(work) < max(1, lwork):
		// work[0] is written on every return path, including the
		// workspace query (lwork == -1), so at least one element is
		// required even when lwork itself is negative.
		panic(shortWork)
	}
	if wantz {
		switch {
		case iloz < 0 || ktop < iloz:
			panic("lapack: invalid value of iloz")
		case ihiz < kbot || n <= ihiz:
			panic("lapack: invalid value of ihiz")
		}
	}

	// LAPACK code does not enforce the documented behavior
	//  nw <= kbot-ktop+1
	// but we do (we panic above).
	jw := nw
	lwkopt := max(1, 2*nw)
	if jw > 2 {
		// Workspace query call to Dgehrd.
		impl.Dgehrd(jw, 0, jw-2, t, ldt, nil, work, -1)
		lwk1 := int(work[0])
		// Workspace query call to Dormhr.
		impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, t, ldt, work, v, ldv, work, -1)
		lwk2 := int(work[0])
		// Optimal workspace.
		lwkopt = jw + max(lwk1, lwk2)
	}
	// Quick return in case of workspace query.
	if lwork == -1 {
		work[0] = float64(lwkopt)
		return 0, 0
	}

	// Check input slices only after workspace query has been handled.
	checkMatrix(n, n, h, ldh)
	checkMatrix(nw, nw, v, ldv)
	checkMatrix(nw, nh, t, ldt)
	checkMatrix(nv, nw, wv, ldwv)
	if wantz {
		checkMatrix(n, n, z, ldz)
	}

	switch {
	case ktop > 0 && h[ktop*ldh+ktop-1] != 0:
		panic("lapack: block not isolated")
	case kbot+1 < n && h[(kbot+1)*ldh+kbot] != 0:
		panic("lapack: block not isolated")
	case len(sr) != kbot+1:
		panic("lapack: bad length of sr")
	case len(si) != kbot+1:
		panic("lapack: bad length of si")
	}

	// Quick return for an empty deflation window. The optimal workspace
	// size max(1,2*nw) is 1 here, and it is reported in work[0] like on
	// every other return path.
	if nw == 0 {
		work[0] = 1
		return 0, 0
	}

	// Machine constants.
	ulp := dlamchP
	smlnum := float64(n) / ulp * dlamchS

	// Setup deflation window.
	// s is the subdiagonal element that couples the window to the rest of
	// the active block; it stays zero when the window starts at ktop.
	var s float64
	kwtop := kbot - jw + 1
	if kwtop != ktop {
		s = h[kwtop*ldh+kwtop-1]
	}
	if kwtop == kbot {
		// 1×1 deflation window.
		sr[kwtop] = h[kwtop*ldh+kwtop]
		si[kwtop] = 0
		ns = 1
		nd = 0
		if math.Abs(s) <= math.Max(smlnum, ulp*math.Abs(h[kwtop*ldh+kwtop])) {
			ns = 0
			nd = 1
			if kwtop > ktop {
				h[kwtop*ldh+kwtop-1] = 0
			}
		}
		work[0] = 1
		return ns, nd
	}

	// Convert to spike-triangular form. In case of a rare QR failure, this
	// routine continues to do aggressive early deflation using that part of
	// the deflation window that converged using infqr here and there to
	// keep track.
	impl.Dlacpy(blas.Upper, jw, jw, h[kwtop*ldh+kwtop:], ldh, t, ldt)
	bi := blas64.Implementation()
	// Copy the first subdiagonal of the window into T.
	bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1)
	impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv)
	infqr := impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
	// Note that ilo == 0 which conveniently coincides with the success
	// value of infqr, that is, infqr as an index always points to the first
	// converged eigenvalue.

	// Dtrexc needs a clean margin near the diagonal.
	for j := 0; j < jw-3; j++ {
		t[(j+2)*ldt+j] = 0
		t[(j+3)*ldt+j] = 0
	}
	if jw >= 3 {
		t[(jw-1)*ldt+jw-3] = 0
	}

	ns = jw
	ilst := infqr
	// Deflation detection loop.
	for ilst < ns {
		// A nonzero subdiagonal entry marks a 2×2 diagonal block.
		bulge := false
		if ns >= 2 {
			bulge = t[(ns-1)*ldt+ns-2] != 0
		}
		if !bulge {
			// Real eigenvalue.
			abst := math.Abs(t[(ns-1)*ldt+ns-1])
			if abst == 0 {
				abst = math.Abs(s)
			}
			if math.Abs(s*v[ns-1]) <= math.Max(smlnum, ulp*abst) {
				// Deflatable.
				ns--
			} else {
				// Undeflatable, move it up out of the way.
				// Dtrexc can not fail in this case.
				_, ilst, _ = impl.Dtrexc(lapack.EigDecomp, jw, t, ldt, v, ldv, ns-1, ilst, work)
				ilst++
			}
			continue
		}
		// Complex conjugate pair.
		abst := math.Abs(t[(ns-1)*ldt+ns-1]) + math.Sqrt(math.Abs(t[(ns-1)*ldt+ns-2]))*math.Sqrt(math.Abs(t[(ns-2)*ldt+ns-1]))
		if abst == 0 {
			abst = math.Abs(s)
		}
		if math.Max(math.Abs(s*v[ns-1]), math.Abs(s*v[ns-2])) <= math.Max(smlnum, ulp*abst) {
			// Deflatable.
			ns -= 2
		} else {
			// Undeflatable, move them up out of the way.
			// Dtrexc does the right thing with ilst in case of a
			// rare exchange failure.
			_, ilst, _ = impl.Dtrexc(lapack.EigDecomp, jw, t, ldt, v, ldv, ns-1, ilst, work)
			ilst += 2
		}
	}

	// Return to Hessenberg form.
	if ns == 0 {
		s = 0
	}
	if ns < jw {
		// Sorting diagonal blocks of T improves accuracy for graded
		// matrices. Bubble sort deals well with exchange failures.
		sorted := false
		i := ns
		for !sorted {
			sorted = true
			kend := i - 1
			i = infqr
			// k indexes the diagonal block following the one at i;
			// the step is 1 for a 1×1 block and 2 for a 2×2 block.
			var k int
			if i == ns-1 || t[(i+1)*ldt+i] == 0 {
				k = i + 1
			} else {
				k = i + 2
			}
			for k <= kend {
				var evi float64
				if k == i+1 {
					evi = math.Abs(t[i*ldt+i])
				} else {
					evi = math.Abs(t[i*ldt+i]) + math.Sqrt(math.Abs(t[(i+1)*ldt+i]))*math.Sqrt(math.Abs(t[i*ldt+i+1]))
				}

				var evk float64
				if k == kend || t[(k+1)*ldt+k] == 0 {
					evk = math.Abs(t[k*ldt+k])
				} else {
					evk = math.Abs(t[k*ldt+k]) + math.Sqrt(math.Abs(t[(k+1)*ldt+k]))*math.Sqrt(math.Abs(t[k*ldt+k+1]))
				}

				if evi >= evk {
					i = k
				} else {
					sorted = false
					// ilst is deliberately local to this
					// branch; the outer ilst is no longer
					// needed after the deflation loop.
					_, ilst, ok := impl.Dtrexc(lapack.EigDecomp, jw, t, ldt, v, ldv, i, k, work)
					if ok {
						i = ilst
					} else {
						i = k
					}
				}
				if i == kend || t[(i+1)*ldt+i] == 0 {
					k = i + 1
				} else {
					k = i + 2
				}
			}
		}
	}

	// Restore shift/eigenvalue array from T.
	for i := jw - 1; i >= infqr; {
		if i == infqr || t[i*ldt+i-1] == 0 {
			sr[kwtop+i] = t[i*ldt+i]
			si[kwtop+i] = 0
			i--
			continue
		}
		// 2×2 diagonal block: obtain both eigenvalues from Dlanv2.
		aa := t[(i-1)*ldt+i-1]
		bb := t[(i-1)*ldt+i]
		cc := t[i*ldt+i-1]
		dd := t[i*ldt+i]
		_, _, _, _, sr[kwtop+i-1], si[kwtop+i-1], sr[kwtop+i], si[kwtop+i], _, _ = impl.Dlanv2(aa, bb, cc, dd)
		i -= 2
	}

	if ns < jw || s == 0 {
		if ns > 1 && s != 0 {
			// Reflect spike back into lower triangle.
			bi.Dcopy(ns, v[:ns], 1, work[:ns], 1)
			_, tau := impl.Dlarfg(ns, work[0], work[1:ns], 1)
			work[0] = 1
			impl.Dlaset(blas.Lower, jw-2, jw-2, 0, 0, t[2*ldt:], ldt)
			impl.Dlarf(blas.Left, ns, jw, work[:ns], 1, tau, t, ldt, work[jw:])
			impl.Dlarf(blas.Right, ns, ns, work[:ns], 1, tau, t, ldt, work[jw:])
			impl.Dlarf(blas.Right, jw, ns, work[:ns], 1, tau, v, ldv, work[jw:])
			impl.Dgehrd(jw, 0, ns-1, t, ldt, work[:jw-1], work[jw:], lwork-jw)
		}

		// Copy updated reduced window into place.
		if kwtop > 0 {
			h[kwtop*ldh+kwtop-1] = s * v[0]
		}
		impl.Dlacpy(blas.Upper, jw, jw, t, ldt, h[kwtop*ldh+kwtop:], ldh)
		bi.Dcopy(jw-1, t[ldt:], ldt+1, h[(kwtop+1)*ldh+kwtop:], ldh+1)

		// Accumulate orthogonal matrix in order to update H and Z, if
		// requested.
		if ns > 1 && s != 0 {
			// work[:ns-1] contains the elementary reflectors stored
			// by a call to Dgehrd above.
			impl.Dormhr(blas.Right, blas.NoTrans, jw, ns, 0, ns-1,
				t, ldt, work[:ns-1], v, ldv, work[jw:], lwork-jw)
		}

		// Update vertical slab in H.
		// When wantt is true the update starts at row 0, otherwise
		// only rows from ktop on are updated.
		var ltop int
		if !wantt {
			ltop = ktop
		}
		for krow := ltop; krow < kwtop; krow += nv {
			kln := min(nv, kwtop-krow)
			bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
				1, h[krow*ldh+kwtop:], ldh, v, ldv,
				0, wv, ldwv)
			impl.Dlacpy(blas.All, kln, jw, wv, ldwv, h[krow*ldh+kwtop:], ldh)
		}

		// Update horizontal slab in H.
		if wantt {
			for kcol := kbot + 1; kcol < n; kcol += nh {
				kln := min(nh, n-kcol)
				bi.Dgemm(blas.Trans, blas.NoTrans, jw, kln, jw,
					1, v, ldv, h[kwtop*ldh+kcol:], ldh,
					0, t, ldt)
				impl.Dlacpy(blas.All, jw, kln, t, ldt, h[kwtop*ldh+kcol:], ldh)
			}
		}

		// Update vertical slab in Z.
		if wantz {
			for krow := iloz; krow <= ihiz; krow += nv {
				kln := min(nv, ihiz-krow+1)
				bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
					1, z[krow*ldz+kwtop:], ldz, v, ldv,
					0, wv, ldwv)
				impl.Dlacpy(blas.All, kln, jw, wv, ldwv, z[krow*ldz+kwtop:], ldz)
			}
		}
	}

	// The number of deflations.
	nd = jw - ns
	// Shifts are converged eigenvalues that could not be deflated.
	// Subtracting infqr from the spike length takes care of the case of a
	// rare QR failure while calculating eigenvalues of the deflation
	// window.
	ns -= infqr
	work[0] = float64(lwkopt)
	return ns, nd
}