Merge branch 'add-dlaqr23'

This commit is contained in:
Vladimir Chalupecky
2016-08-29 20:23:30 +09:00
4 changed files with 79 additions and 50 deletions

View File

@@ -12,7 +12,7 @@ import (
"github.com/gonum/lapack"
)
// Dlaqr2 performs the orthogonal similarity transformation of an n×n upper
// Dlaqr23 performs the orthogonal similarity transformation of an n×n upper
// Hessenberg matrix to detect and deflate fully converged eigenvalues from a
// trailing principal submatrix using aggressive early deflation [1].
//
@@ -34,28 +34,31 @@ import (
// and the block must be isolated, that is, it must hold that
// ktop == 0 or H[ktop,ktop-1] == 0,
// kbot == n-1 or H[kbot+1,kbot] == 0,
// otherwise Dlaqr2 will panic.
// otherwise Dlaqr23 will panic.
//
// nw is the deflation window size. It must hold that
// 0 <= nw <= kbot-ktop+1,
// otherwise Dlaqr2 will panic.
// otherwise Dlaqr23 will panic.
//
// iloz and ihiz specify the rows of the n×n matrix Z to which transformations
// will be applied if wantz is true. It must hold that
// 0 <= iloz <= ktop, and kbot <= ihiz < n,
// otherwise Dlaqr2 will panic.
// otherwise Dlaqr23 will panic.
//
// sr and si must have length kbot+1, otherwise Dlaqr2 will panic.
// sr and si must have length kbot+1, otherwise Dlaqr23 will panic.
//
// v and ldv represent an nw×nw work matrix.
// t and ldt represent an nw×nh work matrix, and nh must be at least nw.
// wv and ldwv represent an nv×nw work matrix.
//
// work must have length at least lwork and lwork must be at least max(1,2*nw),
// otherwise Dlaqr2 will panic. Larger values of lwork may result in greater
// otherwise Dlaqr23 will panic. Larger values of lwork may result in greater
// efficiency. On return, work[0] will contain the optimal value of lwork.
//
// If lwork is -1, instead of performing Dlaqr2, the function only estimates the
// recur is the non-negative recursion depth. For recur > 0, Dlaqr23 behaves
// as DLAQR3, for recur == 0 it behaves as DLAQR2.
//
// If lwork is -1, instead of performing Dlaqr23, the function only estimates the
// optimal workspace size and stores it into work[0]. Neither h nor z are
// accessed.
//
@@ -75,7 +78,7 @@ import (
// Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973
// URL: http://dx.doi.org/10.1137/S0895479801384585
//
func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int) (ns, nd int) {
func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) {
switch {
case ktop < 0 || max(0, n-1) < ktop:
panic("lapack: invalid value of ktop")
@@ -98,6 +101,9 @@ func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []
panic("lapack: invalid value of ihiz")
}
}
if recur < 0 {
panic("lapack: invalid value of recur")
}
// LAPACK code does not enforce the documented behavior
// nw <= kbot-ktop+1
@@ -106,13 +112,21 @@ func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []
lwkopt := max(1, 2*nw)
if jw > 2 {
// Workspace query call to Dgehrd.
impl.Dgehrd(jw, 0, jw-2, t, ldt, nil, work, -1)
impl.Dgehrd(jw, 0, jw-2, nil, 0, nil, work, -1)
lwk1 := int(work[0])
// Workspace query call to Dormhr.
impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, t, ldt, work, v, ldv, work, -1)
impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, nil, 0, nil, nil, 0, work, -1)
lwk2 := int(work[0])
// Optimal workspace.
lwkopt = jw + max(lwk1, lwk2)
if recur > 0 {
// Workspace query call to Dlaqr4.
impl.Dlaqr4(true, true, jw, 0, jw-1, nil, 0, nil, nil, 0, jw-1, nil, 0, work, -1)
lwk3 := int(work[0])
// Optimal workspace.
lwkopt = max(jw+max(lwk1, lwk2), lwk3)
} else {
// Optimal workspace.
lwkopt = jw + max(lwk1, lwk2)
}
}
// Quick return in case of workspace query.
@@ -179,7 +193,13 @@ func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []
bi := blas64.Implementation()
bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1)
impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv)
infqr := impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork)
var infqr int
if recur > 0 && jw > nmin {
infqr = impl.Dlaqr4(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork)
} else {
infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
}
// Note that ilo == 0 which conveniently coincides with the success
// value of infqr, that is, infqr as an index always points to the first
// converged eigenvalue.

View File

@@ -128,7 +128,7 @@ func (impl Implementation) Dlaqr4(wantt, wantz bool, n, ilo, ihi int, h []float6
panic(badWork)
// TODO(vladimir-ch): Enable if and when we figure out what the minimum
// necessary lwork value is. Dlaqr4 says that the minimum is n which
// clashes with Dlaqr2's opinion about optimal work when nw <= 2
// clashes with Dlaqr23's opinion about optimal work when nw <= 2
// (independent of n).
// case lwork < n && n > ntiny && lwork != -1:
// panic(badWork)
@@ -205,10 +205,10 @@ func (impl Implementation) Dlaqr4(wantt, wantz bool, n, ilo, ihi int, h []float6
nsr = min(nsr, min((n+6)/9, ihi-ilo))
nsr = max(2, nsr&^1)
// Workspace query call to Dlaqr2.
impl.Dlaqr2(wantt, wantz, n, ilo, ihi, nwr+1, nil, 0, iloz, ihiz, nil, 0,
nil, nil, nil, 0, n, nil, 0, n, nil, 0, work, -1)
// Optimal workspace is max(Dlaqr5, Dlaqr2).
// Workspace query call to Dlaqr23.
impl.Dlaqr23(wantt, wantz, n, ilo, ihi, nwr+1, nil, 0, iloz, ihiz, nil, 0,
nil, nil, nil, 0, n, nil, 0, n, nil, 0, work, -1, 0)
// Optimal workspace is max(Dlaqr5, Dlaqr23).
lwkopt := max(3*nsr/2, int(work[0]))
// Quick return in case of workspace query.
if lwork == -1 {
@@ -313,9 +313,9 @@ func (impl Implementation) Dlaqr4(wantt, wantz bool, n, ilo, ihi int, h []float6
kwv := nw + 1
nhv := n - kwv - kt
// Aggressive early deflation.
ls, ld := impl.Dlaqr2(wantt, wantz, n, ktop, kbot, nw,
ls, ld := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw,
h, ldh, iloz, ihiz, z, ldz, wr[:kbot+1], wi[:kbot+1],
h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork)
h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork, 0)
// Adjust kbot accounting for new deflations.
kbot -= ld
@@ -333,13 +333,13 @@ func (impl Implementation) Dlaqr4(wantt, wantz bool, n, ilo, ihi int, h []float6
}
// ns is the nominal number of simultaneous shifts. This may be
// lowered (slightly) if Dlaqr2 did not provide that many
// lowered (slightly) if Dlaqr23 did not provide that many
// shifts.
ns := min(min(nsmax, nsr), max(2, kbot-ktop)) &^ 1
// If there have been no deflations in a multiple of kexsh
// iterations, then try exceptional shifts. Otherwise use shifts
// provided by Dlaqr2 above or from the eigenvalues of a
// provided by Dlaqr23 above or from the eigenvalues of a
// trailing principal submatrix.
if ndfl%kexsh == 0 {
ks = kbot - ns + 1

View File

@@ -144,8 +144,8 @@ func TestDlaqr1(t *testing.T) {
testlapack.Dlaqr1Test(t, impl)
}
func TestDlaqr2(t *testing.T) {
testlapack.Dlaqr2Test(t, impl)
func TestDlaqr23(t *testing.T) {
testlapack.Dlaqr23Test(t, impl)
}
func TestDlaqr4(t *testing.T) {

View File

@@ -13,11 +13,11 @@ import (
"github.com/gonum/blas/blas64"
)
type Dlaqr2er interface {
Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int) (ns, nd int)
type Dlaqr23er interface {
Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int)
}
type dlaqr2Test struct {
type dlaqr23Test struct {
wantt, wantz bool
ktop, kbot int
nw int
@@ -27,20 +27,23 @@ type dlaqr2Test struct {
evWant []complex128 // Optional slice with known eigenvalues.
}
func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
func Dlaqr23Test(t *testing.T, impl Dlaqr23er) {
rnd := rand.New(rand.NewSource(1))
for _, wantt := range []bool{true, false} {
for _, wantz := range []bool{true, false} {
for _, n := range []int{1, 2, 3, 4, 5, 6, 10, 18, 31, 53} {
for _, extra := range []int{0, 1, 11} {
for cas := 0; cas < 100; cas++ {
ktop := rnd.Intn(n)
kbot := rnd.Intn(n)
if ktop > kbot {
ktop, kbot = kbot, ktop
for _, n := range []int{1, 2, 3, 4, 5, 6, 10, 18, 31, 100} {
for _, extra := range []int{0, 11} {
for cas := 0; cas < 30; cas++ {
var nw int
if nw <= 75 {
nw = rnd.Intn(n) + 1
} else {
nw = 76 + rnd.Intn(n-75)
}
nw := rnd.Intn(kbot-ktop+1) + 1
ktop := rnd.Intn(n - nw + 1)
kbot := ktop + nw - 1
kbot += rnd.Intn(n - kbot)
h := randomHessenberg(n, n+extra, rnd)
if ktop-1 >= 0 {
h.Data[ktop*h.Stride+ktop-1] = 0
@@ -50,7 +53,7 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
}
iloz := rnd.Intn(ktop + 1)
ihiz := kbot + rnd.Intn(n-kbot)
test := dlaqr2Test{
test := dlaqr23Test{
wantt: wantt,
wantz: wantz,
ktop: ktop,
@@ -60,8 +63,10 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
iloz: iloz,
ihiz: ihiz,
}
testDlaqr2(t, impl, test, false, rnd)
testDlaqr2(t, impl, test, true, rnd)
testDlaqr23(t, impl, test, false, 1, rnd)
testDlaqr23(t, impl, test, true, 1, rnd)
testDlaqr23(t, impl, test, false, 0, rnd)
testDlaqr23(t, impl, test, true, 0, rnd)
}
}
}
@@ -72,7 +77,7 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
for _, wantt := range []bool{true, false} {
for _, wantz := range []bool{true, false} {
for _, extra := range []int{0, 1, 11} {
test := dlaqr2Test{
test := dlaqr23Test{
wantt: wantt,
wantz: wantz,
h: randomHessenberg(0, extra, rnd),
@@ -82,14 +87,16 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
ihiz: -1,
nw: 0,
}
testDlaqr2(t, impl, test, true, rnd)
testDlaqr2(t, impl, test, false, rnd)
testDlaqr23(t, impl, test, true, 1, rnd)
testDlaqr23(t, impl, test, false, 1, rnd)
testDlaqr23(t, impl, test, true, 0, rnd)
testDlaqr23(t, impl, test, false, 0, rnd)
}
}
}
// Tests with explicit eigenvalues computed by Octave.
for _, test := range []dlaqr2Test{
for _, test := range []dlaqr23Test{
{
h: blas64.General{
Rows: 1,
@@ -201,12 +208,14 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) {
test.wantt = true
test.wantz = true
test.nw = test.kbot - test.ktop + 1
testDlaqr2(t, impl, test, true, rnd)
testDlaqr2(t, impl, test, false, rnd)
testDlaqr23(t, impl, test, true, 1, rnd)
testDlaqr23(t, impl, test, false, 1, rnd)
testDlaqr23(t, impl, test, true, 0, rnd)
testDlaqr23(t, impl, test, false, 0, rnd)
}
}
func testDlaqr2(t *testing.T, impl Dlaqr2er, test dlaqr2Test, opt bool, rnd *rand.Rand) {
func testDlaqr23(t *testing.T, impl Dlaqr23er, test dlaqr23Test, opt bool, recur int, rnd *rand.Rand) {
const tol = 1e-14
h := cloneGeneral(test.h)
@@ -244,15 +253,15 @@ func testDlaqr2(t *testing.T, impl Dlaqr2er, test dlaqr2Test, opt bool, rnd *ran
var work []float64
if opt {
work = nanSlice(1)
impl.Dlaqr2(wantt, wantz, n, ktop, kbot, nw, nil, h.Stride, iloz, ihiz, nil, z.Stride,
nil, nil, nil, v.Stride, tmat.Cols, nil, tmat.Stride, wv.Rows, nil, wv.Stride, work, -1)
impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw, nil, h.Stride, iloz, ihiz, nil, z.Stride,
nil, nil, nil, v.Stride, tmat.Cols, nil, tmat.Stride, wv.Rows, nil, wv.Stride, work, -1, recur)
work = nanSlice(int(work[0]))
} else {
work = nanSlice(max(1, 2*nw))
}
ns, nd := impl.Dlaqr2(wantt, wantz, n, ktop, kbot, nw, h.Data, h.Stride, iloz, ihiz, z.Data, z.Stride,
sr, si, v.Data, v.Stride, tmat.Cols, tmat.Data, tmat.Stride, wv.Rows, wv.Data, wv.Stride, work, len(work))
ns, nd := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw, h.Data, h.Stride, iloz, ihiz, z.Data, z.Stride,
sr, si, v.Data, v.Stride, tmat.Cols, tmat.Data, tmat.Stride, wv.Rows, wv.Data, wv.Stride, work, len(work), recur)
prefix := fmt.Sprintf("Case wantt=%v, wantz=%v, n=%v, ktop=%v, kbot=%v, nw=%v, iloz=%v, ihiz=%v, extra=%v",
wantt, wantz, n, ktop, kbot, nw, iloz, ihiz, extra)