diff --git a/native/dlaqr2.go b/native/dlaqr23.go similarity index 86% rename from native/dlaqr2.go rename to native/dlaqr23.go index e14fa298..c3ef6741 100644 --- a/native/dlaqr2.go +++ b/native/dlaqr23.go @@ -12,7 +12,7 @@ import ( "github.com/gonum/lapack" ) -// Dlaqr2 performs the orthogonal similarity transformation of an n×n upper +// Dlaqr23 performs the orthogonal similarity transformation of an n×n upper // Hessenberg matrix to detect and deflate fully converged eigenvalues from a // trailing principal submatrix using aggressive early deflation [1]. // @@ -34,28 +34,31 @@ import ( // and the block must be isolated, that is, it must hold that // ktop == 0 or H[ktop,ktop-1] == 0, // kbot == n-1 or H[kbot+1,kbot] == 0, -// otherwise Dlaqr2 will panic. +// otherwise Dlaqr23 will panic. // // nw is the deflation window size. It must hold that // 0 <= nw <= kbot-ktop+1, -// otherwise Dlaqr2 will panic. +// otherwise Dlaqr23 will panic. // // iloz and ihiz specify the rows of the n×n matrix Z to which transformations // will be applied if wantz is true. It must hold that // 0 <= iloz <= ktop, and kbot <= ihiz < n, -// otherwise Dlaqr2 will panic. +// otherwise Dlaqr23 will panic. // -// sr and si must have length kbot+1, otherwise Dlaqr2 will panic. +// sr and si must have length kbot+1, otherwise Dlaqr23 will panic. // // v and ldv represent an nw×nw work matrix. // t and ldt represent an nw×nh work matrix, and nh must be at least nw. // wv and ldwv represent an nv×nw work matrix. // // work must have length at least lwork and lwork must be at least max(1,2*nw), -// otherwise Dlaqr2 will panic. Larger values of lwork may result in greater +// otherwise Dlaqr23 will panic. Larger values of lwork may result in greater // efficiency. On return, work[0] will contain the optimal value of lwork. // -// If lwork is -1, instead of performing Dlaqr2, the function only estimates the +// recur is the non-negative recursion depth. For recur > 0, Dlaqr23 behaves +// as DLAQR3, for recur == 0 it behaves as DLAQR2. +// +// If lwork is -1, instead of performing Dlaqr23, the function only estimates the // optimal workspace size and stores it into work[0]. Neither h nor z are // accessed. // @@ -75,7 +78,7 @@ import ( // Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973 // URL: http://dx.doi.org/10.1137/S0895479801384585 // -func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int) (ns, nd int) { +func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) { switch { case ktop < 0 || max(0, n-1) < ktop: panic("lapack: invalid value of ktop") @@ -98,6 +101,9 @@ func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h [] panic("lapack: invalid value of ihiz") } } + if recur < 0 { + panic("lapack: invalid value of recur") + } // LAPACK code does not enforce the documented behavior // nw <= kbot-ktop+1 @@ -106,13 +112,21 @@ func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h [] lwkopt := max(1, 2*nw) if jw > 2 { // Workspace query call to Dgehrd. - impl.Dgehrd(jw, 0, jw-2, t, ldt, nil, work, -1) + impl.Dgehrd(jw, 0, jw-2, nil, 0, nil, work, -1) lwk1 := int(work[0]) // Workspace query call to Dormhr. - impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, t, ldt, work, v, ldv, work, -1) + impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, nil, 0, nil, nil, 0, work, -1) lwk2 := int(work[0]) - // Optimal workspace. - lwkopt = jw + max(lwk1, lwk2) + if recur > 0 { + // Workspace query call to Dlaqr4. + impl.Dlaqr4(true, true, jw, 0, jw-1, nil, 0, nil, nil, 0, jw-1, nil, 0, work, -1) + lwk3 := int(work[0]) + // Optimal workspace. + lwkopt = max(jw+max(lwk1, lwk2), lwk3) + } else { + // Optimal workspace. + lwkopt = jw + max(lwk1, lwk2) + } } // Quick return in case of workspace query. @@ -179,7 +193,13 @@ func (impl Implementation) Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h [] bi := blas64.Implementation() bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1) impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv) - infqr := impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv) + nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork) + var infqr int + if recur > 0 && jw > nmin { + infqr = impl.Dlaqr4(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork) + } else { + infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv) + } // Note that ilo == 0 which conveniently coincides with the success // value of infqr, that is, infqr as an index always points to the first // converged eigenvalue. diff --git a/native/dlaqr4.go b/native/dlaqr4.go index 12832e65..69244d08 100644 --- a/native/dlaqr4.go +++ b/native/dlaqr4.go @@ -128,7 +128,7 @@ func (impl Implementation) Dlaqr4(wantt, wantz bool, n, ilo, ihi int, h []float6 panic(badWork) // TODO(vladimir-ch): Enable if and when we figure out what the minimum // necessary lwork value is. Dlaqr4 says that the minimum is n which - // clashes with Dlaqr2's opinion about optimal work when nw <= 2 + // clashes with Dlaqr23's opinion about optimal work when nw <= 2 // (independent of n). // case lwork < n && n > ntiny && lwork != -1: // panic(badWork) @@ -205,10 +205,10 @@ func (impl Implementation) Dlaqr4(wantt, wantz bool, n, ilo, ihi int, h []float6 nsr = min(nsr, min((n+6)/9, ihi-ilo)) nsr = max(2, nsr&^1) - // Workspace query call to Dlaqr2. - impl.Dlaqr2(wantt, wantz, n, ilo, ihi, nwr+1, nil, 0, iloz, ihiz, nil, 0, - nil, nil, nil, 0, n, nil, 0, n, nil, 0, work, -1) - // Optimal workspace is max(Dlaqr5, Dlaqr2). + // Workspace query call to Dlaqr23. + impl.Dlaqr23(wantt, wantz, n, ilo, ihi, nwr+1, nil, 0, iloz, ihiz, nil, 0, + nil, nil, nil, 0, n, nil, 0, n, nil, 0, work, -1, 0) + // Optimal workspace is max(Dlaqr5, Dlaqr23). lwkopt := max(3*nsr/2, int(work[0])) // Quick return in case of workspace query. if lwork == -1 { @@ -313,9 +313,9 @@ func (impl Implementation) Dlaqr4(wantt, wantz bool, n, ilo, ihi int, h []float6 kwv := nw + 1 nhv := n - kwv - kt // Aggressive early deflation. - ls, ld := impl.Dlaqr2(wantt, wantz, n, ktop, kbot, nw, + ls, ld := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz, ihiz, z, ldz, wr[:kbot+1], wi[:kbot+1], - h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork) + h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork, 0) // Adjust kbot accounting for new deflations. kbot -= ld @@ -333,13 +333,13 @@ func (impl Implementation) Dlaqr4(wantt, wantz bool, n, ilo, ihi int, h []float6 } // ns is the nominal number of simultaneous shifts. This may be - // lowered (slightly) if Dlaqr2 did not provide that many + // lowered (slightly) if Dlaqr23 did not provide that many // shifts. ns := min(min(nsmax, nsr), max(2, kbot-ktop)) &^ 1 // If there have been no deflations in a multiple of kexsh // iterations, then try exceptional shifts. Otherwise use shifts - // provided by Dlaqr2 above or from the eigenvalues of a + // provided by Dlaqr23 above or from the eigenvalues of a // trailing principal submatrix. if ndfl%kexsh == 0 { ks = kbot - ns + 1 diff --git a/native/lapack_test.go b/native/lapack_test.go index a86f59aa..f024c86d 100644 --- a/native/lapack_test.go +++ b/native/lapack_test.go @@ -144,8 +144,8 @@ func TestDlaqr1(t *testing.T) { testlapack.Dlaqr1Test(t, impl) } -func TestDlaqr2(t *testing.T) { - testlapack.Dlaqr2Test(t, impl) +func TestDlaqr23(t *testing.T) { + testlapack.Dlaqr23Test(t, impl) } func TestDlaqr4(t *testing.T) { diff --git a/testlapack/dlaqr2.go b/testlapack/dlaqr23.go similarity index 79% rename from testlapack/dlaqr2.go rename to testlapack/dlaqr23.go index 48c473a5..e285c3b9 100644 --- a/testlapack/dlaqr2.go +++ b/testlapack/dlaqr23.go @@ -13,11 +13,11 @@ import ( "github.com/gonum/blas/blas64" ) -type Dlaqr2er interface { - Dlaqr2(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int) (ns, nd int) +type Dlaqr23er interface { + Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) } -type dlaqr2Test struct { +type dlaqr23Test struct { wantt, wantz bool ktop, kbot int nw int @@ -27,20 +27,23 @@ type dlaqr2Test struct { evWant []complex128 // Optional slice with known eigenvalues. } -func Dlaqr2Test(t *testing.T, impl Dlaqr2er) { +func Dlaqr23Test(t *testing.T, impl Dlaqr23er) { rnd := rand.New(rand.NewSource(1)) for _, wantt := range []bool{true, false} { for _, wantz := range []bool{true, false} { - for _, n := range []int{1, 2, 3, 4, 5, 6, 10, 18, 31, 53} { - for _, extra := range []int{0, 1, 11} { - for cas := 0; cas < 100; cas++ { - ktop := rnd.Intn(n) - kbot := rnd.Intn(n) - if ktop > kbot { - ktop, kbot = kbot, ktop + for _, n := range []int{1, 2, 3, 4, 5, 6, 10, 18, 31, 100} { + for _, extra := range []int{0, 11} { + for cas := 0; cas < 30; cas++ { + var nw int + if nw <= 75 { + nw = rnd.Intn(n) + 1 + } else { + nw = 76 + rnd.Intn(n-75) } - nw := rnd.Intn(kbot-ktop+1) + 1 + ktop := rnd.Intn(n - nw + 1) + kbot := ktop + nw - 1 + kbot += rnd.Intn(n - kbot) h := randomHessenberg(n, n+extra, rnd) if ktop-1 >= 0 { h.Data[ktop*h.Stride+ktop-1] = 0 @@ -50,7 +53,7 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) { } iloz := rnd.Intn(ktop + 1) ihiz := kbot + rnd.Intn(n-kbot) - test := dlaqr2Test{ + test := dlaqr23Test{ wantt: wantt, wantz: wantz, ktop: ktop, @@ -60,8 +63,10 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) { iloz: iloz, ihiz: ihiz, } - testDlaqr2(t, impl, test, false, rnd) - testDlaqr2(t, impl, test, true, rnd) + testDlaqr23(t, impl, test, false, 1, rnd) + testDlaqr23(t, impl, test, true, 1, rnd) + testDlaqr23(t, impl, test, false, 0, rnd) + testDlaqr23(t, impl, test, true, 0, rnd) } } } @@ -72,7 +77,7 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) { for _, wantt := range []bool{true, false} { for _, wantz := range []bool{true, false} { for _, extra := range []int{0, 1, 11} { - test := dlaqr2Test{ + test := dlaqr23Test{ wantt: wantt, wantz: wantz, h: randomHessenberg(0, extra, rnd), @@ -82,14 +87,16 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) { ihiz: -1, nw: 0, } - testDlaqr2(t, impl, test, true, rnd) - testDlaqr2(t, impl, test, false, rnd) + testDlaqr23(t, impl, test, true, 1, rnd) + testDlaqr23(t, impl, test, false, 1, rnd) + testDlaqr23(t, impl, test, true, 0, rnd) + testDlaqr23(t, impl, test, false, 0, rnd) } } } // Tests with explicit eigenvalues computed by Octave. - for _, test := range []dlaqr2Test{ + for _, test := range []dlaqr23Test{ { h: blas64.General{ Rows: 1, @@ -201,12 +208,14 @@ func Dlaqr2Test(t *testing.T, impl Dlaqr2er) { test.wantt = true test.wantz = true test.nw = test.kbot - test.ktop + 1 - testDlaqr2(t, impl, test, true, rnd) - testDlaqr2(t, impl, test, false, rnd) + testDlaqr23(t, impl, test, true, 1, rnd) + testDlaqr23(t, impl, test, false, 1, rnd) + testDlaqr23(t, impl, test, true, 0, rnd) + testDlaqr23(t, impl, test, false, 0, rnd) } } -func testDlaqr2(t *testing.T, impl Dlaqr2er, test dlaqr2Test, opt bool, rnd *rand.Rand) { +func testDlaqr23(t *testing.T, impl Dlaqr23er, test dlaqr23Test, opt bool, recur int, rnd *rand.Rand) { const tol = 1e-14 h := cloneGeneral(test.h) @@ -244,15 +253,15 @@ func testDlaqr2(t *testing.T, impl Dlaqr2er, test dlaqr2Test, opt bool, rnd *ran var work []float64 if opt { work = nanSlice(1) - impl.Dlaqr2(wantt, wantz, n, ktop, kbot, nw, nil, h.Stride, iloz, ihiz, nil, z.Stride, - nil, nil, nil, v.Stride, tmat.Cols, nil, tmat.Stride, wv.Rows, nil, wv.Stride, work, -1) + impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw, nil, h.Stride, iloz, ihiz, nil, z.Stride, + nil, nil, nil, v.Stride, tmat.Cols, nil, tmat.Stride, wv.Rows, nil, wv.Stride, work, -1, recur) work = nanSlice(int(work[0])) } else { work = nanSlice(max(1, 2*nw)) } - ns, nd := impl.Dlaqr2(wantt, wantz, n, ktop, kbot, nw, h.Data, h.Stride, iloz, ihiz, z.Data, z.Stride, - sr, si, v.Data, v.Stride, tmat.Cols, tmat.Data, tmat.Stride, wv.Rows, wv.Data, wv.Stride, work, len(work)) + ns, nd := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw, h.Data, h.Stride, iloz, ihiz, z.Data, z.Stride, + sr, si, v.Data, v.Stride, tmat.Cols, tmat.Data, tmat.Stride, wv.Rows, wv.Data, wv.Stride, work, len(work), recur) prefix := fmt.Sprintf("Case wantt=%v, wantz=%v, n=%v, ktop=%v, kbot=%v, nw=%v, iloz=%v, ihiz=%v, extra=%v", wantt, wantz, n, ktop, kbot, nw, iloz, ihiz, extra)