mirror of
https://github.com/gonum/gonum.git
synced 2025-10-05 23:26:52 +08:00
lapack/gonum: fix ldwork in Dgeqrf and update its test
This commit is contained in:

committed by
Vladimír Chalupecký

parent
08a35caaad
commit
dd4cc715c5
@@ -59,7 +59,6 @@ func (impl Implementation) Dgeqrf(m, n int, a []float64, lda int, tau, work []fl
|
|||||||
nbmin := 2 // Minimal block size.
|
nbmin := 2 // Minimal block size.
|
||||||
var nx int // Use unblocked (unless changed in the next for loop)
|
var nx int // Use unblocked (unless changed in the next for loop)
|
||||||
iws := n
|
iws := n
|
||||||
ldwork := nb
|
|
||||||
// Only consider blocked if the suggested block size is > 1 and the
|
// Only consider blocked if the suggested block size is > 1 and the
|
||||||
// number of rows or columns is sufficiently large.
|
// number of rows or columns is sufficiently large.
|
||||||
if 1 < nb && nb < k {
|
if 1 < nb && nb < k {
|
||||||
@@ -67,7 +66,7 @@ func (impl Implementation) Dgeqrf(m, n int, a []float64, lda int, tau, work []fl
|
|||||||
// to unblocked.
|
// to unblocked.
|
||||||
nx = max(0, impl.Ilaenv(3, "DGEQRF", " ", m, n, -1, -1))
|
nx = max(0, impl.Ilaenv(3, "DGEQRF", " ", m, n, -1, -1))
|
||||||
if k > nx {
|
if k > nx {
|
||||||
iws = ldwork * n
|
iws = n * nb
|
||||||
if lwork < iws {
|
if lwork < iws {
|
||||||
// Not enough workspace to use the optimal block
|
// Not enough workspace to use the optimal block
|
||||||
// size. Get the minimum block size instead.
|
// size. Get the minimum block size instead.
|
||||||
@@ -80,6 +79,7 @@ func (impl Implementation) Dgeqrf(m, n int, a []float64, lda int, tau, work []fl
|
|||||||
// Compute QR using a blocked algorithm.
|
// Compute QR using a blocked algorithm.
|
||||||
var i int
|
var i int
|
||||||
if nbmin <= nb && nb < k && nx < k {
|
if nbmin <= nb && nb < k && nx < k {
|
||||||
|
ldwork := nb
|
||||||
for i = 0; i < k-nx; i += nb {
|
for i = 0; i < k-nx; i += nb {
|
||||||
ib := min(k-i, nb)
|
ib := min(k-i, nb)
|
||||||
// Compute the QR factorization of the current block.
|
// Compute the QR factorization of the current block.
|
||||||
|
@@ -18,6 +18,7 @@ type Dgeqrfer interface {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func DgeqrfTest(t *testing.T, impl Dgeqrfer) {
|
func DgeqrfTest(t *testing.T, impl Dgeqrfer) {
|
||||||
|
const tol = 1e-12
|
||||||
rnd := rand.New(rand.NewSource(1))
|
rnd := rand.New(rand.NewSource(1))
|
||||||
for c, test := range []struct {
|
for c, test := range []struct {
|
||||||
m, n, lda int
|
m, n, lda int
|
||||||
@@ -49,48 +50,53 @@ func DgeqrfTest(t *testing.T, impl Dgeqrfer) {
|
|||||||
if lda == 0 {
|
if lda == 0 {
|
||||||
lda = test.n
|
lda = test.n
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Allocate m×n matrix A and fill it with random numbers.
|
||||||
a := make([]float64, m*lda)
|
a := make([]float64, m*lda)
|
||||||
for i := 0; i < m; i++ {
|
for i := range a {
|
||||||
for j := 0; j < n; j++ {
|
a[i] = rnd.NormFloat64()
|
||||||
a[i*lda+j] = rnd.Float64()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
// Store a copy of A for later comparison.
|
||||||
|
aCopy := make([]float64, len(a))
|
||||||
|
copy(aCopy, a)
|
||||||
|
|
||||||
|
// Allocate a slice for scalar factors of elementary reflectors
|
||||||
|
// and fill it with random numbers.
|
||||||
tau := make([]float64, n)
|
tau := make([]float64, n)
|
||||||
for i := 0; i < n; i++ {
|
for i := 0; i < n; i++ {
|
||||||
tau[i] = rnd.Float64()
|
tau[i] = rnd.Float64()
|
||||||
}
|
}
|
||||||
aCopy := make([]float64, len(a))
|
|
||||||
copy(aCopy, a)
|
|
||||||
ans := make([]float64, len(a))
|
|
||||||
copy(ans, a)
|
|
||||||
work := make([]float64, n)
|
|
||||||
// Compute unblocked QR.
|
|
||||||
impl.Dgeqr2(m, n, ans, lda, tau, work)
|
|
||||||
// Compute blocked QR with small work.
|
|
||||||
impl.Dgeqrf(m, n, a, lda, tau, work, len(work))
|
|
||||||
if !floats.EqualApprox(ans, a, 1e-12) {
|
|
||||||
t.Errorf("Case %v, mismatch small work.", c)
|
|
||||||
}
|
|
||||||
// Try the full length of work.
|
|
||||||
impl.Dgeqrf(m, n, a, lda, tau, work, -1)
|
|
||||||
lwork := int(work[0])
|
|
||||||
work = make([]float64, lwork)
|
|
||||||
copy(a, aCopy)
|
|
||||||
impl.Dgeqrf(m, n, a, lda, tau, work, lwork)
|
|
||||||
if !floats.EqualApprox(ans, a, 1e-12) {
|
|
||||||
t.Errorf("Case %v, mismatch large work.", c)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try a slightly smaller version of work to test blocking.
|
// Compute the expected result using unblocked QR algorithm and
|
||||||
if len(work) <= n {
|
// store it in want.
|
||||||
continue
|
want := make([]float64, len(a))
|
||||||
}
|
copy(want, a)
|
||||||
work = work[1:]
|
impl.Dgeqr2(m, n, want, lda, tau, make([]float64, n))
|
||||||
lwork--
|
|
||||||
|
for _, wl := range []worklen{minimumWork, mediumWork, optimumWork} {
|
||||||
copy(a, aCopy)
|
copy(a, aCopy)
|
||||||
impl.Dgeqrf(m, n, a, lda, tau, work, lwork)
|
|
||||||
if !floats.EqualApprox(ans, a, 1e-12) {
|
var lwork int
|
||||||
t.Errorf("Case %v, mismatch large work.", c)
|
switch wl {
|
||||||
|
case minimumWork:
|
||||||
|
lwork = n
|
||||||
|
case mediumWork:
|
||||||
|
work := make([]float64, 1)
|
||||||
|
impl.Dgeqrf(m, n, a, lda, tau, work, -1)
|
||||||
|
lwork = int(work[0]) - 2*n
|
||||||
|
case optimumWork:
|
||||||
|
work := make([]float64, 1)
|
||||||
|
impl.Dgeqrf(m, n, a, lda, tau, work, -1)
|
||||||
|
lwork = int(work[0])
|
||||||
|
}
|
||||||
|
work := make([]float64, lwork)
|
||||||
|
|
||||||
|
// Compute the QR factorization of A.
|
||||||
|
impl.Dgeqrf(m, n, a, lda, tau, work, len(work))
|
||||||
|
// Compare the result with Dgeqr2.
|
||||||
|
if !floats.EqualApprox(want, a, tol) {
|
||||||
|
t.Errorf("Case %v, workspace %v, unexpected result.", c, wl)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user