Add dorgqr and tests, update dorg2r tests, and fix cgo dorgqr

2025-10-20 21:59:25 +08:00 · 2015-10-27 11:36:46 -06:00
parent 676338c65e
commit be045bbf9a
12 changed files with 520 additions and 36 deletions
--- a/cgo/lapack.go
+++ b/cgo/lapack.go
@@ -30,9 +30,11 @@ const (
 	badWorkStride = "lapack: insufficient working array stride"
 	kGTM          = "lapack: k > m"
 	kGTN          = "lapack: k > n"
+	kLT0          = "lapack: k < 0"
 	mLTN          = "lapack: m < n"
 	negDimension  = "lapack: negative matrix dimension"
 	nLT0          = "lapack: n < 0"
+	nLTM          = "lapack: n < m"
 	shortWork     = "lapack: working array shorter than declared"
 )

@@ -465,27 +467,71 @@ func (impl Implementation) Dgetrs(trans blas.Transpose, n, nrhs int, a []float64
 	clapack.Dgetrs(trans, n, nrhs, a, lda, ipiv32, b, ldb)
 }

-func (impl Implementation) Dorglq(m, n, k int, a []float64, lda int, tau, work []float64) {
+// Dorglq generates an m×n matrix Q with orthonormal rows defined by the
+// product of elementary reflectors as computed by Dgelqf.
+//  Q = H(0) * H(2) * ... * H(k-1)
+// Dorglq is the blocked version of dorgl2 that makes greater use of level-3 BLAS
+// routines.
+//
+// len(tau) >= k, 0 <= k <= n, and 0 <= m <= n.
+//
+// Work is temporary storage, and lwork specifies the usable memory length.
+// The C interface does not support providing temporary storage. To provide compatibility
+// with native, lwork == -1 will not run Dorglq but will instead write the minimum
+// work necessary to work[0]. If len(work) < lwork, Dgeqrf will panic, and at minimum
+// lwork >= m.
+//
+// Dorgqr will panic if the conditions on input values are not met.
+func (impl Implementation) Dorglq(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
+	if lwork == -1 {
+		work[0] = float64(m)
+		return
+	}
 	checkMatrix(m, n, a, lda)
+	if k < 0 {
+		panic(kLT0)
+	}
+	if k > m {
+		panic(kGTM)
+	}
+	if m > n {
+		panic(nLTM)
+	}
 	if len(tau) < k {
 		panic(badTau)
 	}
-	if k > m {
-		panic(kGTM)
+	if len(work) < lwork {
+		panic(shortWork)
 	}
-	if k > m {
-		panic(kGTM)
+	if lwork < m {
+		panic(badWork)
 	}
 	clapack.Dorglq(m, n, k, a, lda, tau)
 }

-func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64) {
-	checkMatrix(m, n, a, lda)
-	if len(tau) < k {
-		panic(badTau)
+// Dorgqr generates an m×n matrix Q with orthonormal columns defined by the
+// product of elementary reflectors as computed by Dgeqrf.
+//  Q = H(0) * H(2) * ... * H(k-1)
+// Dorgqr is the blocked version of dorg2r that makes greater use of level-3 BLAS
+// routines.
+//
+// len(tau) >= k, 0 <= k <= n, and 0 <= n <= m.
+//
+// Work is temporary storage, and lwork specifies the usable memory length.
+// The C interface does not support providing temporary storage. To provide compatibility
+// with native, lwork == -1 will not run Dorgqr but will instead write the minimum
+// work necessary to work[0]. If len(work) < lwork, Dgeqrf will panic, and at minimum
+// lwork >= n.
+//
+// Dorgqr will panic if the conditions on input values are not met.
+func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
+	if lwork == -1 {
+		work[0] = float64(n)
+		return
 	}
-	if len(work) < n {
-		panic(badWork)
+	checkMatrix(m, n, a, lda)
+	if k < 0 {
+		panic(kLT0)
 	}
 	if k > n {
 		panic(kGTN)
@@ -493,6 +539,15 @@ func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work [
 	if n > m {
 		panic(mLTN)
 	}
+	if len(tau) < k {
+		panic(badTau)
+	}
+	if len(work) < lwork {
+		panic(shortWork)
+	}
+	if lwork < n {
+		panic(badWork)
+	}
 	clapack.Dorgqr(m, n, k, a, lda, tau)
 }

--- a/cgo/lapack_test.go
+++ b/cgo/lapack_test.go
@@ -85,18 +85,26 @@ func (d blockedTranslate) Dorml2(side blas.Side, trans blas.Transpose, m, n, k i
 }

 func (d blockedTranslate) Dorg2r(m, n, k int, a []float64, lda int, tau, work []float64) {
-	impl.Dorgqr(m, n, k, a, lda, tau, work)
+	impl.Dorgqr(m, n, k, a, lda, tau, work, len(work))
 }

 func (d blockedTranslate) Dorgl2(m, n, k int, a []float64, lda int, tau, work []float64) {
-	impl.Dorglq(m, n, k, a, lda, tau, work)
+	impl.Dorglq(m, n, k, a, lda, tau, work, len(work))
 }

 func TestDorglq(t *testing.T) {
-	testlapack.Dorgl2Test(t, blockedTranslate{impl})
+	testlapack.DorglqTest(t, blockedTranslate{impl})
 }

 func TestDorgqr(t *testing.T) {
+	testlapack.DorgqrTest(t, blockedTranslate{impl})
+}
+
+func TestDorgl2(t *testing.T) {
+	testlapack.Dorgl2Test(t, blockedTranslate{impl})
+}
+
+func TestDorg2r(t *testing.T) {
 	testlapack.Dorg2rTest(t, blockedTranslate{impl})
 }

--- a/native/dorg2r.go
+++ b/native/dorg2r.go
@@ -9,12 +9,11 @@ import (
 	"github.com/gonum/blas/blas64"
 )

-// Dorg2r generates an m×n matrix Q with orthonormal columns defined by the
+// Dorgl2 generates an m×n matrix Q with orthonormal columns defined by the
 // product of elementary reflectors as computed by Dgeqrf.
 //  Q = H(0) * H(2) * ... * H(k-1)
-// The length of tau must be equal to k, and the length of work must be at least n.
-// It also must be that 0 <= k <= n. m >= n >= 0. Dorg2r will panic if these
-// conditions are not met.
+// len(tau) >= k, 0 <= k <= n, 0 <= n <= m, len(work) >= n.
+// Dorg2r will panic if these conditions are not met.
 func (impl Implementation) Dorg2r(m, n, k int, a []float64, lda int, tau []float64, work []float64) {
 	checkMatrix(m, n, a, lda)
 	if len(tau) < k {
@@ -29,6 +28,9 @@ func (impl Implementation) Dorg2r(m, n, k int, a []float64, lda int, tau []float
 	if n > m {
 		panic(mLTN)
 	}
+	if len(work) < n {
+		panic(badWork)
+	}
 	if n == 0 {
 		return
 	}
--- a/native/dorgl2.go
+++ b/native/dorgl2.go
@@ -12,9 +12,8 @@ import (
 // Dorgl2 generates an m×n matrix Q with orthonormal rows defined by the
 // first m rows product of elementary reflectors as computed by Dgelqf.
 //  Q = H(0) * H(2) * ... * H(k-1)
-// The length of tau must be equal to k, and the length of work must be at least n.
-// It also must be that 0 <= k <= m. n >= m >= 0. Dorgl2 will panic if these
-// conditions are not met.
+// len(tau) >= k, 0 <= k <= m, 0 <= m <= n, len(work) >= m.
+// Dorgl2 will panic if these conditions are not met.
 func (impl Implementation) Dorgl2(m, n, k int, a []float64, lda int, tau, work []float64) {
 	checkMatrix(m, n, a, lda)
 	if len(tau) < k {
@@ -26,6 +25,12 @@ func (impl Implementation) Dorgl2(m, n, k int, a []float64, lda int, tau, work [
 	if k > m {
 		panic(kGTM)
 	}
+	if m > n {
+		panic(nLTM)
+	}
+	if len(work) < m {
+		panic(badWork)
+	}
 	if m == 0 {
 		return
 	}
--- a/native/dorglq.go
+++ b/native/dorglq.go
@@ -0,0 +1,111 @@
+package native
+
+import (
+	"github.com/gonum/blas"
+	"github.com/gonum/lapack"
+)
+
+// Dorglq generates an m×n matrix Q with orthonormal columns defined by the
+// product of elementary reflectors as computed by Dgelqf.
+//  Q = H(0) * H(2) * ... * H(k-1)
+// Dorglq is the blocked version of dorgl2 that makes greater use of level-3 BLAS
+// routines.
+//
+// len(tau) >= k, 0 <= k <= n, and 0 <= n <= m.
+//
+// Work is temporary storage, and lwork specifies the usable memory length. At minimum,
+// lwork >= m, and the amount of blocking is limited by the usable length.
+// If lwork == -1, instead of computing Dorglq the optimal work length is stored
+// into work[0].
+//
+// Dorglq will panic if the conditions on input values are not met.
+func (impl Implementation) Dorglq(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
+	nb := impl.Ilaenv(1, "DORGLQ", " ", m, n, k, -1)
+	// work is treated as an n×nb matrix
+	if lwork == -1 {
+		work[0] = float64(max(1, m) * nb)
+		return
+	}
+	checkMatrix(m, n, a, lda)
+	if k < 0 {
+		panic(kLT0)
+	}
+	if k > m {
+		panic(kGTM)
+	}
+	if m > n {
+		panic(nLTM)
+	}
+	if len(tau) < k {
+		panic(badTau)
+	}
+	if len(work) < lwork {
+		panic(shortWork)
+	}
+	if lwork < m {
+		panic(badWork)
+	}
+	if m == 0 {
+		return
+	}
+	nbmin := 2 // Minimum number of blocks
+	var nx int // Minimum number of rows
+	iws := m   // Length of work needed
+	var ldwork int
+	if nb > 1 && nb < k {
+		nx = max(0, impl.Ilaenv(3, "DORGLQ", " ", m, n, k, -1))
+		if nx < k {
+			ldwork = nb
+			iws = m * ldwork
+			if lwork < iws {
+				nb = lwork / m
+				ldwork = nb
+				nbmin = max(2, impl.Ilaenv(2, "DORGLQ", " ", m, n, k, -1))
+			}
+		}
+	}
+	var ki, kk int
+	if nb >= nbmin && nb < k && nx < k {
+		// The first kk rows are handled by the blocked method.
+		// Note: lapack has nx here, but this means the last nx rows are handled
+		// serially which could be quite different than nb.
+		ki = ((k - nb - 1) / nb) * nb
+		kk = min(k, ki+nb)
+		for i := kk; i < m; i++ {
+			for j := 0; j < kk; j++ {
+				a[i*lda+j] = 0
+			}
+		}
+	}
+	if kk < m {
+		// Perform the operation on colums kk to the end.
+		impl.Dorgl2(m-kk, n-kk, k-kk, a[kk*lda+kk:], lda, tau[kk:], work)
+	}
+	if kk == 0 {
+		return
+	}
+	// Perform the operation on column-blocks
+	for i := ki; i >= 0; i -= nb {
+		ib := min(nb, k-i)
+		if i+ib < m {
+			impl.Dlarft(lapack.Forward, lapack.RowWise,
+				n-i, ib,
+				a[i*lda+i:], lda,
+				tau[i:],
+				work, ldwork)
+
+			impl.Dlarfb(blas.Right, blas.Trans, lapack.Forward, lapack.RowWise,
+				m-i-ib, n-i, ib,
+				a[i*lda+i:], lda,
+				work, ldwork,
+				a[(i+ib)*lda+i:], lda,
+				work[ib*ldwork:], ldwork)
+		}
+		impl.Dorgl2(ib, n-i, ib, a[i*lda+i:], lda, tau[i:], work)
+		for l := i; l < i+ib; l++ {
+			for j := 0; j < i; j++ {
+				a[l*lda+j] = 0
+			}
+		}
+	}
+}
--- a/native/dorgqr.go
+++ b/native/dorgqr.go
@@ -0,0 +1,115 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package native
+
+import (
+	"github.com/gonum/blas"
+	"github.com/gonum/lapack"
+)
+
+// Dorgqr generates an m×n matrix Q with orthonormal columns defined by the
+// product of elementary reflectors as computed by Dgeqrf.
+//  Q = H(0) * H(2) * ... * H(k-1)
+// Dorgqr is the blocked version of dorg2r that makes greater use of level-3 BLAS
+// routines.
+//
+// The length of tau must be equal to k, and the length of work must be at least n.
+// It also must be that 0 <= k <= n and 0 <= n <= m. Work is temporary storage,
+// and lwork specifies the usable memory length. At minimum, lwork >= n, and the
+// amount of blocking is limited by the usable length. If lwork == -1, instead of
+// computing Dorgqr the optimal work length is stored into work[0].
+//
+// Dorgqr will panic if the conditions on input values are not met.
+func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
+	nb := impl.Ilaenv(1, "DORGQR", " ", m, n, k, -1)
+	// work is treated as an n×nb matrix
+	if lwork == -1 {
+		work[0] = float64(max(1, n) * nb)
+		return
+	}
+	checkMatrix(m, n, a, lda)
+	if k < 0 {
+		panic(kLT0)
+	}
+	if k > n {
+		panic(kGTN)
+	}
+	if n > m {
+		panic(mLTN)
+	}
+	if len(tau) < k {
+		panic(badTau)
+	}
+	if len(work) < lwork {
+		panic(shortWork)
+	}
+	if lwork < n {
+		panic(badWork)
+	}
+	if n == 0 {
+		return
+	}
+	nbmin := 2 // Minimum number of blocks
+	var nx int // Minimum number of rows
+	iws := n   // Length of work needed
+	var ldwork int
+	if nb > 1 && nb < k {
+		nx = max(0, impl.Ilaenv(3, "DORGQR", " ", m, n, k, -1))
+		if nx < k {
+			ldwork = nb
+			iws = n * ldwork
+			if lwork < iws {
+				nb = lwork / n
+				ldwork = nb
+				nbmin = max(2, impl.Ilaenv(2, "DORGQR", " ", m, n, k, -1))
+			}
+		}
+	}
+	var ki, kk int
+	if nb >= nbmin && nb < k && nx < k {
+		// The first kk columns are handled by the blocked method.
+		// Note: lapack has nx here, but this means the last nx rows are handled
+		// serially which could be quite different than nb.
+		ki = ((k - nb - 1) / nb) * nb
+		kk = min(k, ki+nb)
+		for j := kk; j < n; j++ {
+			for i := 0; i < kk; i++ {
+				a[i*lda+j] = 0
+			}
+		}
+	}
+	if kk < n {
+		// Perform the operation on colums kk to the end.
+		impl.Dorg2r(m-kk, n-kk, k-kk, a[kk*lda+kk:], lda, tau[kk:], work)
+	}
+	if kk == 0 {
+		return
+	}
+	// Perform the operation on column-blocks
+	for i := ki; i >= 0; i -= nb {
+		ib := min(nb, k-i)
+		if i+ib < n {
+			impl.Dlarft(lapack.Forward, lapack.ColumnWise,
+				m-i, ib,
+				a[i*lda+i:], lda,
+				tau[i:],
+				work, ldwork)
+
+			impl.Dlarfb(blas.Left, blas.NoTrans, lapack.Forward, lapack.ColumnWise,
+				m-i, n-i-ib, ib,
+				a[i*lda+i:], lda,
+				work, ldwork,
+				a[i*lda+i+ib:], lda,
+				work[ib*ldwork:], ldwork)
+		}
+		impl.Dorg2r(m-i, ib, ib, a[i*lda+i:], lda, tau[i:], work)
+		// Set rows 0:i-1 of current block to zero
+		for j := i; j < i+ib; j++ {
+			for l := 0; l < i; l++ {
+				a[l*lda+j] = 0
+			}
+		}
+	}
+}
--- a/native/general.go
+++ b/native/general.go
@@ -36,9 +36,11 @@ const (
 	badWorkStride = "lapack: insufficient working array stride"
 	kGTM          = "lapack: k > m"
 	kGTN          = "lapack: k > n"
+	kLT0          = "lapack: k < 0"
 	mLTN          = "lapack: m < n"
 	negDimension  = "lapack: negative matrix dimension"
 	nLT0          = "lapack: n < 0"
+	nLTM          = "lapack: n < m"
 	shortWork     = "lapack: working array shorter than declared"
 )

--- a/native/lapack_test.go
+++ b/native/lapack_test.go
@@ -104,6 +104,22 @@ func TestDlasv2(t *testing.T) {
 	testlapack.Dlasv2Test(t, impl)
 }

+func TestDorg2r(t *testing.T) {
+	testlapack.Dorg2rTest(t, impl)
+}
+
+func TestDorgl2(t *testing.T) {
+	testlapack.Dorgl2Test(t, impl)
+}
+
+func TestDorglq(t *testing.T) {
+	testlapack.DorglqTest(t, impl)
+}
+
+func TestDorgqr(t *testing.T) {
+	testlapack.DorgqrTest(t, impl)
+}
+
 func TestDorml2(t *testing.T) {
 	testlapack.Dorml2Test(t, impl)
 }
@@ -120,14 +136,6 @@ func TestDorm2r(t *testing.T) {
 	testlapack.Dorm2rTest(t, impl)
 }

-func TestDorg2r(t *testing.T) {
-	testlapack.Dorg2rTest(t, impl)
-}
-
-func TestDorgl2(t *testing.T) {
-	testlapack.Dorgl2Test(t, impl)
-}
-
 func TestDpocon(t *testing.T) {
 	testlapack.DpoconTest(t, impl)
 }
--- a/testlapack/dorg2r.go
+++ b/testlapack/dorg2r.go
@@ -19,13 +19,17 @@ type Dorg2rer interface {

 func Dorg2rTest(t *testing.T, impl Dorg2rer) {
 	for _, test := range []struct {
-		m, n, lda int
+		m, n, k, lda int
 	}{
-		{3, 3, 0},
-		{4, 3, 0},
+		{3, 3, 0, 0},
+		{4, 3, 0, 0},
+		{3, 3, 2, 0},
+		{4, 3, 2, 0},

-		{5, 5, 20},
-		{10, 5, 20},
+		{5, 5, 0, 20},
+		{5, 5, 3, 20},
+		{10, 5, 0, 20},
+		{10, 5, 2, 20},
 	} {
 		m := test.m
 		n := test.n
@@ -44,7 +48,11 @@ func Dorg2rTest(t *testing.T, impl Dorg2rer) {
 		work = make([]float64, int(work[0]))
 		impl.Dgeqrf(m, n, a, lda, tau, work, len(work))

-		q := constructQ("QR", m, n, a, lda, tau)
+		k = test.k
+		if k == 0 {
+			k = n
+		}
+		q := constructQK("QR", m, n, k, a, lda, tau)

 		impl.Dorg2r(m, n, k, a, lda, tau, work)

--- a/testlapack/dorglq.go
+++ b/testlapack/dorglq.go
@@ -0,0 +1,82 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testlapack
+
+import (
+	"math"
+	"math/rand"
+	"testing"
+
+	"github.com/gonum/floats"
+)
+
+type Dorglqer interface {
+	Dorgl2er
+	Dorglq(m, n, k int, a []float64, lda int, tau, work []float64, lwork int)
+}
+
+func DorglqTest(t *testing.T, impl Dorglqer) {
+	// TODO(btracey): Base tests off of nb and nx.
+	for _, test := range []struct{ m, n, k, lda int }{
+		{10, 10, 10, 0},
+		{10, 10, 10, 20},
+		{10, 30, 10, 0},
+		{20, 30, 10, 0},
+
+		{100, 100, 100, 0},
+		{100, 100, 50, 0},
+		{100, 130, 100, 0},
+		{100, 130, 50, 0},
+		{100, 100, 100, 150},
+		{100, 100, 50, 150},
+		{100, 130, 100, 150},
+		{100, 130, 50, 150},
+
+		{200, 200, 200, 0},
+		{200, 200, 150, 0},
+		{200, 230, 200, 0},
+		{200, 230, 150, 0},
+		{200, 200, 200, 250},
+		{200, 200, 150, 250},
+		{200, 230, 200, 250},
+		{200, 230, 150, 250},
+	} {
+		m := test.m
+		n := test.n
+		k := test.k
+		lda := test.lda
+		if lda == 0 {
+			lda = n
+		}
+		a := make([]float64, m*lda)
+		for i := range a {
+			a[i] = rand.Float64()
+		}
+		work := make([]float64, 1)
+		tau := make([]float64, m)
+		for i := range tau {
+			tau[i] = math.NaN()
+		}
+		// Compute LQ factorization.
+		impl.Dgelqf(m, n, a, lda, tau, work, -1)
+		work = make([]float64, int(work[0]))
+		impl.Dgelqf(m, n, a, lda, tau, work, len(work))
+
+		aUnblocked := make([]float64, len(a))
+		copy(aUnblocked, a)
+		for i := range work {
+			work[i] = math.NaN()
+		}
+		impl.Dorgl2(m, n, k, aUnblocked, lda, tau, work)
+		// make sure work isn't used before initialized
+		for i := range work {
+			work[i] = math.NaN()
+		}
+		impl.Dorglq(m, n, k, a, lda, tau, work, len(work))
+		if !floats.EqualApprox(a, aUnblocked, 1e-10) {
+			t.Errorf("Q Mismatch. m = %d, n = %d, k = %d, lda = %d", m, n, k, lda)
+		}
+	}
+}
--- a/testlapack/dorgqr.go
+++ b/testlapack/dorgqr.go
@@ -0,0 +1,82 @@
+// Copyright ©2015 The gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testlapack
+
+import (
+	"math"
+	"math/rand"
+	"testing"
+
+	"github.com/gonum/floats"
+)
+
+type Dorgqrer interface {
+	Dorg2rer
+	Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64, lwork int)
+}
+
+func DorgqrTest(t *testing.T, impl Dorgqrer) {
+	// TODO(btracey): Base tests off of nb and nx.
+	for _, test := range []struct{ m, n, k, lda int }{
+		{10, 10, 10, 0},
+		{10, 10, 10, 20},
+		{30, 10, 10, 0},
+		{30, 20, 10, 0},
+
+		{100, 100, 100, 0},
+		{100, 100, 50, 0},
+		{130, 100, 100, 0},
+		{130, 100, 50, 0},
+		{100, 100, 100, 150},
+		{100, 100, 50, 150},
+		{130, 100, 100, 150},
+		{130, 100, 50, 150},
+
+		{200, 200, 200, 0},
+		{200, 200, 150, 0},
+		{230, 200, 200, 0},
+		{230, 200, 150, 0},
+		{200, 200, 200, 250},
+		{200, 200, 150, 250},
+		{230, 200, 200, 250},
+		{230, 200, 150, 250},
+	} {
+		m := test.m
+		n := test.n
+		k := test.k
+		lda := test.lda
+		if lda == 0 {
+			lda = n
+		}
+		a := make([]float64, m*lda)
+		for i := range a {
+			a[i] = rand.Float64()
+		}
+		work := make([]float64, 1)
+		tau := make([]float64, n)
+		for i := range tau {
+			tau[i] = math.NaN()
+		}
+		// Compute QR factorization.
+		impl.Dgeqrf(m, n, a, lda, tau, work, -1)
+		work = make([]float64, int(work[0]))
+		impl.Dgeqrf(m, n, a, lda, tau, work, len(work))
+
+		aUnblocked := make([]float64, len(a))
+		copy(aUnblocked, a)
+		for i := range work {
+			work[i] = math.NaN()
+		}
+		impl.Dorg2r(m, n, k, aUnblocked, lda, tau, work)
+		// make sure work isn't used before initialized
+		for i := range work {
+			work[i] = math.NaN()
+		}
+		impl.Dorgqr(m, n, k, a, lda, tau, work, len(work))
+		if !floats.EqualApprox(a, aUnblocked, 1e-10) {
+			t.Errorf("Q Mismatch. m = %d, n = %d, k = %d, lda = %d", m, n, k, lda)
+		}
+	}
+}
--- a/testlapack/general.go
+++ b/testlapack/general.go
@@ -226,9 +226,15 @@ func constructH(tau []float64, v blas64.General, store lapack.StoreV, direct lap
 	return h
 }

-// constructQ constructs the Q matrix from the result of dgeqrf and dgeqr2
+// constructQ constructs the Q matrix from the result of dgeqrf and dgeqr2.
 func constructQ(kind string, m, n int, a []float64, lda int, tau []float64) blas64.General {
 	k := min(m, n)
+	return constructQK(kind, m, n, k, a, lda, tau)
+}
+
+// constructQK constructs the Q matrix from the result of dgeqrf and dgeqr2 using
+// the first k reflectors.
+func constructQK(kind string, m, n, k int, a []float64, lda int, tau []float64) blas64.General {
 	var sz int
 	switch kind {
 	case "QR":