Add dorgqr and tests, update dorg2r tests, and fix cgo dorgqr

This commit is contained in:
btracey
2015-10-27 11:36:46 -06:00
parent 676338c65e
commit be045bbf9a
12 changed files with 520 additions and 36 deletions

View File

@@ -30,9 +30,11 @@ const (
badWorkStride = "lapack: insufficient working array stride" badWorkStride = "lapack: insufficient working array stride"
kGTM = "lapack: k > m" kGTM = "lapack: k > m"
kGTN = "lapack: k > n" kGTN = "lapack: k > n"
kLT0 = "lapack: k < 0"
mLTN = "lapack: m < n" mLTN = "lapack: m < n"
negDimension = "lapack: negative matrix dimension" negDimension = "lapack: negative matrix dimension"
nLT0 = "lapack: n < 0" nLT0 = "lapack: n < 0"
nLTM = "lapack: n < m"
shortWork = "lapack: working array shorter than declared" shortWork = "lapack: working array shorter than declared"
) )
@@ -465,27 +467,71 @@ func (impl Implementation) Dgetrs(trans blas.Transpose, n, nrhs int, a []float64
clapack.Dgetrs(trans, n, nrhs, a, lda, ipiv32, b, ldb) clapack.Dgetrs(trans, n, nrhs, a, lda, ipiv32, b, ldb)
} }
func (impl Implementation) Dorglq(m, n, k int, a []float64, lda int, tau, work []float64) { // Dorglq generates an m×n matrix Q with orthonormal rows defined by the
// product of elementary reflectors as computed by Dgelqf.
// Q = H(0) * H(2) * ... * H(k-1)
// Dorglq is the blocked version of dorgl2 that makes greater use of level-3 BLAS
// routines.
//
// len(tau) >= k, 0 <= k <= n, and 0 <= m <= n.
//
// Work is temporary storage, and lwork specifies the usable memory length.
// The C interface does not support providing temporary storage. To provide compatibility
// with native, lwork == -1 will not run Dorglq but will instead write the minimum
// work necessary to work[0]. If len(work) < lwork, Dgeqrf will panic, and at minimum
// lwork >= m.
//
// Dorgqr will panic if the conditions on input values are not met.
func (impl Implementation) Dorglq(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
if lwork == -1 {
work[0] = float64(m)
return
}
checkMatrix(m, n, a, lda) checkMatrix(m, n, a, lda)
if k < 0 {
panic(kLT0)
}
if k > m {
panic(kGTM)
}
if m > n {
panic(nLTM)
}
if len(tau) < k { if len(tau) < k {
panic(badTau) panic(badTau)
} }
if k > m { if len(work) < lwork {
panic(kGTM) panic(shortWork)
} }
if k > m { if lwork < m {
panic(kGTM) panic(badWork)
} }
clapack.Dorglq(m, n, k, a, lda, tau) clapack.Dorglq(m, n, k, a, lda, tau)
} }
func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64) { // Dorgqr generates an m×n matrix Q with orthonormal columns defined by the
checkMatrix(m, n, a, lda) // product of elementary reflectors as computed by Dgeqrf.
if len(tau) < k { // Q = H(0) * H(2) * ... * H(k-1)
panic(badTau) // Dorgqr is the blocked version of dorg2r that makes greater use of level-3 BLAS
// routines.
//
// len(tau) >= k, 0 <= k <= n, and 0 <= n <= m.
//
// Work is temporary storage, and lwork specifies the usable memory length.
// The C interface does not support providing temporary storage. To provide compatibility
// with native, lwork == -1 will not run Dorgqr but will instead write the minimum
// work necessary to work[0]. If len(work) < lwork, Dgeqrf will panic, and at minimum
// lwork >= n.
//
// Dorgqr will panic if the conditions on input values are not met.
func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
if lwork == -1 {
work[0] = float64(n)
return
} }
if len(work) < n { checkMatrix(m, n, a, lda)
panic(badWork) if k < 0 {
panic(kLT0)
} }
if k > n { if k > n {
panic(kGTN) panic(kGTN)
@@ -493,6 +539,15 @@ func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work [
if n > m { if n > m {
panic(mLTN) panic(mLTN)
} }
if len(tau) < k {
panic(badTau)
}
if len(work) < lwork {
panic(shortWork)
}
if lwork < n {
panic(badWork)
}
clapack.Dorgqr(m, n, k, a, lda, tau) clapack.Dorgqr(m, n, k, a, lda, tau)
} }

View File

@@ -85,18 +85,26 @@ func (d blockedTranslate) Dorml2(side blas.Side, trans blas.Transpose, m, n, k i
} }
func (d blockedTranslate) Dorg2r(m, n, k int, a []float64, lda int, tau, work []float64) { func (d blockedTranslate) Dorg2r(m, n, k int, a []float64, lda int, tau, work []float64) {
impl.Dorgqr(m, n, k, a, lda, tau, work) impl.Dorgqr(m, n, k, a, lda, tau, work, len(work))
} }
func (d blockedTranslate) Dorgl2(m, n, k int, a []float64, lda int, tau, work []float64) { func (d blockedTranslate) Dorgl2(m, n, k int, a []float64, lda int, tau, work []float64) {
impl.Dorglq(m, n, k, a, lda, tau, work) impl.Dorglq(m, n, k, a, lda, tau, work, len(work))
} }
func TestDorglq(t *testing.T) { func TestDorglq(t *testing.T) {
testlapack.Dorgl2Test(t, blockedTranslate{impl}) testlapack.DorglqTest(t, blockedTranslate{impl})
} }
func TestDorgqr(t *testing.T) { func TestDorgqr(t *testing.T) {
testlapack.DorgqrTest(t, blockedTranslate{impl})
}
func TestDorgl2(t *testing.T) {
testlapack.Dorgl2Test(t, blockedTranslate{impl})
}
func TestDorg2r(t *testing.T) {
testlapack.Dorg2rTest(t, blockedTranslate{impl}) testlapack.Dorg2rTest(t, blockedTranslate{impl})
} }

View File

@@ -9,12 +9,11 @@ import (
"github.com/gonum/blas/blas64" "github.com/gonum/blas/blas64"
) )
// Dorg2r generates an m×n matrix Q with orthonormal columns defined by the // Dorgl2 generates an m×n matrix Q with orthonormal columns defined by the
// product of elementary reflectors as computed by Dgeqrf. // product of elementary reflectors as computed by Dgeqrf.
// Q = H(0) * H(2) * ... * H(k-1) // Q = H(0) * H(2) * ... * H(k-1)
// The length of tau must be equal to k, and the length of work must be at least n. // len(tau) >= k, 0 <= k <= n, 0 <= n <= m, len(work) >= n.
// It also must be that 0 <= k <= n. m >= n >= 0. Dorg2r will panic if these // Dorg2r will panic if these conditions are not met.
// conditions are not met.
func (impl Implementation) Dorg2r(m, n, k int, a []float64, lda int, tau []float64, work []float64) { func (impl Implementation) Dorg2r(m, n, k int, a []float64, lda int, tau []float64, work []float64) {
checkMatrix(m, n, a, lda) checkMatrix(m, n, a, lda)
if len(tau) < k { if len(tau) < k {
@@ -29,6 +28,9 @@ func (impl Implementation) Dorg2r(m, n, k int, a []float64, lda int, tau []float
if n > m { if n > m {
panic(mLTN) panic(mLTN)
} }
if len(work) < n {
panic(badWork)
}
if n == 0 { if n == 0 {
return return
} }

View File

@@ -12,9 +12,8 @@ import (
// Dorgl2 generates an m×n matrix Q with orthonormal rows defined by the // Dorgl2 generates an m×n matrix Q with orthonormal rows defined by the
// first m rows product of elementary reflectors as computed by Dgelqf. // first m rows product of elementary reflectors as computed by Dgelqf.
// Q = H(0) * H(2) * ... * H(k-1) // Q = H(0) * H(2) * ... * H(k-1)
// The length of tau must be equal to k, and the length of work must be at least n. // len(tau) >= k, 0 <= k <= m, 0 <= m <= n, len(work) >= m.
// It also must be that 0 <= k <= m. n >= m >= 0. Dorgl2 will panic if these // Dorgl2 will panic if these conditions are not met.
// conditions are not met.
func (impl Implementation) Dorgl2(m, n, k int, a []float64, lda int, tau, work []float64) { func (impl Implementation) Dorgl2(m, n, k int, a []float64, lda int, tau, work []float64) {
checkMatrix(m, n, a, lda) checkMatrix(m, n, a, lda)
if len(tau) < k { if len(tau) < k {
@@ -26,6 +25,12 @@ func (impl Implementation) Dorgl2(m, n, k int, a []float64, lda int, tau, work [
if k > m { if k > m {
panic(kGTM) panic(kGTM)
} }
if m > n {
panic(nLTM)
}
if len(work) < m {
panic(badWork)
}
if m == 0 { if m == 0 {
return return
} }

111
native/dorglq.go Normal file
View File

@@ -0,0 +1,111 @@
package native
import (
"github.com/gonum/blas"
"github.com/gonum/lapack"
)
// Dorglq generates an m×n matrix Q with orthonormal columns defined by the
// product of elementary reflectors as computed by Dgelqf.
// Q = H(0) * H(2) * ... * H(k-1)
// Dorglq is the blocked version of dorgl2 that makes greater use of level-3 BLAS
// routines.
//
// len(tau) >= k, 0 <= k <= n, and 0 <= n <= m.
//
// Work is temporary storage, and lwork specifies the usable memory length. At minimum,
// lwork >= m, and the amount of blocking is limited by the usable length.
// If lwork == -1, instead of computing Dorglq the optimal work length is stored
// into work[0].
//
// Dorglq will panic if the conditions on input values are not met.
func (impl Implementation) Dorglq(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
nb := impl.Ilaenv(1, "DORGLQ", " ", m, n, k, -1)
// work is treated as an n×nb matrix
if lwork == -1 {
work[0] = float64(max(1, m) * nb)
return
}
checkMatrix(m, n, a, lda)
if k < 0 {
panic(kLT0)
}
if k > m {
panic(kGTM)
}
if m > n {
panic(nLTM)
}
if len(tau) < k {
panic(badTau)
}
if len(work) < lwork {
panic(shortWork)
}
if lwork < m {
panic(badWork)
}
if m == 0 {
return
}
nbmin := 2 // Minimum number of blocks
var nx int // Minimum number of rows
iws := m // Length of work needed
var ldwork int
if nb > 1 && nb < k {
nx = max(0, impl.Ilaenv(3, "DORGLQ", " ", m, n, k, -1))
if nx < k {
ldwork = nb
iws = m * ldwork
if lwork < iws {
nb = lwork / m
ldwork = nb
nbmin = max(2, impl.Ilaenv(2, "DORGLQ", " ", m, n, k, -1))
}
}
}
var ki, kk int
if nb >= nbmin && nb < k && nx < k {
// The first kk rows are handled by the blocked method.
// Note: lapack has nx here, but this means the last nx rows are handled
// serially which could be quite different than nb.
ki = ((k - nb - 1) / nb) * nb
kk = min(k, ki+nb)
for i := kk; i < m; i++ {
for j := 0; j < kk; j++ {
a[i*lda+j] = 0
}
}
}
if kk < m {
// Perform the operation on colums kk to the end.
impl.Dorgl2(m-kk, n-kk, k-kk, a[kk*lda+kk:], lda, tau[kk:], work)
}
if kk == 0 {
return
}
// Perform the operation on column-blocks
for i := ki; i >= 0; i -= nb {
ib := min(nb, k-i)
if i+ib < m {
impl.Dlarft(lapack.Forward, lapack.RowWise,
n-i, ib,
a[i*lda+i:], lda,
tau[i:],
work, ldwork)
impl.Dlarfb(blas.Right, blas.Trans, lapack.Forward, lapack.RowWise,
m-i-ib, n-i, ib,
a[i*lda+i:], lda,
work, ldwork,
a[(i+ib)*lda+i:], lda,
work[ib*ldwork:], ldwork)
}
impl.Dorgl2(ib, n-i, ib, a[i*lda+i:], lda, tau[i:], work)
for l := i; l < i+ib; l++ {
for j := 0; j < i; j++ {
a[l*lda+j] = 0
}
}
}
}

115
native/dorgqr.go Normal file
View File

@@ -0,0 +1,115 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package native
import (
"github.com/gonum/blas"
"github.com/gonum/lapack"
)
// Dorgqr generates an m×n matrix Q with orthonormal columns defined by the
// product of elementary reflectors as computed by Dgeqrf.
// Q = H(0) * H(2) * ... * H(k-1)
// Dorgqr is the blocked version of dorg2r that makes greater use of level-3 BLAS
// routines.
//
// The length of tau must be equal to k, and the length of work must be at least n.
// It also must be that 0 <= k <= n and 0 <= n <= m. Work is temporary storage,
// and lwork specifies the usable memory length. At minimum, lwork >= n, and the
// amount of blocking is limited by the usable length. If lwork == -1, instead of
// computing Dorgqr the optimal work length is stored into work[0].
//
// Dorgqr will panic if the conditions on input values are not met.
func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
nb := impl.Ilaenv(1, "DORGQR", " ", m, n, k, -1)
// work is treated as an n×nb matrix
if lwork == -1 {
work[0] = float64(max(1, n) * nb)
return
}
checkMatrix(m, n, a, lda)
if k < 0 {
panic(kLT0)
}
if k > n {
panic(kGTN)
}
if n > m {
panic(mLTN)
}
if len(tau) < k {
panic(badTau)
}
if len(work) < lwork {
panic(shortWork)
}
if lwork < n {
panic(badWork)
}
if n == 0 {
return
}
nbmin := 2 // Minimum number of blocks
var nx int // Minimum number of rows
iws := n // Length of work needed
var ldwork int
if nb > 1 && nb < k {
nx = max(0, impl.Ilaenv(3, "DORGQR", " ", m, n, k, -1))
if nx < k {
ldwork = nb
iws = n * ldwork
if lwork < iws {
nb = lwork / n
ldwork = nb
nbmin = max(2, impl.Ilaenv(2, "DORGQR", " ", m, n, k, -1))
}
}
}
var ki, kk int
if nb >= nbmin && nb < k && nx < k {
// The first kk columns are handled by the blocked method.
// Note: lapack has nx here, but this means the last nx rows are handled
// serially which could be quite different than nb.
ki = ((k - nb - 1) / nb) * nb
kk = min(k, ki+nb)
for j := kk; j < n; j++ {
for i := 0; i < kk; i++ {
a[i*lda+j] = 0
}
}
}
if kk < n {
// Perform the operation on colums kk to the end.
impl.Dorg2r(m-kk, n-kk, k-kk, a[kk*lda+kk:], lda, tau[kk:], work)
}
if kk == 0 {
return
}
// Perform the operation on column-blocks
for i := ki; i >= 0; i -= nb {
ib := min(nb, k-i)
if i+ib < n {
impl.Dlarft(lapack.Forward, lapack.ColumnWise,
m-i, ib,
a[i*lda+i:], lda,
tau[i:],
work, ldwork)
impl.Dlarfb(blas.Left, blas.NoTrans, lapack.Forward, lapack.ColumnWise,
m-i, n-i-ib, ib,
a[i*lda+i:], lda,
work, ldwork,
a[i*lda+i+ib:], lda,
work[ib*ldwork:], ldwork)
}
impl.Dorg2r(m-i, ib, ib, a[i*lda+i:], lda, tau[i:], work)
// Set rows 0:i-1 of current block to zero
for j := i; j < i+ib; j++ {
for l := 0; l < i; l++ {
a[l*lda+j] = 0
}
}
}
}

View File

@@ -36,9 +36,11 @@ const (
badWorkStride = "lapack: insufficient working array stride" badWorkStride = "lapack: insufficient working array stride"
kGTM = "lapack: k > m" kGTM = "lapack: k > m"
kGTN = "lapack: k > n" kGTN = "lapack: k > n"
kLT0 = "lapack: k < 0"
mLTN = "lapack: m < n" mLTN = "lapack: m < n"
negDimension = "lapack: negative matrix dimension" negDimension = "lapack: negative matrix dimension"
nLT0 = "lapack: n < 0" nLT0 = "lapack: n < 0"
nLTM = "lapack: n < m"
shortWork = "lapack: working array shorter than declared" shortWork = "lapack: working array shorter than declared"
) )

View File

@@ -104,6 +104,22 @@ func TestDlasv2(t *testing.T) {
testlapack.Dlasv2Test(t, impl) testlapack.Dlasv2Test(t, impl)
} }
func TestDorg2r(t *testing.T) {
testlapack.Dorg2rTest(t, impl)
}
func TestDorgl2(t *testing.T) {
testlapack.Dorgl2Test(t, impl)
}
func TestDorglq(t *testing.T) {
testlapack.DorglqTest(t, impl)
}
func TestDorgqr(t *testing.T) {
testlapack.DorgqrTest(t, impl)
}
func TestDorml2(t *testing.T) { func TestDorml2(t *testing.T) {
testlapack.Dorml2Test(t, impl) testlapack.Dorml2Test(t, impl)
} }
@@ -120,14 +136,6 @@ func TestDorm2r(t *testing.T) {
testlapack.Dorm2rTest(t, impl) testlapack.Dorm2rTest(t, impl)
} }
func TestDorg2r(t *testing.T) {
testlapack.Dorg2rTest(t, impl)
}
func TestDorgl2(t *testing.T) {
testlapack.Dorgl2Test(t, impl)
}
func TestDpocon(t *testing.T) { func TestDpocon(t *testing.T) {
testlapack.DpoconTest(t, impl) testlapack.DpoconTest(t, impl)
} }

View File

@@ -19,13 +19,17 @@ type Dorg2rer interface {
func Dorg2rTest(t *testing.T, impl Dorg2rer) { func Dorg2rTest(t *testing.T, impl Dorg2rer) {
for _, test := range []struct { for _, test := range []struct {
m, n, lda int m, n, k, lda int
}{ }{
{3, 3, 0}, {3, 3, 0, 0},
{4, 3, 0}, {4, 3, 0, 0},
{3, 3, 2, 0},
{4, 3, 2, 0},
{5, 5, 20}, {5, 5, 0, 20},
{10, 5, 20}, {5, 5, 3, 20},
{10, 5, 0, 20},
{10, 5, 2, 20},
} { } {
m := test.m m := test.m
n := test.n n := test.n
@@ -44,7 +48,11 @@ func Dorg2rTest(t *testing.T, impl Dorg2rer) {
work = make([]float64, int(work[0])) work = make([]float64, int(work[0]))
impl.Dgeqrf(m, n, a, lda, tau, work, len(work)) impl.Dgeqrf(m, n, a, lda, tau, work, len(work))
q := constructQ("QR", m, n, a, lda, tau) k = test.k
if k == 0 {
k = n
}
q := constructQK("QR", m, n, k, a, lda, tau)
impl.Dorg2r(m, n, k, a, lda, tau, work) impl.Dorg2r(m, n, k, a, lda, tau, work)

82
testlapack/dorglq.go Normal file
View File

@@ -0,0 +1,82 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package testlapack
import (
"math"
"math/rand"
"testing"
"github.com/gonum/floats"
)
type Dorglqer interface {
Dorgl2er
Dorglq(m, n, k int, a []float64, lda int, tau, work []float64, lwork int)
}
func DorglqTest(t *testing.T, impl Dorglqer) {
// TODO(btracey): Base tests off of nb and nx.
for _, test := range []struct{ m, n, k, lda int }{
{10, 10, 10, 0},
{10, 10, 10, 20},
{10, 30, 10, 0},
{20, 30, 10, 0},
{100, 100, 100, 0},
{100, 100, 50, 0},
{100, 130, 100, 0},
{100, 130, 50, 0},
{100, 100, 100, 150},
{100, 100, 50, 150},
{100, 130, 100, 150},
{100, 130, 50, 150},
{200, 200, 200, 0},
{200, 200, 150, 0},
{200, 230, 200, 0},
{200, 230, 150, 0},
{200, 200, 200, 250},
{200, 200, 150, 250},
{200, 230, 200, 250},
{200, 230, 150, 250},
} {
m := test.m
n := test.n
k := test.k
lda := test.lda
if lda == 0 {
lda = n
}
a := make([]float64, m*lda)
for i := range a {
a[i] = rand.Float64()
}
work := make([]float64, 1)
tau := make([]float64, m)
for i := range tau {
tau[i] = math.NaN()
}
// Compute LQ factorization.
impl.Dgelqf(m, n, a, lda, tau, work, -1)
work = make([]float64, int(work[0]))
impl.Dgelqf(m, n, a, lda, tau, work, len(work))
aUnblocked := make([]float64, len(a))
copy(aUnblocked, a)
for i := range work {
work[i] = math.NaN()
}
impl.Dorgl2(m, n, k, aUnblocked, lda, tau, work)
// make sure work isn't used before initialized
for i := range work {
work[i] = math.NaN()
}
impl.Dorglq(m, n, k, a, lda, tau, work, len(work))
if !floats.EqualApprox(a, aUnblocked, 1e-10) {
t.Errorf("Q Mismatch. m = %d, n = %d, k = %d, lda = %d", m, n, k, lda)
}
}
}

82
testlapack/dorgqr.go Normal file
View File

@@ -0,0 +1,82 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package testlapack
import (
"math"
"math/rand"
"testing"
"github.com/gonum/floats"
)
type Dorgqrer interface {
Dorg2rer
Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64, lwork int)
}
func DorgqrTest(t *testing.T, impl Dorgqrer) {
// TODO(btracey): Base tests off of nb and nx.
for _, test := range []struct{ m, n, k, lda int }{
{10, 10, 10, 0},
{10, 10, 10, 20},
{30, 10, 10, 0},
{30, 20, 10, 0},
{100, 100, 100, 0},
{100, 100, 50, 0},
{130, 100, 100, 0},
{130, 100, 50, 0},
{100, 100, 100, 150},
{100, 100, 50, 150},
{130, 100, 100, 150},
{130, 100, 50, 150},
{200, 200, 200, 0},
{200, 200, 150, 0},
{230, 200, 200, 0},
{230, 200, 150, 0},
{200, 200, 200, 250},
{200, 200, 150, 250},
{230, 200, 200, 250},
{230, 200, 150, 250},
} {
m := test.m
n := test.n
k := test.k
lda := test.lda
if lda == 0 {
lda = n
}
a := make([]float64, m*lda)
for i := range a {
a[i] = rand.Float64()
}
work := make([]float64, 1)
tau := make([]float64, n)
for i := range tau {
tau[i] = math.NaN()
}
// Compute QR factorization.
impl.Dgeqrf(m, n, a, lda, tau, work, -1)
work = make([]float64, int(work[0]))
impl.Dgeqrf(m, n, a, lda, tau, work, len(work))
aUnblocked := make([]float64, len(a))
copy(aUnblocked, a)
for i := range work {
work[i] = math.NaN()
}
impl.Dorg2r(m, n, k, aUnblocked, lda, tau, work)
// make sure work isn't used before initialized
for i := range work {
work[i] = math.NaN()
}
impl.Dorgqr(m, n, k, a, lda, tau, work, len(work))
if !floats.EqualApprox(a, aUnblocked, 1e-10) {
t.Errorf("Q Mismatch. m = %d, n = %d, k = %d, lda = %d", m, n, k, lda)
}
}
}

View File

@@ -226,9 +226,15 @@ func constructH(tau []float64, v blas64.General, store lapack.StoreV, direct lap
return h return h
} }
// constructQ constructs the Q matrix from the result of dgeqrf and dgeqr2 // constructQ constructs the Q matrix from the result of dgeqrf and dgeqr2.
func constructQ(kind string, m, n int, a []float64, lda int, tau []float64) blas64.General { func constructQ(kind string, m, n int, a []float64, lda int, tau []float64) blas64.General {
k := min(m, n) k := min(m, n)
return constructQK(kind, m, n, k, a, lda, tau)
}
// constructQK constructs the Q matrix from the result of dgeqrf and dgeqr2 using
// the first k reflectors.
func constructQK(kind string, m, n, k int, a []float64, lda int, tau []float64) blas64.General {
var sz int var sz int
switch kind { switch kind {
case "QR": case "QR":