diff --git a/cgo/lapack.go b/cgo/lapack.go index 09d7630e..f4f6e53a 100644 --- a/cgo/lapack.go +++ b/cgo/lapack.go @@ -13,35 +13,36 @@ import ( // Copied from lapack/native. Keep in sync. const ( - absIncNotOne = "lapack: increment not one or negative one" - badD = "lapack: d has insufficient length" - badDiag = "lapack: bad diag" - badDirect = "lapack: bad direct" - badE = "lapack: e has insufficient length" - badIpiv = "lapack: insufficient permutation length" - badLdA = "lapack: index of a out of range" - badNorm = "lapack: bad norm" - badPivot = "lapack: bad pivot" - badSide = "lapack: bad side" - badSlice = "lapack: bad input slice length" - badStore = "lapack: bad store" - badTau = "lapack: tau has insufficient length" - badTauQ = "lapack: tauQ has insufficient length" - badTauP = "lapack: tauP has insufficient length" - badTrans = "lapack: bad trans" - badUplo = "lapack: illegal triangle" - badWork = "lapack: insufficient working memory" - badWorkStride = "lapack: insufficient working array stride" - badZ = "lapack: insufficient z length" - kGTM = "lapack: k > m" - kGTN = "lapack: k > n" - kLT0 = "lapack: k < 0" - mLTN = "lapack: m < n" - negDimension = "lapack: negative matrix dimension" - negZ = "lapack: negative z value" - nLT0 = "lapack: n < 0" - nLTM = "lapack: n < m" - shortWork = "lapack: working array shorter than declared" + absIncNotOne = "lapack: increment not one or negative one" + badD = "lapack: d has insufficient length" + badDecompUpdate = "lapack: bad decomp update" + badDiag = "lapack: bad diag" + badDirect = "lapack: bad direct" + badE = "lapack: e has insufficient length" + badIpiv = "lapack: insufficient permutation length" + badLdA = "lapack: index of a out of range" + badNorm = "lapack: bad norm" + badPivot = "lapack: bad pivot" + badSide = "lapack: bad side" + badSlice = "lapack: bad input slice length" + badStore = "lapack: bad store" + badTau = "lapack: tau has insufficient length" + badTauQ = "lapack: tauQ has insufficient length" + badTauP = "lapack: tauP has insufficient length" + badTrans = "lapack: bad trans" + badUplo = "lapack: illegal triangle" + badWork = "lapack: insufficient working memory" + badWorkStride = "lapack: insufficient working array stride" + badZ = "lapack: insufficient z length" + kGTM = "lapack: k > m" + kGTN = "lapack: k > n" + kLT0 = "lapack: k < 0" + mLTN = "lapack: m < n" + negDimension = "lapack: negative matrix dimension" + negZ = "lapack: negative z value" + nLT0 = "lapack: n < 0" + nLTM = "lapack: n < m" + shortWork = "lapack: working array shorter than declared" ) func min(m, n int) int { @@ -170,6 +171,74 @@ func (impl Implementation) Dpotrf(ul blas.Uplo, n int, a []float64, lda int) (ok return clapack.Dpotrf(ul, n, a, lda) } +// Dgebrd reduces a general m×n matrix A to upper or lower bidiagonal form B by +// an orthogonal transformation: +// Q^T * A * P = B. +// The diagonal elements of B are stored in d and the off-diagonal elements are +// stored in e. These are additionally stored along the diagonal of A and the +// off-diagonal of A. If m >= n B is an upper-bidiagonal matrix, and if m < n B +// is a lower-bidiagonal matrix. +// +// The remaining elements of A store the data needed to construct Q and P. +// The matrices Q and P are products of elementary reflectors +// Q = H_1 * H_2 * ... * H_nb +// P = G_1 * G_2 * ... * G_nb +// where +// H_i = I - tauQ[i] * v_i * v_i^T +// G_i = I - tauP[i] * u_i * u_i^T +// +// As an example, on exit the entries of A when m = 6, and n = 5 +// ( d e u1 u1 u1 ) +// ( v1 d e u2 u2 ) +// ( v1 v2 d e u3 ) +// ( v1 v2 v3 d e ) +// ( v1 v2 v3 v4 d ) +// ( v1 v2 v3 v4 v5 ) +// and when m = 5, n = 6 +// ( d u1 u1 u1 u1 u1 ) +// ( e d u2 u2 u2 u2 ) +// ( v1 e d u3 u3 u3 ) +// ( v1 v2 e d u4 u4 ) +// ( v1 v2 v3 e d u5 ) +// +// d, tauQ, and tauP must all have length at least min(m,n), and e must have +// length min(m,n) - 1. +// +// Work is temporary storage, and lwork specifies the usable memory length. +// At minimum, lwork >= max(m,n) and this function will panic otherwise. +// The C interface does not support providing temporary storage. To provide compatibility +// with native, lwork == -1 will not run Dgeqrf but will instead write the minimum +// work necessary to work[0]. If len(work) < lwork, Dgbrd will panic. +func (impl Implementation) Dgebrd(m, n int, a []float64, lda int, d, e, tauQ, tauP, work []float64, lwork int) { + checkMatrix(m, n, a, lda) + minmn := min(m, n) + if len(d) < minmn { + panic(badD) + } + if len(e) < minmn-1 { + panic(badE) + } + if len(tauQ) < minmn { + panic(badTauQ) + } + if len(tauP) < minmn { + panic(badTauP) + } + ws := max(m, n) + if lwork == -1 { + work[0] = float64(ws) + return + } + if lwork < ws { + panic(badWork) + } + if len(work) < lwork { + panic(badWork) + } + + clapack.Dgebrd(m, n, a, lda, d, e, tauQ, tauP) +} + // Dgecon estimates the reciprocal of the condition number of the n×n matrix A // given the LU decomposition of the matrix. The condition number computed may // be based on the 1-norm or the ∞-norm. @@ -557,6 +626,53 @@ func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work [ clapack.Dorgqr(m, n, k, a, lda, tau) } +// Dormbr applies a multiplicative update to the matrix C based on a +// decomposition computed by Dgebrd. +// +// Dormbr computes +// Q * C if vect == lapack.ApplyQ, side == blas.Left, and trans == blas.NoTrans +// C * Q if vect == lapack.ApplyQ, side == blas.Right, and trans == blas.NoTrans +// Q^T * C if vect == lapack.ApplyQ, side == blas.Left, and trans == blas.Trans +// C * Q^T if vect == lapack.ApplyQ, side == blas.Right, and trans == blas.Trans +// +// P * C if vect == lapack.ApplyP, side == blas.Left, and trans == blas.NoTrans +// C * P if vect == lapack.ApplyP, side == blas.Left, and trans == blas.NoTrans +// P^T * C if vect == lapack.ApplyP, side == blas.Right, and trans == blas.Trans +// C * P^T if vect == lapack.ApplyP, side == blas.Right, and trans == blas.Trans +// where P and Q are the orthogonal matrices determined by Dgebrd, A = Q * B * P^T. +// See Dgebrd for the definitions of Q and P. +// +// If vect == lapack.ApplyQ, A is assumed to have been an nq×k matrix, while if +// vect == lapack.ApplyP, A is assumed to have been a k×nq matrix. nq = m if +// side == blas.Left, while nq = n if side == blas.Right. +// +// C is an m×n matrix. On exit it is updated by the multiplication listed above. +// +// Tau must have length min(nq,k), and Dormbr will panic otherwise. Tau contains +// the elementary reflectors to construct Q or P depending on the value of +// vect. +func (impl Implementation) Dormbr(vect lapack.DecompUpdate, side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) { + if side != blas.Left && side != blas.Right { + panic(badSide) + } + if trans != blas.NoTrans && trans != blas.Trans { + panic(badTrans) + } + if vect != lapack.ApplyP && vect != lapack.ApplyQ { + panic(badDecompUpdate) + } + nq := n + if side == blas.Left { + nq = m + } + if vect == lapack.ApplyQ { + checkMatrix(nq, min(nq, k), a, lda) + } else { + checkMatrix(min(nq, k), nq, a, lda) + } + clapack.Dormbr(byte(vect), side, trans, m, n, k, a, lda, tau, c, ldc) +} + // Dormlq multiplies the matrix C by the othogonal matrix Q defined by the // slices a and tau. A and tau are as returned from Dgelqf. // C = Q * C if side == blas.Left and trans == blas.NoTrans diff --git a/cgo/lapack_test.go b/cgo/lapack_test.go index 3f0b1163..52ca2ec5 100644 --- a/cgo/lapack_test.go +++ b/cgo/lapack_test.go @@ -13,6 +13,33 @@ import ( var impl = Implementation{} +// blockedTranslate transforms some blocked C calls to be the unblocked algorithms +// for testing, as several of the unblocked algorithms are not defined by the C +// interface. +type blockedTranslate struct { + Implementation +} + +func (bl blockedTranslate) Dgebd2(m, n int, a []float64, lda int, d, e, tauQ, tauP, work []float64) { + impl.Dgebrd(m, n, a, lda, d, e, tauQ, tauP, work, len(work)) +} + +func (bl blockedTranslate) Dorm2r(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64) { + impl.Dormqr(side, trans, m, n, k, a, lda, tau, c, ldc, work, len(work)) +} + +func (bl blockedTranslate) Dorml2(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64) { + impl.Dormlq(side, trans, m, n, k, a, lda, tau, c, ldc, work, len(work)) +} + +func (bl blockedTranslate) Dorg2r(m, n, k int, a []float64, lda int, tau, work []float64) { + impl.Dorgqr(m, n, k, a, lda, tau, work, len(work)) +} + +func (bl blockedTranslate) Dorgl2(m, n, k int, a []float64, lda int, tau, work []float64) { + impl.Dorglq(m, n, k, a, lda, tau, work, len(work)) +} + func TestDlacpy(t *testing.T) { testlapack.DlacpyTest(t, impl) } @@ -29,6 +56,10 @@ func TestDpotrf(t *testing.T) { testlapack.DpotrfTest(t, impl) } +func TestDgebd2(t *testing.T) { + testlapack.Dgebd2Test(t, blockedTranslate{impl}) +} + func TestDgecon(t *testing.T) { testlapack.DgeconTest(t, impl) } @@ -69,29 +100,6 @@ func TestDgetrs(t *testing.T) { testlapack.DgetrsTest(t, impl) } -// blockedTranslate transforms some blocked C calls to be the unblocked algorithms -// for testing, as several of the unblocked algorithms are not defined by the C -// interface. -type blockedTranslate struct { - Implementation -} - -func (d blockedTranslate) Dorm2r(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64) { - impl.Dormqr(side, trans, m, n, k, a, lda, tau, c, ldc, work, len(work)) -} - -func (d blockedTranslate) Dorml2(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64) { - impl.Dormlq(side, trans, m, n, k, a, lda, tau, c, ldc, work, len(work)) -} - -func (d blockedTranslate) Dorg2r(m, n, k int, a []float64, lda int, tau, work []float64) { - impl.Dorgqr(m, n, k, a, lda, tau, work, len(work)) -} - -func (d blockedTranslate) Dorgl2(m, n, k int, a []float64, lda int, tau, work []float64) { - impl.Dorglq(m, n, k, a, lda, tau, work, len(work)) -} - func TestDorglq(t *testing.T) { testlapack.DorglqTest(t, blockedTranslate{impl}) } @@ -108,12 +116,27 @@ func TestDorg2r(t *testing.T) { testlapack.Dorg2rTest(t, blockedTranslate{impl}) } +/* +// Test disabled because of bug in c interface. Leaving stub for easy reproducer. +// +// Bug at: https://github.com/xianyi/OpenBLAS/issues/712 +// Fix at: https://github.com/xianyi/OpenBLAS/pull/713 +// Easily copiable fix: https://github.com/gonum/lapack/pull/74#issuecomment-163142140 +func TestDormbr(t *testing.T) { + testlapack.DormbrTest(t, blockedTranslate{impl}) +} +*/ + func TestDormqr(t *testing.T) { testlapack.Dorm2rTest(t, blockedTranslate{impl}) } /* // Test disabled because of bug in c interface. Leaving stub for easy reproducer. +// +// Bug at: https://github.com/xianyi/OpenBLAS/issues/615 +// Fix at: https://github.com/xianyi/OpenBLAS/pull/711 +// Easily copiable fix: https://github.com/gonum/lapack/pull/74#issuecomment-163110751 func TestDormlq(t *testing.T) { testlapack.Dorml2Test(t, blockedTranslate{impl}) } diff --git a/native/dormbr.go b/native/dormbr.go new file mode 100644 index 00000000..157dcdb5 --- /dev/null +++ b/native/dormbr.go @@ -0,0 +1,136 @@ +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "github.com/gonum/blas" + "github.com/gonum/lapack" +) + +// Dormbr applies a multiplicative update to the matrix C based on a +// decomposition computed by Dgebrd. +// +// Dormbr computes +// Q * C if vect == lapack.ApplyQ, side == blas.Left, and trans == blas.NoTrans +// C * Q if vect == lapack.ApplyQ, side == blas.Right, and trans == blas.NoTrans +// Q^T * C if vect == lapack.ApplyQ, side == blas.Left, and trans == blas.Trans +// C * Q^T if vect == lapack.ApplyQ, side == blas.Right, and trans == blas.Trans +// +// P * C if vect == lapack.ApplyP, side == blas.Left, and trans == blas.NoTrans +// C * P if vect == lapack.ApplyP, side == blas.Left, and trans == blas.NoTrans +// P^T * C if vect == lapack.ApplyP, side == blas.Right, and trans == blas.Trans +// C * P^T if vect == lapack.ApplyP, side == blas.Right, and trans == blas.Trans +// where P and Q are the orthogonal matrices determined by Dgebrd, A = Q * B * P^T. +// See Dgebrd for the definitions of Q and P. +// +// If vect == lapack.ApplyQ, A is assumed to have been an nq×k matrix, while if +// vect == lapack.ApplyP, A is assumed to have been a k×nq matrix. nq = m if +// side == blas.Left, while nq = n if side == blas.Right. +// +// C is an m×n matrix. On exit it is updated by the multiplication listed above. +// +// Tau must have length min(nq,k), and Dormbr will panic otherwise. Tau contains +// the elementary reflectors to construct Q or P depending on the value of +// vect. +func (impl Implementation) Dormbr(vect lapack.DecompUpdate, side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) { + if side != blas.Left && side != blas.Right { + panic(badSide) + } + if trans != blas.NoTrans && trans != blas.Trans { + panic(badTrans) + } + if vect != lapack.ApplyP && vect != lapack.ApplyQ { + panic(badDecompUpdate) + } + nq := n + nw := m + if side == blas.Left { + nq = m + nw = n + } + if vect == lapack.ApplyQ { + checkMatrix(nq, min(nq, k), a, lda) + } else { + checkMatrix(min(nq, k), nq, a, lda) + } + + applyQ := vect == lapack.ApplyQ + left := side == blas.Left + var nb int + + // The current implementation does not use opts, but a future change may + // use these options so construct them. + var opts string + if side == blas.Left { + opts = "L" + } else { + opts = "R" + } + if trans == blas.Trans { + opts += "T" + } else { + opts += "N" + } + if applyQ { + if left { + nb = impl.Ilaenv(1, "DORMQR", opts, m-1, n, m-1, -1) + } else { + nb = impl.Ilaenv(1, "DORMQR", opts, m, n-1, n-1, -1) + } + } else { + if left { + nb = impl.Ilaenv(1, "DORMLQ", opts, m-1, n, m-1, -1) + } else { + nb = impl.Ilaenv(1, "DORMLQ", opts, m, n-1, n-1, -1) + } + } + lworkopt := max(1, nw) * nb + if lwork == -1 { + work[0] = float64(lworkopt) + } + if applyQ { + // Change the operation to get Q depending on the size of the initial + // matrix to Dgebrd. The size matters due to the storage location of + // the off-diagonal elements. + if nq >= k { + impl.Dormqr(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork) + } else if nq > 1 { + mi := m + ni := n - 1 + i1 := 0 + i2 := 1 + if left { + mi = m - 1 + ni = n + i1 = 1 + i2 = 0 + } + impl.Dormqr(side, trans, mi, ni, nq-1, a[1*lda:], lda, tau, c[i1*ldc+i2:], ldc, work, lwork) + } + return + } + transt := blas.Trans + if trans == blas.Trans { + transt = blas.NoTrans + } + // Change the operation to get P depending on the size of the initial + // matrix to Dgebrd. The size matters due to the storage location of + // the off-diagonal elements. + if nq > k { + impl.Dormlq(side, transt, m, n, k, a, lda, tau, c, ldc, work, lwork) + } else if nq > 1 { + mi := m + ni := n - 1 + i1 := 0 + i2 := 1 + if left { + mi = m - 1 + ni = n + i1 = 1 + i2 = 0 + } + impl.Dormlq(side, transt, mi, ni, nq-1, a[1:], lda, tau, c[i1*ldc+i2:], ldc, work, lwork) + } +} diff --git a/native/dorml2.go b/native/dorml2.go index 52a1460b..112b4da1 100644 --- a/native/dorml2.go +++ b/native/dorml2.go @@ -15,7 +15,6 @@ import "github.com/gonum/blas" // If side == blas.Left, a is a matrix of side k×m, and if side == blas.Right // a is of size k×n. // -// // Tau contains the householder factors and is of length at least k and this function will // panic otherwise. // diff --git a/native/general.go b/native/general.go index 912f3920..0d5ac5b1 100644 --- a/native/general.go +++ b/native/general.go @@ -19,35 +19,36 @@ var _ lapack.Float64 = Implementation{} // This list is duplicated in lapack/cgo. Keep in sync. const ( - absIncNotOne = "lapack: increment not one or negative one" - badD = "lapack: d has insufficient length" - badDiag = "lapack: bad diag" - badDirect = "lapack: bad direct" - badE = "lapack: e has insufficient length" - badIpiv = "lapack: insufficient permutation length" - badLdA = "lapack: index of a out of range" - badNorm = "lapack: bad norm" - badPivot = "lapack: bad pivot" - badSide = "lapack: bad side" - badSlice = "lapack: bad input slice length" - badStore = "lapack: bad store" - badTau = "lapack: tau has insufficient length" - badTauQ = "lapack: tauQ has insufficient length" - badTauP = "lapack: tauP has insufficient length" - badTrans = "lapack: bad trans" - badUplo = "lapack: illegal triangle" - badWork = "lapack: insufficient working memory" - badWorkStride = "lapack: insufficient working array stride" - badZ = "lapack: insufficient z length" - kGTM = "lapack: k > m" - kGTN = "lapack: k > n" - kLT0 = "lapack: k < 0" - mLTN = "lapack: m < n" - negDimension = "lapack: negative matrix dimension" - negZ = "lapack: negative z value" - nLT0 = "lapack: n < 0" - nLTM = "lapack: n < m" - shortWork = "lapack: working array shorter than declared" + absIncNotOne = "lapack: increment not one or negative one" + badD = "lapack: d has insufficient length" + badDecompUpdate = "lapack: bad decomp update" + badDiag = "lapack: bad diag" + badDirect = "lapack: bad direct" + badE = "lapack: e has insufficient length" + badIpiv = "lapack: insufficient permutation length" + badLdA = "lapack: index of a out of range" + badNorm = "lapack: bad norm" + badPivot = "lapack: bad pivot" + badSide = "lapack: bad side" + badSlice = "lapack: bad input slice length" + badStore = "lapack: bad store" + badTau = "lapack: tau has insufficient length" + badTauQ = "lapack: tauQ has insufficient length" + badTauP = "lapack: tauP has insufficient length" + badTrans = "lapack: bad trans" + badUplo = "lapack: illegal triangle" + badWork = "lapack: insufficient working memory" + badWorkStride = "lapack: insufficient working array stride" + badZ = "lapack: insufficient z length" + kGTM = "lapack: k > m" + kGTN = "lapack: k > n" + kLT0 = "lapack: k < 0" + mLTN = "lapack: m < n" + negDimension = "lapack: negative matrix dimension" + negZ = "lapack: negative z value" + nLT0 = "lapack: n < 0" + nLTM = "lapack: n < m" + shortWork = "lapack: working array shorter than declared" ) // checkMatrix verifies the parameters of a matrix input. diff --git a/native/lapack_test.go b/native/lapack_test.go index 93f486c7..ae69ebff 100644 --- a/native/lapack_test.go +++ b/native/lapack_test.go @@ -152,6 +152,10 @@ func TestDorgqr(t *testing.T) { testlapack.DorgqrTest(t, impl) } +func TestDormbr(t *testing.T) { + testlapack.DormbrTest(t, impl) +} + func TestDorml2(t *testing.T) { testlapack.Dorml2Test(t, impl) } diff --git a/testlapack/dormbr.go b/testlapack/dormbr.go new file mode 100644 index 00000000..e7268fbe --- /dev/null +++ b/testlapack/dormbr.go @@ -0,0 +1,145 @@ +// Copyright ©2015 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package testlapack + +import ( + "math/rand" + "testing" + + "github.com/gonum/blas" + "github.com/gonum/blas/blas64" + "github.com/gonum/floats" + "github.com/gonum/lapack" +) + +type Dormbrer interface { + Dormbr(vect lapack.DecompUpdate, side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) + Dgebrder +} + +func DormbrTest(t *testing.T, impl Dormbrer) { + bi := blas64.Implementation() + for _, vect := range []lapack.DecompUpdate{lapack.ApplyQ, lapack.ApplyP} { + for _, side := range []blas.Side{blas.Left, blas.Right} { + for _, trans := range []blas.Transpose{blas.NoTrans, blas.Trans} { + for _, test := range []struct { + m, n, k, lda, ldc int + }{ + {3, 4, 5, 0, 0}, + {3, 5, 4, 0, 0}, + {4, 3, 5, 0, 0}, + {4, 5, 3, 0, 0}, + {5, 3, 4, 0, 0}, + {5, 4, 3, 0, 0}, + + {3, 4, 5, 10, 12}, + {3, 5, 4, 10, 12}, + {4, 3, 5, 10, 12}, + {4, 5, 3, 10, 12}, + {5, 3, 4, 10, 12}, + {5, 4, 3, 10, 12}, + } { + m := test.m + n := test.n + k := test.k + ldc := test.ldc + if ldc == 0 { + ldc = n + } + nq := n + if side == blas.Left { + nq = m + } + + // Compute a decomposition. + var ma, na int + var a []float64 + if vect == lapack.ApplyQ { + ma = nq + na = k + } else { + ma = k + na = nq + } + lda := test.lda + if lda == 0 { + lda = na + } + a = make([]float64, ma*lda) + for i := range a { + a[i] = rand.NormFloat64() + } + nTau := min(nq, k) + tauP := make([]float64, nTau) + tauQ := make([]float64, nTau) + d := make([]float64, nTau) + e := make([]float64, nTau) + lwork := -1 + work := make([]float64, 1) + impl.Dgebrd(ma, na, a, lda, d, e, tauQ, tauP, work, lwork) + work = make([]float64, int(work[0])) + lwork = len(work) + + impl.Dgebrd(ma, na, a, lda, d, e, tauQ, tauP, work, lwork) + + // Apply and compare update. + c := make([]float64, m*ldc) + for i := range c { + c[i] = rand.NormFloat64() + } + + cCopy := make([]float64, len(c)) + copy(cCopy, c) + + if vect == lapack.ApplyQ { + impl.Dormbr(vect, side, trans, m, n, k, a, lda, tauQ, c, ldc, work, lwork) + } else { + impl.Dormbr(vect, side, trans, m, n, k, a, lda, tauP, c, ldc, work, lwork) + } + + // Check that the multiplication was correct. + cOrig := blas64.General{ + Rows: m, + Cols: n, + Stride: ldc, + Data: make([]float64, len(cCopy)), + } + copy(cOrig.Data, cCopy) + cAns := blas64.General{ + Rows: m, + Cols: n, + Stride: ldc, + Data: make([]float64, len(cCopy)), + } + copy(cAns.Data, cCopy) + nb := min(ma, na) + var mulMat blas64.General + if vect == lapack.ApplyQ { + mulMat = constructQPBidiagonal(lapack.ApplyQ, ma, na, nb, a, lda, tauQ) + } else { + mulMat = constructQPBidiagonal(lapack.ApplyP, ma, na, nb, a, lda, tauP) + } + + mulTrans := trans + + if side == blas.Left { + bi.Dgemm(mulTrans, blas.NoTrans, m, n, m, 1, mulMat.Data, mulMat.Stride, cOrig.Data, cOrig.Stride, 0, cAns.Data, cAns.Stride) + } else { + bi.Dgemm(blas.NoTrans, mulTrans, m, n, n, 1, cOrig.Data, cOrig.Stride, mulMat.Data, mulMat.Stride, 0, cAns.Data, cAns.Stride) + } + + if !floats.EqualApprox(cAns.Data, c, 1e-8) { + isApplyQ := vect == lapack.ApplyQ + isLeft := side == blas.Left + isTrans := trans == blas.Trans + + t.Errorf("C mismatch. isApplyQ: %v, isLeft: %v, isTrans: %v, m = %v, n = %v, k = %v, lda = %v, ldc = %v", + isApplyQ, isLeft, isTrans, m, n, k, lda, ldc) + } + } + } + } + } +} diff --git a/testlapack/dorml2.go b/testlapack/dorml2.go index 5e87becf..4cdbf8ab 100644 --- a/testlapack/dorml2.go +++ b/testlapack/dorml2.go @@ -30,6 +30,7 @@ func Dorml2Test(t *testing.T, impl Dorml2er) { {4, 5, 3, 0, 0}, {5, 3, 4, 0, 0}, {5, 4, 3, 0, 0}, + {3, 4, 5, 6, 20}, {3, 5, 4, 6, 20}, {4, 3, 5, 6, 20}, @@ -129,7 +130,9 @@ func Dorml2Test(t *testing.T, impl Dorml2er) { t.Errorf("tau changed in call") } if !floats.EqualApprox(cMat.Data, c, 1e-14) { - t.Errorf("Multiplication mismatch.\n Want %v \n got %v.", cMat.Data, c) + isLeft := side == blas.Left + isTrans := trans == blas.Trans + t.Errorf("Multiplication mismatch. IsLeft = %v. IsTrans = %v", isLeft, isTrans) } } } diff --git a/testlapack/general.go b/testlapack/general.go index a274247e..e03ab9c8 100644 --- a/testlapack/general.go +++ b/testlapack/general.go @@ -320,141 +320,9 @@ func constructQK(kind string, m, n, k int, a []float64, lda int, tau []float64) // the result is bidiagonal. func checkBidiagonal(t *testing.T, m, n, nb int, a []float64, lda int, d, e, tauP, tauQ, aCopy []float64) { // Check the answer. - // Construct V.and U - ldv := nb - v := blas64.General{ - Rows: m, - Cols: nb, - Stride: ldv, - Data: make([]float64, m*ldv), - } - if m >= n { - for i := 0; i < m; i++ { - for j := 0; j <= min(nb-1, i); j++ { - if i == j { - v.Data[i*ldv+j] = 1 - continue - } - v.Data[i*ldv+j] = a[i*lda+j] - } - } - } else { - for i := 1; i < m; i++ { - for j := 0; j <= min(nb-1, i-1); j++ { - if i-1 == j { - v.Data[i*ldv+j] = 1 - continue - } - v.Data[i*ldv+j] = a[i*lda+j] - } - } - } - - ldu := n - u := blas64.General{ - Rows: nb, - Cols: n, - Stride: ldu, - Data: make([]float64, nb*ldu), - } - if m < n { - for i := 0; i < nb; i++ { - for j := i; j < n; j++ { - if i == j { - u.Data[i*ldu+j] = 1 - continue - } - u.Data[i*ldu+j] = a[i*lda+j] - } - } - } else { - for i := 0; i < nb; i++ { - for j := i + 1; j < n; j++ { - if j-1 == i { - u.Data[i*ldu+j] = 1 - continue - } - u.Data[i*ldu+j] = a[i*lda+j] - } - } - } - - // Check the reconstruction Q^T * A * P - qMat := blas64.General{ - Rows: m, - Cols: m, - Stride: m, - Data: make([]float64, m*m), - } - hMat := blas64.General{ - Rows: m, - Cols: m, - Stride: m, - Data: make([]float64, m*m), - } - pMat := blas64.General{ - Rows: n, - Cols: n, - Stride: n, - Data: make([]float64, n*n), - } - gMat := blas64.General{ - Rows: n, - Cols: n, - Stride: n, - Data: make([]float64, n*n), - } - // set Q and P to I - for i := 0; i < m; i++ { - qMat.Data[i*qMat.Stride+i] = 1 - } - for i := 0; i < n; i++ { - pMat.Data[i*pMat.Stride+i] = 1 - } - - for i := 0; i < nb; i++ { - qCopy := blas64.General{Rows: qMat.Rows, Cols: qMat.Cols, Stride: qMat.Stride, Data: make([]float64, len(qMat.Data))} - copy(qCopy.Data, qMat.Data) - pCopy := blas64.General{Rows: pMat.Rows, Cols: pMat.Cols, Stride: pMat.Stride, Data: make([]float64, len(pMat.Data))} - copy(pCopy.Data, pMat.Data) - - // Set g and h to I - for i := 0; i < m; i++ { - for j := 0; j < m; j++ { - if i == j { - hMat.Data[i*m+j] = 1 - } else { - hMat.Data[i*m+j] = 0 - } - } - } - for i := 0; i < n; i++ { - for j := 0; j < n; j++ { - if i == j { - gMat.Data[i*n+j] = 1 - } else { - gMat.Data[i*n+j] = 0 - } - } - } - // H -= tauQ[i] * v[i] * v[i]^t - vi := blas64.Vector{ - Inc: v.Stride, - Data: v.Data[i:], - } - blas64.Ger(-tauQ[i], vi, vi, hMat) - // G -= tauP[i] * u[i] * u[i]^T - ui := blas64.Vector{ - Inc: 1, - Data: u.Data[i*u.Stride:], - } - blas64.Ger(-tauP[i], ui, ui, gMat) - - // Q = Q * G[1] - blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, qCopy, hMat, 0, qMat) - // P = P * G[i] - blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, pCopy, gMat, 0, pMat) - } + // Construct V and U. + qMat := constructQPBidiagonal(lapack.ApplyQ, m, n, nb, a, lda, tauQ) + pMat := constructQPBidiagonal(lapack.ApplyP, m, n, nb, a, lda, tauP) // Compute Q^T * A * P aMat := blas64.General{ @@ -526,6 +394,132 @@ func checkBidiagonal(t *testing.T, m, n, nb int, a []float64, lda int, d, e, tau } } +// constructQPBidiagonal constructs Q or P from the Bidiagonal decomposition +// computed by dlabrd and bgebd2. +func constructQPBidiagonal(vect lapack.DecompUpdate, m, n, nb int, a []float64, lda int, tau []float64) blas64.General { + sz := n + if vect == lapack.ApplyQ { + sz = m + } + + var ldv int + var v blas64.General + if vect == lapack.ApplyQ { + ldv = nb + v = blas64.General{ + Rows: m, + Cols: nb, + Stride: ldv, + Data: make([]float64, m*ldv), + } + } else { + ldv = n + v = blas64.General{ + Rows: nb, + Cols: n, + Stride: ldv, + Data: make([]float64, m*ldv), + } + } + + if vect == lapack.ApplyQ { + if m >= n { + for i := 0; i < m; i++ { + for j := 0; j <= min(nb-1, i); j++ { + if i == j { + v.Data[i*ldv+j] = 1 + continue + } + v.Data[i*ldv+j] = a[i*lda+j] + } + } + } else { + for i := 1; i < m; i++ { + for j := 0; j <= min(nb-1, i-1); j++ { + if i-1 == j { + v.Data[i*ldv+j] = 1 + continue + } + v.Data[i*ldv+j] = a[i*lda+j] + } + } + } + } else { + if m < n { + for i := 0; i < nb; i++ { + for j := i; j < n; j++ { + if i == j { + v.Data[i*ldv+j] = 1 + continue + } + v.Data[i*ldv+j] = a[i*lda+j] + } + } + } else { + for i := 0; i < nb; i++ { + for j := i + 1; j < n; j++ { + if j-1 == i { + v.Data[i*ldv+j] = 1 + continue + } + v.Data[i*ldv+j] = a[i*lda+j] + } + } + } + } + + // The variable name is a computation of Q, but the algorithm is mostly the + // same for computing P (just with different data). + qMat := blas64.General{ + Rows: sz, + Cols: sz, + Stride: sz, + Data: make([]float64, sz*sz), + } + hMat := blas64.General{ + Rows: sz, + Cols: sz, + Stride: sz, + Data: make([]float64, sz*sz), + } + // set Q to I + for i := 0; i < sz; i++ { + qMat.Data[i*qMat.Stride+i] = 1 + } + for i := 0; i < nb; i++ { + qCopy := blas64.General{Rows: qMat.Rows, Cols: qMat.Cols, Stride: qMat.Stride, Data: make([]float64, len(qMat.Data))} + copy(qCopy.Data, qMat.Data) + + // Set g and h to I + for i := 0; i < sz; i++ { + for j := 0; j < sz; j++ { + if i == j { + hMat.Data[i*sz+j] = 1 + } else { + hMat.Data[i*sz+j] = 0 + } + } + } + var vi blas64.Vector + // H -= tauQ[i] * v[i] * v[i]^t + if vect == lapack.ApplyQ { + vi = blas64.Vector{ + Inc: v.Stride, + Data: v.Data[i:], + } + } else { + vi = blas64.Vector{ + Inc: 1, + Data: v.Data[i*v.Stride:], + } + } + blas64.Ger(-tau[i], vi, vi, hMat) + // Q = Q * G[1] + blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, qCopy, hMat, 0, qMat) + } + return qMat +} + // printRowise prints the matrix with one row per line. This is useful for debugging. // If beyond is true, it prints beyond the final column to lda. If false, only // the columns are printed.