diff --git a/cgo/lapack.go b/cgo/lapack.go index 694d3d77..55970127 100644 --- a/cgo/lapack.go +++ b/cgo/lapack.go @@ -16,6 +16,7 @@ import ( // Copied from lapack/native. Keep in sync. const ( absIncNotOne = "lapack: increment not one or negative one" + badAuxv = "lapack: auxv has insufficient length" badD = "lapack: d has insufficient length" badDecompUpdate = "lapack: bad decomp update" badDiag = "lapack: bad diag" @@ -34,6 +35,7 @@ const ( badK2 = "lapack: k2 out of range" badKperm = "lapack: incorrect permutation length" badLdA = "lapack: index of a out of range" + badNb = "lapack: nb out of range" badNorm = "lapack: bad norm" badPivot = "lapack: bad pivot" badS = "lapack: s has insufficient length" @@ -46,6 +48,8 @@ const ( badTauQ = "lapack: tauQ has insufficient length" badTauP = "lapack: tauP has insufficient length" badTrans = "lapack: bad trans" + badVn1 = "lapack: vn1 has insufficient length" + badVn2 = "lapack: vn2 has insufficient length" badUplo = "lapack: illegal triangle" badWork = "lapack: insufficient working memory" badWorkStride = "lapack: insufficient working array stride" @@ -60,6 +64,7 @@ const ( negZ = "lapack: negative z value" nLT0 = "lapack: n < 0" nLTM = "lapack: n < m" + offsetGTM = "lapack: offset > m" shortWork = "lapack: working array shorter than declared" zeroDiv = "lapack: zero divisor" ) diff --git a/native/dlaqp2.go b/native/dlaqp2.go new file mode 100644 index 00000000..f9ae6454 --- /dev/null +++ b/native/dlaqp2.go @@ -0,0 +1,111 @@ +// Copyright ©2017 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "math" + + "github.com/gonum/blas" + "github.com/gonum/blas/blas64" +) + +// Dlaqp2 computes a QR factorization with column pivoting of the block A[offset:m, 0:n] +// of the m×n matrix A. The block A[0:offset, 0:n] is accordingly pivoted, but not factorized. 
+// +// On exit, the upper triangle of block A[offset:m, 0:n] is the triangular factor obtained. +// The elements in block A[offset:m, 0:n] below the diagonal, together with tau, represent +// the orthogonal matrix Q as a product of elementary reflectors. +// +// offset is number of rows of the matrix A that must be pivoted but not factorized. +// offset must not be negative otherwise Dlaqp2 will panic. +// +// On exit, jpvt holds the permutation that was applied; the jth column of A*P was the +// jpvt[j] column of A. jpvt must have length n, otherwise Dlaqp2 will panic. +// +// On exit tau holds the scalar factors of the elementary reflectors. It must have length +// at least min(m-offset, n) otherwise Dlaqp2 will panic. +// +// vn1 and vn2 hold the partial and complete column norms respectively. They must have length n, +// otherwise Dlaqp2 will panic. +// +// work must have length n, otherwise Dlaqp2 will panic. +// +// Dlaqp2 is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlaqp2(m, n, offset int, a []float64, lda int, jpvt []int, tau, vn1, vn2, work []float64) { + checkMatrix(m, n, a, lda) + if len(jpvt) != n { + panic(badIpiv) + } + mn := min(m-offset, n) + if len(tau) < mn { + panic(badTau) + } + if len(vn1) < n { + panic(badVn1) + } + if len(vn2) < n { + panic(badVn2) + } + if len(work) < n { + panic(badWork) + } + + tol3z := math.Sqrt(dlamchE) + + bi := blas64.Implementation() + + // Compute factorization. + for i := 0; i < mn; i++ { + offpi := offset + i + + // Determine ith pivot column and swap if necessary. + p := i + bi.Idamax(n-i, vn1[i:], 1) + if p != i { + bi.Dswap(m, a[p:], lda, a[i:], lda) + jpvt[p], jpvt[i] = jpvt[i], jpvt[p] + vn1[p] = vn1[i] + vn2[p] = vn2[i] + } + + // Generate elementary reflector H_i. 
+ if offpi < m-1 { + a[offpi*lda+i], tau[i] = impl.Dlarfg(m-offpi, a[offpi*lda+i], a[(offpi+1)*lda+i:], lda) + } else { + tau[i] = 0 + } + + if i < n-1 { + // Apply H_i^T to A[offset+i:m, i:n] from the left. + aii := a[offpi*lda+i] + a[offpi*lda+i] = 1 + impl.Dlarf(blas.Left, m-offpi, n-i-1, a[offpi*lda+i:], lda, tau[i], a[offpi*lda+i+1:], lda, work) + a[offpi*lda+i] = aii + } + + // Update partial column norms. + for j := i + 1; j < n; j++ { + if vn1[j] == 0 { + continue + } + + // The following marked lines follow from the + // analysis in Lapack Working Note 176. + r := math.Abs(a[offpi*lda+j]) / vn1[j] // * + temp := math.Max(0, 1-r*r) // * + r = vn1[j] / vn2[j] // * + temp2 := temp * r * r // * + if temp2 < tol3z { + var v float64 + if offpi < m-1 { + v = bi.Dnrm2(m-offpi-1, a[(offpi+1)*lda+j:], lda) + } + vn1[j] = v + vn2[j] = v + } else { + vn1[j] *= math.Sqrt(temp) // * + } + } + } +} diff --git a/native/general.go b/native/general.go index cd43c42d..4a134331 100644 --- a/native/general.go +++ b/native/general.go @@ -20,6 +20,7 @@ var _ lapack.Float64 = Implementation{} // This list is duplicated in lapack/cgo. Keep in sync. 
// TestDlaqp2 runs the shared testlapack.Dlaqp2Test checks against the
// package-level impl value.
func TestDlaqp2(t *testing.T) {
	testlapack.Dlaqp2Test(t, impl)
}
+ +package testlapack + +import ( + "fmt" + "math" + "testing" + + "github.com/gonum/blas" + "github.com/gonum/blas/blas64" +) + +type Dlaqp2er interface { + Dlapmter + Dlaqp2(m, n, offset int, a []float64, lda int, jpvt []int, tau, vn1, vn2, work []float64) +} + +func Dlaqp2Test(t *testing.T, impl Dlaqp2er) { + for ti, test := range []struct { + m, n, offset int + }{ + {m: 4, n: 3, offset: 0}, + {m: 4, n: 3, offset: 2}, + {m: 4, n: 3, offset: 4}, + {m: 3, n: 4, offset: 0}, + {m: 3, n: 4, offset: 1}, + {m: 3, n: 4, offset: 2}, + {m: 8, n: 3, offset: 0}, + {m: 8, n: 3, offset: 4}, + {m: 8, n: 3, offset: 8}, + {m: 3, n: 8, offset: 0}, + {m: 3, n: 8, offset: 1}, + {m: 3, n: 8, offset: 2}, + {m: 10, n: 10, offset: 0}, + {m: 10, n: 10, offset: 5}, + {m: 10, n: 10, offset: 10}, + } { + m := test.m + n := test.n + jpiv := make([]int, n) + + for _, extra := range []int{0, 11} { + a := zeros(m, n, n+extra) + c := 1 + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + a.Data[i*a.Stride+j] = float64(c) + c++ + } + } + aCopy := cloneGeneral(a) + for j := range jpiv { + jpiv[j] = j + } + + tau := make([]float64, n) + vn1 := columnNorms(m, n, a.Data, a.Stride) + vn2 := columnNorms(m, n, a.Data, a.Stride) + work := make([]float64, n) + + impl.Dlaqp2(m, n, test.offset, a.Data, a.Stride, jpiv, tau, vn1, vn2, work) + + prefix := fmt.Sprintf("Case %v (offset=%t,m=%v,n=%v,extra=%v)", ti, test.offset, m, n, extra) + if !generalOutsideAllNaN(a) { + t.Errorf("%v: out-of-range write to A", prefix) + } + + if test.offset == m { + continue + } + + mo := m - test.offset + q := constructQ("QR", mo, n, a.Data[test.offset*a.Stride:], a.Stride, tau) + // Check that q is orthonormal + for i := 0; i < mo; i++ { + nrm := blas64.Nrm2(mo, blas64.Vector{Inc: 1, Data: q.Data[i*mo:]}) + if math.Abs(nrm-1) > 1e-13 { + t.Errorf("Case %v, q not normal", ti) + } + for j := 0; j < i; j++ { + dot := blas64.Dot(mo, blas64.Vector{Inc: 1, Data: q.Data[i*mo:]}, blas64.Vector{Inc: 1, Data: q.Data[j*mo:]}) + if 
math.Abs(dot) > 1e-14 { + t.Errorf("Case %v, q not orthogonal", ti) + } + } + } + + // Check that A * P = Q * R + r := blas64.General{ + Rows: mo, + Cols: n, + Stride: n, + Data: make([]float64, mo*n), + } + for i := 0; i < mo; i++ { + for j := i; j < n; j++ { + r.Data[i*n+j] = a.Data[(test.offset+i)*a.Stride+j] + } + } + got := nanGeneral(mo, n, n) + blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, q, r, 0, got) + + want := aCopy + impl.Dlapmt(true, want.Rows, want.Cols, want.Data, want.Stride, jpiv) + want.Rows = mo + want.Data = want.Data[test.offset*want.Stride:] + if !equalApproxGeneral(got, want, 1e-12) { + t.Errorf("Case %v, Q*R != A*P\nQ*R=%v\nA*P=%v", ti, got, want) + } + } + } +} diff --git a/testlapack/general.go b/testlapack/general.go index cc5b970f..ec8a38cb 100644 --- a/testlapack/general.go +++ b/testlapack/general.go @@ -262,6 +262,16 @@ func transposeGeneral(a blas64.General) blas64.General { return ans } +// columnNorms returns the column norms of a. +func columnNorms(m, n int, a []float64, lda int) []float64 { + bi := blas64.Implementation() + norms := make([]float64, n) + for j := 0; j < n; j++ { + norms[j] = bi.Dnrm2(m, a[j:], lda) + } + return norms +} + // extractVMat collects the single reflectors from a into a matrix. func extractVMat(m, n int, a []float64, lda int, direct lapack.Direct, store lapack.StoreV) blas64.General { k := min(m, n)