diff --git a/native/dlaqps.go b/native/dlaqps.go new file mode 100644 index 00000000..f96d40a5 --- /dev/null +++ b/native/dlaqps.go @@ -0,0 +1,217 @@ +// Copyright ©2017 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "math" + + "github.com/gonum/blas" + "github.com/gonum/blas/blas64" +) + +// Dlaqps computes a step of QR factorization with column pivoting +// of an m×n matrix A by using Blas-3. It tries to factorize nb +// columns from A starting from the row offset, and updates all +// of the matrix with Dgemm. +// +// In some cases, due to catastrophic cancellations, it cannot +// factorize nb columns. Hence, the actual number of factorized +// columns is returned in kb. +// +// Dlaqps computes a QR factorization with column pivoting of the +// block A[offset:m, 0:nb] of the m×n matrix A. The block +// A[0:offset, 0:n] is accordingly pivoted, but not factorized. +// +// On exit, the upper triangle of block A[offset:m, 0:kb] is the +// triangular factor obtained. The elements in block A[offset:m, 0:n] +// below the diagonal, together with tau, represent the orthogonal +// matrix Q as a product of elementary reflectors. +// +// offset is number of rows of the matrix A that must be pivoted but +// not factorized. offset must not be negative otherwise Dlaqps will panic. +// +// On exit, jpvt holds the permutation that was applied; the jth column +// of A*P was the jpvt[j] column of A. jpvt must have length n, +// otherwise Dlapqs will panic. +// +// On exit tau holds the scalar factors of the elementary reflectors. +// It must have length nb, otherwise Dlapqs will panic. +// +// vn1 and vn2 hold the partial and complete column norms respectively. +// They must have length n, otherwise Dlapqs will panic. +// +// auxv must have length nb, otherwise Dlaqps will panic. +// +// f and ldf represent an n×nb matrix F that is overwritten during the +// call. +// +// Dlaqps is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dlaqps(m, n, offset, nb int, a []float64, lda int, jpvt []int, tau, vn1, vn2, auxv, f []float64, ldf int) (kb int) { + checkMatrix(m, n, a, lda) + checkMatrix(n, nb, f, ldf) + if offset > m { + panic(offsetGTM) + } + if n < 0 || nb > n { + panic(badNb) + } + if len(jpvt) != n { + panic(badIpiv) + } + if len(tau) < nb { + panic(badTau) + } + if len(vn1) < n { + panic(badVn1) + } + if len(vn2) < n { + panic(badVn2) + } + if len(auxv) < nb { + panic(badAuxv) + } + + lastrk := min(m, n+offset) + lsticc := -1 + tol3z := math.Sqrt(dlamchE) + + bi := blas64.Implementation() + + var k, rk int + for ; k < nb && lsticc == -1; k++ { + rk = offset + k + + // Determine kth pivot column and swap if necessary. + p := k + bi.Idamax(n-k, vn1[k:], 1) + if p != k { + bi.Dswap(m, a[p:], lda, a[k:], lda) + bi.Dswap(k, f[p*ldf:], 1, f[k*ldf:], 1) + jpvt[p], jpvt[k] = jpvt[k], jpvt[p] + vn1[p] = vn1[k] + vn2[p] = vn2[k] + } + + // Apply previous Householder reflectors to column K: + // + // A[rk:m, k] = A[rk:m, k] - A[rk:m, 0:k-1]*F[k, 0:k-1]^T. + if k > 0 { + bi.Dgemv(blas.NoTrans, m-rk, k, -1, + a[rk*lda:], lda, + f[k*ldf:], 1, + 1, + a[rk*lda+k:], lda) + } + + // Generate elementary reflector H_k. + if rk < m-1 { + a[rk*lda+k], tau[k] = impl.Dlarfg(m-rk, a[rk*lda+k], a[(rk+1)*lda+k:], lda) + } else { + tau[k] = 0 + } + + akk := a[rk*lda+k] + a[rk*lda+k] = 1 + + // Compute kth column of F: + // + // Compute F[k+1:n, k] = tau[k]*A[rk:m, k+1:n]^T*A[rk:m, k]. + if k < n-1 { + bi.Dgemv(blas.Trans, m-rk, n-k-1, tau[k], + a[rk*lda+k+1:], lda, + a[rk*lda+k:], lda, + 0, + f[(k+1)*ldf+k:], ldf) + } + + // Padding F[0:k, k] with zeros. + for j := 0; j < k; j++ { + f[j*ldf+k] = 0 + } + + // Incremental updating of F: + // + // F[0:n, k] := F[0:n, k] - tau[k]*F[0:n, 0:k-1]*A[rk:m, 0:k-1]^T*A[rk:m,k]. + if k > 0 { + bi.Dgemv(blas.Trans, m-rk, k, -tau[k], + a[rk*lda:], lda, + a[rk*lda+k:], lda, + 0, + auxv, 1) + bi.Dgemv(blas.NoTrans, n, k, 1, + f, ldf, + auxv, 1, + 1, + f[k:], ldf) + } + + // Update the current row of A: + // + // A[rk, k+1:n] = A[rk, k+1:n] - A[rk, 0:k]*F[k+1:n, 0:k]^T. + if k < n-1 { + bi.Dgemv(blas.NoTrans, n-k-1, k+1, -1, + f[(k+1)*ldf:], ldf, + a[rk*lda:], 1, + 1, + a[rk*lda+k+1:], 1) + } + + // Update partial column norms. + if rk < lastrk-1 { + for j := k + 1; j < n; j++ { + if vn1[j] == 0 { + continue + } + + // The following marked lines follow from the + // analysis in Lapack Working Note 176. + r := math.Abs(a[rk*lda+j]) / vn1[j] // * + temp := math.Max(0, 1-r*r) // * + r = vn1[j] / vn2[j] // * + temp2 := temp * r * r // * + if temp2 < tol3z { + // vn2 is used here as a collection of + // indices into vn2 and also a collection + // of column norms. + vn2[j] = float64(lsticc) + lsticc = j + } else { + vn1[j] *= math.Sqrt(temp) // * + } + } + } + + a[rk*lda+k] = akk + } + kb = k + rk = offset + kb + + // Apply the block reflector to the rest of the matrix: + // + // A[offset+kb+1:m, kb+1:n] := A[offset+kb+1:m, kb+1:n] - A[offset+kb+1:m, 1:kb]*F[kb+1:n, 1:kb]^T. + if kb < min(n, m-offset) { + bi.Dgemm(blas.NoTrans, blas.Trans, + m-rk, n-kb, kb, -1, + a[rk*lda:], lda, + f[kb*ldf:], ldf, + 1, + a[rk*lda+kb:], lda) + } + + // Recomputation of difficult columns. + for lsticc >= 0 { + itemp := int(vn2[lsticc]) + + // NOTE: The computation of vn1[lsticc] relies on the fact that + // Dnrm2 does not fail on vectors with norm below the value of + // sqrt(dlamchS) + v := bi.Dnrm2(m-rk, a[rk*lda+lsticc:], lda) + vn1[lsticc] = v + vn2[lsticc] = v + + lsticc = itemp + } + + return kb +} diff --git a/native/lapack_test.go b/native/lapack_test.go index 64997fb9..244f75cb 100644 --- a/native/lapack_test.go +++ b/native/lapack_test.go @@ -196,6 +196,10 @@ func TestDlaqp2(t *testing.T) { testlapack.Dlaqp2Test(t, impl) } +func TestDlaqps(t *testing.T) { + testlapack.DlaqpsTest(t, impl) +} + func TestDlaqr1(t *testing.T) { testlapack.Dlaqr1Test(t, impl) } diff --git a/testlapack/dlaqps.go b/testlapack/dlaqps.go new file mode 100644 index 00000000..ed89a96d --- /dev/null +++ b/testlapack/dlaqps.go @@ -0,0 +1,112 @@ +// Copyright ©2017 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package testlapack + +import ( + "fmt" + "math" + "testing" + + "github.com/gonum/blas" + "github.com/gonum/blas/blas64" +) + +type Dlaqpser interface { + Dlapmter + Dlaqps(m, n, offset, nb int, a []float64, lda int, jpvt []int, tau, vn1, vn2, auxv, f []float64, ldf int) (kb int) +} + +func DlaqpsTest(t *testing.T, impl Dlaqpser) { + for ti, test := range []struct { + m, n, nb, offset int + }{ + {m: 4, n: 3, nb: 2, offset: 0}, + {m: 4, n: 3, nb: 1, offset: 2}, + {m: 3, n: 4, nb: 2, offset: 0}, + {m: 3, n: 4, nb: 1, offset: 2}, + {m: 8, n: 3, nb: 2, offset: 0}, + {m: 8, n: 3, nb: 1, offset: 4}, + {m: 3, n: 8, nb: 2, offset: 0}, + {m: 3, n: 8, nb: 1, offset: 1}, + {m: 10, n: 10, nb: 3, offset: 0}, + {m: 10, n: 10, nb: 2, offset: 5}, + } { + m := test.m + n := test.n + jpiv := make([]int, n) + + for _, extra := range []int{0, 11} { + a := zeros(m, n, n+extra) + c := 1 + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + a.Data[i*a.Stride+j] = float64(c) + c++ + } + } + aCopy := cloneGeneral(a) + for j := range jpiv { + jpiv[j] = j + } + + tau := make([]float64, n) + vn1 := columnNorms(m, n, a.Data, a.Stride) + vn2 := columnNorms(m, n, a.Data, a.Stride) + auxv := make([]float64, test.nb) + f := zeros(test.n, test.nb, n) + + kb := impl.Dlaqps(m, n, test.offset, test.nb, a.Data, a.Stride, jpiv, tau, vn1, vn2, auxv, f.Data, f.Stride) + + prefix := fmt.Sprintf("Case %v (offset=%t,m=%v,n=%v,extra=%v)", ti, test.offset, m, n, extra) + if !generalOutsideAllNaN(a) { + t.Errorf("%v: out-of-range write to A", prefix) + } + + if test.offset == m { + continue + } + + mo := m - test.offset + q := constructQ("QR", mo, kb, a.Data[test.offset*a.Stride:], a.Stride, tau) + // Check that q is orthonormal + for i := 0; i < mo; i++ { + nrm := blas64.Nrm2(mo, blas64.Vector{Inc: 1, Data: q.Data[i*mo:]}) + if math.Abs(nrm-1) > 1e-13 { + t.Errorf("Case %v, q not normal", ti) + } + for j := 0; j < i; j++ { + dot := blas64.Dot(mo, blas64.Vector{Inc: 1, Data: q.Data[i*mo:]}, blas64.Vector{Inc: 1, Data: q.Data[j*mo:]}) + if math.Abs(dot) > 1e-14 { + t.Errorf("Case %v, q not orthogonal", ti) + } + } + } + + // Check that A * P = Q * R + r := blas64.General{ + Rows: mo, + Cols: kb, + Stride: kb, + Data: make([]float64, mo*kb), + } + for i := 0; i < mo; i++ { + for j := i; j < kb; j++ { + r.Data[i*kb+j] = a.Data[(test.offset+i)*a.Stride+j] + } + } + got := nanGeneral(mo, kb, kb) + blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, q, r, 0, got) + + want := aCopy + impl.Dlapmt(true, want.Rows, want.Cols, want.Data, want.Stride, jpiv) + want.Rows = mo + want.Cols = kb + want.Data = want.Data[test.offset*want.Stride:] + if !equalApproxGeneral(got, want, 1e-12) { + t.Errorf("Case %v, Q*R != A*P\nQ*R=%v\nA*P=%v", ti, got, want) + } + } + } +}