mirror of
https://github.com/gonum/gonum.git
synced 2025-10-19 05:24:52 +08:00
cgo,native: implement dgeqp3 and test
This commit is contained in:
@@ -116,6 +116,70 @@ type Implementation struct{}
|
||||
|
||||
var _ lapack.Float64 = Implementation{}
|
||||
|
||||
// Dgeqp3 computes a QR factorization with column pivoting of the
|
||||
// m×n matrix A: A*P = Q*R using Level 3 BLAS.
|
||||
//
|
||||
// The matrix Q is represented as a product of elementary reflectors
|
||||
// Q = H_0 H_1 . . . H_{k-1}, where k = min(m,n).
|
||||
// Each H_i has the form
|
||||
// H_i = I - tau * v * v^T
|
||||
// where tau and v are real vectors with v[0:i-1] = 0 and v[i] = 1;
|
||||
// v[i:m] is stored on exit in A[i:m, i], and tau in tau[i].
|
||||
//
|
||||
// jpvt specifies a column pivot to be applied to A. If
|
||||
// jpvt[j] is at least zero, the jth column of A is permuted
|
||||
// to the front of A*P (a leading column), if jpvt[j] is -1
|
||||
// the jth column of A is a free column. If jpvt[j] < -1, Dgeqp3
|
||||
// will panic. On return, jpvt holds the permutation that was
|
||||
// applied; the jth column of A*P was the jpvt[j] column of A.
|
||||
// jpvt must have length n or Dgeqp3 will panic.
|
||||
//
|
||||
// tau holds the scalar factors of the elementary reflectors.
|
||||
// It must have length min(m, n), otherwise Dgeqp3 will panic.
|
||||
//
|
||||
// work must have length at least max(1,lwork), and lwork must be at least
|
||||
// 3*n+1, otherwise Dgeqp3 will panic. For optimal performance lwork must
|
||||
// be at least 2*n+(n+1)*nb, where nb is the optimal blocksize. On return,
|
||||
// work[0] will contain the optimal value of lwork.
|
||||
//
|
||||
// If lwork == -1, instead of performing Dgeqp3, only the optimal value of lwork
|
||||
// will be stored in work[0].
|
||||
//
|
||||
// Dgeqp3 is an internal routine. It is exported for testing purposes.
|
||||
func (impl Implementation) Dgeqp3(m, n int, a []float64, lda int, jpvt []int, tau, work []float64, lwork int) {
|
||||
checkMatrix(m, n, a, lda)
|
||||
if len(jpvt) != n {
|
||||
panic(badIpiv)
|
||||
}
|
||||
if len(tau) != min(m, n) {
|
||||
panic(badTau)
|
||||
}
|
||||
if len(work) < max(1, lwork) {
|
||||
panic(badWork)
|
||||
}
|
||||
|
||||
// Don't update jpvt if querying lwkopt.
|
||||
if lwork == -1 {
|
||||
lapacke.Dgeqp3(m, n, a, lda, nil, nil, work, -1)
|
||||
return
|
||||
}
|
||||
|
||||
jpvt32 := make([]int32, len(jpvt))
|
||||
for i, v := range jpvt {
|
||||
v++
|
||||
if v != int(int32(v)) || v < 0 || n < v {
|
||||
panic("lapack: jpvt element out of range")
|
||||
}
|
||||
jpvt32[i] = int32(v)
|
||||
}
|
||||
|
||||
lapacke.Dgeqp3(m, n, a, lda, jpvt32, tau, work, lwork)
|
||||
|
||||
for i, v := range jpvt32 {
|
||||
jpvt[i] = int(v - 1)
|
||||
}
|
||||
}
|
||||
|
||||
// Dlacn2 estimates the 1-norm of an n×n matrix A using sequential updates with
|
||||
// matrix-vector products provided externally.
|
||||
//
|
||||
|
@@ -44,6 +44,10 @@ func (bl blockedTranslate) Dorgl2(m, n, k int, a []float64, lda int, tau, work [
|
||||
impl.Dorglq(m, n, k, a, lda, tau, work, len(work))
|
||||
}
|
||||
|
||||
// TestDgeqp3 runs the shared testlapack.Dgeqp3Test checks against impl,
// the implementation value declared elsewhere in this test package.
func TestDgeqp3(t *testing.T) {
|
||||
testlapack.Dgeqp3Test(t, impl)
|
||||
}
|
||||
|
||||
// TestDlacn2 runs the shared testlapack.Dlacn2Test checks against impl,
// the implementation value declared elsewhere in this test package.
func TestDlacn2(t *testing.T) {
|
||||
testlapack.Dlacn2Test(t, impl)
|
||||
}
|
||||
|
173
native/dgeqp3.go
Normal file
173
native/dgeqp3.go
Normal file
@@ -0,0 +1,173 @@
|
||||
// Copyright ©2017 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package native
|
||||
|
||||
import (
|
||||
"github.com/gonum/blas"
|
||||
"github.com/gonum/blas/blas64"
|
||||
)
|
||||
|
||||
// Dgeqp3 computes a QR factorization with column pivoting of the
|
||||
// m×n matrix A: A*P = Q*R using Level 3 BLAS.
|
||||
//
|
||||
// The matrix Q is represented as a product of elementary reflectors
|
||||
// Q = H_0 H_1 . . . H_{k-1}, where k = min(m,n).
|
||||
// Each H_i has the form
|
||||
// H_i = I - tau * v * v^T
|
||||
// where tau and v are real vectors with v[0:i-1] = 0 and v[i] = 1;
|
||||
// v[i:m] is stored on exit in A[i:m, i], and tau in tau[i].
|
||||
//
|
||||
// jpvt specifies a column pivot to be applied to A. If
|
||||
// jpvt[j] is at least zero, the jth column of A is permuted
|
||||
// to the front of A*P (a leading column), if jpvt[j] is -1
|
||||
// the jth column of A is a free column. If jpvt[j] < -1, Dgeqp3
|
||||
// will panic. On return, jpvt holds the permutation that was
|
||||
// applied; the jth column of A*P was the jpvt[j] column of A.
|
||||
// jpvt must have length n or Dgeqp3 will panic.
|
||||
//
|
||||
// tau holds the scalar factors of the elementary reflectors.
|
||||
// It must have length min(m, n), otherwise Dgeqp3 will panic.
|
||||
//
|
||||
// work must have length at least max(1,lwork), and lwork must be at least
|
||||
// 3*n+1, otherwise Dgeqp3 will panic. For optimal performance lwork must
|
||||
// be at least 2*n+(n+1)*nb, where nb is the optimal blocksize. On return,
|
||||
// work[0] will contain the optimal value of lwork.
|
||||
//
|
||||
// If lwork == -1, instead of performing Dgeqp3, only the optimal value of lwork
|
||||
// will be stored in work[0].
|
||||
//
|
||||
// Dgeqp3 is an internal routine. It is exported for testing purposes.
|
||||
func (impl Implementation) Dgeqp3(m, n int, a []float64, lda int, jpvt []int, tau, work []float64, lwork int) {
|
||||
const (
|
||||
inb = 1
|
||||
inbmin = 2
|
||||
ixover = 3
|
||||
)
|
||||
checkMatrix(m, n, a, lda)
|
||||
|
||||
if len(jpvt) != n {
|
||||
panic(badIpiv)
|
||||
}
|
||||
for _, v := range jpvt {
|
||||
if v < -1 || n <= v {
|
||||
panic("lapack: jpvt element out of range")
|
||||
}
|
||||
}
|
||||
minmn := min(m, n)
|
||||
if len(tau) != minmn {
|
||||
panic(badTau)
|
||||
}
|
||||
if len(work) < max(1, lwork) {
|
||||
panic(badWork)
|
||||
}
|
||||
|
||||
var iws, lwkopt, nb int
|
||||
if minmn == 0 {
|
||||
iws = 1
|
||||
lwkopt = 1
|
||||
} else {
|
||||
iws = 3*n + 1
|
||||
nb = impl.Ilaenv(inb, "DGEQRF", " ", m, n, -1, -1)
|
||||
lwkopt = 2*n + (n+1)*nb
|
||||
}
|
||||
work[0] = float64(lwkopt)
|
||||
|
||||
if lwork == -1 {
|
||||
return
|
||||
}
|
||||
|
||||
bi := blas64.Implementation()
|
||||
|
||||
// Move initial columns up front.
|
||||
var nfxd int
|
||||
for j := 0; j < n; j++ {
|
||||
if jpvt[j] == -1 {
|
||||
jpvt[j] = j
|
||||
continue
|
||||
}
|
||||
if j != nfxd {
|
||||
bi.Dswap(m, a[j:], lda, a[nfxd:], lda)
|
||||
jpvt[j], jpvt[nfxd] = jpvt[nfxd], j
|
||||
} else {
|
||||
jpvt[j] = j
|
||||
}
|
||||
nfxd++
|
||||
}
|
||||
|
||||
// Factorize nfxd columns.
|
||||
//
|
||||
// Compute the QR factorization of nfxd columns and update remaining columns.
|
||||
if nfxd > 0 {
|
||||
na := min(m, nfxd)
|
||||
impl.Dgeqrf(m, na, a, lda, tau, work, lwork)
|
||||
iws = max(iws, int(work[0]))
|
||||
if na < n {
|
||||
impl.Dormqr(blas.Left, blas.Trans, m, n-na, na, a, lda, tau[:na], a[na:], lda,
|
||||
work, lwork)
|
||||
iws = max(iws, int(work[0]))
|
||||
}
|
||||
}
|
||||
|
||||
if nfxd >= minmn {
|
||||
work[0] = float64(iws)
|
||||
return
|
||||
}
|
||||
|
||||
// Factorize free columns.
|
||||
sm := m - nfxd
|
||||
sn := n - nfxd
|
||||
sminmn := minmn - nfxd
|
||||
|
||||
// Determine the block size.
|
||||
nb = impl.Ilaenv(inb, "DGEQRF", " ", sm, sn, -1, -1)
|
||||
nbmin := 2
|
||||
nx := 0
|
||||
|
||||
if 1 < nb && nb < sminmn {
|
||||
// Determine when to cross over from blocked to unblocked code.
|
||||
nx = max(0, impl.Ilaenv(ixover, "DGEQRF", " ", sm, sn, -1, -1))
|
||||
|
||||
if nx < sminmn {
|
||||
// Determine if workspace is large enough for blocked code.
|
||||
minws := 2*sn + (sn+1)*nb
|
||||
iws = max(iws, minws)
|
||||
if lwork < minws {
|
||||
// Not enough workspace to use optimal nb. Reduce
|
||||
// nb and determine the minimum value of nb.
|
||||
nb = (lwork - 2*sn) / (sn + 1)
|
||||
nbmin = max(2, impl.Ilaenv(inbmin, "DGEQRF", " ", sm, sn, -1, -1))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize partial column norms.
|
||||
// The first n elements of work store the exact column norms.
|
||||
for j := nfxd; j < n; j++ {
|
||||
work[j] = bi.Dnrm2(sm, a[nfxd*lda+j:], lda)
|
||||
work[n+j] = work[j]
|
||||
}
|
||||
j := nfxd
|
||||
if nbmin <= nb && nb < sminmn && nx < sminmn {
|
||||
// Use blocked code initially.
|
||||
|
||||
// Compute factorization.
|
||||
var fjb int
|
||||
for topbmn := minmn - nx; j < topbmn; j += fjb {
|
||||
jb := min(nb, topbmn-j)
|
||||
|
||||
// Factorize jb columns among columns j:n.
|
||||
fjb = impl.Dlaqps(m, n-j, j, jb, a[j:], lda, jpvt[j:], tau[j:],
|
||||
work[j:n], work[j+n:2*n], work[2*n:2*n+jb], work[2*n+jb:], jb)
|
||||
}
|
||||
}
|
||||
|
||||
// Use unblocked code to factor the last or only block.
|
||||
if j < minmn {
|
||||
impl.Dlaqp2(m, n-j, j, a[j:], lda, jpvt[j:], tau[j:],
|
||||
work[j:n], work[j+n:2*n], work[2*n:])
|
||||
}
|
||||
|
||||
work[0] = float64(iws)
|
||||
}
|
@@ -68,6 +68,10 @@ func TestDgels(t *testing.T) {
|
||||
testlapack.DgelsTest(t, impl)
|
||||
}
|
||||
|
||||
// TestDgeqp3 runs the shared testlapack.Dgeqp3Test checks against impl,
// the implementation value declared elsewhere in this test package.
func TestDgeqp3(t *testing.T) {
|
||||
testlapack.Dgeqp3Test(t, impl)
|
||||
}
|
||||
|
||||
// TestDgeqr2 runs the shared testlapack.Dgeqr2Test checks against impl,
// the implementation value declared elsewhere in this test package.
func TestDgeqr2(t *testing.T) {
|
||||
testlapack.Dgeqr2Test(t, impl)
|
||||
}
|
||||
|
135
testlapack/dgeqp3.go
Normal file
135
testlapack/dgeqp3.go
Normal file
@@ -0,0 +1,135 @@
|
||||
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package testlapack
|
||||
|
||||
import (
|
||||
"math"
|
||||
"math/rand"
|
||||
"testing"
|
||||
|
||||
"github.com/gonum/blas"
|
||||
"github.com/gonum/blas/blas64"
|
||||
)
|
||||
|
||||
// Dgeqp3er is the set of methods Dgeqp3Test needs from an implementation:
// the Dgeqp3 routine under test plus everything in Dlapmter, whose Dlapmt
// method the test uses to apply the returned column permutation.
type Dgeqp3er interface {
|
||||
Dlapmter
|
||||
Dgeqp3(m, n int, a []float64, lda int, jpvt []int, tau, work []float64, lwork int)
|
||||
}
|
||||
|
||||
func Dgeqp3Test(t *testing.T, impl Dgeqp3er) {
|
||||
rnd := rand.New(rand.NewSource(1))
|
||||
for c, test := range []struct {
|
||||
m, n, lda int
|
||||
}{
|
||||
{1, 1, 0},
|
||||
{2, 2, 0},
|
||||
{3, 2, 0},
|
||||
{2, 3, 0},
|
||||
{1, 12, 0},
|
||||
{2, 6, 0},
|
||||
{3, 4, 0},
|
||||
{4, 3, 0},
|
||||
{6, 2, 0},
|
||||
{12, 1, 0},
|
||||
{1, 1, 20},
|
||||
{2, 2, 20},
|
||||
{3, 2, 20},
|
||||
{2, 3, 20},
|
||||
{1, 12, 20},
|
||||
{2, 6, 20},
|
||||
{3, 4, 20},
|
||||
{4, 3, 20},
|
||||
{6, 2, 20},
|
||||
{12, 1, 20},
|
||||
{129, 256, 0},
|
||||
{256, 129, 0},
|
||||
{129, 256, 266},
|
||||
{256, 129, 266},
|
||||
} {
|
||||
n := test.n
|
||||
m := test.m
|
||||
lda := test.lda
|
||||
if lda == 0 {
|
||||
lda = test.n
|
||||
}
|
||||
const (
|
||||
all = iota
|
||||
some
|
||||
none
|
||||
)
|
||||
for _, free := range []int{all, some, none} {
|
||||
a := make([]float64, m*lda)
|
||||
for i := range a {
|
||||
a[i] = rnd.Float64()
|
||||
}
|
||||
aCopy := make([]float64, len(a))
|
||||
copy(aCopy, a)
|
||||
jpvt := make([]int, n)
|
||||
for j := range jpvt {
|
||||
switch free {
|
||||
case all:
|
||||
jpvt[j] = -1
|
||||
case some:
|
||||
jpvt[j] = rnd.Intn(2) - 1
|
||||
case none:
|
||||
jpvt[j] = 0
|
||||
default:
|
||||
panic("bad freedom")
|
||||
}
|
||||
}
|
||||
k := min(m, n)
|
||||
tau := make([]float64, k)
|
||||
for i := range tau {
|
||||
tau[i] = rnd.Float64()
|
||||
}
|
||||
work := make([]float64, 1)
|
||||
impl.Dgeqp3(m, n, a, lda, jpvt, tau, work, -1)
|
||||
lwork := int(work[0])
|
||||
work = make([]float64, lwork)
|
||||
for i := range work {
|
||||
work[i] = rnd.Float64()
|
||||
}
|
||||
impl.Dgeqp3(m, n, a, lda, jpvt, tau, work, lwork)
|
||||
|
||||
// Test that the QR factorization has completed successfully. Compute
|
||||
// Q based on the vectors.
|
||||
q := constructQ("QR", m, n, a, lda, tau)
|
||||
|
||||
// Check that q is orthonormal
|
||||
for i := 0; i < m; i++ {
|
||||
nrm := blas64.Nrm2(m, blas64.Vector{Inc: 1, Data: q.Data[i*m:]})
|
||||
if math.Abs(nrm-1) > 1e-13 {
|
||||
t.Errorf("Case %v, q not normal", c)
|
||||
}
|
||||
for j := 0; j < i; j++ {
|
||||
dot := blas64.Dot(m, blas64.Vector{Inc: 1, Data: q.Data[i*m:]}, blas64.Vector{Inc: 1, Data: q.Data[j*m:]})
|
||||
if math.Abs(dot) > 1e-14 {
|
||||
t.Errorf("Case %v, q not orthogonal", c)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Check that A * P = Q * R
|
||||
r := blas64.General{
|
||||
Rows: m,
|
||||
Cols: n,
|
||||
Stride: n,
|
||||
Data: make([]float64, m*n),
|
||||
}
|
||||
for i := 0; i < m; i++ {
|
||||
for j := i; j < n; j++ {
|
||||
r.Data[i*n+j] = a[i*lda+j]
|
||||
}
|
||||
}
|
||||
got := nanGeneral(m, n, lda)
|
||||
blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, q, r, 0, got)
|
||||
|
||||
want := blas64.General{Rows: m, Cols: n, Stride: lda, Data: aCopy}
|
||||
impl.Dlapmt(true, want.Rows, want.Cols, want.Data, want.Stride, jpvt)
|
||||
if !equalApproxGeneral(got, want, 1e-13) {
|
||||
t.Errorf("Case %v, Q*R != A*P\nQ*R=%v\nA*P=%v", c, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user