From ce09d0a8a44ecaf366916131779b80ee26b59b56 Mon Sep 17 00:00:00 2001 From: kortschak Date: Sun, 29 Jan 2017 16:39:13 +1030 Subject: [PATCH] native: implement dtgsja and test --- cgo/lapack.go | 194 +++++++++++++++++++++++ cgo/lapack_test.go | 4 + lapack.go | 11 ++ native/dtgsja.go | 357 ++++++++++++++++++++++++++++++++++++++++++ native/general.go | 3 + native/lapack_test.go | 4 + testlapack/dtgsja.go | 165 +++++++++++++++++++ testlapack/general.go | 107 +++++++++++++ 8 files changed, 845 insertions(+) create mode 100644 native/dtgsja.go create mode 100644 testlapack/dtgsja.go diff --git a/cgo/lapack.go b/cgo/lapack.go index d455716a..f5a03611 100644 --- a/cgo/lapack.go +++ b/cgo/lapack.go @@ -16,7 +16,9 @@ import ( // Copied from lapack/native. Keep in sync. const ( absIncNotOne = "lapack: increment not one or negative one" + badAlpha = "lapack: bad alpha length" badAuxv = "lapack: auxv has insufficient length" + badBeta = "lapack: bad beta length" badD = "lapack: d has insufficient length" badDecompUpdate = "lapack: bad decomp update" badDiag = "lapack: bad diag" @@ -26,6 +28,7 @@ const ( badEVComp = "lapack: bad EVComp" badEVJob = "lapack: bad EVJob" badEVSide = "lapack: bad EVSide" + badGSVDJob = "lapack: bad GSVDJob" badHowMany = "lapack: bad HowMany" badIlo = "lapack: ilo out of range" badIhi = "lapack: ihi out of range" @@ -2482,3 +2485,194 @@ func (impl Implementation) Dgeev(jobvl lapack.LeftEVJob, jobvr lapack.RightEVJob } return first } + +// Dtgsja computes the generalized singular value decomposition (GSVD) +// of two real upper triangular or trapezoidal matrices A and B. +// +// A and B have the following forms, which may be obtained by the +// preprocessing subroutine Dggsvp from a general m×n matrix A and p×n +// matrix B: +// +// n-k-l k l +// A = k [ 0 A12 A13 ] if m-k-l >= 0; +// l [ 0 0 A23 ] +// m-k-l [ 0 0 0 ] +// +// n-k-l k l +// A = k [ 0 A12 A13 ] if m-k-l < 0; +// m-k [ 0 0 A23 ] +// +// n-k-l k l +// B = l [ 0 0 B13 ] +// p-l [ 0 0 0 ] +// +// where the k×k matrix A12 and l×l matrix B13 are non-singular +// upper triangular. A23 is l×l upper triangular if m-k-l >= 0, +// otherwise A23 is (m-k)×l upper trapezoidal. +// +// On exit, +// +// U^T*A*Q = D1*[ 0 R ], V^T*B*Q = D2*[ 0 R ], +// +// where U, V and Q are orthogonal matrices. +// R is a non-singular upper triangular matrix, and D1 and D2 are +// diagonal matrices, which are of the following structures: +// +// If m-k-l >= 0, +// +// k l +// D1 = k [ I 0 ] +// l [ 0 C ] +// m-k-l [ 0 0 ] +// +// k l +// D2 = l [ 0 S ] +// p-l [ 0 0 ] +// +// n-k-l k l +// [ 0 R ] = k [ 0 R11 R12 ] k +// l [ 0 0 R22 ] l +// +// where +// +// C = diag( alpha_k, ... , alpha_{k+l} ), +// S = diag( beta_k, ... , beta_{k+l} ), +// C^2 + S^2 = I. +// +// R is stored in +// A[0:k+l, n-k-l:n] +// on exit. +// +// If m-k-l < 0, +// +// k m-k k+l-m +// D1 = k [ I 0 0 ] +// m-k [ 0 C 0 ] +// +// k m-k k+l-m +// D2 = m-k [ 0 S 0 ] +// k+l-m [ 0 0 I ] +// p-l [ 0 0 0 ] +// +// n-k-l k m-k k+l-m +// [ 0 R ] = k [ 0 R11 R12 R13 ] +// m-k [ 0 0 R22 R23 ] +// k+l-m [ 0 0 0 R33 ] +// +// where +// C = diag( alpha_k, ... , alpha_m ), +// S = diag( beta_k, ... , beta_m ), +// C^2 + S^2 = I. +// +// R = [ R11 R12 R13 ] is stored in A[1:m, n-k-l+1:n] +// [ 0 R22 R23 ] +// and R33 is stored in +// B[m-k:l, n+m-k-l:n] on exit. +// +// The computation of the orthogonal transformation matrices U, V or Q +// is optional. These matrices may either be formed explicitly, or they +// may be post-multiplied into input matrices U1, V1, or Q1. +// +// Dtgsja essentially uses a variant of Kogbetliantz algorithm to reduce +// min(l,m-k)×l triangular or trapezoidal matrix A23 and l×l +// matrix B13 to the form: +// +// U1^T*A13*Q1 = C1*R1; V1^T*B13*Q1 = S1*R1, +// +// where U1, V1 and Q1 are orthogonal matrices. C1 and S1 are diagonal +// matrices satisfying +// +// C1^2 + S1^2 = I, +// +// and R1 is an l×l non-singular upper triangular matrix. +// +// jobU, jobV and jobQ are options for computing the orthogonal matrices. The behavior +// is as follows +// jobU == lapack.GSVDU Compute orthogonal matrix U +// jobU == lapack.GSVDUnit Use unit-initialized matrix +// jobU == lapack.GSVDNone Do not compute orthogonal matrix. +// The behavior is the same for jobV and jobQ with the exception that instead of +// lapack.GSVDU these accept lapack.GSVDV and lapack.GSVDQ respectively. +// The matrices U, V and Q must be m×m, p×p and n×n respectively unless the +// relevant job parameter is lapack.GSVDNone. +// +// k and l specify the sub-blocks in the input matrices A and B: +// A23 = A[k:min(k+l,m), n-l:n) and B13 = B[0:l, n-l:n] +// of A and B, whose GSVD is going to be computed by Dtgsja. +// +// tola and tolb are the convergence criteria for the Jacobi-Kogbetliantz +// iteration procedure. Generally, they are the same as used in the preprocessing +// step, for example, +// tola = max(m, n)*norm(A)*eps, +// tolb = max(p, n)*norm(B)*eps, +// where eps is the machine epsilon. +// +// work must have length at least 2*n, otherwise Dtgsja will panic. +// +// alpha and beta must have length n or Dtgsja will panic. On exit, alpha and +// beta contain the generalized singular value pairs of A and B +// alpha[0:k] = 1, +// beta[0:k] = 0, +// if m-k-l >= 0, +// alpha[k:k+l] = diag(C), +// beta[k:k+l] = diag(S), +// if m-k-l < 0, +// alpha[k:m]= C, alpha[m:k+l]= 0 +// beta[k:m] = S, beta[m:k+l] = 1. +// if k+l < n, +// alpha[k+l:n] = 0 and +// beta[k+l:n] = 0. +// +// On exit, A[n-k:n, 0:min(k+l,m)] contains the triangular matrix R or part of R +// and if necessary, B[m-k:l, n+m-k-l:n] contains a part of R. +// +// Dtgsja returns whether the routine converged and the number of iteration cycles +// that were run. +// +// Dtgsja is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dtgsja(jobU, jobV, jobQ lapack.GSVDJob, m, p, n, k, l int, a []float64, lda int, b []float64, ldb int, tola, tolb float64, alpha, beta, u []float64, ldu int, v []float64, ldv int, q []float64, ldq int, work []float64) (cycles int, ok bool) { + checkMatrix(m, n, a, lda) + checkMatrix(p, n, b, ldb) + + if len(alpha) != n { + panic(badAlpha) + } + if len(beta) != n { + panic(badBeta) + } + + initu := jobU == lapack.GSVDUnit + wantu := initu || jobU == lapack.GSVDU + if !initu && !wantu && jobU != lapack.GSVDNone { + panic(badGSVDJob + "U") + } + if jobU != lapack.GSVDNone { + checkMatrix(m, m, u, ldu) + } + + initv := jobV == lapack.GSVDUnit + wantv := initv || jobV == lapack.GSVDV + if !initv && !wantv && jobV != lapack.GSVDNone { + panic(badGSVDJob + "V") + } + if jobV != lapack.GSVDNone { + checkMatrix(p, p, v, ldv) + } + + initq := jobQ == lapack.GSVDUnit + wantq := initq || jobQ == lapack.GSVDQ + if !initq && !wantq && jobQ != lapack.GSVDNone { + panic(badGSVDJob + "Q") + } + if jobQ != lapack.GSVDNone { + checkMatrix(n, n, q, ldq) + } + + if len(work) < 2*n { + panic(badWork) + } + + ncycle := []int32{0} + ok = lapacke.Dtgsja(lapack.Job(jobU), lapack.Job(jobV), lapack.Job(jobQ), m, p, n, k, l, a, lda, b, ldb, tola, tolb, alpha, beta, u, ldu, v, ldv, q, ldq, work, ncycle) + return int(ncycle[0]), ok +} diff --git a/cgo/lapack_test.go b/cgo/lapack_test.go index 67c16de7..eb96550d 100644 --- a/cgo/lapack_test.go +++ b/cgo/lapack_test.go @@ -244,6 +244,10 @@ func TestDsytrd(t *testing.T) { testlapack.DsytrdTest(t, impl) } +func TestDtgsja(t *testing.T) { + testlapack.DtgsjaTest(t, impl) +} + func TestDtrexc(t *testing.T) { testlapack.DtrexcTest(t, impl) } diff --git a/lapack.go b/lapack.go index 90da267e..c84e6e25 100644 --- a/lapack.go +++ b/lapack.go @@ -108,6 +108,17 @@ const ( SVDNone SVDJob = 'N' // Do not compute singular vectors ) +// GSVDJob specifies the singular vector computation type for Generalized SVD. +type GSVDJob byte + +const ( + GSVDU GSVDJob = 'U' // Compute orthogonal matrix U + GSVDV GSVDJob = 'V' // Compute orthogonal matrix V + GSVDQ GSVDJob = 'Q' // Compute orthogonal matrix Q + GSVDUnit GSVDJob = 'I' // Use unit-initialized matrix + GSVDNone GSVDJob = 'N' // Do not compute orthogonal matrix +) + // EVComp specifies how eigenvectors are computed. type EVComp byte diff --git a/native/dtgsja.go b/native/dtgsja.go new file mode 100644 index 00000000..a47e2526 --- /dev/null +++ b/native/dtgsja.go @@ -0,0 +1,357 @@ +// Copyright ©2017 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package native + +import ( + "math" + + "github.com/gonum/blas" + "github.com/gonum/blas/blas64" + "github.com/gonum/lapack" +) + +// Dtgsja computes the generalized singular value decomposition (GSVD) +// of two real upper triangular or trapezoidal matrices A and B. +// +// A and B have the following forms, which may be obtained by the +// preprocessing subroutine Dggsvp from a general m×n matrix A and p×n +// matrix B: +// +// n-k-l k l +// A = k [ 0 A12 A13 ] if m-k-l >= 0; +// l [ 0 0 A23 ] +// m-k-l [ 0 0 0 ] +// +// n-k-l k l +// A = k [ 0 A12 A13 ] if m-k-l < 0; +// m-k [ 0 0 A23 ] +// +// n-k-l k l +// B = l [ 0 0 B13 ] +// p-l [ 0 0 0 ] +// +// where the k×k matrix A12 and l×l matrix B13 are non-singular +// upper triangular. A23 is l×l upper triangular if m-k-l >= 0, +// otherwise A23 is (m-k)×l upper trapezoidal. +// +// On exit, +// +// U^T*A*Q = D1*[ 0 R ], V^T*B*Q = D2*[ 0 R ], +// +// where U, V and Q are orthogonal matrices. +// R is a non-singular upper triangular matrix, and D1 and D2 are +// diagonal matrices, which are of the following structures: +// +// If m-k-l >= 0, +// +// k l +// D1 = k [ I 0 ] +// l [ 0 C ] +// m-k-l [ 0 0 ] +// +// k l +// D2 = l [ 0 S ] +// p-l [ 0 0 ] +// +// n-k-l k l +// [ 0 R ] = k [ 0 R11 R12 ] k +// l [ 0 0 R22 ] l +// +// where +// +// C = diag( alpha_k, ... , alpha_{k+l} ), +// S = diag( beta_k, ... , beta_{k+l} ), +// C^2 + S^2 = I. +// +// R is stored in +// A[0:k+l, n-k-l:n] +// on exit. +// +// If m-k-l < 0, +// +// k m-k k+l-m +// D1 = k [ I 0 0 ] +// m-k [ 0 C 0 ] +// +// k m-k k+l-m +// D2 = m-k [ 0 S 0 ] +// k+l-m [ 0 0 I ] +// p-l [ 0 0 0 ] +// +// n-k-l k m-k k+l-m +// [ 0 R ] = k [ 0 R11 R12 R13 ] +// m-k [ 0 0 R22 R23 ] +// k+l-m [ 0 0 0 R33 ] +// +// where +// C = diag( alpha_k, ... , alpha_m ), +// S = diag( beta_k, ... , beta_m ), +// C^2 + S^2 = I. +// +// R = [ R11 R12 R13 ] is stored in A[0:m, n-k-l:n] +// [ 0 R22 R23 ] +// and R33 is stored in +// B[m-k:l, n+m-k-l:n] on exit. +// +// The computation of the orthogonal transformation matrices U, V or Q +// is optional. These matrices may either be formed explicitly, or they +// may be post-multiplied into input matrices U1, V1, or Q1. +// +// Dtgsja essentially uses a variant of Kogbetliantz algorithm to reduce +// min(l,m-k)×l triangular or trapezoidal matrix A23 and l×l +// matrix B13 to the form: +// +// U1^T*A13*Q1 = C1*R1; V1^T*B13*Q1 = S1*R1, +// +// where U1, V1 and Q1 are orthogonal matrices. C1 and S1 are diagonal +// matrices satisfying +// +// C1^2 + S1^2 = I, +// +// and R1 is an l×l non-singular upper triangular matrix. +// +// jobU, jobV and jobQ are options for computing the orthogonal matrices. The behavior +// is as follows +// jobU == lapack.GSVDU Compute orthogonal matrix U +// jobU == lapack.GSVDUnit Use unit-initialized matrix +// jobU == lapack.GSVDNone Do not compute orthogonal matrix. +// The behavior is the same for jobV and jobQ with the exception that instead of +// lapack.GSVDU these accept lapack.GSVDV and lapack.GSVDQ respectively. +// The matrices U, V and Q must be m×m, p×p and n×n respectively unless the +// relevant job parameter is lapack.GSVDNone. +// +// k and l specify the sub-blocks in the input matrices A and B: +// A23 = A[k:min(k+l,m), n-l:n) and B13 = B[0:l, n-l:n] +// of A and B, whose GSVD is going to be computed by Dtgsja. +// +// tola and tolb are the convergence criteria for the Jacobi-Kogbetliantz +// iteration procedure. Generally, they are the same as used in the preprocessing +// step, for example, +// tola = max(m, n)*norm(A)*eps, +// tolb = max(p, n)*norm(B)*eps, +// where eps is the machine epsilon. +// +// work must have length at least 2*n, otherwise Dtgsja will panic. +// +// alpha and beta must have length n or Dtgsja will panic. On exit, alpha and +// beta contain the generalized singular value pairs of A and B +// alpha[0:k] = 1, +// beta[0:k] = 0, +// if m-k-l >= 0, +// alpha[k:k+l] = diag(C), +// beta[k:k+l] = diag(S), +// if m-k-l < 0, +// alpha[k:m]= C, alpha[m:k+l]= 0 +// beta[k:m] = S, beta[m:k+l] = 1. +// if k+l < n, +// alpha[k+l:n] = 0 and +// beta[k+l:n] = 0. +// +// On exit, A[n-k:n, 0:min(k+l,m)] contains the triangular matrix R or part of R +// and if necessary, B[m-k:l, n+m-k-l:n] contains a part of R. +// +// Dtgsja returns whether the routine converged and the number of iteration cycles +// that were run. +// +// Dtgsja is an internal routine. It is exported for testing purposes. +func (impl Implementation) Dtgsja(jobU, jobV, jobQ lapack.GSVDJob, m, p, n, k, l int, a []float64, lda int, b []float64, ldb int, tola, tolb float64, alpha, beta, u []float64, ldu int, v []float64, ldv int, q []float64, ldq int, work []float64) (cycles int, ok bool) { + const maxit = 40 + + checkMatrix(m, n, a, lda) + checkMatrix(p, n, b, ldb) + + if len(alpha) != n { + panic(badAlpha) + } + if len(beta) != n { + panic(badBeta) + } + + initu := jobU == lapack.GSVDUnit + wantu := initu || jobU == lapack.GSVDU + if !initu && !wantu && jobU != lapack.GSVDNone { + panic(badGSVDJob + "U") + } + if jobU != lapack.GSVDNone { + checkMatrix(m, m, u, ldu) + } + + initv := jobV == lapack.GSVDUnit + wantv := initv || jobV == lapack.GSVDV + if !initv && !wantv && jobV != lapack.GSVDNone { + panic(badGSVDJob + "V") + } + if jobV != lapack.GSVDNone { + checkMatrix(p, p, v, ldv) + } + + initq := jobQ == lapack.GSVDUnit + wantq := initq || jobQ == lapack.GSVDQ + if !initq && !wantq && jobQ != lapack.GSVDNone { + panic(badGSVDJob + "Q") + } + if jobQ != lapack.GSVDNone { + checkMatrix(n, n, q, ldq) + } + + if len(work) < 2*n { + panic(badWork) + } + + // Initialize U, V and Q, if necessary + if initu { + impl.Dlaset(blas.All, m, m, 0, 1, u, ldu) + } + if initv { + impl.Dlaset(blas.All, p, p, 0, 1, v, ldv) + } + if initq { + impl.Dlaset(blas.All, n, n, 0, 1, q, ldq) + } + + bi := blas64.Implementation() + minTol := math.Min(tola, tolb) + + // Loop until convergence. + upper := false + for cycles = 1; cycles <= maxit; cycles++ { + upper = !upper + + for i := 0; i < l-1; i++ { + for j := i + 1; j < l; j++ { + var a1, a2, a3 float64 + if k+i < m { + a1 = a[(k+i)*lda+n-l+i] + } + if k+j < m { + a3 = a[(k+j)*lda+n-l+j] + } + + b1 := b[i*ldb+n-l+i] + b3 := b[j*ldb+n-l+j] + + var b2 float64 + if upper { + if k+i < m { + a2 = a[(k+i)*lda+n-l+j] + } + b2 = b[i*ldb+n-l+j] + } else { + if k+j < m { + a2 = a[(k+j)*lda+n-l+i] + } + b2 = b[j*ldb+n-l+i] + } + + csu, snu, csv, snv, csq, snq := impl.Dlags2(upper, a1, a2, a3, b1, b2, b3) + + // Update (k+i)-th and (k+j)-th rows of matrix A: U^T*A. + if k+j < m { + bi.Drot(l, a[(k+j)*lda+n-l:], 1, a[(k+i)*lda+n-l:], 1, csu, snu) + } + + // Update i-th and j-th rows of matrix B: V^T*B. + bi.Drot(l, b[j*ldb+n-l:], 1, b[i*ldb+n-l:], 1, csv, snv) + + // Update (n-l+i)-th and (n-l+j)-th columns of matrices + // A and B: A*Q and B*Q. + bi.Drot(min(k+l, m), a[n-l+j:], lda, a[n-l+i:], lda, csq, snq) + bi.Drot(l, b[n-l+j:], ldb, b[n-l+i:], ldb, csq, snq) + + if upper { + if k+i < m { + a[(k+i)*lda+n-l+j] = 0 + } + b[i*ldb+n-l+j] = 0 + } else { + if k+j < m { + a[(k+j)*lda+n-l+i] = 0 + } + b[j*ldb+n-l+i] = 0 + } + + // Update orthogonal matrices U, V, Q, if desired. + if wantu && k+j < m { + bi.Drot(m, u[k+j:], ldu, u[k+i:], ldu, csu, snu) + } + if wantv { + bi.Drot(p, v[j:], ldv, v[i:], ldv, csv, snv) + } + if wantq { + bi.Drot(n, q[n-l+j:], ldq, q[n-l+i:], ldq, csq, snq) + } + } + } + + if !upper { + // The matrices A13 and B13 were lower triangular at the start + // of the cycle, and are now upper triangular. + // + // Convergence test: test the parallelism of the corresponding + // rows of A and B. + var error float64 + for i := 0; i < min(l, m-k); i++ { + bi.Dcopy(l-i, a[(k+i)*lda+n-l+i:], 1, work, 1) + bi.Dcopy(l-i, b[i*ldb+n-l+i:], 1, work[l:], 1) + ssmin := impl.Dlapll(l-i, work, 1, work[l:], 1) + error = math.Max(error, ssmin) + } + if math.Abs(error) <= minTol { + // The algorithm has converged. + // Compute the generalized singular value pairs (alpha, beta) + // and set the triangular matrix R to array A. + for i := 0; i < k; i++ { + alpha[i] = 1 + beta[i] = 0 + } + + for i := 0; i < min(l, m-k); i++ { + a1 := a[(k+i)*lda+n-l+i] + b1 := b[i*ldb+n-l+i] + + if a1 != 0 { + gamma := b1 / a1 + + // Change sign if necessary. + if gamma < 0 { + bi.Dscal(l-i, -1, b[i*ldb+n-l+i:], 1) + if wantv { + bi.Dscal(p, -1, v[i:], ldv) + } + } + beta[k+i], alpha[k+i], _ = impl.Dlartg(math.Abs(gamma), 1) + + if alpha[k+i] >= beta[k+i] { + bi.Dscal(l-i, 1/alpha[k+i], a[(k+i)*lda+n-l+i:], 1) + } else { + bi.Dscal(l-i, 1/beta[k+i], b[i*ldb+n-l+i:], 1) + bi.Dcopy(l-i, b[i*ldb+n-l+i:], 1, a[(k+i)*lda+n-l+i:], 1) + } + } else { + alpha[k+i] = 0 + beta[k+i] = 1 + bi.Dcopy(l-i, b[i*ldb+n-l+i:], 1, a[(k+i)*lda+n-l+i:], 1) + } + } + + for i := m; i < k+l; i++ { + alpha[i] = 0 + beta[i] = 1 + } + if k+l < n { + for i := k + l; i < n; i++ { + alpha[i] = 0 + beta[i] = 0 + } + } + + return cycles, true + } + } + } + + // The algorithm has not converged after maxit cycles. + return cycles, false +} diff --git a/native/general.go b/native/general.go index 4a134331..43ec0439 100644 --- a/native/general.go +++ b/native/general.go @@ -20,7 +20,9 @@ var _ lapack.Float64 = Implementation{} // This list is duplicated in lapack/cgo. Keep in sync. const ( absIncNotOne = "lapack: increment not one or negative one" + badAlpha = "lapack: bad alpha length" badAuxv = "lapack: auxv has insufficient length" + badBeta = "lapack: bad beta length" badD = "lapack: d has insufficient length" badDecompUpdate = "lapack: bad decomp update" badDiag = "lapack: bad diag" @@ -30,6 +32,7 @@ const ( badEVComp = "lapack: bad EVComp" badEVJob = "lapack: bad EVJob" badEVSide = "lapack: bad EVSide" + badGSVDJob = "lapack: bad GSVDJob" badHowMany = "lapack: bad HowMany" badIlo = "lapack: ilo out of range" badIhi = "lapack: ihi out of range" diff --git a/native/lapack_test.go b/native/lapack_test.go index 9db5a45f..7f107204 100644 --- a/native/lapack_test.go +++ b/native/lapack_test.go @@ -372,6 +372,10 @@ func TestDsytrd(t *testing.T) { testlapack.DsytrdTest(t, impl) } +func TestDtgsja(t *testing.T) { + testlapack.DtgsjaTest(t, impl) +} + func TestDtrcon(t *testing.T) { testlapack.DtrconTest(t, impl) } diff --git a/testlapack/dtgsja.go b/testlapack/dtgsja.go new file mode 100644 index 00000000..b9466220 --- /dev/null +++ b/testlapack/dtgsja.go @@ -0,0 +1,165 @@ +// Copyright ©2017 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package testlapack + +import ( + "math/rand" + "testing" + + "github.com/gonum/blas" + "github.com/gonum/blas/blas64" + "github.com/gonum/floats" + "github.com/gonum/lapack" +) + +type Dtgsjaer interface { + Dlanger + Dtgsja(jobU, jobV, jobQ lapack.GSVDJob, m, p, n, k, l int, a []float64, lda int, b []float64, ldb int, tola, tolb float64, alpha, beta, u []float64, ldu int, v []float64, ldv int, q []float64, ldq int, work []float64) (cycles int, ok bool) +} + +func DtgsjaTest(t *testing.T, impl Dtgsjaer) { + rnd := rand.New(rand.NewSource(1)) + for cas, test := range []struct { + m, p, n, k, l, lda, ldb, ldu, ldv, ldq int + + ok bool + }{ + {m: 5, p: 5, n: 5, k: 2, l: 2, lda: 0, ldb: 0, ldu: 0, ldv: 0, ldq: 0, ok: true}, + {m: 5, p: 5, n: 5, k: 4, l: 1, lda: 0, ldb: 0, ldu: 0, ldv: 0, ldq: 0, ok: true}, + {m: 5, p: 5, n: 10, k: 2, l: 2, lda: 0, ldb: 0, ldu: 0, ldv: 0, ldq: 0, ok: true}, + {m: 5, p: 5, n: 10, k: 4, l: 1, lda: 0, ldb: 0, ldu: 0, ldv: 0, ldq: 0, ok: true}, + {m: 5, p: 5, n: 10, k: 4, l: 2, lda: 0, ldb: 0, ldu: 0, ldv: 0, ldq: 0, ok: true}, + {m: 10, p: 5, n: 5, k: 2, l: 2, lda: 0, ldb: 0, ldu: 0, ldv: 0, ldq: 0, ok: true}, + {m: 10, p: 5, n: 5, k: 4, l: 1, lda: 0, ldb: 0, ldu: 0, ldv: 0, ldq: 0, ok: true}, + {m: 10, p: 10, n: 10, k: 5, l: 3, lda: 0, ldb: 0, ldu: 0, ldv: 0, ldq: 0, ok: true}, + {m: 10, p: 10, n: 10, k: 6, l: 4, lda: 0, ldb: 0, ldu: 0, ldv: 0, ldq: 0, ok: true}, + {m: 5, p: 5, n: 5, k: 2, l: 2, lda: 10, ldb: 10, ldu: 10, ldv: 10, ldq: 10, ok: true}, + {m: 5, p: 5, n: 5, k: 4, l: 1, lda: 10, ldb: 10, ldu: 10, ldv: 10, ldq: 10, ok: true}, + {m: 5, p: 5, n: 10, k: 2, l: 2, lda: 20, ldb: 20, ldu: 10, ldv: 10, ldq: 20, ok: true}, + {m: 5, p: 5, n: 10, k: 4, l: 1, lda: 20, ldb: 20, ldu: 10, ldv: 10, ldq: 20, ok: true}, + {m: 5, p: 5, n: 10, k: 4, l: 2, lda: 20, ldb: 20, ldu: 10, ldv: 10, ldq: 20, ok: true}, + {m: 10, p: 5, n: 5, k: 2, l: 2, lda: 10, ldb: 10, ldu: 20, ldv: 10, ldq: 10, ok: true}, + {m: 10, p: 5, n: 5, k: 4, l: 1, lda: 10, ldb: 10, ldu: 20, ldv: 10, ldq: 10, ok: true}, + {m: 10, p: 10, n: 10, k: 5, l: 3, lda: 20, ldb: 20, ldu: 20, ldv: 20, ldq: 20, ok: true}, + {m: 10, p: 10, n: 10, k: 6, l: 4, lda: 20, ldb: 20, ldu: 20, ldv: 20, ldq: 20, ok: true}, + } { + m := test.m + p := test.p + n := test.n + k := test.k + l := test.l + lda := test.lda + if lda == 0 { + lda = n + } + ldb := test.ldb + if ldb == 0 { + ldb = n + } + ldu := test.ldu + if ldu == 0 { + ldu = m + } + ldv := test.ldv + if ldv == 0 { + ldv = p + } + ldq := test.ldq + if ldq == 0 { + ldq = n + } + + a := blockedUpperTriGeneral(m, n, k, l, lda, true, rnd) + aCopy := cloneGeneral(a) + b := blockedUpperTriGeneral(p, n, k, l, ldb, false, rnd) + bCopy := cloneGeneral(b) + + tola := float64(max(m, n)) * impl.Dlange(lapack.NormFrob, m, n, a.Data, a.Stride, nil) * dlamchE + tolb := float64(max(p, n)) * impl.Dlange(lapack.NormFrob, p, n, b.Data, b.Stride, nil) * dlamchE + + alpha := make([]float64, n) + beta := make([]float64, n) + + work := make([]float64, 2*n) + + u := nanGeneral(m, m, ldu) + v := nanGeneral(p, p, ldv) + q := nanGeneral(n, n, ldq) + + _, ok := impl.Dtgsja(lapack.GSVDUnit, lapack.GSVDUnit, lapack.GSVDUnit, + m, p, n, k, l, + a.Data, a.Stride, + b.Data, b.Stride, + tola, tolb, + alpha, beta, + u.Data, u.Stride, + v.Data, v.Stride, + q.Data, q.Stride, + work) + + if !ok { + if test.ok { + t.Errorf("test %d unexpectedly did not converge", cas) + } + continue + } + + // Check orthogonality of U, V and Q. + if !isOrthonormal(u) { + t.Errorf("test %d: U is not orthogonal\n%+v", cas, u) + } + if !isOrthonormal(v) { + t.Errorf("test %d: V is not orthogonal\n%+v", cas, v) + } + if !isOrthonormal(q) { + t.Errorf("test %d: Q is not orthogonal\n%+v", cas, q) + } + + // Check C^2 + S^2 = I. + var elements []float64 + if m-k-l >= 0 { + elements = alpha[k : k+l] + } else { + elements = alpha[k:m] + } + for i := range elements { + i += k + d := alpha[i]*alpha[i] + beta[i]*beta[i] + if !floats.EqualWithinAbsOrRel(d, 1, 1e-14, 1e-14) { + t.Errorf("test %d: alpha_%d^2 + beta_%d^2 != 1: got: %v", cas, i, i, d) + } + } + + zeroR, d1, d2 := constructGSVDresults(n, p, m, k, l, a, b, alpha, beta) + + // Check U^T*A*Q = D1*[ 0 R ]. + uTmp := nanGeneral(m, n, n) + blas64.Gemm(blas.Trans, blas.NoTrans, 1, u, aCopy, 0, uTmp) + uAns := nanGeneral(m, n, n) + blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, uTmp, q, 0, uAns) + + d10r := nanGeneral(m, n, n) + blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, d1, zeroR, 0, d10r) + + if !equalApproxGeneral(uAns, d10r, 1e-14) { + t.Errorf("test %d: U^T*A*Q != D1*[ 0 R ]\nU^T*A*Q:\n%+v\nD1*[ 0 R ]:\n%+v", + cas, uAns, d10r) + } + + // Check V^T*B*Q = D2*[ 0 R ]. + vTmp := nanGeneral(p, n, n) + blas64.Gemm(blas.Trans, blas.NoTrans, 1, v, bCopy, 0, vTmp) + vAns := nanGeneral(p, n, n) + blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, vTmp, q, 0, vAns) + + d20r := nanGeneral(p, n, n) + blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, d2, zeroR, 0, d20r) + + if !equalApproxGeneral(vAns, d20r, 1e-14) { + t.Errorf("test %d: V^T*B*Q != D2*[ 0 R ]\nV^T*B*Q:\n%+v\nD2*[ 0 R ]:\n%+v", + cas, vAns, d20r) + } + } +} diff --git a/testlapack/general.go b/testlapack/general.go index ec8a38cb..0b092b4c 100644 --- a/testlapack/general.go +++ b/testlapack/general.go @@ -17,6 +17,9 @@ import ( "github.com/gonum/lapack" ) +// dlamchE is the machine epsilon. For IEEE this is 2^-53. +const dlamchE = 1.0 / (1 << 53) + func max(a, b int) int { if a > b { return a @@ -138,6 +141,41 @@ func randomSchurCanonical(n, stride int, rnd *rand.Rand) blas64.General { return t } +// blockedUpperTriGeneral returns a normal random, general matrix in the form +// +// c-k-l k l +// A = k [ 0 A12 A13 ] if r-k-l >= 0; +// l [ 0 0 A23 ] +// r-k-l [ 0 0 0 ] +// +// c-k-l k l +// A = k [ 0 A12 A13 ] if r-k-l < 0; +// r-k [ 0 0 A23 ] +// +// where the k×k matrix A12 and l×l matrix is non-singular +// upper triangular. A23 is l×l upper triangular if r-k-l >= 0, +// otherwise A23 is (r-k)×l upper trapezoidal. +func blockedUpperTriGeneral(r, c, k, l, stride int, kblock bool, rnd *rand.Rand) blas64.General { + t := l + if kblock { + t += k + } + ans := zeros(r, c, stride) + for i := 0; i < min(r, t); i++ { + var v float64 + for v == 0 { + v = rnd.NormFloat64() + } + ans.Data[i*ans.Stride+i+(c-t)] = v + } + for i := 0; i < min(r, t); i++ { + for j := i + (c - t) + 1; j < c; j++ { + ans.Data[i*ans.Stride+j] = rnd.NormFloat64() + } + } + return ans +} + // nanTriangular allocates a new r×c triangular matrix filled with NaN values. func nanTriangular(uplo blas.Uplo, n, stride int) blas64.Triangular { if n < 0 { @@ -1291,3 +1329,72 @@ func applyReflector(qh blas64.General, q blas64.General, v []float64) { } } } + +// constructGSVDresults returns the matrices [ 0 R ], D1 and D2 described +// in the documentation of Dtgsja and Dggsvd3, and the result matrix in +// the documentation for Dggsvp3. +func constructGSVDresults(n, p, m, k, l int, a, b blas64.General, alpha, beta []float64) (zeroR, d1, d2 blas64.General) { + zeroR = zeros(k+l, n, n) + d1 = zeros(m, k+l, k+l) + d2 = zeros(p, k+l, k+l) + if m-k-l >= 0 { + // [ 0 R ] + dst := zeroR + dst.Cols = k + l + dst.Data = zeroR.Data[n-k-l:] + src := a + src.Cols = k + l + src.Data = a.Data[n-k-l:] + copyGeneral(dst, src) + + // D1 + for i := 0; i < k; i++ { + d1.Data[i*d1.Stride+i] = 1 + } + for i := k; i < k+l; i++ { + d1.Data[i*d1.Stride+i] = alpha[i] + } + + // D2 + for i := 0; i < l; i++ { + d2.Data[i*d2.Stride+i+k] = beta[k+i] + } + } else { + // [ 0 R ] + dst := zeroR + dst.Rows = m + dst.Cols = k + l + dst.Data = zeroR.Data[n-k-l:] + src := a + src.Rows = m + src.Cols = k + l + src.Data = a.Data[n-k-l:] + copyGeneral(dst, src) + dst.Rows = k + l - m + dst.Cols = k + l - m + dst.Data = zeroR.Data[m*zeroR.Stride+n-(k+l-m):] + src = b + src.Rows = k + l - m + src.Cols = k + l - m + src.Data = b.Data[(m-k)*b.Stride+n+m-k-l:] + copyGeneral(dst, src) + + // D1 + for i := 0; i < k; i++ { + d1.Data[i*d1.Stride+i] = 1 + } + for i := k; i < m; i++ { + d1.Data[i*d1.Stride+i] = alpha[i] + } + + // D2 + for i := 0; i < m-k; i++ { + d2.Data[i*d2.Stride+i+k] = beta[k+i] + } + for i := m - k; i < l; i++ { + d2.Data[i*d2.Stride+i+k] = 1 + } + } + + return zeroR, d1, d2 +}