mirror of
https://github.com/gonum/gonum.git
synced 2025-10-24 15:43:07 +08:00
179 lines
5.1 KiB
Go
179 lines
5.1 KiB
Go
// Copyright ©2016 The gonum Authors. All rights reserved.
|
||
// Use of this source code is governed by a BSD-style
|
||
// license that can be found in the LICENSE file.
|
||
|
||
package native
|
||
|
||
import (
|
||
"gonum.org/v1/gonum/blas"
|
||
"gonum.org/v1/gonum/blas/blas64"
|
||
)
|
||
|
||
// Dsytrd reduces a symmetric n×n matrix A to symmetric tridiagonal form by an
|
||
// orthogonal similarity transformation
|
||
// Q^T * A * Q = T
|
||
// where Q is an orthonormal matrix and T is symmetric and tridiagonal.
|
||
//
|
||
// On entry, a contains the elements of the input matrix in the triangle specified
|
||
// by uplo. On exit, the diagonal and sub/super-diagonal are overwritten by the
|
||
// corresponding elements of the tridiagonal matrix T. The remaining elements in
|
||
// the triangle, along with the array tau, contain the data to construct Q as
|
||
// the product of elementary reflectors.
|
||
//
|
||
// If uplo == blas.Upper, Q is constructed with
|
||
// Q = H_{n-2} * ... * H_1 * H_0
|
||
// where
|
||
// H_i = I - tau_i * v * v^T
|
||
// v is constructed as v[i+1:n] = 0, v[i] = 1, v[0:i-1] is stored in A[0:i-1, i+1].
|
||
// The elements of A are
|
||
// [ d e v1 v2 v3]
|
||
// [ d e v2 v3]
|
||
// [ d e v3]
|
||
// [ d e]
|
||
// [ e]
|
||
//
|
||
// If uplo == blas.Lower, Q is constructed with
|
||
// Q = H_0 * H_1 * ... * H_{n-2}
|
||
// where
|
||
// H_i = I - tau_i * v * v^T
|
||
// v is constructed as v[0:i+1] = 0, v[i+1] = 1, v[i+2:n] is stored in A[i+2:n, i].
|
||
// The elements of A are
|
||
// [ d ]
|
||
// [ e d ]
|
||
// [v0 e d ]
|
||
// [v0 v1 e d ]
|
||
// [v0 v1 v2 e d]
|
||
//
|
||
// d must have length n, and e and tau must have length n-1. Dsytrd will panic if
|
||
// these conditions are not met.
|
||
//
|
||
// work is temporary storage, and lwork specifies the usable memory length. At minimum,
|
||
// lwork >= 1, and Dsytrd will panic otherwise. The amount of blocking is
|
||
// limited by the usable length.
|
||
// If lwork == -1, instead of computing Dsytrd the optimal work length is stored
|
||
// into work[0].
|
||
//
|
||
// Dsytrd is an internal routine. It is exported for testing purposes.
|
||
func (impl Implementation) Dsytrd(uplo blas.Uplo, n int, a []float64, lda int, d, e, tau, work []float64, lwork int) {
|
||
checkMatrix(n, n, a, lda)
|
||
if len(d) < n {
|
||
panic(badD)
|
||
}
|
||
if len(e) < n-1 {
|
||
panic(badE)
|
||
}
|
||
if len(tau) < n-1 {
|
||
panic(badTau)
|
||
}
|
||
if len(work) < lwork {
|
||
panic(shortWork)
|
||
}
|
||
if lwork != -1 && lwork < 1 {
|
||
panic(badWork)
|
||
}
|
||
|
||
var upper bool
|
||
var opts string
|
||
switch uplo {
|
||
case blas.Upper:
|
||
upper = true
|
||
opts = "U"
|
||
case blas.Lower:
|
||
opts = "L"
|
||
default:
|
||
panic(badUplo)
|
||
}
|
||
|
||
if n == 0 {
|
||
work[0] = 1
|
||
return
|
||
}
|
||
|
||
nb := impl.Ilaenv(1, "DSYTRD", opts, n, -1, -1, -1)
|
||
lworkopt := n * nb
|
||
if lwork == -1 {
|
||
work[0] = float64(lworkopt)
|
||
return
|
||
}
|
||
|
||
nx := n
|
||
bi := blas64.Implementation()
|
||
var ldwork int
|
||
if 1 < nb && nb < n {
|
||
// Determine when to cross over from blocked to unblocked code. The last
|
||
// block is always handled by unblocked code.
|
||
opts := "L"
|
||
if upper {
|
||
opts = "U"
|
||
}
|
||
nx = max(nb, impl.Ilaenv(3, "DSYTRD", opts, n, -1, -1, -1))
|
||
if nx < n {
|
||
// Determine if workspace is large enough for blocked code.
|
||
ldwork = nb
|
||
iws := n * ldwork
|
||
if lwork < iws {
|
||
// Not enough workspace to use optimal nb: determine the minimum
|
||
// value of nb and reduce nb or force use of unblocked code by
|
||
// setting nx = n.
|
||
nb = max(lwork/n, 1)
|
||
nbmin := impl.Ilaenv(2, "DSYTRD", opts, n, -1, -1, -1)
|
||
if nb < nbmin {
|
||
nx = n
|
||
}
|
||
}
|
||
} else {
|
||
nx = n
|
||
}
|
||
} else {
|
||
nb = 1
|
||
}
|
||
ldwork = nb
|
||
|
||
if upper {
|
||
// Reduce the upper triangle of A. Columns 0:kk are handled by the
|
||
// unblocked method.
|
||
var i int
|
||
kk := n - ((n-nx+nb-1)/nb)*nb
|
||
for i = n - nb; i >= kk; i -= nb {
|
||
// Reduce columns i:i+nb to tridiagonal form and form the matrix W
|
||
// which is needed to update the unreduced part of the matrix.
|
||
impl.Dlatrd(uplo, i+nb, nb, a, lda, e, tau, work, ldwork)
|
||
|
||
// Update the unreduced submatrix A[0:i-1,0:i-1], using an update
|
||
// of the form A = A - V*W^T - W*V^T.
|
||
bi.Dsyr2k(uplo, blas.NoTrans, i, nb, -1, a[i:], lda, work, ldwork, 1, a, lda)
|
||
|
||
// Copy superdiagonal elements back into A, and diagonal elements into D.
|
||
for j := i; j < i+nb; j++ {
|
||
a[(j-1)*lda+j] = e[j-1]
|
||
d[j] = a[j*lda+j]
|
||
}
|
||
}
|
||
// Use unblocked code to reduce the last or only block
|
||
// check that i == kk.
|
||
impl.Dsytd2(uplo, kk, a, lda, d, e, tau)
|
||
} else {
|
||
var i int
|
||
// Reduce the lower triangle of A.
|
||
for i = 0; i < n-nx; i += nb {
|
||
// Reduce columns 0:i+nb to tridiagonal form and form the matrix W
|
||
// which is needed to update the unreduced part of the matrix.
|
||
impl.Dlatrd(uplo, n-i, nb, a[i*lda+i:], lda, e[i:], tau[i:], work, ldwork)
|
||
|
||
// Update the unreduced submatrix A[i+ib:n, i+ib:n], using an update
|
||
// of the form A = A + V*W^T - W*V^T.
|
||
bi.Dsyr2k(uplo, blas.NoTrans, n-i-nb, nb, -1, a[(i+nb)*lda+i:], lda,
|
||
work[nb*ldwork:], ldwork, 1, a[(i+nb)*lda+i+nb:], lda)
|
||
|
||
// Copy subdiagonal elements back into A, and diagonal elements into D.
|
||
for j := i; j < i+nb; j++ {
|
||
a[(j+1)*lda+j] = e[j]
|
||
d[j] = a[j*lda+j]
|
||
}
|
||
}
|
||
// Use unblocked code to reduce the last or only block.
|
||
impl.Dsytd2(uplo, n-i, a[i*lda+i:], lda, d[i:], e[i:], tau[i:])
|
||
}
|
||
work[0] = float64(lworkopt)
|
||
}
|