mirror of
https://github.com/gonum/gonum.git
synced 2025-10-05 15:16:59 +08:00

Apply (with manual curation after the fact): * s/^T/U+1d40/g * s/^H/U+1d34/g * s/, {2,3}if / $1/g Some additional manual editing of odd formatting.
215 lines
6.3 KiB
Go
215 lines
6.3 KiB
Go
// Copyright ©2019 The Gonum Authors. All rights reserved.
|
||
// Use of this source code is governed by a BSD-style
|
||
// license that can be found in the LICENSE file.
|
||
|
||
package gonum
|
||
|
||
import (
|
||
"gonum.org/v1/gonum/blas"
|
||
"gonum.org/v1/gonum/blas/blas64"
|
||
)
|
||
|
||
// Dpbtrf computes the Cholesky factorization of an n×n symmetric positive
|
||
// definite band matrix
|
||
// A = Uᵀ * U if uplo == blas.Upper
|
||
// A = L * Lᵀ if uplo == blas.Lower
|
||
// where U is an upper triangular band matrix and L is lower triangular. kd is
|
||
// the number of super- or sub-diagonals of A.
|
||
//
|
||
// The band storage scheme is illustrated below when n = 6 and kd = 2. Elements
|
||
// marked * are not used by the function.
|
||
//
|
||
// uplo == blas.Upper
|
||
// On entry: On return:
|
||
// a00 a01 a02 u00 u01 u02
|
||
// a11 a12 a13 u11 u12 u13
|
||
// a22 a23 a24 u22 u23 u24
|
||
// a33 a34 a35 u33 u34 u35
|
||
// a44 a45 * u44 u45 *
|
||
// a55 * * u55 * *
|
||
//
|
||
// uplo == blas.Lower
|
||
// On entry: On return:
|
||
// * * a00 * * l00
|
||
// * a10 a11 * l10 l11
|
||
// a20 a21 a22 l20 l21 l22
|
||
// a31 a32 a33 l31 l32 l33
|
||
// a42 a43 a44 l42 l43 l44
|
||
// a53 a54 a55 l53 l54 l55
|
||
func (impl Implementation) Dpbtrf(uplo blas.Uplo, n, kd int, ab []float64, ldab int) (ok bool) {
|
||
const nbmax = 32
|
||
|
||
switch {
|
||
case uplo != blas.Upper && uplo != blas.Lower:
|
||
panic(badUplo)
|
||
case n < 0:
|
||
panic(nLT0)
|
||
case kd < 0:
|
||
panic(kdLT0)
|
||
case ldab < kd+1:
|
||
panic(badLdA)
|
||
}
|
||
|
||
// Quick return if possible.
|
||
if n == 0 {
|
||
return true
|
||
}
|
||
|
||
if len(ab) < (n-1)*ldab+kd+1 {
|
||
panic(shortAB)
|
||
}
|
||
|
||
opts := string(blas.Upper)
|
||
if uplo == blas.Lower {
|
||
opts = string(blas.Lower)
|
||
}
|
||
nb := impl.Ilaenv(1, "DPBTRF", opts, n, kd, -1, -1)
|
||
// The block size must not exceed the semi-bandwidth kd, and must not
|
||
// exceed the limit set by the size of the local array work.
|
||
nb = min(nb, nbmax)
|
||
|
||
if nb <= 1 || kd < nb {
|
||
// Use unblocked code.
|
||
return impl.Dpbtf2(uplo, n, kd, ab, ldab)
|
||
}
|
||
|
||
// Use blocked code.
|
||
ldwork := nb
|
||
work := make([]float64, nb*ldwork)
|
||
bi := blas64.Implementation()
|
||
if uplo == blas.Upper {
|
||
// Compute the Cholesky factorization of a symmetric band
|
||
// matrix, given the upper triangle of the matrix in band
|
||
// storage.
|
||
|
||
// Process the band matrix one diagonal block at a time.
|
||
for i := 0; i < n; i += nb {
|
||
ib := min(nb, n-i)
|
||
// Factorize the diagonal block.
|
||
ok := impl.Dpotf2(uplo, ib, ab[i*ldab:], ldab-1)
|
||
if !ok {
|
||
return false
|
||
}
|
||
if i+ib >= n {
|
||
continue
|
||
}
|
||
// Update the relevant part of the trailing submatrix.
|
||
// If A11 denotes the diagonal block which has just been
|
||
// factorized, then we need to update the remaining
|
||
// blocks in the diagram:
|
||
//
|
||
// A11 A12 A13
|
||
// A22 A23
|
||
// A33
|
||
//
|
||
// The numbers of rows and columns in the partitioning
|
||
// are ib, i2, i3 respectively. The blocks A12, A22 and
|
||
// A23 are empty if ib = kd. The upper triangle of A13
|
||
// lies outside the band.
|
||
i2 := min(kd-ib, n-i-ib)
|
||
if i2 > 0 {
|
||
// Update A12.
|
||
bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i2,
|
||
1, ab[i*ldab:], ldab-1, ab[i*ldab+ib:], ldab-1)
|
||
// Update A22.
|
||
bi.Dsyrk(blas.Upper, blas.Trans, i2, ib,
|
||
-1, ab[i*ldab+ib:], ldab-1, 1, ab[(i+ib)*ldab:], ldab-1)
|
||
}
|
||
i3 := min(ib, n-i-kd)
|
||
if i3 > 0 {
|
||
// Copy the lower triangle of A13 into the work array.
|
||
for ii := 0; ii < ib; ii++ {
|
||
for jj := 0; jj <= min(ii, i3-1); jj++ {
|
||
work[ii*ldwork+jj] = ab[(i+ii)*ldab+kd-ii+jj]
|
||
}
|
||
}
|
||
// Update A13 (in the work array).
|
||
bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i3,
|
||
1, ab[i*ldab:], ldab-1, work, ldwork)
|
||
// Update A23.
|
||
if i2 > 0 {
|
||
bi.Dgemm(blas.Trans, blas.NoTrans, i2, i3, ib,
|
||
-1, ab[i*ldab+ib:], ldab-1, work, ldwork,
|
||
1, ab[(i+ib)*ldab+kd-ib:], ldab-1)
|
||
}
|
||
// Update A33.
|
||
bi.Dsyrk(blas.Upper, blas.Trans, i3, ib,
|
||
-1, work, ldwork, 1, ab[(i+kd)*ldab:], ldab-1)
|
||
// Copy the lower triangle of A13 back into place.
|
||
for ii := 0; ii < ib; ii++ {
|
||
for jj := 0; jj <= min(ii, i3-1); jj++ {
|
||
ab[(i+ii)*ldab+kd-ii+jj] = work[ii*ldwork+jj]
|
||
}
|
||
}
|
||
}
|
||
}
|
||
} else {
|
||
// Compute the Cholesky factorization of a symmetric band
|
||
// matrix, given the lower triangle of the matrix in band
|
||
// storage.
|
||
|
||
// Process the band matrix one diagonal block at a time.
|
||
for i := 0; i < n; i += nb {
|
||
ib := min(nb, n-i)
|
||
// Factorize the diagonal block.
|
||
ok := impl.Dpotf2(uplo, ib, ab[i*ldab+kd:], ldab-1)
|
||
if !ok {
|
||
return false
|
||
}
|
||
if i+ib >= n {
|
||
continue
|
||
}
|
||
// Update the relevant part of the trailing submatrix.
|
||
// If A11 denotes the diagonal block which has just been
|
||
// factorized, then we need to update the remaining
|
||
// blocks in the diagram:
|
||
//
|
||
// A11
|
||
// A21 A22
|
||
// A31 A32 A33
|
||
//
|
||
// The numbers of rows and columns in the partitioning
|
||
// are ib, i2, i3 respectively. The blocks A21, A22 and
|
||
// A32 are empty if ib = kd. The lowr triangle of A31
|
||
// lies outside the band.
|
||
i2 := min(kd-ib, n-i-ib)
|
||
if i2 > 0 {
|
||
// Update A21.
|
||
bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i2, ib,
|
||
1, ab[i*ldab+kd:], ldab-1, ab[(i+ib)*ldab+kd-ib:], ldab-1)
|
||
// Update A22.
|
||
bi.Dsyrk(blas.Lower, blas.NoTrans, i2, ib,
|
||
-1, ab[(i+ib)*ldab+kd-ib:], ldab-1, 1, ab[(i+ib)*ldab+kd:], ldab-1)
|
||
}
|
||
i3 := min(ib, n-i-kd)
|
||
if i3 > 0 {
|
||
// Copy the upper triangle of A31 into the work array.
|
||
for ii := 0; ii < i3; ii++ {
|
||
for jj := ii; jj < ib; jj++ {
|
||
work[ii*ldwork+jj] = ab[(ii+i+kd)*ldab+jj-ii]
|
||
}
|
||
}
|
||
// Update A31 (in the work array).
|
||
bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i3, ib,
|
||
1, ab[i*ldab+kd:], ldab-1, work, ldwork)
|
||
// Update A32.
|
||
if i2 > 0 {
|
||
bi.Dgemm(blas.NoTrans, blas.Trans, i3, i2, ib,
|
||
-1, work, ldwork, ab[(i+ib)*ldab+kd-ib:], ldab-1,
|
||
1, ab[(i+kd)*ldab+ib:], ldab-1)
|
||
}
|
||
// Update A33.
|
||
bi.Dsyrk(blas.Lower, blas.NoTrans, i3, ib,
|
||
-1, work, ldwork, 1, ab[(i+kd)*ldab+kd:], ldab-1)
|
||
// Copy the upper triangle of A31 back into place.
|
||
for ii := 0; ii < i3; ii++ {
|
||
for jj := ii; jj < ib; jj++ {
|
||
ab[(ii+i+kd)*ldab+jj-ii] = work[ii*ldwork+jj]
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
return true
|
||
}
|