mirror of
https://github.com/gonum/gonum.git
synced 2025-10-20 13:55:20 +08:00
144 lines
3.7 KiB
Go
144 lines
3.7 KiB
Go
// Copyright ©2015 The gonum Authors. All rights reserved.
|
||
// Use of this source code is governed by a BSD-style
|
||
// license that can be found in the LICENSE file.
|
||
|
||
package native
|
||
|
||
import (
|
||
"github.com/gonum/blas"
|
||
"github.com/gonum/lapack"
|
||
)
|
||
|
||
// Dormqr multiplies the matrix C by the othogonal matrix Q defined by the
|
||
// slices a and tau. A and tau are as returned from Dgeqrf.
|
||
// C = Q * C if side == blas.Left and trans == blas.NoTrans
|
||
// C = Q^T * C if side == blas.Left and trans == blas.Trans
|
||
// C = C * Q if side == blas.Right and trans == blas.NoTrans
|
||
// C = C * Q^T if side == blas.Right and trans == blas.Trans
|
||
// If side == blas.Left, A is a matrix of side k×m, and if side == blas.Right
|
||
// A is of size k×n. This uses a blocked algorithm.
|
||
//
|
||
// Work is temporary storage, and lwork specifies the usable memory length.
|
||
// At minimum, lwork >= m if side == blas.Left and lwork >= n if side == blas.Right,
|
||
// and this function will panic otherwise.
|
||
// Dormqr uses a block algorithm, but the block size is limited
|
||
// by the temporary space available. If lwork == -1, instead of performing Dormqr,
|
||
// the optimal work length will be stored into work[0].
|
||
//
|
||
// tau contains the householder scales and must have length at least k, and
|
||
// this function will panic otherwise.
|
||
func (impl Implementation) Dormqr(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) {
|
||
left := side == blas.Left
|
||
notran := trans == blas.NoTrans
|
||
if left {
|
||
checkMatrix(m, k, a, lda)
|
||
} else {
|
||
checkMatrix(n, k, a, lda)
|
||
}
|
||
checkMatrix(m, n, c, ldc)
|
||
|
||
if len(tau) < k {
|
||
panic(badTau)
|
||
}
|
||
|
||
const nbmax = 64
|
||
nw := n
|
||
if side == blas.Right {
|
||
nw = m
|
||
}
|
||
opts := string(side) + string(trans)
|
||
nb := min(nbmax, impl.Ilaenv(1, "DORMQR", opts, m, n, k, -1))
|
||
lworkopt := max(1, nw) * nb
|
||
if lwork == -1 {
|
||
work[0] = float64(lworkopt)
|
||
return
|
||
}
|
||
if left {
|
||
if lwork < n {
|
||
panic(badWork)
|
||
}
|
||
} else {
|
||
if lwork < m {
|
||
panic(badWork)
|
||
}
|
||
}
|
||
if m == 0 || n == 0 || k == 0 {
|
||
return
|
||
}
|
||
nbmin := 2
|
||
|
||
ldwork := nb
|
||
if nb > 1 && nb < k {
|
||
iws := nw * nb
|
||
if lwork < iws {
|
||
nb = lwork / nw
|
||
nbmin = max(2, impl.Ilaenv(2, "DORMQR", opts, m, n, k, -1))
|
||
}
|
||
}
|
||
if nb < nbmin || nb >= k {
|
||
// Call unblocked code
|
||
impl.Dorm2r(side, trans, m, n, k, a, lda, tau, c, ldc, work)
|
||
return
|
||
}
|
||
ldt := nb
|
||
t := make([]float64, nb*ldt)
|
||
switch {
|
||
case left && notran:
|
||
for i := ((k - 1) / nb) * nb; i >= 0; i -= nb {
|
||
ib := min(nb, k-i)
|
||
impl.Dlarft(lapack.Forward, lapack.ColumnWise, m-i, ib,
|
||
a[i*lda+i:], lda,
|
||
tau[i:],
|
||
t, ldt)
|
||
impl.Dlarfb(side, trans, lapack.Forward, lapack.ColumnWise, m-i, n, ib,
|
||
a[i*lda+i:], lda,
|
||
t, ldt,
|
||
c[i*ldc:], ldc,
|
||
work, ldwork)
|
||
}
|
||
return
|
||
case left && !notran:
|
||
for i := 0; i < k; i += nb {
|
||
ib := min(nb, k-i)
|
||
impl.Dlarft(lapack.Forward, lapack.ColumnWise, m-i, ib,
|
||
a[i*lda+i:], lda,
|
||
tau[i:],
|
||
t, ldt)
|
||
impl.Dlarfb(side, trans, lapack.Forward, lapack.ColumnWise, m-i, n, ib,
|
||
a[i*lda+i:], lda,
|
||
t, ldt,
|
||
c[i*ldc:], ldc,
|
||
work, ldwork)
|
||
}
|
||
return
|
||
case !left && notran:
|
||
for i := 0; i < k; i += nb {
|
||
ib := min(nb, k-i)
|
||
impl.Dlarft(lapack.Forward, lapack.ColumnWise, n-i, ib,
|
||
a[i*lda+i:], lda,
|
||
tau[i:],
|
||
t, ldt)
|
||
impl.Dlarfb(side, trans, lapack.Forward, lapack.ColumnWise, m, n-i, ib,
|
||
a[i*lda+i:], lda,
|
||
t, ldt,
|
||
c[i:], ldc,
|
||
work, ldwork)
|
||
}
|
||
return
|
||
case !left && !notran:
|
||
for i := ((k - 1) / nb) * nb; i >= 0; i -= nb {
|
||
ib := min(nb, k-i)
|
||
impl.Dlarft(lapack.Forward, lapack.ColumnWise, n-i, ib,
|
||
a[i*lda+i:], lda,
|
||
tau[i:],
|
||
t, ldt)
|
||
impl.Dlarfb(side, trans, lapack.Forward, lapack.ColumnWise, m, n-i, ib,
|
||
a[i*lda+i:], lda,
|
||
t, ldt,
|
||
c[i:], ldc,
|
||
work, ldwork)
|
||
}
|
||
return
|
||
}
|
||
}
|