mirror of
https://github.com/gonum/gonum.git
synced 2025-10-28 17:31:56 +08:00
change CovarianceMatrix to use MulTrans
MulTrans is much faster than Mul, and by taking the square root of the weights we can always use MulTrans. add test for negative weights add benchmarks for weighted covariance reorder code so simple path is first fix test error message to include fixture number re-add comment on making the returned type symmetric
This commit is contained in:
@@ -7,6 +7,7 @@ package stat
|
|||||||
import (
|
import (
|
||||||
"github.com/gonum/floats"
|
"github.com/gonum/floats"
|
||||||
"github.com/gonum/matrix/mat64"
|
"github.com/gonum/matrix/mat64"
|
||||||
|
"math"
|
||||||
)
|
)
|
||||||
|
|
||||||
// CovarianceMatrix calculates a covariance matrix (also known as a
|
// CovarianceMatrix calculates a covariance matrix (also known as a
|
||||||
@@ -17,16 +18,17 @@ import (
|
|||||||
// The weights wts should have the length equal to the number of rows in
|
// The weights wts should have the length equal to the number of rows in
|
||||||
// input data matrix x. cov should either be a square matrix with the same
|
// input data matrix x. cov should either be a square matrix with the same
|
||||||
// number of columns as the input data matrix x, or nil in which case a new
|
// number of columns as the input data matrix x, or nil in which case a new
|
||||||
// Dense matrix will be constructed.
|
// Dense matrix will be constructed. Weights cannot be negative.
|
||||||
func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense {
|
func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense {
|
||||||
// This is the matrix version of the two-pass algorithm. It doesn't use the
|
// This is the matrix version of the two-pass algorithm. It doesn't use the
|
||||||
// additional floating point error correction that the Covariance function uses
|
// additional floating point error correction that the Covariance function uses
|
||||||
// to reduce the impact of rounding during centering.
|
// to reduce the impact of rounding during centering.
|
||||||
|
|
||||||
|
// TODO(jonlawlor): indicate that the resulting matrix is symmetric, and change
|
||||||
|
// the returned type from a *mat.Dense to a *mat.Symmetric.
|
||||||
|
|
||||||
r, c := x.Dims()
|
r, c := x.Dims()
|
||||||
|
|
||||||
// TODO(jonlawlor): indicate that the resulting matrix is symmetric, which
|
|
||||||
// should improve performance.
|
|
||||||
if cov == nil {
|
if cov == nil {
|
||||||
cov = mat64.NewDense(c, c, nil)
|
cov = mat64.NewDense(c, c, nil)
|
||||||
} else if covr, covc := cov.Dims(); covr != covc || covc != c {
|
} else if covr, covc := cov.Dims(); covr != covc || covc != c {
|
||||||
@@ -38,35 +40,43 @@ func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.De
|
|||||||
// Subtract the mean of each of the columns.
|
// Subtract the mean of each of the columns.
|
||||||
for i := 0; i < c; i++ {
|
for i := 0; i < c; i++ {
|
||||||
v := xt.RawRowView(i)
|
v := xt.RawRowView(i)
|
||||||
|
// This will panic with ErrShape if len(wts) != len(v), so
|
||||||
|
// we don't have to check the size later.
|
||||||
mean := Mean(v, wts)
|
mean := Mean(v, wts)
|
||||||
floats.AddConst(-mean, v)
|
floats.AddConst(-mean, v)
|
||||||
}
|
}
|
||||||
|
|
||||||
var xc mat64.Dense
|
|
||||||
xc.TCopy(&xt)
|
|
||||||
|
|
||||||
var n float64
|
var n float64
|
||||||
if wts != nil {
|
if wts == nil {
|
||||||
if wr := len(wts); wr != r {
|
|
||||||
panic(mat64.ErrShape)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Weight the rows.
|
|
||||||
for i := 0; i < c; i++ {
|
|
||||||
v := xt.RawRowView(i)
|
|
||||||
floats.Mul(v, wts)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculate the normalization factor.
|
|
||||||
n = floats.Sum(wts)
|
|
||||||
} else {
|
|
||||||
n = float64(r)
|
n = float64(r)
|
||||||
|
|
||||||
|
cov.MulTrans(&xt, false, &xt, true)
|
||||||
|
|
||||||
|
// Scale by the sample size.
|
||||||
|
cov.Scale(1/(n-1), cov)
|
||||||
|
return cov
|
||||||
}
|
}
|
||||||
|
|
||||||
cov.Mul(&xt, &xc)
|
// Multiply by the sqrt of the weights, so that multiplication is symmetric.
|
||||||
|
sqrtwts := make([]float64, r)
|
||||||
|
for i, w := range wts {
|
||||||
|
if w < 0 {
|
||||||
|
panic("stat: negative covariance matrix weights")
|
||||||
|
}
|
||||||
|
sqrtwts[i] = math.Sqrt(w)
|
||||||
|
}
|
||||||
|
// Weight the rows.
|
||||||
|
for i := 0; i < c; i++ {
|
||||||
|
v := xt.RawRowView(i)
|
||||||
|
floats.Mul(v, sqrtwts)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the normalization factor.
|
||||||
|
n = floats.Sum(wts)
|
||||||
|
cov.MulTrans(&xt, false, &xt, true)
|
||||||
|
|
||||||
// Scale by the sample size.
|
// Scale by the sample size.
|
||||||
cov.Scale(1/(n-1), cov)
|
cov.Scale(1/(n-1), cov)
|
||||||
|
|
||||||
return cov
|
return cov
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -65,10 +65,10 @@ func TestCovarianceMatrix(t *testing.T) {
|
|||||||
t.Errorf("%d: expected cov %v, found %v", i, test.ans, c)
|
t.Errorf("%d: expected cov %v, found %v", i, test.ans, c)
|
||||||
}
|
}
|
||||||
if !floats.Equal(d, r.Data) {
|
if !floats.Equal(d, r.Data) {
|
||||||
t.Errorf("%d: data was modified during execution")
|
t.Errorf("%d: data was modified during execution", i)
|
||||||
}
|
}
|
||||||
if !floats.Equal(w, test.weights) {
|
if !floats.Equal(w, test.weights) {
|
||||||
t.Errorf("%d: weights was modified during execution")
|
t.Errorf("%d: weights was modified during execution", i)
|
||||||
}
|
}
|
||||||
|
|
||||||
// compare with call to Covariance
|
// compare with call to Covariance
|
||||||
@@ -91,7 +91,9 @@ func TestCovarianceMatrix(t *testing.T) {
|
|||||||
if !Panics(func() { CovarianceMatrix(mat64.NewDense(1, 1, nil), mat64.NewDense(5, 2, nil), nil) }) {
|
if !Panics(func() { CovarianceMatrix(mat64.NewDense(1, 1, nil), mat64.NewDense(5, 2, nil), nil) }) {
|
||||||
t.Errorf("CovarianceMatrix did not panic with preallocation size mismatch")
|
t.Errorf("CovarianceMatrix did not panic with preallocation size mismatch")
|
||||||
}
|
}
|
||||||
|
if !Panics(func() { CovarianceMatrix(nil, mat64.NewDense(2, 2, []float64{1, 2, 3, 4}), []float64{1, -1}) }) {
|
||||||
|
t.Errorf("CovarianceMatrix did not panic with negative weights")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// benchmarks
|
// benchmarks
|
||||||
@@ -110,6 +112,17 @@ func benchmarkCovarianceMatrix(b *testing.B, m mat64.Matrix) {
|
|||||||
CovarianceMatrix(nil, m, nil)
|
CovarianceMatrix(nil, m, nil)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func benchmarkCovarianceMatrixWeighted(b *testing.B, m mat64.Matrix) {
|
||||||
|
r, _ := m.Dims()
|
||||||
|
wts := make([]float64, r)
|
||||||
|
for i := range wts {
|
||||||
|
wts[i] = 0.5
|
||||||
|
}
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
CovarianceMatrix(nil, m, wts)
|
||||||
|
}
|
||||||
|
}
|
||||||
func benchmarkCovarianceMatrixInPlace(b *testing.B, m mat64.Matrix) {
|
func benchmarkCovarianceMatrixInPlace(b *testing.B, m mat64.Matrix) {
|
||||||
_, c := m.Dims()
|
_, c := m.Dims()
|
||||||
res := mat64.NewDense(c, c, nil)
|
res := mat64.NewDense(c, c, nil)
|
||||||
@@ -153,6 +166,40 @@ func BenchmarkCovarianceMatrixHugexSmall(b *testing.B) {
|
|||||||
benchmarkCovarianceMatrix(b, x)
|
benchmarkCovarianceMatrix(b, x)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkCovarianceMatrixSmallxSmallWeighted(b *testing.B) {
|
||||||
|
// 10 * 10 elements
|
||||||
|
x := randMat(small, small)
|
||||||
|
benchmarkCovarianceMatrixWeighted(b, x)
|
||||||
|
}
|
||||||
|
func BenchmarkCovarianceMatrixSmallxMediumWeighted(b *testing.B) {
|
||||||
|
// 10 * 1000 elements
|
||||||
|
x := randMat(small, medium)
|
||||||
|
benchmarkCovarianceMatrixWeighted(b, x)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkCovarianceMatrixMediumxSmallWeighted(b *testing.B) {
|
||||||
|
// 1000 * 10 elements
|
||||||
|
x := randMat(medium, small)
|
||||||
|
benchmarkCovarianceMatrixWeighted(b, x)
|
||||||
|
}
|
||||||
|
func BenchmarkCovarianceMatrixMediumxMediumWeighted(b *testing.B) {
|
||||||
|
// 1000 * 1000 elements
|
||||||
|
x := randMat(medium, medium)
|
||||||
|
benchmarkCovarianceMatrixWeighted(b, x)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkCovarianceMatrixLargexSmallWeighted(b *testing.B) {
|
||||||
|
// 1e5 * 10 elements
|
||||||
|
x := randMat(large, small)
|
||||||
|
benchmarkCovarianceMatrixWeighted(b, x)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkCovarianceMatrixHugexSmallWeighted(b *testing.B) {
|
||||||
|
// 1e7 * 10 elements
|
||||||
|
x := randMat(huge, small)
|
||||||
|
benchmarkCovarianceMatrixWeighted(b, x)
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkCovarianceMatrixSmallxSmallInPlace(b *testing.B) {
|
func BenchmarkCovarianceMatrixSmallxSmallInPlace(b *testing.B) {
|
||||||
// 10 * 10 elements
|
// 10 * 10 elements
|
||||||
x := randMat(small, small)
|
x := randMat(small, small)
|
||||||
|
|||||||
Reference in New Issue
Block a user