Change CovarianceMatrix and CorrelationMatrix to use *SymDense instead of *Dense

This commit is contained in:
btracey
2015-09-29 00:23:11 -06:00
parent 68c5c3f393
commit b2b470fdc0
2 changed files with 57 additions and 74 deletions

View File

@@ -13,26 +13,23 @@ import (
// CovarianceMatrix calculates a covariance matrix (also known as a
// variance-covariance matrix) from a matrix of data, using a two-pass
// algorithm. The matrix returned will be symmetric and square.
// algorithm.
//
// The weights wts should have the length equal to the number of rows in
// input data matrix x. If c is nil, then a new matrix with appropriate size will
// be constructed. If c is not nil, it should be a square matrix with the same
// number of columns as the input data matrix x, and it will be used as the receiver
// for the covariance data. Weights cannot be negative.
func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense {
// The weights must have length equal to the number of rows in
// input data matrix x. If cov is nil, then a new matrix with appropriate size will
// be constructed. If cov is not nil, it should have the same number of columns as the
// input data matrix x, and it will be used as the destination for the covariance
// data. Weights must not be negative.
func CovarianceMatrix(cov *mat64.SymDense, x mat64.Matrix, weights []float64) *mat64.SymDense {
// This is the matrix version of the two-pass algorithm. It doesn't use the
// additional floating point error correction that the Covariance function uses
// to reduce the impact of rounding during centering.
// TODO(jonlawlor): indicate that the resulting matrix is symmetric, and change
// the returned type from a *mat.Dense to a *mat.Symmetric.
r, c := x.Dims()
if cov == nil {
cov = mat64.NewDense(c, c, nil)
} else if covr, covc := cov.Dims(); covr != covc || covc != c {
cov = mat64.NewSymDense(c, nil)
} else if n := cov.Symmetric(); n != c {
panic(mat64.ErrShape)
}
@@ -41,27 +38,27 @@ func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.De
// Subtract the mean of each of the columns.
for i := 0; i < c; i++ {
v := xt.RawRowView(i)
// This will panic with ErrShape if len(wts) != len(v), so
// This will panic with ErrShape if len(weights) != len(v), so
// we don't have to check the size later.
mean := Mean(v, wts)
mean := Mean(v, weights)
floats.AddConst(-mean, v)
}
var n float64
if wts == nil {
if weights == nil {
n = float64(r)
cov.Mul(&xt, (&xt).T())
cov.SymOuterK(&xt)
// Scale by the sample size.
cov.Scale(1/(n-1), cov)
cov.ScaleSym(1/(n-1), cov)
return cov
}
// Multiply by the sqrt of the weights, so that multiplication is symmetric.
sqrtwts := make([]float64, r)
for i, w := range wts {
for i, w := range weights {
if w < 0 {
panic("stat: negative covariance matrix weights")
}
@@ -74,54 +71,43 @@ func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.De
}
// Calculate the normalization factor.
n = floats.Sum(wts)
cov.Mul(&xt, (&xt).T())
n = floats.Sum(weights)
cov.SymOuterK(&xt)
// Scale by the sample size.
cov.Scale(1/(n-1), cov)
cov.ScaleSym(1/(n-1), cov)
return cov
}
// CorrelationMatrix calculates a correlation matrix from a matrix of data,
// using a two-pass algorithm. The matrix returned will be symmetric and square.
// CorrelationMatrix calculates a correlation matrix from a matrix of data
// using a two-pass algorithm.
//
// The weights wts should have the length equal to the number of rows in
// input data matrix x. If c is nil, then a new matrix with appropriate size will
// be constructed. If c is not nil, it should be a square matrix with the same
// number of columns as the input data matrix x, and it will be used as the receiver
// for the correlation data. Weights cannot be negative.
func CorrelationMatrix(c *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense {
// TODO(jonlawlor): indicate that the resulting matrix is symmetric, and change
// the returned type from a *mat.Dense to a *mat.Symmetric.
// This will panic if the sizes don't match, or if wts is the wrong size.
c = CovarianceMatrix(c, x, wts)
covToCorr(c)
return c
// The weights must have length equal to the number of rows in
// input data matrix x. If corr is nil, then a new matrix with appropriate size will
// be constructed. If corr is not nil, it should have the same number of columns
// as the input data matrix x, and it will be used as the destination for the
// correlation data. Weights must not be negative.
func CorrelationMatrix(corr *mat64.SymDense, x mat64.Matrix, weights []float64) *mat64.SymDense {
// This will panic if the sizes don't match, or if weights is the wrong size.
corr = CovarianceMatrix(corr, x, weights)
covToCorr(corr)
return corr
}
// covToCorr converts a covariance matrix to a correlation matrix.
func covToCorr(c *mat64.Dense) {
// TODO(jonlawlor): use a *mat64.Symmetric as input.
r, _ := c.Dims()
func covToCorr(c *mat64.SymDense) {
r := c.Symmetric()
s := make([]float64, r)
for i := 0; i < r; i++ {
s[i] = 1 / math.Sqrt(c.At(i, i))
}
for i, sx := range s {
row := c.RawRowView(i)
for j, sy := range s {
if i == j {
// Ensure that the diagonal has exactly ones.
row[j] = 1
continue
}
row[j] *= sx
row[j] *= sy
// Ensure that the diagonal has exactly ones.
c.SetSym(i, i, 1)
for j := i + 1; j < r; j++ {
v := c.At(i, j)
c.SetSym(i, j, v*sx*s[j])
}
}
}
@@ -130,26 +116,18 @@ func covToCorr(c *mat64.Dense) {
// The input sigma should be vector of standard deviations corresponding
// to the covariance. It will panic if len(sigma) is not equal to the
// number of rows in the correlation matrix.
func corrToCov(c *mat64.Dense, sigma []float64) {
// TODO(jonlawlor): use a *mat64.Symmetric as input.
func corrToCov(c *mat64.SymDense, sigma []float64) {
r, _ := c.Dims()
if r != len(sigma) {
panic(mat64.ErrShape)
}
for i, sx := range sigma {
row := c.RawRowView(i)
for j, sy := range sigma {
if i == j {
// Ensure that the diagonal has exactly sigma squared.
row[j] = sx * sx
continue
}
row[j] *= sx
row[j] *= sy
// Ensure that the diagonal has exactly sigma squared.
c.SetSym(i, i, sx*sx)
for j := i + 1; j < r; j++ {
v := c.At(i, j)
c.SetSym(i, j, v*sx*sigma[j])
}
}
}

View File

@@ -88,7 +88,7 @@ func TestCovarianceMatrix(t *testing.T) {
if !Panics(func() { CovarianceMatrix(nil, mat64.NewDense(5, 2, nil), []float64{}) }) {
t.Errorf("CovarianceMatrix did not panic with weight size mismatch")
}
if !Panics(func() { CovarianceMatrix(mat64.NewDense(1, 1, nil), mat64.NewDense(5, 2, nil), nil) }) {
if !Panics(func() { CovarianceMatrix(mat64.NewSymDense(1, nil), mat64.NewDense(5, 2, nil), nil) }) {
t.Errorf("CovarianceMatrix did not panic with preallocation size mismatch")
}
if !Panics(func() { CovarianceMatrix(nil, mat64.NewDense(2, 2, []float64{1, 2, 3, 4}), []float64{1, -1}) }) {
@@ -182,7 +182,7 @@ func TestCorrelationMatrix(t *testing.T) {
if !Panics(func() { CorrelationMatrix(nil, mat64.NewDense(5, 2, nil), []float64{}) }) {
t.Errorf("CorrelationMatrix did not panic with weight size mismatch")
}
if !Panics(func() { CorrelationMatrix(mat64.NewDense(1, 1, nil), mat64.NewDense(5, 2, nil), nil) }) {
if !Panics(func() { CorrelationMatrix(mat64.NewSymDense(1, nil), mat64.NewDense(5, 2, nil), nil) }) {
t.Errorf("CorrelationMatrix did not panic with preallocation size mismatch")
}
if !Panics(func() { CorrelationMatrix(nil, mat64.NewDense(2, 2, []float64{1, 2, 3, 4}), []float64{1, -1}) }) {
@@ -229,7 +229,7 @@ func TestCorrCov(t *testing.T) {
corr := CorrelationMatrix(nil, test.data, test.weights)
cov := CovarianceMatrix(nil, test.data, test.weights)
r, _ := cov.Dims()
r := cov.Symmetric()
// Get the diagonal elements from cov to determine the sigmas.
sigmas := make([]float64, r)
@@ -237,9 +237,12 @@ func TestCorrCov(t *testing.T) {
sigmas[i] = math.Sqrt(cov.At(i, i))
}
covFromCorr := mat64.DenseCopyOf(corr)
covFromCorr := mat64.NewSymDense(corr.Symmetric(), nil)
covFromCorr.CopySym(corr)
corrToCov(covFromCorr, sigmas)
corrFromCov := mat64.DenseCopyOf(cov)
corrFromCov := mat64.NewSymDense(cov.Symmetric(), nil)
corrFromCov.CopySym(cov)
covToCorr(corrFromCov)
if !mat64.EqualApprox(corr, corrFromCov, 1e-14) {
@@ -249,7 +252,7 @@ func TestCorrCov(t *testing.T) {
t.Errorf("%d: covToCorr did not match direct Covariance calculation. Want: %v, got: %v. ", i, cov, covFromCorr)
}
if !Panics(func() { corrToCov(mat64.NewDense(2, 2, nil), []float64{}) }) {
if !Panics(func() { corrToCov(mat64.NewSymDense(2, nil), []float64{}) }) {
t.Errorf("CorrelationMatrix did not panic with sigma size mismatch")
}
}
@@ -284,7 +287,7 @@ func benchmarkCovarianceMatrixWeighted(b *testing.B, m mat64.Matrix) {
}
func benchmarkCovarianceMatrixInPlace(b *testing.B, m mat64.Matrix) {
_, c := m.Dims()
res := mat64.NewDense(c, c, nil)
res := mat64.NewSymDense(c, nil)
b.ResetTimer()
for i := 0; i < b.N; i++ {
CovarianceMatrix(res, m, nil)
@@ -397,10 +400,11 @@ func BenchmarkCovToCorr(b *testing.B) {
// generate a 10x10 covariance matrix
m := randMat(small, small)
c := CovarianceMatrix(nil, m, nil)
cc := mat64.NewSymDense(c.Symmetric(), nil)
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
cc := mat64.DenseCopyOf(c)
cc.CopySym(c)
b.StartTimer()
covToCorr(cc)
}
@@ -410,6 +414,7 @@ func BenchmarkCorrToCov(b *testing.B) {
// generate a 10x10 correlation matrix
m := randMat(small, small)
c := CorrelationMatrix(nil, m, nil)
cc := mat64.NewSymDense(c.Symmetric(), nil)
sigma := make([]float64, small)
for i := range sigma {
sigma[i] = 2
@@ -417,7 +422,7 @@ func BenchmarkCorrToCov(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
cc := mat64.DenseCopyOf(c)
cc.CopySym(c)
b.StartTimer()
corrToCov(cc, sigma)
}