diff --git a/covariancematrix.go b/covariancematrix.go index c3ee2671..706ab5ce 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -12,13 +12,13 @@ import ( // CovarianceMatrix calculates a covariance matrix (also known as a // variance-covariance matrix) from a matrix of data, using a two-pass -// algorithm. The matrix returned will be symmetric, square, and -// positive-semidefinite. +// algorithm. The matrix returned will be symmetric and square. // // The weights wts should have the length equal to the number of rows in -// input data matrix x. cov should either be a square matrix with the same -// number of columns as the input data matrix x, or nil in which case a new -// Dense matrix will be constructed. Weights cannot be negative. +// input data matrix x. If c is nil, then a new matrix with appropriate size will +// be constructed. If c is not nil, it should be a square matrix with the same +// number of columns as the input data matrix x, and it will be used as the receiver +// for the covariance data. Weights cannot be negative. func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense { // This is the matrix version of the two-pass algorithm. It doesn't use the // additional floating point error correction that the Covariance function uses @@ -80,3 +80,75 @@ func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.De cov.Scale(1/(n-1), cov) return cov } + +// CorrelationMatrix calculates a correlation matrix from a matrix of data, +// using a two-pass algorithm. The matrix returned will be symmetric and square. +// +// The weights wts should have the length equal to the number of rows in +// input data matrix x. If c is nil, then a new matrix with appropriate size will +// be constructed. If c is not nil, it should be a square matrix with the same +// number of columns as the input data matrix x, and it will be used as the receiver +// for the correlation data. Weights cannot be negative. +func CorrelationMatrix(c *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense { + + // TODO(jonlawlor): indicate that the resulting matrix is symmetric, and change + // the returned type from a *mat.Dense to a *mat.Symmetric. + + // This will panic if the sizes don't match, or if wts is the wrong size. + c = CovarianceMatrix(c, x, wts) + covToCorr(c) + return c +} + +// covToCorr converts a covariance matrix to a correlation matrix. +func covToCorr(c *mat64.Dense) { + + // TODO(jonlawlor): use a *mat64.Symmetric as input. + + r, _ := c.Dims() + + s := make([]float64, r) + for i := 0; i < r; i++ { + s[i] = 1 / math.Sqrt(c.At(i, i)) + } + for i, sx := range s { + row := c.RawRowView(i) + for j, sy := range s { + if i == j { + // Ensure that the diagonal has exactly ones. + row[j] = 1 + continue + } + row[j] *= sx + row[j] *= sy + } + } +} + +// corrToCov converts a correlation matrix to a covariance matrix. +// The input sigma should be vector of standard deviations corresponding +// to the covariance. It will panic if len(sigma) is not equal to the +// number of rows in the correlation matrix. +func corrToCov(c *mat64.Dense, sigma []float64) { + + // TODO(jonlawlor): use a *mat64.Symmetric as input. + + r, _ := c.Dims() + + if r != len(sigma) { + panic(mat64.ErrShape) + } + + for i, sx := range sigma { + row := c.RawRowView(i) + for j, sy := range sigma { + if i == j { + // Ensure that the diagonal has exactly sigma squared. + row[j] = sx * sx + continue + } + row[j] *= sx + row[j] *= sy + } + } +} diff --git a/covariancematrix_test.go b/covariancematrix_test.go index 75b037b5..ad94468c 100644 --- a/covariancematrix_test.go +++ b/covariancematrix_test.go @@ -96,6 +96,165 @@ func TestCovarianceMatrix(t *testing.T) { } } +func TestCorrelationMatrix(t *testing.T) { + for i, test := range []struct { + data *mat64.Dense + weights []float64 + ans *mat64.Dense + }{ + { + data: mat64.NewDense(3, 3, []float64{ + 1, 2, 3, + 3, 4, 5, + 5, 6, 7, + }), + weights: nil, + ans: mat64.NewDense(3, 3, []float64{ + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + }), + }, + { + data: mat64.NewDense(5, 2, []float64{ + -2, -4, + -1, 2, + 0, 0, + 1, -2, + 2, 4, + }), + weights: nil, + ans: mat64.NewDense(2, 2, []float64{ + 1, 0.6, + 0.6, 1, + }), + }, { + data: mat64.NewDense(3, 2, []float64{ + 1, 1, + 2, 4, + 3, 9, + }), + weights: []float64{ + 1, + 1.5, + 1, + }, + ans: mat64.NewDense(2, 2, []float64{ + 1, 0.9868703275903379, + 0.9868703275903379, 1, + }), + }, + } { + // Make a copy of the data to check that it isn't changing. + r := test.data.RawMatrix() + d := make([]float64, len(r.Data)) + copy(d, r.Data) + + w := make([]float64, len(test.weights)) + if test.weights != nil { + copy(w, test.weights) + } + c := CorrelationMatrix(nil, test.data, test.weights) + if !c.Equals(test.ans) { + t.Errorf("%d: expected corr %v, found %v", i, test.ans, c) + } + if !floats.Equal(d, r.Data) { + t.Errorf("%d: data was modified during execution", i) + } + if !floats.Equal(w, test.weights) { + t.Errorf("%d: weights was modified during execution", i) + } + + // compare with call to Covariance + _, cols := c.Dims() + for ci := 0; ci < cols; ci++ { + for cj := 0; cj < cols; cj++ { + x := test.data.Col(nil, ci) + y := test.data.Col(nil, cj) + corr := Correlation(x, y, test.weights) + if math.Abs(corr-c.At(ci, cj)) > 1e-14 { + t.Errorf("CorrMat does not match at (%v, %v). Want %v, got %v.", ci, cj, corr, c.At(ci, cj)) + } + } + } + + } + if !Panics(func() { CorrelationMatrix(nil, mat64.NewDense(5, 2, nil), []float64{}) }) { + t.Errorf("CorrelationMatrix did not panic with weight size mismatch") + } + if !Panics(func() { CorrelationMatrix(mat64.NewDense(1, 1, nil), mat64.NewDense(5, 2, nil), nil) }) { + t.Errorf("CorrelationMatrix did not panic with preallocation size mismatch") + } + if !Panics(func() { CorrelationMatrix(nil, mat64.NewDense(2, 2, []float64{1, 2, 3, 4}), []float64{1, -1}) }) { + t.Errorf("CorrelationMatrix did not panic with negative weights") + } +} + +func TestCorrCov(t *testing.T) { + // test both Cov2Corr and Cov2Corr + for i, test := range []struct { + data *mat64.Dense + weights []float64 + }{ + { + data: mat64.NewDense(3, 3, []float64{ + 1, 2, 3, + 3, 4, 5, + 5, 6, 7, + }), + weights: nil, + }, + { + data: mat64.NewDense(5, 2, []float64{ + -2, -4, + -1, 2, + 0, 0, + 1, -2, + 2, 4, + }), + weights: nil, + }, { + data: mat64.NewDense(3, 2, []float64{ + 1, 1, + 2, 4, + 3, 9, + }), + weights: []float64{ + 1, + 1.5, + 1, + }, + }, + } { + corr := CorrelationMatrix(nil, test.data, test.weights) + cov := CovarianceMatrix(nil, test.data, test.weights) + + r, _ := cov.Dims() + + // Get the diagonal elements from cov to determine the sigmas. + sigmas := make([]float64, r) + for i := range sigmas { + sigmas[i] = math.Sqrt(cov.At(i, i)) + } + + covFromCorr := mat64.DenseCopyOf(corr) + corrToCov(covFromCorr, sigmas) + corrFromCov := mat64.DenseCopyOf(cov) + covToCorr(corrFromCov) + + if !corr.EqualsApprox(corrFromCov, 1e-14) { + t.Errorf("%d: corrToCov did not match direct Correlation calculation. Want: %v, got: %v. ", i, corr, corrFromCov) + } + if !cov.EqualsApprox(covFromCorr, 1e-14) { + t.Errorf("%d: covToCorr did not match direct Covariance calculation. Want: %v, got: %v. ", i, cov, covFromCorr) + } + + if !Panics(func() { corrToCov(mat64.NewDense(2, 2, nil), []float64{}) }) { + t.Errorf("CorrelationMatrix did not panic with sigma size mismatch") + } + } +} + // benchmarks func randMat(r, c int) mat64.Matrix { @@ -233,3 +392,33 @@ func BenchmarkCovarianceMatrixHugexSmallInPlace(b *testing.B) { x := randMat(huge, small) benchmarkCovarianceMatrixInPlace(b, x) } + +func BenchmarkCovToCorr(b *testing.B) { + // generate a 10x10 covariance matrix + m := randMat(small, small) + c := CovarianceMatrix(nil, m, nil) + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() + cc := mat64.DenseCopyOf(c) + b.StartTimer() + covToCorr(cc) + } +} + +func BenchmarkCorrToCov(b *testing.B) { + // generate a 10x10 correlation matrix + m := randMat(small, small) + c := CorrelationMatrix(nil, m, nil) + sigma := make([]float64, small) + for i := range sigma { + sigma[i] = 2 + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() + cc := mat64.DenseCopyOf(c) + b.StartTimer() + corrToCov(cc, sigma) + } +}