mirror of
https://github.com/gonum/gonum.git
synced 2025-10-28 17:31:56 +08:00
Add CorrelationMatrix and Cov2Corr functions
This change includes a function to calculate correlation the correlation matrix of input data, and unexported functions which can convert between covariance matrices and correlation matrices. There are also tests for CorrelationMatrix, and benchmarks for the conversion functions.
This commit is contained in:
@@ -12,13 +12,13 @@ import (
|
|||||||
|
|
||||||
// CovarianceMatrix calculates a covariance matrix (also known as a
|
// CovarianceMatrix calculates a covariance matrix (also known as a
|
||||||
// variance-covariance matrix) from a matrix of data, using a two-pass
|
// variance-covariance matrix) from a matrix of data, using a two-pass
|
||||||
// algorithm. The matrix returned will be symmetric, square, and
|
// algorithm. The matrix returned will be symmetric and square.
|
||||||
// positive-semidefinite.
|
|
||||||
//
|
//
|
||||||
// The weights wts should have the length equal to the number of rows in
|
// The weights wts should have the length equal to the number of rows in
|
||||||
// input data matrix x. cov should either be a square matrix with the same
|
// input data matrix x. If c is nil, then a new matrix with appropriate size will
|
||||||
// number of columns as the input data matrix x, or nil in which case a new
|
// be constructed. If c is not nil, it should be a square matrix with the same
|
||||||
// Dense matrix will be constructed. Weights cannot be negative.
|
// number of columns as the input data matrix x, and it will be used as the receiver
|
||||||
|
// for the covariance data. Weights cannot be negative.
|
||||||
func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense {
|
func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense {
|
||||||
// This is the matrix version of the two-pass algorithm. It doesn't use the
|
// This is the matrix version of the two-pass algorithm. It doesn't use the
|
||||||
// additional floating point error correction that the Covariance function uses
|
// additional floating point error correction that the Covariance function uses
|
||||||
@@ -80,3 +80,75 @@ func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.De
|
|||||||
cov.Scale(1/(n-1), cov)
|
cov.Scale(1/(n-1), cov)
|
||||||
return cov
|
return cov
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CorrelationMatrix calculates a correlation matrix from a matrix of data,
|
||||||
|
// using a two-pass algorithm. The matrix returned will be symmetric and square.
|
||||||
|
//
|
||||||
|
// The weights wts should have the length equal to the number of rows in
|
||||||
|
// input data matrix x. If c is nil, then a new matrix with appropriate size will
|
||||||
|
// be constructed. If c is not nil, it should be a square matrix with the same
|
||||||
|
// number of columns as the input data matrix x, and it will be used as the receiver
|
||||||
|
// for the correlation data. Weights cannot be negative.
|
||||||
|
func CorrelationMatrix(c *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense {
|
||||||
|
|
||||||
|
// TODO(jonlawlor): indicate that the resulting matrix is symmetric, and change
|
||||||
|
// the returned type from a *mat.Dense to a *mat.Symmetric.
|
||||||
|
|
||||||
|
// This will panic if the sizes don't match, or if wts is the wrong size.
|
||||||
|
c = CovarianceMatrix(c, x, wts)
|
||||||
|
covToCorr(c)
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
|
||||||
|
// covToCorr converts a covariance matrix to a correlation matrix.
|
||||||
|
func covToCorr(c *mat64.Dense) {
|
||||||
|
|
||||||
|
// TODO(jonlawlor): use a *mat64.Symmetric as input.
|
||||||
|
|
||||||
|
r, _ := c.Dims()
|
||||||
|
|
||||||
|
s := make([]float64, r)
|
||||||
|
for i := 0; i < r; i++ {
|
||||||
|
s[i] = 1 / math.Sqrt(c.At(i, i))
|
||||||
|
}
|
||||||
|
for i, sx := range s {
|
||||||
|
row := c.RawRowView(i)
|
||||||
|
for j, sy := range s {
|
||||||
|
if i == j {
|
||||||
|
// Ensure that the diagonal has exactly ones.
|
||||||
|
row[j] = 1
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
row[j] *= sx
|
||||||
|
row[j] *= sy
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// corrToCov converts a correlation matrix to a covariance matrix.
|
||||||
|
// The input sigma should be vector of standard deviations corresponding
|
||||||
|
// to the covariance. It will panic if len(sigma) is not equal to the
|
||||||
|
// number of rows in the correlation matrix.
|
||||||
|
func corrToCov(c *mat64.Dense, sigma []float64) {
|
||||||
|
|
||||||
|
// TODO(jonlawlor): use a *mat64.Symmetric as input.
|
||||||
|
|
||||||
|
r, _ := c.Dims()
|
||||||
|
|
||||||
|
if r != len(sigma) {
|
||||||
|
panic(mat64.ErrShape)
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, sx := range sigma {
|
||||||
|
row := c.RawRowView(i)
|
||||||
|
for j, sy := range sigma {
|
||||||
|
if i == j {
|
||||||
|
// Ensure that the diagonal has exactly sigma squared.
|
||||||
|
row[j] = sx * sx
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
row[j] *= sx
|
||||||
|
row[j] *= sy
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -96,6 +96,165 @@ func TestCovarianceMatrix(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestCorrelationMatrix(t *testing.T) {
|
||||||
|
for i, test := range []struct {
|
||||||
|
data *mat64.Dense
|
||||||
|
weights []float64
|
||||||
|
ans *mat64.Dense
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
data: mat64.NewDense(3, 3, []float64{
|
||||||
|
1, 2, 3,
|
||||||
|
3, 4, 5,
|
||||||
|
5, 6, 7,
|
||||||
|
}),
|
||||||
|
weights: nil,
|
||||||
|
ans: mat64.NewDense(3, 3, []float64{
|
||||||
|
1, 1, 1,
|
||||||
|
1, 1, 1,
|
||||||
|
1, 1, 1,
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
data: mat64.NewDense(5, 2, []float64{
|
||||||
|
-2, -4,
|
||||||
|
-1, 2,
|
||||||
|
0, 0,
|
||||||
|
1, -2,
|
||||||
|
2, 4,
|
||||||
|
}),
|
||||||
|
weights: nil,
|
||||||
|
ans: mat64.NewDense(2, 2, []float64{
|
||||||
|
1, 0.6,
|
||||||
|
0.6, 1,
|
||||||
|
}),
|
||||||
|
}, {
|
||||||
|
data: mat64.NewDense(3, 2, []float64{
|
||||||
|
1, 1,
|
||||||
|
2, 4,
|
||||||
|
3, 9,
|
||||||
|
}),
|
||||||
|
weights: []float64{
|
||||||
|
1,
|
||||||
|
1.5,
|
||||||
|
1,
|
||||||
|
},
|
||||||
|
ans: mat64.NewDense(2, 2, []float64{
|
||||||
|
1, 0.9868703275903379,
|
||||||
|
0.9868703275903379, 1,
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
// Make a copy of the data to check that it isn't changing.
|
||||||
|
r := test.data.RawMatrix()
|
||||||
|
d := make([]float64, len(r.Data))
|
||||||
|
copy(d, r.Data)
|
||||||
|
|
||||||
|
w := make([]float64, len(test.weights))
|
||||||
|
if test.weights != nil {
|
||||||
|
copy(w, test.weights)
|
||||||
|
}
|
||||||
|
c := CorrelationMatrix(nil, test.data, test.weights)
|
||||||
|
if !c.Equals(test.ans) {
|
||||||
|
t.Errorf("%d: expected corr %v, found %v", i, test.ans, c)
|
||||||
|
}
|
||||||
|
if !floats.Equal(d, r.Data) {
|
||||||
|
t.Errorf("%d: data was modified during execution", i)
|
||||||
|
}
|
||||||
|
if !floats.Equal(w, test.weights) {
|
||||||
|
t.Errorf("%d: weights was modified during execution", i)
|
||||||
|
}
|
||||||
|
|
||||||
|
// compare with call to Covariance
|
||||||
|
_, cols := c.Dims()
|
||||||
|
for ci := 0; ci < cols; ci++ {
|
||||||
|
for cj := 0; cj < cols; cj++ {
|
||||||
|
x := test.data.Col(nil, ci)
|
||||||
|
y := test.data.Col(nil, cj)
|
||||||
|
corr := Correlation(x, y, test.weights)
|
||||||
|
if math.Abs(corr-c.At(ci, cj)) > 1e-14 {
|
||||||
|
t.Errorf("CorrMat does not match at (%v, %v). Want %v, got %v.", ci, cj, corr, c.At(ci, cj))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
if !Panics(func() { CorrelationMatrix(nil, mat64.NewDense(5, 2, nil), []float64{}) }) {
|
||||||
|
t.Errorf("CorrelationMatrix did not panic with weight size mismatch")
|
||||||
|
}
|
||||||
|
if !Panics(func() { CorrelationMatrix(mat64.NewDense(1, 1, nil), mat64.NewDense(5, 2, nil), nil) }) {
|
||||||
|
t.Errorf("CorrelationMatrix did not panic with preallocation size mismatch")
|
||||||
|
}
|
||||||
|
if !Panics(func() { CorrelationMatrix(nil, mat64.NewDense(2, 2, []float64{1, 2, 3, 4}), []float64{1, -1}) }) {
|
||||||
|
t.Errorf("CorrelationMatrix did not panic with negative weights")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCorrCov(t *testing.T) {
|
||||||
|
// test both Cov2Corr and Cov2Corr
|
||||||
|
for i, test := range []struct {
|
||||||
|
data *mat64.Dense
|
||||||
|
weights []float64
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
data: mat64.NewDense(3, 3, []float64{
|
||||||
|
1, 2, 3,
|
||||||
|
3, 4, 5,
|
||||||
|
5, 6, 7,
|
||||||
|
}),
|
||||||
|
weights: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
data: mat64.NewDense(5, 2, []float64{
|
||||||
|
-2, -4,
|
||||||
|
-1, 2,
|
||||||
|
0, 0,
|
||||||
|
1, -2,
|
||||||
|
2, 4,
|
||||||
|
}),
|
||||||
|
weights: nil,
|
||||||
|
}, {
|
||||||
|
data: mat64.NewDense(3, 2, []float64{
|
||||||
|
1, 1,
|
||||||
|
2, 4,
|
||||||
|
3, 9,
|
||||||
|
}),
|
||||||
|
weights: []float64{
|
||||||
|
1,
|
||||||
|
1.5,
|
||||||
|
1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
corr := CorrelationMatrix(nil, test.data, test.weights)
|
||||||
|
cov := CovarianceMatrix(nil, test.data, test.weights)
|
||||||
|
|
||||||
|
r, _ := cov.Dims()
|
||||||
|
|
||||||
|
// Get the diagonal elements from cov to determine the sigmas.
|
||||||
|
sigmas := make([]float64, r)
|
||||||
|
for i := range sigmas {
|
||||||
|
sigmas[i] = math.Sqrt(cov.At(i, i))
|
||||||
|
}
|
||||||
|
|
||||||
|
covFromCorr := mat64.DenseCopyOf(corr)
|
||||||
|
corrToCov(covFromCorr, sigmas)
|
||||||
|
corrFromCov := mat64.DenseCopyOf(cov)
|
||||||
|
covToCorr(corrFromCov)
|
||||||
|
|
||||||
|
if !corr.EqualsApprox(corrFromCov, 1e-14) {
|
||||||
|
t.Errorf("%d: corrToCov did not match direct Correlation calculation. Want: %v, got: %v. ", i, corr, corrFromCov)
|
||||||
|
}
|
||||||
|
if !cov.EqualsApprox(covFromCorr, 1e-14) {
|
||||||
|
t.Errorf("%d: covToCorr did not match direct Covariance calculation. Want: %v, got: %v. ", i, cov, covFromCorr)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !Panics(func() { corrToCov(mat64.NewDense(2, 2, nil), []float64{}) }) {
|
||||||
|
t.Errorf("CorrelationMatrix did not panic with sigma size mismatch")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// benchmarks
|
// benchmarks
|
||||||
|
|
||||||
func randMat(r, c int) mat64.Matrix {
|
func randMat(r, c int) mat64.Matrix {
|
||||||
@@ -233,3 +392,33 @@ func BenchmarkCovarianceMatrixHugexSmallInPlace(b *testing.B) {
|
|||||||
x := randMat(huge, small)
|
x := randMat(huge, small)
|
||||||
benchmarkCovarianceMatrixInPlace(b, x)
|
benchmarkCovarianceMatrixInPlace(b, x)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkCovToCorr(b *testing.B) {
|
||||||
|
// generate a 10x10 covariance matrix
|
||||||
|
m := randMat(small, small)
|
||||||
|
c := CovarianceMatrix(nil, m, nil)
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
b.StopTimer()
|
||||||
|
cc := mat64.DenseCopyOf(c)
|
||||||
|
b.StartTimer()
|
||||||
|
covToCorr(cc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkCorrToCov(b *testing.B) {
|
||||||
|
// generate a 10x10 correlation matrix
|
||||||
|
m := randMat(small, small)
|
||||||
|
c := CorrelationMatrix(nil, m, nil)
|
||||||
|
sigma := make([]float64, small)
|
||||||
|
for i := range sigma {
|
||||||
|
sigma[i] = 2
|
||||||
|
}
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
b.StopTimer()
|
||||||
|
cc := mat64.DenseCopyOf(c)
|
||||||
|
b.StartTimer()
|
||||||
|
corrToCov(cc, sigma)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user