From c69ec6cd62aa35c044c7bf237753f5bf7ddd6041 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Thu, 6 Nov 2014 23:00:27 -0500 Subject: [PATCH] Simplify covariance sig Changed covariance to remove the need to supply the means. Also implemented the corrected two-pass method to estimate the covariance. --- stat.go | 49 ++++++++++++++++++++++++++++++++----------------- stat_test.go | 13 +++++-------- 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/stat.go b/stat.go index 591913fb..1d78cfaa 100644 --- a/stat.go +++ b/stat.go @@ -130,38 +130,53 @@ func ChiSquare(obs, exp []float64) float64 { // The lengths of x and y must be equal. If weights is nil then all of the // weights are 1. If weights is not nil, then len(x) must equal len(weights). func Correlation(x []float64, meanX, stdX float64, y []float64, meanY, stdY float64, weights []float64) float64 { - return Covariance(x, meanX, y, meanY, weights) / (stdX * stdY) + return Covariance(x, y, weights) / (stdX * stdY) } -// Covariance returns the weighted covariance between the samples of x and y -// with the given means. +// Covariance returns the weighted covariance between the samples of x and y. // sum_i {w_i (x_i - meanX) * (y_i - meanY)} / (sum_j {w_j} - 1) // The lengths of x and y must be equal. If weights is nil then all of the // weights are 1. If weights is not nil, then len(x) must equal len(weights). -func Covariance(x []float64, meanX float64, y []float64, meanY float64, weights []float64) float64 { +func Covariance(x []float64, y []float64, weights []float64) float64 { + + // don't have a paper for this, but the unweighted adaptation seems natural. + // The weighted version doesn't perform a correction. It seemed like the + // performance would suffer too much. + if len(x) != len(y) { panic("stat: slice length mismatch") } + xu := Mean(x, weights) + yu := Mean(y, weights) + if weights == nil { - var s float64 - for i, v := range x { - s += (v - meanX) * (y[i] - meanY) + var ( + ss float64 + xcompensation float64 + ycompensation float64 + ) + + for i, xv := range x { + yv := y[i] + xd := xv - xu + yd := yv - yu + ss += xd * yd + xcompensation += xd + ycompensation += yd } - s /= float64(len(x) - 1) - return s - } - if len(weights) != len(x) { - panic("stat: slice length mismatch") + return (ss - xcompensation*ycompensation/float64(len(x))) / float64(len(x)-1) } var ( - s float64 + ss float64 sumWeights float64 ) - for i, v := range x { - s += weights[i] * (v - meanX) * (y[i] - meanY) - sumWeights += weights[i] + + for i, xv := range x { + w := weights[i] + ss += w * (xv - xu) * (y[i] - yu) + sumWeights += w } - return s / (sumWeights - 1) + return ss / (sumWeights - 1) } // CrossEntropy computes the cross-entropy between the two distributions specified diff --git a/stat_test.go b/stat_test.go index d7838aae..428abebf 100644 --- a/stat_test.go +++ b/stat_test.go @@ -98,14 +98,11 @@ func ExampleCovariance() { fmt.Println("about their mean.") x := []float64{8, -3, 7, 8, -4} y := []float64{10, 2, 2, 4, 1} - meanX := Mean(x, nil) - meanY := Mean(y, nil) - cov := Covariance(x, meanX, y, meanY, nil) + cov := Covariance(x, y, nil) fmt.Printf("Cov = %.4f\n", cov) fmt.Println("If datasets move perfectly together, the variance equals the covariance") y2 := []float64{12, 1, 11, 12, 0} - meanY2 := Mean(y2, nil) - cov2 := Covariance(x, meanX, y2, meanY2, nil) + cov2 := Covariance(x, y2, nil) varX := Variance(x, nil) fmt.Printf("Cov2 is %.4f, VarX is %.4f", cov2, varX) // Output: @@ -145,17 +142,17 @@ func TestCovariance(t *testing.T) { ans: 3.2, }, } { - c := Covariance(test.p, Mean(test.p, test.weights), test.q, Mean(test.q, test.weights), test.weights) + c := Covariance(test.p, test.q, test.weights) if math.Abs(c-test.ans) > 1e-14 { t.Errorf("Covariance mismatch case %d: Expected %v, Found %v", i, test.ans, c) } } // test the panic states - if !Panics(func() { Covariance(make([]float64, 2), 0.0, make([]float64, 3), 0.0, nil) }) { + if !Panics(func() { Covariance(make([]float64, 2), make([]float64, 3), nil) }) { t.Errorf("Covariance did not panic with x, y length mismatch") } - if !Panics(func() { Covariance(make([]float64, 3), 0.0, make([]float64, 3), 0.0, make([]float64, 2)) }) { + if !Panics(func() { Covariance(make([]float64, 3), make([]float64, 3), make([]float64, 2)) }) { t.Errorf("Covariance did not panic with x, weights length mismatch") }