From c69ec6cd62aa35c044c7bf237753f5bf7ddd6041 Mon Sep 17 00:00:00 2001
From: Jonathan J Lawlor <jonathan.lawlor@gmail.com>
Date: Thu, 6 Nov 2014 23:00:27 -0500
Subject: [PATCH] Simplify covariance sig

Changed covariance to remove the need to supply the means.  Also
implemented the corrected two-pass method to estimate the covariance.
---
 stat.go      | 49 ++++++++++++++++++++++++++++++++-----------------
 stat_test.go | 13 +++++--------
 2 files changed, 37 insertions(+), 25 deletions(-)

diff --git a/stat.go b/stat.go
index 591913fb..1d78cfaa 100644
--- a/stat.go
+++ b/stat.go
@@ -130,38 +130,53 @@ func ChiSquare(obs, exp []float64) float64 {
 // The lengths of x and y must be equal. If weights is nil then all of the
 // weights are 1. If weights is not nil, then len(x) must equal len(weights).
 func Correlation(x []float64, meanX, stdX float64, y []float64, meanY, stdY float64, weights []float64) float64 {
-	return Covariance(x, meanX, y, meanY, weights) / (stdX * stdY)
+	return Covariance(x, y, weights) / (stdX * stdY)
 }
 
-// Covariance returns the weighted covariance between the samples of x and y
-// with the given means.
+// Covariance returns the weighted covariance between the samples of x and y.
 //  sum_i {w_i (x_i - meanX) * (y_i - meanY)} / (sum_j {w_j} - 1)
 // The lengths of x and y must be equal. If weights is nil then all of the
 // weights are 1. If weights is not nil, then len(x) must equal len(weights).
-func Covariance(x []float64, meanX float64, y []float64, meanY float64, weights []float64) float64 {
+func Covariance(x []float64, y []float64, weights []float64) float64 {
+
+	// don't have a paper for this, but the unweighted adaptation seems natural.
+	// The weighted version doesn't perform a correction.  It seemed like the
+	// performance would suffer too much.
+
 	if len(x) != len(y) {
 		panic("stat: slice length mismatch")
 	}
+	xu := Mean(x, weights)
+	yu := Mean(y, weights)
+
 	if weights == nil {
-		var s float64
-		for i, v := range x {
-			s += (v - meanX) * (y[i] - meanY)
+		var (
+			ss            float64
+			xcompensation float64
+			ycompensation float64
+		)
+
+		for i, xv := range x {
+			yv := y[i]
+			xd := xv - xu
+			yd := yv - yu
+			ss += xd * yd
+			xcompensation += xd
+			ycompensation += yd
 		}
-		s /= float64(len(x) - 1)
-		return s
-	}
-	if len(weights) != len(x) {
-		panic("stat: slice length mismatch")
+		return (ss - xcompensation*ycompensation/float64(len(x))) / float64(len(x)-1)
 	}
 	var (
-		s          float64
+		ss         float64
 		sumWeights float64
 	)
-	for i, v := range x {
-		s += weights[i] * (v - meanX) * (y[i] - meanY)
-		sumWeights += weights[i]
+
+	for i, xv := range x {
+		w := weights[i]
+		ss += w * (xv - xu) * (y[i] - yu)
+		sumWeights += w
 	}
-	return s / (sumWeights - 1)
+	return ss / (sumWeights - 1)
 }
 
 // CrossEntropy computes the cross-entropy between the two distributions specified
diff --git a/stat_test.go b/stat_test.go
index d7838aae..428abebf 100644
--- a/stat_test.go
+++ b/stat_test.go
@@ -98,14 +98,11 @@ func ExampleCovariance() {
 	fmt.Println("about their mean.")
 	x := []float64{8, -3, 7, 8, -4}
 	y := []float64{10, 2, 2, 4, 1}
-	meanX := Mean(x, nil)
-	meanY := Mean(y, nil)
-	cov := Covariance(x, meanX, y, meanY, nil)
+	cov := Covariance(x, y, nil)
 	fmt.Printf("Cov = %.4f\n", cov)
 	fmt.Println("If datasets move perfectly together, the variance equals the covariance")
 	y2 := []float64{12, 1, 11, 12, 0}
-	meanY2 := Mean(y2, nil)
-	cov2 := Covariance(x, meanX, y2, meanY2, nil)
+	cov2 := Covariance(x, y2, nil)
 	varX := Variance(x, nil)
 	fmt.Printf("Cov2 is %.4f, VarX is %.4f", cov2, varX)
 	// Output:
@@ -145,17 +142,17 @@ func TestCovariance(t *testing.T) {
 			ans:     3.2,
 		},
 	} {
-		c := Covariance(test.p, Mean(test.p, test.weights), test.q, Mean(test.q, test.weights), test.weights)
+		c := Covariance(test.p, test.q, test.weights)
 		if math.Abs(c-test.ans) > 1e-14 {
 			t.Errorf("Covariance mismatch case %d: Expected %v, Found %v", i, test.ans, c)
 		}
 	}
 
 	// test the panic states
-	if !Panics(func() { Covariance(make([]float64, 2), 0.0, make([]float64, 3), 0.0, nil) }) {
+	if !Panics(func() { Covariance(make([]float64, 2), make([]float64, 3), nil) }) {
 		t.Errorf("Covariance did not panic with x, y length mismatch")
 	}
-	if !Panics(func() { Covariance(make([]float64, 3), 0.0, make([]float64, 3), 0.0, make([]float64, 2)) }) {
+	if !Panics(func() { Covariance(make([]float64, 3), make([]float64, 3), make([]float64, 2)) }) {
 		t.Errorf("Covariance did not panic with x, weights length mismatch")
 	}