implement new MeanVariance and Variance

Simplified Variance by removing the input mean from the sig, and implemented a MeanVariance function which calculates the corrected two-pass corrected sample variance. It also calculates the mean, so it returns that as well, which should save some calculation under most cases.
2025-10-19 21:44:41 +08:00 · 2014-11-06 20:40:10 -05:00
parent fc395c7961
commit 1d2d682785
2 changed files with 49 additions and 30 deletions
--- a/stat.go
+++ b/stat.go
@@ -768,7 +768,7 @@ func (w weightSorter) Len() int {

 // StdDev returns the population standard deviation with the provided mean.
 func StdDev(x []float64, mean float64, weights []float64) float64 {
-	return math.Sqrt(Variance(x, mean, weights))
+	return math.Sqrt(Variance(x, weights))
 }

 // StdErr returns the standard error in the mean with the given values.
@@ -783,28 +783,50 @@ func StdScore(x, mean, std float64) float64 {
 	return (x - mean) / std
 }

-// Variance computes the weighted sample variance with the provided mean.
+// Variance computes the weighted sample variance
 //  \sum_i w_i (x_i - mean)^2 / (sum_i w_i - 1)
 // If weights is nil then all of the weights are 1. If weights is not nil, then
 // len(x) must equal len(weights).
-func Variance(x []float64, mean float64, weights []float64) float64 {
-	if weights == nil {
-		var s float64
-		for _, v := range x {
-			s += (v - mean) * (v - mean)
-		}
-		return s / float64(len(x)-1)
-	}
-	if len(x) != len(weights) {
-		panic("stat: slice length mismatch")
-	}
-	var (
-		ss         float64
-		sumWeights float64
-	)
-	for i, v := range x {
-		ss += weights[i] * (v - mean) * (v - mean)
-		sumWeights += weights[i]
-	}
-	return ss / (sumWeights - 1)
+func Variance(x, weights []float64) float64 {
+	_, s2 := MeanVariance(x, weights)
+	return s2
+}
+
+// MeanVariance computes the sample mean and variance, where the mean is
+//  \sum_i w_i * x_i / (sum_i w_i) and variance is \sum_i w_i (x_i - mean)^2 / (sum_i w_i - 1)
+// If weights is nil then all of the weights are 1. If weights is not nil, then
+// len(x) must equal len(weights).
+//
+// This uses the corrected two-pass algorithm, from "Algorithms for computing
+// the sample variance: Analysis and recommendations" by Chan, Tony F., Gene H. Golub,
+// and Randall J. LeVeque.
+func MeanVariance(x, weights []float64) (u, s2 float64) {
+
+	// note that this will panic if the slice lens do not match
+	u = Mean(x, weights)
+	var (
+		ss           float64
+		compensation float64
+	)
+	if weights == nil {
+		for _, v := range x {
+			d := v - u
+			ss += d * d
+			compensation += d
+		}
+		s2 = (ss - compensation*compensation/float64(len(x))) / float64(len(x)-1)
+		return
+	}
+
+	var sumWeights float64
+	for i, v := range x {
+		w := weights[i]
+		d := v - u
+		wd := w * d
+		ss += wd * d
+		compensation += wd
+		sumWeights += w
+	}
+	s2 = (ss - compensation*compensation/sumWeights) / (sumWeights - 1)
+	return
 }
--- a/stat_test.go
+++ b/stat_test.go
@@ -106,7 +106,7 @@ func ExampleCovariance() {
 	y2 := []float64{12, 1, 11, 12, 0}
 	meanY2 := Mean(y2, nil)
 	cov2 := Covariance(x, meanX, y2, meanY2, nil)
-	varX := Variance(x, meanX, nil)
+	varX := Variance(x, nil)
 	fmt.Printf("Cov2 is %.4f, VarX is %.4f", cov2, varX)
 	// Output:
 	// Covariance computes the degree to which datasets move together
@@ -1252,13 +1252,12 @@ func TestVariance(t *testing.T) {
 			ans:     4.2857142857142865,
 		},
 	} {
-		mean := Mean(test.x, test.weights)
-		variance := Variance(test.x, mean, test.weights)
+		variance := Variance(test.x, test.weights)
 		if math.Abs(variance-test.ans) > 1e-14 {
 			t.Errorf("Variance mismatch case %d. Expected %v, Found %v", i, test.ans, variance)
 		}
 	}
-	if !Panics(func() { Variance(make([]float64, 3), 0, make([]float64, 2)) }) {
+	if !Panics(func() { Variance(make([]float64, 3), make([]float64, 2)) }) {
 		t.Errorf("Variance did not panic with x, weights length mismatch")
 	}

@@ -1266,13 +1265,11 @@ func TestVariance(t *testing.T) {

 func ExampleVariance() {
 	x := []float64{8, 2, -9, 15, 4}
-	mean := Mean(x, nil)
-	variance := Variance(x, mean, nil)
+	variance := Variance(x, nil)
 	fmt.Printf("The variance of the samples is %.4f\n", variance)

 	weights := []float64{2, 2, 6, 7, 1}
-	weightedMean := Mean(x, weights)
-	weightedVariance := Variance(x, weightedMean, weights)
+	weightedVariance := Variance(x, weights)
 	fmt.Printf("The weighted variance of the samples is %.4f\n", weightedVariance)
 	// Output:
 	// The variance of the samples is 77.5000