implement new MeanVariance and Variance

Simplified Variance by removing the input mean from the sig, and
implemented a MeanVariance function which calculates the corrected
two-pass corrected sample variance.  It also calculates the mean, so it
returns that as well, which should save some calculation under most
cases.
This commit is contained in:
Jonathan J Lawlor
2014-11-06 20:40:10 -05:00
parent fc395c7961
commit 1d2d682785
2 changed files with 49 additions and 30 deletions

66
stat.go
View File

@@ -768,7 +768,7 @@ func (w weightSorter) Len() int {
// StdDev returns the population standard deviation with the provided mean.
func StdDev(x []float64, mean float64, weights []float64) float64 {
return math.Sqrt(Variance(x, mean, weights))
return math.Sqrt(Variance(x, weights))
}
// StdErr returns the standard error in the mean with the given values.
@@ -783,28 +783,50 @@ func StdScore(x, mean, std float64) float64 {
return (x - mean) / std
}
// Variance computes the weighted sample variance with the provided mean.
// Variance computes the weighted sample variance
// \sum_i w_i (x_i - mean)^2 / (sum_i w_i - 1)
// If weights is nil then all of the weights are 1. If weights is not nil, then
// len(x) must equal len(weights).
func Variance(x []float64, mean float64, weights []float64) float64 {
if weights == nil {
var s float64
for _, v := range x {
s += (v - mean) * (v - mean)
}
return s / float64(len(x)-1)
}
if len(x) != len(weights) {
panic("stat: slice length mismatch")
}
var (
ss float64
sumWeights float64
)
for i, v := range x {
ss += weights[i] * (v - mean) * (v - mean)
sumWeights += weights[i]
}
return ss / (sumWeights - 1)
func Variance(x, weights []float64) float64 {
_, s2 := MeanVariance(x, weights)
return s2
}
// MeanVariance computes the sample mean and variance, where the mean is
// \sum_i w_i * x_i / (sum_i w_i) and variance is \sum_i w_i (x_i - mean)^2 / (sum_i w_i - 1)
// If weights is nil then all of the weights are 1. If weights is not nil, then
// len(x) must equal len(weights).
//
// This uses the corrected two-pass algorithm, from "Algorithms for computing
// the sample variance: Analysis and recommendations" by Chan, Tony F., Gene H. Golub,
// and Randall J. LeVeque.
func MeanVariance(x, weights []float64) (u, s2 float64) {
// note that this will panic if the slice lens do not match
u = Mean(x, weights)
var (
ss float64
compensation float64
)
if weights == nil {
for _, v := range x {
d := v - u
ss += d * d
compensation += d
}
s2 = (ss - compensation*compensation/float64(len(x))) / float64(len(x)-1)
return
}
var sumWeights float64
for i, v := range x {
w := weights[i]
d := v - u
wd := w * d
ss += wd * d
compensation += wd
sumWeights += w
}
s2 = (ss - compensation*compensation/sumWeights) / (sumWeights - 1)
return
}

View File

@@ -106,7 +106,7 @@ func ExampleCovariance() {
y2 := []float64{12, 1, 11, 12, 0}
meanY2 := Mean(y2, nil)
cov2 := Covariance(x, meanX, y2, meanY2, nil)
varX := Variance(x, meanX, nil)
varX := Variance(x, nil)
fmt.Printf("Cov2 is %.4f, VarX is %.4f", cov2, varX)
// Output:
// Covariance computes the degree to which datasets move together
@@ -1252,13 +1252,12 @@ func TestVariance(t *testing.T) {
ans: 4.2857142857142865,
},
} {
mean := Mean(test.x, test.weights)
variance := Variance(test.x, mean, test.weights)
variance := Variance(test.x, test.weights)
if math.Abs(variance-test.ans) > 1e-14 {
t.Errorf("Variance mismatch case %d. Expected %v, Found %v", i, test.ans, variance)
}
}
if !Panics(func() { Variance(make([]float64, 3), 0, make([]float64, 2)) }) {
if !Panics(func() { Variance(make([]float64, 3), make([]float64, 2)) }) {
t.Errorf("Variance did not panic with x, weights length mismatch")
}
@@ -1266,13 +1265,11 @@ func TestVariance(t *testing.T) {
func ExampleVariance() {
x := []float64{8, 2, -9, 15, 4}
mean := Mean(x, nil)
variance := Variance(x, mean, nil)
variance := Variance(x, nil)
fmt.Printf("The variance of the samples is %.4f\n", variance)
weights := []float64{2, 2, 6, 7, 1}
weightedMean := Mean(x, weights)
weightedVariance := Variance(x, weightedMean, weights)
weightedVariance := Variance(x, weights)
fmt.Printf("The weighted variance of the samples is %.4f\n", weightedVariance)
// Output:
// The variance of the samples is 77.5000