mirror of
https://github.com/gonum/gonum.git
synced 2025-10-19 21:44:41 +08:00
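Simplify Covariance signature
Changed Covariance so that callers no longer need to supply the means; they are now computed internally. Also implemented the corrected two-pass method for estimating the covariance in the unweighted case (the weighted case uses the plain two-pass estimate).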
This commit is contained in:
49
stat.go
49
stat.go
@@ -130,38 +130,53 @@ func ChiSquare(obs, exp []float64) float64 {
|
|||||||
// The lengths of x and y must be equal. If weights is nil then all of the
|
// The lengths of x and y must be equal. If weights is nil then all of the
|
||||||
// weights are 1. If weights is not nil, then len(x) must equal len(weights).
|
// weights are 1. If weights is not nil, then len(x) must equal len(weights).
|
||||||
func Correlation(x []float64, meanX, stdX float64, y []float64, meanY, stdY float64, weights []float64) float64 {
|
func Correlation(x []float64, meanX, stdX float64, y []float64, meanY, stdY float64, weights []float64) float64 {
|
||||||
return Covariance(x, meanX, y, meanY, weights) / (stdX * stdY)
|
return Covariance(x, y, weights) / (stdX * stdY)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Covariance returns the weighted covariance between the samples of x and y
|
// Covariance returns the weighted covariance between the samples of x and y.
|
||||||
// with the given means.
|
|
||||||
// sum_i {w_i (x_i - meanX) * (y_i - meanY)} / (sum_j {w_j} - 1)
|
// sum_i {w_i (x_i - mean(x)) * (y_i - mean(y))} / (sum_j {w_j} - 1)
|
||||||
// The lengths of x and y must be equal. If weights is nil then all of the
|
// The lengths of x and y must be equal. If weights is nil then all of the
|
||||||
// weights are 1. If weights is not nil, then len(x) must equal len(weights).
|
// weights are 1. If weights is not nil, then len(x) must equal len(weights).
|
||||||
func Covariance(x []float64, meanX float64, y []float64, meanY float64, weights []float64) float64 {
|
func Covariance(x []float64, y []float64, weights []float64) float64 {
|
||||||
|
|
||||||
|
// I don't have a paper for this, but the unweighted adaptation seems natural.
|
||||||
|
// The weighted version doesn't perform a correction. It seemed like the
|
||||||
|
// performance would suffer too much.
|
||||||
|
|
||||||
if len(x) != len(y) {
|
if len(x) != len(y) {
|
||||||
panic("stat: slice length mismatch")
|
panic("stat: slice length mismatch")
|
||||||
}
|
}
|
||||||
|
xu := Mean(x, weights)
|
||||||
|
yu := Mean(y, weights)
|
||||||
|
|
||||||
if weights == nil {
|
if weights == nil {
|
||||||
var s float64
|
var (
|
||||||
for i, v := range x {
|
ss float64
|
||||||
s += (v - meanX) * (y[i] - meanY)
|
xcompensation float64
|
||||||
|
ycompensation float64
|
||||||
|
)
|
||||||
|
|
||||||
|
for i, xv := range x {
|
||||||
|
yv := y[i]
|
||||||
|
xd := xv - xu
|
||||||
|
yd := yv - yu
|
||||||
|
ss += xd * yd
|
||||||
|
xcompensation += xd
|
||||||
|
ycompensation += yd
|
||||||
}
|
}
|
||||||
s /= float64(len(x) - 1)
|
return (ss - xcompensation*ycompensation/float64(len(x))) / float64(len(x)-1)
|
||||||
return s
|
|
||||||
}
|
|
||||||
if len(weights) != len(x) {
|
|
||||||
panic("stat: slice length mismatch")
|
|
||||||
}
|
}
|
||||||
var (
|
var (
|
||||||
s float64
|
ss float64
|
||||||
sumWeights float64
|
sumWeights float64
|
||||||
)
|
)
|
||||||
for i, v := range x {
|
|
||||||
s += weights[i] * (v - meanX) * (y[i] - meanY)
|
for i, xv := range x {
|
||||||
sumWeights += weights[i]
|
w := weights[i]
|
||||||
|
ss += w * (xv - xu) * (y[i] - yu)
|
||||||
|
sumWeights += w
|
||||||
}
|
}
|
||||||
return s / (sumWeights - 1)
|
return ss / (sumWeights - 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
// CrossEntropy computes the cross-entropy between the two distributions specified
|
// CrossEntropy computes the cross-entropy between the two distributions specified
|
||||||
|
13
stat_test.go
13
stat_test.go
@@ -98,14 +98,11 @@ func ExampleCovariance() {
|
|||||||
fmt.Println("about their mean.")
|
fmt.Println("about their mean.")
|
||||||
x := []float64{8, -3, 7, 8, -4}
|
x := []float64{8, -3, 7, 8, -4}
|
||||||
y := []float64{10, 2, 2, 4, 1}
|
y := []float64{10, 2, 2, 4, 1}
|
||||||
meanX := Mean(x, nil)
|
cov := Covariance(x, y, nil)
|
||||||
meanY := Mean(y, nil)
|
|
||||||
cov := Covariance(x, meanX, y, meanY, nil)
|
|
||||||
fmt.Printf("Cov = %.4f\n", cov)
|
fmt.Printf("Cov = %.4f\n", cov)
|
||||||
fmt.Println("If datasets move perfectly together, the variance equals the covariance")
|
fmt.Println("If datasets move perfectly together, the variance equals the covariance")
|
||||||
y2 := []float64{12, 1, 11, 12, 0}
|
y2 := []float64{12, 1, 11, 12, 0}
|
||||||
meanY2 := Mean(y2, nil)
|
cov2 := Covariance(x, y2, nil)
|
||||||
cov2 := Covariance(x, meanX, y2, meanY2, nil)
|
|
||||||
varX := Variance(x, nil)
|
varX := Variance(x, nil)
|
||||||
fmt.Printf("Cov2 is %.4f, VarX is %.4f", cov2, varX)
|
fmt.Printf("Cov2 is %.4f, VarX is %.4f", cov2, varX)
|
||||||
// Output:
|
// Output:
|
||||||
@@ -145,17 +142,17 @@ func TestCovariance(t *testing.T) {
|
|||||||
ans: 3.2,
|
ans: 3.2,
|
||||||
},
|
},
|
||||||
} {
|
} {
|
||||||
c := Covariance(test.p, Mean(test.p, test.weights), test.q, Mean(test.q, test.weights), test.weights)
|
c := Covariance(test.p, test.q, test.weights)
|
||||||
if math.Abs(c-test.ans) > 1e-14 {
|
if math.Abs(c-test.ans) > 1e-14 {
|
||||||
t.Errorf("Covariance mismatch case %d: Expected %v, Found %v", i, test.ans, c)
|
t.Errorf("Covariance mismatch case %d: Expected %v, Found %v", i, test.ans, c)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// test the panic states
|
// test the panic states
|
||||||
if !Panics(func() { Covariance(make([]float64, 2), 0.0, make([]float64, 3), 0.0, nil) }) {
|
if !Panics(func() { Covariance(make([]float64, 2), make([]float64, 3), nil) }) {
|
||||||
t.Errorf("Covariance did not panic with x, y length mismatch")
|
t.Errorf("Covariance did not panic with x, y length mismatch")
|
||||||
}
|
}
|
||||||
if !Panics(func() { Covariance(make([]float64, 3), 0.0, make([]float64, 3), 0.0, make([]float64, 2)) }) {
|
if !Panics(func() { Covariance(make([]float64, 3), make([]float64, 3), make([]float64, 2)) }) {
|
||||||
t.Errorf("Covariance did not panic with x, weights length mismatch")
|
t.Errorf("Covariance did not panic with x, weights length mismatch")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user