From 63a1bac14caf4af78af3619efbb22fd6aa6666ab Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Sat, 15 Nov 2014 00:49:46 -0500 Subject: [PATCH 01/38] Initial work on covariancematrix --- covariancematrix.go | 48 +++++++++++++++++++++++++++++++++++++ covariancematrix_test.go | 52 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 covariancematrix.go create mode 100644 covariancematrix_test.go diff --git a/covariancematrix.go b/covariancematrix.go new file mode 100644 index 00000000..27e0af30 --- /dev/null +++ b/covariancematrix.go @@ -0,0 +1,48 @@ +// Copyright ©2014 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package stat + +import ( + "github.com/gonum/matrix/mat64" +) + +// CovarianceMatrix calculates a covariance matrix (also known as a +// variance-covariance matrix) from a matrix of data. +func CovarianceMatrix(x mat64.Matrix) *mat64.Dense { + + // matrix version of the two pass algorithm. This doesn't use + // the correction found in the Covariance and Variance functions. + + r, _ := x.Dims() + b := ones(1, r) + b.Mul(b, x) + b.Scale(1/float64(r), b) + + // todo: avoid unneeded memory expansion here. + mu := new(mat64.Dense) + mu.Mul(ones(r,1),b) + + // this could also be done with a clone & row viewer + xc := mat64.DenseCopyOf(x) + xc.Sub(xc, mu) + + // todo: avoid matrix copy + xt := new(mat64.Dense) + xt.TCopy(xc) + + ss := new(mat64.Dense) + ss.Mul(xt, xc) + ss.Scale(1/float64(r-1), ss) + return ss +} + +// ones is a matrix of all ones. +func ones(r, c int) *mat64.Dense { + x := make([]float64, r*c) + for i := range x { + x[i] = 1 + } + return mat64.NewDense(r, c, x) +} diff --git a/covariancematrix_test.go b/covariancematrix_test.go new file mode 100644 index 00000000..4a216b50 --- /dev/null +++ b/covariancematrix_test.go @@ -0,0 +1,52 @@ +// Copyright ©2014 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package stat + +import ( + "testing" + + "github.com/gonum/blas/cblas" + "github.com/gonum/floats" + "github.com/gonum/matrix/mat64" +) + +func init() { + mat64.Register(cblas.Blas{}) +} + +func TestCovarianceMatrix(t *testing.T) { + for i, test := range []struct { + mat mat64.Matrix + r, c int + x []float64 + }{ + { + mat: mat64.NewDense(5, 2, []float64{ + -2, -4, + -1, 2, + 0, 0, + 1, -2, + 2, 4, + }), + r: 2, + c: 2, + x: []float64{ + 2.5, 3, + 3, 10, + }, + }, + } { + c := CovarianceMatrix(test.mat).RawMatrix() + if c.Rows != test.r { + t.Errorf("%d: expected rows %d, found %d", i, test.r, c.Rows) + } + if c.Cols != test.c { + t.Errorf("%d: expected cols %d, found %d", i, test.c, c.Cols) + } + if !floats.Equal(test.x, c.Data) { + t.Errorf("%d: expected data %#q, found %#q", i, test.x, c.Data) + } + } +} From 6a930548691e1640c7c0460b68ef46097579ea17 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Sat, 15 Nov 2014 01:18:45 -0500 Subject: [PATCH 02/38] run go fmt oops --- covariancematrix.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index 27e0af30..95d010ee 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -11,27 +11,27 @@ import ( // CovarianceMatrix calculates a covariance matrix (also known as a // variance-covariance matrix) from a matrix of data. func CovarianceMatrix(x mat64.Matrix) *mat64.Dense { - + // matrix version of the two pass algorithm. This doesn't use // the correction found in the Covariance and Variance functions. - + r, _ := x.Dims() b := ones(1, r) b.Mul(b, x) b.Scale(1/float64(r), b) - + // todo: avoid unneeded memory expansion here. mu := new(mat64.Dense) - mu.Mul(ones(r,1),b) - + mu.Mul(ones(r, 1), b) + // this could also be done with a clone & row viewer xc := mat64.DenseCopyOf(x) xc.Sub(xc, mu) - + // todo: avoid matrix copy xt := new(mat64.Dense) xt.TCopy(xc) - + ss := new(mat64.Dense) ss.Mul(xt, xc) ss.Scale(1/float64(r-1), ss) From 90ad2f48c7bbab5d00f2338e6c9d9f26419b56a3 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Sat, 15 Nov 2014 14:16:30 -0500 Subject: [PATCH 03/38] add benchmarks --- covariancematrix_test.go | 96 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/covariancematrix_test.go b/covariancematrix_test.go index 4a216b50..b09bc5d3 100644 --- a/covariancematrix_test.go +++ b/covariancematrix_test.go @@ -5,6 +5,7 @@ package stat import ( + "math/rand" "testing" "github.com/gonum/blas/cblas" @@ -50,3 +51,98 @@ func TestCovarianceMatrix(t *testing.T) { } } } + +// benchmarks + +func randMat(r, c int) mat64.Matrix { + x := make([]float64, r*c) + for i := range x { + x[i] = rand.Float64() + } + return mat64.NewDense(r, c, x) +} + +func benchmarkCovarianceMatrix(b *testing.B, m mat64.Matrix) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + CovarianceMatrix(m) + } +} + +func BenchmarkCovarianceMatrixSmallxSmall(b *testing.B) { + // 10 * 10 elements + x := randMat(SMALL, SMALL) + benchmarkCovarianceMatrix(b, x) +} +func BenchmarkCovarianceMatrixSmallxMedium(b *testing.B) { + // 10 * 1000 elements + x := randMat(SMALL, MEDIUM) + benchmarkCovarianceMatrix(b, x) +} + +/*func BenchmarkCovarianceMatrixSmallxLarge(b *testing.B) { + x := randMat(SMALL, LARGE) + benchmarkCovarianceMatrix(b, x) +} +func BenchmarkCovarianceMatrixSmallxHuge(b *testing.B) { + x := randMat(SMALL, HUGE) + benchmarkCovarianceMatrix(b, x) +}*/ + +func BenchmarkCovarianceMatrixMediumxSmall(b *testing.B) { + // 1000 * 10 elements + x := randMat(MEDIUM, SMALL) + benchmarkCovarianceMatrix(b, x) +} +func BenchmarkCovarianceMatrixMediumxMedium(b *testing.B) { + // 1000 * 1000 elements + x := randMat(MEDIUM, MEDIUM) + benchmarkCovarianceMatrix(b, x) +} + +/*func BenchmarkCovarianceMatrixMediumxLarge(b *testing.B) { + x := randMat(MEDIUM, LARGE) + benchmarkCovarianceMatrix(b, x) +} +func BenchmarkCovarianceMatrixMediumxHuge(b *testing.B) { + x := randMat(MEDIUM, HUGE) + benchmarkCovarianceMatrix(b, x) +}*/ + +func BenchmarkCovarianceMatrixLargexSmall(b *testing.B) { + // 1e5 * 10 elements + x := randMat(LARGE, SMALL) + benchmarkCovarianceMatrix(b, x) +} + +/*func BenchmarkCovarianceMatrixLargexMedium(b *testing.B) { + x := randMat(LARGE, MEDIUM) + benchmarkCovarianceMatrix(b, x) +} +func BenchmarkCovarianceMatrixLargexLarge(b *testing.B) { + x := randMat(LARGE, LARGE) + benchmarkCovarianceMatrix(b, x) +} +func BenchmarkCovarianceMatrixLargexHuge(b *testing.B) { + x := randMat(LARGE, HUGE) + benchmarkCovarianceMatrix(b, x) +}*/ + +func BenchmarkCovarianceMatrixHugexSmall(b *testing.B) { + // 1e7 * 10 elements + x := randMat(HUGE, SMALL) + benchmarkCovarianceMatrix(b, x) +} + +/*func BenchmarkCovarianceMatrixHugexMedium(b *testing.B) { + x := randMat(HUGE, MEDIUM) + benchmarkCovarianceMatrix(b, x) +} +func BenchmarkCovarianceMatrixHugexLarge(b *testing.B) { + x := randMat(HUGE, LARGE) + benchmarkCovarianceMatrix(b, x) +} +func BenchmarkCovarianceMatrixHugexHuge(b *testing.B) { + x := randMat(HUGE, HUGE) + benchmarkCovarianceMatrix(b, x) +}*/ From 17b440e6be3743ee9e51d7abb0848a91a490bcb8 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Sat, 15 Nov 2014 14:35:21 -0500 Subject: [PATCH 04/38] improve performance and decrease memory use --- covariancematrix.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index 95d010ee..45a797b5 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -19,14 +19,18 @@ func CovarianceMatrix(x mat64.Matrix) *mat64.Dense { b := ones(1, r) b.Mul(b, x) b.Scale(1/float64(r), b) - + // todo: avoid unneeded memory expansion here. - mu := new(mat64.Dense) - mu.Mul(ones(r, 1), b) - + mu := b.RowView(0) + // this could also be done with a clone & row viewer xc := mat64.DenseCopyOf(x) - xc.Sub(xc, mu) + for i := 0; i < r; i++ { + rv := xc.RowView(i) + for j, mean := range(mu) { + rv[j] -= mean + } + } // todo: avoid matrix copy xt := new(mat64.Dense) From 35fb0d04134928a92a3b12682312a5933a6d08a1 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Sat, 15 Nov 2014 15:14:24 -0500 Subject: [PATCH 05/38] improve comments --- covariancematrix.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index 45a797b5..d32f0e0b 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -16,26 +16,28 @@ func CovarianceMatrix(x mat64.Matrix) *mat64.Dense { // the correction found in the Covariance and Variance functions. r, _ := x.Dims() + + // determine the mean of each of the columns b := ones(1, r) b.Mul(b, x) b.Scale(1/float64(r), b) - - // todo: avoid unneeded memory expansion here. mu := b.RowView(0) - - // this could also be done with a clone & row viewer + + // subtract the mean from the data xc := mat64.DenseCopyOf(x) for i := 0; i < r; i++ { rv := xc.RowView(i) - for j, mean := range(mu) { + for j, mean := range mu { rv[j] -= mean - } + } } - // todo: avoid matrix copy + // todo: avoid matrix copy? xt := new(mat64.Dense) xt.TCopy(xc) + // It would be nice if we could indicate that this was a symmetric + // matrix. ss := new(mat64.Dense) ss.Mul(xt, xc) ss.Scale(1/float64(r-1), ss) From 27e74b895e2cc00609eb097b52900a4605f192d4 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Sat, 15 Nov 2014 15:14:40 -0500 Subject: [PATCH 06/38] only perform shorter benchmarks --- covariancematrix_test.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/covariancematrix_test.go b/covariancematrix_test.go index b09bc5d3..4a4cb89b 100644 --- a/covariancematrix_test.go +++ b/covariancematrix_test.go @@ -116,7 +116,8 @@ func BenchmarkCovarianceMatrixLargexSmall(b *testing.B) { } /*func BenchmarkCovarianceMatrixLargexMedium(b *testing.B) { - x := randMat(LARGE, MEDIUM) + // 1e5 * 1000 elements + x := randMat(LARGE, MEDIUM) benchmarkCovarianceMatrix(b, x) } func BenchmarkCovarianceMatrixLargexLarge(b *testing.B) { @@ -135,7 +136,8 @@ func BenchmarkCovarianceMatrixHugexSmall(b *testing.B) { } /*func BenchmarkCovarianceMatrixHugexMedium(b *testing.B) { - x := randMat(HUGE, MEDIUM) + // 1e7 * 1000 elements + x := randMat(HUGE, MEDIUM) benchmarkCovarianceMatrix(b, x) } func BenchmarkCovarianceMatrixHugexLarge(b *testing.B) { From 8043a909f50281a2822f9bcd96372c2cb301812a Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Sat, 15 Nov 2014 15:37:10 -0500 Subject: [PATCH 07/38] use var in place of new --- covariancematrix.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index d32f0e0b..96737874 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -33,15 +33,15 @@ func CovarianceMatrix(x mat64.Matrix) *mat64.Dense { } // todo: avoid matrix copy? - xt := new(mat64.Dense) + var xt mat64.Dense xt.TCopy(xc) // It would be nice if we could indicate that this was a symmetric // matrix. - ss := new(mat64.Dense) - ss.Mul(xt, xc) - ss.Scale(1/float64(r-1), ss) - return ss + var ss mat64.Dense + ss.Mul(&xt, xc) + ss.Scale(1/float64(r-1), &ss) + return &ss } // ones is a matrix of all ones. From db6bc68a0c6221e15ec5a176bd31ab84d3f794b9 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Sat, 15 Nov 2014 23:11:17 -0500 Subject: [PATCH 08/38] example implementation of pure go covariancematrix --- covariancematrix.go | 113 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 1 deletion(-) diff --git a/covariancematrix.go b/covariancematrix.go index 96737874..ccb3f8dd 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -5,18 +5,113 @@ package stat import ( + "sync" +// "runtime" + "github.com/gonum/matrix/mat64" ) +type covMatElem struct { + i, j int + v float64 +} + +type covMatSlice struct { + i, j int + x, y []float64 +} + // CovarianceMatrix calculates a covariance matrix (also known as a // variance-covariance matrix) from a matrix of data. func CovarianceMatrix(x mat64.Matrix) *mat64.Dense { // matrix version of the two pass algorithm. This doesn't use // the correction found in the Covariance and Variance functions. + r, c := x.Dims() + + if x, ok := x.(mat64.Vectorer); ok { + cols := make([][]float64, c) + // perform the covariance or variance as required + blockSize := 1024 + if blockSize > c { + blockSize = c + } + var wg sync.WaitGroup + wg.Add(c) + for j := 0; j < c; j++ { + go func(j int) { + // pull the columns out and subtract the means + cols[j] = make([]float64, r) + x.Col(cols[j], j) + mean := Mean(cols[j], nil) + for i := range cols[j] { + cols[j][i] -= mean + } + wg.Done() + }(j) + } + wg.Wait() + + colCh := make(chan covMatSlice, blockSize) + resCh := make(chan covMatElem, blockSize) - r, _ := x.Dims() + wg.Add(blockSize) + go func() { + wg.Wait() + close(resCh) + }() + for i := 0; i < blockSize; i++ { + go func(in <-chan covMatSlice, out chan<- covMatElem) { + for { + xy, more := <-in + if !more { + wg.Done() + return + } + + if xy.i == xy.j { + out <- covMatElem{ + i: xy.i, + j: xy.j, + v: centeredVariance(xy.x), + } + continue + } + out <- covMatElem{ + i: xy.i, + j: xy.j, + v: centeredCovariance(xy.x, xy.y), + } + } + }(colCh, resCh) + } + go func(out chan<- covMatSlice) { + for i := 0; i < c; i++ { + for j := 0; j <= i; j++ { + out <- covMatSlice{ + i: i, + j: j, + x: cols[i], + y: cols[j], + } + } + } + close(out) + }(colCh) + // create the output matrix + m := mat64.NewDense(c, c, nil) + for { + c, more := <-resCh + if !more { + return m + } + m.Set(c.i, c.j, c.v) + if c.i != c.j { + m.Set(c.j, c.i, c.v) + } + } + } // determine the mean of each of the columns b := ones(1, r) b.Mul(b, x) @@ -52,3 +147,19 @@ func ones(r, c int) *mat64.Dense { } return mat64.NewDense(r, c, x) } + +func centeredVariance(x []float64) float64 { + var ss float64 + for _, xv := range x { + ss += xv * xv + } + return ss / float64(len(x)-1) +} + +func centeredCovariance(x, y []float64) float64 { + var ss float64 + for i, xv := range x { + ss += xv * y[i] + } + return ss / float64(len(x)-1) +} From ae1d5e1ddc75677055879c1d52481305d177fdca Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Sat, 15 Nov 2014 23:22:05 -0500 Subject: [PATCH 09/38] simplify example implementation --- covariancematrix.go | 42 +++++++++--------------------------------- 1 file changed, 9 insertions(+), 33 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index ccb3f8dd..3a9afd88 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -11,10 +11,6 @@ import ( "github.com/gonum/matrix/mat64" ) -type covMatElem struct { - i, j int - v float64 -} type covMatSlice struct { i, j int @@ -53,16 +49,11 @@ func CovarianceMatrix(x mat64.Matrix) *mat64.Dense { wg.Wait() colCh := make(chan covMatSlice, blockSize) - resCh := make(chan covMatElem, blockSize) wg.Add(blockSize) - go func() { - wg.Wait() - close(resCh) - }() - + m := mat64.NewDense(c, c, nil) for i := 0; i < blockSize; i++ { - go func(in <-chan covMatSlice, out chan<- covMatElem) { + go func(in <-chan covMatSlice) { for { xy, more := <-in if !more { @@ -71,20 +62,14 @@ func CovarianceMatrix(x mat64.Matrix) *mat64.Dense { } if xy.i == xy.j { - out <- covMatElem{ - i: xy.i, - j: xy.j, - v: centeredVariance(xy.x), - } + m.Set(xy.i, xy.j, centeredVariance(xy.x)) continue } - out <- covMatElem{ - i: xy.i, - j: xy.j, - v: centeredCovariance(xy.x, xy.y), + v := centeredCovariance(xy.x, xy.y) + m.Set(xy.i, xy.j, v) + m.Set(xy.j, xy.i, v) } - } - }(colCh, resCh) + }(colCh) } go func(out chan<- covMatSlice) { for i := 0; i < c; i++ { @@ -100,17 +85,8 @@ func CovarianceMatrix(x mat64.Matrix) *mat64.Dense { close(out) }(colCh) // create the output matrix - m := mat64.NewDense(c, c, nil) - for { - c, more := <-resCh - if !more { - return m - } - m.Set(c.i, c.j, c.v) - if c.i != c.j { - m.Set(c.j, c.i, c.v) - } - } + wg.Wait() + return m } // determine the mean of each of the columns b := ones(1, r) From 132baec6d9b27a12257a6cde91c34bfe929112f0 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Mon, 17 Nov 2014 22:24:01 -0500 Subject: [PATCH 10/38] implement tests for when there is no BLAS --- covariancematrix_test.go | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/covariancematrix_test.go b/covariancematrix_test.go index 4a4cb89b..f312bf62 100644 --- a/covariancematrix_test.go +++ b/covariancematrix_test.go @@ -8,16 +8,17 @@ import ( "math/rand" "testing" - "github.com/gonum/blas/cblas" + "github.com/gonum/blas/goblas" "github.com/gonum/floats" "github.com/gonum/matrix/mat64" ) func init() { - mat64.Register(cblas.Blas{}) + mat64.Register(goblas.Blas{}) } func TestCovarianceMatrix(t *testing.T) { + blasEngine := mat64.Registered() for i, test := range []struct { mat mat64.Matrix r, c int @@ -39,17 +40,32 @@ func TestCovarianceMatrix(t *testing.T) { }, }, } { + // tests with a blas engine + mat64.Register(goblas.Blas{}) c := CovarianceMatrix(test.mat).RawMatrix() if c.Rows != test.r { - t.Errorf("%d: expected rows %d, found %d", i, test.r, c.Rows) + t.Errorf("BLAS %d: expected rows %d, found %d", i, test.r, c.Rows) } if c.Cols != test.c { - t.Errorf("%d: expected cols %d, found %d", i, test.c, c.Cols) + t.Errorf("BLAS %d: expected cols %d, found %d", i, test.c, c.Cols) } if !floats.Equal(test.x, c.Data) { - t.Errorf("%d: expected data %#q, found %#q", i, test.x, c.Data) + t.Errorf("BLAS %d: expected data %#q, found %#q", i, test.x, c.Data) + } + // tests without a blas engine + mat64.Register(nil) + c = CovarianceMatrix(test.mat).RawMatrix() + if c.Rows != test.r { + t.Errorf("No BLAS %d: expected rows %d, found %d", i, test.r, c.Rows) + } + if c.Cols != test.c { + t.Errorf("No BLAS %d: expected cols %d, found %d", i, test.c, c.Cols) + } + if !floats.Equal(test.x, c.Data) { + t.Errorf("No BLAS %d: expected data %#q, found %#q", i, test.x, c.Data) } } + mat64.Register(blasEngine) } // benchmarks From 945308d34b2d9799be625815e7320f38764babbb Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Mon, 17 Nov 2014 22:24:29 -0500 Subject: [PATCH 11/38] improve godoc for CovarianceMatrix --- covariancematrix.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/covariancematrix.go b/covariancematrix.go index 3a9afd88..c4d9ad60 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -18,7 +18,11 @@ type covMatSlice struct { } // CovarianceMatrix calculates a covariance matrix (also known as a -// variance-covariance matrix) from a matrix of data. +// variance-covariance matrix) from a matrix of data, using a two-pass +// algorithm. It will have better performance if a BLAS engine is +// registered in gonum/matrix/mat64. +// +// The matrix returned will be symmetric, square, and positive-semidefinite. func CovarianceMatrix(x mat64.Matrix) *mat64.Dense { // matrix version of the two pass algorithm. This doesn't use From 4b7d9b397ad504258a43ce7f0dd4328be231e7a3 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Mon, 17 Nov 2014 22:25:45 -0500 Subject: [PATCH 12/38] add help for non-exported variance and covariance --- covariancematrix.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/covariancematrix.go b/covariancematrix.go index c4d9ad60..e88dff40 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -128,6 +128,8 @@ func ones(r, c int) *mat64.Dense { return mat64.NewDense(r, c, x) } +// centeredVariance calculates the sum of squares of a single +// series, for calculating variance. func centeredVariance(x []float64) float64 { var ss float64 for _, xv := range x { @@ -136,6 +138,9 @@ func centeredVariance(x []float64) float64 { return ss / float64(len(x)-1) } +// centeredCovariance calculates the sum of squares of two +// series, for calculating variance. The input lengths are +// assumed to be identical. func centeredCovariance(x, y []float64) float64 { var ss float64 for i, xv := range x { From a4f07999f233976850667182a3b3e7414932cad8 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Mon, 17 Nov 2014 22:26:52 -0500 Subject: [PATCH 13/38] split out non-blas impl into new function Splitting out the non blas path makes the code flow easier to read. --- covariancematrix.go | 165 ++++++++++++++++++++++++-------------------- 1 file changed, 90 insertions(+), 75 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index e88dff40..cf6b5b08 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -6,17 +6,10 @@ package stat import ( "sync" -// "runtime" - + "github.com/gonum/matrix/mat64" ) - -type covMatSlice struct { - i, j int - x, y []float64 -} - // CovarianceMatrix calculates a covariance matrix (also known as a // variance-covariance matrix) from a matrix of data, using a two-pass // algorithm. It will have better performance if a BLAS engine is @@ -27,71 +20,12 @@ func CovarianceMatrix(x mat64.Matrix) *mat64.Dense { // matrix version of the two pass algorithm. This doesn't use // the correction found in the Covariance and Variance functions. - r, c := x.Dims() - - if x, ok := x.(mat64.Vectorer); ok { - cols := make([][]float64, c) - // perform the covariance or variance as required - blockSize := 1024 - if blockSize > c { - blockSize = c - } - var wg sync.WaitGroup - wg.Add(c) - for j := 0; j < c; j++ { - go func(j int) { - // pull the columns out and subtract the means - cols[j] = make([]float64, r) - x.Col(cols[j], j) - mean := Mean(cols[j], nil) - for i := range cols[j] { - cols[j][i] -= mean - } - wg.Done() - }(j) - } - wg.Wait() - - colCh := make(chan covMatSlice, blockSize) - - wg.Add(blockSize) - m := mat64.NewDense(c, c, nil) - for i := 0; i < blockSize; i++ { - go func(in <-chan covMatSlice) { - for { - xy, more := <-in - if !more { - wg.Done() - return - } - - if xy.i == xy.j { - m.Set(xy.i, xy.j, centeredVariance(xy.x)) - continue - } - v := centeredCovariance(xy.x, xy.y) - m.Set(xy.i, xy.j, v) - m.Set(xy.j, xy.i, v) - } - }(colCh) - } - go func(out chan<- covMatSlice) { - for i := 0; i < c; i++ { - for j := 0; j <= i; j++ { - out <- covMatSlice{ - i: i, - j: j, - x: cols[i], - y: cols[j], - } - } - } - close(out) - }(colCh) - // create the output matrix - wg.Wait() - return m + if mat64.Registered() == nil { + // implementation that doesn't rely on a blasEngine + return covarianceMatrixWithoutBLAS(x) } + r, _ := x.Dims() + // determine the mean of each of the columns b := ones(1, r) b.Mul(b, x) @@ -107,18 +41,99 @@ func CovarianceMatrix(x mat64.Matrix) *mat64.Dense { } } - // todo: avoid matrix copy? var xt mat64.Dense xt.TCopy(xc) - // It would be nice if we could indicate that this was a symmetric - // matrix. + // TODO: indicate that the resulting matrix is symmetric, which + // should improve performance. var ss mat64.Dense ss.Mul(&xt, xc) ss.Scale(1/float64(r-1), &ss) return &ss } +type covMatSlice struct { + i, j int + x, y []float64 +} + +func covarianceMatrixWithoutBLAS(x mat64.Matrix) *mat64.Dense { + r, c := x.Dims() + + // split out the matrix into columns + cols := make([][]float64, c) + for j := range cols { + cols[j] = make([]float64, r) + } + + if xRaw, ok := x.(mat64.RawMatrixer); ok { + for k, v := range xRaw.RawMatrix().Data { + i := k / c + j := k % c + cols[j][i] = v + } + } else { + for j := 0; j < c; j++ { + for i := 0; i < r; i++ { + cols[j][i] = x.At(i, j) + } + } + } + + // center the columns + for j := range cols { + mean := Mean(cols[j], nil) + for i := range cols[j] { + cols[j][i] -= mean + } + } + + blockSize := 1024 + if blockSize > c { + blockSize = c + } + var wg sync.WaitGroup + wg.Add(blockSize) + colCh := make(chan covMatSlice, blockSize) + + m := mat64.NewDense(c, c, nil) + for i := 0; i < blockSize; i++ { + go func(in <-chan covMatSlice) { + for { + xy, more := <-in + if !more { + wg.Done() + return + } + + if xy.i == xy.j { + m.Set(xy.i, xy.j, centeredVariance(xy.x)) + continue + } + v := centeredCovariance(xy.x, xy.y) + m.Set(xy.i, xy.j, v) + m.Set(xy.j, xy.i, v) + } + }(colCh) + } + go func(out chan<- covMatSlice) { + for i := 0; i < c; i++ { + for j := 0; j <= i; j++ { + out <- covMatSlice{ + i: i, + j: j, + x: cols[i], + y: cols[j], + } + } + } + close(out) + }(colCh) + // create the output matrix + wg.Wait() + return m +} + // ones is a matrix of all ones. func ones(r, c int) *mat64.Dense { x := make([]float64, r*c) From 64307801171f0baf5d9acf9ac4c512376e0af36f Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Mon, 17 Nov 2014 22:31:17 -0500 Subject: [PATCH 14/38] go fmt --- covariancematrix.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/covariancematrix.go b/covariancematrix.go index cf6b5b08..adbcc33c 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -15,7 +15,7 @@ import ( // algorithm. It will have better performance if a BLAS engine is // registered in gonum/matrix/mat64. // -// The matrix returned will be symmetric, square, and positive-semidefinite. +// The matrix returned will be symmetric, square, and positive-semidefinite. func CovarianceMatrix(x mat64.Matrix) *mat64.Dense { // matrix version of the two pass algorithm. This doesn't use From cbcfcd0f7de5f61ea4eaa065613d0d3c0ee00c6a Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Mon, 17 Nov 2014 23:57:36 -0500 Subject: [PATCH 15/38] remove tests for non-blas code path --- covariancematrix_test.go | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/covariancematrix_test.go b/covariancematrix_test.go index f312bf62..e97aec1e 100644 --- a/covariancematrix_test.go +++ b/covariancematrix_test.go @@ -18,7 +18,6 @@ func init() { } func TestCovarianceMatrix(t *testing.T) { - blasEngine := mat64.Registered() for i, test := range []struct { mat mat64.Matrix r, c int @@ -40,8 +39,6 @@ func TestCovarianceMatrix(t *testing.T) { }, }, } { - // tests with a blas engine - mat64.Register(goblas.Blas{}) c := CovarianceMatrix(test.mat).RawMatrix() if c.Rows != test.r { t.Errorf("BLAS %d: expected rows %d, found %d", i, test.r, c.Rows) @@ -52,20 +49,7 @@ func TestCovarianceMatrix(t *testing.T) { if !floats.Equal(test.x, c.Data) { t.Errorf("BLAS %d: expected data %#q, found %#q", i, test.x, c.Data) } - // tests without a blas engine - mat64.Register(nil) - c = CovarianceMatrix(test.mat).RawMatrix() - if c.Rows != test.r { - t.Errorf("No BLAS %d: expected rows %d, found %d", i, test.r, c.Rows) - } - if c.Cols != test.c { - t.Errorf("No BLAS %d: expected cols %d, found %d", i, test.c, c.Cols) - } - if !floats.Equal(test.x, c.Data) { - t.Errorf("No BLAS %d: expected data %#q, found %#q", i, test.x, c.Data) - } } - mat64.Register(blasEngine) } // benchmarks From 989dd3093fc8cde97aef9fae94d0d6079ea2e430 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Mon, 17 Nov 2014 23:58:32 -0500 Subject: [PATCH 16/38] remove non-blas codepath Also put ones directly into CovarianceMatrix function --- covariancematrix.go | 128 +++----------------------------------------- 1 file changed, 7 insertions(+), 121 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index adbcc33c..513dfafe 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -5,29 +5,27 @@ package stat import ( - "sync" - "github.com/gonum/matrix/mat64" ) // CovarianceMatrix calculates a covariance matrix (also known as a // variance-covariance matrix) from a matrix of data, using a two-pass -// algorithm. It will have better performance if a BLAS engine is -// registered in gonum/matrix/mat64. +// algorithm. It requires a registered BLAS engine in gonum/matrix/mat64. // // The matrix returned will be symmetric, square, and positive-semidefinite. func CovarianceMatrix(x mat64.Matrix) *mat64.Dense { // matrix version of the two pass algorithm. This doesn't use // the correction found in the Covariance and Variance functions. - if mat64.Registered() == nil { - // implementation that doesn't rely on a blasEngine - return covarianceMatrixWithoutBLAS(x) - } + r, _ := x.Dims() // determine the mean of each of the columns - b := ones(1, r) + ones := make([]float64, r) + for i := range ones { + ones[i] = 1 + } + b := mat64.NewDense(1, r, ones) b.Mul(b, x) b.Scale(1/float64(r), b) mu := b.RowView(0) @@ -51,115 +49,3 @@ func CovarianceMatrix(x mat64.Matrix) *mat64.Dense { ss.Scale(1/float64(r-1), &ss) return &ss } - -type covMatSlice struct { - i, j int - x, y []float64 -} - -func covarianceMatrixWithoutBLAS(x mat64.Matrix) *mat64.Dense { - r, c := x.Dims() - - // split out the matrix into columns - cols := make([][]float64, c) - for j := range cols { - cols[j] = make([]float64, r) - } - - if xRaw, ok := x.(mat64.RawMatrixer); ok { - for k, v := range xRaw.RawMatrix().Data { - i := k / c - j := k % c - cols[j][i] = v - } - } else { - for j := 0; j < c; j++ { - for i := 0; i < r; i++ { - cols[j][i] = x.At(i, j) - } - } - } - - // center the columns - for j := range cols { - mean := Mean(cols[j], nil) - for i := range cols[j] { - cols[j][i] -= mean - } - } - - blockSize := 1024 - if blockSize > c { - blockSize = c - } - var wg sync.WaitGroup - wg.Add(blockSize) - colCh := make(chan covMatSlice, blockSize) - - m := mat64.NewDense(c, c, nil) - for i := 0; i < blockSize; i++ { - go func(in <-chan covMatSlice) { - for { - xy, more := <-in - if !more { - wg.Done() - return - } - - if xy.i == xy.j { - m.Set(xy.i, xy.j, centeredVariance(xy.x)) - continue - } - v := centeredCovariance(xy.x, xy.y) - m.Set(xy.i, xy.j, v) - m.Set(xy.j, xy.i, v) - } - }(colCh) - } - go func(out chan<- covMatSlice) { - for i := 0; i < c; i++ { - for j := 0; j <= i; j++ { - out <- covMatSlice{ - i: i, - j: j, - x: cols[i], - y: cols[j], - } - } - } - close(out) - }(colCh) - // create the output matrix - wg.Wait() - return m -} - -// ones is a matrix of all ones. -func ones(r, c int) *mat64.Dense { - x := make([]float64, r*c) - for i := range x { - x[i] = 1 - } - return mat64.NewDense(r, c, x) -} - -// centeredVariance calculates the sum of squares of a single -// series, for calculating variance. -func centeredVariance(x []float64) float64 { - var ss float64 - for _, xv := range x { - ss += xv * xv - } - return ss / float64(len(x)-1) -} - -// centeredCovariance calculates the sum of squares of two -// series, for calculating variance. The input lengths are -// assumed to be identical. -func centeredCovariance(x, y []float64) float64 { - var ss float64 - for i, xv := range x { - ss += xv * y[i] - } - return ss / float64(len(x)-1) -} From e5b54c7f4c7337eeaea6d657f6e777111bd2ab1e Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Tue, 18 Nov 2014 19:23:37 -0500 Subject: [PATCH 17/38] implement tests for in place and weighted covariance --- covariancematrix_test.go | 124 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 117 insertions(+), 7 deletions(-) diff --git a/covariancematrix_test.go b/covariancematrix_test.go index e97aec1e..043ae3c6 100644 --- a/covariancematrix_test.go +++ b/covariancematrix_test.go @@ -19,9 +19,9 @@ func init() { func TestCovarianceMatrix(t *testing.T) { for i, test := range []struct { - mat mat64.Matrix - r, c int - x []float64 + mat, weights mat64.Matrix + r, c int + x []float64 }{ { mat: mat64.NewDense(5, 2, []float64{ @@ -31,15 +31,37 @@ func TestCovarianceMatrix(t *testing.T) { 1, -2, 2, 4, }), - r: 2, - c: 2, + weights: nil, + r: 2, + c: 2, x: []float64{ 2.5, 3, 3, 10, }, + }, { + mat: mat64.NewDense(5, 2, []float64{ + -2, -4, + -1, 2, + 0, 0, + 1, -2, + 2, 4, + }), + weights: mat64.NewDense(5, 1, []float64{ + 1.5, + .5, + 1.5, + .5, + 1, + }), + r: 2, + c: 2, + x: []float64{ + 2.75, 4.5, + 4.5, 11, + }, }, } { - c := CovarianceMatrix(test.mat).RawMatrix() + c := CovarianceMatrix(nil, test.mat, test.weights).RawMatrix() if c.Rows != test.r { t.Errorf("BLAS %d: expected rows %d, found %d", i, test.r, c.Rows) } @@ -65,7 +87,15 @@ func randMat(r, c int) mat64.Matrix { func benchmarkCovarianceMatrix(b *testing.B, m mat64.Matrix) { b.ResetTimer() for i := 0; i < b.N; i++ { - CovarianceMatrix(m) + CovarianceMatrix(nil, m, nil) + } +} +func benchmarkCovarianceMatrixInPlace(b *testing.B, m mat64.Matrix) { + _, c := m.Dims() + res := mat64.NewDense(c, c, nil) + b.ResetTimer() + for i := 0; i < b.N; i++ { + CovarianceMatrix(res, m, nil) } } @@ -148,3 +178,83 @@ func BenchmarkCovarianceMatrixHugexHuge(b *testing.B) { x := randMat(HUGE, HUGE) benchmarkCovarianceMatrix(b, x) }*/ + +func BenchmarkCovarianceMatrixSmallxSmallInPlace(b *testing.B) { + // 10 * 10 elements + x := randMat(SMALL, SMALL) + benchmarkCovarianceMatrixInPlace(b, x) +} +func BenchmarkCovarianceMatrixSmallxMediumInPlace(b *testing.B) { + // 10 * 1000 elements + x := randMat(SMALL, MEDIUM) + benchmarkCovarianceMatrixInPlace(b, x) +} + +/*func BenchmarkCovarianceMatrixSmallxLargeInPlace(b *testing.B) { + x := randMat(SMALL, LARGE) + benchmarkCovarianceMatrixInPlace(b, x) +} +func BenchmarkCovarianceMatrixSmallxHugeInPlace(b *testing.B) { + x := randMat(SMALL, HUGE) + benchmarkCovarianceMatrixInPlace(b, x) +}*/ + +func BenchmarkCovarianceMatrixMediumxSmallInPlace(b *testing.B) { + // 1000 * 10 elements + x := randMat(MEDIUM, SMALL) + benchmarkCovarianceMatrixInPlace(b, x) +} +func BenchmarkCovarianceMatrixMediumxMediumInPlace(b *testing.B) { + // 1000 * 1000 elements + x := randMat(MEDIUM, MEDIUM) + benchmarkCovarianceMatrixInPlace(b, x) +} + +/*func BenchmarkCovarianceMatrixMediumxLargeInPlace(b *testing.B) { + x := randMat(MEDIUM, LARGE) + benchmarkCovarianceMatrixInPlace(b, x) +} +func BenchmarkCovarianceMatrixMediumxHugeInPlace(b *testing.B) { + x := randMat(MEDIUM, HUGE) + benchmarkCovarianceMatrixInPlace(b, x) +}*/ + +func BenchmarkCovarianceMatrixLargexSmallInPlace(b *testing.B) { + // 1e5 * 10 elements + x := randMat(LARGE, SMALL) + benchmarkCovarianceMatrixInPlace(b, x) +} + +/*func BenchmarkCovarianceMatrixLargexMediumInPlace(b *testing.B) { + // 1e5 * 1000 elements + x := randMat(LARGE, MEDIUM) + benchmarkCovarianceMatrixInPlace(b, x) +} +func BenchmarkCovarianceMatrixLargexLargeInPlace(b *testing.B) { + x := randMat(LARGE, LARGE) + benchmarkCovarianceMatrixInPlace(b, x) +} +func BenchmarkCovarianceMatrixLargexHugeInPlace(b *testing.B) { + x := randMat(LARGE, HUGE) + benchmarkCovarianceMatrixInPlace(b, x) +}*/ + +func BenchmarkCovarianceMatrixHugexSmallInPlace(b *testing.B) { + // 1e7 * 10 elements + x := randMat(HUGE, SMALL) + benchmarkCovarianceMatrixInPlace(b, x) +} + +/*func BenchmarkCovarianceMatrixHugexMediumInPlace(b *testing.B) { + // 1e7 * 1000 elements + x := randMat(HUGE, MEDIUM) + benchmarkCovarianceMatrixInPlace(b, x) +} +func BenchmarkCovarianceMatrixHugexLargeInPlace(b *testing.B) { + x := randMat(HUGE, LARGE) + benchmarkCovarianceMatrixInPlace(b, x) +} +func BenchmarkCovarianceMatrixHugexHugeInPlace(b *testing.B) { + x := randMat(HUGE, HUGE) + benchmarkCovarianceMatrixInPlace(b, x) +}*/ From 14b1ccae5f61b4ee5d97b99919b94aa06f379061 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Tue, 18 Nov 2014 19:24:36 -0500 Subject: [PATCH 18/38] impl weighted cov matrix and in place assignment --- covariancematrix.go | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index 513dfafe..141e8c8a 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -13,12 +13,12 @@ import ( // algorithm. It requires a registered BLAS engine in gonum/matrix/mat64. // // The matrix returned will be symmetric, square, and positive-semidefinite. -func CovarianceMatrix(x mat64.Matrix) *mat64.Dense { +func CovarianceMatrix(cov *mat64.Dense, x, wts mat64.Matrix) *mat64.Dense { // matrix version of the two pass algorithm. This doesn't use // the correction found in the Covariance and Variance functions. - r, _ := x.Dims() + r, c := x.Dims() // determine the mean of each of the columns ones := make([]float64, r) @@ -32,20 +32,46 @@ func CovarianceMatrix(x mat64.Matrix) *mat64.Dense { // subtract the mean from the data xc := mat64.DenseCopyOf(x) + for i := 0; i < r; i++ { rv := xc.RowView(i) for j, mean := range mu { rv[j] -= mean } } - var xt mat64.Dense xt.TCopy(xc) + // normalization factor, typical n-1 + var N float64 + if wts != nil { + // should this be col major or row major? + if wr, wc := wts.Dims(); wr != r || wc != 1 { + panic("matrix length mismatch") + } + + for i := 0; i < r; i++ { + rv := xc.RowView(i) + w := wts.At(i, 0) + N += w + for j := 0; j < c; j++ { + rv[j] *= w + } + } + N = 1 / (N - 1) + } else { + N = 1 / float64(r-1) + } + // TODO: indicate that the resulting matrix is symmetric, which // should improve performance. - var ss mat64.Dense - ss.Mul(&xt, xc) - ss.Scale(1/float64(r-1), &ss) - return &ss + if cov == nil { + cov = mat64.NewDense(c, c, nil) + } else if covr, covc := cov.Dims(); covr != covc || covc != c { + panic("matrix size mismatch") + } + + cov.Mul(&xt, xc) + cov.Scale(N, cov) + return cov } From 893b8756787e4c33e860c1ff7628d682f269279c Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Tue, 18 Nov 2014 20:14:21 -0500 Subject: [PATCH 19/38] expand on help text --- covariancematrix.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/covariancematrix.go b/covariancematrix.go index 141e8c8a..7d6c71ba 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -13,6 +13,11 @@ import ( // algorithm. It requires a registered BLAS engine in gonum/matrix/mat64. // // The matrix returned will be symmetric, square, and positive-semidefinite. +// +// The weights matrix wts should have the same number of rows as the input +// data matrix x. cov should be a square matrix with the same number of +// columns as the input data matrix x, or if it is nil then a new Dense +// matrix will be constructed. func CovarianceMatrix(cov *mat64.Dense, x, wts mat64.Matrix) *mat64.Dense { // matrix version of the two pass algorithm. This doesn't use From 3d936bfda0a3429a631c736fe18b156a7b57b72e Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Sat, 22 Nov 2014 16:36:18 -0500 Subject: [PATCH 20/38] add tests for covariancematrix panic --- covariancematrix_test.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/covariancematrix_test.go b/covariancematrix_test.go index 043ae3c6..0f11d75c 100644 --- a/covariancematrix_test.go +++ b/covariancematrix_test.go @@ -63,15 +63,22 @@ func TestCovarianceMatrix(t *testing.T) { } { c := CovarianceMatrix(nil, test.mat, test.weights).RawMatrix() if c.Rows != test.r { - t.Errorf("BLAS %d: expected rows %d, found %d", i, test.r, c.Rows) + t.Errorf("%d: expected rows %d, found %d", i, test.r, c.Rows) } if c.Cols != test.c { - t.Errorf("BLAS %d: expected cols %d, found %d", i, test.c, c.Cols) + t.Errorf("%d: expected cols %d, found %d", i, test.c, c.Cols) } if !floats.Equal(test.x, c.Data) { - t.Errorf("BLAS %d: expected data %#q, found %#q", i, test.x, c.Data) + t.Errorf("%d: expected data %#q, found %#q", i, test.x, c.Data) } } + if !Panics(func() { CovarianceMatrix(nil, mat64.NewDense(5, 2, nil), mat64.NewDense(1, 1, nil)) }) { + t.Errorf("CovarianceMatrix did not panic with weight size mismatch") + } + if !Panics(func() { CovarianceMatrix(mat64.NewDense(1, 1, nil), mat64.NewDense(5, 2, nil), nil) }) { + t.Errorf("CovarianceMatrix did not panic with preallocation size mismatch") + } + } // benchmarks From 86e8ba26e0e028c89c8c62424e3279bb93119d11 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Sat, 29 Nov 2014 14:49:28 -0500 Subject: [PATCH 21/38] change covariance matrix to accept weight vector mat64.Matrix would accept matrixes with more than one column. Enforcing size by using a Vec makes more sense. --- covariancematrix.go | 7 +++---- covariancematrix_test.go | 11 ++++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index 7d6c71ba..18b27e58 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -18,7 +18,7 @@ import ( // data matrix x. cov should be a square matrix with the same number of // columns as the input data matrix x, or if it is nil then a new Dense // matrix will be constructed. -func CovarianceMatrix(cov *mat64.Dense, x, wts mat64.Matrix) *mat64.Dense { +func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts mat64.Vec) *mat64.Dense { // matrix version of the two pass algorithm. This doesn't use // the correction found in the Covariance and Variance functions. @@ -50,9 +50,8 @@ func CovarianceMatrix(cov *mat64.Dense, x, wts mat64.Matrix) *mat64.Dense { // normalization factor, typical n-1 var N float64 if wts != nil { - // should this be col major or row major? - if wr, wc := wts.Dims(); wr != r || wc != 1 { - panic("matrix length mismatch") + if wr, _ := wts.Dims(); wr != r { + panic("weight vector length mismatch") } for i := 0; i < r; i++ { diff --git a/covariancematrix_test.go b/covariancematrix_test.go index 0f11d75c..6b6ac700 100644 --- a/covariancematrix_test.go +++ b/covariancematrix_test.go @@ -19,9 +19,10 @@ func init() { func TestCovarianceMatrix(t *testing.T) { for i, test := range []struct { - mat, weights mat64.Matrix - r, c int - x []float64 + mat mat64.Matrix + weights mat64.Vec + r, c int + x []float64 }{ { mat: mat64.NewDense(5, 2, []float64{ @@ -46,7 +47,7 @@ func TestCovarianceMatrix(t *testing.T) { 1, -2, 2, 4, }), - weights: mat64.NewDense(5, 1, []float64{ + weights: mat64.Vec([]float64{ 1.5, .5, 1.5, @@ -72,7 +73,7 @@ func TestCovarianceMatrix(t *testing.T) { t.Errorf("%d: expected data %#q, found %#q", i, test.x, c.Data) } } - if !Panics(func() { CovarianceMatrix(nil, mat64.NewDense(5, 2, nil), mat64.NewDense(1, 1, nil)) }) { + if !Panics(func() { CovarianceMatrix(nil, mat64.NewDense(5, 2, nil), mat64.Vec([]float64{})) }) { t.Errorf("CovarianceMatrix did not panic with weight size mismatch") } if !Panics(func() { CovarianceMatrix(mat64.NewDense(1, 1, nil), mat64.NewDense(5, 2, nil), nil) }) { From 2121128140e6f370b73cc555e8903fc297b1d17d Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Sat, 29 Nov 2014 15:33:07 -0500 Subject: [PATCH 22/38] convert At call to range wts is a []float64, which means we can range over it, which will be faster than calling At in a loop. --- covariancematrix.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index 18b27e58..3100859c 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -54,9 +54,8 @@ func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts mat64.Vec) *mat64.De panic("weight vector length mismatch") } - for i := 0; i < r; i++ { + for i, w := range wts { rv := xc.RowView(i) - w := wts.At(i, 0) N += w for j := 0; j < c; j++ { rv[j] *= w From 0d3fcca1ebe998c995e0feb139bfe23e9c74b58f Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Tue, 2 Dec 2014 20:47:56 -0500 Subject: [PATCH 23/38] change mat64.Vec to []float64 --- covariancematrix.go | 4 ++-- covariancematrix_test.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index 3100859c..f2ebbcb3 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -18,7 +18,7 @@ import ( // data matrix x. cov should be a square matrix with the same number of // columns as the input data matrix x, or if it is nil then a new Dense // matrix will be constructed. -func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts mat64.Vec) *mat64.Dense { +func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense { // matrix version of the two pass algorithm. This doesn't use // the correction found in the Covariance and Variance functions. @@ -50,7 +50,7 @@ func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts mat64.Vec) *mat64.De // normalization factor, typical n-1 var N float64 if wts != nil { - if wr, _ := wts.Dims(); wr != r { + if wr := len(wts); wr != r { panic("weight vector length mismatch") } diff --git a/covariancematrix_test.go b/covariancematrix_test.go index 6b6ac700..67c13073 100644 --- a/covariancematrix_test.go +++ b/covariancematrix_test.go @@ -47,13 +47,13 @@ func TestCovarianceMatrix(t *testing.T) { 1, -2, 2, 4, }), - weights: mat64.Vec([]float64{ + weights: []float64{ 1.5, .5, 1.5, .5, 1, - }), + }, r: 2, c: 2, x: []float64{ From bce5c81adc0df2ebf4c2f3f1f606e75d85360e38 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Tue, 2 Dec 2014 21:08:47 -0500 Subject: [PATCH 24/38] update doc, add test for mat64.Vec --- covariancematrix.go | 4 ++-- covariancematrix_test.go | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index f2ebbcb3..455f75d7 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -14,8 +14,8 @@ import ( // // The matrix returned will be symmetric, square, and positive-semidefinite. // -// The weights matrix wts should have the same number of rows as the input -// data matrix x. cov should be a square matrix with the same number of +// The weights wts should have the same number of elements as the rows in +// input data matrix x. cov should be a square matrix with the same number of // columns as the input data matrix x, or if it is nil then a new Dense // matrix will be constructed. func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense { diff --git a/covariancematrix_test.go b/covariancematrix_test.go index 67c13073..4c806b24 100644 --- a/covariancematrix_test.go +++ b/covariancematrix_test.go @@ -60,6 +60,27 @@ func TestCovarianceMatrix(t *testing.T) { 2.75, 4.5, 4.5, 11, }, + }, { + mat: mat64.NewDense(5, 2, []float64{ + -2, -4, + -1, 2, + 0, 0, + 1, -2, + 2, 4, + }), + weights: mat64.Vec([]float64{ + 1.5, + .5, + 1.5, + .5, + 1, + }), + r: 2, + c: 2, + x: []float64{ + 2.75, 4.5, + 4.5, 11, + }, }, } { c := CovarianceMatrix(nil, test.mat, test.weights).RawMatrix() From 0e425f0d5514b9a6bc8d8b704c01676dfc01cd19 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Sun, 21 Dec 2014 18:54:44 -0500 Subject: [PATCH 25/38] Improve comment on what algorithm is used --- covariancematrix.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index 455f75d7..0cbd03a7 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -19,10 +19,8 @@ import ( // columns as the input data matrix x, or if it is nil then a new Dense // matrix will be constructed. func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense { - - // matrix version of the two pass algorithm. This doesn't use + // This is the matrix version of the two-pass algorithm. It doesn't use // the correction found in the Covariance and Variance functions. - r, c := x.Dims() // determine the mean of each of the columns From 831ced4cf71bd8c0a59db0de4fd6b62a77a57e0f Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Sun, 21 Dec 2014 18:55:52 -0500 Subject: [PATCH 26/38] Improve comment on normalization factor. --- covariancematrix.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/covariancematrix.go b/covariancematrix.go index 0cbd03a7..e8f4370e 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -45,7 +45,7 @@ func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.De var xt mat64.Dense xt.TCopy(xc) - // normalization factor, typical n-1 + // Calculate the normalization factor, which is typically N-1. var N float64 if wts != nil { if wr := len(wts); wr != r { From 7aa58b6a9432c78cd52a68be6c06c17eeab6d54d Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Sun, 21 Dec 2014 18:57:00 -0500 Subject: [PATCH 27/38] remove unused benchmark functions --- covariancematrix_test.go | 92 ---------------------------------------- 1 file changed, 92 deletions(-) diff --git a/covariancematrix_test.go b/covariancematrix_test.go index 4c806b24..9af15e28 100644 --- a/covariancematrix_test.go +++ b/covariancematrix_test.go @@ -139,15 +139,6 @@ func BenchmarkCovarianceMatrixSmallxMedium(b *testing.B) { benchmarkCovarianceMatrix(b, x) } -/*func BenchmarkCovarianceMatrixSmallxLarge(b *testing.B) { - x := randMat(SMALL, LARGE) - benchmarkCovarianceMatrix(b, x) -} -func BenchmarkCovarianceMatrixSmallxHuge(b *testing.B) { - x := randMat(SMALL, HUGE) - benchmarkCovarianceMatrix(b, x) -}*/ - func BenchmarkCovarianceMatrixMediumxSmall(b *testing.B) { // 1000 * 10 elements x := randMat(MEDIUM, SMALL) @@ -159,55 +150,18 @@ func BenchmarkCovarianceMatrixMediumxMedium(b *testing.B) { benchmarkCovarianceMatrix(b, x) } -/*func BenchmarkCovarianceMatrixMediumxLarge(b *testing.B) { - x := randMat(MEDIUM, LARGE) - benchmarkCovarianceMatrix(b, x) -} -func BenchmarkCovarianceMatrixMediumxHuge(b *testing.B) { - x := randMat(MEDIUM, HUGE) - benchmarkCovarianceMatrix(b, x) -}*/ - func BenchmarkCovarianceMatrixLargexSmall(b *testing.B) { // 1e5 * 10 elements x := randMat(LARGE, SMALL) benchmarkCovarianceMatrix(b, x) } -/*func BenchmarkCovarianceMatrixLargexMedium(b *testing.B) { - // 1e5 * 1000 elements - x := randMat(LARGE, MEDIUM) - benchmarkCovarianceMatrix(b, x) -} -func BenchmarkCovarianceMatrixLargexLarge(b *testing.B) { - x := randMat(LARGE, LARGE) - benchmarkCovarianceMatrix(b, x) -} -func BenchmarkCovarianceMatrixLargexHuge(b *testing.B) { - x := randMat(LARGE, HUGE) - benchmarkCovarianceMatrix(b, x) -}*/ - func BenchmarkCovarianceMatrixHugexSmall(b *testing.B) { // 1e7 * 10 elements x := randMat(HUGE, SMALL) benchmarkCovarianceMatrix(b, x) } -/*func BenchmarkCovarianceMatrixHugexMedium(b *testing.B) { - // 1e7 * 1000 elements - x := randMat(HUGE, MEDIUM) - benchmarkCovarianceMatrix(b, x) -} -func BenchmarkCovarianceMatrixHugexLarge(b *testing.B) { - x := randMat(HUGE, LARGE) - benchmarkCovarianceMatrix(b, x) -} -func BenchmarkCovarianceMatrixHugexHuge(b *testing.B) { - x := randMat(HUGE, HUGE) - benchmarkCovarianceMatrix(b, x) -}*/ - func BenchmarkCovarianceMatrixSmallxSmallInPlace(b *testing.B) { // 10 * 10 elements x := randMat(SMALL, SMALL) @@ -219,15 +173,6 @@ func BenchmarkCovarianceMatrixSmallxMediumInPlace(b *testing.B) { benchmarkCovarianceMatrixInPlace(b, x) } -/*func BenchmarkCovarianceMatrixSmallxLargeInPlace(b *testing.B) { - x := randMat(SMALL, LARGE) - benchmarkCovarianceMatrixInPlace(b, x) -} -func BenchmarkCovarianceMatrixSmallxHugeInPlace(b *testing.B) { - x := randMat(SMALL, HUGE) - benchmarkCovarianceMatrixInPlace(b, x) -}*/ - func BenchmarkCovarianceMatrixMediumxSmallInPlace(b *testing.B) { // 1000 * 10 elements x := randMat(MEDIUM, SMALL) @@ -239,51 +184,14 @@ func BenchmarkCovarianceMatrixMediumxMediumInPlace(b *testing.B) { benchmarkCovarianceMatrixInPlace(b, x) } -/*func BenchmarkCovarianceMatrixMediumxLargeInPlace(b *testing.B) { - x := randMat(MEDIUM, LARGE) - benchmarkCovarianceMatrixInPlace(b, x) -} -func BenchmarkCovarianceMatrixMediumxHugeInPlace(b *testing.B) { - x := randMat(MEDIUM, HUGE) - benchmarkCovarianceMatrixInPlace(b, x) -}*/ - func BenchmarkCovarianceMatrixLargexSmallInPlace(b *testing.B) { // 1e5 * 10 elements x := randMat(LARGE, SMALL) benchmarkCovarianceMatrixInPlace(b, x) } -/*func BenchmarkCovarianceMatrixLargexMediumInPlace(b *testing.B) { - // 1e5 * 1000 elements - x := randMat(LARGE, MEDIUM) - benchmarkCovarianceMatrixInPlace(b, x) -} -func BenchmarkCovarianceMatrixLargexLargeInPlace(b *testing.B) { - x := randMat(LARGE, LARGE) - benchmarkCovarianceMatrixInPlace(b, x) -} -func BenchmarkCovarianceMatrixLargexHugeInPlace(b *testing.B) { - x := randMat(LARGE, HUGE) - benchmarkCovarianceMatrixInPlace(b, x) -}*/ - func BenchmarkCovarianceMatrixHugexSmallInPlace(b *testing.B) { // 1e7 * 10 elements x := randMat(HUGE, SMALL) benchmarkCovarianceMatrixInPlace(b, x) } - -/*func BenchmarkCovarianceMatrixHugexMediumInPlace(b *testing.B) { - // 1e7 * 1000 elements - x := randMat(HUGE, MEDIUM) - benchmarkCovarianceMatrixInPlace(b, x) -} -func BenchmarkCovarianceMatrixHugexLargeInPlace(b *testing.B) { - x := randMat(HUGE, LARGE) - benchmarkCovarianceMatrixInPlace(b, x) -} -func BenchmarkCovarianceMatrixHugexHugeInPlace(b *testing.B) { - x := randMat(HUGE, HUGE) - benchmarkCovarianceMatrixInPlace(b, x) -}*/ From 00773a9c4fada2982e92a625435f2b9a1205e2f6 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Sun, 21 Dec 2014 18:57:58 -0500 Subject: [PATCH 28/38] change panic to use mat64.ErrShape --- covariancematrix.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index e8f4370e..fecef9da 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -49,7 +49,7 @@ func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.De var N float64 if wts != nil { if wr := len(wts); wr != r { - panic("weight vector length mismatch") + panic(mat64.ErrShape) } for i, w := range wts { @@ -69,7 +69,7 @@ func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.De if cov == nil { cov = mat64.NewDense(c, c, nil) } else if covr, covc := cov.Dims(); covr != covc || covc != c { - panic("matrix size mismatch") + panic(mat64.ErrShape) } cov.Mul(&xt, xc) From f757c287d459bedd23961ec410e7f4b857df0946 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Sun, 21 Dec 2014 19:07:01 -0500 Subject: [PATCH 29/38] change benchmark constant names to lowercase Changed SMALL, MEDIUM, LARGE, and HUGE to small, medum, large, and huge, respectively. --- covariancematrix_test.go | 24 ++-- moments_bench_test.go | 304 +++++++++++++++++++-------------------- 2 files changed, 164 insertions(+), 164 deletions(-) diff --git a/covariancematrix_test.go b/covariancematrix_test.go index 9af15e28..cc42939b 100644 --- a/covariancematrix_test.go +++ b/covariancematrix_test.go @@ -130,68 +130,68 @@ func benchmarkCovarianceMatrixInPlace(b *testing.B, m mat64.Matrix) { func BenchmarkCovarianceMatrixSmallxSmall(b *testing.B) { // 10 * 10 elements - x := randMat(SMALL, SMALL) + x := randMat(small, small) benchmarkCovarianceMatrix(b, x) } func BenchmarkCovarianceMatrixSmallxMedium(b *testing.B) { // 10 * 1000 elements - x := randMat(SMALL, MEDIUM) + x := randMat(small, medium) benchmarkCovarianceMatrix(b, x) } func BenchmarkCovarianceMatrixMediumxSmall(b *testing.B) { // 1000 * 10 elements - x := randMat(MEDIUM, SMALL) + x := randMat(medium, small) benchmarkCovarianceMatrix(b, x) } func BenchmarkCovarianceMatrixMediumxMedium(b *testing.B) { // 1000 * 1000 elements - x := randMat(MEDIUM, MEDIUM) + x := randMat(medium, medium) benchmarkCovarianceMatrix(b, x) } func BenchmarkCovarianceMatrixLargexSmall(b *testing.B) { // 1e5 * 10 elements - x := randMat(LARGE, SMALL) + x := randMat(large, small) benchmarkCovarianceMatrix(b, x) } func BenchmarkCovarianceMatrixHugexSmall(b *testing.B) { // 1e7 * 10 elements - x := randMat(HUGE, SMALL) + x := randMat(huge, small) benchmarkCovarianceMatrix(b, x) } func BenchmarkCovarianceMatrixSmallxSmallInPlace(b *testing.B) { // 10 * 10 elements - x := randMat(SMALL, SMALL) + x := randMat(small, small) benchmarkCovarianceMatrixInPlace(b, x) } func BenchmarkCovarianceMatrixSmallxMediumInPlace(b *testing.B) { // 10 * 1000 elements - x := randMat(SMALL, MEDIUM) + x := randMat(small, medium) benchmarkCovarianceMatrixInPlace(b, x) } func BenchmarkCovarianceMatrixMediumxSmallInPlace(b *testing.B) { // 1000 * 10 elements - x := randMat(MEDIUM, SMALL) + x := randMat(medium, small) benchmarkCovarianceMatrixInPlace(b, x) } func BenchmarkCovarianceMatrixMediumxMediumInPlace(b *testing.B) { // 1000 * 1000 elements - x := randMat(MEDIUM, MEDIUM) + x := randMat(medium, medium) benchmarkCovarianceMatrixInPlace(b, x) } func BenchmarkCovarianceMatrixLargexSmallInPlace(b *testing.B) { // 1e5 * 10 elements - x := randMat(LARGE, SMALL) + x := randMat(large, small) benchmarkCovarianceMatrixInPlace(b, x) } func BenchmarkCovarianceMatrixHugexSmallInPlace(b *testing.B) { // 1e7 * 10 elements - x := randMat(HUGE, SMALL) + x := randMat(huge, small) benchmarkCovarianceMatrixInPlace(b, x) } diff --git a/moments_bench_test.go b/moments_bench_test.go index 980bf352..6df41394 100644 --- a/moments_bench_test.go +++ b/moments_bench_test.go @@ -17,10 +17,10 @@ import ( ) const ( - SMALL = 10 - MEDIUM = 1000 - LARGE = 100000 - HUGE = 10000000 + small = 10 + medium = 1000 + large = 100000 + huge = 10000000 ) // tests for unweighted versions @@ -41,46 +41,46 @@ func benchmarkMean(b *testing.B, s, wts []float64) { } func BenchmarkMeanSmall(b *testing.B) { - s := RandomSlice(SMALL) + s := RandomSlice(small) benchmarkMean(b, s, nil) } func BenchmarkMeanMedium(b *testing.B) { - s := RandomSlice(MEDIUM) + s := RandomSlice(medium) benchmarkMean(b, s, nil) } func BenchmarkMeanLarge(b *testing.B) { - s := RandomSlice(LARGE) + s := RandomSlice(large) benchmarkMean(b, s, nil) } func BenchmarkMeanHuge(b *testing.B) { - s := RandomSlice(HUGE) + s := RandomSlice(huge) benchmarkMean(b, s, nil) } func BenchmarkMeanSmallWeighted(b *testing.B) { - s := RandomSlice(SMALL) - wts := RandomSlice(SMALL) + s := RandomSlice(small) + wts := RandomSlice(small) benchmarkMean(b, s, wts) } func BenchmarkMeanMediumWeighted(b *testing.B) { - s := RandomSlice(MEDIUM) - wts := RandomSlice(MEDIUM) + s := RandomSlice(medium) + wts := RandomSlice(medium) benchmarkMean(b, s, wts) } func BenchmarkMeanLargeWeighted(b *testing.B) { - s := RandomSlice(LARGE) - wts := RandomSlice(LARGE) + s := RandomSlice(large) + wts := RandomSlice(large) benchmarkMean(b, s, wts) } func BenchmarkMeanHugeWeighted(b *testing.B) { - s := RandomSlice(HUGE) - wts := RandomSlice(HUGE) + s := RandomSlice(huge) + wts := RandomSlice(huge) benchmarkMean(b, s, wts) } @@ -92,46 +92,46 @@ func benchmarkVariance(b *testing.B, s, wts []float64) { } func BenchmarkVarianceSmall(b *testing.B) { - s := RandomSlice(SMALL) + s := RandomSlice(small) benchmarkVariance(b, s, nil) } func BenchmarkVarianceMedium(b *testing.B) { - s := RandomSlice(MEDIUM) + s := RandomSlice(medium) benchmarkVariance(b, s, nil) } func BenchmarkVarianceLarge(b *testing.B) { - s := RandomSlice(LARGE) + s := RandomSlice(large) benchmarkVariance(b, s, nil) } func BenchmarkVarianceHuge(b *testing.B) { - s := RandomSlice(HUGE) + s := RandomSlice(huge) benchmarkVariance(b, s, nil) } func BenchmarkVarianceSmallWeighted(b *testing.B) { - s := RandomSlice(SMALL) - wts := RandomSlice(SMALL) + s := RandomSlice(small) + wts := RandomSlice(small) benchmarkVariance(b, s, wts) } func BenchmarkVarianceMediumWeighted(b *testing.B) { - s := RandomSlice(MEDIUM) - wts := RandomSlice(MEDIUM) + s := RandomSlice(medium) + wts := RandomSlice(medium) benchmarkVariance(b, s, wts) } func BenchmarkVarianceLargeWeighted(b *testing.B) { - s := RandomSlice(LARGE) - wts := RandomSlice(LARGE) + s := RandomSlice(large) + wts := RandomSlice(large) benchmarkVariance(b, s, wts) } func BenchmarkVarianceHugeWeighted(b *testing.B) { - s := RandomSlice(HUGE) - wts := RandomSlice(HUGE) + s := RandomSlice(huge) + wts := RandomSlice(huge) benchmarkVariance(b, s, wts) } @@ -143,46 +143,46 @@ func benchmarkStdDev(b *testing.B, s, wts []float64) { } func BenchmarkStdDevSmall(b *testing.B) { - s := RandomSlice(SMALL) + s := RandomSlice(small) benchmarkStdDev(b, s, nil) } func BenchmarkStdDevMedium(b *testing.B) { - s := RandomSlice(MEDIUM) + s := RandomSlice(medium) benchmarkStdDev(b, s, nil) } func BenchmarkStdDevLarge(b *testing.B) { - s := RandomSlice(LARGE) + s := RandomSlice(large) benchmarkStdDev(b, s, nil) } func BenchmarkStdDevHuge(b *testing.B) { - s := RandomSlice(HUGE) + s := RandomSlice(huge) benchmarkStdDev(b, s, nil) } func BenchmarkStdDevSmallWeighted(b *testing.B) { - s := RandomSlice(SMALL) - wts := RandomSlice(SMALL) + s := RandomSlice(small) + wts := RandomSlice(small) benchmarkStdDev(b, s, wts) } func BenchmarkStdDevMediumWeighted(b *testing.B) { - s := RandomSlice(MEDIUM) - wts := RandomSlice(MEDIUM) + s := RandomSlice(medium) + wts := RandomSlice(medium) benchmarkStdDev(b, s, wts) } func BenchmarkStdDevLargeWeighted(b *testing.B) { - s := RandomSlice(LARGE) - wts := RandomSlice(LARGE) + s := RandomSlice(large) + wts := RandomSlice(large) benchmarkStdDev(b, s, wts) } func BenchmarkStdDevHugeWeighted(b *testing.B) { - s := RandomSlice(HUGE) - wts := RandomSlice(HUGE) + s := RandomSlice(huge) + wts := RandomSlice(huge) benchmarkStdDev(b, s, wts) } @@ -194,46 +194,46 @@ func benchmarkMeanVariance(b *testing.B, s, wts []float64) { } func BenchmarkMeanVarianceSmall(b *testing.B) { - s := RandomSlice(SMALL) + s := RandomSlice(small) benchmarkMeanVariance(b, s, nil) } func BenchmarkMeanVarianceMedium(b *testing.B) { - s := RandomSlice(MEDIUM) + s := RandomSlice(medium) benchmarkMeanVariance(b, s, nil) } func BenchmarkMeanVarianceLarge(b *testing.B) { - s := RandomSlice(LARGE) + s := RandomSlice(large) benchmarkMeanVariance(b, s, nil) } func BenchmarkMeanVarianceHuge(b *testing.B) { - s := RandomSlice(HUGE) + s := RandomSlice(huge) benchmarkMeanVariance(b, s, nil) } func BenchmarkMeanVarianceSmallWeighted(b *testing.B) { - s := RandomSlice(SMALL) - wts := RandomSlice(SMALL) + s := RandomSlice(small) + wts := RandomSlice(small) benchmarkMeanVariance(b, s, wts) } func BenchmarkMeanVarianceMediumWeighted(b *testing.B) { - s := RandomSlice(MEDIUM) - wts := RandomSlice(MEDIUM) + s := RandomSlice(medium) + wts := RandomSlice(medium) benchmarkMeanVariance(b, s, wts) } func BenchmarkMeanVarianceLargeWeighted(b *testing.B) { - s := RandomSlice(LARGE) - wts := RandomSlice(LARGE) + s := RandomSlice(large) + wts := RandomSlice(large) benchmarkMeanVariance(b, s, wts) } func BenchmarkMeanVarianceHugeWeighted(b *testing.B) { - s := RandomSlice(HUGE) - wts := RandomSlice(HUGE) + s := RandomSlice(huge) + wts := RandomSlice(huge) benchmarkMeanVariance(b, s, wts) } @@ -245,46 +245,46 @@ func benchmarkMeanStdDev(b *testing.B, s, wts []float64) { } func BenchmarkMeanStdDevSmall(b *testing.B) { - s := RandomSlice(SMALL) + s := RandomSlice(small) benchmarkMeanStdDev(b, s, nil) } func BenchmarkMeanStdDevMedium(b *testing.B) { - s := RandomSlice(MEDIUM) + s := RandomSlice(medium) benchmarkMeanStdDev(b, s, nil) } func BenchmarkMeanStdDevLarge(b *testing.B) { - s := RandomSlice(LARGE) + s := RandomSlice(large) benchmarkMeanStdDev(b, s, nil) } func BenchmarkMeanStdDevHuge(b *testing.B) { - s := RandomSlice(HUGE) + s := RandomSlice(huge) benchmarkMeanStdDev(b, s, nil) } func BenchmarkMeanStdDevSmallWeighted(b *testing.B) { - s := RandomSlice(SMALL) - wts := RandomSlice(SMALL) + s := RandomSlice(small) + wts := RandomSlice(small) benchmarkMeanStdDev(b, s, wts) } func BenchmarkMeanStdDevMediumWeighted(b *testing.B) { - s := RandomSlice(MEDIUM) - wts := RandomSlice(MEDIUM) + s := RandomSlice(medium) + wts := RandomSlice(medium) benchmarkMeanStdDev(b, s, wts) } func BenchmarkMeanStdDevLargeWeighted(b *testing.B) { - s := RandomSlice(LARGE) - wts := RandomSlice(LARGE) + s := RandomSlice(large) + wts := RandomSlice(large) benchmarkMeanStdDev(b, s, wts) } func BenchmarkMeanStdDevHugeWeighted(b *testing.B) { - s := RandomSlice(HUGE) - wts := RandomSlice(HUGE) + s := RandomSlice(huge) + wts := RandomSlice(huge) benchmarkMeanStdDev(b, s, wts) } @@ -296,54 +296,54 @@ func benchmarkCovariance(b *testing.B, s1, s2, wts []float64) { } func BenchmarkCovarianceSmall(b *testing.B) { - s1 := RandomSlice(SMALL) - s2 := RandomSlice(SMALL) + s1 := RandomSlice(small) + s2 := RandomSlice(small) benchmarkCovariance(b, s1, s2, nil) } func BenchmarkCovarianceMedium(b *testing.B) { - s1 := RandomSlice(MEDIUM) - s2 := RandomSlice(MEDIUM) + s1 := RandomSlice(medium) + s2 := RandomSlice(medium) benchmarkCovariance(b, s1, s2, nil) } func BenchmarkCovarianceLarge(b *testing.B) { - s1 := RandomSlice(LARGE) - s2 := RandomSlice(LARGE) + s1 := RandomSlice(large) + s2 := RandomSlice(large) benchmarkCovariance(b, s1, s2, nil) } func BenchmarkCovarianceHuge(b *testing.B) { - s1 := RandomSlice(HUGE) - s2 := RandomSlice(HUGE) + s1 := RandomSlice(huge) + s2 := RandomSlice(huge) benchmarkCovariance(b, s1, s2, nil) } func BenchmarkCovarianceSmallWeighted(b *testing.B) { - s1 := RandomSlice(SMALL) - s2 := RandomSlice(SMALL) - wts := RandomSlice(SMALL) + s1 := RandomSlice(small) + s2 := RandomSlice(small) + wts := RandomSlice(small) benchmarkCovariance(b, s1, s2, wts) } func BenchmarkCovarianceMediumWeighted(b *testing.B) { - s1 := RandomSlice(MEDIUM) - s2 := RandomSlice(MEDIUM) - wts := RandomSlice(MEDIUM) + s1 := RandomSlice(medium) + s2 := RandomSlice(medium) + wts := RandomSlice(medium) benchmarkCovariance(b, s1, s2, wts) } func BenchmarkCovarianceLargeWeighted(b *testing.B) { - s1 := RandomSlice(LARGE) - s2 := RandomSlice(LARGE) - wts := RandomSlice(LARGE) + s1 := RandomSlice(large) + s2 := RandomSlice(large) + wts := RandomSlice(large) benchmarkCovariance(b, s1, s2, wts) } func BenchmarkCovarianceHugeWeighted(b *testing.B) { - s1 := RandomSlice(HUGE) - s2 := RandomSlice(HUGE) - wts := RandomSlice(HUGE) + s1 := RandomSlice(huge) + s2 := RandomSlice(huge) + wts := RandomSlice(huge) benchmarkCovariance(b, s1, s2, wts) } @@ -355,54 +355,54 @@ func benchmarkCorrelation(b *testing.B, s1, s2, wts []float64) { } func BenchmarkCorrelationSmall(b *testing.B) { - s1 := RandomSlice(SMALL) - s2 := RandomSlice(SMALL) + s1 := RandomSlice(small) + s2 := RandomSlice(small) benchmarkCorrelation(b, s1, s2, nil) } func BenchmarkCorrelationMedium(b *testing.B) { - s1 := RandomSlice(MEDIUM) - s2 := RandomSlice(MEDIUM) + s1 := RandomSlice(medium) + s2 := RandomSlice(medium) benchmarkCorrelation(b, s1, s2, nil) } func BenchmarkCorrelationLarge(b *testing.B) { - s1 := RandomSlice(LARGE) - s2 := RandomSlice(LARGE) + s1 := RandomSlice(large) + s2 := RandomSlice(large) benchmarkCorrelation(b, s1, s2, nil) } func BenchmarkCorrelationHuge(b *testing.B) { - s1 := RandomSlice(HUGE) - s2 := RandomSlice(HUGE) + s1 := RandomSlice(huge) + s2 := RandomSlice(huge) benchmarkCorrelation(b, s1, s2, nil) } func BenchmarkCorrelationSmallWeighted(b *testing.B) { - s1 := RandomSlice(SMALL) - s2 := RandomSlice(SMALL) - wts := RandomSlice(SMALL) + s1 := RandomSlice(small) + s2 := RandomSlice(small) + wts := RandomSlice(small) benchmarkCorrelation(b, s1, s2, wts) } func BenchmarkCorrelationMediumWeighted(b *testing.B) { - s1 := RandomSlice(MEDIUM) - s2 := RandomSlice(MEDIUM) - wts := RandomSlice(MEDIUM) + s1 := RandomSlice(medium) + s2 := RandomSlice(medium) + wts := RandomSlice(medium) benchmarkCorrelation(b, s1, s2, wts) } func BenchmarkCorrelationLargeWeighted(b *testing.B) { - s1 := RandomSlice(LARGE) - s2 := RandomSlice(LARGE) - wts := RandomSlice(LARGE) + s1 := RandomSlice(large) + s2 := RandomSlice(large) + wts := RandomSlice(large) benchmarkCorrelation(b, s1, s2, wts) } func BenchmarkCorrelationHugeWeighted(b *testing.B) { - s1 := RandomSlice(HUGE) - s2 := RandomSlice(HUGE) - wts := RandomSlice(HUGE) + s1 := RandomSlice(huge) + s2 := RandomSlice(huge) + wts := RandomSlice(huge) benchmarkCorrelation(b, s1, s2, wts) } @@ -414,46 +414,46 @@ func benchmarkSkew(b *testing.B, s, wts []float64) { } func BenchmarkSkewSmall(b *testing.B) { - s := RandomSlice(SMALL) + s := RandomSlice(small) benchmarkSkew(b, s, nil) } func BenchmarkSkewMedium(b *testing.B) { - s := RandomSlice(MEDIUM) + s := RandomSlice(medium) benchmarkSkew(b, s, nil) } func BenchmarkSkewLarge(b *testing.B) { - s := RandomSlice(LARGE) + s := RandomSlice(large) benchmarkSkew(b, s, nil) } func BenchmarkSkewHuge(b *testing.B) { - s := RandomSlice(HUGE) + s := RandomSlice(huge) benchmarkSkew(b, s, nil) } func BenchmarkSkewSmallWeighted(b *testing.B) { - s := RandomSlice(SMALL) - wts := RandomSlice(SMALL) + s := RandomSlice(small) + wts := RandomSlice(small) benchmarkSkew(b, s, wts) } func BenchmarkSkewMediumWeighted(b *testing.B) { - s := RandomSlice(MEDIUM) - wts := RandomSlice(MEDIUM) + s := RandomSlice(medium) + wts := RandomSlice(medium) benchmarkSkew(b, s, wts) } func BenchmarkSkewLargeWeighted(b *testing.B) { - s := RandomSlice(LARGE) - wts := RandomSlice(LARGE) + s := RandomSlice(large) + wts := RandomSlice(large) benchmarkSkew(b, s, wts) } func BenchmarkSkewHugeWeighted(b *testing.B) { - s := RandomSlice(HUGE) - wts := RandomSlice(HUGE) + s := RandomSlice(huge) + wts := RandomSlice(huge) benchmarkSkew(b, s, wts) } @@ -465,46 +465,46 @@ func benchmarkExKurtosis(b *testing.B, s, wts []float64) { } func BenchmarkExKurtosisSmall(b *testing.B) { - s := RandomSlice(SMALL) + s := RandomSlice(small) benchmarkExKurtosis(b, s, nil) } func BenchmarkExKurtosisMedium(b *testing.B) { - s := RandomSlice(MEDIUM) + s := RandomSlice(medium) benchmarkExKurtosis(b, s, nil) } func BenchmarkExKurtosisLarge(b *testing.B) { - s := RandomSlice(LARGE) + s := RandomSlice(large) benchmarkExKurtosis(b, s, nil) } func BenchmarkExKurtosisHuge(b *testing.B) { - s := RandomSlice(HUGE) + s := RandomSlice(huge) benchmarkExKurtosis(b, s, nil) } func BenchmarkExKurtosisSmallWeighted(b *testing.B) { - s := RandomSlice(SMALL) - wts := RandomSlice(SMALL) + s := RandomSlice(small) + wts := RandomSlice(small) benchmarkExKurtosis(b, s, wts) } func BenchmarkExKurtosisMediumWeighted(b *testing.B) { - s := RandomSlice(MEDIUM) - wts := RandomSlice(MEDIUM) + s := RandomSlice(medium) + wts := RandomSlice(medium) benchmarkExKurtosis(b, s, wts) } func BenchmarkExKurtosisLargeWeighted(b *testing.B) { - s := RandomSlice(LARGE) - wts := RandomSlice(LARGE) + s := RandomSlice(large) + wts := RandomSlice(large) benchmarkExKurtosis(b, s, wts) } func BenchmarkExKurtosisHugeWeighted(b *testing.B) { - s := RandomSlice(HUGE) - wts := RandomSlice(HUGE) + s := RandomSlice(huge) + wts := RandomSlice(huge) benchmarkExKurtosis(b, s, wts) } @@ -516,46 +516,46 @@ func benchmarkMoment(b *testing.B, n float64, s, wts []float64) { } func BenchmarkMomentSmall(b *testing.B) { - s := RandomSlice(SMALL) + s := RandomSlice(small) benchmarkMoment(b, 5, s, nil) } func BenchmarkMomentMedium(b *testing.B) { - s := RandomSlice(MEDIUM) + s := RandomSlice(medium) benchmarkMoment(b, 5, s, nil) } func BenchmarkMomentLarge(b *testing.B) { - s := RandomSlice(LARGE) + s := RandomSlice(large) benchmarkMoment(b, 5, s, nil) } func BenchmarkMomentHuge(b *testing.B) { - s := RandomSlice(HUGE) + s := RandomSlice(huge) benchmarkMoment(b, 5, s, nil) } func BenchmarkMomentSmallWeighted(b *testing.B) { - s := RandomSlice(SMALL) - wts := RandomSlice(SMALL) + s := RandomSlice(small) + wts := RandomSlice(small) benchmarkMoment(b, 5, s, wts) } func BenchmarkMomentMediumWeighted(b *testing.B) { - s := RandomSlice(MEDIUM) - wts := RandomSlice(MEDIUM) + s := RandomSlice(medium) + wts := RandomSlice(medium) benchmarkMoment(b, 5, s, wts) } func BenchmarkMomentLargeWeighted(b *testing.B) { - s := RandomSlice(LARGE) - wts := RandomSlice(LARGE) + s := RandomSlice(large) + wts := RandomSlice(large) benchmarkMoment(b, 5, s, wts) } func BenchmarkMomentHugeWeighted(b *testing.B) { - s := RandomSlice(HUGE) - wts := RandomSlice(HUGE) + s := RandomSlice(huge) + wts := RandomSlice(huge) benchmarkMoment(b, 5, s, wts) } @@ -567,45 +567,45 @@ func benchmarkMomentAbout(b *testing.B, n float64, s []float64, mean float64, wt } func BenchmarkMomentAboutSmall(b *testing.B) { - s := RandomSlice(SMALL) + s := RandomSlice(small) benchmarkMomentAbout(b, 5, s, 0, nil) } func BenchmarkMomentAboutMedium(b *testing.B) { - s := RandomSlice(MEDIUM) + s := RandomSlice(medium) benchmarkMomentAbout(b, 5, s, 0, nil) } func BenchmarkMomentAboutLarge(b *testing.B) { - s := RandomSlice(LARGE) + s := RandomSlice(large) benchmarkMomentAbout(b, 5, s, 0, nil) } func BenchmarkMomentAboutHuge(b *testing.B) { - s := RandomSlice(HUGE) + s := RandomSlice(huge) benchmarkMomentAbout(b, 5, s, 0, nil) } func BenchmarkMomentAboutSmallWeighted(b *testing.B) { - s := RandomSlice(SMALL) - wts := RandomSlice(SMALL) + s := RandomSlice(small) + wts := RandomSlice(small) benchmarkMomentAbout(b, 5, s, 0, wts) } func BenchmarkMomentAboutMediumWeighted(b *testing.B) { - s := RandomSlice(MEDIUM) - wts := RandomSlice(MEDIUM) + s := RandomSlice(medium) + wts := RandomSlice(medium) benchmarkMomentAbout(b, 5, s, 0, wts) } func BenchmarkMomentAboutLargeWeighted(b *testing.B) { - s := RandomSlice(LARGE) - wts := RandomSlice(LARGE) + s := RandomSlice(large) + wts := RandomSlice(large) benchmarkMomentAbout(b, 5, s, 0, wts) } func BenchmarkMomentAboutHugeWeighted(b *testing.B) { - s := RandomSlice(HUGE) - wts := RandomSlice(HUGE) + s := RandomSlice(huge) + wts := RandomSlice(huge) benchmarkMomentAbout(b, 5, s, 0, wts) } From d50d7fb38343479334a351943488d834d4a77d93 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Mon, 22 Dec 2014 19:58:09 -0500 Subject: [PATCH 30/38] correct weighted test, and make fixture clearer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changed the names of the test table to be clearer. Also made the answer a matrix and used Dense.Equals instead of performing the comparison on r, c, and the raw data. Also, the old results for the weighted case were wrong. I’ve added the same items into the stat_test.go file. --- covariancematrix_test.go | 70 +++++++++++----------------------------- stat_test.go | 10 ++++++ 2 files changed, 28 insertions(+), 52 deletions(-) diff --git a/covariancematrix_test.go b/covariancematrix_test.go index cc42939b..67ca298d 100644 --- a/covariancematrix_test.go +++ b/covariancematrix_test.go @@ -9,7 +9,6 @@ import ( "testing" "github.com/gonum/blas/goblas" - "github.com/gonum/floats" "github.com/gonum/matrix/mat64" ) @@ -18,14 +17,16 @@ func init() { } func TestCovarianceMatrix(t *testing.T) { + + // An alternate way to test this is to call the Variance + // and Covariance functions and ensure that the results are identical. for i, test := range []struct { - mat mat64.Matrix + data mat64.Matrix weights mat64.Vec - r, c int - x []float64 + ans mat64.Matrix }{ { - mat: mat64.NewDense(5, 2, []float64{ + data: mat64.NewDense(5, 2, []float64{ -2, -4, -1, 2, 0, 0, @@ -33,65 +34,30 @@ func TestCovarianceMatrix(t *testing.T) { 2, 4, }), weights: nil, - r: 2, - c: 2, - x: []float64{ + ans: mat64.NewDense(2, 2, []float64{ 2.5, 3, 3, 10, - }, + }), }, { - mat: mat64.NewDense(5, 2, []float64{ - -2, -4, - -1, 2, - 0, 0, - 1, -2, + data: mat64.NewDense(3, 2, []float64{ + 1, 1, 2, 4, + 3, 9, }), weights: []float64{ + 1, 1.5, - .5, - 1.5, - .5, 1, }, - r: 2, - c: 2, - x: []float64{ - 2.75, 4.5, - 4.5, 11, - }, - }, { - mat: mat64.NewDense(5, 2, []float64{ - -2, -4, - -1, 2, - 0, 0, - 1, -2, - 2, 4, + ans: mat64.NewDense(2, 2, []float64{ + .8, 3.2, + 3.2, 13.142857142857146, }), - weights: mat64.Vec([]float64{ - 1.5, - .5, - 1.5, - .5, - 1, - }), - r: 2, - c: 2, - x: []float64{ - 2.75, 4.5, - 4.5, 11, - }, }, } { - c := CovarianceMatrix(nil, test.mat, test.weights).RawMatrix() - if c.Rows != test.r { - t.Errorf("%d: expected rows %d, found %d", i, test.r, c.Rows) - } - if c.Cols != test.c { - t.Errorf("%d: expected cols %d, found %d", i, test.c, c.Cols) - } - if !floats.Equal(test.x, c.Data) { - t.Errorf("%d: expected data %#q, found %#q", i, test.x, c.Data) + c := CovarianceMatrix(nil, test.data, test.weights) + if !c.Equals(test.ans) { + t.Errorf("%d: expected cov %v, found %v", i, test.ans, c) } } if !Panics(func() { CovarianceMatrix(nil, mat64.NewDense(5, 2, nil), mat64.Vec([]float64{})) }) { diff --git a/stat_test.go b/stat_test.go index c646aad9..a0ee39fa 100644 --- a/stat_test.go +++ b/stat_test.go @@ -1256,6 +1256,16 @@ func TestVariance(t *testing.T) { weights: []float64{2, 1, 2, 1, 1}, ans: 4.2857142857142865, }, + { + x: []float64{1, 4, 9}, + weights: []float64{1, 1.5, 1}, + ans: 13.142857142857146, + }, + { + x: []float64{1, 2, 3}, + weights: []float64{1, 1.5, 1}, + ans: .8, + }, } { variance := Variance(test.x, test.weights) if math.Abs(variance-test.ans) > 1e-14 { From 7357da958de2c93000c513f972b1ade359c5608f Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Mon, 22 Dec 2014 22:04:07 -0500 Subject: [PATCH 31/38] Change to simpler impl By taking advantage of some gonum functions, it is possible to remove much of the code related to centering the columns. Also tidied up many of the comments. --- covariancematrix.go | 95 +++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 51 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index fecef9da..8f712534 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -5,66 +5,25 @@ package stat import ( + "github.com/gonum/floats" "github.com/gonum/matrix/mat64" ) // CovarianceMatrix calculates a covariance matrix (also known as a // variance-covariance matrix) from a matrix of data, using a two-pass -// algorithm. It requires a registered BLAS engine in gonum/matrix/mat64. +// algorithm. The matrix returned will be symmetric, square, and +// positive-semidefinite. // -// The matrix returned will be symmetric, square, and positive-semidefinite. -// -// The weights wts should have the same number of elements as the rows in -// input data matrix x. cov should be a square matrix with the same number of -// columns as the input data matrix x, or if it is nil then a new Dense -// matrix will be constructed. +// The weights wts should have the length equal to the number of rows in +// input data matrix x. cov should either be a square matrix with the same +// number of columns as the input data matrix x, or nil in which case a new +// Dense matrix will be constructed. func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense { // This is the matrix version of the two-pass algorithm. It doesn't use // the correction found in the Covariance and Variance functions. r, c := x.Dims() - // determine the mean of each of the columns - ones := make([]float64, r) - for i := range ones { - ones[i] = 1 - } - b := mat64.NewDense(1, r, ones) - b.Mul(b, x) - b.Scale(1/float64(r), b) - mu := b.RowView(0) - - // subtract the mean from the data - xc := mat64.DenseCopyOf(x) - - for i := 0; i < r; i++ { - rv := xc.RowView(i) - for j, mean := range mu { - rv[j] -= mean - } - } - var xt mat64.Dense - xt.TCopy(xc) - - // Calculate the normalization factor, which is typically N-1. - var N float64 - if wts != nil { - if wr := len(wts); wr != r { - panic(mat64.ErrShape) - } - - for i, w := range wts { - rv := xc.RowView(i) - N += w - for j := 0; j < c; j++ { - rv[j] *= w - } - } - N = 1 / (N - 1) - } else { - N = 1 / float64(r-1) - } - - // TODO: indicate that the resulting matrix is symmetric, which + // TODO(jonlawlor): indicate that the resulting matrix is symmetric, which // should improve performance. if cov == nil { cov = mat64.NewDense(c, c, nil) @@ -72,7 +31,41 @@ func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.De panic(mat64.ErrShape) } - cov.Mul(&xt, xc) - cov.Scale(N, cov) + var xt mat64.Dense + xt.TCopy(x) + // Subtract the mean of each of the columns. + for i := 0; i < c; i++ { + v := xt.RowView(i) + mean := Mean(v, wts) + floats.AddConst(-mean, v) + } + + var xc mat64.Dense + xc.TCopy(&xt) + + var N, scale float64 + if wts != nil { + if wr := len(wts); wr != r { + panic(mat64.ErrShape) + } + + // Weight the rows. + for i := 0; i < c; i++ { + v := xt.RowView(i) + floats.Mul(v, wts) + } + + // Calculate the normalization factor. + N = floats.Sum(wts) + } else { + N = float64(r) + } + + cov.Mul(&xt, &xc) + + // Scale by the sample size. + scale = 1 / (N - 1) + cov.Scale(scale, cov) + return cov } From c540265584f99905c6e1ca2cc06747f9daec7546 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Mon, 22 Dec 2014 22:04:23 -0500 Subject: [PATCH 32/38] go fmt --- covariancematrix.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index 8f712534..d6b70e91 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -11,7 +11,7 @@ import ( // CovarianceMatrix calculates a covariance matrix (also known as a // variance-covariance matrix) from a matrix of data, using a two-pass -// algorithm. The matrix returned will be symmetric, square, and +// algorithm. The matrix returned will be symmetric, square, and // positive-semidefinite. // // The weights wts should have the length equal to the number of rows in @@ -39,16 +39,16 @@ func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.De mean := Mean(v, wts) floats.AddConst(-mean, v) } - + var xc mat64.Dense xc.TCopy(&xt) - + var N, scale float64 if wts != nil { if wr := len(wts); wr != r { panic(mat64.ErrShape) } - + // Weight the rows. for i := 0; i < c; i++ { v := xt.RowView(i) From 10947b5d63556b39209fa4d4f588b3eb4566f585 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Tue, 23 Dec 2014 20:15:00 -0500 Subject: [PATCH 33/38] expand algo comment --- covariancematrix.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index d6b70e91..b55f0b5c 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -19,8 +19,10 @@ import ( // number of columns as the input data matrix x, or nil in which case a new // Dense matrix will be constructed. func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.Dense { - // This is the matrix version of the two-pass algorithm. It doesn't use - // the correction found in the Covariance and Variance functions. + // This is the matrix version of the two-pass algorithm. It doesn't use the + // additional floating point error correction that the Covariance function uses + // to reduce the impact of rounding during centering. + r, c := x.Dims() // TODO(jonlawlor): indicate that the resulting matrix is symmetric, which From 93f33faa81a56bcd8e264d4c5835c990f9b85b59 Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Tue, 23 Dec 2014 21:03:12 -0500 Subject: [PATCH 34/38] add tests for immutability --- covariancematrix.go | 2 +- covariancematrix_test.go | 21 +++++++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index b55f0b5c..335d2930 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -22,7 +22,7 @@ func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.De // This is the matrix version of the two-pass algorithm. It doesn't use the // additional floating point error correction that the Covariance function uses // to reduce the impact of rounding during centering. - + r, c := x.Dims() // TODO(jonlawlor): indicate that the resulting matrix is symmetric, which diff --git a/covariancematrix_test.go b/covariancematrix_test.go index 67ca298d..e193961d 100644 --- a/covariancematrix_test.go +++ b/covariancematrix_test.go @@ -9,6 +9,7 @@ import ( "testing" "github.com/gonum/blas/goblas" + "github.com/gonum/floats" "github.com/gonum/matrix/mat64" ) @@ -21,9 +22,9 @@ func TestCovarianceMatrix(t *testing.T) { // An alternate way to test this is to call the Variance // and Covariance functions and ensure that the results are identical. for i, test := range []struct { - data mat64.Matrix + data *mat64.Dense weights mat64.Vec - ans mat64.Matrix + ans *mat64.Dense }{ { data: mat64.NewDense(5, 2, []float64{ @@ -55,10 +56,26 @@ func TestCovarianceMatrix(t *testing.T) { }), }, } { + // Make a copy of the data to check that it isn't changing. + r := test.data.RawMatrix() + d := make([]float64, len(r.Data)) + copy(d, r.Data) + + w := make([]float64, len(test.weights)) + if test.weights != nil { + copy(w, test.weights) + } c := CovarianceMatrix(nil, test.data, test.weights) if !c.Equals(test.ans) { t.Errorf("%d: expected cov %v, found %v", i, test.ans, c) } + if !floats.Equal(d, r.Data) { + t.Errorf("%d: data was modified during execution") + } + if !floats.Equal(w, test.weights) { + t.Errorf("%d: weights was modified during execution") + } + } if !Panics(func() { CovarianceMatrix(nil, mat64.NewDense(5, 2, nil), mat64.Vec([]float64{})) }) { t.Errorf("CovarianceMatrix did not panic with weight size mismatch") From fb2fe6268d3d2da0e1fb44f2f854859e16fc5c9c Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Tue, 23 Dec 2014 21:41:15 -0500 Subject: [PATCH 35/38] fix weighted covariance implementation Weighted covariance accidentally used squared weights. Added a test case and fixed implementation. --- stat.go | 6 +++--- stat_test.go | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/stat.go b/stat.go index ee2ad0b6..387d29c8 100644 --- a/stat.go +++ b/stat.go @@ -236,10 +236,10 @@ func Covariance(x, y, weights []float64) float64 { w := weights[i] yv := y[i] wxd := w * (xv - xu) - wyd := w * (yv - yu) - ss += wxd * wyd + yd := (yv - yu) + ss += wxd * yd xcompensation += wxd - ycompensation += wyd + ycompensation += w * yd sumWeights += w } // xcompensation and ycompensation are from Chan, et. al. diff --git a/stat_test.go b/stat_test.go index a0ee39fa..51e9a54c 100644 --- a/stat_test.go +++ b/stat_test.go @@ -128,6 +128,12 @@ func TestCovariance(t *testing.T) { weights: []float64{1, 1.5, 1}, ans: 3.2, }, + { + p: []float64{1, 4, 9}, + q: []float64{1, 4, 9}, + weights: []float64{1, 1.5, 1}, + ans: 13.142857142857146, + }, } { c := Covariance(test.p, test.q, test.weights) if math.Abs(c-test.ans) > 1e-14 { From 0328ead3be4f65307796a822683f780db68168ed Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Tue, 23 Dec 2014 21:43:26 -0500 Subject: [PATCH 36/38] Test comparison between CovarianceMatrix and Covariance Added consistency check. Surprise! It turns out that the Covariance implementation was wrong in the weighted case, sometimes. --- covariancematrix_test.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/covariancematrix_test.go b/covariancematrix_test.go index e193961d..e1f05f9d 100644 --- a/covariancematrix_test.go +++ b/covariancematrix_test.go @@ -5,8 +5,10 @@ package stat import ( + "math" "math/rand" "testing" + "fmt" "github.com/gonum/blas/goblas" "github.com/gonum/floats" @@ -76,6 +78,22 @@ func TestCovarianceMatrix(t *testing.T) { t.Errorf("%d: weights was modified during execution") } + // compare with call to Covariance + _, cols := c.Dims() + for ci := 0; ci < cols; ci++ { + for cj := 0; cj < cols; cj++ { + x := test.data.Col(nil, ci) + y := test.data.Col(nil, cj) + cov := Covariance(x, y, test.weights) + if math.Abs(cov-c.At(ci, cj)) > 1e-14 { + fmt.Println(x) + fmt.Println(y) + fmt.Println(test.weights) + t.Errorf("CovMat does not match at (%v, %v). Want %v, got %v.", ci, cj, cov, c.At(ci, cj)) + } + } + } + } if !Panics(func() { CovarianceMatrix(nil, mat64.NewDense(5, 2, nil), mat64.Vec([]float64{})) }) { t.Errorf("CovarianceMatrix did not panic with weight size mismatch") From 37b73e7875782098a104f7082c0e357a6939993b Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Tue, 23 Dec 2014 21:44:20 -0500 Subject: [PATCH 37/38] go fmt, and remove test logging --- covariancematrix_test.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/covariancematrix_test.go b/covariancematrix_test.go index e1f05f9d..63cef50a 100644 --- a/covariancematrix_test.go +++ b/covariancematrix_test.go @@ -8,7 +8,6 @@ import ( "math" "math/rand" "testing" - "fmt" "github.com/gonum/blas/goblas" "github.com/gonum/floats" @@ -86,9 +85,6 @@ func TestCovarianceMatrix(t *testing.T) { y := test.data.Col(nil, cj) cov := Covariance(x, y, test.weights) if math.Abs(cov-c.At(ci, cj)) > 1e-14 { - fmt.Println(x) - fmt.Println(y) - fmt.Println(test.weights) t.Errorf("CovMat does not match at (%v, %v). Want %v, got %v.", ci, cj, cov, c.At(ci, cj)) } } From 07dd9008038785b68d8599aa993f04bf6675b90e Mon Sep 17 00:00:00 2001 From: Jonathan J Lawlor Date: Wed, 24 Dec 2014 15:22:21 -0500 Subject: [PATCH 38/38] change the name of the N variable to n --- covariancematrix.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/covariancematrix.go b/covariancematrix.go index 335d2930..06515ed6 100644 --- a/covariancematrix.go +++ b/covariancematrix.go @@ -45,7 +45,7 @@ func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.De var xc mat64.Dense xc.TCopy(&xt) - var N, scale float64 + var n, scale float64 if wts != nil { if wr := len(wts); wr != r { panic(mat64.ErrShape) @@ -58,15 +58,15 @@ func CovarianceMatrix(cov *mat64.Dense, x mat64.Matrix, wts []float64) *mat64.De } // Calculate the normalization factor. - N = floats.Sum(wts) + n = floats.Sum(wts) } else { - N = float64(r) + n = float64(r) } cov.Mul(&xt, &xc) // Scale by the sample size. - scale = 1 / (N - 1) + scale = 1 / (n - 1) cov.Scale(scale, cov) return cov