From 11453e6b050e453bbd00cccb5caab75ba96d54d4 Mon Sep 17 00:00:00 2001 From: kortschak Date: Fri, 2 Jun 2017 12:59:30 +0930 Subject: [PATCH] stat/spatial: new package for spatial statistic measures --- stat/spatial/spatial.go | 132 ++++++++++++++ stat/spatial/spatial_areal_example_test.go | 74 ++++++++ stat/spatial/spatial_example_test.go | 75 ++++++++ stat/spatial/spatial_test.go | 197 +++++++++++++++++++++ 4 files changed, 478 insertions(+) create mode 100644 stat/spatial/spatial.go create mode 100644 stat/spatial/spatial_areal_example_test.go create mode 100644 stat/spatial/spatial_example_test.go create mode 100644 stat/spatial/spatial_test.go diff --git a/stat/spatial/spatial.go b/stat/spatial/spatial.go new file mode 100644 index 00000000..2d244207 --- /dev/null +++ b/stat/spatial/spatial.go @@ -0,0 +1,132 @@ +// Copyright ©2017 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package spatial // import "gonum.org/v1/gonum/stat/spatial" + +import ( + "math" + + "gonum.org/v1/gonum/mat" + "gonum.org/v1/gonum/stat" +) + +// TODO(kortschak): Implement weighted routines. +// TODO(kortschak): Make use of banded matrices when they exist in mat. + +// GetisOrdGStar returns the Local Getis-Ord G*i statistic for element of of the +// weighted data using the provided locality matrix. The returned value is a z-score. +// +// G^*_i = num_i / den_i +// +// num_i = \sum_j (w_{ij} x_j) - \bar X \sum_j w_{ij} +// den_i = S \sqrt(((n \sum_j w_{ij}^2 - (\sum_j w_{ij})^2))/(n - 1)) +// \bar X = (\sum_j x_j) / n +// S = \sqrt((\sum_j x_j^2)/n - (\bar X)^2) +// +// GetisOrdGStar will panic if locality is not a square matrix with dimensions the +// same as the length of data or if i is not a valid index into data. +// +// See doi.org/10.1111%2Fj.1538-4632.1995.tb00912.x. +// +// Weighted Getis-Ord G*i is not currently implemented and GetisOrdGStar will +// panic if weights is not nil. +func GetisOrdGStar(i int, data, weights []float64, locality mat.Matrix) float64 { + if weights != nil { + panic("spatial: weighted data not yet implemented") + } + r, c := locality.Dims() + if r != len(data) || c != len(data) { + panic("spatial: data length mismatch") + } + + n := float64(len(data)) + mean, std := stat.MeanStdDev(data, weights) + var dwd, dww, sw float64 + for j, v := range data { + w := locality.At(i, j) + sw += w + dwd += w * v + dww += w * w + } + s := std * math.Sqrt((n-1)/n) + + return (dwd - mean*sw) / (s * math.Sqrt((n*dww-sw*sw)/(n-1))) +} + +// GlobalMoransI performs Global Moran's I calculation of spatial autocorrelation +// for the given data using the provided locality matrix. GlobalMoransI returns +// Moran's I, Var(I) and the z-score associated with those values. +// GlobalMoransI will panic if locality is not a square matrix with dimensions the +// same as the length of data. +// +// See https://doi.org/10.1111%2Fj.1538-4632.2007.00708.x. +// +// Weighted Global Moran's I is not currently implemented and GlobalMoransI will +// panic if weights is not nil. +func GlobalMoransI(data, weights []float64, locality mat.Matrix) (i, v, z float64) { + if weights != nil { + panic("spatial: weighted data not yet implemented") + } + if r, c := locality.Dims(); r != len(data) || c != len(data) { + panic("spatial: data length mismatch") + } + mean := stat.Mean(data, nil) + + // Calculate Moran's I for the data. + var num, den, sum float64 + for i, xi := range data { + zi := xi - mean + den += zi * zi + for j, xj := range data { + w := locality.At(i, j) + sum += w + zj := xj - mean + num += w * zi * zj + } + } + i = (float64(len(data)) / sum) * (num / den) + + // Calculate Moran's E(I) for the data. + e := -1 / float64(len(data)-1) + + // Calculate Moran's Var(I) for the data. + // http://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-how-spatial-autocorrelation-moran-s-i-spatial-st.htm + // http://pro.arcgis.com/en/pro-app/tool-reference/spatial-statistics/h-global-morans-i-additional-math.htm + var s0, s1, s2 float64 + var var2, var4 float64 + for i, v := range data { + v -= mean + v *= v + var2 += v + var4 += v * v + + var p2 float64 + for j := range data { + wij := locality.At(i, j) + wji := locality.At(j, i) + + s0 += wij + + v := wij + wji + s1 += v * v + + p2 += v + } + s2 += p2 * p2 + } + s1 *= 0.5 + + n := float64(len(data)) + a := n * ((n*n-3*n+3)*s1 - n*s2 + 3*s0*s0) + c := (n - 1) * (n - 2) * (n - 3) * s0 * s0 + d := var4 / (var2 * var2) + b := d * ((n*n-n)*s1 - 2*n*s2 + 6*s0*s0) + + v = (a-b)/c - e*e + + // Calculate z-score associated with Moran's I for the data. + z = (i - e) / math.Sqrt(v) + + return i, v, z +} diff --git a/stat/spatial/spatial_areal_example_test.go b/stat/spatial/spatial_areal_example_test.go new file mode 100644 index 00000000..1f849292 --- /dev/null +++ b/stat/spatial/spatial_areal_example_test.go @@ -0,0 +1,74 @@ +// Copyright ©2017 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package spatial_test + +import ( + "fmt" + "math" + + "gonum.org/v1/gonum/floats" + "gonum.org/v1/gonum/mat" + "gonum.org/v1/gonum/stat/spatial" +) + +// Euclid is a mat.Matrix whose elements refects the Euclidean +// distance between a series of unit-separated points strided +// to be arranged in an x by y grid. +type Euclid struct{ x, y int } + +func (e Euclid) Dims() (r, c int) { return e.x * e.y, e.x * e.y } +func (e Euclid) At(i, j int) float64 { + d := e.x * e.y + if i < 0 || d <= i || j < 0 || d <= j { + panic("bounds error") + } + if i == j { + return 0 + } + x := float64(j%e.x - i%e.x) + y := float64(j/e.x - i/e.x) + return 1 / math.Hypot(x, y) +} +func (e Euclid) T() mat.Matrix { return mat.Transpose{e} } + +func ExampleGlobalMoransI_areal() { + locality := Euclid{10, 10} + + data1 := []float64{ + 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, + 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, + 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, + } + i1, _, z1 := spatial.GlobalMoransI(data1, nil, locality) + + data2 := []float64{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, + 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + } + i2, _, z2 := spatial.GlobalMoransI(data2, nil, locality) + + fmt.Printf("%v scattered points Moran's I=%.4v z-score=%.4v\n", floats.Sum(data1), i1, z1) + fmt.Printf("%v clustered points Moran's I=%.4v z-score=%.4v\n", floats.Sum(data2), i2, z2) + + // Output: + // + // 24 scattered points Moran's I=-0.02999 z-score=-1.913 + // 24 clustered points Moran's I=0.09922 z-score=10.52 +} diff --git a/stat/spatial/spatial_example_test.go b/stat/spatial/spatial_example_test.go new file mode 100644 index 00000000..4a3f8d82 --- /dev/null +++ b/stat/spatial/spatial_example_test.go @@ -0,0 +1,75 @@ +// Copyright ©2017 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package spatial_test + +import ( + "fmt" + + "gonum.org/v1/gonum/mat" + "gonum.org/v1/gonum/stat/spatial" +) + +func ExampleGlobalMoransI_linear() { + data := []float64{0, 0, 0, 1, 1, 1, 0, 1, 0, 0} + + // The locality here describes spatial neighbor + // relationships. + locality := mat.NewDense(10, 10, []float64{ + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + }) + + i, _, z := spatial.GlobalMoransI(data, nil, locality) + + fmt.Printf("Moran's I=%.4v z-score=%.4v\n", i, z) + + // Output: + // + // Moran's I=0.1111 z-score=0.6335 +} + +func ExampleGetisOrd() { + data := []float64{0, 0, 0, 1, 1, 1, 0, 1, 0, 0} + + // The locality here describes spatial neighbor + // relationships including self. + locality := mat.NewDense(10, 10, []float64{ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, + }) + + for i, v := range data { + fmt.Printf("v=%v G*i=% .4v\n", v, spatial.GetisOrdGStar(i, data, nil, locality)) + } + + // Output: + // + // v=0 G*i=-1.225 + // v=0 G*i=-1.604 + // v=0 G*i=-0.2673 + // v=1 G*i= 1.069 + // v=1 G*i= 2.405 + // v=1 G*i= 1.069 + // v=0 G*i= 1.069 + // v=1 G*i=-0.2673 + // v=0 G*i=-0.2673 + // v=0 G*i=-1.225 +} diff --git a/stat/spatial/spatial_test.go b/stat/spatial/spatial_test.go new file mode 100644 index 00000000..1e281974 --- /dev/null +++ b/stat/spatial/spatial_test.go @@ -0,0 +1,197 @@ +// Copyright ©2017 The gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package spatial + +import ( + "math" + "math/rand" + "testing" + + "gonum.org/v1/gonum/floats" + "gonum.org/v1/gonum/mat" +) + +func simpleAdjacency(n, wide int, diag bool) *mat.Dense { + m := mat.NewDense(n, n, nil) + for i := 0; i < n; i++ { + for j := 1; j <= wide; j++ { + if j > i { + continue + } + m.Set(i-j, i, 1) + m.Set(i, i-j, 1) + } + if diag { + m.Set(i, i, 1) + } + } + return m +} + +var spatialTests = []struct { + from, to float64 + n, wide int + fn func(float64, int, *rand.Rand) float64 + locality func(n, wide int, diag bool) *mat.Dense + + // Values for MoranI and z-score are obtained from + // an R reference implementation. + wantMoranI float64 + wantZ float64 + + // The value for expected number of significant + // segments is obtained from visual inspection + // of the plotted data. + wantSegs int +}{ + { + from: 0, to: 1, n: 1000, wide: 1, + fn: func(_ float64, _ int, rnd *rand.Rand) float64 { + return rnd.Float64() + }, + locality: simpleAdjacency, + + wantMoranI: -0.0019631298955953233, + wantZ: -0.03039477405151108, + wantSegs: 0, + }, + { + from: -math.Pi / 2, to: 3 * math.Pi / 2, n: 1000, wide: 1, + fn: func(x float64, _ int, _ *rand.Rand) float64 { + y := math.Sin(x) + if math.Abs(y) > 0.5 { + y *= 1/math.Abs(y) - 1 + } + return y * math.Sin(x*2) + }, + locality: simpleAdjacency, + + wantMoranI: 1.0008149537991464, + wantZ: 31.648547078779092, + wantSegs: 4, + }, + { + from: 0, to: 1, n: 1000, wide: 1, + fn: func(_ float64, _ int, rnd *rand.Rand) float64 { + return rnd.NormFloat64() + }, + locality: simpleAdjacency, + + wantMoranI: 0.031195199553564902, + wantZ: 1.0171161514080056, + wantSegs: 0, + }, + { + from: 0, to: 1, n: 1000, wide: 1, + fn: func(x float64, _ int, rnd *rand.Rand) float64 { + if rnd.Float64() < 0.5 { + return rnd.NormFloat64() + 5 + } + return rnd.NormFloat64() + }, + locality: simpleAdjacency, + + wantMoranI: -0.016245135637562223, + wantZ: -0.48157993864993476, + wantSegs: 0, + }, + { + from: 0, to: 1, n: 1000, wide: 1, + fn: func(x float64, i int, rnd *rand.Rand) float64 { + if i%2 == 0 { + return rnd.NormFloat64() + 5 + } + return rnd.NormFloat64() + }, + locality: simpleAdjacency, + + wantMoranI: -0.8565268969272998, + wantZ: -27.027057520918113, + wantSegs: 0, + }, + { + from: 0, to: 1, n: 1000, wide: 1, + fn: func(_ float64, i int, _ *rand.Rand) float64 { + return float64(i % 2) + }, + locality: simpleAdjacency, + + wantMoranI: -1, + wantZ: -31.559531064275987, + wantSegs: 0, + }, +} + +func TestGetisOrd(t *testing.T) { + for ti, test := range spatialTests { + rnd := rand.New(rand.NewSource(1)) + data := make([]float64, test.n) + step := (test.to - test.from) / float64(test.n) + for i := range data { + data[i] = test.fn(test.from+step*float64(i), i, rnd) + } + locality := test.locality(test.n, test.wide, true) + + nseg := getisOrdSegments(data, nil, locality) + if nseg != test.wantSegs { + t.Errorf("unexpected number of significant segments for test %d: got:%d want:%d", + ti, nseg, test.wantSegs) + } + } +} + +// getisOrdSegments returns the number of contiguously significant G*i segemtns in +// data. This allows an intuitive validation of the function in lieu of a reference +// implementation. +func getisOrdSegments(data, weight []float64, locality mat.Matrix) int { + const thresh = 2 + var nseg int + segstart := -1 + for i := range data { + gi := GetisOrdGStar(i, data, weight, locality) + if segstart != -1 { + if math.Abs(gi) < thresh { + // Filter short segments. + if i-segstart < 5 { + segstart = -1 + continue + } + + segstart = -1 + nseg++ + } + continue + } + if math.Abs(gi) >= thresh { + segstart = i + } + } + if segstart != -1 && len(data)-segstart >= 5 { + nseg++ + } + return nseg +} + +func TestGlobalMoransI(t *testing.T) { + const tol = 1e-14 + for ti, test := range spatialTests { + rnd := rand.New(rand.NewSource(1)) + data := make([]float64, test.n) + step := (test.to - test.from) / float64(test.n) + for i := range data { + data[i] = test.fn(test.from+step*float64(i), i, rnd) + } + locality := test.locality(test.n, test.wide, false) + + gotI, _, gotZ := GlobalMoransI(data, nil, locality) + + if !floats.EqualWithinAbsOrRel(gotI, test.wantMoranI, tol, tol) { + t.Errorf("unexpected Moran's I value for test %d: got:%v want:%v", ti, gotI, test.wantMoranI) + } + if !floats.EqualWithinAbsOrRel(gotZ, test.wantZ, tol, tol) { + t.Errorf("unexpected Moran's I z-score for test %d: got:%v want:%v", ti, gotZ, test.wantZ) + } + } +}