stat: imported stat as a subtree

This commit is contained in:
Brendan Tracey
2017-05-23 00:02:52 -06:00
78 changed files with 15548 additions and 0 deletions

23
stat/.travis.yml Normal file
View File

@@ -0,0 +1,23 @@
# Travis CI configuration for the gonum/stat repository.
# Runs the build/test matrix on each supported Go version and, when the
# secure environment variables are available, uploads coverage to coveralls.
sudo: false
language: go
# Versions of go that are explicitly supported by gonum.
go:
- 1.5.4
- 1.6.3
- 1.7.3
# Required for coverage.
before_install:
- go get golang.org/x/tools/cmd/cover
- go get github.com/mattn/goveralls
# Get deps, build, test, and ensure the code is gofmt'ed.
# If we are building as gonum, then we have access to the coveralls api key, so we can run coverage as well.
script:
- go get -d -t -v ./...
- go build -v ./...
- go test -v ./...
- test -z "$(gofmt -d .)"
# TRAVIS_SECURE_ENV_VARS is "true" only when the secure variables (the
# coveralls token) are present, i.e. not on pull requests from forks.
- if [[ $TRAVIS_SECURE_ENV_VARS = "true" ]]; then bash ./.travis/test-coverage.sh; fi

35
stat/.travis/test-coverage.sh Executable file
View File

@@ -0,0 +1,35 @@
#!/bin/bash
# Collects a merged coverage profile for every package in the repository
# and uploads it to coveralls.io.

PROFILE_OUT=$PWD/profile.out
ACC_OUT=$PWD/acc.out

# testCover runs `go test -coverprofile` in the directory given as $1
# (defaulting to '.') and appends the resulting profile to $ACC_OUT.
# Returns 1 if the tests failed, 0 otherwise.
testCover() {
	# set the return value to 0 (successful)
	retval=0
	# get the directory to check from the parameter. Default to '.'
	d=${1:-.}
	# skip if there are no Go files here
	ls "$d"/*.go &> /dev/null || return $retval
	# switch to the directory to check
	pushd "$d" > /dev/null
	# create the coverage profile
	coverageresult=$(go test -v -coverprofile="$PROFILE_OUT")
	# output the result so we can check the shell output
	echo "${coverageresult}"
	# Append the results to acc.out if coverage didn't fail, else set
	# retval to 1 (failed). This must NOT use a ( ... ) subshell: an
	# assignment to retval inside a subshell would never propagate back,
	# silently discarding test failures.
	if [[ ${coverageresult} == *FAIL* ]]; then
		retval=1
	elif [ -f "$PROFILE_OUT" ]; then
		grep -v "mode: set" "$PROFILE_OUT" >> "$ACC_OUT"
	fi
	# return to our working dir
	popd > /dev/null
	# return our return value
	return $retval
}

# Init acc.out
echo "mode: set" > "$ACC_OUT"

# Run test coverage on all directories containing go files.
# Feed the loop via process substitution rather than `find | while`: in a
# pipeline the loop body runs in a subshell, so `exit` could not abort the
# script and a broken package would still be uploaded as "passing".
while read -r d; do
	testCover "$d" || exit
done < <(find . -maxdepth 10 -type d)

# Upload the coverage profile to coveralls.io (the token is only present on
# trusted builds, not on pull requests from forks).
[ -n "$COVERALLS_TOKEN" ] && goveralls -coverprofile="$ACC_OUT" -service=travis-ci -repotoken "$COVERALLS_TOKEN"

13
stat/README.md Normal file
View File

@@ -0,0 +1,13 @@
# Gonum Stat [![Build Status](https://travis-ci.org/gonum/stat.svg?branch=master)](https://travis-ci.org/gonum/stat) [![Coverage Status](https://coveralls.io/repos/gonum/stat/badge.svg?branch=master&service=github)](https://coveralls.io/github/gonum/stat?branch=master) [![GoDoc](https://godoc.org/github.com/gonum/stat?status.svg)](https://godoc.org/github.com/gonum/stat)
This is a statistics package for the Go language.
## Issues
If you find any bugs, feel free to file an issue on the GitHub issue tracker. Discussions of API changes, added features, code review, or similar requests are preferred on the gonum-dev Google Group.
https://groups.google.com/forum/#!forum/gonum-dev
## License
Please see github.com/gonum/license for general license information, contributors, authors, etc. for the Gonum suite of packages.

531
stat/boston_data_test.go Normal file
View File

@@ -0,0 +1,531 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stat_test
import "github.com/gonum/matrix/mat64"
// Boston Housing Data of Harrison and Rubinfeld (1978)
// http://dx.doi.org/10.1016/0095-0696(78)90006-2
// http://lib.stat.cmu.edu/datasets/boston
// Columns are;
// per capita crime rate by town,
// proportion of non-retail business acres per town,
// nitric oxide concentration (parts per 10 million),
// weighted distances to Boston employment centers,
// index of accessibility to radial highways,
// pupil-teacher ratio by town,
// proportion of blacks by town,
// average number of rooms per dwelling,
// proportion of owner-occupied units built prior to 1940,
// full-value property-tax rate per $10000,
// median value of owner-occupied homes in $1000s.
var bostonData = mat64.NewDense(506, 11, []float64{
0.00632, 2.31000, 0.53800, 4.09000, 1.00000, 15.30000, 396.90000, 6.57500, 65.20000, 296.00000, 24.00000,
0.02731, 7.07000, 0.46900, 4.96710, 2.00000, 17.80000, 396.90000, 6.42100, 78.90000, 242.00000, 21.60000,
0.02729, 7.07000, 0.46900, 4.96710, 2.00000, 17.80000, 392.83000, 7.18500, 61.10000, 242.00000, 34.70000,
0.03237, 2.18000, 0.45800, 6.06220, 3.00000, 18.70000, 394.63000, 6.99800, 45.80000, 222.00000, 33.40000,
0.06905, 2.18000, 0.45800, 6.06220, 3.00000, 18.70000, 396.90000, 7.14700, 54.20000, 222.00000, 36.20000,
0.02985, 2.18000, 0.45800, 6.06220, 3.00000, 18.70000, 394.12000, 6.43000, 58.70000, 222.00000, 28.70000,
0.08829, 7.87000, 0.52400, 5.56050, 5.00000, 15.20000, 395.60000, 6.01200, 66.60000, 311.00000, 22.90000,
0.14455, 7.87000, 0.52400, 5.95050, 5.00000, 15.20000, 396.90000, 6.17200, 96.10000, 311.00000, 27.10000,
0.21124, 7.87000, 0.52400, 6.08210, 5.00000, 15.20000, 386.63000, 5.63100, 100.00000, 311.00000, 16.50000,
0.17004, 7.87000, 0.52400, 6.59210, 5.00000, 15.20000, 386.71000, 6.00400, 85.90000, 311.00000, 18.90000,
0.22489, 7.87000, 0.52400, 6.34670, 5.00000, 15.20000, 392.52000, 6.37700, 94.30000, 311.00000, 15.00000,
0.11747, 7.87000, 0.52400, 6.22670, 5.00000, 15.20000, 396.90000, 6.00900, 82.90000, 311.00000, 18.90000,
0.09378, 7.87000, 0.52400, 5.45090, 5.00000, 15.20000, 390.50000, 5.88900, 39.00000, 311.00000, 21.70000,
0.62976, 8.14000, 0.53800, 4.70750, 4.00000, 21.00000, 396.90000, 5.94900, 61.80000, 307.00000, 20.40000,
0.63796, 8.14000, 0.53800, 4.46190, 4.00000, 21.00000, 380.02000, 6.09600, 84.50000, 307.00000, 18.20000,
0.62739, 8.14000, 0.53800, 4.49860, 4.00000, 21.00000, 395.62000, 5.83400, 56.50000, 307.00000, 19.90000,
1.05393, 8.14000, 0.53800, 4.49860, 4.00000, 21.00000, 386.85000, 5.93500, 29.30000, 307.00000, 23.10000,
0.78420, 8.14000, 0.53800, 4.25790, 4.00000, 21.00000, 386.75000, 5.99000, 81.70000, 307.00000, 17.50000,
0.80271, 8.14000, 0.53800, 3.79650, 4.00000, 21.00000, 288.99000, 5.45600, 36.60000, 307.00000, 20.20000,
0.72580, 8.14000, 0.53800, 3.79650, 4.00000, 21.00000, 390.95000, 5.72700, 69.50000, 307.00000, 18.20000,
1.25179, 8.14000, 0.53800, 3.79790, 4.00000, 21.00000, 376.57000, 5.57000, 98.10000, 307.00000, 13.60000,
0.85204, 8.14000, 0.53800, 4.01230, 4.00000, 21.00000, 392.53000, 5.96500, 89.20000, 307.00000, 19.60000,
1.23247, 8.14000, 0.53800, 3.97690, 4.00000, 21.00000, 396.90000, 6.14200, 91.70000, 307.00000, 15.20000,
0.98843, 8.14000, 0.53800, 4.09520, 4.00000, 21.00000, 394.54000, 5.81300, 100.00000, 307.00000, 14.50000,
0.75026, 8.14000, 0.53800, 4.39960, 4.00000, 21.00000, 394.33000, 5.92400, 94.10000, 307.00000, 15.60000,
0.84054, 8.14000, 0.53800, 4.45460, 4.00000, 21.00000, 303.42000, 5.59900, 85.70000, 307.00000, 13.90000,
0.67191, 8.14000, 0.53800, 4.68200, 4.00000, 21.00000, 376.88000, 5.81300, 90.30000, 307.00000, 16.60000,
0.95577, 8.14000, 0.53800, 4.45340, 4.00000, 21.00000, 306.38000, 6.04700, 88.80000, 307.00000, 14.80000,
0.77299, 8.14000, 0.53800, 4.45470, 4.00000, 21.00000, 387.94000, 6.49500, 94.40000, 307.00000, 18.40000,
1.00245, 8.14000, 0.53800, 4.23900, 4.00000, 21.00000, 380.23000, 6.67400, 87.30000, 307.00000, 21.00000,
1.13081, 8.14000, 0.53800, 4.23300, 4.00000, 21.00000, 360.17000, 5.71300, 94.10000, 307.00000, 12.70000,
1.35472, 8.14000, 0.53800, 4.17500, 4.00000, 21.00000, 376.73000, 6.07200, 100.00000, 307.00000, 14.50000,
1.38799, 8.14000, 0.53800, 3.99000, 4.00000, 21.00000, 232.60000, 5.95000, 82.00000, 307.00000, 13.20000,
1.15172, 8.14000, 0.53800, 3.78720, 4.00000, 21.00000, 358.77000, 5.70100, 95.00000, 307.00000, 13.10000,
1.61282, 8.14000, 0.53800, 3.75980, 4.00000, 21.00000, 248.31000, 6.09600, 96.90000, 307.00000, 13.50000,
0.06417, 5.96000, 0.49900, 3.36030, 5.00000, 19.20000, 396.90000, 5.93300, 68.20000, 279.00000, 18.90000,
0.09744, 5.96000, 0.49900, 3.37790, 5.00000, 19.20000, 377.56000, 5.84100, 61.40000, 279.00000, 20.00000,
0.08014, 5.96000, 0.49900, 3.93420, 5.00000, 19.20000, 396.90000, 5.85000, 41.50000, 279.00000, 21.00000,
0.17505, 5.96000, 0.49900, 3.84730, 5.00000, 19.20000, 393.43000, 5.96600, 30.20000, 279.00000, 24.70000,
0.02763, 2.95000, 0.42800, 5.40110, 3.00000, 18.30000, 395.63000, 6.59500, 21.80000, 252.00000, 30.80000,
0.03359, 2.95000, 0.42800, 5.40110, 3.00000, 18.30000, 395.62000, 7.02400, 15.80000, 252.00000, 34.90000,
0.12744, 6.91000, 0.44800, 5.72090, 3.00000, 17.90000, 385.41000, 6.77000, 2.90000, 233.00000, 26.60000,
0.14150, 6.91000, 0.44800, 5.72090, 3.00000, 17.90000, 383.37000, 6.16900, 6.60000, 233.00000, 25.30000,
0.15936, 6.91000, 0.44800, 5.72090, 3.00000, 17.90000, 394.46000, 6.21100, 6.50000, 233.00000, 24.70000,
0.12269, 6.91000, 0.44800, 5.72090, 3.00000, 17.90000, 389.39000, 6.06900, 40.00000, 233.00000, 21.20000,
0.17142, 6.91000, 0.44800, 5.10040, 3.00000, 17.90000, 396.90000, 5.68200, 33.80000, 233.00000, 19.30000,
0.18836, 6.91000, 0.44800, 5.10040, 3.00000, 17.90000, 396.90000, 5.78600, 33.30000, 233.00000, 20.00000,
0.22927, 6.91000, 0.44800, 5.68940, 3.00000, 17.90000, 392.74000, 6.03000, 85.50000, 233.00000, 16.60000,
0.25387, 6.91000, 0.44800, 5.87000, 3.00000, 17.90000, 396.90000, 5.39900, 95.30000, 233.00000, 14.40000,
0.21977, 6.91000, 0.44800, 6.08770, 3.00000, 17.90000, 396.90000, 5.60200, 62.00000, 233.00000, 19.40000,
0.08873, 5.64000, 0.43900, 6.81470, 4.00000, 16.80000, 395.56000, 5.96300, 45.70000, 243.00000, 19.70000,
0.04337, 5.64000, 0.43900, 6.81470, 4.00000, 16.80000, 393.97000, 6.11500, 63.00000, 243.00000, 20.50000,
0.05360, 5.64000, 0.43900, 6.81470, 4.00000, 16.80000, 396.90000, 6.51100, 21.10000, 243.00000, 25.00000,
0.04981, 5.64000, 0.43900, 6.81470, 4.00000, 16.80000, 396.90000, 5.99800, 21.40000, 243.00000, 23.40000,
0.01360, 4.00000, 0.41000, 7.31970, 3.00000, 21.10000, 396.90000, 5.88800, 47.60000, 469.00000, 18.90000,
0.01311, 1.22000, 0.40300, 8.69660, 5.00000, 17.90000, 395.93000, 7.24900, 21.90000, 226.00000, 35.40000,
0.02055, 0.74000, 0.41000, 9.18760, 2.00000, 17.30000, 396.90000, 6.38300, 35.70000, 313.00000, 24.70000,
0.01432, 1.32000, 0.41100, 8.32480, 5.00000, 15.10000, 392.90000, 6.81600, 40.50000, 256.00000, 31.60000,
0.15445, 5.13000, 0.45300, 7.81480, 8.00000, 19.70000, 390.68000, 6.14500, 29.20000, 284.00000, 23.30000,
0.10328, 5.13000, 0.45300, 6.93200, 8.00000, 19.70000, 396.90000, 5.92700, 47.20000, 284.00000, 19.60000,
0.14932, 5.13000, 0.45300, 7.22540, 8.00000, 19.70000, 395.11000, 5.74100, 66.20000, 284.00000, 18.70000,
0.17171, 5.13000, 0.45300, 6.81850, 8.00000, 19.70000, 378.08000, 5.96600, 93.40000, 284.00000, 16.00000,
0.11027, 5.13000, 0.45300, 7.22550, 8.00000, 19.70000, 396.90000, 6.45600, 67.80000, 284.00000, 22.20000,
0.12650, 5.13000, 0.45300, 7.98090, 8.00000, 19.70000, 395.58000, 6.76200, 43.40000, 284.00000, 25.00000,
0.01951, 1.38000, 0.41610, 9.22290, 3.00000, 18.60000, 393.24000, 7.10400, 59.50000, 216.00000, 33.00000,
0.03584, 3.37000, 0.39800, 6.61150, 4.00000, 16.10000, 396.90000, 6.29000, 17.80000, 337.00000, 23.50000,
0.04379, 3.37000, 0.39800, 6.61150, 4.00000, 16.10000, 396.90000, 5.78700, 31.10000, 337.00000, 19.40000,
0.05789, 6.07000, 0.40900, 6.49800, 4.00000, 18.90000, 396.21000, 5.87800, 21.40000, 345.00000, 22.00000,
0.13554, 6.07000, 0.40900, 6.49800, 4.00000, 18.90000, 396.90000, 5.59400, 36.80000, 345.00000, 17.40000,
0.12816, 6.07000, 0.40900, 6.49800, 4.00000, 18.90000, 396.90000, 5.88500, 33.00000, 345.00000, 20.90000,
0.08826, 10.81000, 0.41300, 5.28730, 4.00000, 19.20000, 383.73000, 6.41700, 6.60000, 305.00000, 24.20000,
0.15876, 10.81000, 0.41300, 5.28730, 4.00000, 19.20000, 376.94000, 5.96100, 17.50000, 305.00000, 21.70000,
0.09164, 10.81000, 0.41300, 5.28730, 4.00000, 19.20000, 390.91000, 6.06500, 7.80000, 305.00000, 22.80000,
0.19539, 10.81000, 0.41300, 5.28730, 4.00000, 19.20000, 377.17000, 6.24500, 6.20000, 305.00000, 23.40000,
0.07896, 12.83000, 0.43700, 4.25150, 5.00000, 18.70000, 394.92000, 6.27300, 6.00000, 398.00000, 24.10000,
0.09512, 12.83000, 0.43700, 4.50260, 5.00000, 18.70000, 383.23000, 6.28600, 45.00000, 398.00000, 21.40000,
0.10153, 12.83000, 0.43700, 4.05220, 5.00000, 18.70000, 373.66000, 6.27900, 74.50000, 398.00000, 20.00000,
0.08707, 12.83000, 0.43700, 4.09050, 5.00000, 18.70000, 386.96000, 6.14000, 45.80000, 398.00000, 20.80000,
0.05646, 12.83000, 0.43700, 5.01410, 5.00000, 18.70000, 386.40000, 6.23200, 53.70000, 398.00000, 21.20000,
0.08387, 12.83000, 0.43700, 4.50260, 5.00000, 18.70000, 396.06000, 5.87400, 36.60000, 398.00000, 20.30000,
0.04113, 4.86000, 0.42600, 5.40070, 4.00000, 19.00000, 396.90000, 6.72700, 33.50000, 281.00000, 28.00000,
0.04462, 4.86000, 0.42600, 5.40070, 4.00000, 19.00000, 395.63000, 6.61900, 70.40000, 281.00000, 23.90000,
0.03659, 4.86000, 0.42600, 5.40070, 4.00000, 19.00000, 396.90000, 6.30200, 32.20000, 281.00000, 24.80000,
0.03551, 4.86000, 0.42600, 5.40070, 4.00000, 19.00000, 390.64000, 6.16700, 46.70000, 281.00000, 22.90000,
0.05059, 4.49000, 0.44900, 4.77940, 3.00000, 18.50000, 396.90000, 6.38900, 48.00000, 247.00000, 23.90000,
0.05735, 4.49000, 0.44900, 4.43770, 3.00000, 18.50000, 392.30000, 6.63000, 56.10000, 247.00000, 26.60000,
0.05188, 4.49000, 0.44900, 4.42720, 3.00000, 18.50000, 395.99000, 6.01500, 45.10000, 247.00000, 22.50000,
0.07151, 4.49000, 0.44900, 3.74760, 3.00000, 18.50000, 395.15000, 6.12100, 56.80000, 247.00000, 22.20000,
0.05660, 3.41000, 0.48900, 3.42170, 2.00000, 17.80000, 396.90000, 7.00700, 86.30000, 270.00000, 23.60000,
0.05302, 3.41000, 0.48900, 3.41450, 2.00000, 17.80000, 396.06000, 7.07900, 63.10000, 270.00000, 28.70000,
0.04684, 3.41000, 0.48900, 3.09230, 2.00000, 17.80000, 392.18000, 6.41700, 66.10000, 270.00000, 22.60000,
0.03932, 3.41000, 0.48900, 3.09210, 2.00000, 17.80000, 393.55000, 6.40500, 73.90000, 270.00000, 22.00000,
0.04203, 15.04000, 0.46400, 3.66590, 4.00000, 18.20000, 395.01000, 6.44200, 53.60000, 270.00000, 22.90000,
0.02875, 15.04000, 0.46400, 3.66590, 4.00000, 18.20000, 396.33000, 6.21100, 28.90000, 270.00000, 25.00000,
0.04294, 15.04000, 0.46400, 3.61500, 4.00000, 18.20000, 396.90000, 6.24900, 77.30000, 270.00000, 20.60000,
0.12204, 2.89000, 0.44500, 3.49520, 2.00000, 18.00000, 357.98000, 6.62500, 57.80000, 276.00000, 28.40000,
0.11504, 2.89000, 0.44500, 3.49520, 2.00000, 18.00000, 391.83000, 6.16300, 69.60000, 276.00000, 21.40000,
0.12083, 2.89000, 0.44500, 3.49520, 2.00000, 18.00000, 396.90000, 8.06900, 76.00000, 276.00000, 38.70000,
0.08187, 2.89000, 0.44500, 3.49520, 2.00000, 18.00000, 393.53000, 7.82000, 36.90000, 276.00000, 43.80000,
0.06860, 2.89000, 0.44500, 3.49520, 2.00000, 18.00000, 396.90000, 7.41600, 62.50000, 276.00000, 33.20000,
0.14866, 8.56000, 0.52000, 2.77780, 5.00000, 20.90000, 394.76000, 6.72700, 79.90000, 384.00000, 27.50000,
0.11432, 8.56000, 0.52000, 2.85610, 5.00000, 20.90000, 395.58000, 6.78100, 71.30000, 384.00000, 26.50000,
0.22876, 8.56000, 0.52000, 2.71470, 5.00000, 20.90000, 70.80000, 6.40500, 85.40000, 384.00000, 18.60000,
0.21161, 8.56000, 0.52000, 2.71470, 5.00000, 20.90000, 394.47000, 6.13700, 87.40000, 384.00000, 19.30000,
0.13960, 8.56000, 0.52000, 2.42100, 5.00000, 20.90000, 392.69000, 6.16700, 90.00000, 384.00000, 20.10000,
0.13262, 8.56000, 0.52000, 2.10690, 5.00000, 20.90000, 394.05000, 5.85100, 96.70000, 384.00000, 19.50000,
0.17120, 8.56000, 0.52000, 2.21100, 5.00000, 20.90000, 395.67000, 5.83600, 91.90000, 384.00000, 19.50000,
0.13117, 8.56000, 0.52000, 2.12240, 5.00000, 20.90000, 387.69000, 6.12700, 85.20000, 384.00000, 20.40000,
0.12802, 8.56000, 0.52000, 2.43290, 5.00000, 20.90000, 395.24000, 6.47400, 97.10000, 384.00000, 19.80000,
0.26363, 8.56000, 0.52000, 2.54510, 5.00000, 20.90000, 391.23000, 6.22900, 91.20000, 384.00000, 19.40000,
0.10793, 8.56000, 0.52000, 2.77780, 5.00000, 20.90000, 393.49000, 6.19500, 54.40000, 384.00000, 21.70000,
0.10084, 10.01000, 0.54700, 2.67750, 6.00000, 17.80000, 395.59000, 6.71500, 81.60000, 432.00000, 22.80000,
0.12329, 10.01000, 0.54700, 2.35340, 6.00000, 17.80000, 394.95000, 5.91300, 92.90000, 432.00000, 18.80000,
0.22212, 10.01000, 0.54700, 2.54800, 6.00000, 17.80000, 396.90000, 6.09200, 95.40000, 432.00000, 18.70000,
0.14231, 10.01000, 0.54700, 2.25650, 6.00000, 17.80000, 388.74000, 6.25400, 84.20000, 432.00000, 18.50000,
0.17134, 10.01000, 0.54700, 2.46310, 6.00000, 17.80000, 344.91000, 5.92800, 88.20000, 432.00000, 18.30000,
0.13158, 10.01000, 0.54700, 2.73010, 6.00000, 17.80000, 393.30000, 6.17600, 72.50000, 432.00000, 21.20000,
0.15098, 10.01000, 0.54700, 2.74740, 6.00000, 17.80000, 394.51000, 6.02100, 82.60000, 432.00000, 19.20000,
0.13058, 10.01000, 0.54700, 2.47750, 6.00000, 17.80000, 338.63000, 5.87200, 73.10000, 432.00000, 20.40000,
0.14476, 10.01000, 0.54700, 2.75920, 6.00000, 17.80000, 391.50000, 5.73100, 65.20000, 432.00000, 19.30000,
0.06899, 25.65000, 0.58100, 2.25770, 2.00000, 19.10000, 389.15000, 5.87000, 69.70000, 188.00000, 22.00000,
0.07165, 25.65000, 0.58100, 2.19740, 2.00000, 19.10000, 377.67000, 6.00400, 84.10000, 188.00000, 20.30000,
0.09299, 25.65000, 0.58100, 2.08690, 2.00000, 19.10000, 378.09000, 5.96100, 92.90000, 188.00000, 20.50000,
0.15038, 25.65000, 0.58100, 1.94440, 2.00000, 19.10000, 370.31000, 5.85600, 97.00000, 188.00000, 17.30000,
0.09849, 25.65000, 0.58100, 2.00630, 2.00000, 19.10000, 379.38000, 5.87900, 95.80000, 188.00000, 18.80000,
0.16902, 25.65000, 0.58100, 1.99290, 2.00000, 19.10000, 385.02000, 5.98600, 88.40000, 188.00000, 21.40000,
0.38735, 25.65000, 0.58100, 1.75720, 2.00000, 19.10000, 359.29000, 5.61300, 95.60000, 188.00000, 15.70000,
0.25915, 21.89000, 0.62400, 1.78830, 4.00000, 21.20000, 392.11000, 5.69300, 96.00000, 437.00000, 16.20000,
0.32543, 21.89000, 0.62400, 1.81250, 4.00000, 21.20000, 396.90000, 6.43100, 98.80000, 437.00000, 18.00000,
0.88125, 21.89000, 0.62400, 1.97990, 4.00000, 21.20000, 396.90000, 5.63700, 94.70000, 437.00000, 14.30000,
0.34006, 21.89000, 0.62400, 2.11850, 4.00000, 21.20000, 395.04000, 6.45800, 98.90000, 437.00000, 19.20000,
1.19294, 21.89000, 0.62400, 2.27100, 4.00000, 21.20000, 396.90000, 6.32600, 97.70000, 437.00000, 19.60000,
0.59005, 21.89000, 0.62400, 2.32740, 4.00000, 21.20000, 385.76000, 6.37200, 97.90000, 437.00000, 23.00000,
0.32982, 21.89000, 0.62400, 2.46990, 4.00000, 21.20000, 388.69000, 5.82200, 95.40000, 437.00000, 18.40000,
0.97617, 21.89000, 0.62400, 2.34600, 4.00000, 21.20000, 262.76000, 5.75700, 98.40000, 437.00000, 15.60000,
0.55778, 21.89000, 0.62400, 2.11070, 4.00000, 21.20000, 394.67000, 6.33500, 98.20000, 437.00000, 18.10000,
0.32264, 21.89000, 0.62400, 1.96690, 4.00000, 21.20000, 378.25000, 5.94200, 93.50000, 437.00000, 17.40000,
0.35233, 21.89000, 0.62400, 1.84980, 4.00000, 21.20000, 394.08000, 6.45400, 98.40000, 437.00000, 17.10000,
0.24980, 21.89000, 0.62400, 1.66860, 4.00000, 21.20000, 392.04000, 5.85700, 98.20000, 437.00000, 13.30000,
0.54452, 21.89000, 0.62400, 1.66870, 4.00000, 21.20000, 396.90000, 6.15100, 97.90000, 437.00000, 17.80000,
0.29090, 21.89000, 0.62400, 1.61190, 4.00000, 21.20000, 388.08000, 6.17400, 93.60000, 437.00000, 14.00000,
1.62864, 21.89000, 0.62400, 1.43940, 4.00000, 21.20000, 396.90000, 5.01900, 100.00000, 437.00000, 14.40000,
3.32105, 19.58000, 0.87100, 1.32160, 5.00000, 14.70000, 396.90000, 5.40300, 100.00000, 403.00000, 13.40000,
4.09740, 19.58000, 0.87100, 1.41180, 5.00000, 14.70000, 396.90000, 5.46800, 100.00000, 403.00000, 15.60000,
2.77974, 19.58000, 0.87100, 1.34590, 5.00000, 14.70000, 396.90000, 4.90300, 97.80000, 403.00000, 11.80000,
2.37934, 19.58000, 0.87100, 1.41910, 5.00000, 14.70000, 172.91000, 6.13000, 100.00000, 403.00000, 13.80000,
2.15505, 19.58000, 0.87100, 1.51660, 5.00000, 14.70000, 169.27000, 5.62800, 100.00000, 403.00000, 15.60000,
2.36862, 19.58000, 0.87100, 1.46080, 5.00000, 14.70000, 391.71000, 4.92600, 95.70000, 403.00000, 14.60000,
2.33099, 19.58000, 0.87100, 1.52960, 5.00000, 14.70000, 356.99000, 5.18600, 93.80000, 403.00000, 17.80000,
2.73397, 19.58000, 0.87100, 1.52570, 5.00000, 14.70000, 351.85000, 5.59700, 94.90000, 403.00000, 15.40000,
1.65660, 19.58000, 0.87100, 1.61800, 5.00000, 14.70000, 372.80000, 6.12200, 97.30000, 403.00000, 21.50000,
1.49632, 19.58000, 0.87100, 1.59160, 5.00000, 14.70000, 341.60000, 5.40400, 100.00000, 403.00000, 19.60000,
1.12658, 19.58000, 0.87100, 1.61020, 5.00000, 14.70000, 343.28000, 5.01200, 88.00000, 403.00000, 15.30000,
2.14918, 19.58000, 0.87100, 1.62320, 5.00000, 14.70000, 261.95000, 5.70900, 98.50000, 403.00000, 19.40000,
1.41385, 19.58000, 0.87100, 1.74940, 5.00000, 14.70000, 321.02000, 6.12900, 96.00000, 403.00000, 17.00000,
3.53501, 19.58000, 0.87100, 1.74550, 5.00000, 14.70000, 88.01000, 6.15200, 82.60000, 403.00000, 15.60000,
2.44668, 19.58000, 0.87100, 1.73640, 5.00000, 14.70000, 88.63000, 5.27200, 94.00000, 403.00000, 13.10000,
1.22358, 19.58000, 0.60500, 1.87730, 5.00000, 14.70000, 363.43000, 6.94300, 97.40000, 403.00000, 41.30000,
1.34284, 19.58000, 0.60500, 1.75730, 5.00000, 14.70000, 353.89000, 6.06600, 100.00000, 403.00000, 24.30000,
1.42502, 19.58000, 0.87100, 1.76590, 5.00000, 14.70000, 364.31000, 6.51000, 100.00000, 403.00000, 23.30000,
1.27346, 19.58000, 0.60500, 1.79840, 5.00000, 14.70000, 338.92000, 6.25000, 92.60000, 403.00000, 27.00000,
1.46336, 19.58000, 0.60500, 1.97090, 5.00000, 14.70000, 374.43000, 7.48900, 90.80000, 403.00000, 50.00000,
1.83377, 19.58000, 0.60500, 2.04070, 5.00000, 14.70000, 389.61000, 7.80200, 98.20000, 403.00000, 50.00000,
1.51902, 19.58000, 0.60500, 2.16200, 5.00000, 14.70000, 388.45000, 8.37500, 93.90000, 403.00000, 50.00000,
2.24236, 19.58000, 0.60500, 2.42200, 5.00000, 14.70000, 395.11000, 5.85400, 91.80000, 403.00000, 22.70000,
2.92400, 19.58000, 0.60500, 2.28340, 5.00000, 14.70000, 240.16000, 6.10100, 93.00000, 403.00000, 25.00000,
2.01019, 19.58000, 0.60500, 2.04590, 5.00000, 14.70000, 369.30000, 7.92900, 96.20000, 403.00000, 50.00000,
1.80028, 19.58000, 0.60500, 2.42590, 5.00000, 14.70000, 227.61000, 5.87700, 79.20000, 403.00000, 23.80000,
2.30040, 19.58000, 0.60500, 2.10000, 5.00000, 14.70000, 297.09000, 6.31900, 96.10000, 403.00000, 23.80000,
2.44953, 19.58000, 0.60500, 2.26250, 5.00000, 14.70000, 330.04000, 6.40200, 95.20000, 403.00000, 22.30000,
1.20742, 19.58000, 0.60500, 2.42590, 5.00000, 14.70000, 292.29000, 5.87500, 94.60000, 403.00000, 17.40000,
2.31390, 19.58000, 0.60500, 2.38870, 5.00000, 14.70000, 348.13000, 5.88000, 97.30000, 403.00000, 19.10000,
0.13914, 4.05000, 0.51000, 2.59610, 5.00000, 16.60000, 396.90000, 5.57200, 88.50000, 296.00000, 23.10000,
0.09178, 4.05000, 0.51000, 2.64630, 5.00000, 16.60000, 395.50000, 6.41600, 84.10000, 296.00000, 23.60000,
0.08447, 4.05000, 0.51000, 2.70190, 5.00000, 16.60000, 393.23000, 5.85900, 68.70000, 296.00000, 22.60000,
0.06664, 4.05000, 0.51000, 3.13230, 5.00000, 16.60000, 390.96000, 6.54600, 33.10000, 296.00000, 29.40000,
0.07022, 4.05000, 0.51000, 3.55490, 5.00000, 16.60000, 393.23000, 6.02000, 47.20000, 296.00000, 23.20000,
0.05425, 4.05000, 0.51000, 3.31750, 5.00000, 16.60000, 395.60000, 6.31500, 73.40000, 296.00000, 24.60000,
0.06642, 4.05000, 0.51000, 2.91530, 5.00000, 16.60000, 391.27000, 6.86000, 74.40000, 296.00000, 29.90000,
0.05780, 2.46000, 0.48800, 2.82900, 3.00000, 17.80000, 396.90000, 6.98000, 58.40000, 193.00000, 37.20000,
0.06588, 2.46000, 0.48800, 2.74100, 3.00000, 17.80000, 395.56000, 7.76500, 83.30000, 193.00000, 39.80000,
0.06888, 2.46000, 0.48800, 2.59790, 3.00000, 17.80000, 396.90000, 6.14400, 62.20000, 193.00000, 36.20000,
0.09103, 2.46000, 0.48800, 2.70060, 3.00000, 17.80000, 394.12000, 7.15500, 92.20000, 193.00000, 37.90000,
0.10008, 2.46000, 0.48800, 2.84700, 3.00000, 17.80000, 396.90000, 6.56300, 95.60000, 193.00000, 32.50000,
0.08308, 2.46000, 0.48800, 2.98790, 3.00000, 17.80000, 391.00000, 5.60400, 89.80000, 193.00000, 26.40000,
0.06047, 2.46000, 0.48800, 3.27970, 3.00000, 17.80000, 387.11000, 6.15300, 68.80000, 193.00000, 29.60000,
0.05602, 2.46000, 0.48800, 3.19920, 3.00000, 17.80000, 392.63000, 7.83100, 53.60000, 193.00000, 50.00000,
0.07875, 3.44000, 0.43700, 3.78860, 5.00000, 15.20000, 393.87000, 6.78200, 41.10000, 398.00000, 32.00000,
0.12579, 3.44000, 0.43700, 4.56670, 5.00000, 15.20000, 382.84000, 6.55600, 29.10000, 398.00000, 29.80000,
0.08370, 3.44000, 0.43700, 4.56670, 5.00000, 15.20000, 396.90000, 7.18500, 38.90000, 398.00000, 34.90000,
0.09068, 3.44000, 0.43700, 6.47980, 5.00000, 15.20000, 377.68000, 6.95100, 21.50000, 398.00000, 37.00000,
0.06911, 3.44000, 0.43700, 6.47980, 5.00000, 15.20000, 389.71000, 6.73900, 30.80000, 398.00000, 30.50000,
0.08664, 3.44000, 0.43700, 6.47980, 5.00000, 15.20000, 390.49000, 7.17800, 26.30000, 398.00000, 36.40000,
0.02187, 2.93000, 0.40100, 6.21960, 1.00000, 15.60000, 393.37000, 6.80000, 9.90000, 265.00000, 31.10000,
0.01439, 2.93000, 0.40100, 6.21960, 1.00000, 15.60000, 376.70000, 6.60400, 18.80000, 265.00000, 29.10000,
0.01381, 0.46000, 0.42200, 5.64840, 4.00000, 14.40000, 394.23000, 7.87500, 32.00000, 255.00000, 50.00000,
0.04011, 1.52000, 0.40400, 7.30900, 2.00000, 12.60000, 396.90000, 7.28700, 34.10000, 329.00000, 33.30000,
0.04666, 1.52000, 0.40400, 7.30900, 2.00000, 12.60000, 354.31000, 7.10700, 36.60000, 329.00000, 30.30000,
0.03768, 1.52000, 0.40400, 7.30900, 2.00000, 12.60000, 392.20000, 7.27400, 38.30000, 329.00000, 34.60000,
0.03150, 1.47000, 0.40300, 7.65340, 3.00000, 17.00000, 396.90000, 6.97500, 15.30000, 402.00000, 34.90000,
0.01778, 1.47000, 0.40300, 7.65340, 3.00000, 17.00000, 384.30000, 7.13500, 13.90000, 402.00000, 32.90000,
0.03445, 2.03000, 0.41500, 6.27000, 2.00000, 14.70000, 393.77000, 6.16200, 38.40000, 348.00000, 24.10000,
0.02177, 2.03000, 0.41500, 6.27000, 2.00000, 14.70000, 395.38000, 7.61000, 15.70000, 348.00000, 42.30000,
0.03510, 2.68000, 0.41610, 5.11800, 4.00000, 14.70000, 392.78000, 7.85300, 33.20000, 224.00000, 48.50000,
0.02009, 2.68000, 0.41610, 5.11800, 4.00000, 14.70000, 390.55000, 8.03400, 31.90000, 224.00000, 50.00000,
0.13642, 10.59000, 0.48900, 3.94540, 4.00000, 18.60000, 396.90000, 5.89100, 22.30000, 277.00000, 22.60000,
0.22969, 10.59000, 0.48900, 4.35490, 4.00000, 18.60000, 394.87000, 6.32600, 52.50000, 277.00000, 24.40000,
0.25199, 10.59000, 0.48900, 4.35490, 4.00000, 18.60000, 389.43000, 5.78300, 72.70000, 277.00000, 22.50000,
0.13587, 10.59000, 0.48900, 4.23920, 4.00000, 18.60000, 381.32000, 6.06400, 59.10000, 277.00000, 24.40000,
0.43571, 10.59000, 0.48900, 3.87500, 4.00000, 18.60000, 396.90000, 5.34400, 100.00000, 277.00000, 20.00000,
0.17446, 10.59000, 0.48900, 3.87710, 4.00000, 18.60000, 393.25000, 5.96000, 92.10000, 277.00000, 21.70000,
0.37578, 10.59000, 0.48900, 3.66500, 4.00000, 18.60000, 395.24000, 5.40400, 88.60000, 277.00000, 19.30000,
0.21719, 10.59000, 0.48900, 3.65260, 4.00000, 18.60000, 390.94000, 5.80700, 53.80000, 277.00000, 22.40000,
0.14052, 10.59000, 0.48900, 3.94540, 4.00000, 18.60000, 385.81000, 6.37500, 32.30000, 277.00000, 28.10000,
0.28955, 10.59000, 0.48900, 3.58750, 4.00000, 18.60000, 348.93000, 5.41200, 9.80000, 277.00000, 23.70000,
0.19802, 10.59000, 0.48900, 3.94540, 4.00000, 18.60000, 393.63000, 6.18200, 42.40000, 277.00000, 25.00000,
0.04560, 13.89000, 0.55000, 3.11210, 5.00000, 16.40000, 392.80000, 5.88800, 56.00000, 276.00000, 23.30000,
0.07013, 13.89000, 0.55000, 3.42110, 5.00000, 16.40000, 392.78000, 6.64200, 85.10000, 276.00000, 28.70000,
0.11069, 13.89000, 0.55000, 2.88930, 5.00000, 16.40000, 396.90000, 5.95100, 93.80000, 276.00000, 21.50000,
0.11425, 13.89000, 0.55000, 3.36330, 5.00000, 16.40000, 393.74000, 6.37300, 92.40000, 276.00000, 23.00000,
0.35809, 6.20000, 0.50700, 2.86170, 8.00000, 17.40000, 391.70000, 6.95100, 88.50000, 307.00000, 26.70000,
0.40771, 6.20000, 0.50700, 3.04800, 8.00000, 17.40000, 395.24000, 6.16400, 91.30000, 307.00000, 21.70000,
0.62356, 6.20000, 0.50700, 3.27210, 8.00000, 17.40000, 390.39000, 6.87900, 77.70000, 307.00000, 27.50000,
0.61470, 6.20000, 0.50700, 3.27210, 8.00000, 17.40000, 396.90000, 6.61800, 80.80000, 307.00000, 30.10000,
0.31533, 6.20000, 0.50400, 2.89440, 8.00000, 17.40000, 385.05000, 8.26600, 78.30000, 307.00000, 44.80000,
0.52693, 6.20000, 0.50400, 2.89440, 8.00000, 17.40000, 382.00000, 8.72500, 83.00000, 307.00000, 50.00000,
0.38214, 6.20000, 0.50400, 3.21570, 8.00000, 17.40000, 387.38000, 8.04000, 86.50000, 307.00000, 37.60000,
0.41238, 6.20000, 0.50400, 3.21570, 8.00000, 17.40000, 372.08000, 7.16300, 79.90000, 307.00000, 31.60000,
0.29819, 6.20000, 0.50400, 3.37510, 8.00000, 17.40000, 377.51000, 7.68600, 17.00000, 307.00000, 46.70000,
0.44178, 6.20000, 0.50400, 3.37510, 8.00000, 17.40000, 380.34000, 6.55200, 21.40000, 307.00000, 31.50000,
0.53700, 6.20000, 0.50400, 3.67150, 8.00000, 17.40000, 378.35000, 5.98100, 68.10000, 307.00000, 24.30000,
0.46296, 6.20000, 0.50400, 3.67150, 8.00000, 17.40000, 376.14000, 7.41200, 76.90000, 307.00000, 31.70000,
0.57529, 6.20000, 0.50700, 3.83840, 8.00000, 17.40000, 385.91000, 8.33700, 73.30000, 307.00000, 41.70000,
0.33147, 6.20000, 0.50700, 3.65190, 8.00000, 17.40000, 378.95000, 8.24700, 70.40000, 307.00000, 48.30000,
0.44791, 6.20000, 0.50700, 3.65190, 8.00000, 17.40000, 360.20000, 6.72600, 66.50000, 307.00000, 29.00000,
0.33045, 6.20000, 0.50700, 3.65190, 8.00000, 17.40000, 376.75000, 6.08600, 61.50000, 307.00000, 24.00000,
0.52058, 6.20000, 0.50700, 4.14800, 8.00000, 17.40000, 388.45000, 6.63100, 76.50000, 307.00000, 25.10000,
0.51183, 6.20000, 0.50700, 4.14800, 8.00000, 17.40000, 390.07000, 7.35800, 71.60000, 307.00000, 31.50000,
0.08244, 4.93000, 0.42800, 6.18990, 6.00000, 16.60000, 379.41000, 6.48100, 18.50000, 300.00000, 23.70000,
0.09252, 4.93000, 0.42800, 6.18990, 6.00000, 16.60000, 383.78000, 6.60600, 42.20000, 300.00000, 23.30000,
0.11329, 4.93000, 0.42800, 6.33610, 6.00000, 16.60000, 391.25000, 6.89700, 54.30000, 300.00000, 22.00000,
0.10612, 4.93000, 0.42800, 6.33610, 6.00000, 16.60000, 394.62000, 6.09500, 65.10000, 300.00000, 20.10000,
0.10290, 4.93000, 0.42800, 7.03550, 6.00000, 16.60000, 372.75000, 6.35800, 52.90000, 300.00000, 22.20000,
0.12757, 4.93000, 0.42800, 7.03550, 6.00000, 16.60000, 374.71000, 6.39300, 7.80000, 300.00000, 23.70000,
0.20608, 5.86000, 0.43100, 7.95490, 7.00000, 19.10000, 372.49000, 5.59300, 76.50000, 330.00000, 17.60000,
0.19133, 5.86000, 0.43100, 7.95490, 7.00000, 19.10000, 389.13000, 5.60500, 70.20000, 330.00000, 18.50000,
0.33983, 5.86000, 0.43100, 8.05550, 7.00000, 19.10000, 390.18000, 6.10800, 34.90000, 330.00000, 24.30000,
0.19657, 5.86000, 0.43100, 8.05550, 7.00000, 19.10000, 376.14000, 6.22600, 79.20000, 330.00000, 20.50000,
0.16439, 5.86000, 0.43100, 7.82650, 7.00000, 19.10000, 374.71000, 6.43300, 49.10000, 330.00000, 24.50000,
0.19073, 5.86000, 0.43100, 7.82650, 7.00000, 19.10000, 393.74000, 6.71800, 17.50000, 330.00000, 26.20000,
0.14030, 5.86000, 0.43100, 7.39670, 7.00000, 19.10000, 396.28000, 6.48700, 13.00000, 330.00000, 24.40000,
0.21409, 5.86000, 0.43100, 7.39670, 7.00000, 19.10000, 377.07000, 6.43800, 8.90000, 330.00000, 24.80000,
0.08221, 5.86000, 0.43100, 8.90670, 7.00000, 19.10000, 386.09000, 6.95700, 6.80000, 330.00000, 29.60000,
0.36894, 5.86000, 0.43100, 8.90670, 7.00000, 19.10000, 396.90000, 8.25900, 8.40000, 330.00000, 42.80000,
0.04819, 3.64000, 0.39200, 9.22030, 1.00000, 16.40000, 392.89000, 6.10800, 32.00000, 315.00000, 21.90000,
0.03548, 3.64000, 0.39200, 9.22030, 1.00000, 16.40000, 395.18000, 5.87600, 19.10000, 315.00000, 20.90000,
0.01538, 3.75000, 0.39400, 6.33610, 3.00000, 15.90000, 386.34000, 7.45400, 34.20000, 244.00000, 44.00000,
0.61154, 3.97000, 0.64700, 1.80100, 5.00000, 13.00000, 389.70000, 8.70400, 86.90000, 264.00000, 50.00000,
0.66351, 3.97000, 0.64700, 1.89460, 5.00000, 13.00000, 383.29000, 7.33300, 100.00000, 264.00000, 36.00000,
0.65665, 3.97000, 0.64700, 2.01070, 5.00000, 13.00000, 391.93000, 6.84200, 100.00000, 264.00000, 30.10000,
0.54011, 3.97000, 0.64700, 2.11210, 5.00000, 13.00000, 392.80000, 7.20300, 81.80000, 264.00000, 33.80000,
0.53412, 3.97000, 0.64700, 2.13980, 5.00000, 13.00000, 388.37000, 7.52000, 89.40000, 264.00000, 43.10000,
0.52014, 3.97000, 0.64700, 2.28850, 5.00000, 13.00000, 386.86000, 8.39800, 91.50000, 264.00000, 48.80000,
0.82526, 3.97000, 0.64700, 2.07880, 5.00000, 13.00000, 393.42000, 7.32700, 94.50000, 264.00000, 31.00000,
0.55007, 3.97000, 0.64700, 1.93010, 5.00000, 13.00000, 387.89000, 7.20600, 91.60000, 264.00000, 36.50000,
0.76162, 3.97000, 0.64700, 1.98650, 5.00000, 13.00000, 392.40000, 5.56000, 62.80000, 264.00000, 22.80000,
0.78570, 3.97000, 0.64700, 2.13290, 5.00000, 13.00000, 384.07000, 7.01400, 84.60000, 264.00000, 30.70000,
0.57834, 3.97000, 0.57500, 2.42160, 5.00000, 13.00000, 384.54000, 8.29700, 67.00000, 264.00000, 50.00000,
0.54050, 3.97000, 0.57500, 2.87200, 5.00000, 13.00000, 390.30000, 7.47000, 52.60000, 264.00000, 43.50000,
0.09065, 6.96000, 0.46400, 3.91750, 3.00000, 18.60000, 391.34000, 5.92000, 61.50000, 223.00000, 20.70000,
0.29916, 6.96000, 0.46400, 4.42900, 3.00000, 18.60000, 388.65000, 5.85600, 42.10000, 223.00000, 21.10000,
0.16211, 6.96000, 0.46400, 4.42900, 3.00000, 18.60000, 396.90000, 6.24000, 16.30000, 223.00000, 25.20000,
0.11460, 6.96000, 0.46400, 3.91750, 3.00000, 18.60000, 394.96000, 6.53800, 58.70000, 223.00000, 24.40000,
0.22188, 6.96000, 0.46400, 4.36650, 3.00000, 18.60000, 390.77000, 7.69100, 51.80000, 223.00000, 35.20000,
0.05644, 6.41000, 0.44700, 4.07760, 4.00000, 17.60000, 396.90000, 6.75800, 32.90000, 254.00000, 32.40000,
0.09604, 6.41000, 0.44700, 4.26730, 4.00000, 17.60000, 396.90000, 6.85400, 42.80000, 254.00000, 32.00000,
0.10469, 6.41000, 0.44700, 4.78720, 4.00000, 17.60000, 389.25000, 7.26700, 49.00000, 254.00000, 33.20000,
0.06127, 6.41000, 0.44700, 4.86280, 4.00000, 17.60000, 393.45000, 6.82600, 27.60000, 254.00000, 33.10000,
0.07978, 6.41000, 0.44700, 4.14030, 4.00000, 17.60000, 396.90000, 6.48200, 32.10000, 254.00000, 29.10000,
0.21038, 3.33000, 0.44290, 4.10070, 5.00000, 14.90000, 396.90000, 6.81200, 32.20000, 216.00000, 35.10000,
0.03578, 3.33000, 0.44290, 4.69470, 5.00000, 14.90000, 387.31000, 7.82000, 64.50000, 216.00000, 45.40000,
0.03705, 3.33000, 0.44290, 5.24470, 5.00000, 14.90000, 392.23000, 6.96800, 37.20000, 216.00000, 35.40000,
0.06129, 3.33000, 0.44290, 5.21190, 5.00000, 14.90000, 377.07000, 7.64500, 49.70000, 216.00000, 46.00000,
0.01501, 1.21000, 0.40100, 5.88500, 1.00000, 13.60000, 395.52000, 7.92300, 24.80000, 198.00000, 50.00000,
0.00906, 2.97000, 0.40000, 7.30730, 1.00000, 15.30000, 394.72000, 7.08800, 20.80000, 285.00000, 32.20000,
0.01096, 2.25000, 0.38900, 7.30730, 1.00000, 15.30000, 394.72000, 6.45300, 31.90000, 300.00000, 22.00000,
0.01965, 1.76000, 0.38500, 9.08920, 1.00000, 18.20000, 341.60000, 6.23000, 31.50000, 241.00000, 20.10000,
0.03871, 5.32000, 0.40500, 7.31720, 6.00000, 16.60000, 396.90000, 6.20900, 31.30000, 293.00000, 23.20000,
0.04590, 5.32000, 0.40500, 7.31720, 6.00000, 16.60000, 396.90000, 6.31500, 45.60000, 293.00000, 22.30000,
0.04297, 5.32000, 0.40500, 7.31720, 6.00000, 16.60000, 371.72000, 6.56500, 22.90000, 293.00000, 24.80000,
0.03502, 4.95000, 0.41100, 5.11670, 4.00000, 19.20000, 396.90000, 6.86100, 27.90000, 245.00000, 28.50000,
0.07886, 4.95000, 0.41100, 5.11670, 4.00000, 19.20000, 396.90000, 7.14800, 27.70000, 245.00000, 37.30000,
0.03615, 4.95000, 0.41100, 5.11670, 4.00000, 19.20000, 396.90000, 6.63000, 23.40000, 245.00000, 27.90000,
0.08265, 13.92000, 0.43700, 5.50270, 4.00000, 16.00000, 396.90000, 6.12700, 18.40000, 289.00000, 23.90000,
0.08199, 13.92000, 0.43700, 5.50270, 4.00000, 16.00000, 396.90000, 6.00900, 42.30000, 289.00000, 21.70000,
0.12932, 13.92000, 0.43700, 5.96040, 4.00000, 16.00000, 396.90000, 6.67800, 31.10000, 289.00000, 28.60000,
0.05372, 13.92000, 0.43700, 5.96040, 4.00000, 16.00000, 392.85000, 6.54900, 51.00000, 289.00000, 27.10000,
0.14103, 13.92000, 0.43700, 6.32000, 4.00000, 16.00000, 396.90000, 5.79000, 58.00000, 289.00000, 20.30000,
0.06466, 2.24000, 0.40000, 7.82780, 5.00000, 14.80000, 368.24000, 6.34500, 20.10000, 358.00000, 22.50000,
0.05561, 2.24000, 0.40000, 7.82780, 5.00000, 14.80000, 371.58000, 7.04100, 10.00000, 358.00000, 29.00000,
0.04417, 2.24000, 0.40000, 7.82780, 5.00000, 14.80000, 390.86000, 6.87100, 47.40000, 358.00000, 24.80000,
0.03537, 6.09000, 0.43300, 5.49170, 7.00000, 16.10000, 395.75000, 6.59000, 40.40000, 329.00000, 22.00000,
0.09266, 6.09000, 0.43300, 5.49170, 7.00000, 16.10000, 383.61000, 6.49500, 18.40000, 329.00000, 26.40000,
0.10000, 6.09000, 0.43300, 5.49170, 7.00000, 16.10000, 390.43000, 6.98200, 17.70000, 329.00000, 33.10000,
0.05515, 2.18000, 0.47200, 4.02200, 7.00000, 18.40000, 393.68000, 7.23600, 41.10000, 222.00000, 36.10000,
0.05479, 2.18000, 0.47200, 3.37000, 7.00000, 18.40000, 393.36000, 6.61600, 58.10000, 222.00000, 28.40000,
0.07503, 2.18000, 0.47200, 3.09920, 7.00000, 18.40000, 396.90000, 7.42000, 71.90000, 222.00000, 33.40000,
0.04932, 2.18000, 0.47200, 3.18270, 7.00000, 18.40000, 396.90000, 6.84900, 70.30000, 222.00000, 28.20000,
0.49298, 9.90000, 0.54400, 3.31750, 4.00000, 18.40000, 396.90000, 6.63500, 82.50000, 304.00000, 22.80000,
0.34940, 9.90000, 0.54400, 3.10250, 4.00000, 18.40000, 396.24000, 5.97200, 76.70000, 304.00000, 20.30000,
2.63548, 9.90000, 0.54400, 2.51940, 4.00000, 18.40000, 350.45000, 4.97300, 37.80000, 304.00000, 16.10000,
0.79041, 9.90000, 0.54400, 2.64030, 4.00000, 18.40000, 396.90000, 6.12200, 52.80000, 304.00000, 22.10000,
0.26169, 9.90000, 0.54400, 2.83400, 4.00000, 18.40000, 396.30000, 6.02300, 90.40000, 304.00000, 19.40000,
0.26938, 9.90000, 0.54400, 3.26280, 4.00000, 18.40000, 393.39000, 6.26600, 82.80000, 304.00000, 21.60000,
0.36920, 9.90000, 0.54400, 3.60230, 4.00000, 18.40000, 395.69000, 6.56700, 87.30000, 304.00000, 23.80000,
0.25356, 9.90000, 0.54400, 3.94500, 4.00000, 18.40000, 396.42000, 5.70500, 77.70000, 304.00000, 16.20000,
0.31827, 9.90000, 0.54400, 3.99860, 4.00000, 18.40000, 390.70000, 5.91400, 83.20000, 304.00000, 17.80000,
0.24522, 9.90000, 0.54400, 4.03170, 4.00000, 18.40000, 396.90000, 5.78200, 71.70000, 304.00000, 19.80000,
0.40202, 9.90000, 0.54400, 3.53250, 4.00000, 18.40000, 395.21000, 6.38200, 67.20000, 304.00000, 23.10000,
0.47547, 9.90000, 0.54400, 4.00190, 4.00000, 18.40000, 396.23000, 6.11300, 58.80000, 304.00000, 21.00000,
0.16760, 7.38000, 0.49300, 4.54040, 5.00000, 19.60000, 396.90000, 6.42600, 52.30000, 287.00000, 23.80000,
0.18159, 7.38000, 0.49300, 4.54040, 5.00000, 19.60000, 396.90000, 6.37600, 54.30000, 287.00000, 23.10000,
0.35114, 7.38000, 0.49300, 4.72110, 5.00000, 19.60000, 396.90000, 6.04100, 49.90000, 287.00000, 20.40000,
0.28392, 7.38000, 0.49300, 4.72110, 5.00000, 19.60000, 391.13000, 5.70800, 74.30000, 287.00000, 18.50000,
0.34109, 7.38000, 0.49300, 4.72110, 5.00000, 19.60000, 396.90000, 6.41500, 40.10000, 287.00000, 25.00000,
0.19186, 7.38000, 0.49300, 5.41590, 5.00000, 19.60000, 393.68000, 6.43100, 14.70000, 287.00000, 24.60000,
0.30347, 7.38000, 0.49300, 5.41590, 5.00000, 19.60000, 396.90000, 6.31200, 28.90000, 287.00000, 23.00000,
0.24103, 7.38000, 0.49300, 5.41590, 5.00000, 19.60000, 396.90000, 6.08300, 43.70000, 287.00000, 22.20000,
0.06617, 3.24000, 0.46000, 5.21460, 4.00000, 16.90000, 382.44000, 5.86800, 25.80000, 430.00000, 19.30000,
0.06724, 3.24000, 0.46000, 5.21460, 4.00000, 16.90000, 375.21000, 6.33300, 17.20000, 430.00000, 22.60000,
0.04544, 3.24000, 0.46000, 5.87360, 4.00000, 16.90000, 368.57000, 6.14400, 32.20000, 430.00000, 19.80000,
0.05023, 6.06000, 0.43790, 6.64070, 1.00000, 16.90000, 394.02000, 5.70600, 28.40000, 304.00000, 17.10000,
0.03466, 6.06000, 0.43790, 6.64070, 1.00000, 16.90000, 362.25000, 6.03100, 23.30000, 304.00000, 19.40000,
0.05083, 5.19000, 0.51500, 6.45840, 5.00000, 20.20000, 389.71000, 6.31600, 38.10000, 224.00000, 22.20000,
0.03738, 5.19000, 0.51500, 6.45840, 5.00000, 20.20000, 389.40000, 6.31000, 38.50000, 224.00000, 20.70000,
0.03961, 5.19000, 0.51500, 5.98530, 5.00000, 20.20000, 396.90000, 6.03700, 34.50000, 224.00000, 21.10000,
0.03427, 5.19000, 0.51500, 5.23110, 5.00000, 20.20000, 396.90000, 5.86900, 46.30000, 224.00000, 19.50000,
0.03041, 5.19000, 0.51500, 5.61500, 5.00000, 20.20000, 394.81000, 5.89500, 59.60000, 224.00000, 18.50000,
0.03306, 5.19000, 0.51500, 4.81220, 5.00000, 20.20000, 396.14000, 6.05900, 37.30000, 224.00000, 20.60000,
0.05497, 5.19000, 0.51500, 4.81220, 5.00000, 20.20000, 396.90000, 5.98500, 45.40000, 224.00000, 19.00000,
0.06151, 5.19000, 0.51500, 4.81220, 5.00000, 20.20000, 396.90000, 5.96800, 58.50000, 224.00000, 18.70000,
0.01301, 1.52000, 0.44200, 7.03790, 1.00000, 15.50000, 394.74000, 7.24100, 49.30000, 284.00000, 32.70000,
0.02498, 1.89000, 0.51800, 6.26690, 1.00000, 15.90000, 389.96000, 6.54000, 59.70000, 422.00000, 16.50000,
0.02543, 3.78000, 0.48400, 5.73210, 5.00000, 17.60000, 396.90000, 6.69600, 56.40000, 370.00000, 23.90000,
0.03049, 3.78000, 0.48400, 6.46540, 5.00000, 17.60000, 387.97000, 6.87400, 28.10000, 370.00000, 31.20000,
0.03113, 4.39000, 0.44200, 8.01360, 3.00000, 18.80000, 385.64000, 6.01400, 48.50000, 352.00000, 17.50000,
0.06162, 4.39000, 0.44200, 8.01360, 3.00000, 18.80000, 364.61000, 5.89800, 52.30000, 352.00000, 17.20000,
0.01870, 4.15000, 0.42900, 8.53530, 4.00000, 17.90000, 392.43000, 6.51600, 27.70000, 351.00000, 23.10000,
0.01501, 2.01000, 0.43500, 8.34400, 4.00000, 17.00000, 390.94000, 6.63500, 29.70000, 280.00000, 24.50000,
0.02899, 1.25000, 0.42900, 8.79210, 1.00000, 19.70000, 389.85000, 6.93900, 34.50000, 335.00000, 26.60000,
0.06211, 1.25000, 0.42900, 8.79210, 1.00000, 19.70000, 396.90000, 6.49000, 44.40000, 335.00000, 22.90000,
0.07950, 1.69000, 0.41100, 10.71030, 4.00000, 18.30000, 370.78000, 6.57900, 35.90000, 411.00000, 24.10000,
0.07244, 1.69000, 0.41100, 10.71030, 4.00000, 18.30000, 392.33000, 5.88400, 18.50000, 411.00000, 18.60000,
0.01709, 2.02000, 0.41000, 12.12650, 5.00000, 17.00000, 384.46000, 6.72800, 36.10000, 187.00000, 30.10000,
0.04301, 1.91000, 0.41300, 10.58570, 4.00000, 22.00000, 382.80000, 5.66300, 21.90000, 334.00000, 18.20000,
0.10659, 1.91000, 0.41300, 10.58570, 4.00000, 22.00000, 376.04000, 5.93600, 19.50000, 334.00000, 20.60000,
8.98296, 18.10000, 0.77000, 2.12220, 24.00000, 20.20000, 377.73000, 6.21200, 97.40000, 666.00000, 17.80000,
3.84970, 18.10000, 0.77000, 2.50520, 24.00000, 20.20000, 391.34000, 6.39500, 91.00000, 666.00000, 21.70000,
5.20177, 18.10000, 0.77000, 2.72270, 24.00000, 20.20000, 395.43000, 6.12700, 83.40000, 666.00000, 22.70000,
4.26131, 18.10000, 0.77000, 2.50910, 24.00000, 20.20000, 390.74000, 6.11200, 81.30000, 666.00000, 22.60000,
4.54192, 18.10000, 0.77000, 2.51820, 24.00000, 20.20000, 374.56000, 6.39800, 88.00000, 666.00000, 25.00000,
3.83684, 18.10000, 0.77000, 2.29550, 24.00000, 20.20000, 350.65000, 6.25100, 91.10000, 666.00000, 19.90000,
3.67822, 18.10000, 0.77000, 2.10360, 24.00000, 20.20000, 380.79000, 5.36200, 96.20000, 666.00000, 20.80000,
4.22239, 18.10000, 0.77000, 1.90470, 24.00000, 20.20000, 353.04000, 5.80300, 89.00000, 666.00000, 16.80000,
3.47428, 18.10000, 0.71800, 1.90470, 24.00000, 20.20000, 354.55000, 8.78000, 82.90000, 666.00000, 21.90000,
4.55587, 18.10000, 0.71800, 1.61320, 24.00000, 20.20000, 354.70000, 3.56100, 87.90000, 666.00000, 27.50000,
3.69695, 18.10000, 0.71800, 1.75230, 24.00000, 20.20000, 316.03000, 4.96300, 91.40000, 666.00000, 21.90000,
13.52220, 18.10000, 0.63100, 1.51060, 24.00000, 20.20000, 131.42000, 3.86300, 100.00000, 666.00000, 23.10000,
4.89822, 18.10000, 0.63100, 1.33250, 24.00000, 20.20000, 375.52000, 4.97000, 100.00000, 666.00000, 50.00000,
5.66998, 18.10000, 0.63100, 1.35670, 24.00000, 20.20000, 375.33000, 6.68300, 96.80000, 666.00000, 50.00000,
6.53876, 18.10000, 0.63100, 1.20240, 24.00000, 20.20000, 392.05000, 7.01600, 97.50000, 666.00000, 50.00000,
9.23230, 18.10000, 0.63100, 1.16910, 24.00000, 20.20000, 366.15000, 6.21600, 100.00000, 666.00000, 50.00000,
8.26725, 18.10000, 0.66800, 1.12960, 24.00000, 20.20000, 347.88000, 5.87500, 89.60000, 666.00000, 50.00000,
11.10810, 18.10000, 0.66800, 1.17420, 24.00000, 20.20000, 396.90000, 4.90600, 100.00000, 666.00000, 13.80000,
18.49820, 18.10000, 0.66800, 1.13700, 24.00000, 20.20000, 396.90000, 4.13800, 100.00000, 666.00000, 13.80000,
19.60910, 18.10000, 0.67100, 1.31630, 24.00000, 20.20000, 396.90000, 7.31300, 97.90000, 666.00000, 15.00000,
15.28800, 18.10000, 0.67100, 1.34490, 24.00000, 20.20000, 363.02000, 6.64900, 93.30000, 666.00000, 13.90000,
9.82349, 18.10000, 0.67100, 1.35800, 24.00000, 20.20000, 396.90000, 6.79400, 98.80000, 666.00000, 13.30000,
23.64820, 18.10000, 0.67100, 1.38610, 24.00000, 20.20000, 396.90000, 6.38000, 96.20000, 666.00000, 13.10000,
17.86670, 18.10000, 0.67100, 1.38610, 24.00000, 20.20000, 393.74000, 6.22300, 100.00000, 666.00000, 10.20000,
88.97620, 18.10000, 0.67100, 1.41650, 24.00000, 20.20000, 396.90000, 6.96800, 91.90000, 666.00000, 10.40000,
15.87440, 18.10000, 0.67100, 1.51920, 24.00000, 20.20000, 396.90000, 6.54500, 99.10000, 666.00000, 10.90000,
9.18702, 18.10000, 0.70000, 1.58040, 24.00000, 20.20000, 396.90000, 5.53600, 100.00000, 666.00000, 11.30000,
7.99248, 18.10000, 0.70000, 1.53310, 24.00000, 20.20000, 396.90000, 5.52000, 100.00000, 666.00000, 12.30000,
20.08490, 18.10000, 0.70000, 1.43950, 24.00000, 20.20000, 285.83000, 4.36800, 91.20000, 666.00000, 8.80000,
16.81180, 18.10000, 0.70000, 1.42610, 24.00000, 20.20000, 396.90000, 5.27700, 98.10000, 666.00000, 7.20000,
24.39380, 18.10000, 0.70000, 1.46720, 24.00000, 20.20000, 396.90000, 4.65200, 100.00000, 666.00000, 10.50000,
22.59710, 18.10000, 0.70000, 1.51840, 24.00000, 20.20000, 396.90000, 5.00000, 89.50000, 666.00000, 7.40000,
14.33370, 18.10000, 0.70000, 1.58950, 24.00000, 20.20000, 372.92000, 4.88000, 100.00000, 666.00000, 10.20000,
8.15174, 18.10000, 0.70000, 1.72810, 24.00000, 20.20000, 396.90000, 5.39000, 98.90000, 666.00000, 11.50000,
6.96215, 18.10000, 0.70000, 1.92650, 24.00000, 20.20000, 394.43000, 5.71300, 97.00000, 666.00000, 15.10000,
5.29305, 18.10000, 0.70000, 2.16780, 24.00000, 20.20000, 378.38000, 6.05100, 82.50000, 666.00000, 23.20000,
11.57790, 18.10000, 0.70000, 1.77000, 24.00000, 20.20000, 396.90000, 5.03600, 97.00000, 666.00000, 9.70000,
8.64476, 18.10000, 0.69300, 1.79120, 24.00000, 20.20000, 396.90000, 6.19300, 92.60000, 666.00000, 13.80000,
13.35980, 18.10000, 0.69300, 1.78210, 24.00000, 20.20000, 396.90000, 5.88700, 94.70000, 666.00000, 12.70000,
8.71675, 18.10000, 0.69300, 1.72570, 24.00000, 20.20000, 391.98000, 6.47100, 98.80000, 666.00000, 13.10000,
5.87205, 18.10000, 0.69300, 1.67680, 24.00000, 20.20000, 396.90000, 6.40500, 96.00000, 666.00000, 12.50000,
7.67202, 18.10000, 0.69300, 1.63340, 24.00000, 20.20000, 393.10000, 5.74700, 98.90000, 666.00000, 8.50000,
38.35180, 18.10000, 0.69300, 1.48960, 24.00000, 20.20000, 396.90000, 5.45300, 100.00000, 666.00000, 5.00000,
9.91655, 18.10000, 0.69300, 1.50040, 24.00000, 20.20000, 338.16000, 5.85200, 77.80000, 666.00000, 6.30000,
25.04610, 18.10000, 0.69300, 1.58880, 24.00000, 20.20000, 396.90000, 5.98700, 100.00000, 666.00000, 5.60000,
14.23620, 18.10000, 0.69300, 1.57410, 24.00000, 20.20000, 396.90000, 6.34300, 100.00000, 666.00000, 7.20000,
9.59571, 18.10000, 0.69300, 1.63900, 24.00000, 20.20000, 376.11000, 6.40400, 100.00000, 666.00000, 12.10000,
24.80170, 18.10000, 0.69300, 1.70280, 24.00000, 20.20000, 396.90000, 5.34900, 96.00000, 666.00000, 8.30000,
41.52920, 18.10000, 0.69300, 1.60740, 24.00000, 20.20000, 329.46000, 5.53100, 85.40000, 666.00000, 8.50000,
67.92080, 18.10000, 0.69300, 1.42540, 24.00000, 20.20000, 384.97000, 5.68300, 100.00000, 666.00000, 5.00000,
20.71620, 18.10000, 0.65900, 1.17810, 24.00000, 20.20000, 370.22000, 4.13800, 100.00000, 666.00000, 11.90000,
11.95110, 18.10000, 0.65900, 1.28520, 24.00000, 20.20000, 332.09000, 5.60800, 100.00000, 666.00000, 27.90000,
7.40389, 18.10000, 0.59700, 1.45470, 24.00000, 20.20000, 314.64000, 5.61700, 97.90000, 666.00000, 17.20000,
14.43830, 18.10000, 0.59700, 1.46550, 24.00000, 20.20000, 179.36000, 6.85200, 100.00000, 666.00000, 27.50000,
51.13580, 18.10000, 0.59700, 1.41300, 24.00000, 20.20000, 2.60000, 5.75700, 100.00000, 666.00000, 15.00000,
14.05070, 18.10000, 0.59700, 1.52750, 24.00000, 20.20000, 35.05000, 6.65700, 100.00000, 666.00000, 17.20000,
18.81100, 18.10000, 0.59700, 1.55390, 24.00000, 20.20000, 28.79000, 4.62800, 100.00000, 666.00000, 17.90000,
28.65580, 18.10000, 0.59700, 1.58940, 24.00000, 20.20000, 210.97000, 5.15500, 100.00000, 666.00000, 16.30000,
45.74610, 18.10000, 0.69300, 1.65820, 24.00000, 20.20000, 88.27000, 4.51900, 100.00000, 666.00000, 7.00000,
18.08460, 18.10000, 0.67900, 1.83470, 24.00000, 20.20000, 27.25000, 6.43400, 100.00000, 666.00000, 7.20000,
10.83420, 18.10000, 0.67900, 1.81950, 24.00000, 20.20000, 21.57000, 6.78200, 90.80000, 666.00000, 7.50000,
25.94060, 18.10000, 0.67900, 1.64750, 24.00000, 20.20000, 127.36000, 5.30400, 89.10000, 666.00000, 10.40000,
73.53410, 18.10000, 0.67900, 1.80260, 24.00000, 20.20000, 16.45000, 5.95700, 100.00000, 666.00000, 8.80000,
11.81230, 18.10000, 0.71800, 1.79400, 24.00000, 20.20000, 48.45000, 6.82400, 76.50000, 666.00000, 8.40000,
11.08740, 18.10000, 0.71800, 1.85890, 24.00000, 20.20000, 318.75000, 6.41100, 100.00000, 666.00000, 16.70000,
7.02259, 18.10000, 0.71800, 1.87460, 24.00000, 20.20000, 319.98000, 6.00600, 95.30000, 666.00000, 14.20000,
12.04820, 18.10000, 0.61400, 1.95120, 24.00000, 20.20000, 291.55000, 5.64800, 87.60000, 666.00000, 20.80000,
7.05042, 18.10000, 0.61400, 2.02180, 24.00000, 20.20000, 2.52000, 6.10300, 85.10000, 666.00000, 13.40000,
8.79212, 18.10000, 0.58400, 2.06350, 24.00000, 20.20000, 3.65000, 5.56500, 70.60000, 666.00000, 11.70000,
15.86030, 18.10000, 0.67900, 1.90960, 24.00000, 20.20000, 7.68000, 5.89600, 95.40000, 666.00000, 8.30000,
12.24720, 18.10000, 0.58400, 1.99760, 24.00000, 20.20000, 24.65000, 5.83700, 59.70000, 666.00000, 10.20000,
37.66190, 18.10000, 0.67900, 1.86290, 24.00000, 20.20000, 18.82000, 6.20200, 78.70000, 666.00000, 10.90000,
7.36711, 18.10000, 0.67900, 1.93560, 24.00000, 20.20000, 96.73000, 6.19300, 78.10000, 666.00000, 11.00000,
9.33889, 18.10000, 0.67900, 1.96820, 24.00000, 20.20000, 60.72000, 6.38000, 95.60000, 666.00000, 9.50000,
8.49213, 18.10000, 0.58400, 2.05270, 24.00000, 20.20000, 83.45000, 6.34800, 86.10000, 666.00000, 14.50000,
10.06230, 18.10000, 0.58400, 2.08820, 24.00000, 20.20000, 81.33000, 6.83300, 94.30000, 666.00000, 14.10000,
6.44405, 18.10000, 0.58400, 2.20040, 24.00000, 20.20000, 97.95000, 6.42500, 74.80000, 666.00000, 16.10000,
5.58107, 18.10000, 0.71300, 2.31580, 24.00000, 20.20000, 100.19000, 6.43600, 87.90000, 666.00000, 14.30000,
13.91340, 18.10000, 0.71300, 2.22220, 24.00000, 20.20000, 100.63000, 6.20800, 95.00000, 666.00000, 11.70000,
11.16040, 18.10000, 0.74000, 2.12470, 24.00000, 20.20000, 109.85000, 6.62900, 94.60000, 666.00000, 13.40000,
14.42080, 18.10000, 0.74000, 2.00260, 24.00000, 20.20000, 27.49000, 6.46100, 93.30000, 666.00000, 9.60000,
15.17720, 18.10000, 0.74000, 1.91420, 24.00000, 20.20000, 9.32000, 6.15200, 100.00000, 666.00000, 8.70000,
13.67810, 18.10000, 0.74000, 1.82060, 24.00000, 20.20000, 68.95000, 5.93500, 87.90000, 666.00000, 8.40000,
9.39063, 18.10000, 0.74000, 1.81720, 24.00000, 20.20000, 396.90000, 5.62700, 93.90000, 666.00000, 12.80000,
22.05110, 18.10000, 0.74000, 1.86620, 24.00000, 20.20000, 391.45000, 5.81800, 92.40000, 666.00000, 10.50000,
9.72418, 18.10000, 0.74000, 2.06510, 24.00000, 20.20000, 385.96000, 6.40600, 97.20000, 666.00000, 17.10000,
5.66637, 18.10000, 0.74000, 2.00480, 24.00000, 20.20000, 395.69000, 6.21900, 100.00000, 666.00000, 18.40000,
9.96654, 18.10000, 0.74000, 1.97840, 24.00000, 20.20000, 386.73000, 6.48500, 100.00000, 666.00000, 15.40000,
12.80230, 18.10000, 0.74000, 1.89560, 24.00000, 20.20000, 240.52000, 5.85400, 96.60000, 666.00000, 10.80000,
10.67180, 18.10000, 0.74000, 1.98790, 24.00000, 20.20000, 43.06000, 6.45900, 94.80000, 666.00000, 11.80000,
6.28807, 18.10000, 0.74000, 2.07200, 24.00000, 20.20000, 318.01000, 6.34100, 96.40000, 666.00000, 14.90000,
9.92485, 18.10000, 0.74000, 2.19800, 24.00000, 20.20000, 388.52000, 6.25100, 96.60000, 666.00000, 12.60000,
9.32909, 18.10000, 0.71300, 2.26160, 24.00000, 20.20000, 396.90000, 6.18500, 98.70000, 666.00000, 14.10000,
7.52601, 18.10000, 0.71300, 2.18500, 24.00000, 20.20000, 304.21000, 6.41700, 98.30000, 666.00000, 13.00000,
6.71772, 18.10000, 0.71300, 2.32360, 24.00000, 20.20000, 0.32000, 6.74900, 92.60000, 666.00000, 13.40000,
5.44114, 18.10000, 0.71300, 2.35520, 24.00000, 20.20000, 355.29000, 6.65500, 98.20000, 666.00000, 15.20000,
5.09017, 18.10000, 0.71300, 2.36820, 24.00000, 20.20000, 385.09000, 6.29700, 91.80000, 666.00000, 16.10000,
8.24809, 18.10000, 0.71300, 2.45270, 24.00000, 20.20000, 375.87000, 7.39300, 99.30000, 666.00000, 17.80000,
9.51363, 18.10000, 0.71300, 2.49610, 24.00000, 20.20000, 6.68000, 6.72800, 94.10000, 666.00000, 14.90000,
4.75237, 18.10000, 0.71300, 2.43580, 24.00000, 20.20000, 50.92000, 6.52500, 86.50000, 666.00000, 14.10000,
4.66883, 18.10000, 0.71300, 2.58060, 24.00000, 20.20000, 10.48000, 5.97600, 87.90000, 666.00000, 12.70000,
8.20058, 18.10000, 0.71300, 2.77920, 24.00000, 20.20000, 3.50000, 5.93600, 80.30000, 666.00000, 13.50000,
7.75223, 18.10000, 0.71300, 2.78310, 24.00000, 20.20000, 272.21000, 6.30100, 83.70000, 666.00000, 14.90000,
6.80117, 18.10000, 0.71300, 2.71750, 24.00000, 20.20000, 396.90000, 6.08100, 84.40000, 666.00000, 20.00000,
4.81213, 18.10000, 0.71300, 2.59750, 24.00000, 20.20000, 255.23000, 6.70100, 90.00000, 666.00000, 16.40000,
3.69311, 18.10000, 0.71300, 2.56710, 24.00000, 20.20000, 391.43000, 6.37600, 88.40000, 666.00000, 17.70000,
6.65492, 18.10000, 0.71300, 2.73440, 24.00000, 20.20000, 396.90000, 6.31700, 83.00000, 666.00000, 19.50000,
5.82115, 18.10000, 0.71300, 2.80160, 24.00000, 20.20000, 393.82000, 6.51300, 89.90000, 666.00000, 20.20000,
7.83932, 18.10000, 0.65500, 2.96340, 24.00000, 20.20000, 396.90000, 6.20900, 65.40000, 666.00000, 21.40000,
3.16360, 18.10000, 0.65500, 3.06650, 24.00000, 20.20000, 334.40000, 5.75900, 48.20000, 666.00000, 19.90000,
3.77498, 18.10000, 0.65500, 2.87150, 24.00000, 20.20000, 22.01000, 5.95200, 84.70000, 666.00000, 19.00000,
4.42228, 18.10000, 0.58400, 2.54030, 24.00000, 20.20000, 331.29000, 6.00300, 94.50000, 666.00000, 19.10000,
15.57570, 18.10000, 0.58000, 2.90840, 24.00000, 20.20000, 368.74000, 5.92600, 71.00000, 666.00000, 19.10000,
13.07510, 18.10000, 0.58000, 2.82370, 24.00000, 20.20000, 396.90000, 5.71300, 56.70000, 666.00000, 20.10000,
4.34879, 18.10000, 0.58000, 3.03340, 24.00000, 20.20000, 396.90000, 6.16700, 84.00000, 666.00000, 19.90000,
4.03841, 18.10000, 0.53200, 3.09930, 24.00000, 20.20000, 395.33000, 6.22900, 90.70000, 666.00000, 19.60000,
3.56868, 18.10000, 0.58000, 2.89650, 24.00000, 20.20000, 393.37000, 6.43700, 75.00000, 666.00000, 23.20000,
4.64689, 18.10000, 0.61400, 2.53290, 24.00000, 20.20000, 374.68000, 6.98000, 67.60000, 666.00000, 29.80000,
8.05579, 18.10000, 0.58400, 2.42980, 24.00000, 20.20000, 352.58000, 5.42700, 95.40000, 666.00000, 13.80000,
6.39312, 18.10000, 0.58400, 2.20600, 24.00000, 20.20000, 302.76000, 6.16200, 97.40000, 666.00000, 13.30000,
4.87141, 18.10000, 0.61400, 2.30530, 24.00000, 20.20000, 396.21000, 6.48400, 93.60000, 666.00000, 16.70000,
15.02340, 18.10000, 0.61400, 2.10070, 24.00000, 20.20000, 349.48000, 5.30400, 97.30000, 666.00000, 12.00000,
10.23300, 18.10000, 0.61400, 2.17050, 24.00000, 20.20000, 379.70000, 6.18500, 96.70000, 666.00000, 14.60000,
14.33370, 18.10000, 0.61400, 1.95120, 24.00000, 20.20000, 383.32000, 6.22900, 88.00000, 666.00000, 21.40000,
5.82401, 18.10000, 0.53200, 3.42420, 24.00000, 20.20000, 396.90000, 6.24200, 64.70000, 666.00000, 23.00000,
5.70818, 18.10000, 0.53200, 3.33170, 24.00000, 20.20000, 393.07000, 6.75000, 74.90000, 666.00000, 23.70000,
5.73116, 18.10000, 0.53200, 3.41060, 24.00000, 20.20000, 395.28000, 7.06100, 77.00000, 666.00000, 25.00000,
2.81838, 18.10000, 0.53200, 4.09830, 24.00000, 20.20000, 392.92000, 5.76200, 40.30000, 666.00000, 21.80000,
2.37857, 18.10000, 0.58300, 3.72400, 24.00000, 20.20000, 370.73000, 5.87100, 41.90000, 666.00000, 20.60000,
3.67367, 18.10000, 0.58300, 3.99170, 24.00000, 20.20000, 388.62000, 6.31200, 51.90000, 666.00000, 21.20000,
5.69175, 18.10000, 0.58300, 3.54590, 24.00000, 20.20000, 392.68000, 6.11400, 79.80000, 666.00000, 19.10000,
4.83567, 18.10000, 0.58300, 3.15230, 24.00000, 20.20000, 388.22000, 5.90500, 53.20000, 666.00000, 20.60000,
0.15086, 27.74000, 0.60900, 1.82090, 4.00000, 20.10000, 395.09000, 5.45400, 92.70000, 711.00000, 15.20000,
0.18337, 27.74000, 0.60900, 1.75540, 4.00000, 20.10000, 344.05000, 5.41400, 98.30000, 711.00000, 7.00000,
0.20746, 27.74000, 0.60900, 1.82260, 4.00000, 20.10000, 318.43000, 5.09300, 98.00000, 711.00000, 8.10000,
0.10574, 27.74000, 0.60900, 1.86810, 4.00000, 20.10000, 390.11000, 5.98300, 98.80000, 711.00000, 13.60000,
0.11132, 27.74000, 0.60900, 2.10990, 4.00000, 20.10000, 396.90000, 5.98300, 83.50000, 711.00000, 20.10000,
0.17331, 9.69000, 0.58500, 2.38170, 6.00000, 19.20000, 396.90000, 5.70700, 54.00000, 391.00000, 21.80000,
0.27957, 9.69000, 0.58500, 2.38170, 6.00000, 19.20000, 396.90000, 5.92600, 42.60000, 391.00000, 24.50000,
0.17899, 9.69000, 0.58500, 2.79860, 6.00000, 19.20000, 393.29000, 5.67000, 28.80000, 391.00000, 23.10000,
0.28960, 9.69000, 0.58500, 2.79860, 6.00000, 19.20000, 396.90000, 5.39000, 72.90000, 391.00000, 19.70000,
0.26838, 9.69000, 0.58500, 2.89270, 6.00000, 19.20000, 396.90000, 5.79400, 70.60000, 391.00000, 18.30000,
0.23912, 9.69000, 0.58500, 2.40910, 6.00000, 19.20000, 396.90000, 6.01900, 65.30000, 391.00000, 21.20000,
0.17783, 9.69000, 0.58500, 2.39990, 6.00000, 19.20000, 395.77000, 5.56900, 73.50000, 391.00000, 17.50000,
0.22438, 9.69000, 0.58500, 2.49820, 6.00000, 19.20000, 396.90000, 6.02700, 79.70000, 391.00000, 16.80000,
0.06263, 11.93000, 0.57300, 2.47860, 1.00000, 21.00000, 391.99000, 6.59300, 69.10000, 273.00000, 22.40000,
0.04527, 11.93000, 0.57300, 2.28750, 1.00000, 21.00000, 396.90000, 6.12000, 76.70000, 273.00000, 20.60000,
0.06076, 11.93000, 0.57300, 2.16750, 1.00000, 21.00000, 396.90000, 6.97600, 91.00000, 273.00000, 23.90000,
0.10959, 11.93000, 0.57300, 2.38890, 1.00000, 21.00000, 393.45000, 6.79400, 89.30000, 273.00000, 22.00000,
0.04741, 11.93000, 0.57300, 2.50500, 1.00000, 21.00000, 396.90000, 6.03000, 80.80000, 273.00000, 11.90000,
})

406
stat/car_data_test.go Normal file
View File

@@ -0,0 +1,406 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stat_test
import "github.com/gonum/matrix/mat64"
// ASA Car Exposition Data of Ramos and Donoho (1983)
// http://lib.stat.cmu.edu/datasets/cars.desc
// http://lib.stat.cmu.edu/datasets/cars.data
// Columns are: displacement, horsepower, weight, acceleration, MPG.
var carData = mat64.NewDense(392, 5, []float64{
307.0, 130.0, 3504.0, 12.0, 18.0,
350.0, 165.0, 3693.0, 11.5, 15.0,
318.0, 150.0, 3436.0, 11.0, 18.0,
304.0, 150.0, 3433.0, 12.0, 16.0,
302.0, 140.0, 3449.0, 10.5, 17.0,
429.0, 198.0, 4341.0, 10.0, 15.0,
454.0, 220.0, 4354.0, 9.0, 14.0,
440.0, 215.0, 4312.0, 8.5, 14.0,
455.0, 225.0, 4425.0, 10.0, 14.0,
390.0, 190.0, 3850.0, 8.5, 15.0,
383.0, 170.0, 3563.0, 10.0, 15.0,
340.0, 160.0, 3609.0, 8.0, 14.0,
400.0, 150.0, 3761.0, 9.5, 15.0,
455.0, 225.0, 3086.0, 10.0, 14.0,
113.0, 95.0, 2372.0, 15.0, 24.0,
198.0, 95.0, 2833.0, 15.5, 22.0,
199.0, 97.0, 2774.0, 15.5, 18.0,
200.0, 85.0, 2587.0, 16.0, 21.0,
97.0, 88.0, 2130.0, 14.5, 27.0,
97.0, 46.0, 1835.0, 20.5, 26.0,
110.0, 87.0, 2672.0, 17.5, 25.0,
107.0, 90.0, 2430.0, 14.5, 24.0,
104.0, 95.0, 2375.0, 17.5, 25.0,
121.0, 113.0, 2234.0, 12.5, 26.0,
199.0, 90.0, 2648.0, 15.0, 21.0,
360.0, 215.0, 4615.0, 14.0, 10.0,
307.0, 200.0, 4376.0, 15.0, 10.0,
318.0, 210.0, 4382.0, 13.5, 11.0,
304.0, 193.0, 4732.0, 18.5, 9.0,
97.0, 88.0, 2130.0, 14.5, 27.0,
140.0, 90.0, 2264.0, 15.5, 28.0,
113.0, 95.0, 2228.0, 14.0, 25.0,
232.0, 100.0, 2634.0, 13.0, 19.0,
225.0, 105.0, 3439.0, 15.5, 16.0,
250.0, 100.0, 3329.0, 15.5, 17.0,
250.0, 88.0, 3302.0, 15.5, 19.0,
232.0, 100.0, 3288.0, 15.5, 18.0,
350.0, 165.0, 4209.0, 12.0, 14.0,
400.0, 175.0, 4464.0, 11.5, 14.0,
351.0, 153.0, 4154.0, 13.5, 14.0,
318.0, 150.0, 4096.0, 13.0, 14.0,
383.0, 180.0, 4955.0, 11.5, 12.0,
400.0, 170.0, 4746.0, 12.0, 13.0,
400.0, 175.0, 5140.0, 12.0, 13.0,
258.0, 110.0, 2962.0, 13.5, 18.0,
140.0, 72.0, 2408.0, 19.0, 22.0,
250.0, 100.0, 3282.0, 15.0, 19.0,
250.0, 88.0, 3139.0, 14.5, 18.0,
122.0, 86.0, 2220.0, 14.0, 23.0,
116.0, 90.0, 2123.0, 14.0, 28.0,
79.0, 70.0, 2074.0, 19.5, 30.0,
88.0, 76.0, 2065.0, 14.5, 30.0,
71.0, 65.0, 1773.0, 19.0, 31.0,
72.0, 69.0, 1613.0, 18.0, 35.0,
97.0, 60.0, 1834.0, 19.0, 27.0,
91.0, 70.0, 1955.0, 20.5, 26.0,
113.0, 95.0, 2278.0, 15.5, 24.0,
97.5, 80.0, 2126.0, 17.0, 25.0,
97.0, 54.0, 2254.0, 23.5, 23.0,
140.0, 90.0, 2408.0, 19.5, 20.0,
122.0, 86.0, 2226.0, 16.5, 21.0,
350.0, 165.0, 4274.0, 12.0, 13.0,
400.0, 175.0, 4385.0, 12.0, 14.0,
318.0, 150.0, 4135.0, 13.5, 15.0,
351.0, 153.0, 4129.0, 13.0, 14.0,
304.0, 150.0, 3672.0, 11.5, 17.0,
429.0, 208.0, 4633.0, 11.0, 11.0,
350.0, 155.0, 4502.0, 13.5, 13.0,
350.0, 160.0, 4456.0, 13.5, 12.0,
400.0, 190.0, 4422.0, 12.5, 13.0,
70.0, 97.0, 2330.0, 13.5, 19.0,
304.0, 150.0, 3892.0, 12.5, 15.0,
307.0, 130.0, 4098.0, 14.0, 13.0,
302.0, 140.0, 4294.0, 16.0, 13.0,
318.0, 150.0, 4077.0, 14.0, 14.0,
121.0, 112.0, 2933.0, 14.5, 18.0,
121.0, 76.0, 2511.0, 18.0, 22.0,
120.0, 87.0, 2979.0, 19.5, 21.0,
96.0, 69.0, 2189.0, 18.0, 26.0,
122.0, 86.0, 2395.0, 16.0, 22.0,
97.0, 92.0, 2288.0, 17.0, 28.0,
120.0, 97.0, 2506.0, 14.5, 23.0,
98.0, 80.0, 2164.0, 15.0, 28.0,
97.0, 88.0, 2100.0, 16.5, 27.0,
350.0, 175.0, 4100.0, 13.0, 13.0,
304.0, 150.0, 3672.0, 11.5, 14.0,
350.0, 145.0, 3988.0, 13.0, 13.0,
302.0, 137.0, 4042.0, 14.5, 14.0,
318.0, 150.0, 3777.0, 12.5, 15.0,
429.0, 198.0, 4952.0, 11.5, 12.0,
400.0, 150.0, 4464.0, 12.0, 13.0,
351.0, 158.0, 4363.0, 13.0, 13.0,
318.0, 150.0, 4237.0, 14.5, 14.0,
440.0, 215.0, 4735.0, 11.0, 13.0,
455.0, 225.0, 4951.0, 11.0, 12.0,
360.0, 175.0, 3821.0, 11.0, 13.0,
225.0, 105.0, 3121.0, 16.5, 18.0,
250.0, 100.0, 3278.0, 18.0, 16.0,
232.0, 100.0, 2945.0, 16.0, 18.0,
250.0, 88.0, 3021.0, 16.5, 18.0,
198.0, 95.0, 2904.0, 16.0, 23.0,
97.0, 46.0, 1950.0, 21.0, 26.0,
400.0, 150.0, 4997.0, 14.0, 11.0,
400.0, 167.0, 4906.0, 12.5, 12.0,
360.0, 170.0, 4654.0, 13.0, 13.0,
350.0, 180.0, 4499.0, 12.5, 12.0,
232.0, 100.0, 2789.0, 15.0, 18.0,
97.0, 88.0, 2279.0, 19.0, 20.0,
140.0, 72.0, 2401.0, 19.5, 21.0,
108.0, 94.0, 2379.0, 16.5, 22.0,
70.0, 90.0, 2124.0, 13.5, 18.0,
122.0, 85.0, 2310.0, 18.5, 19.0,
155.0, 107.0, 2472.0, 14.0, 21.0,
98.0, 90.0, 2265.0, 15.5, 26.0,
350.0, 145.0, 4082.0, 13.0, 15.0,
400.0, 230.0, 4278.0, 9.5, 16.0,
68.0, 49.0, 1867.0, 19.5, 29.0,
116.0, 75.0, 2158.0, 15.5, 24.0,
114.0, 91.0, 2582.0, 14.0, 20.0,
121.0, 112.0, 2868.0, 15.5, 19.0,
318.0, 150.0, 3399.0, 11.0, 15.0,
121.0, 110.0, 2660.0, 14.0, 24.0,
156.0, 122.0, 2807.0, 13.5, 20.0,
350.0, 180.0, 3664.0, 11.0, 11.0,
198.0, 95.0, 3102.0, 16.5, 20.0,
232.0, 100.0, 2901.0, 16.0, 19.0,
250.0, 100.0, 3336.0, 17.0, 15.0,
79.0, 67.0, 1950.0, 19.0, 31.0,
122.0, 80.0, 2451.0, 16.5, 26.0,
71.0, 65.0, 1836.0, 21.0, 32.0,
140.0, 75.0, 2542.0, 17.0, 25.0,
250.0, 100.0, 3781.0, 17.0, 16.0,
258.0, 110.0, 3632.0, 18.0, 16.0,
225.0, 105.0, 3613.0, 16.5, 18.0,
302.0, 140.0, 4141.0, 14.0, 16.0,
350.0, 150.0, 4699.0, 14.5, 13.0,
318.0, 150.0, 4457.0, 13.5, 14.0,
302.0, 140.0, 4638.0, 16.0, 14.0,
304.0, 150.0, 4257.0, 15.5, 14.0,
98.0, 83.0, 2219.0, 16.5, 29.0,
79.0, 67.0, 1963.0, 15.5, 26.0,
97.0, 78.0, 2300.0, 14.5, 26.0,
76.0, 52.0, 1649.0, 16.5, 31.0,
83.0, 61.0, 2003.0, 19.0, 32.0,
90.0, 75.0, 2125.0, 14.5, 28.0,
90.0, 75.0, 2108.0, 15.5, 24.0,
116.0, 75.0, 2246.0, 14.0, 26.0,
120.0, 97.0, 2489.0, 15.0, 24.0,
108.0, 93.0, 2391.0, 15.5, 26.0,
79.0, 67.0, 2000.0, 16.0, 31.0,
225.0, 95.0, 3264.0, 16.0, 19.0,
250.0, 105.0, 3459.0, 16.0, 18.0,
250.0, 72.0, 3432.0, 21.0, 15.0,
250.0, 72.0, 3158.0, 19.5, 15.0,
400.0, 170.0, 4668.0, 11.5, 16.0,
350.0, 145.0, 4440.0, 14.0, 15.0,
318.0, 150.0, 4498.0, 14.5, 16.0,
351.0, 148.0, 4657.0, 13.5, 14.0,
231.0, 110.0, 3907.0, 21.0, 17.0,
250.0, 105.0, 3897.0, 18.5, 16.0,
258.0, 110.0, 3730.0, 19.0, 15.0,
225.0, 95.0, 3785.0, 19.0, 18.0,
231.0, 110.0, 3039.0, 15.0, 21.0,
262.0, 110.0, 3221.0, 13.5, 20.0,
302.0, 129.0, 3169.0, 12.0, 13.0,
97.0, 75.0, 2171.0, 16.0, 29.0,
140.0, 83.0, 2639.0, 17.0, 23.0,
232.0, 100.0, 2914.0, 16.0, 20.0,
140.0, 78.0, 2592.0, 18.5, 23.0,
134.0, 96.0, 2702.0, 13.5, 24.0,
90.0, 71.0, 2223.0, 16.5, 25.0,
119.0, 97.0, 2545.0, 17.0, 24.0,
171.0, 97.0, 2984.0, 14.5, 18.0,
90.0, 70.0, 1937.0, 14.0, 29.0,
232.0, 90.0, 3211.0, 17.0, 19.0,
115.0, 95.0, 2694.0, 15.0, 23.0,
120.0, 88.0, 2957.0, 17.0, 23.0,
121.0, 98.0, 2945.0, 14.5, 22.0,
121.0, 115.0, 2671.0, 13.5, 25.0,
91.0, 53.0, 1795.0, 17.5, 33.0,
107.0, 86.0, 2464.0, 15.5, 28.0,
116.0, 81.0, 2220.0, 16.9, 25.0,
140.0, 92.0, 2572.0, 14.9, 25.0,
98.0, 79.0, 2255.0, 17.7, 26.0,
101.0, 83.0, 2202.0, 15.3, 27.0,
305.0, 140.0, 4215.0, 13.0, 17.5,
318.0, 150.0, 4190.0, 13.0, 16.0,
304.0, 120.0, 3962.0, 13.9, 15.5,
351.0, 152.0, 4215.0, 12.8, 14.5,
225.0, 100.0, 3233.0, 15.4, 22.0,
250.0, 105.0, 3353.0, 14.5, 22.0,
200.0, 81.0, 3012.0, 17.6, 24.0,
232.0, 90.0, 3085.0, 17.6, 22.5,
85.0, 52.0, 2035.0, 22.2, 29.0,
98.0, 60.0, 2164.0, 22.1, 24.5,
90.0, 70.0, 1937.0, 14.2, 29.0,
91.0, 53.0, 1795.0, 17.4, 33.0,
225.0, 100.0, 3651.0, 17.7, 20.0,
250.0, 78.0, 3574.0, 21.0, 18.0,
250.0, 110.0, 3645.0, 16.2, 18.5,
258.0, 95.0, 3193.0, 17.8, 17.5,
97.0, 71.0, 1825.0, 12.2, 29.5,
85.0, 70.0, 1990.0, 17.0, 32.0,
97.0, 75.0, 2155.0, 16.4, 28.0,
140.0, 72.0, 2565.0, 13.6, 26.5,
130.0, 102.0, 3150.0, 15.7, 20.0,
318.0, 150.0, 3940.0, 13.2, 13.0,
120.0, 88.0, 3270.0, 21.9, 19.0,
156.0, 108.0, 2930.0, 15.5, 19.0,
168.0, 120.0, 3820.0, 16.7, 16.5,
350.0, 180.0, 4380.0, 12.1, 16.5,
350.0, 145.0, 4055.0, 12.0, 13.0,
302.0, 130.0, 3870.0, 15.0, 13.0,
318.0, 150.0, 3755.0, 14.0, 13.0,
98.0, 68.0, 2045.0, 18.5, 31.5,
111.0, 80.0, 2155.0, 14.8, 30.0,
79.0, 58.0, 1825.0, 18.6, 36.0,
122.0, 96.0, 2300.0, 15.5, 25.5,
85.0, 70.0, 1945.0, 16.8, 33.5,
305.0, 145.0, 3880.0, 12.5, 17.5,
260.0, 110.0, 4060.0, 19.0, 17.0,
318.0, 145.0, 4140.0, 13.7, 15.5,
302.0, 130.0, 4295.0, 14.9, 15.0,
250.0, 110.0, 3520.0, 16.4, 17.5,
231.0, 105.0, 3425.0, 16.9, 20.5,
225.0, 100.0, 3630.0, 17.7, 19.0,
250.0, 98.0, 3525.0, 19.0, 18.5,
400.0, 180.0, 4220.0, 11.1, 16.0,
350.0, 170.0, 4165.0, 11.4, 15.5,
400.0, 190.0, 4325.0, 12.2, 15.5,
351.0, 149.0, 4335.0, 14.5, 16.0,
97.0, 78.0, 1940.0, 14.5, 29.0,
151.0, 88.0, 2740.0, 16.0, 24.5,
97.0, 75.0, 2265.0, 18.2, 26.0,
140.0, 89.0, 2755.0, 15.8, 25.5,
98.0, 63.0, 2051.0, 17.0, 30.5,
98.0, 83.0, 2075.0, 15.9, 33.5,
97.0, 67.0, 1985.0, 16.4, 30.0,
97.0, 78.0, 2190.0, 14.1, 30.5,
146.0, 97.0, 2815.0, 14.5, 22.0,
121.0, 110.0, 2600.0, 12.8, 21.5,
80.0, 110.0, 2720.0, 13.5, 21.5,
90.0, 48.0, 1985.0, 21.5, 43.1,
98.0, 66.0, 1800.0, 14.4, 36.1,
78.0, 52.0, 1985.0, 19.4, 32.8,
85.0, 70.0, 2070.0, 18.6, 39.4,
91.0, 60.0, 1800.0, 16.4, 36.1,
260.0, 110.0, 3365.0, 15.5, 19.9,
318.0, 140.0, 3735.0, 13.2, 19.4,
302.0, 139.0, 3570.0, 12.8, 20.2,
231.0, 105.0, 3535.0, 19.2, 19.2,
200.0, 95.0, 3155.0, 18.2, 20.5,
200.0, 85.0, 2965.0, 15.8, 20.2,
140.0, 88.0, 2720.0, 15.4, 25.1,
225.0, 100.0, 3430.0, 17.2, 20.5,
232.0, 90.0, 3210.0, 17.2, 19.4,
231.0, 105.0, 3380.0, 15.8, 20.6,
200.0, 85.0, 3070.0, 16.7, 20.8,
225.0, 110.0, 3620.0, 18.7, 18.6,
258.0, 120.0, 3410.0, 15.1, 18.1,
305.0, 145.0, 3425.0, 13.2, 19.2,
231.0, 165.0, 3445.0, 13.4, 17.7,
302.0, 139.0, 3205.0, 11.2, 18.1,
318.0, 140.0, 4080.0, 13.7, 17.5,
98.0, 68.0, 2155.0, 16.5, 30.0,
134.0, 95.0, 2560.0, 14.2, 27.5,
119.0, 97.0, 2300.0, 14.7, 27.2,
105.0, 75.0, 2230.0, 14.5, 30.9,
134.0, 95.0, 2515.0, 14.8, 21.1,
156.0, 105.0, 2745.0, 16.7, 23.2,
151.0, 85.0, 2855.0, 17.6, 23.8,
119.0, 97.0, 2405.0, 14.9, 23.9,
131.0, 103.0, 2830.0, 15.9, 20.3,
163.0, 125.0, 3140.0, 13.6, 17.0,
121.0, 115.0, 2795.0, 15.7, 21.6,
163.0, 133.0, 3410.0, 15.8, 16.2,
89.0, 71.0, 1990.0, 14.9, 31.5,
98.0, 68.0, 2135.0, 16.6, 29.5,
231.0, 115.0, 3245.0, 15.4, 21.5,
200.0, 85.0, 2990.0, 18.2, 19.8,
140.0, 88.0, 2890.0, 17.3, 22.3,
232.0, 90.0, 3265.0, 18.2, 20.2,
225.0, 110.0, 3360.0, 16.6, 20.6,
305.0, 130.0, 3840.0, 15.4, 17.0,
302.0, 129.0, 3725.0, 13.4, 17.6,
351.0, 138.0, 3955.0, 13.2, 16.5,
318.0, 135.0, 3830.0, 15.2, 18.2,
350.0, 155.0, 4360.0, 14.9, 16.9,
351.0, 142.0, 4054.0, 14.3, 15.5,
267.0, 125.0, 3605.0, 15.0, 19.2,
360.0, 150.0, 3940.0, 13.0, 18.5,
89.0, 71.0, 1925.0, 14.0, 31.9,
86.0, 65.0, 1975.0, 15.2, 34.1,
98.0, 80.0, 1915.0, 14.4, 35.7,
121.0, 80.0, 2670.0, 15.0, 27.4,
183.0, 77.0, 3530.0, 20.1, 25.4,
350.0, 125.0, 3900.0, 17.4, 23.0,
141.0, 71.0, 3190.0, 24.8, 27.2,
260.0, 90.0, 3420.0, 22.2, 23.9,
105.0, 70.0, 2200.0, 13.2, 34.2,
105.0, 70.0, 2150.0, 14.9, 34.5,
85.0, 65.0, 2020.0, 19.2, 31.8,
91.0, 69.0, 2130.0, 14.7, 37.3,
151.0, 90.0, 2670.0, 16.0, 28.4,
173.0, 115.0, 2595.0, 11.3, 28.8,
173.0, 115.0, 2700.0, 12.9, 26.8,
151.0, 90.0, 2556.0, 13.2, 33.5,
98.0, 76.0, 2144.0, 14.7, 41.5,
89.0, 60.0, 1968.0, 18.8, 38.1,
98.0, 70.0, 2120.0, 15.5, 32.1,
86.0, 65.0, 2019.0, 16.4, 37.2,
151.0, 90.0, 2678.0, 16.5, 28.0,
140.0, 88.0, 2870.0, 18.1, 26.4,
151.0, 90.0, 3003.0, 20.1, 24.3,
225.0, 90.0, 3381.0, 18.7, 19.1,
97.0, 78.0, 2188.0, 15.8, 34.3,
134.0, 90.0, 2711.0, 15.5, 29.8,
120.0, 75.0, 2542.0, 17.5, 31.3,
119.0, 92.0, 2434.0, 15.0, 37.0,
108.0, 75.0, 2265.0, 15.2, 32.2,
86.0, 65.0, 2110.0, 17.9, 46.6,
156.0, 105.0, 2800.0, 14.4, 27.9,
85.0, 65.0, 2110.0, 19.2, 40.8,
90.0, 48.0, 2085.0, 21.7, 44.3,
90.0, 48.0, 2335.0, 23.7, 43.4,
121.0, 67.0, 2950.0, 19.9, 36.4,
146.0, 67.0, 3250.0, 21.8, 30.0,
91.0, 67.0, 1850.0, 13.8, 44.6,
97.0, 67.0, 2145.0, 18.0, 33.8,
89.0, 62.0, 1845.0, 15.3, 29.8,
168.0, 132.0, 2910.0, 11.4, 32.7,
70.0, 100.0, 2420.0, 12.5, 23.7,
122.0, 88.0, 2500.0, 15.1, 35.0,
107.0, 72.0, 2290.0, 17.0, 32.4,
135.0, 84.0, 2490.0, 15.7, 27.2,
151.0, 84.0, 2635.0, 16.4, 26.6,
156.0, 92.0, 2620.0, 14.4, 25.8,
173.0, 110.0, 2725.0, 12.6, 23.5,
135.0, 84.0, 2385.0, 12.9, 30.0,
79.0, 58.0, 1755.0, 16.9, 39.1,
86.0, 64.0, 1875.0, 16.4, 39.0,
81.0, 60.0, 1760.0, 16.1, 35.1,
97.0, 67.0, 2065.0, 17.8, 32.3,
85.0, 65.0, 1975.0, 19.4, 37.0,
89.0, 62.0, 2050.0, 17.3, 37.7,
91.0, 68.0, 1985.0, 16.0, 34.1,
105.0, 63.0, 2215.0, 14.9, 34.7,
98.0, 65.0, 2045.0, 16.2, 34.4,
98.0, 65.0, 2380.0, 20.7, 29.9,
105.0, 74.0, 2190.0, 14.2, 33.0,
107.0, 75.0, 2210.0, 14.4, 33.7,
108.0, 75.0, 2350.0, 16.8, 32.4,
119.0, 100.0, 2615.0, 14.8, 32.9,
120.0, 74.0, 2635.0, 18.3, 31.6,
141.0, 80.0, 3230.0, 20.4, 28.1,
145.0, 76.0, 3160.0, 19.6, 30.7,
168.0, 116.0, 2900.0, 12.6, 25.4,
146.0, 120.0, 2930.0, 13.8, 24.2,
231.0, 110.0, 3415.0, 15.8, 22.4,
350.0, 105.0, 3725.0, 19.0, 26.6,
200.0, 88.0, 3060.0, 17.1, 20.2,
225.0, 85.0, 3465.0, 16.6, 17.6,
112.0, 88.0, 2605.0, 19.6, 28.0,
112.0, 88.0, 2640.0, 18.6, 27.0,
112.0, 88.0, 2395.0, 18.0, 34.0,
112.0, 85.0, 2575.0, 16.2, 31.0,
135.0, 84.0, 2525.0, 16.0, 29.0,
151.0, 90.0, 2735.0, 18.0, 27.0,
140.0, 92.0, 2865.0, 16.4, 24.0,
105.0, 74.0, 1980.0, 15.3, 36.0,
91.0, 68.0, 2025.0, 18.2, 37.0,
91.0, 68.0, 1970.0, 17.6, 31.0,
105.0, 63.0, 2125.0, 14.7, 38.0,
98.0, 70.0, 2125.0, 17.3, 36.0,
120.0, 88.0, 2160.0, 14.5, 36.0,
107.0, 75.0, 2205.0, 14.5, 36.0,
108.0, 70.0, 2245.0, 16.9, 34.0,
91.0, 67.0, 1965.0, 15.0, 38.0,
91.0, 67.0, 1965.0, 15.7, 32.0,
91.0, 67.0, 1995.0, 16.2, 38.0,
181.0, 110.0, 2945.0, 16.4, 25.0,
262.0, 85.0, 3015.0, 17.0, 38.0,
156.0, 92.0, 2585.0, 14.5, 26.0,
232.0, 112.0, 2835.0, 14.7, 22.0,
144.0, 96.0, 2665.0, 13.9, 32.0,
135.0, 84.0, 2370.0, 13.0, 36.0,
151.0, 90.0, 2950.0, 17.3, 27.0,
140.0, 86.0, 2790.0, 15.6, 27.0,
97.0, 52.0, 2130.0, 24.6, 44.0,
135.0, 84.0, 2295.0, 11.6, 32.0,
120.0, 79.0, 2625.0, 18.6, 28.0,
119.0, 82.0, 2720.0, 19.4, 31.0,
})

165
stat/cca_example_test.go Normal file
View File

@@ -0,0 +1,165 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stat_test
import (
"fmt"
"log"
"github.com/gonum/floats"
"github.com/gonum/matrix/mat64"
"github.com/gonum/stat"
)
// symView is a helper for getting a View of a SymDense.
type symView struct {
	sym *mat64.SymDense // matrix the view reads from

	i, j, r, c int // upper-left corner (i, j) and extent (r, c) of the view
}

// Dims returns the dimensions of the view, not of the underlying matrix.
func (s symView) Dims() (r, c int) { return s.r, s.c }

// At returns the element at row i, column j of the view, offsetting the
// indices into the underlying SymDense.
func (s symView) At(i, j int) float64 {
	if i < 0 || s.r <= i {
		panic("i out of bounds")
	}
	if j < 0 || s.c <= j {
		panic("j out of bounds")
	}
	return s.sym.At(s.i+i, s.j+j)
}

// T returns the transpose of the view.
func (s symView) T() mat64.Matrix { return mat64.Transpose{s} }
// ExampleCC demonstrates canonical correlation analysis on the Boston
// Housing data. The printed values in the Output comment are compared by
// the testing framework, so the computation must stay byte-stable.
func ExampleCC() {
	// This example is directly analogous to Example 3.5 on page 87 of
	// Koch, Inge. Analysis of multivariate and high-dimensional data.
	// Vol. 32. Cambridge University Press, 2013. ISBN: 9780521887939

	// bostonData is the Boston Housing Data of Harrison and Rubinfeld (1978)
	n, _ := bostonData.Dims()
	var xd, yd = 7, 4
	// The variables (columns) of bostonData can be partitioned into two sets:
	// those that deal with environmental/social variables (xdata), and those
	// that contain information regarding the individual (ydata). Because the
	// variables can be naturally partitioned in this way, these data are
	// appropriate for canonical correlation analysis. The columns (variables)
	// of xdata are, in order:
	// per capita crime rate by town,
	// proportion of non-retail business acres per town,
	// nitric oxide concentration (parts per 10 million),
	// weighted distances to Boston employment centres,
	// index of accessibility to radial highways,
	// pupil-teacher ratio by town, and
	// proportion of blacks by town.
	xdata := bostonData.Slice(0, n, 0, xd)
	// The columns (variables) of ydata are, in order:
	// average number of rooms per dwelling,
	// proportion of owner-occupied units built prior to 1940,
	// full-value property-tax rate per $10000, and
	// median value of owner-occupied homes in $1000s.
	ydata := bostonData.Slice(0, n, xd, xd+yd)

	// For comparison, calculate the correlation matrix for the original data.
	var cor mat64.SymDense
	stat.CorrelationMatrix(&cor, bostonData, nil)
	// Extract just those correlations that are between xdata and ydata.
	var corRaw = symView{sym: &cor, i: 0, j: xd, r: xd, c: yd}
	// Note that the strongest correlation between individual variables is 0.91
	// between the 5th variable of xdata (index of accessibility to radial
	// highways) and the 3rd variable of ydata (full-value property-tax rate per
	// $10000).
	fmt.Printf("corRaw = %.4f", mat64.Formatted(corRaw, mat64.Prefix(" ")))

	// Calculate the canonical correlations.
	var cc stat.CC
	err := cc.CanonicalCorrelations(xdata, ydata, nil)
	if err != nil {
		log.Fatal(err)
	}
	// Unpack cc.
	ccors := cc.Corrs(nil)
	pVecs := cc.Left(nil, true)
	qVecs := cc.Right(nil, true)
	phiVs := cc.Left(nil, false)
	psiVs := cc.Right(nil, false)

	// Canonical Correlation Matrix, or the correlations between the sphered
	// data. Built by scaling each column of pVecs by the corresponding
	// canonical correlation and multiplying by qVecs transposed.
	var corSph mat64.Dense
	corSph.Clone(pVecs)
	col := make([]float64, xd)
	for j := 0; j < yd; j++ {
		mat64.Col(col, j, &corSph)
		floats.Scale(ccors[j], col)
		corSph.SetCol(j, col)
	}
	corSph.Product(&corSph, qVecs.T())
	fmt.Printf("\n\ncorSph = %.4f", mat64.Formatted(&corSph, mat64.Prefix(" ")))

	// Canonical Correlations. Note that the first canonical correlation is
	// 0.95, stronger than the greatest correlation in the original data, and
	// much stronger than the greatest correlation in the sphered data.
	fmt.Printf("\n\nccors = %.4f", ccors)
	// Left and right eigenvectors of the canonical correlation matrix.
	fmt.Printf("\n\npVecs = %.4f", mat64.Formatted(pVecs, mat64.Prefix(" ")))
	fmt.Printf("\n\nqVecs = %.4f", mat64.Formatted(qVecs, mat64.Prefix(" ")))
	// Canonical Correlation Transforms. These can be useful as they represent
	// the canonical variables as linear combinations of the original variables.
	fmt.Printf("\n\nphiVs = %.4f", mat64.Formatted(phiVs, mat64.Prefix(" ")))
	fmt.Printf("\n\npsiVs = %.4f", mat64.Formatted(psiVs, mat64.Prefix(" ")))

	// Output:
	// corRaw = ⎡-0.2192 0.3527 0.5828 -0.3883⎤
	// ⎢-0.3917 0.6448 0.7208 -0.4837⎥
	// ⎢-0.3022 0.7315 0.6680 -0.4273⎥
	// ⎢ 0.2052 -0.7479 -0.5344 0.2499⎥
	// ⎢-0.2098 0.4560 0.9102 -0.3816⎥
	// ⎢-0.3555 0.2615 0.4609 -0.5078⎥
	// ⎣ 0.1281 -0.2735 -0.4418 0.3335⎦
	//
	// corSph = ⎡ 0.0118 0.0525 0.2300 -0.1363⎤
	// ⎢-0.1810 0.3213 0.3814 -0.1412⎥
	// ⎢ 0.0166 0.2241 0.0104 -0.2235⎥
	// ⎢ 0.0346 -0.5481 -0.0034 -0.1994⎥
	// ⎢ 0.0303 -0.0956 0.7152 0.2039⎥
	// ⎢-0.0298 -0.0022 0.0739 -0.3703⎥
	// ⎣-0.1226 -0.0746 -0.3899 0.1541⎦
	//
	// ccors = [0.9451 0.6787 0.5714 0.2010]
	//
	// pVecs = ⎡-0.2574 0.0158 0.2122 -0.0946⎤
	// ⎢-0.4837 0.3837 0.1474 0.6597⎥
	// ⎢-0.0801 0.3494 0.3287 -0.2862⎥
	// ⎢ 0.1278 -0.7337 0.4851 0.2248⎥
	// ⎢-0.6969 -0.4342 -0.3603 0.0291⎥
	// ⎢-0.0991 0.0503 0.6384 0.1022⎥
	// ⎣ 0.4260 0.0323 -0.2290 0.6419⎦
	//
	// qVecs = ⎡ 0.0182 -0.1583 -0.0067 -0.9872⎤
	// ⎢-0.2348 0.9483 -0.1462 -0.1554⎥
	// ⎢-0.9701 -0.2406 -0.0252 0.0209⎥
	// ⎣ 0.0593 -0.1330 -0.9889 0.0291⎦
	//
	// phiVs = ⎡-0.0027 0.0093 0.0490 -0.0155⎤
	// ⎢-0.0429 -0.0242 0.0361 0.1839⎥
	// ⎢-1.2248 5.6031 5.8094 -4.7927⎥
	// ⎢-0.0044 -0.3424 0.4470 0.1150⎥
	// ⎢-0.0742 -0.1193 -0.1116 0.0022⎥
	// ⎢-0.0233 0.1046 0.3853 -0.0161⎥
	// ⎣ 0.0001 0.0005 -0.0030 0.0082⎦
	//
	// psiVs = ⎡ 0.0302 -0.3002 0.0878 -1.9583⎤
	// ⎢-0.0065 0.0392 -0.0118 -0.0061⎥
	// ⎢-0.0052 -0.0046 -0.0023 0.0008⎥
	// ⎣ 0.0020 0.0037 -0.1293 0.1038⎦
}

191
stat/cca_test.go Normal file
View File

@@ -0,0 +1,191 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stat_test
import (
"testing"
"github.com/gonum/floats"
"github.com/gonum/matrix/mat64"
"github.com/gonum/stat"
)
// TestCanonicalCorrelations exercises stat.CC against reference results:
// the first case was verified with R, the remaining two against worked
// examples in Koch, Inge. Analysis of multivariate and high-dimensional
// data. Vol. 32. Cambridge University Press, 2013.
func TestCanonicalCorrelations(t *testing.T) {
tests:
	for i, test := range []struct {
		xdata     mat64.Matrix
		ydata     mat64.Matrix
		weights   []float64 // nil means unweighted
		wantCorrs []float64
		wantpVecs *mat64.Dense
		wantqVecs *mat64.Dense
		wantphiVs *mat64.Dense
		wantpsiVs *mat64.Dense
		epsilon   float64
	}{
		// Test results verified using R.
		{ // Truncated iris data, Sepal vs Petal measurements.
			xdata: mat64.NewDense(10, 2, []float64{
				5.1, 3.5,
				4.9, 3.0,
				4.7, 3.2,
				4.6, 3.1,
				5.0, 3.6,
				5.4, 3.9,
				4.6, 3.4,
				5.0, 3.4,
				4.4, 2.9,
				4.9, 3.1,
			}),
			ydata: mat64.NewDense(10, 2, []float64{
				1.4, 0.2,
				1.4, 0.2,
				1.3, 0.2,
				1.5, 0.2,
				1.4, 0.2,
				1.7, 0.4,
				1.4, 0.3,
				1.5, 0.2,
				1.4, 0.2,
				1.5, 0.1,
			}),
			wantCorrs: []float64{0.7250624174504773, 0.5547679185730191},
			wantpVecs: mat64.NewDense(2, 2, []float64{
				0.0765914610875867, 0.9970625597666721,
				0.9970625597666721, -0.0765914610875868,
			}),
			wantqVecs: mat64.NewDense(2, 2, []float64{
				0.3075184850910837, 0.9515421069649439,
				0.9515421069649439, -0.3075184850910837,
			}),
			wantphiVs: mat64.NewDense(2, 2, []float64{
				-1.9794877596804641, 5.2016325219025124,
				4.5211829944066553, -2.7263663170835697,
			}),
			wantpsiVs: mat64.NewDense(2, 2, []float64{
				-0.0613084818030103, 10.8514169865438941,
				12.7209032660734298, -7.6793888180353775,
			}),
			epsilon: 1e-12,
		},
		// Test results compared to those results presented in examples by
		// Koch, Inge. Analysis of multivariate and high-dimensional data.
		// Vol. 32. Cambridge University Press, 2013. ISBN: 9780521887939
		{ // ASA Car Exposition Data of Ramos and Donoho (1983)
			// Displacement, Horsepower, Weight
			xdata: carData.Slice(0, 392, 0, 3),
			// Acceleration, MPG
			ydata:     carData.Slice(0, 392, 3, 5),
			wantCorrs: []float64{0.8782187384352336, 0.6328187219216761},
			wantpVecs: mat64.NewDense(3, 2, []float64{
				0.3218296374829181, 0.3947540257657075,
				0.4162807660635797, 0.7573719053303306,
				0.8503740401982725, -0.5201509936144236,
			}),
			wantqVecs: mat64.NewDense(2, 2, []float64{
				-0.5161984172278830, -0.8564690269072364,
				-0.8564690269072364, 0.5161984172278830,
			}),
			wantphiVs: mat64.NewDense(3, 2, []float64{
				0.0025033152994308, 0.0047795464118615,
				0.0201923608080173, 0.0409150208725958,
				-0.0000247374128745, -0.0026766435161875,
			}),
			wantpsiVs: mat64.NewDense(2, 2, []float64{
				-0.1666196759760772, -0.3637393866139658,
				-0.0915512109649727, 0.1077863777929168,
			}),
			epsilon: 1e-12,
		},
		// Test results compared to those results presented in examples by
		// Koch, Inge. Analysis of multivariate and high-dimensional data.
		// Vol. 32. Cambridge University Press, 2013. ISBN: 9780521887939
		{ // Boston Housing Data of Harrison and Rubinfeld (1978)
			// Per capita crime rate by town,
			// Proportion of non-retail business acres per town,
			// Nitric oxide concentration (parts per 10 million),
			// Weighted distances to Boston employment centres,
			// Index of accessibility to radial highways,
			// Pupil-teacher ratio by town, Proportion of blacks by town
			xdata: bostonData.Slice(0, 506, 0, 7),
			// Average number of rooms per dwelling,
			// Proportion of owner-occupied units built prior to 1940,
			// Full-value property-tax rate per $10000,
			// Median value of owner-occupied homes in $1000s
			ydata:     bostonData.Slice(0, 506, 7, 11),
			wantCorrs: []float64{0.9451239443886021, 0.6786622733370654, 0.5714338361583764, 0.2009739704710440},
			wantpVecs: mat64.NewDense(7, 4, []float64{
				-0.2574391924541903, 0.0158477516621194, 0.2122169934631024, -0.0945733803894706,
				-0.4836594430018478, 0.3837101908138468, 0.1474448317415911, 0.6597324886718275,
				-0.0800776365873296, 0.3493556742809252, 0.3287336458109373, -0.2862040444334655,
				0.1277586360386374, -0.7337427663667596, 0.4851134819037011, 0.2247964865970192,
				-0.6969432006136684, -0.4341748776002893, -0.3602872887636357, 0.0290661608626292,
				-0.0990903250057199, 0.0503411215453873, 0.6384330631742202, 0.1022367136218303,
				0.4260459963765036, 0.0323334351308141, -0.2289527516030810, 0.6419232947608805,
			}),
			wantqVecs: mat64.NewDense(4, 4, []float64{
				0.0181660502363264, -0.1583489460479038, -0.0066723577642883, -0.9871935400650649,
				-0.2347699045986119, 0.9483314614936594, -0.1462420505631345, -0.1554470767919033,
				-0.9700704038477141, -0.2406071741000039, -0.0251838984227037, 0.0209134074358349,
				0.0593000682318482, -0.1330460003097728, -0.9889057151969489, 0.0291161494720761,
			}),
			wantphiVs: mat64.NewDense(7, 4, []float64{
				-0.0027462234108197, 0.0093444513500898, 0.0489643932714296, -0.0154967189805819,
				-0.0428564455279537, -0.0241708702119420, 0.0360723472093996, 0.1838983230588095,
				-1.2248435648802380, 5.6030921364723980, 5.8094144583797025, -4.7926812190419676,
				-0.0043684825094649, -0.3424101164977618, 0.4469961215717917, 0.1150161814353696,
				-0.0741534069521954, -0.1193135794923700, -0.1115518305471460, 0.0021638758323088,
				-0.0233270323101624, 0.1046330818178399, 0.3853045975077387, -0.0160927870102877,
				0.0001293051387859, 0.0004540746921446, -0.0030296315865440, 0.0081895477974654,
			}),
			wantpsiVs: mat64.NewDense(4, 4, []float64{
				0.0301593362017375, -0.3002219289647127, 0.0878217377593682, -1.9583226531517062,
				-0.0065483104073892, 0.0392212086716247, -0.0117570776209991, -0.0061113064481860,
				-0.0052075523350125, -0.0045770200452960, -0.0022762313289592, 0.0008441873006821,
				0.0020111735096327, 0.0037352799829930, -0.1292578071621794, 0.1037709056329765,
			}),
			epsilon: 1e-12,
		},
	} {
		var cc stat.CC
		var corrs []float64
		var pVecs, qVecs *mat64.Dense
		var phiVs, psiVs *mat64.Dense
		// Run each case twice: the second pass reuses the receiver and the
		// destination slices/matrices from the first, checking that CC and
		// its accessors support result reuse.
		for j := 0; j < 2; j++ {
			err := cc.CanonicalCorrelations(test.xdata, test.ydata, test.weights)
			if err != nil {
				t.Errorf("%d use %d: unexpected error: %v", i, j, err)
				continue tests
			}
			corrs = cc.Corrs(corrs)
			pVecs = cc.Left(pVecs, true)
			qVecs = cc.Right(qVecs, true)
			phiVs = cc.Left(phiVs, false)
			psiVs = cc.Right(psiVs, false)
			if !floats.EqualApprox(corrs, test.wantCorrs, test.epsilon) {
				t.Errorf("%d use %d: unexpected variance result got:%v, want:%v",
					i, j, corrs, test.wantCorrs)
			}
			if !mat64.EqualApprox(pVecs, test.wantpVecs, test.epsilon) {
				t.Errorf("%d use %d: unexpected CCA result got:\n%v\nwant:\n%v",
					i, j, mat64.Formatted(pVecs), mat64.Formatted(test.wantpVecs))
			}
			if !mat64.EqualApprox(qVecs, test.wantqVecs, test.epsilon) {
				t.Errorf("%d use %d: unexpected CCA result got:\n%v\nwant:\n%v",
					i, j, mat64.Formatted(qVecs), mat64.Formatted(test.wantqVecs))
			}
			if !mat64.EqualApprox(phiVs, test.wantphiVs, test.epsilon) {
				t.Errorf("%d use %d: unexpected CCA result got:\n%v\nwant:\n%v",
					i, j, mat64.Formatted(phiVs), mat64.Formatted(test.wantphiVs))
			}
			if !mat64.EqualApprox(psiVs, test.wantpsiVs, test.epsilon) {
				t.Errorf("%d use %d: unexpected CCA result got:\n%v\nwant:\n%v",
					i, j, mat64.Formatted(psiVs), mat64.Formatted(test.wantpsiVs))
			}
		}
	}
}

183
stat/combin/combin.go Normal file
View File

@@ -0,0 +1,183 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package combin implements routines involving combinatorics (permutations,
// combinations, etc.).
package combin
import "math"
const (
	badNegInput = "combin: negative input"
	badSetSize  = "combin: n < k"
	badInput    = "combin: wrong input slice length"
)

// Binomial returns the binomial coefficient of (n,k), also commonly referred to
// as "n choose k".
//
// The binomial coefficient, C(n,k), is the number of unordered combinations of
// k elements in a set that is n elements big, and is defined as
//
//  C(n,k) = n!/((n-k)!k!)
//
// n and k must be non-negative with n >= k, otherwise Binomial will panic.
// No check is made for overflow.
func Binomial(n, k int) int {
	switch {
	case n < 0 || k < 0:
		panic(badNegInput)
	case n < k:
		panic(badSetSize)
	}
	// C(n,k) == C(n,n-k), so work with the smaller of the two to keep the
	// loop below as short as possible.
	if n-k < k {
		k = n - k
	}
	// Evaluate the multiplicative formula
	//  C(n,k) = Π_{i=1..k} (n-k+i)/i
	// left to right; each intermediate value is itself a binomial
	// coefficient, so every division is exact.
	prod := 1
	for i := 1; i <= k; i++ {
		prod = (n - k + i) * prod / i
	}
	return prod
}

// GeneralizedBinomial returns the generalized binomial coefficient of (n, k),
// defined as
//  Γ(n+1) / (Γ(k+1) Γ(n-k+1))
// where Γ is the Gamma function. GeneralizedBinomial is useful for continuous
// relaxations of the binomial coefficient, or when the binomial coefficient value
// may overflow int. In the latter case, one may use math/big for an exact
// computation.
//
// n and k must be non-negative with n >= k, otherwise GeneralizedBinomial will panic.
func GeneralizedBinomial(n, k float64) float64 {
	lg := LogGeneralizedBinomial(n, k)
	return math.Exp(lg)
}

// LogGeneralizedBinomial returns the log of the generalized binomial coefficient.
// See GeneralizedBinomial for more information.
func LogGeneralizedBinomial(n, k float64) float64 {
	if n < 0 || k < 0 {
		panic(badNegInput)
	}
	if n < k {
		panic(badSetSize)
	}
	// Work in log space: log C(n,k) = lnΓ(n+1) - lnΓ(k+1) - lnΓ(n-k+1).
	// The sign returned by Lgamma is always 1 for positive arguments.
	lgN, _ := math.Lgamma(n + 1)
	lgK, _ := math.Lgamma(k + 1)
	lgNK, _ := math.Lgamma(n - k + 1)
	return lgN - lgK - lgNK
}
// CombinationGenerator generates combinations iteratively. Combinations may be
// called to generate all combinations collectively.
type CombinationGenerator struct {
	n         int   // size of the set being drawn from
	k         int   // number of elements in each combination
	previous  []int // combination produced by the last call to Next; nil before the first call
	remaining int   // combinations not yet generated; -1 once exhausted (see Next)
}
// NewCombinationGenerator returns a CombinationGenerator for generating the
// combinations of k elements from a set of size n.
//
// n and k must be non-negative with n >= k, otherwise NewCombinationGenerator
// will panic.
func NewCombinationGenerator(n, k int) *CombinationGenerator {
	gen := &CombinationGenerator{n: n, k: k}
	// Binomial validates n and k, panicking on invalid input.
	gen.remaining = Binomial(n, k)
	return gen
}
// Next advances the iterator if there are combinations remaining to be generated,
// and returns false if all combinations have been generated. Next must be called
// to initialize the first value before calling Combination or Combination will
// panic. The value returned by Combination is only changed during calls to Next.
func (c *CombinationGenerator) Next() bool {
	if c.remaining <= 0 {
		// remaining reaches zero before the final Combination call, so
		// zero cannot serve as Combination's exhaustion sentinel; mark
		// exhaustion with -1 instead.
		c.remaining = -1
		return false
	}
	if c.previous != nil {
		nextCombination(c.previous, c.n, c.k)
	} else {
		// First call: start from the lexicographically smallest
		// combination, {0, 1, ..., k-1}.
		c.previous = make([]int, c.k)
		for i := range c.previous {
			c.previous[i] = i
		}
	}
	c.remaining--
	return true
}
// Combination generates the next combination. If combination is non-nil, it
// must have length k and the result will be stored in-place into combination.
// If combination is nil a new slice will be allocated and returned. If all of
// the combinations have already been constructed (Next() returns false),
// Combination will panic.
//
// Next must be called to initialize the first value before calling Combination
// or Combination will panic. The value returned by Combination is only changed
// during calls to Next.
func (c *CombinationGenerator) Combination(combination []int) []int {
	switch {
	case c.remaining == -1:
		panic("combin: all combinations have been generated")
	case c.previous == nil:
		panic("combin: Combination called before Next")
	}
	if combination == nil {
		combination = make([]int, c.k)
	} else if len(combination) != c.k {
		panic(badInput)
	}
	copy(combination, c.previous)
	return combination
}
// Combinations generates all of the combinations of k elements from a
// set of size n. The returned slice has length Binomial(n,k) and each inner slice
// has length k.
//
// n and k must be non-negative with n >= k, otherwise Combinations will panic.
//
// CombinationGenerator may alternatively be used to generate the combinations
// iteratively instead of collectively.
func Combinations(n, k int) [][]int {
	total := Binomial(n, k)
	list := make([][]int, total)
	if total == 0 {
		return list
	}
	// Seed with the lexicographically first combination, then derive each
	// subsequent entry from its predecessor.
	first := make([]int, k)
	for i := range first {
		first[i] = i
	}
	list[0] = first
	for i := 1; i < total; i++ {
		cur := make([]int, k)
		copy(cur, list[i-1])
		nextCombination(cur, n, k)
		list[i] = cur
	}
	return list
}
// nextCombination overwrites s with the combination that follows it in
// lexicographic order. s must hold a valid k-combination of {0, ..., n-1};
// the final combination is left unchanged.
func nextCombination(s []int, n, k int) {
	// Scan from the right for the first position that has not yet reached
	// its maximum possible value, which is n+j-k for position j.
	j := k - 1
	for j >= 0 && s[j] == n+j-k {
		j--
	}
	if j < 0 {
		// s is already the last combination.
		return
	}
	s[j]++
	// Reset the tail to the smallest ascending run above s[j].
	for l := j + 1; l < k; l++ {
		s[l] = s[j] + l - j
	}
}

181
stat/combin/combin_test.go Normal file
View File

@@ -0,0 +1,181 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package combin
import (
"math/big"
"testing"
"github.com/gonum/floats"
)
// intSosMatch reports whether the two slices of slices hold identical values.
func intSosMatch(a, b [][]int) bool {
	if len(a) != len(b) {
		return false
	}
	for i := range a {
		if len(a[i]) != len(b[i]) {
			return false
		}
		for j := range a[i] {
			if a[i][j] != b[i][j] {
				return false
			}
		}
	}
	return true
}
// binomialTests is a table of n-choose-k cases with known answers, covering
// the edges (k == 0, k == n) and the full rows 5, 6 and 20 of Pascal's
// triangle.
var binomialTests = []struct {
	n, k, ans int
}{
	{0, 0, 1},
	{5, 0, 1},
	{5, 1, 5},
	{5, 2, 10},
	{5, 3, 10},
	{5, 4, 5},
	{5, 5, 1},
	{6, 0, 1},
	{6, 1, 6},
	{6, 2, 15},
	{6, 3, 20},
	{6, 4, 15},
	{6, 5, 6},
	{6, 6, 1},
	{20, 0, 1},
	{20, 1, 20},
	{20, 2, 190},
	{20, 3, 1140},
	{20, 4, 4845},
	{20, 5, 15504},
	{20, 6, 38760},
	{20, 7, 77520},
	{20, 8, 125970},
	{20, 9, 167960},
	{20, 10, 184756},
	{20, 11, 167960},
	{20, 12, 125970},
	{20, 13, 77520},
	{20, 14, 38760},
	{20, 15, 15504},
	{20, 16, 4845},
	{20, 17, 1140},
	{20, 18, 190},
	{20, 19, 20},
	{20, 20, 1},
}
func TestBinomial(t *testing.T) {
for cas, test := range binomialTests {
ans := Binomial(test.n, test.k)
if ans != test.ans {
t.Errorf("Case %v: Binomial mismatch. Got %v, want %v.", cas, ans, test.ans)
}
}
var (
n = 61
want big.Int
got big.Int
)
for k := 0; k <= n; k++ {
want.Binomial(int64(n), int64(k))
got.SetInt64(int64(Binomial(n, k)))
if want.Cmp(&got) != 0 {
t.Errorf("Case n=%v,k=%v: Binomial mismatch for large n. Got %v, want %v.", n, k, got, want)
}
}
}
func TestGeneralizedBinomial(t *testing.T) {
for cas, test := range binomialTests {
ans := GeneralizedBinomial(float64(test.n), float64(test.k))
if !floats.EqualWithinAbsOrRel(ans, float64(test.ans), 1e-14, 1e-14) {
t.Errorf("Case %v: Binomial mismatch. Got %v, want %v.", cas, ans, test.ans)
}
}
}
// TestCombinations checks the generated combinations against hand-enumerated
// expected values for small n and k.
func TestCombinations(t *testing.T) {
	for cas, test := range []struct {
		n, k int
		data [][]int
	}{
		{
			n:    1,
			k:    1,
			data: [][]int{{0}},
		},
		{
			n:    2,
			k:    1,
			data: [][]int{{0}, {1}},
		},
		{
			n:    2,
			k:    2,
			data: [][]int{{0, 1}},
		},
		{
			n:    3,
			k:    1,
			data: [][]int{{0}, {1}, {2}},
		},
		{
			n:    3,
			k:    2,
			data: [][]int{{0, 1}, {0, 2}, {1, 2}},
		},
		{
			n:    3,
			k:    3,
			data: [][]int{{0, 1, 2}},
		},
		{
			n:    4,
			k:    1,
			data: [][]int{{0}, {1}, {2}, {3}},
		},
		{
			n:    4,
			k:    2,
			data: [][]int{{0, 1}, {0, 2}, {0, 3}, {1, 2}, {1, 3}, {2, 3}},
		},
		{
			n:    4,
			k:    3,
			data: [][]int{{0, 1, 2}, {0, 1, 3}, {0, 2, 3}, {1, 2, 3}},
		},
		{
			n:    4,
			k:    4,
			data: [][]int{{0, 1, 2, 3}},
		},
	} {
		data := Combinations(test.n, test.k)
		if !intSosMatch(data, test.data) {
			// "Cas" typo in the original message corrected to "Case".
			t.Errorf("Case %v: Generated combinations mismatch. Got %v, want %v.", cas, data, test.data)
		}
	}
}
// TestCombinationGenerator checks that the iterative generator reproduces
// the collective output of Combinations for all n up to 10.
func TestCombinationGenerator(t *testing.T) {
	for n := 0; n <= 10; n++ {
		for k := 1; k <= n; k++ {
			want := Combinations(n, k)
			gen := NewCombinationGenerator(n, k)
			got := make([][]int, 0, len(want))
			for gen.Next() {
				got = append(got, gen.Combination(nil))
			}
			if !intSosMatch(want, got) {
				t.Errorf("Combinations and generated combinations do not match. n = %v, k = %v", n, k)
			}
		}
	}
}

8
stat/distmat/general.go Normal file
View File

@@ -0,0 +1,8 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package distmat provides probability distributions over matrices.
package distmat
// badDim is the panic message used when a caller-supplied matrix or slice
// does not match the dimension of the distribution.
var badDim = "distmat: dimension mismatch"

210
stat/distmat/wishart.go Normal file
View File

@@ -0,0 +1,210 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distmat
import (
"math"
"math/rand"
"sync"
"github.com/gonum/mathext"
"github.com/gonum/matrix"
"github.com/gonum/matrix/mat64"
"github.com/gonum/stat/distuv"
)
// Wishart is a distribution over d×d symmetric positive definite matrices. It
// is parametrized by a scalar degrees of freedom parameter ν and a d×d positive
// definite matrix V.
//
// The Wishart PDF is given by
//  p(X) = [|X|^((ν-d-1)/2) * exp(-tr(V^-1 * X)/2)] / [2^(ν*d/2) * |V|^(ν/2) * Γ_d(ν/2)]
// where X is a d×d PSD matrix, ν > d-1, |·| denotes the determinant, tr is the
// trace and Γ_d is the multivariate gamma function.
//
// See https://en.wikipedia.org/wiki/Wishart_distribution for more information.
type Wishart struct {
	nu  float64    // degrees of freedom ν; NewWishart enforces ν > d-1
	src *rand.Rand // random source; nil means the global rand functions are used

	dim     int            // order d of the scale matrix V
	cholv   mat64.Cholesky // Cholesky factorization of V
	logdetv float64        // log(|V|), cached from cholv
	upper   mat64.TriDense // upper-triangular factor U of V, with V = U^T U

	once sync.Once
	v    *mat64.SymDense // V itself; lazily materialized by setV, only stored if needed
}
// NewWishart returns a new Wishart distribution with the given shape matrix
// and degrees of freedom parameter, together with a boolean reporting whether
// the creation was successful (it fails when v is not positive definite).
//
// NewWishart panics if nu <= d - 1 where d is the order of v.
func NewWishart(v mat64.Symmetric, nu float64, src *rand.Rand) (*Wishart, bool) {
	dim := v.Symmetric()
	if nu <= float64(dim-1) {
		panic("wishart: nu must be greater than dim-1")
	}
	// A failed factorization means v is not positive definite.
	var chol mat64.Cholesky
	if !chol.Factorize(v) {
		return nil, false
	}
	w := Wishart{
		nu:      nu,
		src:     src,
		dim:     dim,
		cholv:   chol,
		logdetv: chol.LogDet(),
	}
	w.upper.UFromCholesky(&chol)
	return &w, true
}
// MeanSym returns the mean matrix of the distribution, ν·V, as a symmetric
// matrix. If x is nil, a new matrix is allocated and returned. If x is not
// nil, the result is stored in-place into x and MeanSym will panic if the
// order of x is not equal to the order of the receiver.
func (w *Wishart) MeanSym(x *mat64.SymDense) *mat64.SymDense {
	switch {
	case x == nil:
		x = mat64.NewSymDense(w.dim, nil)
	case x.Symmetric() != w.dim:
		panic(badDim)
	}
	w.setV()
	x.CopySym(w.v)
	x.ScaleSym(w.nu, x)
	return x
}
// ProbSym returns the probability of the symmetric matrix x. If x is not
// positive definite (the Cholesky decomposition fails), it has 0 probability.
func (w *Wishart) ProbSym(x mat64.Symmetric) float64 {
	return math.Exp(w.LogProbSym(x))
}

// LogProbSym returns the log of the probability of the input symmetric matrix.
//
// LogProbSym returns -∞ if the input matrix is not positive definite (the
// Cholesky decomposition fails).
func (w *Wishart) LogProbSym(x mat64.Symmetric) float64 {
	if x.Symmetric() != w.dim {
		panic(badDim)
	}
	var chol mat64.Cholesky
	if ok := chol.Factorize(x); !ok {
		return math.Inf(-1)
	}
	return w.logProbSymChol(&chol)
}

// LogProbSymChol returns the log of the probability of the input symmetric
// matrix given its Cholesky decomposition.
func (w *Wishart) LogProbSymChol(cholX *mat64.Cholesky) float64 {
	if cholX.Size() != w.dim {
		panic(badDim)
	}
	return w.logProbSymChol(cholX)
}
// logProbSymChol computes the log density at the matrix X represented by its
// Cholesky decomposition. The PDF is
//  p(X) = [|X|^((ν-d-1)/2) * exp(-tr(V^-1 * X)/2)] / [2^(ν*d/2) * |V|^(ν/2) * Γ_d(ν/2)]
// so the log PDF is
//  (ν-d-1)/2 * log(|X|) - tr(V^-1 * X)/2 - (ν*d/2)*log(2) - ν/2 * log(|V|) - log(Γ_d(ν/2))
func (w *Wishart) logProbSymChol(cholX *mat64.Cholesky) float64 {
	logdetx := cholX.LogDet()

	// Compute tr(V^-1 * X) using the factorization X = U^T * U, which avoids
	// explicitly inverting V: solve V * M = U^T, then tr(M * U) is the trace.
	var u mat64.TriDense
	u.UFromCholesky(cholX)
	var vinvx mat64.Dense
	err := vinvx.SolveCholesky(&w.cholv, u.T())
	if err != nil {
		return math.Inf(-1)
	}
	vinvx.Mul(&vinvx, &u)
	tr := mat64.Trace(&vinvx)

	// w.nu is already a float64; the previous float64(w.nu) conversion was a no-op.
	fnu := w.nu
	fdim := float64(w.dim)
	return 0.5*((fnu-fdim-1)*logdetx-tr-fnu*fdim*math.Ln2-fnu*w.logdetv) - mathext.MvLgamma(0.5*fnu, w.dim)
}
// RandSym generates a random symmetric matrix from the distribution. If x is
// nil a new matrix is allocated, otherwise the sample is stored into x.
func (w *Wishart) RandSym(x *mat64.SymDense) *mat64.SymDense {
	if x == nil {
		x = new(mat64.SymDense)
	}
	var chol mat64.Cholesky
	w.RandChol(&chol)
	x.FromCholesky(&chol)
	return x
}
// RandChol generates the Cholesky decomposition of a random matrix from the
// distribution. If c is nil, a new Cholesky is allocated and returned.
func (w *Wishart) RandChol(c *mat64.Cholesky) *mat64.Cholesky {
	// TODO(btracey): Modify the code if the underlying data from c is exposed
	// to avoid the dim^2 allocation here.

	// Use the Bartlett Decomposition, which says that
	//  X ~ L A A^T L^T
	// Where A is a lower triangular matrix in which the diagonal of A is
	// generated from the square roots of χ^2 random variables, and the
	// off-diagonals are generated from standard normal variables.
	// The above gives the cholesky decomposition of X, where L_x = L A.
	//
	// mat64 works with the upper triangular decomposition, so we would like to do
	// the same. We can instead say that
	//  U_x = L_x^T = (L * A)^T = A^T * L^T = A^T * U
	// Instead, generate A^T, by using the procedure above, except as an upper
	// triangular matrix.
	norm := distuv.Normal{
		Mu:     0,
		Sigma:  1,
		Source: w.src,
	}

	// Diagonal of A^T: square roots of χ² draws with ν-i degrees of freedom.
	t := mat64.NewTriDense(w.dim, matrix.Upper, nil)
	for i := 0; i < w.dim; i++ {
		v := distuv.ChiSquared{
			K:   w.nu - float64(i),
			Src: w.src,
		}.Rand()
		t.SetTri(i, i, math.Sqrt(v))
	}
	// Strict upper triangle of A^T: standard normal draws.
	for i := 0; i < w.dim; i++ {
		for j := i + 1; j < w.dim; j++ {
			t.SetTri(i, j, norm.Rand())
		}
	}

	// Form U_x = A^T * U in place.
	t.MulTri(t, &w.upper)
	if c == nil {
		c = &mat64.Cholesky{}
	}
	c.SetFromU(t)
	return c
}
// setV lazily computes and caches the scale matrix V of the distribution from
// its stored Cholesky factorization. It is safe to call repeatedly; the
// reconstruction happens at most once.
func (w *Wishart) setV() {
	w.once.Do(func() {
		w.v = mat64.NewSymDense(w.dim, nil)
		w.v.FromCholesky(&w.cholv)
	})
}

View File

@@ -0,0 +1,129 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distmat
import (
"math"
"math/rand"
"testing"
"github.com/gonum/floats"
"github.com/gonum/matrix/mat64"
)
// TestWishart checks LogProbSym against reference values (computed with
// scipy), checks consistency between the matrix and Cholesky log-probability
// entry points, and exercises the nil/non-nil forms of the Rand methods.
func TestWishart(t *testing.T) {
	for c, test := range []struct {
		v   *mat64.SymDense
		nu  float64
		xs  []*mat64.SymDense
		lps []float64
	}{
		// Logprob data compared with scipy.
		{
			v:  mat64.NewSymDense(2, []float64{1, 0, 0, 1}),
			nu: 4,
			xs: []*mat64.SymDense{
				mat64.NewSymDense(2, []float64{0.9, 0.1, 0.1, 0.9}),
			},
			lps: []float64{-4.2357432031863409},
		},
		{
			v:  mat64.NewSymDense(2, []float64{0.8, -0.2, -0.2, 0.7}),
			nu: 5,
			xs: []*mat64.SymDense{
				mat64.NewSymDense(2, []float64{0.9, 0.1, 0.1, 0.9}),
				mat64.NewSymDense(2, []float64{0.3, -0.1, -0.1, 0.7}),
			},
			lps: []float64{-4.2476495605333575, -4.9993285370378633},
		},
		{
			v:  mat64.NewSymDense(3, []float64{0.8, 0.3, 0.1, 0.3, 0.7, -0.1, 0.1, -0.1, 7}),
			nu: 5,
			xs: []*mat64.SymDense{
				mat64.NewSymDense(3, []float64{1, 0.2, -0.3, 0.2, 0.6, -0.2, -0.3, -0.2, 6}),
			},
			lps: []float64{-11.010982249229421},
		},
	} {
		w, ok := NewWishart(test.v, test.nu, nil)
		if !ok {
			panic("bad test")
		}
		for i, x := range test.xs {
			lp := w.LogProbSym(x)

			// The Cholesky-based entry point must agree with the matrix one.
			var chol mat64.Cholesky
			ok := chol.Factorize(x)
			if !ok {
				panic("bad test")
			}
			lpc := w.LogProbSymChol(&chol)
			if math.Abs(lp-lpc) > 1e-14 {
				t.Errorf("Case %d, test %d: probability mismatch between chol and not", c, i)
			}
			if !floats.EqualWithinAbsOrRel(lp, test.lps[i], 1e-14, 1e-14) {
				t.Errorf("Case %d, test %d: got %v, want %v", c, i, lp, test.lps[i])
			}
		}
		// Smoke-test both the allocating and in-place forms of the samplers.
		ch := w.RandChol(nil)
		w.RandChol(ch)
		s := w.RandSym(nil)
		w.RandSym(s)
	}
}

// TestWishartRand draws many samples and checks that their empirical mean
// approaches the analytic mean ν·V within a loose tolerance.
func TestWishartRand(t *testing.T) {
	for c, test := range []struct {
		v       *mat64.SymDense
		nu      float64
		samples int
		tol     float64
	}{
		{
			v:       mat64.NewSymDense(2, []float64{0.8, -0.2, -0.2, 0.7}),
			nu:      5,
			samples: 30000,
			tol:     3e-2,
		},
		{
			v:       mat64.NewSymDense(3, []float64{0.8, 0.3, 0.1, 0.3, 0.7, -0.1, 0.1, -0.1, 7}),
			nu:      5,
			samples: 300000,
			tol:     3e-2,
		},
		{
			v: mat64.NewSymDense(4, []float64{
				0.8, 0.3, 0.1, -0.2,
				0.3, 0.7, -0.1, 0.4,
				0.1, -0.1, 7, 1,
				-0.2, -0.1, 1, 6}),
			nu:      6,
			samples: 300000,
			tol:     3e-2,
		},
	} {
		rnd := rand.New(rand.NewSource(1))
		dim := test.v.Symmetric()
		w, ok := NewWishart(test.v, test.nu, rnd)
		if !ok {
			panic("bad test")
		}
		// Accumulate mean += sample/samples one draw at a time.
		mean := mat64.NewSymDense(dim, nil)
		x := mat64.NewSymDense(dim, nil)
		for i := 0; i < test.samples; i++ {
			w.RandSym(x)
			x.ScaleSym(1/float64(test.samples), x)
			mean.AddSym(mean, x)
		}
		trueMean := w.MeanSym(nil)
		if !mat64.EqualApprox(trueMean, mean, test.tol) {
			t.Errorf("Case %d: Mismatch between estimated and true mean. Got\n%0.4v\nWant\n%0.4v\n", c, mat64.Formatted(mean), mat64.Formatted(trueMean))
		}
	}
}

144
stat/distmv/dirichlet.go Normal file
View File

@@ -0,0 +1,144 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distmv
import (
"math"
"math/rand"
"github.com/gonum/floats"
"github.com/gonum/matrix/mat64"
"github.com/gonum/stat/distuv"
)
// Dirichlet implements the Dirichlet probability distribution.
//
// The Dirichlet distribution is a continuous probability distribution that
// generates elements over the probability simplex, i.e. ||x||_1 = 1. The Dirichlet
// distribution is the conjugate prior to the categorical distribution and the
// multivariate version of the beta distribution. The probability of a point x is
//  1/Beta(α) \prod_i x_i^(α_i - 1)
// where Beta(α) is the multivariate Beta function (see the mathext package).
//
// For more information see https://en.wikipedia.org/wiki/Dirichlet_distribution
type Dirichlet struct {
	alpha []float64  // concentration parameters α; all positive (enforced by NewDirichlet)
	dim   int        // dimension, equal to len(alpha)
	src   *rand.Rand // random source; nil means the global rand functions are used

	lbeta    float64 // log of the multivariate Beta function of alpha, cached by genLBeta
	sumAlpha float64 // Σ_i α_i, cached by genLBeta
}
// NewDirichlet creates a new Dirichlet distribution with the given
// concentration parameters alpha. The slice is copied, so the caller may
// reuse it. NewDirichlet will panic if len(alpha) == 0, or if any alpha is <= 0.
func NewDirichlet(alpha []float64, src *rand.Rand) *Dirichlet {
	if len(alpha) == 0 {
		panic(badZeroDimension)
	}
	// Validate while copying the parameters.
	a := make([]float64, len(alpha))
	for i, v := range alpha {
		if v <= 0 {
			panic("dirichlet: non-positive alpha")
		}
		a[i] = v
	}
	d := &Dirichlet{
		alpha: a,
		dim:   len(a),
		src:   src,
	}
	d.lbeta, d.sumAlpha = d.genLBeta(a)
	return d
}
// CovarianceMatrix returns the covariance matrix of the distribution. Upon
// return, the value at element {i, j} of the covariance matrix is equal to
// the covariance of the i^th and j^th variables.
//  covariance(i, j) = E[(x_i - E[x_i])(x_j - E[x_j])]
// If the input matrix is nil a new matrix is allocated, otherwise the result
// is stored in-place into the input. A non-nil input must be zero-sized or
// have the same order as the distribution, otherwise CovarianceMatrix panics.
func (d *Dirichlet) CovarianceMatrix(cov *mat64.SymDense) *mat64.SymDense {
	if cov == nil {
		cov = mat64.NewSymDense(d.Dim(), nil)
	} else if cov.Symmetric() == 0 {
		*cov = *(cov.GrowSquare(d.dim).(*mat64.SymDense))
	} else if cov.Symmetric() != d.dim {
		// The message previously said "normal:", copied from the Normal
		// distribution; this is the Dirichlet.
		panic("dirichlet: input matrix size mismatch")
	}
	// With α0 = Σ_k α_k:
	//  Var(x_i)      = α_i (α0 - α_i) / (α0² (α0+1))
	//  Cov(x_i, x_j) = -α_i α_j / (α0² (α0+1)), i != j
	scale := 1 / (d.sumAlpha * d.sumAlpha * (d.sumAlpha + 1))
	for i := 0; i < d.dim; i++ {
		ai := d.alpha[i]
		v := ai * (d.sumAlpha - ai) * scale
		cov.SetSym(i, i, v)
		for j := i + 1; j < d.dim; j++ {
			aj := d.alpha[j]
			v := -ai * aj * scale
			cov.SetSym(i, j, v)
		}
	}
	return cov
}
// genLBeta computes the log of the generalized Beta function of alpha,
//  log Β(α) = Σ_i log Γ(α_i) - log Γ(Σ_i α_i)
// and returns it along with the sum of the elements of alpha.
func (d *Dirichlet) genLBeta(alpha []float64) (lbeta, sumAlpha float64) {
	// The original ranged over d.alpha, silently ignoring the alpha
	// parameter; iterate over the argument so the function computes what
	// its signature claims. (At the only call site the two are identical.)
	for _, a := range alpha {
		lg, _ := math.Lgamma(a)
		lbeta += lg
		sumAlpha += a
	}
	lg, _ := math.Lgamma(sumAlpha)
	return lbeta - lg, sumAlpha
}
// Dim returns the dimension of the distribution (the length of alpha).
func (d *Dirichlet) Dim() int {
	return d.dim
}
// LogProb computes the log of the pdf of the point x.
//
// It does not check that ||x||_1 = 1.
func (d *Dirichlet) LogProb(x []float64) float64 {
	if len(x) != d.dim {
		panic(badSizeMismatch)
	}
	// log p(x) = Σ_i (α_i - 1) log(x_i) - log Β(α)
	var lprob float64
	for i, xi := range x {
		lprob += (d.alpha[i] - 1) * math.Log(xi)
	}
	return lprob - d.lbeta
}
// Mean returns the mean of the probability distribution at x. If the
// input argument is nil, a new slice will be allocated, otherwise the result
// will be put in-place into the receiver.
func (d *Dirichlet) Mean(x []float64) []float64 {
	x = reuseAs(x, d.dim)
	// E[x_i] = α_i / Σ_k α_k.
	scale := 1 / d.sumAlpha
	for i, a := range d.alpha {
		x[i] = a * scale
	}
	return x
}

// Prob computes the value of the probability density function at x.
func (d *Dirichlet) Prob(x []float64) float64 {
	lp := d.LogProb(x)
	return math.Exp(lp)
}
// Rand generates a random number according to the distribution.
// If the input slice is nil, new memory is allocated, otherwise the result is
// stored in place.
func (d *Dirichlet) Rand(x []float64) []float64 {
	x = reuseAs(x, d.dim)
	// Draw independent Gamma(α_i, 1) variates and normalize them onto the
	// simplex; the draw order over i must match the stored alpha order.
	for i, a := range d.alpha {
		g := distuv.Gamma{Alpha: a, Beta: 1, Source: d.src}
		x[i] = g.Rand()
	}
	sum := floats.Sum(x)
	floats.Scale(1/sum, x)
	return x
}

View File

@@ -0,0 +1,72 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distmv
import (
"math"
"math/rand"
"testing"
"github.com/gonum/matrix/mat64"
)
// TestDirichlet checks Prob against reference values from scipy, then checks
// the sample mean and covariance of large random draws against the analytic
// values via the shared checkMean/checkCov helpers.
func TestDirichlet(t *testing.T) {
	// Data from Scipy.
	for cas, test := range []struct {
		Dir  *Dirichlet
		x    []float64
		prob float64
	}{
		{
			NewDirichlet([]float64{1, 1, 1}, nil),
			[]float64{0.2, 0.3, 0.5},
			2.0,
		},
		{
			NewDirichlet([]float64{0.6, 10, 8.7}, nil),
			[]float64{0.2, 0.3, 0.5},
			0.24079612737071665,
		},
	} {
		p := test.Dir.Prob(test.x)
		if math.Abs(p-test.prob) > 1e-14 {
			// "Probablility" typo in the original message corrected.
			t.Errorf("Probability mismatch. Case %v. Got %v, want %v", cas, p, test.prob)
		}
	}

	rnd := rand.New(rand.NewSource(1))
	for cas, test := range []struct {
		Dir *Dirichlet
		N   int
	}{
		{
			NewDirichlet([]float64{1, 1, 1}, rnd),
			1e6,
		},
		{
			NewDirichlet([]float64{2, 3}, rnd),
			1e6,
		},
		{
			NewDirichlet([]float64{0.2, 0.3}, rnd),
			1e6,
		},
		{
			NewDirichlet([]float64{0.2, 4}, rnd),
			1e6,
		},
		{
			NewDirichlet([]float64{0.1, 4, 20}, rnd),
			1e6,
		},
	} {
		d := test.Dir
		dim := d.Dim()
		x := mat64.NewDense(test.N, dim, nil)
		generateSamples(x, d)
		checkMean(t, cas, x, d, 1e-3)
		checkCov(t, cas, x, d, 1e-3)
	}
}

31
stat/distmv/general.go Normal file
View File

@@ -0,0 +1,31 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package distmv provides multivariate random distribution types.
package distmv
// Panic messages shared by the distributions in this package.
var (
	badQuantile      = "distmv: quantile not between 0 and 1"
	badReceiver      = "distmv: input slice is not nil or the correct length"
	badSizeMismatch  = "distmv: size mismatch"
	badZeroDimension = "distmv: zero dimensional input"
	nonPosDimension  = "distmv: non-positive dimension input"
)

// logTwoPi is log(2π), used by the normal log-density and entropy.
const logTwoPi = 1.8378770664093454835606594728112352797227949472755668

// reuseAs returns a []float64 of length n for the caller to fill. If x
// already has length n it is returned unchanged; if x is empty its capacity
// is reused when possible and otherwise new memory is allocated. reuseAs
// panics with badReceiver when x is non-empty with the wrong length.
func reuseAs(x []float64, n int) []float64 {
	switch {
	case len(x) == n:
		return x
	case len(x) != 0:
		panic(badReceiver)
	case cap(x) >= n:
		return x[:n]
	default:
		return make([]float64, n)
	}
}

View File

@@ -0,0 +1,96 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distmv
import (
"math"
"testing"
"github.com/gonum/floats"
"github.com/gonum/matrix/mat64"
"github.com/gonum/stat"
)
// prober is the subset of a distribution used by testProbability: a density
// and its logarithm.
type prober interface {
	Prob(x []float64) float64
	LogProb(x []float64) float64
}

// probCase pairs a distribution with a location and the expected log
// probability at that location.
type probCase struct {
	dist    prober
	loc     []float64
	logProb float64
}
// testProbability checks that LogProb returns the expected value for each
// case and that Prob is consistent with exp(LogProb).
func testProbability(t *testing.T, cases []probCase) {
	for _, c := range cases {
		got := c.dist.LogProb(c.loc)
		if math.Abs(got-c.logProb) > 1e-14 {
			t.Errorf("LogProb mismatch: want: %v, got: %v", c.logProb, got)
		}
		want := math.Exp(c.logProb)
		if p := c.dist.Prob(c.loc); math.Abs(p-want) > 1e-14 {
			t.Errorf("Prob mismatch: want: %v, got: %v", want, p)
		}
	}
}
// generateSamples fills each row of x with an independent draw from r.
func generateSamples(x *mat64.Dense, r Rander) {
	n, _ := x.Dims()
	for i := 0; i < n; i++ {
		r.Rand(x.RawRowView(i))
	}
}

// Meaner is a distribution that can report its mean vector.
type Meaner interface {
	Mean([]float64) []float64
}
// checkMean verifies that m.Mean is identical between its nil and in-place
// forms, and that it agrees with the column means of the samples in x to
// within tol.
func checkMean(t *testing.T, cas int, x *mat64.Dense, m Meaner, tol float64) {
	mean := m.Mean(nil)

	// The in-place form must produce exactly the same vector.
	inPlace := make([]float64, len(mean))
	m.Mean(inPlace)
	if !floats.Equal(mean, inPlace) {
		t.Errorf("Mean mismatch when providing nil and slice. Case %v", cas)
	}

	// Compare against the empirical per-column means of the samples.
	rows, _ := x.Dims()
	col := make([]float64, rows)
	est := make([]float64, len(mean))
	for i := range est {
		est[i] = stat.Mean(mat64.Col(col, i, x), nil)
	}
	if !floats.EqualApprox(mean, est, tol) {
		t.Errorf("Returned mean and sample mean mismatch. Case %v. Empirical %v, returned %v", cas, est, mean)
	}
}
// Cover is a distribution that can report its covariance matrix.
type Cover interface {
	CovarianceMatrix(*mat64.SymDense) *mat64.SymDense
}
// checkCov verifies that c.CovarianceMatrix is identical across its nil,
// pre-sized, and zero-value input forms, and that it agrees with the sample
// covariance of x to within tol.
func checkCov(t *testing.T, cas int, x *mat64.Dense, c Cover, tol float64) {
	cov := c.CovarianceMatrix(nil)
	n := cov.Symmetric()
	cov2 := mat64.NewSymDense(n, nil)
	c.CovarianceMatrix(cov2)
	if !mat64.Equal(cov, cov2) {
		t.Errorf("Cov mismatch when providing nil and matrix. Case %v", cas)
	}
	var cov3 mat64.SymDense
	c.CovarianceMatrix(&cov3)
	if !mat64.Equal(cov, &cov3) {
		t.Errorf("Cov mismatch when providing zero matrix. Case %v", cas)
	}

	// Check that the covariance matrix matches the samples.
	covEst := stat.CovarianceMatrix(nil, x, nil)
	if !mat64.EqualApprox(covEst, cov, tol) {
		// "Cas" and "Return cov" typos in the original message corrected.
		t.Errorf("Returned cov and sample cov mismatch. Case %v.\nGot:\n%0.4v\nWant:\n%0.4v", cas, mat64.Formatted(cov), mat64.Formatted(covEst))
	}
}

33
stat/distmv/interfaces.go Normal file
View File

@@ -0,0 +1,33 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distmv
// Quantiler is the multi-dimensional inverse cumulative distribution function.
// len(p) must equal the dimension of the distribution. If x is nil, a new
// slice will be allocated and returned; otherwise len(x) must equal len(p)
// and the quantile will be stored in-place into x. All of the values of p
// must be between 0 and 1, or Quantile will panic.
type Quantiler interface {
	Quantile(x, p []float64) []float64
}

// LogProber computes the log of the probability of the point x.
type LogProber interface {
	LogProb(x []float64) float64
}

// Rander generates a random vector according to the distribution.
// If x is non-nil, len(x) must equal the dimension of the distribution and
// the sample is stored in-place, otherwise Rand will panic.
// If x is nil, a new slice will be allocated and returned.
type Rander interface {
	Rand(x []float64) []float64
}

// RandLogProber is both a Rander and a LogProber.
type RandLogProber interface {
	Rander
	LogProber
}

316
stat/distmv/normal.go Normal file
View File

@@ -0,0 +1,316 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distmv
import (
"math"
"math/rand"
"github.com/gonum/floats"
"github.com/gonum/matrix/mat64"
"github.com/gonum/stat"
"github.com/gonum/stat/distuv"
)
var (
	// badInputLength is the panic message used when a slice argument's
	// length does not match the dimension of the distribution.
	badInputLength = "distmv: input slice length mismatch"
)

// Normal is a multivariate normal distribution (also known as the multivariate
// Gaussian distribution). Its pdf in k dimensions is given by
//  (2 π)^(-k/2) |Σ|^(-1/2) exp(-1/2 (x-μ)'Σ^-1(x-μ))
// where μ is the mean vector and Σ the covariance matrix. Σ must be symmetric
// and positive definite. Use NewNormal to construct.
type Normal struct {
	mu []float64 // mean vector μ
	// sigma is the covariance matrix Σ. NOTE(review): it is set by NewNormal
	// but not by NewNormalChol, so methods reading it (ConditionNormal,
	// MarginalNormal, MarginalNormalSingle, CovarianceMatrix) see a zero
	// matrix for Cholesky-constructed values — confirm intended.
	sigma mat64.SymDense

	chol       mat64.Cholesky // Cholesky factorization of Σ
	lower      mat64.TriDense // lower-triangular factor L of Σ, Σ = L L^T
	logSqrtDet float64        // log(sqrt(|Σ|)), cached from chol

	dim int        // dimension k
	src *rand.Rand // random source; nil means the global rand functions are used
}
// NewNormal creates a new Normal with the given mean and covariance matrix.
// NewNormal panics if len(mu) == 0, or if len(mu) != sigma.Symmetric(). If
// the covariance matrix is not positive-definite, the returned boolean is
// false and the *Normal is nil.
func NewNormal(mu []float64, sigma mat64.Symmetric, src *rand.Rand) (*Normal, bool) {
	dim := len(mu)
	if dim == 0 {
		panic(badZeroDimension)
	}
	if sigma.Symmetric() != dim {
		panic(badSizeMismatch)
	}
	n := &Normal{
		src: src,
		dim: dim,
		mu:  append([]float64(nil), mu...),
	}
	// Positive definiteness is established by the Cholesky factorization.
	if ok := n.chol.Factorize(sigma); !ok {
		return nil, false
	}
	n.sigma = *mat64.NewSymDense(dim, nil)
	n.sigma.CopySym(sigma)
	n.lower.LFromCholesky(&n.chol)
	n.logSqrtDet = 0.5 * n.chol.LogDet()
	return n, true
}
// NewNormalChol creates a new Normal distribution with the given mean and
// covariance matrix represented by its Cholesky decomposition. NewNormalChol
// panics if len(mu) is not equal to chol.Size().
func NewNormalChol(mu []float64, chol *mat64.Cholesky, src *rand.Rand) *Normal {
	if len(mu) != chol.Size() {
		panic(badSizeMismatch)
	}
	n := &Normal{
		src: src,
		dim: len(mu),
		mu:  append([]float64(nil), mu...),
	}
	n.chol.Clone(chol)
	n.lower.LFromCholesky(chol)
	n.logSqrtDet = 0.5 * n.chol.LogDet()
	return n
}
// NewNormalPrecision creates a new Normal distribution with the given mean and
// precision matrix (inverse of the covariance matrix). NewNormalPrecision
// panics if len(mu) == 0 or if len(mu) is not equal to prec.Symmetric(). If
// the precision matrix is not positive-definite, NewNormalPrecision returns
// nil for norm and false for ok.
func NewNormalPrecision(mu []float64, prec *mat64.SymDense, src *rand.Rand) (norm *Normal, ok bool) {
	if len(mu) == 0 {
		panic(badZeroDimension)
	}
	if prec.Symmetric() != len(mu) {
		panic(badSizeMismatch)
	}
	// TODO(btracey): Computing a matrix inverse is generally numerically instable.
	// This only has to compute the inverse of a positive definite matrix, which
	// is much better, but this still loses precision. It is worth considering if
	// instead the precision matrix should be stored explicitly and used instead
	// of the Cholesky decomposition of the covariance matrix where appropriate.
	var cholPrec mat64.Cholesky
	if ok = cholPrec.Factorize(prec); !ok {
		return nil, false
	}
	var sigma mat64.SymDense
	sigma.InverseCholesky(&cholPrec)
	return NewNormal(mu, &sigma, src)
}
// ConditionNormal returns the Normal distribution that is the receiver conditioned
// on the input evidence. The returned multivariate normal has dimension
// n - len(observed), where n is the dimension of the original receiver. The updated
// mean and covariance are
//  mu = mu_un + sigma_{ob,un}^T * sigma_{ob,ob}^-1 (v - mu_ob)
//  sigma = sigma_{un,un} - sigma_{ob,un}^T * sigma_{ob,ob}^-1 * sigma_{ob,un}
// where mu_un and mu_ob are the original means of the unobserved and observed
// variables respectively, sigma_{un,un} is the unobserved subset of the covariance
// matrix, sigma_{ob,ob} is the observed subset of the covariance matrix, and
// sigma_{un,ob} are the cross terms. The elements of x_2 have been observed with
// values v. The dimension order is preserved during conditioning, so if the value
// of dimension 1 is observed, the returned normal represents dimensions {0, 2, ...}
// of the original Normal distribution.
//
// ConditionNormal returns {nil, false} if there is a failure during the update.
// Mathematically this is impossible, but can occur with finite precision arithmetic.
func (n *Normal) ConditionNormal(observed []int, values []float64, src *rand.Rand) (*Normal, bool) {
	if len(observed) == 0 {
		panic("normal: no observed value")
	}
	if len(observed) != len(values) {
		panic(badInputLength)
	}
	for _, v := range observed {
		if v < 0 || v >= n.Dim() {
			panic("normal: observed value out of bounds")
		}
	}

	// Delegate to the Student's t update; infinite degrees of freedom
	// presumably selects the Gaussian limit — confirm against
	// studentsTConditional, which is defined elsewhere in this package.
	_, mu1, sigma11 := studentsTConditional(observed, values, math.Inf(1), n.mu, &n.sigma)
	if mu1 == nil {
		return nil, false
	}
	return NewNormal(mu1, sigma11, src)
}
// CovarianceMatrix returns the covariance matrix of the distribution. Upon
// return, the value at element {i, j} of the covariance matrix is equal to
// the covariance of the i^th and j^th variables.
//  covariance(i, j) = E[(x_i - E[x_i])(x_j - E[x_j])]
// If the input matrix is nil a new matrix is allocated, otherwise the result
// is stored in-place into the input. A non-nil input must be zero-sized or
// have the same order as the distribution, otherwise CovarianceMatrix panics.
func (n *Normal) CovarianceMatrix(s *mat64.SymDense) *mat64.SymDense {
	if s == nil {
		s = mat64.NewSymDense(n.Dim(), nil)
	} else if s.Symmetric() == 0 {
		// Grow a zero-value matrix in place, matching the behavior of
		// Dirichlet.CovarianceMatrix and the zero-matrix path exercised by
		// the shared checkCov test helper; previously this case panicked.
		*s = *(s.GrowSquare(n.dim).(*mat64.SymDense))
	} else if s.Symmetric() != n.Dim() {
		panic("normal: input matrix size mismatch")
	}
	s.CopySym(&n.sigma)
	return s
}
// Dim returns the dimension of the distribution.
func (n *Normal) Dim() int {
	return n.dim
}

// Entropy returns the differential entropy of the distribution,
//  k/2 * (1 + log(2π)) + 1/2 * log(|Σ|)
func (n *Normal) Entropy() float64 {
	return float64(n.dim)/2*(1+logTwoPi) + n.logSqrtDet
}
// LogProb computes the log of the pdf of the point x.
func (n *Normal) LogProb(x []float64) float64 {
	if len(x) != n.dim {
		panic(badSizeMismatch)
	}
	// log p(x) = -k/2 log(2π) - log(sqrt|Σ|) - 1/2 (x-μ)'Σ^-1(x-μ),
	// with the quadratic form evaluated via the Mahalanobis distance.
	c := -0.5*float64(n.dim)*logTwoPi - n.logSqrtDet
	dist := stat.Mahalanobis(mat64.NewVector(n.dim, x), mat64.NewVector(n.dim, n.mu), &n.chol)
	return c - 0.5*dist*dist
}
// MarginalNormal returns the marginal distribution of the given input variables.
// That is, MarginalNormal returns
//  p(x_i) = \int_{x_o} p(x_i | x_o) p(x_o) dx_o
// where x_i are the dimensions in the input, and x_o are the remaining dimensions.
// See https://en.wikipedia.org/wiki/Marginal_distribution for more information.
//
// The input src is passed to the call to NewNormal.
func (n *Normal) MarginalNormal(vars []int, src *rand.Rand) (*Normal, bool) {
	mu := make([]float64, len(vars))
	for i, v := range vars {
		mu[i] = n.mu[v]
	}
	var sub mat64.SymDense
	sub.SubsetSym(&n.sigma, vars)
	return NewNormal(mu, &sub, src)
}
// MarginalNormalSingle returns the marginal of the given input variable.
// That is, MarginalNormalSingle returns
//  p(x_i) = \int_{x_¬i} p(x_i | x_¬i) p(x_¬i) dx_¬i
// where i is the input index.
// See https://en.wikipedia.org/wiki/Marginal_distribution for more information.
//
// The input src is passed to the constructed distuv.Normal.
//
// NOTE(review): this reads n.sigma, which NewNormalChol does not populate —
// confirm behavior for Cholesky-constructed receivers.
func (n *Normal) MarginalNormalSingle(i int, src *rand.Rand) distuv.Normal {
	return distuv.Normal{
		Mu:     n.mu[i],
		Sigma:  math.Sqrt(n.sigma.At(i, i)),
		Source: src,
	}
}
// Mean returns the mean of the probability distribution at x. If the
// input argument is nil, a new slice will be allocated, otherwise the result
// will be put in-place into the receiver.
func (n *Normal) Mean(x []float64) []float64 {
	x = reuseAs(x, n.dim)
	for i, m := range n.mu {
		x[i] = m
	}
	return x
}

// Prob computes the value of the probability density function at x.
func (n *Normal) Prob(x []float64) float64 {
	lp := n.LogProb(x)
	return math.Exp(lp)
}
// Quantile returns the multi-dimensional inverse cumulative distribution function.
// If x is nil, a new slice will be allocated and returned. If x is non-nil,
// len(x) must equal len(p) and the quantile will be stored in-place into x.
// All of the values of p must be between 0 and 1, inclusive, or Quantile will panic.
func (n *Normal) Quantile(x, p []float64) []float64 {
	if len(p) != n.Dim() {
		panic(badInputLength)
	}
	if x == nil {
		x = make([]float64, n.Dim())
	} else if len(x) != len(p) {
		panic(badInputLength)
	}
	// Map each probability through the standard normal quantile, then color
	// the standard normal vector by the receiver's mean and covariance.
	std := make([]float64, len(p))
	for i, v := range p {
		std[i] = distuv.UnitNormal.Quantile(v)
	}
	n.TransformNormal(x, std)
	return x
}
// Rand generates a random number according to the distribution.
// If the input slice is nil, new memory is allocated, otherwise the result is
// stored in place.
func (n *Normal) Rand(x []float64) []float64 {
	x = reuseAs(x, n.dim)
	// Select the standard-normal source once, then fill the scratch vector
	// in index order and color it by the mean and covariance.
	norm := rand.NormFloat64
	if n.src != nil {
		norm = n.src.NormFloat64
	}
	tmp := make([]float64, n.dim)
	for i := range tmp {
		tmp[i] = norm()
	}
	n.transformNormal(x, tmp)
	return x
}
// SetMean changes the mean of the normal distribution. SetMean panics if len(mu)
// does not equal the dimension of the normal distribution.
func (n *Normal) SetMean(mu []float64) {
	if len(mu) != n.Dim() {
		panic(badSizeMismatch)
	}
	copy(n.mu, mu)
}
// TransformNormal transforms the vector, normal, generated from a standard
// multidimensional normal into a vector that has been generated under the
// distribution of the receiver.
//
// If dst is non-nil, the result will be stored into dst, otherwise a new slice
// will be allocated. TransformNormal will panic if the length of normal is not
// the dimension of the receiver, or if dst is non-nil and len(dst) != len(normal).
func (n *Normal) TransformNormal(dst, normal []float64) []float64 {
	if len(normal) != n.dim {
		panic(badInputLength)
	}
	dst = reuseAs(dst, n.dim)
	if len(dst) != len(normal) {
		panic(badInputLength)
	}
	return n.transformNormal(dst, normal)
}
// transformNormal performs the same operation as TransformNormal except no
// safety checks are performed and both input slices must be non-nil.
// It computes dst = μ + L*normal, where L is the lower Cholesky factor of Σ.
// NOTE(review): dst and normal are assumed not to alias; the visible callers
// (Rand, Quantile) pass distinct slices.
func (n *Normal) transformNormal(dst, normal []float64) []float64 {
	srcVec := mat64.NewVector(n.dim, normal)
	dstVec := mat64.NewVector(n.dim, dst)
	dstVec.MulVec(&n.lower, srcVec)
	floats.Add(dst, n.mu)
	return dst
}

538
stat/distmv/normal_test.go Normal file
View File

@@ -0,0 +1,538 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distmv
import (
"math"
"math/rand"
"testing"
"github.com/gonum/floats"
"github.com/gonum/matrix/mat64"
"github.com/gonum/stat"
)
type mvTest struct {
Mu []float64
Sigma *mat64.SymDense
Loc []float64
Logprob float64
Prob float64
}
func TestNormProbs(t *testing.T) {
dist1, ok := NewNormal([]float64{0, 0}, mat64.NewSymDense(2, []float64{1, 0, 0, 1}), nil)
if !ok {
t.Errorf("bad test")
}
dist2, ok := NewNormal([]float64{6, 7}, mat64.NewSymDense(2, []float64{8, 2, 0, 4}), nil)
if !ok {
t.Errorf("bad test")
}
testProbability(t, []probCase{
{
dist: dist1,
loc: []float64{0, 0},
logProb: -1.837877066409345,
},
{
dist: dist2,
loc: []float64{6, 7},
logProb: -3.503979321496947,
},
{
dist: dist2,
loc: []float64{1, 2},
logProb: -7.075407892925519,
},
})
}
// TestNewNormalChol constructs a Normal directly from a Cholesky
// factorization and smoke-tests sampling and probability evaluation.
func TestNewNormalChol(t *testing.T) {
	for _, test := range []struct {
		mean []float64
		cov  *mat64.SymDense
	}{
		{
			mean: []float64{2, 3},
			cov:  mat64.NewSymDense(2, []float64{1, 0.1, 0.1, 1}),
		},
	} {
		var chol mat64.Cholesky
		ok := chol.Factorize(test.cov)
		if !ok {
			panic("bad test")
		}
		n := NewNormalChol(test.mean, &chol, nil)
		// Generate a random number and calculate probability to ensure things
		// have been set properly. See issue #426.
		x := n.Rand(nil)
		_ = n.Prob(x)
	}
}
// TestNormRand draws many samples from Normal distributions and checks that
// the sample mean and covariance match the distribution parameters.
func TestNormRand(t *testing.T) {
	for _, test := range []struct {
		mean []float64
		cov  []float64
	}{
		{
			mean: []float64{0, 0},
			cov: []float64{
				1, 0,
				0, 1,
			},
		},
		{
			mean: []float64{0, 0},
			cov: []float64{
				1, 0.9,
				0.9, 1,
			},
		},
		{
			mean: []float64{6, 7},
			cov: []float64{
				5, 0.9,
				0.9, 2,
			},
		},
	} {
		dim := len(test.mean)
		cov := mat64.NewSymDense(dim, test.cov)
		n, ok := NewNormal(test.mean, cov, nil)
		if !ok {
			// Fatalf rather than Errorf: n is nil on failure and the
			// sampling loop below would panic.
			t.Fatalf("bad covariance matrix")
		}
		nSamples := 1000000
		samps := mat64.NewDense(nSamples, dim, nil)
		for i := 0; i < nSamples; i++ {
			n.Rand(samps.RawRowView(i))
		}
		estMean := make([]float64, dim)
		for i := range estMean {
			estMean[i] = stat.Mean(mat64.Col(nil, i, samps), nil)
		}
		if !floats.EqualApprox(estMean, test.mean, 1e-2) {
			t.Errorf("Mean mismatch: want: %v, got %v", test.mean, estMean)
		}
		estCov := stat.CovarianceMatrix(nil, samps, nil)
		if !mat64.EqualApprox(estCov, cov, 1e-2) {
			t.Errorf("Cov mismatch: want: %v, got %v", cov, estCov)
		}
	}
}
// TestNormalQuantile pushes uniform random points through Quantile and
// verifies the resulting samples have the distribution's mean and covariance.
func TestNormalQuantile(t *testing.T) {
	for _, test := range []struct {
		mean []float64
		cov  []float64
	}{
		{
			mean: []float64{6, 7},
			cov: []float64{
				5, 0.9,
				0.9, 2,
			},
		},
	} {
		dim := len(test.mean)
		cov := mat64.NewSymDense(dim, test.cov)
		n, ok := NewNormal(test.mean, cov, nil)
		if !ok {
			// Fatalf rather than Errorf: n is nil on failure and the
			// quantile loop below would panic.
			t.Fatalf("bad covariance matrix")
		}
		nSamples := 1000000
		rnd := rand.New(rand.NewSource(1))
		samps := mat64.NewDense(nSamples, dim, nil)
		tmp := make([]float64, dim)
		for i := 0; i < nSamples; i++ {
			// Each coordinate of the quantile input is an independent
			// uniform draw in [0,1).
			for j := range tmp {
				tmp[j] = rnd.Float64()
			}
			n.Quantile(samps.RawRowView(i), tmp)
		}
		estMean := make([]float64, dim)
		for i := range estMean {
			estMean[i] = stat.Mean(mat64.Col(nil, i, samps), nil)
		}
		if !floats.EqualApprox(estMean, test.mean, 1e-2) {
			t.Errorf("Mean mismatch: want: %v, got %v", test.mean, estMean)
		}
		estCov := stat.CovarianceMatrix(nil, samps, nil)
		if !mat64.EqualApprox(estCov, cov, 1e-2) {
			t.Errorf("Cov mismatch: want: %v, got %v", cov, estCov)
		}
	}
}
// TestConditionNormal exercises ConditionNormal three ways: uncorrelated
// dimensions must be unaffected, the bivariate case must match the analytic
// update rule, and the general case must agree with rejection sampling.
func TestConditionNormal(t *testing.T) {
	// Uncorrelated values shouldn't influence the updated values.
	for _, test := range []struct {
		mu       []float64
		sigma    *mat64.SymDense
		observed []int
		values   []float64
		newMu    []float64
		newSigma *mat64.SymDense
	}{
		{
			mu:       []float64{2, 3},
			sigma:    mat64.NewSymDense(2, []float64{2, 0, 0, 5}),
			observed: []int{0},
			values:   []float64{10},
			newMu:    []float64{3},
			newSigma: mat64.NewSymDense(1, []float64{5}),
		},
		{
			mu:       []float64{2, 3},
			sigma:    mat64.NewSymDense(2, []float64{2, 0, 0, 5}),
			observed: []int{1},
			values:   []float64{10},
			newMu:    []float64{2},
			newSigma: mat64.NewSymDense(1, []float64{2}),
		},
		{
			mu:       []float64{2, 3, 4},
			sigma:    mat64.NewSymDense(3, []float64{2, 0, 0, 0, 5, 0, 0, 0, 10}),
			observed: []int{1},
			values:   []float64{10},
			newMu:    []float64{2, 4},
			newSigma: mat64.NewSymDense(2, []float64{2, 0, 0, 10}),
		},
		{
			mu:       []float64{2, 3, 4},
			sigma:    mat64.NewSymDense(3, []float64{2, 0, 0, 0, 5, 0, 0, 0, 10}),
			observed: []int{0, 1},
			values:   []float64{10, 15},
			newMu:    []float64{4},
			newSigma: mat64.NewSymDense(1, []float64{10}),
		},
		{
			mu:       []float64{2, 3, 4, 5},
			sigma:    mat64.NewSymDense(4, []float64{2, 0.5, 0, 0, 0.5, 5, 0, 0, 0, 0, 10, 2, 0, 0, 2, 3}),
			observed: []int{0, 1},
			values:   []float64{10, 15},
			newMu:    []float64{4, 5},
			newSigma: mat64.NewSymDense(2, []float64{10, 2, 2, 3}),
		},
	} {
		normal, ok := NewNormal(test.mu, test.sigma, nil)
		if !ok {
			t.Fatalf("Bad test, original sigma not positive definite")
		}
		newNormal, ok := normal.ConditionNormal(test.observed, test.values, nil)
		if !ok {
			t.Fatalf("Bad test, update failure")
		}
		if !floats.EqualApprox(test.newMu, newNormal.mu, 1e-12) {
			t.Errorf("Updated mean mismatch. Want %v, got %v.", test.newMu, newNormal.mu)
		}
		var sigma mat64.SymDense
		sigma.FromCholesky(&newNormal.chol)
		if !mat64.EqualApprox(test.newSigma, &sigma, 1e-12) {
			t.Errorf("Updated sigma mismatch\n.Want:\n% v\nGot:\n% v\n", test.newSigma, sigma)
		}
	}
	// Test bivariate case where the update rule is analytic
	for _, test := range []struct {
		mu    []float64
		std   []float64
		rho   float64
		value float64
	}{
		{
			mu:    []float64{2, 3},
			std:   []float64{3, 5},
			rho:   0.9,
			value: 1000,
		},
		{
			mu:    []float64{2, 3},
			std:   []float64{3, 5},
			rho:   -0.9,
			value: 1000,
		},
	} {
		std := test.std
		rho := test.rho
		sigma := mat64.NewSymDense(2, []float64{std[0] * std[0], std[0] * std[1] * rho, std[0] * std[1] * rho, std[1] * std[1]})
		normal, ok := NewNormal(test.mu, sigma, nil)
		if !ok {
			t.Fatalf("Bad test, original sigma not positive definite")
		}
		newNormal, ok := normal.ConditionNormal([]int{1}, []float64{test.value}, nil)
		if !ok {
			t.Fatalf("Bad test, update failed")
		}
		var newSigma mat64.SymDense
		newSigma.FromCholesky(&newNormal.chol)
		trueMean := test.mu[0] + rho*(std[0]/std[1])*(test.value-test.mu[1])
		if math.Abs(trueMean-newNormal.mu[0]) > 1e-14 {
			t.Errorf("Mean mismatch. Want %v, got %v", trueMean, newNormal.mu[0])
		}
		trueVar := (1 - rho*rho) * std[0] * std[0]
		if math.Abs(trueVar-newSigma.At(0, 0)) > 1e-14 {
			// Report the variance values actually compared (the original
			// message reprinted the means by mistake).
			t.Errorf("Var mismatch. Want %v, got %v", trueVar, newSigma.At(0, 0))
		}
	}
	// Test via sampling.
	for _, test := range []struct {
		mu         []float64
		sigma      *mat64.SymDense
		observed   []int
		unobserved []int
		value      []float64
	}{
		// The indices in unobserved must be in ascending order for this test.
		{
			mu:         []float64{2, 3, 4},
			sigma:      mat64.NewSymDense(3, []float64{2, 0.5, 3, 0.5, 1, 0.6, 3, 0.6, 10}),
			observed:   []int{0},
			unobserved: []int{1, 2},
			value:      []float64{1.9},
		},
		{
			mu:         []float64{2, 3, 4, 5},
			sigma:      mat64.NewSymDense(4, []float64{2, 0.5, 3, 0.1, 0.5, 1, 0.6, 0.2, 3, 0.6, 10, 0.3, 0.1, 0.2, 0.3, 3}),
			observed:   []int{0, 3},
			unobserved: []int{1, 2},
			value:      []float64{1.9, 2.9},
		},
	} {
		totalSamp := 4000000
		var nSamp int
		samples := mat64.NewDense(totalSamp, len(test.mu), nil)
		normal, ok := NewNormal(test.mu, test.sigma, nil)
		if !ok {
			// Fatalf rather than Errorf: normal is nil on failure and the
			// sampling loop below would panic.
			t.Fatalf("bad test")
		}
		sample := make([]float64, len(test.mu))
		// Rejection sampling: keep only draws whose observed coordinates are
		// close to the conditioning values.
		for i := 0; i < totalSamp; i++ {
			normal.Rand(sample)
			isClose := true
			for i, v := range test.observed {
				if math.Abs(sample[v]-test.value[i]) > 1e-1 {
					isClose = false
					break
				}
			}
			if isClose {
				samples.SetRow(nSamp, sample)
				nSamp++
			}
		}
		if nSamp < 100 {
			t.Errorf("bad test, not enough samples")
			continue
		}
		samples = samples.View(0, 0, nSamp, len(test.mu)).(*mat64.Dense)
		// Compute mean and covariance matrix.
		estMean := make([]float64, len(test.mu))
		for i := range estMean {
			estMean[i] = stat.Mean(mat64.Col(nil, i, samples), nil)
		}
		estCov := stat.CovarianceMatrix(nil, samples, nil)
		// Compute update rule.
		newNormal, ok := normal.ConditionNormal(test.observed, test.value, nil)
		if !ok {
			t.Fatalf("Bad test, update failure")
		}
		var subEstMean []float64
		for _, v := range test.unobserved {
			subEstMean = append(subEstMean, estMean[v])
		}
		subEstCov := mat64.NewSymDense(len(test.unobserved), nil)
		for i := 0; i < len(test.unobserved); i++ {
			for j := i; j < len(test.unobserved); j++ {
				subEstCov.SetSym(i, j, estCov.At(test.unobserved[i], test.unobserved[j]))
			}
		}
		for i, v := range subEstMean {
			if math.Abs(newNormal.mu[i]-v) > 5e-2 {
				// want is the sampled estimate, got the conditioned mean
				// (the original message had the arguments reversed).
				t.Errorf("Mean mismatch. Want %v, got %v.", v, newNormal.mu[i])
			}
		}
		var sigma mat64.SymDense
		sigma.FromCholesky(&newNormal.chol)
		if !mat64.EqualApprox(&sigma, subEstCov, 1e-1) {
			t.Errorf("Covariance mismatch. Want:\n%0.8v\nGot:\n%0.8v\n", subEstCov, sigma)
		}
	}
}
// TestCovarianceMatrix verifies that Normal.CovarianceMatrix round-trips the
// covariance used at construction, both with a nil and a supplied destination.
func TestCovarianceMatrix(t *testing.T) {
	for _, test := range []struct {
		mu    []float64
		sigma *mat64.SymDense
	}{
		{
			mu:    []float64{2, 3, 4},
			sigma: mat64.NewSymDense(3, []float64{1, 0.5, 3, 0.5, 8, -1, 3, -1, 15}),
		},
	} {
		normal, ok := NewNormal(test.mu, test.sigma, nil)
		if !ok {
			t.Fatalf("Bad test, covariance matrix not positive definite")
		}
		// A nil destination must allocate and return the covariance.
		got := normal.CovarianceMatrix(nil)
		if !mat64.EqualApprox(got, test.sigma, 1e-14) {
			t.Errorf("Covariance mismatch with nil input")
		}
		// A correctly sized destination must be filled in place.
		dst := mat64.NewSymDense(test.sigma.Symmetric(), nil)
		normal.CovarianceMatrix(dst)
		if !mat64.EqualApprox(dst, test.sigma, 1e-14) {
			t.Errorf("Covariance mismatch with supplied input")
		}
	}
}
// TestMarginal checks MarginalNormal against the mean and covariance of the
// kept dimensions estimated by sampling from the full distribution.
func TestMarginal(t *testing.T) {
	for _, test := range []struct {
		mu       []float64
		sigma    *mat64.SymDense
		marginal []int
	}{
		{
			mu:       []float64{2, 3, 4},
			sigma:    mat64.NewSymDense(3, []float64{2, 0.5, 3, 0.5, 1, 0.6, 3, 0.6, 10}),
			marginal: []int{0},
		},
		{
			mu:       []float64{2, 3, 4},
			sigma:    mat64.NewSymDense(3, []float64{2, 0.5, 3, 0.5, 1, 0.6, 3, 0.6, 10}),
			marginal: []int{0, 2},
		},
		{
			mu:       []float64{2, 3, 4, 5},
			sigma:    mat64.NewSymDense(4, []float64{2, 0.5, 3, 0.1, 0.5, 1, 0.6, 0.2, 3, 0.6, 10, 0.3, 0.1, 0.2, 0.3, 3}),
			marginal: []int{0, 3},
		},
	} {
		normal, ok := NewNormal(test.mu, test.sigma, nil)
		if !ok {
			t.Fatalf("Bad test, covariance matrix not positive definite")
		}
		marginal, ok := normal.MarginalNormal(test.marginal, nil)
		if !ok {
			t.Fatalf("Bad test, marginal matrix not positive definite")
		}
		dim := normal.Dim()
		nSamples := 1000000
		samps := mat64.NewDense(nSamples, dim, nil)
		for i := 0; i < nSamples; i++ {
			normal.Rand(samps.RawRowView(i))
		}
		estMean := make([]float64, dim)
		for i := range estMean {
			estMean[i] = stat.Mean(mat64.Col(nil, i, samps), nil)
		}
		// The marginal mean of kept dimension i must match the sample mean
		// of the corresponding original dimension.
		for i, v := range test.marginal {
			if math.Abs(marginal.mu[i]-estMean[v]) > 1e-2 {
				t.Errorf("Mean mismatch: want: %v, got %v", estMean[v], marginal.mu[i])
			}
		}
		marginalCov := marginal.CovarianceMatrix(nil)
		estCov := stat.CovarianceMatrix(nil, samps, nil)
		for i, v1 := range test.marginal {
			for j, v2 := range test.marginal {
				c := marginalCov.At(i, j)
				ec := estCov.At(v1, v2)
				if math.Abs(c-ec) > 5e-2 {
					t.Errorf("Cov mismatch element i = %d, j = %d: want: %v, got %v", i, j, c, ec)
				}
			}
		}
	}
}
// TestMarginalSingle checks MarginalNormalSingle against the distribution
// parameters directly, and then against MarginalNormal on random cases.
func TestMarginalSingle(t *testing.T) {
	for _, test := range []struct {
		mu    []float64
		sigma *mat64.SymDense
	}{
		{
			mu:    []float64{2, 3, 4},
			sigma: mat64.NewSymDense(3, []float64{2, 0.5, 3, 0.5, 1, 0.6, 3, 0.6, 10}),
		},
		{
			mu:    []float64{2, 3, 4, 5},
			sigma: mat64.NewSymDense(4, []float64{2, 0.5, 3, 0.1, 0.5, 1, 0.6, 0.2, 3, 0.6, 10, 0.3, 0.1, 0.2, 0.3, 3}),
		},
	} {
		normal, ok := NewNormal(test.mu, test.sigma, nil)
		if !ok {
			t.Fatalf("Bad test, covariance matrix not positive definite")
		}
		// The single marginal of dimension i is univariate N(mu[i], sigma[i,i]).
		for i, mean := range test.mu {
			norm := normal.MarginalNormalSingle(i, nil)
			if norm.Mean() != mean {
				t.Errorf("Mean mismatch nil Sigma, idx %v: want %v, got %v.", i, mean, norm.Mean())
			}
			std := math.Sqrt(test.sigma.At(i, i))
			if math.Abs(norm.StdDev()-std) > 1e-14 {
				t.Errorf("StdDev mismatch nil Sigma, idx %v: want %v, got %v.", i, std, norm.StdDev())
			}
		}
	}
	// Test matching with TestMarginal.
	rnd := rand.New(rand.NewSource(1))
	for cas := 0; cas < 10; cas++ {
		dim := rnd.Intn(10) + 1
		mu := make([]float64, dim)
		for i := range mu {
			mu[i] = rnd.Float64()
		}
		x := make([]float64, dim*dim)
		for i := range x {
			x[i] = rnd.Float64()
		}
		mat := mat64.NewDense(dim, dim, x)
		// sigma = mat*mat^T is positive (semi-)definite by construction.
		var sigma mat64.SymDense
		sigma.SymOuterK(1, mat)
		normal, ok := NewNormal(mu, &sigma, nil)
		if !ok {
			t.Fatal("bad test")
		}
		// MarginalNormalSingle(i) must agree with MarginalNormal([]int{i}).
		for i := 0; i < dim; i++ {
			single := normal.MarginalNormalSingle(i, nil)
			mult, ok := normal.MarginalNormal([]int{i}, nil)
			if !ok {
				t.Fatal("bad test")
			}
			if math.Abs(single.Mean()-mult.Mean(nil)[0]) > 1e-14 {
				t.Errorf("Mean mismatch")
			}
			if math.Abs(single.Variance()-mult.CovarianceMatrix(nil).At(0, 0)) > 1e-14 {
				t.Errorf("Variance mismatch")
			}
		}
	}
}

View File

@@ -0,0 +1,73 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distmv
import (
"log"
"math/rand"
"testing"
"github.com/gonum/matrix/mat64"
)
// BenchmarkMarginalNormal10 measures marginalization of a 10-dimensional
// Normal when the covariance matrix has already been computed and cached.
func BenchmarkMarginalNormal10(b *testing.B) {
	sz := 10
	rnd := rand.New(rand.NewSource(1))
	normal := randomNormal(sz, rnd)
	_ = normal.CovarianceMatrix(nil) // pre-compute sigma
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		marg, ok := normal.MarginalNormal([]int{1}, nil)
		if !ok {
			b.Error("bad test")
		}
		_ = marg
	}
}
// BenchmarkMarginalNormalReset10 measures marginalization of a 10-dimensional
// Normal without pre-computing the covariance matrix (contrast with
// BenchmarkMarginalNormal10).
func BenchmarkMarginalNormalReset10(b *testing.B) {
	sz := 10
	rnd := rand.New(rand.NewSource(1))
	normal := randomNormal(sz, rnd)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		marg, ok := normal.MarginalNormal([]int{1}, nil)
		if !ok {
			b.Error("bad test")
		}
		_ = marg
	}
}
// BenchmarkMarginalNormalSingle10 measures the single-dimension marginal of a
// 10-dimensional Normal.
func BenchmarkMarginalNormalSingle10(b *testing.B) {
	sz := 10
	rnd := rand.New(rand.NewSource(1))
	normal := randomNormal(sz, rnd)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		marg := normal.MarginalNormalSingle(1, nil)
		_ = marg
	}
}
// randomNormal returns a Normal of dimension sz with a random mean and a
// random positive-definite covariance constructed as an outer product.
func randomNormal(sz int, rnd *rand.Rand) *Normal {
	mu := make([]float64, sz)
	for i := range mu {
		mu[i] = rnd.Float64()
	}
	data := make([]float64, sz*sz)
	for i := range data {
		data[i] = rnd.Float64()
	}
	dM := mat64.NewDense(sz, sz, data)
	// sigma = dM*dM^T is symmetric positive (semi-)definite.
	var sigma mat64.SymDense
	sigma.SymOuterK(1, dM)
	normal, ok := NewNormal(mu, &sigma, nil)
	if !ok {
		log.Fatal("bad test, not pos def")
	}
	return normal
}

252
stat/distmv/statdist.go Normal file
View File

@@ -0,0 +1,252 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distmv
import (
"math"
"github.com/gonum/floats"
"github.com/gonum/matrix/mat64"
"github.com/gonum/stat"
)
// Bhattacharyya is a type for computing the Bhattacharyya distance between
// probability distributions.
//
// The Bhattacharyya distance is defined as
//  D_B = -ln(BC(l,r))
//  BC = \int_x (p(x)q(x))^(1/2) dx
// where BC is known as the Bhattacharyya coefficient.
// The Bhattacharyya distance is related to the Hellinger distance by
//  H = sqrt(1-BC)
// For more information, see
//  https://en.wikipedia.org/wiki/Bhattacharyya_distance
type Bhattacharyya struct{}
// DistNormal computes the Bhattacharyya distance between normal distributions l and r.
// The dimensions of the input distributions must match or DistNormal will panic.
//
// For Normal distributions, the Bhattacharyya distance is
//  Σ = (Σ_l + Σ_r)/2
//  D_B = (1/8)*(μ_l - μ_r)^T*Σ^-1*(μ_l - μ_r) + (1/2)*ln(det(Σ)/(det(Σ_l)*det(Σ_r))^(1/2))
func (Bhattacharyya) DistNormal(l, r *Normal) float64 {
	dim := l.Dim()
	if dim != r.Dim() {
		panic(badSizeMismatch)
	}
	// Σ is the average of the two covariance matrices.
	var sigma mat64.SymDense
	sigma.AddSym(&l.sigma, &r.sigma)
	sigma.ScaleSym(0.5, &sigma)
	var chol mat64.Cholesky
	chol.Factorize(&sigma)
	// First term: (1/8) of the squared Mahalanobis distance under Σ.
	mahalanobis := stat.Mahalanobis(mat64.NewVector(dim, l.mu), mat64.NewVector(dim, r.mu), &chol)
	mahalanobisSq := mahalanobis * mahalanobis
	// Second term expressed with log-determinants:
	// 0.5*ln(det(Σ)) - 0.25*ln(det(Σ_l)) - 0.25*ln(det(Σ_r)).
	dl := l.chol.LogDet()
	dr := r.chol.LogDet()
	ds := chol.LogDet()
	return 0.125*mahalanobisSq + 0.5*ds - 0.25*dl - 0.25*dr
}
// DistUniform computes the Bhattacharyya distance between uniform distributions l and r.
// The dimensions of the input distributions must match or DistUniform will panic.
func (Bhattacharyya) DistUniform(l, r *Uniform) float64 {
	if len(l.bounds) != len(r.bounds) {
		panic(badSizeMismatch)
	}
	// BC = \int \sqrt(p(x)q(x)), which for uniform distributions is a constant
	// over the volume where both distributions have positive probability.
	// Compute the overlap and the value of sqrt(p(x)q(x)). The entropy is the
	// negative log probability of the distribution (use instead of LogProb so
	// it is not necessary to construct an x value).
	//
	//  BC = volume * sqrt(p(x)q(x))
	//  logBC = log(volume) + 0.5*(logP + logQ)
	//  D_B = -logBC
	return -unifLogVolOverlap(l.bounds, r.bounds) + 0.5*(l.Entropy()+r.Entropy())
}
// unifLogVolOverlap returns the log of the volume of the axis-aligned region
// on which both sets of bounds assign positive probability. If the two
// hyper-rectangles are disjoint the overlap volume is zero and -Inf is
// returned.
func unifLogVolOverlap(b1, b2 []Bound) float64 {
	logVol := 0.0
	for i, a := range b1 {
		c := b2[i]
		lo := math.Max(a.Min, c.Min)
		hi := math.Min(a.Max, c.Max)
		// No overlap in any one dimension means no overlap at all.
		if hi <= lo {
			return math.Inf(-1)
		}
		logVol += math.Log(hi - lo)
	}
	return logVol
}
// CrossEntropy is a type for computing the cross-entropy between probability
// distributions.
//
// The cross-entropy is defined as
//  - \int_x l(x) log(r(x)) dx = KL(l || r) + H(l)
// where KL is the Kullback-Leibler divergence and H is the entropy.
// For more information, see
//  https://en.wikipedia.org/wiki/Cross_entropy
type CrossEntropy struct{}
// DistNormal returns the cross-entropy between normal distributions l and r.
// The dimensions of the input distributions must match or DistNormal will panic.
func (CrossEntropy) DistNormal(l, r *Normal) float64 {
	if l.Dim() != r.Dim() {
		panic(badSizeMismatch)
	}
	// Cross-entropy decomposes as KL(l || r) + H(l).
	var kl KullbackLeibler
	return kl.DistNormal(l, r) + l.Entropy()
}
// Hellinger is a type for computing the Hellinger distance between probability
// distributions.
//
// The Hellinger distance is defined as
//  H^2(l,r) = 1/2 * int_x (\sqrt(l(x)) - \sqrt(r(x)))^2 dx
// and is bounded between 0 and 1.
// The Hellinger distance is related to the Bhattacharyya distance by
//  H^2 = 1 - exp(-D_B)
// For more information, see
//  https://en.wikipedia.org/wiki/Hellinger_distance
type Hellinger struct{}
// DistNormal returns the Hellinger distance between normal distributions l and r.
// The dimensions of the input distributions must match or DistNormal will panic.
//
// See the documentation of Bhattacharyya.DistNormal for the formula for Normal
// distributions.
func (Hellinger) DistNormal(l, r *Normal) float64 {
	if l.Dim() != r.Dim() {
		panic(badSizeMismatch)
	}
	// The Bhattacharyya coefficient is BC = exp(-D_B), and H = sqrt(1 - BC).
	var b Bhattacharyya
	bc := math.Exp(-b.DistNormal(l, r))
	return math.Sqrt(1 - bc)
}
// KullbackLeibler is a type for computing the Kullback-Leibler divergence from l to r.
// The dimensions of the input distributions must match or the function will panic.
//
// The Kullback-Leibler divergence is defined as
//  D_KL(l || r ) = \int_x p(x) log(p(x)/q(x)) dx
// Note that the Kullback-Leibler divergence is not symmetric with respect to
// the order of the input arguments.
type KullbackLeibler struct{}
// DistNormal returns the Kullback-Leibler divergence between normal distributions l and r.
// The dimensions of the input distributions must match or DistNormal will panic.
//
// For two normal distributions, the KL divergence is computed as
//  D_KL(l || r) = 0.5*[ln(|Σ_r|) - ln(|Σ_l|) + (μ_l - μ_r)^T*Σ_r^-1*(μ_l - μ_r) + tr(Σ_r^-1*Σ_l)-d]
func (KullbackLeibler) DistNormal(l, r *Normal) float64 {
	dim := l.Dim()
	if dim != r.Dim() {
		panic(badSizeMismatch)
	}
	// Quadratic term: (μ_l - μ_r)^T Σ_r^-1 (μ_l - μ_r).
	mahalanobis := stat.Mahalanobis(mat64.NewVector(dim, l.mu), mat64.NewVector(dim, r.mu), &r.chol)
	mahalanobisSq := mahalanobis * mahalanobis
	// TODO(btracey): Optimize where there is a SolveCholeskySym
	// TODO(btracey): There may be a more efficient way to just compute the trace
	// Compute tr(Σ_r^-1*Σ_l) using the fact that Σ_l = U^T * U
	var u mat64.TriDense
	u.UFromCholesky(&l.chol)
	var m mat64.Dense
	err := m.SolveCholesky(&r.chol, u.T())
	if err != nil {
		// Σ_r is effectively singular; the divergence is not computable.
		return math.NaN()
	}
	m.Mul(&m, &u)
	tr := mat64.Trace(&m)
	// logSqrtDet fields hold 0.5*ln|Σ|, so their difference supplies the
	// 0.5*(ln|Σ_r| - ln|Σ_l|) term directly.
	return r.logSqrtDet - l.logSqrtDet + 0.5*(mahalanobisSq+tr-float64(l.dim))
}
// DistUniform returns the Kullback-Leibler divergence between uniform
// distributions l and r. The dimensions of the input distributions must match
// or DistUniform will panic.
func (KullbackLeibler) DistUniform(l, r *Uniform) float64 {
	lb := l.Bounds(nil)
	rb := r.Bounds(nil)
	if len(lb) != len(rb) {
		panic(badSizeMismatch)
	}
	// If l's support is not completely contained within r's, then r(x) = 0
	// at some x where l(x) > 0 and the divergence is infinite.
	for i, b := range lb {
		if b.Min < rb[i].Min || rb[i].Max < b.Max {
			return math.Inf(1)
		}
	}
	// The KL divergence is finite.
	//
	// KL defines 0*ln(0) = 0, so only the region where l(x) > 0 contributes.
	// There both densities are constant, and the constant log-ratio
	// integrates against l to exactly itself. The entropy is -log(p(x)), so
	//  KL = log(l(x)) - log(r(x)) = H(r) - H(l).
	return r.Entropy() - l.Entropy()
}
// Wasserstein is a type for computing the Wasserstein distance between two
// probability distributions.
//
// The Wasserstein distance is defined as
//  W(l,r) := inf 𝔼(||X-Y||_2^2)^1/2
// For more information, see
//  https://en.wikipedia.org/wiki/Wasserstein_metric
type Wasserstein struct{}
// DistNormal returns the Wasserstein distance between normal distributions l and r.
// The dimensions of the input distributions must match or DistNormal will panic.
//
// The Wasserstein distance for Normal distributions is
//  d^2 = ||m_l - m_r||_2^2 + Tr(Σ_l + Σ_r - 2(Σ_l^(1/2)*Σ_r*Σ_l^(1/2))^(1/2))
// For more information, see
//  http://djalil.chafai.net/blog/2010/04/30/wasserstein-distance-between-two-gaussians/
func (Wasserstein) DistNormal(l, r *Normal) float64 {
	dim := l.Dim()
	if dim != r.Dim() {
		panic(badSizeMismatch)
	}
	// Squared Euclidean distance between the means.
	d := floats.Distance(l.mu, r.mu, 2)
	d = d * d
	// Compute Σ_l^(1/2)
	var ssl mat64.SymDense
	ssl.PowPSD(&l.sigma, 0.5)
	// Compute Σ_l^(1/2)*Σ_r*Σ_l^(1/2)
	var mean mat64.Dense
	mean.Mul(&ssl, &r.sigma)
	mean.Mul(&mean, &ssl)
	// Reinterpret as symdense, and take Σ^(1/2)
	// (the product above is symmetric since it has the form A*B*A with
	// symmetric A and B).
	meanSym := mat64.NewSymDense(dim, mean.RawMatrix().Data)
	ssl.PowPSD(meanSym, 0.5)
	tr := mat64.Trace(&r.sigma)
	tl := mat64.Trace(&l.sigma)
	tm := mat64.Trace(&ssl)
	return d + tl + tr - 2*tm
}

View File

@@ -0,0 +1,261 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distmv
import (
"math"
"math/rand"
"testing"
"github.com/gonum/floats"
"github.com/gonum/matrix/mat64"
)
// TestBhattacharyyaNormal compares the closed-form Normal Bhattacharyya
// distance against a Monte Carlo estimate, and checks symmetry.
func TestBhattacharyyaNormal(t *testing.T) {
	for cas, test := range []struct {
		am, bm  []float64
		ac, bc  *mat64.SymDense
		samples int
		tol     float64
	}{
		{
			am:      []float64{2, 3},
			ac:      mat64.NewSymDense(2, []float64{3, -1, -1, 2}),
			bm:      []float64{-1, 1},
			bc:      mat64.NewSymDense(2, []float64{1.5, 0.2, 0.2, 0.9}),
			samples: 100000,
			tol:     1e-2,
		},
	} {
		rnd := rand.New(rand.NewSource(1))
		a, ok := NewNormal(test.am, test.ac, rnd)
		if !ok {
			panic("bad test")
		}
		b, ok := NewNormal(test.bm, test.bc, rnd)
		if !ok {
			panic("bad test")
		}
		want := bhattacharyyaSample(a.Dim(), test.samples, a, b)
		got := Bhattacharyya{}.DistNormal(a, b)
		if math.Abs(want-got) > test.tol {
			t.Errorf("Bhattacharyya mismatch, case %d: got %v, want %v", cas, got, want)
		}
		// Bhattacharyya should be symmetric
		got2 := Bhattacharyya{}.DistNormal(b, a)
		if math.Abs(got-got2) > 1e-14 {
			t.Errorf("Bhattacharyya distance not symmetric")
		}
	}
}
// TestBhattacharyyaUniform compares the closed-form Uniform Bhattacharyya
// distance against a Monte Carlo estimate, including a partially disjoint
// pair, and checks symmetry.
func TestBhattacharyyaUniform(t *testing.T) {
	rnd := rand.New(rand.NewSource(1))
	for cas, test := range []struct {
		a, b    *Uniform
		samples int
		tol     float64
	}{
		{
			a:       NewUniform([]Bound{{-3, 2}, {-5, 8}}, rnd),
			b:       NewUniform([]Bound{{-4, 1}, {-7, 10}}, rnd),
			samples: 100000,
			tol:     1e-2,
		},
		{
			a:       NewUniform([]Bound{{-3, 2}, {-5, 8}}, rnd),
			b:       NewUniform([]Bound{{-5, -4}, {-7, 10}}, rnd),
			samples: 100000,
			tol:     1e-2,
		},
	} {
		a, b := test.a, test.b
		want := bhattacharyyaSample(a.Dim(), test.samples, a, b)
		got := Bhattacharyya{}.DistUniform(a, b)
		if math.Abs(want-got) > test.tol {
			t.Errorf("Bhattacharyya mismatch, case %d: got %v, want %v", cas, got, want)
		}
		// Bhattacharyya should be symmetric
		got2 := Bhattacharyya{}.DistUniform(b, a)
		if math.Abs(got-got2) > 1e-14 {
			t.Errorf("Bhattacharyya distance not symmetric")
		}
	}
}
// bhattacharyyaSample estimates the Bhattacharyya distance by importance
// sampling under l:
//  BC = \int sqrt(l(x) r(x)) dx = E_l[ sqrt(r(x)/l(x)) ]
// and the returned distance is -log(BC).
func bhattacharyyaSample(dim, samples int, l RandLogProber, r LogProber) float64 {
	logTerms := make([]float64, samples)
	pt := make([]float64, dim)
	for i := range logTerms {
		l.Rand(pt)
		lp := l.LogProb(pt)
		rp := r.LogProb(pt)
		// Each term is log sqrt(r/l) evaluated at the sampled point.
		logTerms[i] = 0.5*rp - 0.5*lp
	}
	// Average in log space and negate: D_B = -log(mean of sqrt(r/l)).
	return -(floats.LogSumExp(logTerms) - math.Log(float64(samples)))
}
// TestCrossEntropyNormal compares the closed-form Normal cross-entropy
// against a direct Monte Carlo estimate of -E_a[log b(x)].
func TestCrossEntropyNormal(t *testing.T) {
	for cas, test := range []struct {
		am, bm  []float64
		ac, bc  *mat64.SymDense
		samples int
		tol     float64
	}{
		{
			am:      []float64{2, 3},
			ac:      mat64.NewSymDense(2, []float64{3, -1, -1, 2}),
			bm:      []float64{-1, 1},
			bc:      mat64.NewSymDense(2, []float64{1.5, 0.2, 0.2, 0.9}),
			samples: 100000,
			tol:     1e-2,
		},
	} {
		rnd := rand.New(rand.NewSource(1))
		a, ok := NewNormal(test.am, test.ac, rnd)
		if !ok {
			panic("bad test")
		}
		b, ok := NewNormal(test.bm, test.bc, rnd)
		if !ok {
			panic("bad test")
		}
		// Monte Carlo estimate: average of -log b(x) over samples from a.
		var ce float64
		x := make([]float64, a.Dim())
		for i := 0; i < test.samples; i++ {
			a.Rand(x)
			ce -= b.LogProb(x)
		}
		ce /= float64(test.samples)
		got := CrossEntropy{}.DistNormal(a, b)
		if math.Abs(ce-got) > test.tol {
			t.Errorf("CrossEntropy mismatch, case %d: got %v, want %v", cas, got, ce)
		}
	}
}
// TestHellingerNormal compares the closed-form Normal Hellinger distance
// against an importance-sampled Monte Carlo estimate.
func TestHellingerNormal(t *testing.T) {
	for cas, test := range []struct {
		am, bm  []float64
		ac, bc  *mat64.SymDense
		samples int
		tol     float64
	}{
		{
			am:      []float64{2, 3},
			ac:      mat64.NewSymDense(2, []float64{3, -1, -1, 2}),
			bm:      []float64{-1, 1},
			bc:      mat64.NewSymDense(2, []float64{1.5, 0.2, 0.2, 0.9}),
			samples: 100000,
			// The sampled estimator has high variance, hence the loose tolerance.
			tol: 5e-1,
		},
	} {
		rnd := rand.New(rand.NewSource(1))
		a, ok := NewNormal(test.am, test.ac, rnd)
		if !ok {
			panic("bad test")
		}
		b, ok := NewNormal(test.bm, test.bc, rnd)
		if !ok {
			panic("bad test")
		}
		// Log-space accumulator for the H^2 integrand terms.
		lAitchEDoubleHockeySticks := make([]float64, test.samples)
		x := make([]float64, a.Dim())
		for i := 0; i < test.samples; i++ {
			// Do importance sampling over a: \int (\sqrt(a)-\sqrt(b))^2/a * a dx
			a.Rand(x)
			pa := a.LogProb(x)
			pb := b.LogProb(x)
			d := math.Exp(0.5*pa) - math.Exp(0.5*pb)
			d = d * d
			lAitchEDoubleHockeySticks[i] = math.Log(d) - pa
		}
		want := math.Sqrt(0.5 * math.Exp(floats.LogSumExp(lAitchEDoubleHockeySticks)-math.Log(float64(test.samples))))
		got := Hellinger{}.DistNormal(a, b)
		if math.Abs(want-got) > test.tol {
			t.Errorf("Hellinger mismatch, case %d: got %v, want %v", cas, got, want)
		}
	}
}
// TestKullbackLeiblerNormal compares the closed-form Normal KL divergence
// against a Monte Carlo estimate.
func TestKullbackLeiblerNormal(t *testing.T) {
	for cas, test := range []struct {
		am, bm  []float64
		ac, bc  *mat64.SymDense
		samples int
		tol     float64
	}{
		{
			am:      []float64{2, 3},
			ac:      mat64.NewSymDense(2, []float64{3, -1, -1, 2}),
			bm:      []float64{-1, 1},
			bc:      mat64.NewSymDense(2, []float64{1.5, 0.2, 0.2, 0.9}),
			samples: 10000,
			tol:     1e-2,
		},
	} {
		rnd := rand.New(rand.NewSource(1))
		a, ok := NewNormal(test.am, test.ac, rnd)
		if !ok {
			panic("bad test")
		}
		b, ok := NewNormal(test.bm, test.bc, rnd)
		if !ok {
			panic("bad test")
		}
		want := klSample(a.Dim(), test.samples, a, b)
		got := KullbackLeibler{}.DistNormal(a, b)
		if !floats.EqualWithinAbsOrRel(want, got, test.tol, test.tol) {
			t.Errorf("Case %d, KL mismatch: got %v, want %v", cas, got, want)
		}
	}
}
// TestKullbackLeiblerUniform compares the closed-form Uniform KL divergence
// against a Monte Carlo estimate, including a case with disjoint support.
func TestKullbackLeiblerUniform(t *testing.T) {
	rnd := rand.New(rand.NewSource(1))
	for cas, test := range []struct {
		a, b    *Uniform
		samples int
		tol     float64
	}{
		{
			a:       NewUniform([]Bound{{-5, 2}, {-7, 12}}, rnd),
			b:       NewUniform([]Bound{{-4, 1}, {-7, 10}}, rnd),
			samples: 100000,
			tol:     1e-2,
		},
		{
			a:       NewUniform([]Bound{{-5, 2}, {-7, 12}}, rnd),
			b:       NewUniform([]Bound{{-9, -6}, {-7, 10}}, rnd),
			samples: 100000,
			tol:     1e-2,
		},
	} {
		a, b := test.a, test.b
		want := klSample(a.Dim(), test.samples, a, b)
		got := KullbackLeibler{}.DistUniform(a, b)
		if math.Abs(want-got) > test.tol {
			t.Errorf("Kullback-Leibler mismatch, case %d: got %v, want %v", cas, got, want)
		}
	}
}
// klSample estimates the Kullback-Leibler divergence D_KL(l || r) by
// Monte Carlo: the average of log(l(x)) - log(r(x)) over samples drawn from l.
func klSample(dim, samples int, l RandLogProber, r LogProber) float64 {
	x := make([]float64, dim)
	var sum float64
	for i := 0; i < samples; i++ {
		l.Rand(x)
		sum += l.LogProb(x) - r.LogProb(x)
	}
	return sum / float64(samples)
}

354
stat/distmv/studentst.go Normal file
View File

@@ -0,0 +1,354 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distmv
import (
"math"
"math/rand"
"sort"
"golang.org/x/tools/container/intsets"
"github.com/gonum/floats"
"github.com/gonum/matrix/mat64"
"github.com/gonum/stat/distuv"
)
// StudentsT is a multivariate Student's T distribution. It is a distribution over
// ^n with the probability density
// p(y) = (Γ((ν+n)/2) / Γ(ν/2)) * (νπ)^(-n/2) * |Ʃ|^(-1/2) *
// (1 + 1/ν * (y-μ)^T * Ʃ^-1 * (y-μ))^(-(ν+n)/2)
// where ν is a scalar greater than 2, μ is a vector in ^n, and Ʃ is an n×n
// symmetric positive definite matrix.
//
// In this distribution, ν sets the spread of the distribution, similar to
// the degrees of freedom in a univariate Student's T distribution. As ν → ∞,
// the distribution approaches a multi-variate normal distribution.
// μ is the mean of the distribution, and the covariance is ν/(ν-2)*Ʃ.
//
// See https://en.wikipedia.org/wiki/Student%27s_t-distribution and
// http://users.isy.liu.se/en/rt/roth/student.pdf for more information.
type StudentsT struct {
nu float64
mu []float64
src *rand.Rand
sigma mat64.SymDense // only stored if needed
chol mat64.Cholesky
lower mat64.TriDense
logSqrtDet float64
dim int
}
// NewStudentsT creates a new StudentsT with the given nu, mu, and sigma
// parameters.
//
// NewStudentsT panics if len(mu) == 0, or if len(mu) != sigma.Symmetric(). If
// the covariance matrix is not positive-definite, nil is returned and ok is false.
func NewStudentsT(mu []float64, sigma mat64.Symmetric, nu float64, src *rand.Rand) (dist *StudentsT, ok bool) {
	if len(mu) == 0 {
		panic(badZeroDimension)
	}
	dim := sigma.Symmetric()
	if dim != len(mu) {
		panic(badSizeMismatch)
	}
	s := &StudentsT{
		nu:  nu,
		mu:  make([]float64, dim),
		dim: dim,
		src: src,
	}
	copy(s.mu, mu)
	// The Cholesky factorization succeeds only for positive-definite sigma.
	ok = s.chol.Factorize(sigma)
	if !ok {
		return nil, false
	}
	// Cache sigma, its lower factor (for sampling), and 0.5*log(det(sigma)).
	s.sigma = *mat64.NewSymDense(dim, nil)
	s.sigma.CopySym(sigma)
	s.lower.LFromCholesky(&s.chol)
	s.logSqrtDet = 0.5 * s.chol.LogDet()
	return s, true
}
// ConditionStudentsT returns the Student's T distribution that is the receiver
// conditioned on the input evidence, and the success of the operation.
// The returned Student's T has dimension
// n - len(observed), where n is the dimension of the original receiver.
// The dimension order is preserved during conditioning, so if the value
// of dimension 1 is observed, the returned normal represents dimensions {0, 2, ...}
// of the original Student's T distribution.
//
// ok indicates whether there was a failure during the update. If ok is false
// the operation failed and dist is not usable.
// Mathematically this is impossible, but can occur with finite precision arithmetic.
func (s *StudentsT) ConditionStudentsT(observed []int, values []float64, src *rand.Rand) (dist *StudentsT, ok bool) {
	if len(observed) == 0 {
		panic("studentst: no observed value")
	}
	if len(observed) != len(values) {
		panic(badInputLength)
	}
	for _, v := range observed {
		if v < 0 || v >= s.dim {
			panic("studentst: observed value out of bounds")
		}
	}
	// Compute the conditioned parameters; a nil mean signals numerical failure.
	newNu, newMean, newSigma := studentsTConditional(observed, values, s.nu, s.mu, &s.sigma)
	if newMean == nil {
		return nil, false
	}
	return NewStudentsT(newMean, newSigma, newNu, src)
}
// studentsTConditional updates a Student's T distribution based on the observed samples
// (see documentation for the public function). The Gaussian conditional update
// is treated as a special case when nu == math.Inf(1).
// On numerical failure (sigma_{2,2} not factorizable or a solve error) it
// returns NaN, nil, nil.
func studentsTConditional(observed []int, values []float64, nu float64, mu []float64, sigma mat64.Symmetric) (newNu float64, newMean []float64, newSigma *mat64.SymDense) {
	dim := len(mu)
	ob := len(observed)
	unobserved := findUnob(observed, dim)
	unob := len(unobserved)
	if unob == 0 {
		panic("stat: all dimensions observed")
	}
	// Partition the mean: mu1 is the unobserved part; mu2 holds the
	// residuals (v - mu_2) of the observed part.
	mu1 := make([]float64, unob)
	for i, v := range unobserved {
		mu1[i] = mu[v]
	}
	mu2 := make([]float64, ob) // really v - mu2
	for i, v := range observed {
		mu2[i] = values[i] - mu[v]
	}
	// Partition sigma into the unobserved block (1,1), the observed
	// block (2,2), and the cross block (2,1).
	var sigma11, sigma22 mat64.SymDense
	sigma11.SubsetSym(sigma, unobserved)
	sigma22.SubsetSym(sigma, observed)
	sigma21 := mat64.NewDense(ob, unob, nil)
	for i, r := range observed {
		for j, c := range unobserved {
			v := sigma.At(r, c)
			sigma21.Set(i, j, v)
		}
	}
	var chol mat64.Cholesky
	ok := chol.Factorize(&sigma22)
	if !ok {
		return math.NaN(), nil, nil
	}
	// Compute mu_1 + sigma_{2,1}^T * sigma_{2,2}^-1 (v - mu_2).
	v := mat64.NewVector(ob, mu2)
	var tmp, tmp2 mat64.Vector
	err := tmp.SolveCholeskyVec(&chol, v)
	if err != nil {
		return math.NaN(), nil, nil
	}
	tmp2.MulVec(sigma21.T(), &tmp)
	for i := range mu1 {
		mu1[i] += tmp2.At(i, 0)
	}
	// Compute tmp4 = sigma_{2,1}^T * sigma_{2,2}^-1 * sigma_{2,1}.
	// TODO(btracey): Should this be a method of SymDense?
	var tmp3, tmp4 mat64.Dense
	err = tmp3.SolveCholesky(&chol, sigma21)
	if err != nil {
		return math.NaN(), nil, nil
	}
	tmp4.Mul(sigma21.T(), &tmp3)
	// Compute sigma_{1,1} - tmp4
	// TODO(btracey): If tmp4 can constructed with a method, then this can be
	// replaced with SubSym.
	for i := 0; i < len(unobserved); i++ {
		for j := i; j < len(unobserved); j++ {
			v := sigma11.At(i, j)
			sigma11.SetSym(i, j, v-tmp4.At(i, j))
		}
	}
	// The computed variables are accurate for a Normal.
	if math.IsInf(nu, 1) {
		return nu, mu1, &sigma11
	}
	// Compute beta = (v - mu_2)^T * sigma_{2,2}^-1 * (v - mu_2)^T
	beta := mat64.Dot(v, &tmp)
	// Scale the covariance matrix
	sigma11.ScaleSym((nu+beta)/(nu+float64(ob)), &sigma11)
	// Conditioning on ob dimensions increases the degrees of freedom by ob.
	return nu + float64(ob), mu1, &sigma11
}
// findUnob returns the unobserved variables (the complementary set to
// observed) in increasing order.
// findUnob panics if any value is repeated in observed.
//
// The previous implementation used intsets.Sparse, which silently ignores
// duplicate insertions, so the documented panic on repeated values never
// occurred. The map-based implementation below detects duplicates and also
// removes the dependency on intsets; iterating indices in order keeps the
// result sorted without an explicit sort.
func findUnob(observed []int, dim int) (unobserved []int) {
	seen := make(map[int]bool, len(observed))
	for _, v := range observed {
		if seen[v] {
			panic("stat: repeated observed variable")
		}
		seen[v] = true
	}
	unobserved = make([]int, 0, dim-len(observed))
	for i := 0; i < dim; i++ {
		if !seen[i] {
			unobserved = append(unobserved, i)
		}
	}
	return unobserved
}
// CovarianceMatrix returns the covariance matrix of the distribution. Upon
// return, the value at element {i, j} of the covariance matrix is equal to
// the covariance of the i^th and j^th variables.
//  covariance(i, j) = E[(x_i - E[x_i])(x_j - E[x_j])]
// If the input matrix is nil a new matrix is allocated, otherwise the result
// is stored in-place into the input.
//
// The returned matrix is sigma scaled by nu/(nu-2); this scaling is only a
// valid covariance for nu > 2.
func (st *StudentsT) CovarianceMatrix(s *mat64.SymDense) *mat64.SymDense {
	if s == nil {
		s = mat64.NewSymDense(st.dim, nil)
	}
	sn := s.Symmetric()
	if sn != st.dim {
		// The previous message said "normal:"; this is the Student's T type.
		panic("studentst: input matrix size mismatch")
	}
	s.CopySym(&st.sigma)
	s.ScaleSym(st.nu/(st.nu-2), s)
	return s
}
// Dim returns the dimension (number of variables) of the distribution.
func (s *StudentsT) Dim() int {
	return s.dim
}
// LogProb computes the log of the pdf of the point x.
//
// LogProb panics if len(y) is not equal to the dimension of the distribution.
func (s *StudentsT) LogProb(y []float64) float64 {
	if len(y) != s.dim {
		panic(badInputLength)
	}
	nu := s.nu
	n := float64(s.dim)
	// Normalization constant of the multivariate Student's T pdf:
	// lgamma((nu+n)/2) - lgamma(nu/2) - (n/2) log(nu*pi) - log sqrt(det(sigma)).
	lg1, _ := math.Lgamma((nu + n) / 2)
	lg2, _ := math.Lgamma(nu / 2)
	t1 := lg1 - lg2 - n/2*math.Log(nu*math.Pi) - s.logSqrtDet
	// shift = y - mu, computed on a copy so the caller's slice is untouched.
	shift := make([]float64, len(y))
	copy(shift, y)
	floats.Sub(shift, s.mu)
	x := mat64.NewVector(s.dim, shift)
	// Quadratic form (y-mu)^T sigma^-1 (y-mu) via the cached Cholesky
	// factorization; the solve error is ignored because chol was validated
	// at construction.
	var tmp mat64.Vector
	tmp.SolveCholeskyVec(&s.chol, x)
	dot := mat64.Dot(&tmp, x)
	return t1 - ((nu+n)/2)*math.Log(1+dot/nu)
}
// MarginalStudentsT returns the marginal distribution of the given input variables,
// and the success of the operation.
// That is, MarginalStudentsT returns
//  p(x_i) = \int_{x_o} p(x_i | x_o) p(x_o) dx_o
// where x_i are the dimensions in the input, and x_o are the remaining dimensions.
// See https://en.wikipedia.org/wiki/Marginal_distribution for more information.
//
// The input src is passed to the created StudentsT.
//
// ok indicates whether there was a failure during the marginalization. If ok is false
// the operation failed and dist is not usable.
// Mathematically this is impossible, but can occur with finite precision arithmetic.
func (s *StudentsT) MarginalStudentsT(vars []int, src *rand.Rand) (dist *StudentsT, ok bool) {
	// The marginal mean is the subset of the full mean vector, and the
	// marginal scale matrix is the corresponding principal submatrix.
	mu := make([]float64, len(vars))
	for i, idx := range vars {
		mu[i] = s.mu[idx]
	}
	var sig mat64.SymDense
	sig.SubsetSym(&s.sigma, vars)
	return NewStudentsT(mu, &sig, s.nu, src)
}
// MarginalStudentsTSingle returns the marginal distribution of the given input variable.
// That is, MarginalStudentsTSingle returns
//  p(x_i) = \int_{x_o} p(x_i | x_o) p(x_o) dx_o
// where i is the input index, and x_o are the remaining dimensions.
// See https://en.wikipedia.org/wiki/Marginal_distribution for more information.
//
// The input src is passed to the call to NewStudentsT.
func (s *StudentsT) MarginalStudentsTSingle(i int, src *rand.Rand) distuv.StudentsT {
	// The univariate marginal keeps nu, takes the i-th mean, and uses the
	// square root of the i-th diagonal element of sigma as its scale.
	return distuv.StudentsT{
		Mu:    s.mu[i],
		Sigma: math.Sqrt(s.sigma.At(i, i)),
		Nu:    s.nu,
		Src:   src,
	}
}
// NOTE(review): this TODO appears stale — MarginalStudentsTSingle above
// already returns a three-parameter (Mu, Sigma, Nu) univariate StudentsT.
// Confirm whether any further change to the univariate type is still needed.
// Mean returns the mean of the probability distribution. If the
// input argument is nil, a new slice will be allocated, otherwise the result
// will be put in-place into the input and returned.
func (s *StudentsT) Mean(x []float64) []float64 {
	x = reuseAs(x, s.dim)
	copy(x, s.mu)
	return x
}
// Prob computes the value of the probability density function at x.
// It is the exponential of LogProb, and panics under the same conditions.
func (s *StudentsT) Prob(y []float64) float64 {
	return math.Exp(s.LogProb(y))
}
// Rand generates a random number according to the distribution.
// If the input slice is nil, new memory is allocated, otherwise the result is stored
// in place.
func (s *StudentsT) Rand(x []float64) []float64 {
	// If Y is distributed according to N(0,Sigma), and U is chi^2 with
	// parameter ν, then
	//  X = mu + Y * sqrt(nu / U)
	// X is distributed according to this distribution.

	// Generate Y.
	x = reuseAs(x, s.dim)
	tmp := make([]float64, s.dim)
	// Draw i.i.d. standard normals, from the private source if one is set.
	if s.src == nil {
		for i := range x {
			tmp[i] = rand.NormFloat64()
		}
	} else {
		for i := range x {
			tmp[i] = s.src.NormFloat64()
		}
	}
	// Correlate the normals through the cached Cholesky factor: x = L * tmp.
	xVec := mat64.NewVector(s.dim, x)
	tmpVec := mat64.NewVector(s.dim, tmp)
	xVec.MulVec(&s.lower, tmpVec)
	// Draw U ~ chi^2(nu), then scale and shift.
	u := distuv.ChiSquared{K: s.nu, Src: s.src}.Rand()
	floats.Scale(math.Sqrt(s.nu/u), x)
	floats.Add(x, s.mu)
	return x
}

View File

@@ -0,0 +1,262 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distmv
import (
"math"
"math/rand"
"testing"
"github.com/gonum/floats"
"github.com/gonum/matrix/mat64"
"github.com/gonum/stat"
)
// TestStudentTProbs checks Prob against reference values computed with
// WolframAlpha, and verifies that Prob does not modify its input slice.
func TestStudentTProbs(t *testing.T) {
	src := rand.New(rand.NewSource(1))
	for _, test := range []struct {
		nu    float64
		mu    []float64
		sigma *mat64.SymDense
		x     [][]float64
		probs []float64
	}{
		{
			nu:    3,
			mu:    []float64{0, 0},
			sigma: mat64.NewSymDense(2, []float64{1, 0, 0, 1}),
			x: [][]float64{
				{0, 0},
				{1, -1},
				{3, 4},
				{-1, -2},
			},
			// Outputs compared with WolframAlpha.
			probs: []float64{
				0.159154943091895335768883,
				0.0443811199724279860006777747927,
				0.0005980371870904696541052658,
				0.01370560783418571283428283,
			},
		},
		{
			nu:    4,
			mu:    []float64{2, -3},
			sigma: mat64.NewSymDense(2, []float64{8, -1, -1, 5}),
			x: [][]float64{
				{0, 0},
				{1, -1},
				{3, 4},
				{-1, -2},
				{2, -3},
			},
			// Outputs compared with WolframAlpha.
			probs: []float64{
				0.007360810111491788657953608191001,
				0.0143309905845607117740440592999,
				0.0005307774290578041397794096037035009801668903,
				0.0115657422475668739943625904793879,
				0.0254851872062589062995305736215,
			},
		},
	} {
		s, ok := NewStudentsT(test.mu, test.sigma, test.nu, src)
		if !ok {
			t.Fatal("bad test")
		}
		for i, x := range test.x {
			// Keep a copy to detect accidental mutation of the input.
			xcpy := make([]float64, len(x))
			copy(xcpy, x)
			p := s.Prob(x)
			if !floats.Same(x, xcpy) {
				t.Errorf("X modified during call to prob, %v, %v", x, xcpy)
			}
			if !floats.EqualWithinAbsOrRel(p, test.probs[i], 1e-10, 1e-10) {
				t.Errorf("Probability mismatch. X = %v. Got %v, want %v.", x, p, test.probs[i])
			}
		}
	}
}
// TestStudentsTRand draws a large number of samples and checks that their
// empirical mean and covariance match Mean and CovarianceMatrix.
func TestStudentsTRand(t *testing.T) {
	src := rand.New(rand.NewSource(1))
	for _, test := range []struct {
		mean   []float64
		cov    *mat64.SymDense
		nu     float64
		tolcov float64
	}{
		{
			mean:   []float64{0, 0},
			cov:    mat64.NewSymDense(2, []float64{1, 0, 0, 1}),
			nu:     3,
			tolcov: 1e-2,
		},
		{
			mean:   []float64{3, 4},
			cov:    mat64.NewSymDense(2, []float64{5, 1.2, 1.2, 6}),
			nu:     8,
			tolcov: 1e-2,
		},
		{
			mean:   []float64{3, 4, -2},
			cov:    mat64.NewSymDense(3, []float64{5, 1.2, -0.8, 1.2, 6, 0.4, -0.8, 0.4, 2}),
			nu:     8,
			tolcov: 1e-2,
		},
	} {
		s, ok := NewStudentsT(test.mean, test.cov, test.nu, src)
		if !ok {
			t.Fatal("bad test")
		}
		// Heavy-tailed distribution: many samples are needed for the
		// empirical covariance to converge within tolcov.
		nSamples := 10000000
		dim := len(test.mean)
		samps := mat64.NewDense(nSamples, dim, nil)
		for i := 0; i < nSamples; i++ {
			s.Rand(samps.RawRowView(i))
		}
		estMean := make([]float64, dim)
		for i := range estMean {
			estMean[i] = stat.Mean(mat64.Col(nil, i, samps), nil)
		}
		mean := s.Mean(nil)
		if !floats.EqualApprox(estMean, mean, 1e-2) {
			t.Errorf("Mean mismatch: want: %v, got %v", test.mean, estMean)
		}
		cov := s.CovarianceMatrix(nil)
		estCov := stat.CovarianceMatrix(nil, samps, nil)
		if !mat64.EqualApprox(estCov, cov, test.tolcov) {
			t.Errorf("Cov mismatch: want: %v, got %v", cov, estCov)
		}
	}
}
// TestStudentsTConditional compares ConditionStudentsT against the same
// conditional distribution computed directly (and inefficiently) with dense
// solves.
func TestStudentsTConditional(t *testing.T) {
	src := rand.New(rand.NewSource(1))
	for _, test := range []struct {
		mean   []float64
		cov    *mat64.SymDense
		nu     float64
		idx    []int
		value  []float64
		tolcov float64
	}{
		{
			mean:   []float64{3, 4, -2},
			cov:    mat64.NewSymDense(3, []float64{5, 1.2, -0.8, 1.2, 6, 0.4, -0.8, 0.4, 2}),
			nu:     8,
			idx:    []int{0},
			value:  []float64{6},
			tolcov: 1e-2,
		},
	} {
		s, ok := NewStudentsT(test.mean, test.cov, test.nu, src)
		if !ok {
			t.Fatal("bad test")
		}
		sUp, ok := s.ConditionStudentsT(test.idx, test.value, src)
		// The success flag was previously discarded; a failed conditioning
		// would have dereferenced an unusable distribution below.
		if !ok {
			t.Fatal("conditioning failed")
		}
		// Compute the other values by hand the inefficient way to compare
		newNu := test.nu + float64(len(test.idx))
		if newNu != sUp.nu {
			// Report the updated distribution's nu (sUp.nu), not the
			// original s.nu, which is always the pre-update value.
			t.Errorf("Updated nu mismatch. Got %v, want %v", sUp.nu, newNu)
		}
		dim := len(test.mean)
		unob := findUnob(test.idx, dim)
		ob := test.idx
		muUnob := make([]float64, len(unob))
		for i, v := range unob {
			muUnob[i] = test.mean[v]
		}
		muOb := make([]float64, len(ob))
		for i, v := range ob {
			muOb[i] = test.mean[v]
		}
		var sig11, sig22 mat64.SymDense
		sig11.SubsetSym(&s.sigma, unob)
		sig22.SubsetSym(&s.sigma, ob)
		sig12 := mat64.NewDense(len(unob), len(ob), nil)
		for i := range unob {
			for j := range ob {
				sig12.Set(i, j, s.sigma.At(unob[i], ob[j]))
			}
		}
		shift := make([]float64, len(ob))
		copy(shift, test.value)
		floats.Sub(shift, muOb)
		newMu := make([]float64, len(muUnob))
		newMuVec := mat64.NewVector(len(muUnob), newMu)
		shiftVec := mat64.NewVector(len(shift), shift)
		var tmp mat64.Vector
		tmp.SolveVec(&sig22, shiftVec)
		newMuVec.MulVec(sig12, &tmp)
		floats.Add(newMu, muUnob)
		if !floats.EqualApprox(newMu, sUp.mu, 1e-10) {
			t.Errorf("Mu mismatch. Got %v, want %v", sUp.mu, newMu)
		}
		var tmp2 mat64.Dense
		tmp2.Solve(&sig22, sig12.T())
		var tmp3 mat64.Dense
		tmp3.Mul(sig12, &tmp2)
		tmp3.Sub(&sig11, &tmp3)
		dot := mat64.Dot(shiftVec, &tmp)
		tmp3.Scale((test.nu+dot)/(test.nu+float64(len(ob))), &tmp3)
		if !mat64.EqualApprox(&tmp3, &sUp.sigma, 1e-10) {
			t.Errorf("Sigma mismatch")
		}
	}
}
// TestStudentsTMarginalSingle checks that the single-variable marginal has
// the expected mean, scale parameter, and degrees of freedom.
func TestStudentsTMarginalSingle(t *testing.T) {
	for _, test := range []struct {
		mu    []float64
		sigma *mat64.SymDense
		nu    float64
	}{
		{
			mu:    []float64{2, 3, 4},
			sigma: mat64.NewSymDense(3, []float64{2, 0.5, 3, 0.5, 1, 0.6, 3, 0.6, 10}),
			nu:    5,
		},
		{
			mu:    []float64{2, 3, 4, 5},
			sigma: mat64.NewSymDense(4, []float64{2, 0.5, 3, 0.1, 0.5, 1, 0.6, 0.2, 3, 0.6, 10, 0.3, 0.1, 0.2, 0.3, 3}),
			nu:    6,
		},
	} {
		studentst, ok := NewStudentsT(test.mu, test.sigma, test.nu, nil)
		if !ok {
			t.Fatalf("Bad test, covariance matrix not positive definite")
		}
		for i, mean := range test.mu {
			st := studentst.MarginalStudentsTSingle(i, nil)
			if st.Mean() != mean {
				t.Errorf("Mean mismatch nil Sigma, idx %v: want %v, got %v.", i, mean, st.Mean())
			}
			std := math.Sqrt(test.sigma.At(i, i))
			if math.Abs(st.Sigma-std) > 1e-14 {
				// The comparison is against st.Sigma, so report st.Sigma
				// rather than st.StdDev() (which is a different quantity).
				t.Errorf("Sigma mismatch nil Sigma, idx %v: want %v, got %v.", i, std, st.Sigma)
			}
			if st.Nu != test.nu {
				t.Errorf("Nu mismatch nil Sigma, idx %v: want %v, got %v ", i, test.nu, st.Nu)
			}
		}
	}
}

196
stat/distmv/uniform.go Normal file
View File

@@ -0,0 +1,196 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distmv
import (
"math"
"math/rand"
)
// Bound represents the distribution limits of one dimension: values are
// contained in [Min, Max].
type Bound struct {
	// Min is the lower bound of the dimension.
	Min float64
	// Max is the upper bound of the dimension. Max must be >= Min.
	Max float64
}
// Uniform represents a multivariate uniform distribution.
type Uniform struct {
	// bounds holds the per-dimension [Min, Max] limits; len(bounds) == dim.
	bounds []Bound
	dim    int
	// src is the random source; a nil src falls back to the global source.
	src *rand.Rand
}
// NewUniform creates a new uniform distribution with the given bounds.
// NewUniform panics if no bounds are provided or if any bound has
// Max < Min. The bounds are copied, so the caller may reuse bnds.
func NewUniform(bnds []Bound, src *rand.Rand) *Uniform {
	dim := len(bnds)
	if dim == 0 {
		panic(badZeroDimension)
	}
	for _, b := range bnds {
		if b.Max < b.Min {
			panic("uniform: maximum less than minimum")
		}
	}
	bounds := make([]Bound, dim)
	copy(bounds, bnds)
	return &Uniform{
		bounds: bounds,
		dim:    dim,
		src:    src,
	}
}
// NewUnitUniform creates a new Uniform distribution over the dim-dimensional
// unit hypercube, i.e. every dimension has Min = 0 and Max = 1.
// NewUnitUniform panics if dim is not positive.
func NewUnitUniform(dim int, src *rand.Rand) *Uniform {
	if dim <= 0 {
		panic(nonPosDimension)
	}
	b := make([]Bound, dim)
	for i := range b {
		b[i] = Bound{Min: 0, Max: 1}
	}
	return &Uniform{
		bounds: b,
		dim:    dim,
		src:    src,
	}
}
// Bounds returns the bounds on the variables of the distribution. If the input
// is nil, a new slice is allocated and returned. If the input is non-nil, then
// the bounds are stored in-place into the input argument, and Bounds will panic
// if len(bounds) != u.Dim().
func (u *Uniform) Bounds(bounds []Bound) []Bound {
	dim := u.Dim()
	if bounds == nil {
		bounds = make([]Bound, dim)
	}
	if len(bounds) != dim {
		panic(badInputLength)
	}
	copy(bounds, u.bounds)
	return bounds
}
// CDF returns the multidimensional cumulative distribution function of the
// probability distribution at the point x. If p is non-nil, the CDF is stored
// in-place into the first argument, otherwise a new slice is allocated and
// returned.
//
// CDF will panic if len(x) is not equal to the dimension of the distribution,
// or if p is non-nil and len(p) is not equal to the dimension of the distribution.
func (u *Uniform) CDF(p, x []float64) []float64 {
	if len(x) != u.dim {
		panic(badSizeMismatch)
	}
	if p == nil {
		p = make([]float64, u.dim)
	}
	if len(p) != u.dim {
		panic(badSizeMismatch)
	}
	for i, v := range x {
		b := u.bounds[i]
		switch {
		case v < b.Min:
			p[i] = 0
		case v > b.Max:
			p[i] = 1
		default:
			// Linear interpolation within the bound.
			p[i] = (v - b.Min) / (b.Max - b.Min)
		}
	}
	return p
}
// Dim returns the dimension (number of variables) of the distribution.
func (u *Uniform) Dim() int {
	return u.dim
}
// Entropy returns the differential entropy of the distribution, which for a
// uniform distribution is the log of the volume of its support.
func (u *Uniform) Entropy() float64 {
	var sum float64
	for _, b := range u.bounds {
		sum += math.Log(b.Max - b.Min)
	}
	return sum
}
// LogProb computes the log of the pdf of the point x. It is -Inf for any x
// outside the bounds, and otherwise the negative log-volume of the support.
// LogProb panics if len(x) does not match the dimension of the distribution.
func (u *Uniform) LogProb(x []float64) float64 {
	if len(x) != u.dim {
		panic(badSizeMismatch)
	}
	var lp float64
	for i, b := range u.bounds {
		v := x[i]
		if v < b.Min || v > b.Max {
			return math.Inf(-1)
		}
		lp -= math.Log(b.Max - b.Min)
	}
	return lp
}
// Mean returns the mean of the probability distribution (the midpoint of each
// bound). If the input argument is nil, a new slice will be allocated,
// otherwise the result will be put in-place into the input and returned.
func (u *Uniform) Mean(x []float64) []float64 {
	x = reuseAs(x, u.dim)
	for i, b := range u.bounds {
		x[i] = (b.Max + b.Min) / 2
	}
	return x
}
// Prob computes the value of the probability density function at x.
// It is the exponential of LogProb, and panics under the same conditions.
func (u *Uniform) Prob(x []float64) float64 {
	return math.Exp(u.LogProb(x))
}
// Rand generates a random number according to the distribution.
// If the input slice is nil, new memory is allocated, otherwise the result is stored
// in place.
func (u *Uniform) Rand(x []float64) []float64 {
	x = reuseAs(x, u.dim)
	if u.src == nil {
		for i, b := range u.bounds {
			x[i] = rand.Float64()*(b.Max-b.Min) + b.Min
		}
		return x
	}
	// Use the distribution's private source. The previous code called the
	// global rand.Float64 here as well, silently ignoring u.src and making
	// seeded draws non-reproducible.
	for i, b := range u.bounds {
		x[i] = u.src.Float64()*(b.Max-b.Min) + b.Min
	}
	return x
}
// Quantile returns the multi-dimensional inverse cumulative distribution
// function. len(p) must equal the dimension of the distribution, and if x is
// non-nil, len(x) must also equal the dimension; otherwise Quantile panics.
// If x is nil, a new slice will be allocated and returned, otherwise the
// quantile will be stored in-place into x. All of the values of p must be
// between 0 and 1, or Quantile will panic.
func (u *Uniform) Quantile(x, p []float64) []float64 {
	if len(p) != u.dim {
		panic(badSizeMismatch)
	}
	if x == nil {
		x = make([]float64, u.dim)
	}
	if len(x) != u.dim {
		panic(badSizeMismatch)
	}
	for i, v := range p {
		if v < 0 || v > 1 {
			panic(badQuantile)
		}
		// Linear map from [0, 1] onto [Min, Max] for each dimension.
		x[i] = v*(u.bounds[i].Max-u.bounds[i].Min) + u.bounds[i].Min
	}
	return x
}

View File

@@ -0,0 +1,31 @@
// Copyright ©2017 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distmv
import (
"math"
"testing"
)
// TestUniformEntropy checks Entropy against hand-computed log-volumes.
func TestUniformEntropy(t *testing.T) {
	for _, test := range []struct {
		Uniform *Uniform
		Entropy float64
	}{
		{
			// Unit square: volume 1, entropy log(1) = 0.
			NewUniform([]Bound{{0, 1}, {0, 1}}, nil),
			0,
		},
		{
			// Box with side lengths 4, 6, 2: volume 48.
			NewUniform([]Bound{{-1, 3}, {2, 8}, {-5, -3}}, nil),
			math.Log(48),
		},
	} {
		ent := test.Uniform.Entropy()
		if math.Abs(ent-test.Entropy) > 1e-14 {
			t.Errorf("Entropy mismatch. Got %v, want %v", ent, test.Entropy)
		}
	}
}

131
stat/distuv/bernoulli.go Normal file
View File

@@ -0,0 +1,131 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"math/rand"
)
// Bernoulli represents a random variable whose value is 1 with probability P and
// value of zero with probability 1-P. The value of P must be between 0 and 1.
// More information at https://en.wikipedia.org/wiki/Bernoulli_distribution.
type Bernoulli struct {
	P float64
	// Source is the random source; a nil Source falls back to the global source.
	Source *rand.Rand
}
// CDF computes the value of the cumulative density function at x.
// The CDF is a step function: 0 for x < 0, 1-P on [0, 1), and 1 for x >= 1.
func (b Bernoulli) CDF(x float64) float64 {
	switch {
	case x < 0:
		return 0
	case x < 1:
		return 1 - b.P
	default:
		return 1
	}
}
// Entropy returns the entropy of the distribution, in nats:
//  -P*log(P) - (1-P)*log(1-P)
// The degenerate cases P == 0 and P == 1 both return 0, the limiting value
// (a deterministic outcome carries no entropy).
func (b Bernoulli) Entropy() float64 {
	if b.P == 0 || b.P == 1 {
		// lim_{p->0} p*log(p) = 0. The previous code returned 1 for
		// P == 1, which is incorrect.
		return 0
	}
	q := 1 - b.P
	return -b.P*math.Log(b.P) - q*math.Log(q)
}
// ExKurtosis returns the excess kurtosis of the distribution,
// (1 - 6pq)/(pq) where q = 1 - p.
func (b Bernoulli) ExKurtosis() float64 {
	q := 1 - b.P
	pq := b.P * q
	return (1 - 6*pq) / pq
}
// LogProb computes the natural logarithm of the value of the probability
// density function at x. The support is {0, 1}; any other x has probability
// zero and so log-probability -Inf.
func (b Bernoulli) LogProb(x float64) float64 {
	switch x {
	case 0:
		return math.Log(1 - b.P)
	case 1:
		return math.Log(b.P)
	default:
		return math.Inf(-1)
	}
}
// Mean returns the mean of the probability distribution, which is P.
func (b Bernoulli) Mean() float64 {
	return b.P
}
// Median returns the median of the probability distribution: 0 when P < 0.5,
// 1 when P > 0.5, and 0.5 at exactly P == 0.5.
func (b Bernoulli) Median() float64 {
	if b.P < 0.5 {
		return 0
	}
	if b.P > 0.5 {
		return 1
	}
	return 0.5
}
// NumParameters returns the number of parameters in the distribution (just P).
func (Bernoulli) NumParameters() int {
	return 1
}
// Prob computes the value of the probability distribution at x.
// It is the exponential of LogProb.
func (b Bernoulli) Prob(x float64) float64 {
	return math.Exp(b.LogProb(x))
}
// Quantile returns the inverse of the cumulative probability distribution:
// 0 for p < 1-P and 1 otherwise. Quantile panics unless 0 <= p <= 1.
func (b Bernoulli) Quantile(p float64) float64 {
	if p < 0 || 1 < p {
		panic(badPercentile)
	}
	q := 1 - b.P
	if p < q {
		return 0
	}
	return 1
}
// Rand returns a random sample drawn from the distribution: 1 with
// probability P, otherwise 0.
func (b Bernoulli) Rand() float64 {
	var u float64
	if b.Source != nil {
		u = b.Source.Float64()
	} else {
		u = rand.Float64()
	}
	if u < b.P {
		return 1
	}
	return 0
}
// Skewness returns the skewness of the distribution, (1-2p)/sqrt(p(1-p)).
func (b Bernoulli) Skewness() float64 {
	return (1 - 2*b.P) / math.Sqrt(b.P*(1-b.P))
}
// StdDev returns the standard deviation of the probability distribution,
// the square root of Variance.
func (b Bernoulli) StdDev() float64 {
	return math.Sqrt(b.Variance())
}
// Survival returns the survival function (complementary CDF) at x,
// i.e. 1 - CDF(x).
func (b Bernoulli) Survival(x float64) float64 {
	return 1 - b.CDF(x)
}
// Variance returns the variance of the probability distribution, p(1-p).
func (b Bernoulli) Variance() float64 {
	return b.P * (1 - b.P)
}

View File

@@ -0,0 +1,23 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import "testing"
// TestBernoulli runs the shared full-distribution checks against several
// parameterizations of the Bernoulli distribution.
func TestBernoulli(t *testing.T) {
	for i, dist := range []Bernoulli{
		{
			P: 0.5,
		},
		{
			P: 0.9,
		},
		{
			P: 0.2,
		},
	} {
		testFullDist(t, dist, i, false)
	}
}

126
stat/distuv/beta.go Normal file
View File

@@ -0,0 +1,126 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"math/rand"
"github.com/gonum/mathext"
)
// Beta implements the Beta distribution, a two-parameter continuous distribution
// with support between 0 and 1.
//
// The beta distribution has density function
//  x^(α-1) * (1-x)^(β-1) * Γ(α+β) / (Γ(α)*Γ(β))
//
// For more information, see https://en.wikipedia.org/wiki/Beta_distribution
type Beta struct {
	// Alpha is the left shape parameter of the distribution. Alpha must be greater
	// than 0.
	Alpha float64
	// Beta is the right shape parameter of the distribution. Beta must be greater
	// than 0.
	Beta float64
	// Source is the random source; a nil Source falls back to the global source.
	Source *rand.Rand
}
// CDF computes the value of the cumulative distribution function at x,
// clamping to 0 below the support and 1 above it. Interior values use the
// regularized incomplete beta function.
func (b Beta) CDF(x float64) float64 {
	switch {
	case x <= 0:
		return 0
	case x >= 1:
		return 1
	}
	return mathext.RegIncBeta(b.Alpha, b.Beta, x)
}
// ExKurtosis returns the excess kurtosis of the distribution:
//  6[(α-β)²(α+β+1) - αβ(α+β+2)] / [αβ(α+β+2)(α+β+3)]
func (b Beta) ExKurtosis() float64 {
	num := 6 * ((b.Alpha-b.Beta)*(b.Alpha-b.Beta)*(b.Alpha+b.Beta+1) - b.Alpha*b.Beta*(b.Alpha+b.Beta+2))
	den := b.Alpha * b.Beta * (b.Alpha + b.Beta + 2) * (b.Alpha + b.Beta + 3)
	return num / den
}
// LogProb computes the natural logarithm of the value of the probability
// density function at x. Values outside [0, 1] return -Inf. LogProb panics
// if either shape parameter is not positive.
func (b Beta) LogProb(x float64) float64 {
	if x < 0 || x > 1 {
		return math.Inf(-1)
	}
	// Note: this check also fires for zero parameters, not only negative ones.
	if b.Alpha <= 0 || b.Beta <= 0 {
		panic("beta: negative parameters")
	}
	// log B(α,β)^-1 = lgamma(α+β) - lgamma(α) - lgamma(β).
	lab, _ := math.Lgamma(b.Alpha + b.Beta)
	la, _ := math.Lgamma(b.Alpha)
	lb, _ := math.Lgamma(b.Beta)
	return lab - la - lb + (b.Alpha-1)*math.Log(x) + (b.Beta-1)*math.Log(1-x)
}
// Mean returns the mean of the probability distribution, α/(α+β).
func (b Beta) Mean() float64 {
	return b.Alpha / (b.Alpha + b.Beta)
}
// Mode returns the mode of the distribution, (α-1)/(α+β-2).
//
// Mode returns NaN if either parameter is less than or equal to 1 as a special case.
func (b Beta) Mode() float64 {
	if b.Alpha <= 1 || b.Beta <= 1 {
		return math.NaN()
	}
	return (b.Alpha - 1) / (b.Alpha + b.Beta - 2)
}
// NumParameters returns the number of parameters in the distribution
// (Alpha and Beta).
func (b Beta) NumParameters() int {
	return 2
}
// Prob computes the value of the probability density function at x.
// It is the exponential of LogProb, and panics under the same conditions.
func (b Beta) Prob(x float64) float64 {
	return math.Exp(b.LogProb(x))
}
// Quantile returns the inverse of the cumulative distribution function,
// computed via the inverse regularized incomplete beta function.
// Quantile panics unless 0 <= p <= 1.
func (b Beta) Quantile(p float64) float64 {
	if p < 0 || p > 1 {
		panic(badPercentile)
	}
	return mathext.InvRegIncBeta(b.Alpha, b.Beta, p)
}
// Rand returns a random sample drawn from the distribution.
// It uses the standard construction X = Ga/(Ga+Gb) from two Gamma draws
// with shapes Alpha and Beta.
func (b Beta) Rand() float64 {
	ga := Gamma{Alpha: b.Alpha, Beta: 1, Source: b.Source}.Rand()
	gb := Gamma{Alpha: b.Beta, Beta: 1, Source: b.Source}.Rand()
	return ga / (ga + gb)
}
// StdDev returns the standard deviation of the probability distribution,
// the square root of Variance.
func (b Beta) StdDev() float64 {
	return math.Sqrt(b.Variance())
}
// Survival returns the survival function (complementary CDF) at x, computed
// via the symmetry I_{1-x}(β, α) = 1 - I_x(α, β) of the regularized
// incomplete beta function.
func (b Beta) Survival(x float64) float64 {
	if x <= 0 {
		return 1
	}
	if x >= 1 {
		return 0
	}
	return mathext.RegIncBeta(b.Beta, b.Alpha, 1-x)
}
// Variance returns the variance of the probability distribution,
// αβ / [(α+β)²(α+β+1)].
func (b Beta) Variance() float64 {
	return b.Alpha * b.Beta / ((b.Alpha + b.Beta) * (b.Alpha + b.Beta) * (b.Alpha + b.Beta + 1))
}

61
stat/distuv/beta_test.go Normal file
View File

@@ -0,0 +1,61 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"math/rand"
"sort"
"testing"
"github.com/gonum/floats"
)
// TestBetaProb checks Prob against reference values computed with scipy,
// including boundary (x = 1) and out-of-support (x = -1) inputs.
func TestBetaProb(t *testing.T) {
	// Values are compared with scipy.
	for _, test := range []struct {
		x, alpha, beta, want float64
	}{
		{0.1, 2, 0.5, 0.079056941504209499},
		{0.5, 1, 5.1, 0.29740426605235754},
		{0.1, 0.5, 0.5, 1.0610329539459691},
		{1, 0.5, 0.5, math.Inf(1)},
		{-1, 0.5, 0.5, 0},
	} {
		pdf := Beta{Alpha: test.alpha, Beta: test.beta}.Prob(test.x)
		if !floats.EqualWithinAbsOrRel(pdf, test.want, 1e-10, 1e-10) {
			t.Errorf("Pdf mismatch. Got %v, want %v", pdf, test.want)
		}
	}
}
// TestBetaRand runs the sampling-based distribution checks for several
// Beta parameterizations with a fixed seed.
func TestBetaRand(t *testing.T) {
	src := rand.New(rand.NewSource(1))
	for i, b := range []Beta{
		{Alpha: 0.5, Beta: 0.5, Source: src},
		{Alpha: 5, Beta: 1, Source: src},
		{Alpha: 2, Beta: 2, Source: src},
		{Alpha: 2, Beta: 5, Source: src},
	} {
		testBeta(t, b, i)
	}
}
// testBeta draws a million samples from b and runs the shared statistical
// checks (moments, pdf/cdf/quantile consistency) against them.
func testBeta(t *testing.T, b Beta, i int) {
	tol := 1e-2
	const n = 1e6
	const bins = 10
	x := make([]float64, n)
	generateSamples(x, b)
	sort.Float64s(x)
	testRandLogProbContinuous(t, i, 0, x, b, tol, bins)
	checkMean(t, i, x, b, tol)
	checkVarAndStd(t, i, x, b, tol)
	checkExKurtosis(t, i, x, b, 5e-2)
	checkProbContinuous(t, i, x, b, 1e-3)
	checkQuantileCDFSurvival(t, i, x, b, tol)
	checkProbQuantContinuous(t, i, x, b, tol)
}

184
stat/distuv/categorical.go Normal file
View File

@@ -0,0 +1,184 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"math/rand"
)
// Categorical is an extension of the Bernoulli distribution where x takes
// values {0, 1, ..., len(w)-1} where w is the weight vector. Categorical must
// be initialized with NewCategorical.
type Categorical struct {
	weights []float64

	// heap is a weight heap.
	//
	// It keeps a heap-organised sum of remaining
	// index weights that are available to be taken
	// from.
	//
	// Each element holds the sum of weights for
	// the corresponding index, plus the sum of
	// its children's weights; the children
	// of an element i can be found at positions
	// 2*(i+1)-1 and 2*(i+1). The root of the
	// weight heap is at element 0.
	//
	// See comments in container/heap for an
	// explanation of the layout of a heap.
	heap []float64

	// src is the random source; a nil src falls back to the global source.
	src *rand.Rand
}
// NewCategorical constructs a new categorical distribution where the probability
// that x equals i is proportional to w[i]. All of the weights must be
// nonnegative, and at least one of the weights must be positive; otherwise
// NewCategorical panics (via ReweightAll).
func NewCategorical(w []float64, src *rand.Rand) Categorical {
	c := Categorical{
		weights: make([]float64, len(w)),
		heap:    make([]float64, len(w)),
		src:     src,
	}
	// ReweightAll validates w, copies it, and builds the weight heap.
	c.ReweightAll(w)
	return c
}
// CDF computes the value of the cumulative density function at x: the sum of
// normalized weights for all indices <= x. Negative x gives 0.
func (c Categorical) CDF(x float64) float64 {
	var cdf float64
	for i, w := range c.weights {
		if x < float64(i) {
			break
		}
		cdf += w
	}
	// c.heap[0] holds the total weight, so this normalizes the sum.
	return cdf / c.heap[0]
}
// Entropy returns the entropy of the distribution, -sum p_i log(p_i), in
// nats. Zero-weight outcomes contribute nothing (the 0*log(0) = 0 limit).
func (c Categorical) Entropy() float64 {
	total := c.heap[0] // total weight is cached at the heap root
	var sum float64
	for _, w := range c.weights {
		if w == 0 {
			continue
		}
		p := w / total
		sum += p * math.Log(p)
	}
	return -sum
}
// Len returns the number of values x could possibly take (the length of the
// initial supplied weight vector).
func (c Categorical) Len() int {
	return len(c.weights)
}
// Mean returns the mean of the probability distribution,
// sum_i i * w_i / totalWeight.
func (c Categorical) Mean() float64 {
	var sum float64
	for i := range c.weights {
		sum += float64(i) * c.weights[i]
	}
	// c.heap[0] holds the total weight.
	return sum / c.heap[0]
}
// Prob computes the value of the probability density function at x.
// Non-integer or out-of-range x has probability 0.
func (c Categorical) Prob(x float64) float64 {
	i := int(x)
	if float64(i) != x || i < 0 || i > len(c.weights)-1 {
		return 0
	}
	// Normalize by the total weight cached at the heap root.
	return c.weights[i] / c.heap[0]
}
// LogProb computes the natural logarithm of the value of the probability
// density function at x. It is -Inf where Prob is 0.
func (c Categorical) LogProb(x float64) float64 {
	return math.Log(c.Prob(x))
}
// Rand returns a random draw from the categorical distribution, selecting
// index i with probability proportional to weights[i]. The draw walks the
// weight heap in O(log n).
func (c Categorical) Rand() float64 {
	// Draw a uniform value in [0, totalWeight).
	var r float64
	if c.src == nil {
		r = c.heap[0] * rand.Float64()
	} else {
		r = c.heap[0] * c.src.Float64()
	}
	// Walk down the heap (1-based index i), consuming weight from r until
	// it falls within the weight of the current node.
	i := 1
	last := -1
	left := len(c.weights)
	for {
		if r -= c.weights[i-1]; r <= 0 {
			break // Fall within item i-1.
		}
		i <<= 1 // Move to left child.
		if d := c.heap[i-1]; r > d {
			r -= d
			// If enough r to pass left child,
			// move to right child state will
			// be caught at break above.
			i++
		}
		// Guard against an inconsistent heap: the walk must make progress
		// and cannot take more steps than there are weights.
		if i == last || left < 0 {
			panic("categorical: bad sample")
		}
		last = i
		left--
	}
	return float64(i - 1)
}
// Reweight sets the weight of item idx to w. The input weight must be
// non-negative, and after reweighting at least one of the weights must be
// positive; otherwise Reweight panics.
func (c Categorical) Reweight(idx int, w float64) {
	if w < 0 {
		panic("categorical: negative weight")
	}
	// After this swap, w holds the delta (old - new) to subtract from every
	// heap node on the path from idx to the root.
	w, c.weights[idx] = c.weights[idx]-w, w
	idx++
	// Propagate the delta up the 1-based heap.
	for idx > 0 {
		c.heap[idx-1] -= w
		idx >>= 1
	}
	if c.heap[0] <= 0 {
		panic("categorical: sum of the weights non-positive")
	}
}
// ReweightAll resets the weights of the distribution. ReweightAll panics if
// len(w) != c.Len. All of the weights must be nonnegative, and at least one of
// the weights must be positive.
func (c Categorical) ReweightAll(w []float64) {
	if len(w) != c.Len() {
		panic("categorical: length of the slices do not match")
	}
	for _, v := range w {
		if v < 0 {
			panic("categorical: negative weight")
		}
	}
	copy(c.weights, w)
	// Rebuild the weight heap from the new weights.
	c.reset()
}
// reset rebuilds the weight heap from c.weights: each node accumulates its
// children's sums bottom-up, leaving the total weight at the root.
// reset panics if the total weight is not positive.
func (c Categorical) reset() {
	copy(c.heap, c.weights)
	for i := len(c.heap) - 1; i > 0; i-- {
		// Sometimes 1-based counting makes sense.
		c.heap[((i+1)>>1)-1] += c.heap[i]
	}
	// TODO(btracey): Renormalization for weird weights?
	if c.heap[0] <= 0 {
		panic("categorical: sum of the weights non-positive")
	}
}

View File

@@ -0,0 +1,196 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"math/rand"
"testing"
"github.com/gonum/floats"
)
// TestCategoricalProb verifies Prob against independently normalized weights,
// and checks that non-integer and out-of-range inputs have zero probability.
func TestCategoricalProb(t *testing.T) {
	for _, test := range [][]float64{
		{1, 2, 3, 0},
	} {
		dist := NewCategorical(test, nil)
		norm := make([]float64, len(test))
		// Copy the weights before normalizing. Without this copy (as in the
		// previous code), norm stays all zeros, the normalization divides by
		// zero producing NaNs, and every comparison below passes vacuously.
		copy(norm, test)
		floats.Scale(1/floats.Sum(norm), norm)
		for i, v := range norm {
			p := dist.Prob(float64(i))
			if math.Abs(p-v) > 1e-14 {
				t.Errorf("Probability mismatch element %d", i)
			}
			p = dist.Prob(float64(i) + 0.5)
			if p != 0 {
				t.Errorf("Non-zero probability for non-integer x")
			}
		}
		p := dist.Prob(-1)
		if p != 0 {
			t.Errorf("Non-zero probability for -1")
		}
		p = dist.Prob(float64(len(test)))
		if p != 0 {
			t.Errorf("Non-zero probability for len(test)")
		}
	}
}
// TestCategoricalRand checks that empirical sample frequencies match Prob,
// both for the initial weights and after Reweight and ReweightAll.
func TestCategoricalRand(t *testing.T) {
	for _, test := range [][]float64{
		{1, 2, 3, 0},
	} {
		dist := NewCategorical(test, nil)
		nSamples := 2000000
		counts := sampleCategorical(t, dist, nSamples)
		probs := make([]float64, len(test))
		for i := range probs {
			probs[i] = dist.Prob(float64(i))
		}
		same := samedDistCategorical(dist, counts, probs, 1e-2)
		if !same {
			t.Errorf("Probability mismatch. Want %v, got %v", probs, counts)
		}
		// Change a single weight and re-verify.
		dist.Reweight(len(test)-1, 10)
		counts = sampleCategorical(t, dist, nSamples)
		probs = make([]float64, len(test))
		for i := range probs {
			probs[i] = dist.Prob(float64(i))
		}
		same = samedDistCategorical(dist, counts, probs, 1e-2)
		if !same {
			t.Errorf("Probability mismatch after Reweight. Want %v, got %v", probs, counts)
		}
		// Replace the full weight vector and re-verify.
		w := make([]float64, len(test))
		for i := range w {
			w[i] = rand.Float64()
		}
		dist.ReweightAll(w)
		counts = sampleCategorical(t, dist, nSamples)
		probs = make([]float64, len(test))
		for i := range probs {
			probs[i] = dist.Prob(float64(i))
		}
		same = samedDistCategorical(dist, counts, probs, 1e-2)
		if !same {
			t.Errorf("Probability mismatch after ReweightAll. Want %v, got %v", probs, counts)
		}
	}
}
// sampleCategorical draws nSamples values from dist and returns the
// normalized frequency of each outcome. It fails the test if any draw is
// not an integer.
func sampleCategorical(t *testing.T, dist Categorical, nSamples int) []float64 {
	counts := make([]float64, dist.Len())
	for i := 0; i < nSamples; i++ {
		v := dist.Rand()
		if float64(int(v)) != v {
			t.Fatalf("Random number is not an integer")
		}
		counts[int(v)]++
	}
	sum := floats.Sum(counts)
	floats.Scale(1/sum, counts)
	return counts
}
// samedDistCategorical reports whether the empirical frequencies (counts)
// agree with the expected probabilities (probs) to within tol, requiring
// zero-probability outcomes to have exactly zero counts.
func samedDistCategorical(dist Categorical, counts, probs []float64, tol float64) bool {
	for i, p := range probs {
		if p == 0 && counts[i] != 0 {
			return false
		}
		if !floats.EqualWithinAbsOrRel(p, counts[i], tol, tol) {
			return false
		}
	}
	return true
}
// TestCategoricalCDF checks CDF against a cumulative sum of normalized
// weights, including negative and non-integer inputs.
func TestCategoricalCDF(t *testing.T) {
	for _, test := range [][]float64{
		{1, 2, 3, 0, 4},
	} {
		// Build the expected cumulative distribution by hand.
		c := make([]float64, len(test))
		copy(c, test)
		floats.Scale(1/floats.Sum(c), c)
		sum := make([]float64, len(test))
		floats.CumSum(sum, c)

		dist := NewCategorical(test, nil)
		cdf := dist.CDF(-0.5)
		if cdf != 0 {
			t.Errorf("CDF of negative number not zero")
		}
		for i := range c {
			cdf := dist.CDF(float64(i))
			if math.Abs(cdf-sum[i]) > 1e-14 {
				t.Errorf("CDF mismatch %v. Want %v, got %v.", float64(i), sum[i], cdf)
			}
			// The CDF is a step function: it must be flat between integers.
			cdfp := dist.CDF(float64(i) + 0.5)
			if cdfp != cdf {
				t.Errorf("CDF mismatch for non-integer input")
			}
		}
	}
}
// TestCategoricalEntropy checks Categorical.Entropy against hand-computed
// entropies. Zero-weight categories must not contribute to the entropy
// (third case).
func TestCategoricalEntropy(t *testing.T) {
	for _, test := range []struct {
		weights []float64
		entropy float64
	}{
		{
			// Two equal categories: entropy log(2).
			weights: []float64{1, 1},
			entropy: math.Ln2,
		},
		{
			// Four equal categories: entropy log(4).
			weights: []float64{1, 1, 1, 1},
			entropy: math.Log(4),
		},
		{
			// Zero-weight categories are ignored; effectively two equal categories.
			weights: []float64{0, 0, 1, 1, 0, 0},
			entropy: math.Ln2,
		},
	} {
		dist := NewCategorical(test.weights, nil)
		entropy := dist.Entropy()
		if math.IsNaN(entropy) || math.Abs(entropy-test.entropy) > 1e-14 {
			t.Errorf("Entropy mismatch. Want %v, got %v.", test.entropy, entropy)
		}
	}
}
// TestCategoricalMean checks Categorical.Mean against hand-computed means
// for a few weight vectors.
func TestCategoricalMean(t *testing.T) {
	for _, test := range []struct {
		weights []float64
		mean    float64
	}{
		{
			weights: []float64{10, 0, 0, 0},
			mean:    0,
		},
		{
			weights: []float64{0, 10, 0, 0},
			mean:    1,
		},
		{
			// Normalized weights 0.1, 0.2, 0.3, 0.4 give mean 0*0.1+1*0.2+2*0.3+3*0.4 = 2.
			weights: []float64{1, 2, 3, 4},
			mean:    2,
		},
	} {
		dist := NewCategorical(test.weights, nil)
		mean := dist.Mean()
		if math.IsNaN(mean) || math.Abs(mean-test.mean) > 1e-14 {
			// Fixed: the original message said "Entropy mismatch" (copy-paste
			// from TestCategoricalEntropy); this test checks the mean.
			t.Errorf("Mean mismatch. Want %v, got %v.", test.mean, mean)
		}
	}
}

99
stat/distuv/chisquared.go Normal file
View File

@@ -0,0 +1,99 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"math/rand"
"github.com/gonum/mathext"
)
// ChiSquared implements the χ² distribution, a one parameter distribution
// with support on the positive numbers.
//
// The density function is given by
//  1/(2^{k/2} * Γ(k/2)) * x^{k/2 - 1} * e^{-x/2}
// It is a special case of the Gamma distribution, Γ(k/2, 1/2).
//
// For more information, see https://en.wikipedia.org/wiki/Chi-squared_distribution.
type ChiSquared struct {
	// K is the shape parameter, corresponding to the degrees of freedom. Must
	// be greater than 0.
	K float64

	// Src is the source of random numbers used by Rand. If Src is nil,
	// the global source in math/rand is used (see Gamma.Rand, which this
	// type delegates to).
	Src *rand.Rand
}
// CDF computes the value of the cumulative density function at x.
//
// The χ² distribution has support on the non-negative reals, so the CDF
// is 0 for any negative x. The guard mirrors the x < 0 check used by
// Gamma.CDF and avoids evaluating the incomplete gamma function at a
// negative argument.
func (c ChiSquared) CDF(x float64) float64 {
	if x < 0 {
		return 0
	}
	return mathext.GammaInc(c.K/2, x/2)
}
// ExKurtosis returns the excess kurtosis of the distribution.
func (c ChiSquared) ExKurtosis() float64 {
	// Excess kurtosis of χ² with k degrees of freedom is 12/k.
	return 12 / c.K
}

// LogProb computes the natural logarithm of the value of the probability
// density function at x.
//
// LogProb returns -Inf for x < 0, where the density is zero.
func (c ChiSquared) LogProb(x float64) float64 {
	if x < 0 {
		return math.Inf(-1)
	}
	// log of 1/(2^{k/2} Γ(k/2)) x^{k/2-1} e^{-x/2}, expanded term by term.
	lg, _ := math.Lgamma(c.K / 2)
	return (c.K/2-1)*math.Log(x) - x/2 - (c.K/2)*math.Ln2 - lg
}

// Mean returns the mean of the probability distribution.
func (c ChiSquared) Mean() float64 {
	// The mean of χ² equals its degrees of freedom.
	return c.K
}
// Mode returns the mode of the distribution.
//
// The χ² density is maximized at k-2 when k >= 2 and at the boundary 0
// otherwise, so the mode is max(k-2, 0). The previous implementation used
// math.Min, which returned 0 for k > 2 and a negative value (outside the
// support) for k < 2.
func (c ChiSquared) Mode() float64 {
	return math.Max(c.K-2, 0)
}
// NumParameters returns the number of parameters in the distribution.
func (c ChiSquared) NumParameters() int {
	return 1
}

// Prob computes the value of the probability density function at x.
func (c ChiSquared) Prob(x float64) float64 {
	return math.Exp(c.LogProb(x))
}

// Rand returns a random sample drawn from the distribution.
func (c ChiSquared) Rand() float64 {
	// χ²(k) is Gamma with shape k/2 and rate 1/2; delegate to Gamma.Rand.
	return Gamma{c.K / 2, 0.5, c.Src}.Rand()
}
// Quantile returns the inverse of the cumulative distribution function.
//
// Quantile panics if p is outside [0, 1].
func (c ChiSquared) Quantile(p float64) float64 {
	if p < 0 || p > 1 {
		panic(badPercentile)
	}
	// Invert the regularized incomplete gamma function, then undo the
	// x/2 scaling used in the CDF.
	return mathext.GammaIncInv(0.5*c.K, p) * 2
}

// StdDev returns the standard deviation of the probability distribution.
func (c ChiSquared) StdDev() float64 {
	return math.Sqrt(c.Variance())
}

// Survival returns the survival function (complementary CDF) at x.
//
// Survival is 1 for x < 0, below the support of the distribution.
func (c ChiSquared) Survival(x float64) float64 {
	if x < 0 {
		return 1
	}
	return mathext.GammaIncComp(0.5*c.K, 0.5*x)
}

// Variance returns the variance of the probability distribution.
func (c ChiSquared) Variance() float64 {
	// The variance of χ² with k degrees of freedom is 2k.
	return 2 * c.K
}

View File

@@ -0,0 +1,78 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math/rand"
"sort"
"testing"
"github.com/gonum/floats"
)
// TestChiSquaredProb checks the PDF at fixed points against precomputed
// reference values.
func TestChiSquaredProb(t *testing.T) {
	for _, test := range []struct {
		x, k, want float64
	}{
		{10, 3, 0.0085003666025203432},
		{2.3, 3, 0.19157345407042367},
		{0.8, 0.2, 0.080363259903912673},
	} {
		pdf := ChiSquared{test.k, nil}.Prob(test.x)
		if !floats.EqualWithinAbsOrRel(pdf, test.want, 1e-10, 1e-10) {
			t.Errorf("Pdf mismatch, x = %v, K = %v. Got %v, want %v", test.x, test.k, pdf, test.want)
		}
	}
}

// TestChiSquaredCDF checks the CDF at fixed points against reference
// values computed with scipy.stats.chi2.cdf.
func TestChiSquaredCDF(t *testing.T) {
	for _, test := range []struct {
		x, k, want float64
	}{
		// Values calculated with scipy.stats.chi2.cdf
		{0, 1, 0},
		{0.01, 5, 5.3002700426865167e-07},
		{0.05, 3, 0.002929332764619924},
		{0.5, 2, 0.22119921692859512},
		{0.95, 3, 0.1866520918701263},
		{0.99, 5, 0.036631697220869196},
		{1, 1, 0.68268949213708596},
		{1.5, 4, 0.17335853270322427},
		{10, 10, 0.55950671493478743},
		{25, 15, 0.95005656637357172},
	} {
		cdf := ChiSquared{test.k, nil}.CDF(test.x)
		if !floats.EqualWithinAbsOrRel(cdf, test.want, 1e-10, 1e-10) {
			t.Errorf("CDF mismatch, x = %v, K = %v. Got %v, want %v", test.x, test.k, cdf, test.want)
		}
	}
}

// TestChiSquared runs the statistical consistency checks on several
// parameterizations using a fixed random seed for reproducibility.
func TestChiSquared(t *testing.T) {
	src := rand.New(rand.NewSource(1))
	for i, b := range []ChiSquared{
		{3, src},
		{1.5, src},
		{0.9, src},
	} {
		testChiSquared(t, b, i)
	}
}

// testChiSquared draws a large sorted sample from c and checks the
// moments, PDF/LogPDF consistency, and Quantile/CDF/Survival round trips
// against their empirical estimates. i labels the test case in failures.
func testChiSquared(t *testing.T, c ChiSquared, i int) {
	tol := 1e-2
	const n = 2e6
	const bins = 50
	x := make([]float64, n)
	generateSamples(x, c)
	sort.Float64s(x)
	testRandLogProbContinuous(t, i, 0, x, c, tol, bins)
	checkMean(t, i, x, c, tol)
	checkVarAndStd(t, i, x, c, tol)
	checkExKurtosis(t, i, x, c, 5e-2)
	checkProbContinuous(t, i, x, c, 1e-3)
	checkQuantileCDFSurvival(t, i, x, c, 1e-3)
}

24
stat/distuv/constants.go Normal file
View File

@@ -0,0 +1,24 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
const (
	// oneOverRoot2Pi is the value of 1/(2*Pi)^(1/2).
	// http://www.wolframalpha.com/input/?i=1%2F%282+*+pi%29%5E%281%2F2%29
	oneOverRoot2Pi = 0.39894228040143267793994605993438186847585863116493465766592582967065792589930183850125233390730693643030255886263518268

	// logRoot2Pi is the value of log(sqrt(2*Pi)).
	logRoot2Pi = 0.91893853320467274178032973640561763986139747363778341281715154048276569592726039769474329863595419762200564662463433744

	// negLogRoot2Pi is the value of -log(sqrt(2*Pi)).
	negLogRoot2Pi = -logRoot2Pi

	// log2Pi is the value of log(2*Pi).
	log2Pi = 1.8378770664093454835606594728112352797227949472755668

	// ln2 is the value of log(2).
	ln2 = 0.69314718055994530941723212145817656807550013436025525412068000949339362196969471560586332699641868754200148102057068573368552023

	// eulerGamma is the Euler-Mascheroni constant.
	eulerGamma = 0.5772156649015328606065120900824024310421593359399235988057672348848677267776646709369470632917467495146314472498070824809605
)

const (
	// panicNameMismatch is the panic message used when a Parameter name
	// does not match the distribution's expected parameter name.
	panicNameMismatch = "parameter name mismatch"
)

View File

@@ -0,0 +1,293 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"sort"
"testing"
"github.com/gonum/floats"
"github.com/gonum/integrate/quad"
"github.com/gonum/stat"
)
// meaner wraps the Mean method.
type meaner interface {
	Mean() float64
}

// quantiler wraps the Quantile method.
type quantiler interface {
	Quantile(float64) float64
}

// medianer describes a distribution with both Quantile and Median.
type medianer interface {
	quantiler
	Median() float64
}

// varStder describes a distribution exposing both StdDev and Variance.
type varStder interface {
	StdDev() float64
	Variance() float64
}

// entropyer describes a distribution with LogProb and a closed-form Entropy.
type entropyer interface {
	LogProber
	Entropy() float64
}

// exKurtosiser describes a distribution with ExKurtosis and Mean.
type exKurtosiser interface {
	ExKurtosis() float64
	Mean() float64
}

// skewnesser describes a distribution with the moments needed to check skewness.
type skewnesser interface {
	StdDev() float64
	Mean() float64
	Skewness() float64
}

// cumulanter describes a distribution with Quantile, CDF, and Survival.
type cumulanter interface {
	Quantiler
	CDF(x float64) float64
	Survival(x float64) float64
}

// generateSamples fills x with independent draws from r.
func generateSamples(x []float64, r Rander) {
	for i := range x {
		x[i] = r.Rand()
	}
}

// probLogprober describes a distribution with Prob and LogProb.
type probLogprober interface {
	Prob(x float64) float64
	LogProb(x float64) float64
}

// cumulantProber combines the CDF-style and density-style interfaces.
type cumulantProber interface {
	cumulanter
	probLogprober
}
// checkMean compares the empirical mean of the samples x with the
// distribution's Mean. i labels the test case in failure messages.
func checkMean(t *testing.T, i int, x []float64, m meaner, tol float64) {
	mean := stat.Mean(x, nil)
	if !floats.EqualWithinAbsOrRel(mean, m.Mean(), tol, tol) {
		t.Errorf("Mean mismatch case %v: want: %v, got: %v", i, mean, m.Mean())
	}
}

// checkMedian compares the empirical median (0.5 quantile) of x with the
// distribution's Median. i labels the test case in failure messages.
func checkMedian(t *testing.T, i int, x []float64, m medianer, tol float64) {
	median := stat.Quantile(0.5, stat.Empirical, x, nil)
	if !floats.EqualWithinAbsOrRel(median, m.Median(), tol, tol) {
		t.Errorf("Median mismatch case %v: want: %v, got: %v", i, median, m.Median())
	}
}

// checkVarAndStd compares the empirical variance and standard deviation
// of x with the distribution's Variance and StdDev.
func checkVarAndStd(t *testing.T, i int, x []float64, v varStder, tol float64) {
	variance := stat.Variance(x, nil)
	if !floats.EqualWithinAbsOrRel(variance, v.Variance(), tol, tol) {
		t.Errorf("Variance mismatch case %v: want: %v, got: %v", i, variance, v.Variance())
	}
	std := math.Sqrt(variance)
	if !floats.EqualWithinAbsOrRel(std, v.StdDev(), tol, tol) {
		t.Errorf("StdDev mismatch case %v: want: %v, got: %v", i, std, v.StdDev())
	}
}

// checkEntropy estimates the entropy as the sample mean of -LogProb and
// compares it with the distribution's Entropy.
func checkEntropy(t *testing.T, i int, x []float64, e entropyer, tol float64) {
	tmp := make([]float64, len(x))
	for i, v := range x {
		tmp[i] = -e.LogProb(v)
	}
	entropy := stat.Mean(tmp, nil)
	if !floats.EqualWithinAbsOrRel(entropy, e.Entropy(), tol, tol) {
		t.Errorf("Entropy mismatch case %v: want: %v, got: %v", i, entropy, e.Entropy())
	}
}

// checkExKurtosis estimates the excess kurtosis from the fourth central
// moment of x and compares it with the distribution's ExKurtosis.
func checkExKurtosis(t *testing.T, i int, x []float64, e exKurtosiser, tol float64) {
	mean := e.Mean()
	tmp := make([]float64, len(x))
	for i, x := range x {
		tmp[i] = math.Pow(x-mean, 4)
	}
	variance := stat.Variance(x, nil)
	mu4 := stat.Mean(tmp, nil)
	// Excess kurtosis: mu4/sigma^4 - 3.
	kurtosis := mu4/(variance*variance) - 3
	if !floats.EqualWithinAbsOrRel(kurtosis, e.ExKurtosis(), tol, tol) {
		t.Errorf("ExKurtosis mismatch case %v: want: %v, got: %v", i, kurtosis, e.ExKurtosis())
	}
}

// checkSkewness estimates the skewness from the third central moment of
// x and compares it with the distribution's Skewness.
func checkSkewness(t *testing.T, i int, x []float64, s skewnesser, tol float64) {
	mean := s.Mean()
	std := s.StdDev()
	tmp := make([]float64, len(x))
	for i, v := range x {
		tmp[i] = math.Pow(v-mean, 3)
	}
	mu3 := stat.Mean(tmp, nil)
	skewness := mu3 / math.Pow(std, 3)
	if !floats.EqualWithinAbsOrRel(skewness, s.Skewness(), tol, tol) {
		t.Errorf("Skewness mismatch case %v: want: %v, got: %v", i, skewness, s.Skewness())
	}
}
// checkQuantileCDFSurvival checks that Quantile, CDF, and Survival are
// mutually consistent and that the CDF agrees with the empirical CDF of
// the sorted samples xs. i labels the test case in failure messages.
//
// Fixed: the inner loop previously declared `for i, p := range ...`,
// shadowing the case-index parameter i, so failures reported the
// percentile index instead of the test case (unlike the other check
// helpers in this file).
func checkQuantileCDFSurvival(t *testing.T, i int, xs []float64, c cumulanter, tol float64) {
	// Quantile, CDF, and survival check.
	for _, p := range []float64{0.1, 0.25, 0.5, 0.75, 0.9} {
		x := c.Quantile(p)
		cdf := c.CDF(x)
		estCDF := stat.CDF(x, stat.Empirical, xs, nil)
		// The CDF at the quantile point should match the empirical CDF.
		if !floats.EqualWithinAbsOrRel(cdf, estCDF, tol, tol) {
			t.Errorf("CDF mismatch case %v: want: %v, got: %v", i, estCDF, cdf)
		}
		// CDF(Quantile(p)) should round-trip back to p.
		if !floats.EqualWithinAbsOrRel(cdf, p, tol, tol) {
			t.Errorf("Quantile/CDF mismatch case %v: want: %v, got: %v", i, p, cdf)
		}
		// Survival must be the exact complement of the CDF.
		if math.Abs(1-cdf-c.Survival(x)) > 1e-14 {
			t.Errorf("Survival/CDF mismatch case %v: want: %v, got: %v", i, 1-cdf, c.Survival(x))
		}
	}
}
// checkProbContinuous checks that Prob is a valid density (integrates to
// 1 over the real line) and that Prob and LogProb agree at the sample
// locations x. i labels the test case in failure messages.
//
// Fixed: the mismatch message previously printed math.Log(v) — the log of
// the sample location — rather than the wanted value math.Log(p.Prob(v)).
// The loop index no longer shadows the case-index parameter i.
func checkProbContinuous(t *testing.T, i int, x []float64, p probLogprober, tol float64) {
	// Check that the PDF is consistent (integrates to 1).
	q := quad.Fixed(p.Prob, math.Inf(-1), math.Inf(1), 1000000, nil, 0)
	if math.Abs(q-1) > tol {
		t.Errorf("Probability distribution doesn't integrate to 1. Case %v: Got %v", i, q)
	}
	// Check that PDF and LogPDF are consistent.
	for _, v := range x {
		if math.Abs(math.Log(p.Prob(v))-p.LogProb(v)) > 1e-14 {
			t.Errorf("Prob and LogProb mismatch case %v at %v: want %v, got %v", i, v, math.Log(p.Prob(v)), p.LogProb(v))
			break
		}
	}
}
// checkProbQuantContinuous checks that the Prob, Rand, and Quantile are all consistent.
// checkProbContinuous only checks that Prob is a valid distribution (integrates
// to 1 and greater than 0). However, this is also true if the PDF of a different
// distribution is used. This checks that PDF is also consistent with the
// CDF implementation and the random samples.
func checkProbQuantContinuous(t *testing.T, i int, xs []float64, c cumulantProber, tol float64) {
	// Evaluate the quantile at 101 evenly spaced probabilities in [0, 1].
	ps := make([]float64, 101)
	floats.Span(ps, 0, 1)

	var xp, x float64
	// NOTE: the loop variable i shadows the case-index parameter; here the
	// inner index is required for the ps[i-1] lookup below.
	for i, p := range ps {
		x = c.Quantile(p)
		if p == 0 {
			// Record the left endpoint; no sample may fall below Quantile(0).
			xp = x
			if floats.Min(xs) < x {
				t.Errorf("Sample of x less than Quantile(0). Case %v.", i)
				break
			}
			continue
		}
		if p == 1 {
			// No sample may exceed Quantile(1).
			if floats.Max(xs) > x {
				t.Errorf("Sample of x greater than Quantile(1). Case %v.", i)
				break
			}
		}
		// The integral of the PDF between xp and x should be the difference in
		// the quantiles.
		q := quad.Fixed(c.Prob, xp, x, 1000, nil, 0)
		if math.Abs(q-(p-ps[i-1])) > 1e-5 {
			t.Errorf("Integral of PDF doesn't match quantile. Case %v. Want %v, got %v.", i, p-ps[i-1], q)
			break
		}
		// The empirical CDF at the quantile point should be close to p.
		pEst := stat.CDF(x, stat.Empirical, xs, nil)
		if math.Abs(pEst-p) > tol {
			t.Errorf("Empirical CDF doesn't match quantile. Case %v.", i)
		}
		xp = x
	}
}
// checkProbDiscrete confirms that PDF and Rand are consistent for discrete
// distributions: the empirical frequency of each unique sample value must
// match Prob, and Prob/LogProb must agree.
//
// Fixed: the Prob/LogProb mismatch message previously printed math.Log(x)
// — the log of the sample value — rather than the wanted value
// math.Log(p.Prob(x)).
func checkProbDiscrete(t *testing.T, i int, xs []float64, p probLogprober, tol float64) {
	// Make a map of all of the unique samples.
	m := make(map[float64]int)
	for _, v := range xs {
		m[v]++
	}
	for x, count := range m {
		prob := float64(count) / float64(len(xs))
		if math.Abs(prob-p.Prob(x)) > tol {
			t.Errorf("PDF mismatch case %v at %v: want %v, got %v", i, x, prob, p.Prob(x))
		}
		if math.Abs(math.Log(p.Prob(x))-p.LogProb(x)) > 1e-14 {
			t.Errorf("Prob and LogProb mismatch case %v at %v: want %v, got %v", i, x, math.Log(p.Prob(x)), p.LogProb(x))
		}
	}
}
// fullDist is a type that implements the standard set of routines.
type fullDist interface {
	CDF(x float64) float64
	Entropy() float64
	ExKurtosis() float64
	LogProb(x float64) float64
	Mean() float64
	Median() float64
	NumParameters() int
	Prob(x float64) float64
	Quantile(p float64) float64
	Rand() float64
	Skewness() float64
	StdDev() float64
	Survival(x float64) float64
	Variance() float64
}

// testFullDist tests all of the functions of a fullDist. It draws 1e6
// samples and compares every implemented statistic against its empirical
// estimate. continuous selects which subset of checks is meaningful:
// quantile/CDF round trips and PDF integration only make sense for
// continuous distributions.
func testFullDist(t *testing.T, f fullDist, i int, continuous bool) {
	tol := 1e-2
	const n = 1e6
	x := make([]float64, n)
	generateSamples(x, f)
	sort.Float64s(x)

	checkMean(t, i, x, f, tol)
	checkVarAndStd(t, i, x, f, tol)
	checkEntropy(t, i, x, f, tol)
	checkExKurtosis(t, i, x, f, tol)
	checkSkewness(t, i, x, f, tol)

	if continuous {
		// In a discrete distribution, the median may not have positive probability.
		checkMedian(t, i, x, f, tol)
		// In a discrete distribution, the CDF and Quantile may not be perfect mappings.
		checkQuantileCDFSurvival(t, i, x, f, tol)
		// Integrate over the PDF
		checkProbContinuous(t, i, x, f, 1e-10)
		checkProbQuantContinuous(t, i, x, f, tol)
	} else {
		// Check against empirical PDF.
		checkProbDiscrete(t, i, x, f, tol)
	}
}
// testRandLogProbContinuous tests that LogProb and Rand give consistent
// results. This can be used when the distribution does not implement CDF.
// For each of `bins` evenly spaced CDF levels it numerically integrates
// exp(LogProb) from min up to the empirical quantile of the sorted
// samples x and compares against that CDF level. min is the lower bound
// of the distribution's support.
func testRandLogProbContinuous(t *testing.T, i int, min float64, x []float64, f LogProber, tol float64, bins int) {
	for cdf := 1 / float64(bins); cdf <= 1-1/float64(bins); cdf += 1 / float64(bins) {
		// Get the estimated CDF from the samples
		pt := stat.Quantile(cdf, stat.Empirical, x, nil)

		prob := func(x float64) float64 {
			return math.Exp(f.LogProb(x))
		}
		// Integrate the PDF to find the CDF
		estCDF := quad.Fixed(prob, min, pt, 1000, nil, 0)
		if !floats.EqualWithinAbsOrRel(cdf, estCDF, tol, tol) {
			t.Errorf("Mismatch between integral of PDF and empirical CDF. Case %v. Want %v, got %v", i, cdf, estCDF)
		}
	}
}

259
stat/distuv/exponential.go Normal file
View File

@@ -0,0 +1,259 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"math/rand"
"github.com/gonum/floats"
"github.com/gonum/stat"
)
// Exponential represents the exponential distribution (https://en.wikipedia.org/wiki/Exponential_distribution).
type Exponential struct {
	// Rate is the rate parameter λ of the distribution. The mean of the
	// distribution is 1/Rate.
	Rate float64

	// Source is the source of random numbers used by Rand. If Source is
	// nil, the global source in math/rand is used.
	Source *rand.Rand
}

// CDF computes the value of the cumulative density function at x.
//
// The CDF is 0 for x < 0, below the support of the distribution.
func (e Exponential) CDF(x float64) float64 {
	if x < 0 {
		return 0
	}
	return 1 - math.Exp(-e.Rate*x)
}
// ConjugateUpdate updates the parameters of the distribution from the sufficient
// statistics of a set of samples. The sufficient statistics, suffStat, have been
// observed with nSamples observations. The prior values of the distribution are those
// currently in the distribution, and have been observed with priorStrength samples.
//
// For the exponential distribution, the sufficient statistic is the inverse of
// the mean of the samples.
// The prior is having seen priorStrength[0] samples with inverse mean Exponential.Rate
// As a result of this function, Exponential.Rate is updated based on the weighted
// samples, and priorStrength is modified to include the new number of samples observed.
//
// This function panics if len(suffStat) != 1 or len(priorStrength) != 1.
func (e *Exponential) ConjugateUpdate(suffStat []float64, nSamples float64, priorStrength []float64) {
	if len(suffStat) != 1 {
		panic("exponential: incorrect suffStat length")
	}
	if len(priorStrength) != 1 {
		panic("exponential: incorrect priorStrength length")
	}
	totalSamples := nSamples + priorStrength[0]

	// Total of the observations: nSamples samples with mean 1/suffStat[0].
	totalSum := nSamples / suffStat[0]
	// Fold in the prior pseudo-observations, skipping the division when
	// there is no prior weight. (Idiom fix: was `!(priorStrength[0] == 0)`.)
	if priorStrength[0] != 0 {
		totalSum += priorStrength[0] / e.Rate
	}
	e.Rate = totalSamples / totalSum
	priorStrength[0] = totalSamples
}
// Entropy returns the entropy of the distribution.
func (e Exponential) Entropy() float64 {
	// Entropy of Exp(λ) is 1 - ln(λ).
	return 1 - math.Log(e.Rate)
}

// ExKurtosis returns the excess kurtosis of the distribution.
func (Exponential) ExKurtosis() float64 {
	return 6
}

// Fit sets the parameters of the probability distribution from the
// data samples x with relative weights w.
// If weights is nil, then all the weights are 1.
// If weights is not nil, then the len(weights) must equal len(samples).
func (e *Exponential) Fit(samples, weights []float64) {
	suffStat := make([]float64, e.NumSuffStat())
	nSamples := e.SuffStat(samples, weights, suffStat)
	// A zero prior strength makes this a pure maximum-likelihood fit.
	e.ConjugateUpdate(suffStat, nSamples, make([]float64, e.NumSuffStat()))
}

// LogProb computes the natural logarithm of the value of the probability density function at x.
//
// LogProb returns -Inf for x < 0, where the density is zero.
func (e Exponential) LogProb(x float64) float64 {
	if x < 0 {
		return math.Inf(-1)
	}
	return math.Log(e.Rate) - e.Rate*x
}

// Mean returns the mean of the probability distribution.
func (e Exponential) Mean() float64 {
	return 1 / e.Rate
}

// Median returns the median of the probability distribution.
func (e Exponential) Median() float64 {
	// Solve CDF(x) = 1/2 for x: x = ln(2)/λ.
	return math.Ln2 / e.Rate
}

// Mode returns the mode of the probability distribution.
func (Exponential) Mode() float64 {
	return 0
}

// NumParameters returns the number of parameters in the distribution.
func (Exponential) NumParameters() int {
	return 1
}

// NumSuffStat returns the number of sufficient statistics for the distribution.
func (Exponential) NumSuffStat() int {
	return 1
}

// Prob computes the value of the probability density function at x.
func (e Exponential) Prob(x float64) float64 {
	return math.Exp(e.LogProb(x))
}

// Quantile returns the inverse of the cumulative probability distribution.
//
// Quantile panics if p is outside [0, 1].
func (e Exponential) Quantile(p float64) float64 {
	if p < 0 || p > 1 {
		panic(badPercentile)
	}
	return -math.Log(1-p) / e.Rate
}

// Rand returns a random sample drawn from the distribution.
func (e Exponential) Rand() float64 {
	// Draw a unit-rate exponential, falling back to the global source
	// when Source is nil, then rescale by the rate.
	var rnd float64
	if e.Source == nil {
		rnd = rand.ExpFloat64()
	} else {
		rnd = e.Source.ExpFloat64()
	}
	return rnd / e.Rate
}
// Score returns the score function with respect to the parameters of the
// distribution at the input location x. The score function is the derivative
// of the log-likelihood at x with respect to the parameters
//  (∂/∂θ) log(p(x;θ))
// If deriv is non-nil, len(deriv) must equal the number of parameters otherwise
// Score will panic, and the derivative is stored in-place into deriv. If deriv
// is nil a new slice will be allocated and returned.
//
// The order is [∂LogProb / ∂Rate].
//
// For more information, see https://en.wikipedia.org/wiki/Score_%28statistics%29.
//
// Special cases:
//  Score(0) = [NaN]
func (e Exponential) Score(deriv []float64, x float64) []float64 {
	if deriv == nil {
		deriv = make([]float64, e.NumParameters())
	}
	if len(deriv) != e.NumParameters() {
		panic(badLength)
	}
	// d/dλ [log λ - λx] = 1/λ - x on the support; zero below it; the
	// boundary x == 0 is left undefined.
	switch {
	case x > 0:
		deriv[0] = 1/e.Rate - x
	case x < 0:
		deriv[0] = 0
	default:
		deriv[0] = math.NaN()
	}
	return deriv
}
// ScoreInput returns the score function with respect to the input of the
// distribution at the input location specified by x. The score function is the
// derivative of the log-likelihood
//  (d/dx) log(p(x)) .
// Special cases:
//  ScoreInput(0) = NaN
func (e Exponential) ScoreInput(x float64) float64 {
	// d/dx [log λ - λx] = -λ on the support; zero below it; the boundary
	// x == 0 is left undefined.
	switch {
	case x > 0:
		return -e.Rate
	case x < 0:
		return 0
	}
	return math.NaN()
}
// Skewness returns the skewness of the distribution.
func (Exponential) Skewness() float64 {
	return 2
}

// StdDev returns the standard deviation of the probability distribution.
func (e Exponential) StdDev() float64 {
	// For the exponential distribution the standard deviation equals the mean.
	return 1 / e.Rate
}

// SuffStat computes the sufficient statistics of set of samples to update
// the distribution. The sufficient statistics are stored in place, and the
// effective number of samples are returned.
//
// The exponential distribution has one sufficient statistic, the average rate
// of the samples.
//
// If weights is nil, the weights are assumed to be 1, otherwise panics if
// len(samples) != len(weights). Panics if len(suffStat) != NumSuffStat().
func (Exponential) SuffStat(samples, weights, suffStat []float64) (nSamples float64) {
	if len(weights) != 0 && len(samples) != len(weights) {
		panic(badLength)
	}
	if len(suffStat) != (Exponential{}).NumSuffStat() {
		panic(badSuffStat)
	}
	// Effective sample count: plain length when unweighted, sum of the
	// weights otherwise.
	if len(weights) == 0 {
		nSamples = float64(len(samples))
	} else {
		nSamples = floats.Sum(weights)
	}
	// The sufficient statistic is the inverse of the (weighted) sample mean.
	mean := stat.Mean(samples, weights)
	suffStat[0] = 1 / mean
	return nSamples
}
// Survival returns the survival function (complementary CDF) at x.
//
// Survival is 1 for x < 0, below the support of the distribution.
func (e Exponential) Survival(x float64) float64 {
	if x < 0 {
		return 1
	}
	return math.Exp(-e.Rate * x)
}

// setParameters modifies the parameters of the distribution.
// It panics if p does not contain exactly one parameter named "Rate".
func (e *Exponential) setParameters(p []Parameter) {
	if len(p) != e.NumParameters() {
		panic("exponential: incorrect number of parameters to set")
	}
	if p[0].Name != "Rate" {
		panic("exponential: " + panicNameMismatch)
	}
	e.Rate = p[0].Value
}

// Variance returns the variance of the probability distribution.
func (e Exponential) Variance() float64 {
	// Variance of Exp(λ) is 1/λ².
	return 1 / (e.Rate * e.Rate)
}

// parameters returns the parameters of the distribution, filling p if it
// is non-nil. It panics if p is non-nil with the wrong length.
func (e Exponential) parameters(p []Parameter) []Parameter {
	nParam := e.NumParameters()
	if p == nil {
		p = make([]Parameter, nParam)
	} else if len(p) != nParam {
		panic("exponential: improper parameter length")
	}
	p[0].Name = "Rate"
	p[0].Value = e.Rate
	return p
}

View File

@@ -0,0 +1,71 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"testing"
)
// TestExponentialProb checks Prob, CDF, and LogProb of a unit-rate
// exponential at several points, including one below the support.
func TestExponentialProb(t *testing.T) {
	pts := []univariateProbPoint{
		{
			loc:     0,
			prob:    1,
			cumProb: 0,
			logProb: 0,
		},
		{
			// Below the support: density zero, log-density -Inf.
			loc:     -1,
			prob:    0,
			cumProb: 0,
			logProb: math.Inf(-1),
		},
		{
			loc:     1,
			prob:    1 / (math.E),
			cumProb: 0.6321205588285576784044762298385391325541888689682321654921631983025385042551001966428527256540803563,
			logProb: -1,
		},
		{
			loc:     20,
			prob:    math.Exp(-20),
			cumProb: 0.999999997938846377561442172034059619844179023624192724400896307027755338370835976215440646720089072,
			logProb: -20,
		},
	}
	testDistributionProbs(t, Exponential{Rate: 1}, "Exponential", pts)
}

// TestExponentialFitPrior exercises the conjugate-update machinery with a
// non-trivial starting rate.
func TestExponentialFitPrior(t *testing.T) {
	testConjugateUpdate(t, func() ConjugateUpdater { return &Exponential{Rate: 13.7} })
}

// TestExponentialScore checks the parameter derivative for several rates.
func TestExponentialScore(t *testing.T) {
	for _, test := range []*Exponential{
		{
			Rate: 1,
		},
		{
			Rate: 0.35,
		},
		{
			Rate: 4.6,
		},
	} {
		testDerivParam(t, test)
	}
}

// TestExponentialFitPanic checks that Fit does not panic on all-zero
// samples with nil weights.
func TestExponentialFitPanic(t *testing.T) {
	e := Exponential{Rate: 2}
	defer func() {
		r := recover()
		if r != nil {
			t.Errorf("unexpected panic for Fit call: %v", r)
		}
	}()
	e.Fit(make([]float64, 10), nil)
}

132
stat/distuv/f.go Normal file
View File

@@ -0,0 +1,132 @@
// Copyright ©2017 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"math/rand"
"github.com/gonum/mathext"
)
// F implements the F-distribution, a two-parameter continuous distribution
// with support over the positive real numbers.
//
// The F-distribution has density function
//  sqrt(((d1*x)^d1) * d2^d2 / ((d1*x+d2)^(d1+d2))) / (x * B(d1/2,d2/2))
// where B is the beta function.
//
// For more information, see https://en.wikipedia.org/wiki/F-distribution
type F struct {
	D1 float64 // Degrees of freedom for the numerator
	D2 float64 // Degrees of freedom for the denominator

	// Source is the source of random numbers used by Rand. If Source is
	// nil, the global source in math/rand is used (via ChiSquared.Rand).
	Source *rand.Rand
}
// CDF computes the value of the cumulative density function at x.
//
// The F-distribution has support on the non-negative reals, so the CDF
// is 0 for negative x. The guard mirrors the x < 0 checks used by the
// other distributions in this package and keeps the regularized
// incomplete beta argument d1*x/(d1*x+d2) inside [0, 1].
func (f F) CDF(x float64) float64 {
	if x < 0 {
		return 0
	}
	return mathext.RegIncBeta(f.D1/2, f.D2/2, f.D1*x/(f.D1*x+f.D2))
}
// ExKurtosis returns the excess kurtosis of the distribution.
//
// ExKurtosis returns NaN if the D2 parameter is less or equal to 8, where
// the fourth moment does not exist.
func (f F) ExKurtosis() float64 {
	if f.D2 <= 8 {
		return math.NaN()
	}
	return (12 / (f.D2 - 6)) * ((5*f.D2-22)/(f.D2-8) + ((f.D2-4)/f.D1)*((f.D2-2)/(f.D2-8))*((f.D2-2)/(f.D1+f.D2-2)))
}
// LogProb computes the natural logarithm of the value of the probability
// density function at x.
//
// The density is zero below the support, so LogProb returns -Inf for
// x < 0; previously math.Log of a negative argument made the result NaN
// there. The value at exactly x == 0 is left as computed by the formula
// (NaN; the limiting density at 0 depends on D1 — TODO confirm desired
// boundary behavior).
func (f F) LogProb(x float64) float64 {
	if x < 0 {
		return math.Inf(-1)
	}
	return 0.5*(f.D1*math.Log(f.D1*x)+f.D2*math.Log(f.D2)-(f.D1+f.D2)*math.Log(f.D1*x+f.D2)) - math.Log(x) - mathext.Lbeta(f.D1/2, f.D2/2)
}
// Mean returns the mean of the probability distribution.
//
// Mean returns NaN if the D2 parameter is less than or equal to 2, where
// the mean does not exist.
func (f F) Mean() float64 {
	if f.D2 <= 2 {
		return math.NaN()
	}
	return f.D2 / (f.D2 - 2)
}

// Mode returns the mode of the distribution.
//
// Mode returns NaN if the D1 parameter is less than or equal to 2.
func (f F) Mode() float64 {
	if f.D1 <= 2 {
		return math.NaN()
	}
	return ((f.D1 - 2) / f.D1) * (f.D2 / (f.D2 + 2))
}

// NumParameters returns the number of parameters in the distribution.
func (f F) NumParameters() int {
	return 2
}

// Prob computes the value of the probability density function at x.
func (f F) Prob(x float64) float64 {
	return math.Exp(f.LogProb(x))
}
// Quantile returns the inverse of the cumulative distribution function.
//
// Quantile panics if p is outside [0, 1].
func (f F) Quantile(p float64) float64 {
	if p < 0 || p > 1 {
		panic(badPercentile)
	}
	// Invert the regularized incomplete beta function, then undo the
	// y = d1*x/(d1*x+d2) substitution used by the CDF.
	y := mathext.InvRegIncBeta(0.5*f.D1, 0.5*f.D2, p)
	return f.D2 * y / (f.D1 * (1 - y))
}

// Rand returns a random sample drawn from the distribution.
func (f F) Rand() float64 {
	// An F(d1, d2) variate is the ratio of two independent χ² draws,
	// each scaled by its degrees of freedom.
	u1 := ChiSquared{f.D1, f.Source}.Rand()
	u2 := ChiSquared{f.D2, f.Source}.Rand()
	return (u1 / f.D1) / (u2 / f.D2)
}

// Skewness returns the skewness of the distribution.
//
// Skewness returns NaN if the D2 parameter is less than or equal to 6,
// where the third moment does not exist.
func (f F) Skewness() float64 {
	if f.D2 <= 6 {
		return math.NaN()
	}
	num := (2*f.D1 + f.D2 - 2) * math.Sqrt(8*(f.D2-4))
	den := (f.D2 - 6) * math.Sqrt(f.D1*(f.D1+f.D2-2))
	return num / den
}
// StdDev returns the standard deviation of the probability distribution.
//
// StdDev returns NaN if the D2 parameter is less than or equal to 4, where
// the variance does not exist.
func (f F) StdDev() float64 {
	if f.D2 <= 4 {
		return math.NaN()
	}
	return math.Sqrt(f.Variance())
}

// Survival returns the survival function (complementary CDF) at x.
func (f F) Survival(x float64) float64 {
	return 1 - f.CDF(x)
}

// Variance returns the variance of the probability distribution.
//
// Variance returns NaN if the D2 parameter is less than or equal to 4,
// where the second moment does not exist.
func (f F) Variance() float64 {
	if f.D2 <= 4 {
		return math.NaN()
	}
	num := 2 * f.D2 * f.D2 * (f.D1 + f.D2 - 2)
	den := f.D1 * (f.D2 - 2) * (f.D2 - 2) * (f.D2 - 4)
	return num / den
}

89
stat/distuv/f_test.go Normal file
View File

@@ -0,0 +1,89 @@
// Copyright ©2017 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math/rand"
"sort"
"testing"
"github.com/gonum/floats"
)
// TestFProb checks the PDF at fixed points against reference values
// computed with scipy.stats.f.
func TestFProb(t *testing.T) {
	for _, test := range []struct {
		x, d1, d2, want float64
	}{
		// Values calculated with scipy.stats.f
		{0.0001, 4, 6, 0.00053315559110558126},
		{0.1, 1, 1, 0.91507658371794609},
		{0.5, 11, 7, 0.66644660411410883},
		{0.9, 20, 15, 0.88293424959522437},
		{1, 1, 1, 0.15915494309189535},
		{2, 15, 12, 0.16611971273429088},
		{5, 4, 8, 0.013599775603702537},
		{10, 12, 9, 0.00032922887567957289},
		{100, 7, 7, 6.08037637806889e-08},
		{1000, 2, 1, 1.1171959870312232e-05},
	} {
		pdf := F{test.d1, test.d2, nil}.Prob(test.x)
		if !floats.EqualWithinAbsOrRel(pdf, test.want, 1e-10, 1e-10) {
			t.Errorf("Prob mismatch, x = %v, d1 = %v, d2 = %v. Got %v, want %v", test.x, test.d1, test.d2, pdf, test.want)
		}
	}
}

// TestFCDF checks the CDF at fixed points against reference values
// computed with scipy.stats.f.
func TestFCDF(t *testing.T) {
	for _, test := range []struct {
		x, d1, d2, want float64
	}{
		// Values calculated with scipy.stats.f
		{0.0001, 4, 6, 2.6660741629519019e-08},
		{0.1, 1, 1, 0.19498222904213672},
		{0.5, 11, 7, 0.14625028471336987},
		{0.9, 20, 15, 0.40567939897287852},
		{1, 1, 1, 0.50000000000000011},
		{2, 15, 12, 0.8839384428956264},
		{5, 4, 8, 0.97429642410900219},
		{10, 12, 9, 0.99915733385467187},
		{100, 7, 7, 0.99999823560259171},
		{1000, 2, 1, 0.97764490829950534},
	} {
		cdf := F{test.d1, test.d2, nil}.CDF(test.x)
		if !floats.EqualWithinAbsOrRel(cdf, test.want, 1e-10, 1e-10) {
			t.Errorf("CDF mismatch, x = %v, d1 = %v, d2 = %v. Got %v, want %v", test.x, test.d1, test.d2, cdf, test.want)
		}
	}
}

// TestF runs the statistical consistency checks on several
// parameterizations using a fixed random seed for reproducibility.
func TestF(t *testing.T) {
	src := rand.New(rand.NewSource(1))
	for i, b := range []F{
		{13, 16, src},
		{42, 31, src},
		{77, 92, src},
	} {
		testF(t, b, i)
	}
}

// testF draws a large sorted sample from f and checks the moments,
// PDF/LogPDF consistency, and Quantile/CDF/Survival round trips against
// their empirical estimates. i labels the test case in failures.
func testF(t *testing.T, f F, i int) {
	const (
		tol  = 1e-2
		n    = 2e6
		bins = 50
	)
	x := make([]float64, n)
	generateSamples(x, f)
	sort.Float64s(x)

	testRandLogProbContinuous(t, i, 0, x, f, tol, bins)
	checkProbContinuous(t, i, x, f, 1e-3)
	checkMean(t, i, x, f, tol)
	checkVarAndStd(t, i, x, f, tol)
	checkExKurtosis(t, i, x, f, 5e-2)
	checkSkewness(t, i, x, f, tol)
	checkQuantileCDFSurvival(t, i, x, f, 1e-3)
}

244
stat/distuv/gamma.go Normal file
View File

@@ -0,0 +1,244 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"math/rand"
"github.com/gonum/mathext"
)
// Gamma implements the Gamma distribution, a two-parameter continuous distribution
// with support over the positive real numbers.
//
// The gamma distribution has density function
//  β^α / Γ(α) x^(α-1)e^(-βx)
//
// For more information, see https://en.wikipedia.org/wiki/Gamma_distribution
type Gamma struct {
	// Alpha is the shape parameter of the distribution. Alpha must be greater
	// than 0. If Alpha == 1, this is equivalent to an exponential distribution.
	Alpha float64
	// Beta is the rate parameter of the distribution. Beta must be greater than 0.
	// If Beta == 1/2, this is equivalent to a Chi-Squared distribution with
	// 2*Alpha degrees of freedom (ChiSquared.Rand draws from
	// Gamma{k/2, 0.5, src}); the previous comment incorrectly said Beta == 2.
	Beta float64

	// Source is the source of random numbers used by Rand. If Source is
	// nil, the global source in math/rand is used.
	Source *rand.Rand
}
// CDF computes the value of the cumulative distribution function at x.
//
// The CDF is 0 for x < 0, below the support of the distribution.
func (g Gamma) CDF(x float64) float64 {
	if x < 0 {
		return 0
	}
	return mathext.GammaInc(g.Alpha, g.Beta*x)
}

// ExKurtosis returns the excess kurtosis of the distribution.
func (g Gamma) ExKurtosis() float64 {
	return 6 / g.Alpha
}

// LogProb computes the natural logarithm of the value of the probability
// density function at x.
//
// LogProb returns -Inf for x <= 0, outside the open support of the density.
func (g Gamma) LogProb(x float64) float64 {
	if x <= 0 {
		return math.Inf(-1)
	}
	a := g.Alpha
	b := g.Beta
	// log of β^α/Γ(α) x^(α-1) e^(-βx), expanded term by term.
	lg, _ := math.Lgamma(a)
	return a*math.Log(b) - lg + (a-1)*math.Log(x) - b*x
}

// Mean returns the mean of the probability distribution.
func (g Gamma) Mean() float64 {
	return g.Alpha / g.Beta
}
// Mode returns the mode of the Gamma distribution.
// (The original comment said "normal distribution" — a copy-paste error;
// this method belongs to Gamma.)
//
// The mode is NaN in the special case where the Alpha (shape) parameter
// is less than 1.
func (g Gamma) Mode() float64 {
	if g.Alpha < 1 {
		return math.NaN()
	}
	return (g.Alpha - 1) / g.Beta
}
// NumParameters returns the number of parameters in the distribution.
func (Gamma) NumParameters() int {
	return 2
}

// Prob computes the value of the probability density function at x.
func (g Gamma) Prob(x float64) float64 {
	return math.Exp(g.LogProb(x))
}

// Quantile returns the inverse of the cumulative distribution function.
//
// Quantile panics if p is outside [0, 1].
func (g Gamma) Quantile(p float64) float64 {
	if p < 0 || p > 1 {
		panic(badPercentile)
	}
	// Invert the regularized incomplete gamma function, then undo the
	// βx scaling used in the CDF.
	return mathext.GammaIncInv(g.Alpha, p) / g.Beta
}
// Rand returns a random sample drawn from the distribution.
//
// Rand panics if either alpha or beta is <= 0.
func (g Gamma) Rand() float64 {
	if g.Beta <= 0 {
		panic("gamma: beta <= 0")
	}

	// Select the generators from the provided Source, falling back to the
	// global math/rand source.
	unifrnd := rand.Float64
	exprnd := rand.ExpFloat64
	normrnd := rand.NormFloat64
	if g.Source != nil {
		unifrnd = g.Source.Float64
		exprnd = g.Source.ExpFloat64
		normrnd = g.Source.NormFloat64
	}

	a := g.Alpha
	b := g.Beta
	switch {
	case a <= 0:
		// Fixed message: the condition also rejects alpha == 0, so the
		// previous text "gamma: alpha < 0" was misleading.
		panic("gamma: alpha <= 0")
	case a == 1:
		// Generate from exponential
		return exprnd() / b
	case a < 0.3:
		// Generate using
		//  Liu, Chuanhai, Martin, Ryan and Syring, Nick. "Simulating from a
		//  gamma distribution with small shape parameter"
		//  https://arxiv.org/abs/1302.1884
		//   use this reference: http://link.springer.com/article/10.1007/s00180-016-0692-0
		// Algorithm adjusted to work in log space as much as possible.
		lambda := 1/a - 1
		lw := math.Log(a) - 1 - math.Log(1-a)
		lr := -math.Log(1 + math.Exp(lw))
		lc, _ := math.Lgamma(a + 1)
		// Rejection sampling loop: propose z, accept with probability
		// exp(lh - lEta).
		for {
			e := exprnd()
			var z float64
			if e >= -lr {
				z = e + lr
			} else {
				z = -exprnd() / lambda
			}
			lh := lc - z - math.Exp(-z/a)
			var lEta float64
			if z >= 0 {
				lEta = lc - z
			} else {
				lEta = lc + lw + math.Log(lambda) + lambda*z
			}
			if lh-lEta > -exprnd() {
				return math.Exp(-z/a) / b
			}
		}
	case a >= 0.3 && a < 1:
		// Generate using:
		//  Kundu, Debasis, and Rameshwar D. Gupta. "A convenient way of generating
		//  gamma random variables using generalized exponential distribution."
		//  Computational Statistics & Data Analysis 51.6 (2007): 2796-2802.
		// TODO(btracey): Change to using Algorithm 3 if we can find the bug in
		// the implementation below.
		// Algorithm 2.
		alpha := g.Alpha
		a := math.Pow(1-expNegOneHalf, alpha) / (math.Pow(1-expNegOneHalf, alpha) + alpha*math.Exp(-1)/math.Pow(2, alpha))
		b := math.Pow(1-expNegOneHalf, alpha) + alpha/math.E/math.Pow(2, alpha)
		var x float64
		for {
			u := unifrnd()
			if u <= a {
				x = -2 * math.Log(1-math.Pow(u*b, 1/alpha))
			} else {
				x = -math.Log(math.Pow(2, alpha) / alpha * b * (1 - u))
			}
			v := unifrnd()
			if x <= 1 {
				if v <= math.Pow(x, alpha-1)*math.Exp(-x/2)/(math.Pow(2, alpha-1)*math.Pow(1-math.Exp(-x/2), alpha-1)) {
					break
				}
			} else {
				if v <= math.Pow(x, alpha-1) {
					break
				}
			}
		}
		return x / g.Beta

		/*
			// Algorithm 3.
			d := 1.0334 - 0.0766*math.Exp(2.2942*alpha)
			a := math.Pow(2, alpha) * math.Pow(1-math.Exp(-d/2), alpha)
			b := alpha * math.Pow(d, alpha-1) * math.Exp(-d)
			c := a + b
			var x float64
			for {
				u := unifrnd()
				if u <= a/(a+b) {
					x = -2 * math.Log(1-math.Pow(c*u, 1/a)/2)
				} else {
					x = -math.Log(c * (1 - u) / (alpha * math.Pow(d, alpha-1)))
				}
				v := unifrnd()
				if x <= d {
					if v <= (math.Pow(x, alpha-1)*math.Exp(-x/2))/(math.Pow(2, alpha-1)*math.Pow(1-math.Exp(-x/2), alpha-1)) {
						break
					}
				} else {
					if v <= math.Pow(d/x, 1-alpha) {
						break
					}
				}
			}
			return x / g.Beta
		*/
	case a > 1:
		// Generate using:
		//  Marsaglia, George, and Wai Wan Tsang. "A simple method for generating
		//  gamma variables." ACM Transactions on Mathematical Software (TOMS)
		//  26.3 (2000): 363-372.
		d := a - 1.0/3
		c := 1 / (3 * math.Sqrt(d))
		for {
			u := -exprnd()
			x := normrnd()
			v := 1 + x*c
			v = v * v * v
			if u < 0.5*x*x+d*(1-v+math.Log(v)) {
				return d * v / b
			}
		}
	}
	panic("unreachable")
}
// Survival returns the survival function (complementary CDF) at x.
func (g Gamma) Survival(x float64) float64 {
	if x < 0 {
		return 1
	}
	// Regularized upper incomplete gamma function at the rate-scaled location.
	scaled := g.Beta * x
	return mathext.GammaIncComp(g.Alpha, scaled)
}

// StdDev returns the standard deviation of the probability distribution.
func (g Gamma) StdDev() float64 {
	return math.Sqrt(g.Variance())
}

// Variance returns the variance of the probability distribution.
func (g Gamma) Variance() float64 {
	return g.Alpha / g.Beta / g.Beta
}

63
stat/distuv/gamma_test.go Normal file
View File

@@ -0,0 +1,63 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math/rand"
"sort"
"testing"
"github.com/gonum/floats"
)
// TestGamma checks the pdf against reference values and then runs the
// statistical sampling checks over a spread of parameter values.
func TestGamma(t *testing.T) {
	// Values are a comparison with scipy.
	for _, test := range []struct {
		x, alpha, want float64
	}{
		{0.9, 0.1, 0.046986817861555757},
		{0.9, 0.01, 0.0045384353289090401},
		{0.45, 0.01, 0.014137035997241795},
	} {
		pdf := Gamma{Alpha: test.alpha, Beta: 1}.Prob(test.x)
		if !floats.EqualWithinAbsOrRel(pdf, test.want, 1e-10, 1e-10) {
			t.Errorf("Pdf mismatch. Got %v, want %v", pdf, test.want)
		}
	}
	// All of the cases below share src, so the case order is significant:
	// reordering them changes every sampled value.
	src := rand.New(rand.NewSource(1))
	for i, g := range []Gamma{
		{Alpha: 0.5, Beta: 0.8, Source: src},
		{Alpha: 0.9, Beta: 6, Source: src},
		{Alpha: 0.9, Beta: 500, Source: src},
		{Alpha: 1, Beta: 1, Source: src},
		{Alpha: 1.6, Beta: 0.4, Source: src},
		{Alpha: 2.6, Beta: 1.5, Source: src},
		{Alpha: 5.6, Beta: 0.5, Source: src},
		{Alpha: 30, Beta: 1.7, Source: src},
		{Alpha: 30.2, Beta: 1.7, Source: src},
	} {
		testGamma(t, g, i)
	}
}
// testGamma draws a large sample from f and runs the generic statistical
// checks against it; i labels the case in failure messages.
func testGamma(t *testing.T, f Gamma, i int) {
	// TODO(btracey): Replace this when Gamma implements FullDist.
	const (
		n    = 1e6
		bins = 50
	)
	tol := 2e-3
	samples := make([]float64, n)
	generateSamples(samples, f)
	sort.Float64s(samples)

	testRandLogProbContinuous(t, i, 0, samples, f, tol, bins)
	checkMean(t, i, samples, f, tol)
	checkVarAndStd(t, i, samples, f, 2e-2)
	checkExKurtosis(t, i, samples, f, 5e-2)
	checkProbContinuous(t, i, samples, f, 1e-3)
	checkQuantileCDFSurvival(t, i, samples, f, 1e-2)
}

25
stat/distuv/general.go Normal file
View File

@@ -0,0 +1,25 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package distuv provides univariate random distribution types.
package distuv
import "math"
// Parameter represents a parameter of a probability distribution
type Parameter struct {
	// Name is the name of the parameter, e.g. "Mu" or "Sigma".
	Name string
	// Value is the value of the parameter.
	Value float64
}
// Panic messages shared by the distributions in this package.
var (
	badPercentile = "distuv: percentile out of bounds"
	badLength     = "distuv: slice length mismatch"
	badSuffStat   = "distuv: wrong suffStat length"
	badNoSamples  = "distuv: must have at least one sample"
)

var (
	// expNegOneHalf is exp(-1/2), precomputed for the gamma sampler.
	expNegOneHalf = math.Exp(-0.5)
)

205
stat/distuv/general_test.go Normal file
View File

@@ -0,0 +1,205 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"fmt"
"math"
"testing"
"github.com/gonum/diff/fd"
"github.com/gonum/floats"
)
// univariateProbPoint is a location together with the expected probability
// values at that location, used to table-test a univariate distribution.
type univariateProbPoint struct {
	loc     float64 // location at which to evaluate the distribution
	logProb float64 // expected LogProb(loc)
	cumProb float64 // expected CDF(loc)
	prob    float64 // expected Prob(loc)
}

// UniProbDist is the set of probability methods exercised by
// testDistributionProbs.
type UniProbDist interface {
	Prob(float64) float64
	CDF(float64) float64
	LogProb(float64) float64
	Quantile(float64) float64
	Survival(float64) float64
}
// absEq reports whether a and b agree to within an absolute tolerance
// of 1e-14. (Written as a negated comparison so that a NaN difference
// compares as equal, matching the original behavior.)
func absEq(a, b float64) bool {
	return !(math.Abs(a-b) > 1e-14)
}
// TODO: Implement a better test for Quantile
// testDistributionProbs checks dist's LogProb, Prob, CDF, Survival, and
// (round-trip via cumProb) Quantile against the expected values in pts.
func testDistributionProbs(t *testing.T, dist UniProbDist, name string, pts []univariateProbPoint) {
	for _, pt := range pts {
		logProb := dist.LogProb(pt.loc)
		if !absEq(logProb, pt.logProb) {
			t.Errorf("Log probability doesnt match for "+name+". Expected %v. Found %v", pt.logProb, logProb)
		}
		prob := dist.Prob(pt.loc)
		if !absEq(prob, pt.prob) {
			t.Errorf("Probability doesn't match for "+name+". Expected %v. Found %v", pt.prob, prob)
		}
		cumProb := dist.CDF(pt.loc)
		if !absEq(cumProb, pt.cumProb) {
			t.Errorf("Cumulative Probability doesn't match for "+name+". Expected %v. Found %v", pt.cumProb, cumProb)
		}
		if !absEq(dist.Survival(pt.loc), 1-pt.cumProb) {
			t.Errorf("Survival doesn't match for %v. Expected %v, Found %v", name, 1-pt.cumProb, dist.Survival(pt.loc))
		}
		// Quantile is only round-trip checked where the density is nonzero;
		// outside the support the CDF is flat and the inverse is not unique.
		if pt.prob != 0 {
			if math.Abs(dist.Quantile(pt.cumProb)-pt.loc) > 1e-4 {
				// NOTE(review): these debug prints bypass the testing
				// framework; consider t.Logf (kept as-is here because fmt
				// has no other use in this file).
				fmt.Println("true =", pt.loc)
				fmt.Println("calculated=", dist.Quantile(pt.cumProb))
				t.Errorf("Quantile doesn't match for "+name+", loc = %v", pt.loc)
			}
		}
	}
}
// ConjugateUpdater is the set of methods needed to test that incremental
// conjugate updates agree with a single batch update from sufficient
// statistics.
type ConjugateUpdater interface {
	NumParameters() int
	parameters([]Parameter) []Parameter
	NumSuffStat() int
	SuffStat([]float64, []float64, []float64) float64
	ConjugateUpdate([]float64, float64, []float64)
	Rand() float64
}
// testConjugateUpdate verifies, for several weighting schemes, that updating
// a distribution one sample at a time produces the same parameters as a
// single update over all samples, and that nil weights behave like uniform
// unit weights. The incremental/batch interleaving below is order critical.
func testConjugateUpdate(t *testing.T, newFittable func() ConjugateUpdater) {
	for i, test := range []struct {
		samps   []float64
		weights []float64
	}{
		{
			samps:   randn(newFittable(), 10),
			weights: nil,
		},
		{
			samps:   randn(newFittable(), 10),
			weights: ones(10),
		},
		{
			samps:   randn(newFittable(), 10),
			weights: randn(&Exponential{Rate: 1}, 10),
		},
	} {
		// ensure that conjugate produces the same result both incrementally and all at once
		incDist := newFittable()
		stats := make([]float64, incDist.NumSuffStat())
		prior := make([]float64, incDist.NumParameters())
		for j := range test.samps {
			var incWeights, allWeights []float64
			if test.weights != nil {
				incWeights = test.weights[j : j+1]
				allWeights = test.weights[0 : j+1]
			}
			// Incremental: fold in one more sample.
			nsInc := incDist.SuffStat(test.samps[j:j+1], incWeights, stats)
			incDist.ConjugateUpdate(stats, nsInc, prior)

			// Batch: update a fresh distribution with all samples so far.
			allDist := newFittable()
			nsAll := allDist.SuffStat(test.samps[0:j+1], allWeights, stats)
			allDist.ConjugateUpdate(stats, nsAll, make([]float64, allDist.NumParameters()))
			if !parametersEqual(incDist.parameters(nil), allDist.parameters(nil), 1e-12) {
				t.Errorf("prior doesn't match after incremental update for (%d, %d). Incremental is %v, all at once is %v", i, j, incDist, allDist)
			}

			// nil weights must agree with explicit unit weights.
			if test.weights == nil {
				onesDist := newFittable()
				nsOnes := onesDist.SuffStat(test.samps[0:j+1], ones(j+1), stats)
				onesDist.ConjugateUpdate(stats, nsOnes, make([]float64, onesDist.NumParameters()))
				if !parametersEqual(onesDist.parameters(nil), incDist.parameters(nil), 1e-14) {
					t.Errorf("nil and uniform weighted prior doesn't match for incremental update for (%d, %d). Uniform weighted is %v, nil is %v", i, j, onesDist, incDist)
				}
				if !parametersEqual(onesDist.parameters(nil), allDist.parameters(nil), 1e-14) {
					t.Errorf("nil and uniform weighted prior doesn't match for all at once update for (%d, %d). Uniform weighted is %v, nil is %v", i, j, onesDist, incDist)
				}
			}
		}
	}
}
// randn generates a specified number of random samples
func randn(dist Rander, n int) []float64 {
	samples := make([]float64, n)
	for i := 0; i < n; i++ {
		samples[i] = dist.Rand()
	}
	return samples
}
// ones returns a slice of length n in which every element is 1.
func ones(n int) []float64 {
	w := make([]float64, n)
	for i := 0; i < n; i++ {
		w[i] = 1
	}
	return w
}
// parametersEqual returns whether p1 and p2 contain the same parameters,
// with names matching exactly and values matching to within the absolute
// tolerance tol.
func parametersEqual(p1, p2 []Parameter, tol float64) bool {
	// Guard the lengths explicitly: previously a longer p1 indexed past the
	// end of p2 (panic), and a shorter p1 silently compared only a prefix.
	if len(p1) != len(p2) {
		return false
	}
	for i, p := range p1 {
		if p.Name != p2[i].Name {
			return false
		}
		if math.Abs(p.Value-p2[i].Value) > tol {
			return false
		}
	}
	return true
}
// derivParamTester is the set of methods needed by testDerivParam to check
// an analytic parameter score against a finite-difference gradient.
type derivParamTester interface {
	LogProb(x float64) float64
	Score(deriv []float64, x float64) []float64
	Quantile(p float64) float64
	NumParameters() int
	parameters([]Parameter) []Parameter
	setParameters([]Parameter)
}
// testDerivParam checks that d.Score matches a finite-difference gradient of
// d.LogProb with respect to the parameters, at locations spread across the
// distribution's quantiles. d's parameters are mutated and restored, so the
// set/restore sequencing below is order critical.
func testDerivParam(t *testing.T, d derivParamTester) {
	// Tests that the derivative matches for a number of different quantiles
	// along the distribution.
	nTest := 10
	quantiles := make([]float64, nTest)
	floats.Span(quantiles, 0.1, 0.9)

	deriv := make([]float64, d.NumParameters())
	fdDeriv := make([]float64, d.NumParameters())

	initParams := d.parameters(nil)
	init := make([]float64, d.NumParameters())
	for i, v := range initParams {
		init[i] = v.Value
	}
	for _, v := range quantiles {
		// Restore the original parameters before evaluating at this quantile.
		d.setParameters(initParams)
		x := d.Quantile(v)
		d.Score(deriv, x)
		// f evaluates LogProb at x with the parameters set to p; used as the
		// objective for the finite-difference gradient.
		f := func(p []float64) float64 {
			params := d.parameters(nil)
			for i, v := range p {
				params[i].Value = v
			}
			d.setParameters(params)
			return d.LogProb(x)
		}
		fd.Gradient(fdDeriv, f, init, nil)
		if !floats.EqualApprox(deriv, fdDeriv, 1e-6) {
			t.Fatal("Derivative mismatch. Want", fdDeriv, ", got", deriv, ".")
		}
		// Also check the allocating (nil deriv) form of Score.
		d.setParameters(initParams)
		d2 := d.Score(nil, x)
		if !floats.EqualApprox(d2, deriv, 1e-14) {
			t.Errorf("Derivative mismatch when input nil Want %v, got %v", d2, deriv)
		}
	}
}

22
stat/distuv/interfaces.go Normal file
View File

@@ -0,0 +1,22 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
// LogProber wraps the LogProb method, which returns the natural logarithm of
// the value of the probability density function at x.
type LogProber interface {
	LogProb(float64) float64
}

// Rander wraps the Rand method, which returns a random sample drawn from the
// distribution.
type Rander interface {
	Rand() float64
}

// RandLogProber is the interface that groups the Rander and LogProber methods.
type RandLogProber interface {
	Rander
	LogProber
}

// Quantiler wraps the Quantile method, which returns the inverse of the
// cumulative distribution function at p.
type Quantiler interface {
	Quantile(p float64) float64
}

252
stat/distuv/laplace.go Normal file
View File

@@ -0,0 +1,252 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"math/rand"
"sort"
"github.com/gonum/floats"
"github.com/gonum/stat"
)
// Laplace represents the Laplace distribution (https://en.wikipedia.org/wiki/Laplace_distribution).
type Laplace struct {
	Mu    float64 // Mean of the Laplace distribution
	Scale float64 // Scale of the Laplace distribution
	// Source is the source of random numbers used by Rand. If Source is nil,
	// the global source in math/rand is used.
	Source *rand.Rand
}
// CDF computes the value of the cumulative density function at x.
func (l Laplace) CDF(x float64) float64 {
	z := (x - l.Mu) / l.Scale
	if x < l.Mu {
		return 0.5 * math.Exp(z)
	}
	return 1 - 0.5*math.Exp(-z)
}
// Entropy returns the entropy of the distribution.
func (l Laplace) Entropy() float64 {
	return 1 + math.Log(2*l.Scale)
}

// ExKurtosis returns the excess kurtosis of the distribution.
// It is a constant 3 for every Laplace distribution.
func (Laplace) ExKurtosis() float64 {
	return 3
}
// Fit sets the parameters of the probability distribution from the
// data samples x with relative weights w.
// If weights is nil, then all the weights are 1.
// If weights is not nil, then the len(weights) must equal len(samples).
//
// Note: Laplace distribution has no FitPrior because it has no sufficient
// statistics.
func (l *Laplace) Fit(samples, weights []float64) {
	// NOTE(review): the doc comment permits nil weights, but this check
	// panics for nil weights with non-empty samples — confirm the intended
	// contract before relaxing it.
	if len(samples) != len(weights) {
		panic(badLength)
	}
	if len(samples) == 0 {
		panic(badNoSamples)
	}
	if len(samples) == 1 {
		l.Mu = samples[0]
		l.Scale = 0
		return
	}

	var (
		sortedSamples []float64
		sortedWeights []float64
	)
	if sort.Float64sAreSorted(samples) {
		sortedSamples = samples
		sortedWeights = weights
	} else {
		// Copy the inputs so that sorting does not modify the caller's slices.
		sortedSamples = make([]float64, len(samples))
		copy(sortedSamples, samples)
		// Bug fix: this assignment previously used :=, which declared a new
		// sortedWeights shadowing the outer variable. The outer variable
		// stayed nil, so the weighted quantile below silently ignored the
		// weights whenever the input was unsorted.
		sortedWeights = make([]float64, len(samples))
		copy(sortedWeights, weights)
		stat.SortWeighted(sortedSamples, sortedWeights)
	}

	// The (weighted) median of the samples is the maximum likelihood estimate
	// of the mean parameter
	// TODO: Rethink quantile type when stat has more options
	l.Mu = stat.Quantile(0.5, stat.Empirical, sortedSamples, sortedWeights)

	sumWeights := floats.Sum(weights)

	// The scale parameter is the average absolute distance
	// between the sample and the mean
	absError := stat.MomentAbout(1, samples, l.Mu, weights)
	l.Scale = absError / sumWeights
}
// LogProb computes the natural logarithm of the value of the probability density
// function at x.
func (l Laplace) LogProb(x float64) float64 {
	// log pdf = -log 2 - log b - |x-μ|/b.
	z := math.Abs(x-l.Mu) / l.Scale
	return -math.Ln2 - math.Log(l.Scale) - z
}
// MarshalParameters implements the ParameterMarshaler interface
func (l Laplace) MarshalParameters(p []Parameter) {
	if len(p) != l.NumParameters() {
		panic(badLength)
	}
	p[0] = Parameter{Name: "Mu", Value: l.Mu}
	p[1] = Parameter{Name: "Scale", Value: l.Scale}
}
// Mean returns the mean of the probability distribution.
func (l Laplace) Mean() float64 {
	return l.Mu
}

// Median returns the median of the Laplace distribution.
func (l Laplace) Median() float64 {
	return l.Mu
}

// Mode returns the mode of the Laplace distribution.
func (l Laplace) Mode() float64 {
	return l.Mu
}

// NumParameters returns the number of parameters in the distribution.
func (l Laplace) NumParameters() int {
	return 2
}
// Quantile returns the inverse of the cumulative probability distribution.
// Quantile panics if p is outside [0, 1].
func (l Laplace) Quantile(p float64) float64 {
	if p < 0 || p > 1 {
		panic(badPercentile)
	}
	// Mirror the form used by Rand: branch on the sign of p - 0.5.
	u := p - 0.5
	if u < 0 {
		return l.Mu + l.Scale*math.Log(1+2*u)
	}
	return l.Mu - l.Scale*math.Log(1-2*u)
}

// Prob computes the value of the probability density function at x.
func (l Laplace) Prob(x float64) float64 {
	lp := l.LogProb(x)
	return math.Exp(lp)
}
// Rand returns a random sample drawn from the distribution.
func (l Laplace) Rand() float64 {
	// Inverse-CDF sampling from a uniform variate.
	uniform := rand.Float64
	if l.Source != nil {
		uniform = l.Source.Float64
	}
	u := uniform() - 0.5
	if u < 0 {
		return l.Mu + l.Scale*math.Log(1+2*u)
	}
	return l.Mu - l.Scale*math.Log(1-2*u)
}
// Score returns the score function with respect to the parameters of the
// distribution at the input location x. The score function is the derivative
// of the log-likelihood at x with respect to the parameters
//  (∂/∂θ) log(p(x;θ))
// If deriv is non-nil, len(deriv) must equal the number of parameters otherwise
// Score will panic, and the derivative is stored in-place into deriv. If deriv
// is nil a new slice will be allocated and returned.
//
// The order is [∂LogProb / ∂Mu, ∂LogProb / ∂Scale].
//
// For more information, see https://en.wikipedia.org/wiki/Score_%28statistics%29.
//
// Special cases:
//  Score(l.Mu) = [0, -1/l.Scale]
func (l Laplace) Score(deriv []float64, x float64) []float64 {
	if deriv == nil {
		deriv = make([]float64, l.NumParameters())
	}
	if len(deriv) != l.NumParameters() {
		panic(badLength)
	}
	diff := x - l.Mu
	if diff > 0 {
		deriv[0] = 1 / l.Scale
	} else if diff < 0 {
		deriv[0] = -1 / l.Scale
	} else if diff == 0 {
		deriv[0] = 0
	} else {
		// must be NaN
		deriv[0] = math.NaN()
	}
	// log p = -log 2 - log b - |x-μ|/b, so ∂/∂b = |x-μ|/b² - 1/b.
	// Bug fix: the constant term was previously -0.5/b, which does not match
	// the derivative of -log b in LogProb above.
	deriv[1] = math.Abs(diff)/(l.Scale*l.Scale) - 1/l.Scale
	return deriv
}
// ScoreInput returns the score function with respect to the input of the
// distribution at the input location specified by x. The score function is the
// derivative of the log-likelihood
//  (d/dx) log(p(x)) .
// Special cases:
//  ScoreInput(l.Mu) = 0
func (l Laplace) ScoreInput(x float64) float64 {
	diff := x - l.Mu
	switch {
	case diff == 0:
		return 0
	case diff > 0:
		return -1 / l.Scale
	default:
		// diff < 0, or NaN (which falls through here exactly as before).
		return 1 / l.Scale
	}
}
// Skewness returns the skewness of the distribution.
// The Laplace distribution is symmetric, so the skewness is 0.
func (Laplace) Skewness() float64 {
	return 0
}

// StdDev returns the standard deviation of the distribution.
func (l Laplace) StdDev() float64 {
	return math.Sqrt2 * l.Scale
}

// Survival returns the survival function (complementary CDF) at x.
func (l Laplace) Survival(x float64) float64 {
	z := (x - l.Mu) / l.Scale
	if x < l.Mu {
		return 1 - 0.5*math.Exp(z)
	}
	return 0.5 * math.Exp(-z)
}
// UnmarshalParameters implements the ParameterMarshaler interface
func (l *Laplace) UnmarshalParameters(p []Parameter) {
	if len(p) != l.NumParameters() {
		panic(badLength)
	}
	// Short-circuiting preserves the original check order (p[0] then p[1]).
	if p[0].Name != "Mu" || p[1].Name != "Scale" {
		panic("laplace: " + panicNameMismatch)
	}
	l.Mu = p[0].Value
	l.Scale = p[1].Value
}
// Variance returns the variance of the probability distribution,
// which for the Laplace distribution is 2·Scale².
func (l Laplace) Variance() float64 {
	return 2 * l.Scale * l.Scale
}

View File

@@ -0,0 +1,58 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"testing"
)
// TestLaplaceProb table-tests the standard Laplace distribution
// (Mu = 0, Scale = 1) against high-precision reference values.
func TestLaplaceProb(t *testing.T) {
	pts := []univariateProbPoint{
		{
			loc:     0,
			prob:    0.5,
			cumProb: 0.5,
			logProb: math.Log(0.5),
		},
		{
			loc:     -1,
			prob:    1 / (2 * math.E),
			cumProb: 0.1839397205857211607977618850807304337229055655158839172539184008487307478724499016785736371729598219,
			logProb: math.Log(1 / (2 * math.E)),
		},
		{
			loc:     1,
			prob:    1 / (2 * math.E),
			cumProb: 0.8160602794142788392022381149192695662770944344841160827460815991512692521275500983214263628270401781,
			logProb: math.Log(1 / (2 * math.E)),
		},
		{
			loc:     -7,
			prob:    1 / (2 * math.Pow(math.E, 7)),
			cumProb: 0.0004559409827772581040015680422046413132368622637180269204080667109447399446551532646631395032324502210,
			logProb: math.Log(1 / (2 * math.Pow(math.E, 7))),
		},
		{
			loc:     7,
			prob:    1 / (2 * math.Pow(math.E, 7)),
			cumProb: 0.9995440590172227418959984319577953586867631377362819730795919332890552600553448467353368604967675498,
			logProb: math.Log(1 / (2 * math.Pow(math.E, 7))),
		},
		{
			loc:     -20,
			prob:    math.Exp(-20.69314718055994530941723212145817656807550013436025525412068000949339362196969471560586332699641869),
			cumProb: 1.030576811219278913982970190077910488187903637799551846486122330814582011892279676639955463952790684 * 1e-9,
			logProb: -20.69314718055994530941723212145817656807550013436025525412068000949339362196969471560586332699641869,
		},
		{
			loc:     20,
			prob:    math.Exp(-20.69314718055994530941723212145817656807550013436025525412068000949339362196969471560586332699641869),
			cumProb: 0.999999998969423188780721086017029809922089511812096362200448153513877669185417988107720323360044536,
			logProb: -20.69314718055994530941723212145817656807550013436025525412068000949339362196969471560586332699641869,
		},
	}
	testDistributionProbs(t, Laplace{Mu: 0, Scale: 1}, "Laplace", pts)
}

112
stat/distuv/lognormal.go Normal file
View File

@@ -0,0 +1,112 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"math/rand"
)
// LogNormal represents a random variable whose log is normally distributed.
// The probability density function is given by
//  1/(x σ √2π) exp(-(ln(x)-μ)^2/(2σ^2))
// (fixed a mismatched parenthesis in the formula above: the squared term
// belongs inside the exponential).
type LogNormal struct {
	Mu    float64
	Sigma float64
	// Source is the source of random numbers used by Rand. If Source is nil,
	// the global source in math/rand is used.
	Source *rand.Rand
}
// CDF computes the value of the cumulative density function at x.
func (l LogNormal) CDF(x float64) float64 {
	z := (math.Log(x) - l.Mu) / (math.Sqrt2 * l.Sigma)
	return 0.5 + 0.5*math.Erf(z)
}

// Entropy returns the differential entropy of the distribution.
func (l LogNormal) Entropy() float64 {
	return 0.5 + 0.5*math.Log(2*math.Pi*l.Sigma*l.Sigma) + l.Mu
}

// ExKurtosis returns the excess kurtosis of the distribution.
func (l LogNormal) ExKurtosis() float64 {
	s2 := l.Sigma * l.Sigma
	return math.Exp(4*s2) + 2*math.Exp(3*s2) + 3*math.Exp(2*s2) - 6
}
// LogProb computes the natural logarithm of the value of the probability density function at x.
// It returns -Inf for x <= 0, outside the support of the distribution.
func (l LogNormal) LogProb(x float64) float64 {
	// Bug fix: the guard was previously x < 0, so x == 0 fell through to
	// math.Log(0) = -Inf and the expression below evaluated to
	// (-Inf) + (+Inf) = NaN. The density at 0 is 0, so the log-density is -Inf.
	if x <= 0 {
		return math.Inf(-1)
	}
	logx := math.Log(x)
	normdiff := (logx - l.Mu) / l.Sigma
	return -0.5*normdiff*normdiff - logx - math.Log(l.Sigma) - logRoot2Pi
}
// Mean returns the mean of the probability distribution,
// exp(μ + σ²/2).
func (l LogNormal) Mean() float64 {
	return math.Exp(l.Mu + 0.5*l.Sigma*l.Sigma)
}

// Median returns the median of the probability distribution, exp(μ).
func (l LogNormal) Median() float64 {
	return math.Exp(l.Mu)
}
// Mode returns the mode of the probability distribution.
//
// The mode of a log-normal random variable is exp(μ - σ²). The previous
// implementation returned μ, which is the mode of log(X), not of X.
func (l LogNormal) Mode() float64 {
	return math.Exp(l.Mu - l.Sigma*l.Sigma)
}
// NumParameters returns the number of parameters in the distribution.
func (LogNormal) NumParameters() int {
	return 2
}

// Prob computes the value of the probability density function at x.
func (l LogNormal) Prob(x float64) float64 {
	lp := l.LogProb(x)
	return math.Exp(lp)
}

// Quantile returns the inverse of the cumulative probability distribution.
// Quantile panics if p is outside [0, 1].
func (l LogNormal) Quantile(p float64) float64 {
	if p < 0 || p > 1 {
		panic(badPercentile)
	}
	// Exponentiate the corresponding normal quantile.
	// Formula from http://www.math.uah.edu/stat/special/LogNormal.html.
	return math.Exp(l.Mu + l.Sigma*UnitNormal.Quantile(p))
}
// Rand returns a random sample drawn from the distribution.
func (l LogNormal) Rand() float64 {
	// Exponentiate a sample from the underlying normal distribution.
	normal := rand.NormFloat64
	if l.Source != nil {
		normal = l.Source.NormFloat64
	}
	return math.Exp(normal()*l.Sigma + l.Mu)
}
// Skewness returns the skewness of the distribution.
func (l LogNormal) Skewness() float64 {
	s2 := l.Sigma * l.Sigma
	return (math.Exp(s2) + 2) * math.Sqrt(math.Exp(s2)-1)
}

// StdDev returns the standard deviation of the probability distribution.
func (l LogNormal) StdDev() float64 {
	return math.Sqrt(l.Variance())
}

// Survival returns the survival function (complementary CDF) at x.
func (l LogNormal) Survival(x float64) float64 {
	z := (math.Log(x) - l.Mu) / (math.Sqrt2 * l.Sigma)
	return 0.5 * (1 - math.Erf(z))
}

// Variance returns the variance of the probability distribution.
func (l LogNormal) Variance() float64 {
	s2 := l.Sigma * l.Sigma
	return (math.Exp(s2) - 1) * math.Exp(2*l.Mu+s2)
}

View File

@@ -0,0 +1,26 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import "testing"
// TestLognormal runs the generic full-distribution checks over several
// LogNormal parameterizations.
func TestLognormal(t *testing.T) {
	for i, dist := range []LogNormal{
		{
			Mu:    0.1,
			Sigma: 0.3,
		},
		{
			Mu:    0.01,
			Sigma: 0.01,
		},
		{
			Mu:    2,
			Sigma: 0.01,
		},
	} {
		testFullDist(t, dist, i, true)
	}
}

254
stat/distuv/norm.go Normal file
View File

@@ -0,0 +1,254 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"math/rand"
"github.com/gonum/floats"
"github.com/gonum/mathext"
"github.com/gonum/stat"
)
// UnitNormal is an instantiation of the normal distribution with Mu = 0 and Sigma = 1.
var UnitNormal = Normal{Mu: 0, Sigma: 1}

// Normal represents a normal (Gaussian) distribution (https://en.wikipedia.org/wiki/Normal_distribution).
type Normal struct {
	Mu    float64 // Mean of the normal distribution
	Sigma float64 // Standard deviation of the normal distribution
	// Source is the source of random numbers used by Rand. If Source is nil,
	// the global source in math/rand is used.
	Source *rand.Rand

	// Needs to be Mu and Sigma and not Mean and StdDev because Normal has functions
	// Mean and StdDev
}
// CDF computes the value of the cumulative density function at x.
func (n Normal) CDF(x float64) float64 {
	z := (x - n.Mu) / (n.Sigma * math.Sqrt2)
	return 0.5 * (1 + math.Erf(z))
}
// ConjugateUpdate updates the parameters of the distribution from the sufficient
// statistics of a set of samples. The sufficient statistics, suffStat, have been
// observed with nSamples observations. The prior values of the distribution are those
// currently in the distribution, and have been observed with priorStrength samples.
//
// For the normal distribution, the sufficient statistics are the mean and
// uncorrected standard deviation of the samples.
// The prior is having seen strength[0] samples with mean Normal.Mu
// and strength[1] samples with standard deviation Normal.Sigma. As a result of
// this function, Normal.Mu and Normal.Sigma are updated based on the weighted
// samples, and strength is modified to include the new number of samples observed.
//
// This function panics if len(suffStat) != 2 or len(priorStrength) != 2.
func (n *Normal) ConjugateUpdate(suffStat []float64, nSamples float64, priorStrength []float64) {
	// TODO: Support prior strength with math.Inf(1) to allow updating with
	// a known mean/standard deviation

	// Weighted totals combining the new samples with the prior observations.
	totalMeanSamples := nSamples + priorStrength[0]
	totalSum := suffStat[0]*nSamples + n.Mu*priorStrength[0]

	totalVarianceSamples := nSamples + priorStrength[1]
	// sample variance
	totalVariance := nSamples * suffStat[1] * suffStat[1]
	// add prior variance
	totalVariance += priorStrength[1] * n.Sigma * n.Sigma
	// add cross variance from the difference of the means
	meanDiff := (suffStat[0] - n.Mu)
	totalVariance += priorStrength[0] * nSamples * meanDiff * meanDiff / totalMeanSamples

	// Commit the update, then record the new observation count in the prior.
	n.Mu = totalSum / totalMeanSamples
	n.Sigma = math.Sqrt(totalVariance / totalVarianceSamples)
	floats.AddConst(nSamples, priorStrength)
}
// Entropy returns the differential entropy of the distribution.
func (n Normal) Entropy() float64 {
	return 0.5 * (log2Pi + 1 + 2*math.Log(n.Sigma))
}

// ExKurtosis returns the excess kurtosis of the distribution.
// It is zero for every normal distribution.
func (Normal) ExKurtosis() float64 {
	return 0
}

// Fit sets the parameters of the probability distribution from the
// data samples x with relative weights w. If weights is nil, then all the weights
// are 1. If weights is not nil, then the len(weights) must equal len(samples).
func (n *Normal) Fit(samples, weights []float64) {
	// Fitting is a conjugate update starting from a zero-strength prior.
	stats := make([]float64, n.NumSuffStat())
	ns := n.SuffStat(samples, weights, stats)
	noPrior := make([]float64, n.NumSuffStat())
	n.ConjugateUpdate(stats, ns, noPrior)
}
// LogProb computes the natural logarithm of the value of the probability density function at x.
func (n Normal) LogProb(x float64) float64 {
	// -log(√2π) - log σ - (x-μ)²/(2σ²); kept as one expression to preserve
	// the exact floating-point evaluation order.
	return negLogRoot2Pi - math.Log(n.Sigma) - (x-n.Mu)*(x-n.Mu)/(2*n.Sigma*n.Sigma)
}

// Mean returns the mean of the probability distribution.
func (n Normal) Mean() float64 {
	return n.Mu
}

// Median returns the median of the normal distribution.
func (n Normal) Median() float64 {
	return n.Mu
}

// Mode returns the mode of the normal distribution.
func (n Normal) Mode() float64 {
	return n.Mu
}

// NumParameters returns the number of parameters in the distribution.
func (Normal) NumParameters() int {
	return 2
}
// NumSuffStat returns the number of sufficient statistics for the distribution.
func (Normal) NumSuffStat() int {
	return 2
}

// Prob computes the value of the probability density function at x.
func (n Normal) Prob(x float64) float64 {
	lp := n.LogProb(x)
	return math.Exp(lp)
}
// Quantile returns the inverse of the cumulative probability distribution.
// Quantile panics if p is outside [0, 1].
func (n Normal) Quantile(p float64) float64 {
	if p < 0 || p > 1 {
		panic(badPercentile)
	}
	return n.Mu + n.Sigma*mathext.NormalQuantile(p)
}

// Rand returns a random sample drawn from the distribution.
func (n Normal) Rand() float64 {
	// Scale and shift a standard normal variate.
	normal := rand.NormFloat64
	if n.Source != nil {
		normal = n.Source.NormFloat64
	}
	return normal()*n.Sigma + n.Mu
}
// Score returns the score function with respect to the parameters of the
// distribution at the input location x. The score function is the derivative
// of the log-likelihood at x with respect to the parameters
//  (∂/∂θ) log(p(x;θ))
// If deriv is non-nil, len(deriv) must equal the number of parameters otherwise
// Score will panic, and the derivative is stored in-place into deriv. If deriv
// is nil a new slice will be allocated and returned.
//
// The order is [∂LogProb / ∂Mu, ∂LogProb / ∂Sigma].
//
// For more information, see https://en.wikipedia.org/wiki/Score_%28statistics%29.
func (n Normal) Score(deriv []float64, x float64) []float64 {
	if deriv == nil {
		deriv = make([]float64, n.NumParameters())
	}
	if len(deriv) != n.NumParameters() {
		panic(badLength)
	}
	// ∂/∂μ log p = (x-μ)/σ².
	deriv[0] = (x - n.Mu) / (n.Sigma * n.Sigma)
	// ∂/∂σ log p = (z² - 1)/σ where z = (x-μ)/σ; expressions are kept in
	// their original form to preserve floating-point evaluation order.
	deriv[1] = 1 / n.Sigma * (-1 + ((x-n.Mu)/n.Sigma)*((x-n.Mu)/n.Sigma))
	return deriv
}
// ScoreInput returns the score function with respect to the input of the
// distribution at the input location specified by x. The score function is the
// derivative of the log-likelihood
//  (d/dx) log(p(x)) .
func (n Normal) ScoreInput(x float64) float64 {
	// This is -(x-μ)/σ², written in unsimplified form to preserve the
	// original floating-point evaluation order.
	return -(1 / (2 * n.Sigma * n.Sigma)) * 2 * (x - n.Mu)
}
// Skewness returns the skewness of the distribution.
// It is zero for every normal distribution.
func (Normal) Skewness() float64 {
	return 0
}

// StdDev returns the standard deviation of the probability distribution.
func (n Normal) StdDev() float64 {
	return n.Sigma
}
// SuffStat computes the sufficient statistics of a set of samples to update
// the distribution. The sufficient statistics are stored in place, and the
// effective number of samples are returned.
//
// The normal distribution has two sufficient statistics, the mean of the samples
// and the standard deviation of the samples.
//
// If weights is nil, the weights are assumed to be 1, otherwise panics if
// len(samples) != len(weights). Panics if len(suffStat) != NumSuffStat().
func (Normal) SuffStat(samples, weights, suffStat []float64) (nSamples float64) {
	if len(weights) != 0 && len(samples) != len(weights) {
		panic(badLength)
	}
	if len(suffStat) != (Normal{}).NumSuffStat() {
		panic(badSuffStat)
	}

	// The effective sample count is the total weight (or the count when
	// the weights are implicitly 1).
	if len(weights) == 0 {
		nSamples = float64(len(samples))
	} else {
		nSamples = floats.Sum(weights)
	}

	mean := stat.Mean(samples, weights)
	suffStat[0] = mean

	// Use Moment and not StdDev because we want it to be uncorrected
	variance := stat.MomentAbout(2, samples, mean, weights)
	suffStat[1] = math.Sqrt(variance)
	return nSamples
}
// Survival returns the survival function (complementary CDF) at x.
func (n Normal) Survival(x float64) float64 {
	z := (x - n.Mu) / (n.Sigma * math.Sqrt2)
	return 0.5 * (1 - math.Erf(z))
}
// setParameters modifies the parameters of the distribution.
func (n *Normal) setParameters(p []Parameter) {
	if len(p) != n.NumParameters() {
		panic("normal: incorrect number of parameters to set")
	}
	// Short-circuiting preserves the original check order (p[0] then p[1]).
	if p[0].Name != "Mu" || p[1].Name != "Sigma" {
		panic("normal: " + panicNameMismatch)
	}
	n.Mu = p[0].Value
	n.Sigma = p[1].Value
}
// Variance returns the variance of the probability distribution, σ².
func (n Normal) Variance() float64 {
	return n.Sigma * n.Sigma
}
// parameters returns the parameters of the distribution, writing into p when
// it is non-nil (and of the correct length) and allocating otherwise.
func (n Normal) parameters(p []Parameter) []Parameter {
	if p == nil {
		p = make([]Parameter, n.NumParameters())
	} else if len(p) != n.NumParameters() {
		panic("normal: improper parameter length")
	}
	p[0] = Parameter{Name: "Mu", Value: n.Mu}
	p[1] = Parameter{Name: "Sigma", Value: n.Sigma}
	return p
}

View File

@@ -0,0 +1,35 @@
// Copyright ©2017 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv_test
import (
"fmt"
"github.com/gonum/stat"
"github.com/gonum/stat/distuv"
)
// ExampleNormal draws many samples from a Normal(Mu: 2, Sigma: 5)
// distribution and shows the sample mean with its standard error.
func ExampleNormal() {
	// Create a normal distribution
	dist := distuv.Normal{
		Mu:    2,
		Sigma: 5,
	}
	data := make([]float64, 1e5)
	// Draw some random values from the normal distribution
	for i := range data {
		data[i] = dist.Rand()
	}
	mean, std := stat.MeanStdDev(data, nil)
	meanErr := stat.StdErr(std, float64(len(data)))
	fmt.Printf("mean= %1.1f ± %0.1v\n", mean, meanErr)
	// Output:
	// mean= 2.0 ± 0.02
}

171
stat/distuv/norm_test.go Normal file
View File

@@ -0,0 +1,171 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"testing"
"github.com/gonum/floats"
)
// TestNormalProbs tests LogProb, Prob, CumProb, and Quantile
func TestNormalProbs(t *testing.T) {
pts := []univariateProbPoint{
{
loc: 0,
prob: oneOverRoot2Pi,
cumProb: 0.5,
logProb: -0.91893853320467274178032973640561763986139747363778341281715,
},
{
loc: -1,
prob: 0.2419707245191433497978301929355606548286719707374350254875550842811000635700832945083112946939424047,
cumProb: 0.158655253931457051414767454367962077522087033273395609012605,
logProb: math.Log(0.2419707245191433497978301929355606548286719707374350254875550842811000635700832945083112946939424047),
},
{
loc: 1,
prob: 0.2419707245191433497978301929355606548286719707374350254875550842811000635700832945083112946939424047,
cumProb: 0.841344746068542948585232545632037922477912966726604390987394,
logProb: math.Log(0.2419707245191433497978301929355606548286719707374350254875550842811000635700832945083112946939424047),
},
{
loc: -7,
prob: 9.134720408364593342868613916794233023000190834851937054490546361277622761970225469305158915808284566e-12,
cumProb: 1.279812543885835004383623690780832998032844154198717929e-12,
logProb: math.Log(9.134720408364593342868613916794233023000190834851937054490546361277622761970225469305158915808284566e-12),
},
{
loc: 7,
prob: 9.134720408364593342868613916794233023000190834851937054490546361277622761970225469305158915808284566e-12,
cumProb: 0.99999999999872018745611416499561637630921916700196715584580,
logProb: math.Log(9.134720408364593342868613916794233023000190834851937054490546361277622761970225469305158915808284566e-12),
},
}
testDistributionProbs(t, Normal{Mu: 0, Sigma: 1}, "normal", pts)
pts = []univariateProbPoint{
{
loc: 2,
prob: 0.07978845608028653558798921198687637369517172623298693153318516593413158517986036770025046678146138729,
cumProb: 0.5,
logProb: math.Log(0.07978845608028653558798921198687637369517172623298693153318516593413158517986036770025046678146138729),
},
{
loc: -3,
prob: 0.04839414490382866995956603858711213096573439414748700509751101685622001271401665890166225893878848095,
cumProb: 0.158655253931457051414767454367962077522087033273395609012605,
logProb: math.Log(0.04839414490382866995956603858711213096573439414748700509751101685622001271401665890166225893878848095),
},
{
loc: 7,
prob: 0.04839414490382866995956603858711213096573439414748700509751101685622001271401665890166225893878848095,
cumProb: 0.841344746068542948585232545632037922477912966726604390987394,
logProb: math.Log(0.04839414490382866995956603858711213096573439414748700509751101685622001271401665890166225893878848095),
},
{
loc: -33,
prob: 1.826944081672918668573722783358846604600038166970387410898109272255524552394045093861031783161656913e-12,
cumProb: 1.279812543885835004383623690780832998032844154198717929e-12,
logProb: math.Log(1.826944081672918668573722783358846604600038166970387410898109272255524552394045093861031783161656913e-12),
},
{
loc: 37,
prob: 1.826944081672918668573722783358846604600038166970387410898109272255524552394045093861031783161656913e-12,
cumProb: 0.99999999999872018745611416499561637630921916700196715584580,
logProb: math.Log(1.826944081672918668573722783358846604600038166970387410898109272255524552394045093861031783161656913e-12),
},
}
testDistributionProbs(t, Normal{Mu: 2, Sigma: 5}, "normal", pts)
}
// TestNormFitPrior checks conjugate posterior updating for the Normal distribution.
func TestNormFitPrior(t *testing.T) {
	testConjugateUpdate(t, func() ConjugateUpdater { return &Normal{Mu: -10, Sigma: 6} })
}
// TestNormScore checks the parameter derivatives (score function) of Normal
// against numerical differentiation for several parameter settings.
func TestNormScore(t *testing.T) {
	for _, test := range []*Normal{
		{
			Mu:    0,
			Sigma: 1,
		},
		{
			Mu:    0.32238,
			Sigma: 13.69,
		},
	} {
		testDerivParam(t, test)
	}
}
func TestNormalQuantile(t *testing.T) {
// Values from https://www.johndcook.com/blog/normal_cdf_inverse/
p := []float64{
0.0000001,
0.00001,
0.001,
0.05,
0.15,
0.25,
0.35,
0.45,
0.55,
0.65,
0.75,
0.85,
0.95,
0.999,
0.99999,
0.9999999,
}
ans := []float64{
-5.199337582187471,
-4.264890793922602,
-3.090232306167813,
-1.6448536269514729,
-1.0364333894937896,
-0.6744897501960817,
-0.38532046640756773,
-0.12566134685507402,
0.12566134685507402,
0.38532046640756773,
0.6744897501960817,
1.0364333894937896,
1.6448536269514729,
3.090232306167813,
4.264890793922602,
5.199337582187471,
}
for i, v := range p {
got := UnitNormal.Quantile(v)
if !floats.EqualWithinAbsOrRel(got, ans[i], 1e-10, 1e-10) {
t.Errorf("Quantile mismatch. Case %d, want: %v, got: %v", i, ans[i], got)
}
}
}
// TestNormFitPanic ensures Fit does not panic on valid samples with nil weights.
func TestNormFitPanic(t *testing.T) {
	n := Normal{Mu: 0, Sigma: 1}
	defer func() {
		r := recover()
		if r != nil {
			t.Errorf("unexpected panic for Fit call: %v", r)
		}
	}()
	n.Fit(make([]float64, 10), nil)
}
// BenchmarkNormalQuantile times Quantile across probabilities spanning [0, 1].
func BenchmarkNormalQuantile(b *testing.B) {
	n := Normal{Mu: 2, Sigma: 3.1}
	ps := make([]float64, 1000) // ensure there are small values
	floats.Span(ps, 0, 1)
	for i := 0; i < b.N; i++ {
		for _, p := range ps {
			_ = n.Quantile(p)
		}
	}
}

160
stat/distuv/studentst.go Normal file
View File

@@ -0,0 +1,160 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"math/rand"
"github.com/gonum/mathext"
)
const logPi = 1.1447298858494001741 // http://oeis.org/A053510
// StudentsT implements the three-parameter Student's T distribution, a distribution
// over the real numbers.
//
// The Student's T distribution has density function
// Γ((ν+1)/2) / (sqrt(νπ) Γ(ν/2) σ) (1 + 1/ν * ((x-μ)/σ)^2)^(-(ν+1)/2)
//
// The Student's T distribution approaches the normal distribution as ν → ∞.
//
// For more information, see https://en.wikipedia.org/wiki/Student%27s_t-distribution,
// specifically https://en.wikipedia.org/wiki/Student%27s_t-distribution#Non-standardized_Student.27s_t-distribution .
//
// The standard Student's T distribution is with Mu = 0, and Sigma = 1.
type StudentsT struct {
	// Mu is the location parameter of the distribution, and the mean of the
	// distribution
	Mu float64

	// Sigma is the scale parameter of the distribution. It is related to the
	// standard deviation by std = Sigma * sqrt(Nu/(Nu-2))
	Sigma float64

	// Nu is the shape parameter of the distribution, representing the number of
	// degrees of freedom of the distribution, and one less than the number of
	// observations from a Normal distribution.
	Nu float64

	// Src is the source of random numbers used by Rand; if nil the global
	// source in math/rand is used.
	Src *rand.Rand
}
// CDF computes the value of the cumulative distribution function at x.
func (s StudentsT) CDF(x float64) float64 {
	// Standardize to a zero-location, unit-scale variable.
	y := (x - s.Mu) / s.Sigma
	if y == 0 {
		return 0.5
	}
	// With t = nu/(y^2+nu),
	//  F(y) = 1 - 0.5*I_t(nu/2, 1/2)  for y > 0
	//  F(y) = 0.5*I_t(nu/2, 1/2)      for y < 0
	t := s.Nu / (y*y + s.Nu)
	if y < 0 {
		return 0.5 * mathext.RegIncBeta(s.Nu/2, 0.5, t)
	}
	return 1 - 0.5*mathext.RegIncBeta(s.Nu/2, 0.5, t)
}
// LogProb computes the natural logarithm of the value of the probability
// density function at x.
func (s StudentsT) LogProb(x float64) float64 {
	// Standardized deviation from the location.
	z := (x - s.Mu) / s.Sigma
	lgNum, _ := math.Lgamma((s.Nu + 1) / 2)
	lgDen, _ := math.Lgamma(s.Nu / 2)
	return lgNum - lgDen - 0.5*math.Log(s.Nu) - 0.5*logPi - math.Log(s.Sigma) - ((s.Nu+1)/2)*math.Log(1+z*z/s.Nu)
}
// Mean returns the mean of the probability distribution, which equals Mu.
func (s StudentsT) Mean() float64 {
	return s.Mu
}

// Mode returns the mode of the distribution. The distribution is unimodal
// and symmetric about Mu.
func (s StudentsT) Mode() float64 {
	return s.Mu
}

// NumParameters returns the number of parameters in the distribution.
func (StudentsT) NumParameters() int {
	return 3
}

// Prob computes the value of the probability density function at x.
func (s StudentsT) Prob(x float64) float64 {
	return math.Exp(s.LogProb(x))
}
// Quantile returns the inverse of the cumulative distribution function.
//
// Quantile panics if p is outside [0, 1].
func (s StudentsT) Quantile(p float64) float64 {
	if p < 0 || p > 1 {
		panic(badPercentile)
	}
	// Inverts the identity used in CDF:
	// F(x) = 1 - 0.5 * I_t(x)(nu/2, 1/2)
	// t(x) = nu/(t^2 + nu)
	if p == 0.5 {
		return s.Mu
	}
	var y float64
	if p > 0.5 {
		// Know t > 0
		t := mathext.InvRegIncBeta(s.Nu/2, 0.5, 2*(1-p))
		y = math.Sqrt(s.Nu * (1 - t) / t)
	} else {
		t := mathext.InvRegIncBeta(s.Nu/2, 0.5, 2*p)
		y = -math.Sqrt(s.Nu * (1 - t) / t)
	}
	// Convert the standardized quantile back to the distribution scale.
	return y*s.Sigma + s.Mu
}
// Rand returns a random sample drawn from the distribution.
//
// A variate is generated as a standard normal divided by the square root of
// a chi-squared variate over its degrees of freedom, then scaled and shifted.
// See http://www.math.uah.edu/stat/special/Student.html
func (s StudentsT) Rand() float64 {
	n := Normal{0, 1, s.Src}.Rand()
	c := Gamma{s.Nu / 2, 0.5, s.Src}.Rand() // Gamma(ν/2, 1/2) is chi-squared with ν degrees of freedom.
	z := n / math.Sqrt(c/s.Nu)
	return z*s.Sigma + s.Mu
}
// StdDev returns the standard deviation of the probability distribution.
//
// The standard deviation is undefined for ν <= 1, and this returns math.NaN().
// See Variance for the ν ranges where the result is NaN or +Inf.
func (s StudentsT) StdDev() float64 {
	return math.Sqrt(s.Variance())
}
// Survival returns the survival function (complementary CDF) at x.
func (s StudentsT) Survival(x float64) float64 {
	// Standardize to a zero-location, unit-scale variable.
	y := (x - s.Mu) / s.Sigma
	switch {
	case y == 0:
		return 0.5
	case y > 0:
		// With t = nu/(y^2+nu), S(y) = 0.5*I_t(nu/2, 1/2) for y > 0.
		return 0.5 * mathext.RegIncBeta(s.Nu/2, 0.5, s.Nu/(y*y+s.Nu))
	default:
		return 1 - 0.5*mathext.RegIncBeta(s.Nu/2, 0.5, s.Nu/(y*y+s.Nu))
	}
}
// Variance returns the variance of the probability distribution.
//
// The variance is undefined for ν <= 1, and this returns math.NaN(). The
// variance is infinite for 1 < ν <= 2, and this returns math.Inf(1).
func (s StudentsT) Variance() float64 {
	// ν == 1 (the Cauchy distribution) also has undefined variance, so the
	// comparison must be <=, not <.
	if s.Nu <= 1 {
		return math.NaN()
	}
	if s.Nu <= 2 {
		return math.Inf(1)
	}
	return s.Sigma * s.Sigma * s.Nu / (s.Nu - 2)
}

View File

@@ -0,0 +1,83 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"math/rand"
"sort"
"testing"
"github.com/gonum/floats"
)
// TestStudentsTProb checks Prob against reference values computed with scipy.
func TestStudentsTProb(t *testing.T) {
	for _, test := range []struct {
		x, mu, sigma, nu, want float64
	}{
		// Values comparison with scipy.
		{0.01, 0, 1, 2.74, 0.364778548181318},
		{-0.01, 0, 1, 2.74, 0.364778548181318},
		{0.4, 0, 1, 1.6, 0.30376391362582678},
		{-0.4, 0, 1, 1.6, 0.30376391362582678},
		{0.2, 15, 5, 10, 0.0024440848858034393},
	} {
		pdf := StudentsT{test.mu, test.sigma, test.nu, nil}.Prob(test.x)
		if !floats.EqualWithinAbsOrRel(pdf, test.want, 1e-10, 1e-10) {
			t.Errorf("Pdf mismatch, x = %v, Nu = %v. Got %v, want %v", test.x, test.nu, pdf, test.want)
		}
	}
}
// TestStudentsT draws large samples from several StudentsT distributions and
// checks the sample statistics against the analytic values via testStudentsT.
func TestStudentsT(t *testing.T) {
	src := rand.New(rand.NewSource(1))
	for i, b := range []StudentsT{
		{0, 1, 3.3, src},
		{0, 1, 7.2, src},
		{0, 1, 12, src},
		{0.9, 0.8, 6, src},
	} {
		testStudentsT(t, b, i)
	}
}
// testStudentsT is the sampling-based consistency check shared by TestStudentsT.
func testStudentsT(t *testing.T, c StudentsT, i int) {
	tol := 1e-2
	const n = 1e6
	const bins = 50
	x := make([]float64, n)
	generateSamples(x, c)
	sort.Float64s(x)

	testRandLogProbContinuous(t, i, math.Inf(-1), x, c, tol, bins)
	checkMean(t, i, x, c, tol)
	// The variance and standard deviation are only finite for Nu > 2.
	if c.Nu > 2 {
		checkVarAndStd(t, i, x, c, tol)
	}
	checkProbContinuous(t, i, x, c, 1e-3)
	checkQuantileCDFSurvival(t, i, x, c, tol)
	checkProbQuantContinuous(t, i, x, c, tol)
}
// TestStudentsTQuantile checks that CDF inverts Quantile over a grid of
// probabilities in [0, 1] for several parameter settings.
func TestStudentsTQuantile(t *testing.T) {
	nSteps := 101
	probs := make([]float64, nSteps)
	floats.Span(probs, 0, 1)
	for i, b := range []StudentsT{
		{0, 1, 3.3, nil},
		{0, 1, 7.2, nil},
		{0, 1, 12, nil},
		{0.9, 0.8, 6, nil},
	} {
		for _, p := range probs {
			x := b.Quantile(p)
			p2 := b.CDF(x)
			if !floats.EqualWithinAbsOrRel(p, p2, 1e-10, 1e-10) {
				t.Errorf("mismatch between CDF and Quantile. Case %v. Want %v, got %v", i, p, p2)
				break
			}
		}
	}
}

159
stat/distuv/uniform.go Normal file
View File

@@ -0,0 +1,159 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"math/rand"
)
// UnitUniform is an instantiation of the uniform distribution with Min = 0
// and Max = 1.
var UnitUniform = Uniform{Min: 0, Max: 1}

// Uniform represents a continuous uniform distribution (https://en.wikipedia.org/wiki/Uniform_distribution_%28continuous%29).
type Uniform struct {
	// Min and Max are the bounds of the support; the methods assume Min < Max.
	Min float64
	Max float64
	// Source is the source of random numbers used by Rand; if nil the global
	// source in math/rand is used.
	Source *rand.Rand
}
// CDF computes the value of the cumulative distribution function at x.
func (u Uniform) CDF(x float64) float64 {
	if x < u.Min {
		return 0
	}
	if x > u.Max {
		return 1
	}
	// Linear interpolation between the bounds.
	return (x - u.Min) / (u.Max - u.Min)
}
// Uniform doesn't have any of the DLogProbD? because the derivative is 0 everywhere
// except where it's undefined

// Entropy returns the entropy of the distribution, log(Max - Min).
func (u Uniform) Entropy() float64 {
	return math.Log(u.Max - u.Min)
}

// ExKurtosis returns the excess kurtosis of the distribution, a constant
// -6/5 for every uniform distribution.
func (Uniform) ExKurtosis() float64 {
	return -6.0 / 5.0
}
// Uniform doesn't have Fit because it's a bad idea to fit a uniform from data.

// LogProb computes the natural logarithm of the value of the probability density function at x.
// It is -Inf outside [Min, Max] and -log(Max-Min) inside.
func (u Uniform) LogProb(x float64) float64 {
	if x < u.Min || x > u.Max {
		return math.Inf(-1)
	}
	return -math.Log(u.Max - u.Min)
}
// MarshalParameters implements the ParameterMarshaler interface.
//
// It fills p with the "Min" and "Max" parameters, and panics if
// len(p) != NumParameters().
func (u Uniform) MarshalParameters(p []Parameter) {
	if len(p) != u.NumParameters() {
		panic("uniform: improper parameter length")
	}
	p[0].Name = "Min"
	p[0].Value = u.Min
	p[1].Name = "Max"
	p[1].Value = u.Max
	// Note: the redundant trailing bare return was removed (staticcheck S1023).
}
// Mean returns the mean of the probability distribution, the midpoint of
// [Min, Max].
func (u Uniform) Mean() float64 {
	return (u.Max + u.Min) / 2
}

// Median returns the median of the probability distribution. For the uniform
// distribution the median equals the mean.
func (u Uniform) Median() float64 {
	return (u.Max + u.Min) / 2
}

// Uniform doesn't have a mode because it's any value in the distribution

// NumParameters returns the number of parameters in the distribution.
func (Uniform) NumParameters() int {
	return 2
}
// Prob computes the value of the probability density function at x.
// The density is constant inside [Min, Max] and zero outside.
func (u Uniform) Prob(x float64) float64 {
	if x < u.Min || x > u.Max {
		return 0
	}
	return 1 / (u.Max - u.Min)
}
// Quantile returns the inverse of the cumulative probability distribution.
//
// Quantile panics if p is outside [0, 1].
func (u Uniform) Quantile(p float64) float64 {
	if p < 0 || p > 1 {
		panic(badPercentile)
	}
	return p*(u.Max-u.Min) + u.Min
}
// Rand returns a random sample drawn from the distribution, scaling a
// uniform [0, 1) variate into [Min, Max).
func (u Uniform) Rand() float64 {
	f := rand.Float64
	if u.Source != nil {
		f = u.Source.Float64
	}
	return f()*(u.Max-u.Min) + u.Min
}
// Skewness returns the skewness of the distribution. The uniform
// distribution is symmetric, so the skewness is zero.
func (Uniform) Skewness() float64 {
	return 0
}

// StdDev returns the standard deviation of the probability distribution.
func (u Uniform) StdDev() float64 {
	return math.Sqrt(u.Variance())
}
// Survival returns the survival function (complementary CDF) at x.
func (u Uniform) Survival(x float64) float64 {
	switch {
	case x < u.Min:
		return 1
	case x > u.Max:
		return 0
	}
	return (u.Max - x) / (u.Max - u.Min)
}
// UnmarshalParameters implements the ParameterMarshaler interface.
// It panics unless p holds exactly the parameters "Min" and "Max", in order.
func (u *Uniform) UnmarshalParameters(p []Parameter) {
	if len(p) != u.NumParameters() {
		panic("uniform: incorrect number of parameters to set")
	}
	if p[0].Name != "Min" || p[1].Name != "Max" {
		panic("uniform: " + panicNameMismatch)
	}
	u.Min = p[0].Value
	u.Max = p[1].Value
}
// Variance returns the variance of the probability distribution,
// (Max-Min)²/12.
func (u Uniform) Variance() float64 {
	width := u.Max - u.Min
	return 1.0 / 12.0 * width * width
}

247
stat/distuv/weibull.go Normal file
View File

@@ -0,0 +1,247 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"math/cmplx"
"math/rand"
)
// Weibull distribution. Valid range for x is [0,+∞).
type Weibull struct {
	// Shape parameter of the distribution. A value of 1 represents
	// the exponential distribution. A value of 2 represents the
	// Rayleigh distribution. Valid range is (0,+∞).
	K float64
	// Scale parameter of the distribution. Valid range is (0,+∞).
	Lambda float64
	// Source of random numbers; if nil the global source in math/rand is used.
	Source *rand.Rand
}
// CDF computes the value of the cumulative distribution function at x.
func (w Weibull) CDF(x float64) float64 {
	if x < 0 {
		return 0
	}
	// |exp(LogCDF(x))| is the survival probability exp(-(x/λ)^k).
	return 1 - cmplx.Abs(cmplx.Exp(w.LogCDF(x)))
}
// Entropy returns the entropy of the distribution.
func (w Weibull) Entropy() float64 {
	return eulerGamma*(1-1/w.K) + math.Log(w.Lambda/w.K) + 1
}

// ExKurtosis returns the excess kurtosis of the distribution.
func (w Weibull) ExKurtosis() float64 {
	return (-6*w.gammaIPow(1, 4) + 12*w.gammaIPow(1, 2)*math.Gamma(1+2/w.K) - 3*w.gammaIPow(2, 2) - 4*math.Gamma(1+1/w.K)*math.Gamma(1+3/w.K) + math.Gamma(1+4/w.K)) / math.Pow(math.Gamma(1+2/w.K)-w.gammaIPow(1, 2), 2)
}

// gammaIPow is a shortcut for computing Γ(1+i/K) raised to the power pow.
func (w Weibull) gammaIPow(i, pow float64) float64 {
	return math.Pow(math.Gamma(1+i/w.K), pow)
}
// LogCDF computes the value of the log of the cumulative density function at x.
//
// The result is complex: it is log(-1) + (-(x/λ)^k), so that
// cmplx.Abs(cmplx.Exp(LogCDF(x))) recovers the survival probability, as
// done by CDF.
func (w Weibull) LogCDF(x float64) complex128 {
	if x < 0 {
		return 0
	}
	return cmplx.Log(-1) + complex(-math.Pow(x/w.Lambda, w.K), 0)
}
// LogProb computes the natural logarithm of the value of the probability
// density function at x. Zero is returned if x is less than zero.
//
// Special cases occur when x == 0, and the result depends on the shape
// parameter as follows:
//  If 0 < K < 1, LogProb returns +Inf.
//  If K == 1, LogProb returns 0.
//  If K > 1, LogProb returns -Inf.
func (w Weibull) LogProb(x float64) float64 {
	if x < 0 {
		return 0
	}
	return math.Log(w.K) - math.Log(w.Lambda) + (w.K-1)*(math.Log(x)-math.Log(w.Lambda)) - math.Pow(x/w.Lambda, w.K)
}
// LogSurvival returns the log of the survival function (complementary CDF)
// at x, which is -(x/λ)^k for x >= 0 and 0 for x < 0.
func (w Weibull) LogSurvival(x float64) float64 {
	if x < 0 {
		return 0
	}
	return -math.Pow(x/w.Lambda, w.K)
}
// Mean returns the mean of the probability distribution, λΓ(1+1/k).
func (w Weibull) Mean() float64 {
	return w.Lambda * math.Gamma(1+1/w.K)
}

// Median returns the median of the Weibull distribution, λ ln(2)^(1/k).
func (w Weibull) Median() float64 {
	return w.Lambda * math.Pow(ln2, 1/w.K)
}
// Mode returns the mode of the Weibull distribution.
//
// The mode is NaN in the special case where the K (shape) parameter
// is less than 1.
func (w Weibull) Mode() float64 {
	if w.K > 1 {
		return w.Lambda * math.Pow((w.K-1)/w.K, 1/w.K)
	} else if w.K == 1 {
		return 0
	} else {
		return math.NaN()
	}
}

// NumParameters returns the number of parameters in the distribution.
func (Weibull) NumParameters() int {
	return 2
}
// Prob computes the value of the probability density function at x.
// The density is zero for x < 0.
func (w Weibull) Prob(x float64) float64 {
	if x < 0 {
		return 0
	}
	return math.Exp(w.LogProb(x))
}
// Quantile returns the inverse of the cumulative probability distribution.
//
// Quantile panics if p is outside [0, 1].
func (w Weibull) Quantile(p float64) float64 {
	if p < 0 || p > 1 {
		panic(badPercentile)
	}
	// Closed-form inverse of the Weibull CDF: λ(-ln(1-p))^(1/k).
	return w.Lambda * math.Pow(-math.Log(1-p), 1/w.K)
}
// Rand returns a random sample drawn from the distribution by inverse
// transform sampling: the quantile function applied to a uniform variate.
func (w Weibull) Rand() float64 {
	f := rand.Float64
	if w.Source != nil {
		f = w.Source.Float64
	}
	return w.Quantile(f())
}
// Score returns the score function with respect to the parameters of the
// distribution at the input location x. The score function is the derivative
// of the log-likelihood at x with respect to the parameters
//  (∂/∂θ) log(p(x;θ))
// If deriv is non-nil, len(deriv) must equal the number of parameters otherwise
// Score will panic, and the derivative is stored in-place into deriv. If deriv
// is nil a new slice will be allocated and returned.
//
// The order is [∂LogProb / ∂K, ∂LogProb / ∂λ].
//
// For more information, see https://en.wikipedia.org/wiki/Score_%28statistics%29.
//
// Special cases:
//  Score(0) = [NaN, NaN]
func (w Weibull) Score(deriv []float64, x float64) []float64 {
	if deriv == nil {
		deriv = make([]float64, w.NumParameters())
	}
	if len(deriv) != w.NumParameters() {
		panic(badLength)
	}
	if x > 0 {
		deriv[0] = 1/w.K + math.Log(x) - math.Log(w.Lambda) - (math.Log(x)-math.Log(w.Lambda))*math.Pow(x/w.Lambda, w.K)
		deriv[1] = (w.K * (math.Pow(x/w.Lambda, w.K) - 1)) / w.Lambda
		return deriv
	}
	if x < 0 {
		// The density is identically zero for x < 0, so the derivatives
		// with respect to the parameters are zero.
		deriv[0] = 0
		deriv[1] = 0
		return deriv
	}
	// x == 0 is the documented special case: both entries are NaN.
	// (The original assigned deriv[0] twice, leaving deriv[1] stale.)
	deriv[0] = math.NaN()
	deriv[1] = math.NaN()
	return deriv
}
// ScoreInput returns the score function with respect to the input of the
// distribution at the input location specified by x. The score function is the
// derivative of the log-likelihood
//  (d/dx) log(p(x)) .
//
// Special cases:
//  ScoreInput(0) = NaN
func (w Weibull) ScoreInput(x float64) float64 {
	switch {
	case x > 0:
		return (-w.K*math.Pow(x/w.Lambda, w.K) + w.K - 1) / x
	case x < 0:
		return 0
	}
	return math.NaN()
}
// Skewness returns the skewness of the distribution.
func (w Weibull) Skewness() float64 {
	stdDev := w.StdDev()
	// The terms of the skewness formula are computed in log space for
	// numerical stability, then exponentiated and combined.
	firstGamma, firstGammaSign := math.Lgamma(1 + 3/w.K)
	logFirst := firstGamma + 3*(math.Log(w.Lambda)-math.Log(stdDev))
	logSecond := math.Log(3) + math.Log(w.Mean()) + 2*math.Log(stdDev) - 3*math.Log(stdDev)
	logThird := 3 * (math.Log(w.Mean()) - math.Log(stdDev))
	return float64(firstGammaSign)*math.Exp(logFirst) - math.Exp(logSecond) - math.Exp(logThird)
}

// StdDev returns the standard deviation of the probability distribution.
func (w Weibull) StdDev() float64 {
	return math.Sqrt(w.Variance())
}
// Survival returns the survival function (complementary CDF) at x:
// exp(-(x/λ)^k) for x >= 0 and 1 for x < 0 (see LogSurvival).
func (w Weibull) Survival(x float64) float64 {
	return math.Exp(w.LogSurvival(x))
}
// setParameters modifies the parameters of the distribution. It panics
// unless p holds exactly the parameters "K" and "λ", in that order.
func (w *Weibull) setParameters(p []Parameter) {
	if len(p) != w.NumParameters() {
		panic("weibull: incorrect number of parameters to set")
	}
	if p[0].Name != "K" || p[1].Name != "λ" {
		panic("weibull: " + panicNameMismatch)
	}
	w.K = p[0].Value
	w.Lambda = p[1].Value
}
// Variance returns the variance of the probability distribution,
// λ²(Γ(1+2/k) - Γ(1+1/k)²).
func (w Weibull) Variance() float64 {
	// Direct multiplication replaces math.Pow(w.Lambda, 2): it is exact
	// and avoids the general-purpose Pow machinery for a simple square.
	return w.Lambda * w.Lambda * (math.Gamma(1+2/w.K) - w.gammaIPow(1, 2))
}
// parameters returns the parameters of the distribution. If p is nil a new
// slice is allocated; otherwise len(p) must equal NumParameters.
func (w Weibull) parameters(p []Parameter) []Parameter {
	if p == nil {
		p = make([]Parameter, w.NumParameters())
	} else if len(p) != w.NumParameters() {
		panic("weibull: improper parameter length")
	}
	p[0].Name, p[0].Value = "K", w.K
	p[1].Name, p[1].Value = "λ", w.Lambda
	return p
}

209
stat/distuv/weibull_test.go Normal file
View File

@@ -0,0 +1,209 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"testing"
)
func TestHalfKStandardWeibullProb(t *testing.T) {
pts := []univariateProbPoint{
{
loc: 0,
prob: math.Inf(1),
cumProb: 0,
logProb: math.Inf(1),
},
{
loc: -1,
prob: 0,
cumProb: 0,
logProb: 0,
},
{
loc: 1,
prob: 0.183939720585721,
cumProb: 0.632120558828558,
logProb: -1.693147180559950,
},
{
loc: 20,
prob: 0.001277118038048,
cumProb: 0.988577109006533,
logProb: -6.663149272336520,
},
}
testDistributionProbs(t, Weibull{K: 0.5, Lambda: 1}, "0.5K Standard Weibull", pts)
}
func TestExponentialStandardWeibullProb(t *testing.T) {
pts := []univariateProbPoint{
{
loc: 0,
prob: 1,
cumProb: 0,
logProb: math.Inf(1),
},
{
loc: -1,
prob: 0,
cumProb: 0,
logProb: 0,
},
{
loc: 1,
prob: 0.367879441171442,
cumProb: 0.632120558828558,
logProb: -1.0,
},
{
loc: 20,
prob: 0.000000002061154,
cumProb: 0.999999997938846,
logProb: -20.0,
},
}
testDistributionProbs(t, Weibull{K: 1, Lambda: 1}, "1K (Exponential) Standard Weibull", pts)
}
func TestRayleighStandardWeibullProb(t *testing.T) {
pts := []univariateProbPoint{
{
loc: 0,
prob: 0,
cumProb: 0,
logProb: math.Inf(-1),
},
{
loc: -1,
prob: 0,
cumProb: 0,
logProb: 0,
},
{
loc: 1,
prob: 0.735758882342885,
cumProb: 0.632120558828558,
logProb: -0.306852819440055,
},
{
loc: 20,
prob: 0,
cumProb: 1,
logProb: -396.31112054588607,
},
}
testDistributionProbs(t, Weibull{K: 2, Lambda: 1}, "2K (Rayleigh) Standard Weibull", pts)
}
func TestFiveKStandardWeibullProb(t *testing.T) {
pts := []univariateProbPoint{
{
loc: 0,
prob: 0,
cumProb: 0,
logProb: math.Inf(-1),
},
{
loc: -1,
prob: 0,
cumProb: 0,
logProb: 0,
},
{
loc: 1,
prob: 1.839397205857210,
cumProb: 0.632120558828558,
logProb: 0.609437912434100,
},
{
loc: 20,
prob: 0,
cumProb: 1,
logProb: -3199986.4076329935,
},
}
testDistributionProbs(t, Weibull{K: 5, Lambda: 1}, "5K Standard Weibull", pts)
}
func TestScaledUpHalfKStandardWeibullProb(t *testing.T) {
pts := []univariateProbPoint{
{
loc: 0,
prob: math.Inf(1),
cumProb: 0,
logProb: math.Inf(1),
},
{
loc: -1,
prob: 0,
cumProb: 0,
logProb: 0,
},
{
loc: 1,
prob: 0.180436508682207,
cumProb: 0.558022622759326,
logProb: -1.712376315541750,
},
{
loc: 20,
prob: 0.002369136850928,
cumProb: 0.974047406098605,
logProb: -6.045229588092130,
},
}
testDistributionProbs(t, Weibull{K: 0.5, Lambda: 1.5}, "0.5K 1.5λ Weibull", pts)
}
func TestScaledDownHalfKStandardWeibullProb(t *testing.T) {
pts := []univariateProbPoint{
{
loc: 0,
prob: math.Inf(1),
cumProb: 0,
logProb: math.Inf(1),
},
{
loc: -1,
prob: 0,
cumProb: 0,
logProb: 0,
},
{
loc: 1,
prob: 0.171909491538362,
cumProb: 0.756883265565786,
logProb: -1.760787152653070,
},
{
loc: 20,
prob: 0.000283302579100,
cumProb: 0.998208237166091,
logProb: -8.168995047393730,
},
}
testDistributionProbs(t, Weibull{K: 0.5, Lambda: 0.5}, "0.5K 0.5λ Weibull", pts)
}
// TestWeibullScore checks the parameter derivatives (score function) of
// Weibull against numerical differentiation for several parameter settings.
func TestWeibullScore(t *testing.T) {
	for _, test := range []*Weibull{
		{
			K:      1,
			Lambda: 1,
		},
		{
			K:      2,
			Lambda: 3.6,
		},
		{
			K:      3.4,
			Lambda: 8,
		},
	} {
		testDerivParam(t, test)
	}
}

81
stat/faithful_test.go Normal file
View File

@@ -0,0 +1,81 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stat
// faithful is the faithful data set from R.
var faithful = struct{ waiting, eruptions []float64 }{
waiting: []float64{
79, 54, 74, 62, 85, 55, 88, 85,
51, 85, 54, 84, 78, 47, 83, 52,
62, 84, 52, 79, 51, 47, 78, 69,
74, 83, 55, 76, 78, 79, 73, 77,
66, 80, 74, 52, 48, 80, 59, 90,
80, 58, 84, 58, 73, 83, 64, 53,
82, 59, 75, 90, 54, 80, 54, 83,
71, 64, 77, 81, 59, 84, 48, 82,
60, 92, 78, 78, 65, 73, 82, 56,
79, 71, 62, 76, 60, 78, 76, 83,
75, 82, 70, 65, 73, 88, 76, 80,
48, 86, 60, 90, 50, 78, 63, 72,
84, 75, 51, 82, 62, 88, 49, 83,
81, 47, 84, 52, 86, 81, 75, 59,
89, 79, 59, 81, 50, 85, 59, 87,
53, 69, 77, 56, 88, 81, 45, 82,
55, 90, 45, 83, 56, 89, 46, 82,
51, 86, 53, 79, 81, 60, 82, 77,
76, 59, 80, 49, 96, 53, 77, 77,
65, 81, 71, 70, 81, 93, 53, 89,
45, 86, 58, 78, 66, 76, 63, 88,
52, 93, 49, 57, 77, 68, 81, 81,
73, 50, 85, 74, 55, 77, 83, 83,
51, 78, 84, 46, 83, 55, 81, 57,
76, 84, 77, 81, 87, 77, 51, 78,
60, 82, 91, 53, 78, 46, 77, 84,
49, 83, 71, 80, 49, 75, 64, 76,
53, 94, 55, 76, 50, 82, 54, 75,
78, 79, 78, 78, 70, 79, 70, 54,
86, 50, 90, 54, 54, 77, 79, 64,
75, 47, 86, 63, 85, 82, 57, 82,
67, 74, 54, 83, 73, 73, 88, 80,
71, 83, 56, 79, 78, 84, 58, 83,
43, 60, 75, 81, 46, 90, 46, 74,
},
eruptions: []float64{
3.600, 1.800, 3.333, 2.283, 4.533, 2.883, 4.700, 3.600,
1.950, 4.350, 1.833, 3.917, 4.200, 1.750, 4.700, 2.167,
1.750, 4.800, 1.600, 4.250, 1.800, 1.750, 3.450, 3.067,
4.533, 3.600, 1.967, 4.083, 3.850, 4.433, 4.300, 4.467,
3.367, 4.033, 3.833, 2.017, 1.867, 4.833, 1.833, 4.783,
4.350, 1.883, 4.567, 1.750, 4.533, 3.317, 3.833, 2.100,
4.633, 2.000, 4.800, 4.716, 1.833, 4.833, 1.733, 4.883,
3.717, 1.667, 4.567, 4.317, 2.233, 4.500, 1.750, 4.800,
1.817, 4.400, 4.167, 4.700, 2.067, 4.700, 4.033, 1.967,
4.500, 4.000, 1.983, 5.067, 2.017, 4.567, 3.883, 3.600,
4.133, 4.333, 4.100, 2.633, 4.067, 4.933, 3.950, 4.517,
2.167, 4.000, 2.200, 4.333, 1.867, 4.817, 1.833, 4.300,
4.667, 3.750, 1.867, 4.900, 2.483, 4.367, 2.100, 4.500,
4.050, 1.867, 4.700, 1.783, 4.850, 3.683, 4.733, 2.300,
4.900, 4.417, 1.700, 4.633, 2.317, 4.600, 1.817, 4.417,
2.617, 4.067, 4.250, 1.967, 4.600, 3.767, 1.917, 4.500,
2.267, 4.650, 1.867, 4.167, 2.800, 4.333, 1.833, 4.383,
1.883, 4.933, 2.033, 3.733, 4.233, 2.233, 4.533, 4.817,
4.333, 1.983, 4.633, 2.017, 5.100, 1.800, 5.033, 4.000,
2.400, 4.600, 3.567, 4.000, 4.500, 4.083, 1.800, 3.967,
2.200, 4.150, 2.000, 3.833, 3.500, 4.583, 2.367, 5.000,
1.933, 4.617, 1.917, 2.083, 4.583, 3.333, 4.167, 4.333,
4.500, 2.417, 4.000, 4.167, 1.883, 4.583, 4.250, 3.767,
2.033, 4.433, 4.083, 1.833, 4.417, 2.183, 4.800, 1.833,
4.800, 4.100, 3.966, 4.233, 3.500, 4.366, 2.250, 4.667,
2.100, 4.350, 4.133, 1.867, 4.600, 1.783, 4.367, 3.850,
1.933, 4.500, 2.383, 4.700, 1.867, 3.833, 3.417, 4.233,
2.400, 4.800, 2.000, 4.150, 1.867, 4.267, 1.750, 4.483,
4.000, 4.117, 4.083, 4.267, 3.917, 4.550, 4.083, 2.417,
4.183, 2.217, 4.450, 1.883, 1.850, 4.283, 3.950, 2.333,
4.150, 2.350, 4.933, 2.900, 4.583, 3.833, 2.083, 4.367,
2.133, 4.350, 2.200, 4.450, 3.567, 4.500, 4.150, 3.817,
3.917, 4.450, 2.000, 4.283, 4.767, 4.533, 1.850, 4.250,
1.983, 2.250, 4.750, 4.117, 2.150, 4.417, 1.817, 4.467,
},
}

611
stat/moments_bench_test.go Normal file
View File

@@ -0,0 +1,611 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// a set of benchmarks to evaluate the performance of the various
// moment statistics: Mean, Variance, StdDev, MeanVariance, MeanStdDev,
// Covariance, Correlation, Skew, ExKurtosis, Moment, MomentAbout, ...
//
// It tests both weighted and unweighted versions by using a slice of
// all ones.
package stat
import (
"math/rand"
"testing"
)
const (
small = 10
medium = 1000
large = 100000
huge = 10000000
)
// tests for unweighted versions

// RandomSlice returns a slice of l pseudo-random float64 values in [0, 1).
func RandomSlice(l int) []float64 {
	s := make([]float64, l)
	for i := 0; i < l; i++ {
		s[i] = rand.Float64()
	}
	return s
}
// benchmarkMean is the shared driver for the Mean benchmarks; wts may be
// nil for the unweighted case.
func benchmarkMean(b *testing.B, s, wts []float64) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		Mean(s, wts)
	}
}
func BenchmarkMeanSmall(b *testing.B) {
s := RandomSlice(small)
benchmarkMean(b, s, nil)
}
func BenchmarkMeanMedium(b *testing.B) {
s := RandomSlice(medium)
benchmarkMean(b, s, nil)
}
func BenchmarkMeanLarge(b *testing.B) {
s := RandomSlice(large)
benchmarkMean(b, s, nil)
}
func BenchmarkMeanHuge(b *testing.B) {
s := RandomSlice(huge)
benchmarkMean(b, s, nil)
}
func BenchmarkMeanSmallWeighted(b *testing.B) {
s := RandomSlice(small)
wts := RandomSlice(small)
benchmarkMean(b, s, wts)
}
func BenchmarkMeanMediumWeighted(b *testing.B) {
s := RandomSlice(medium)
wts := RandomSlice(medium)
benchmarkMean(b, s, wts)
}
func BenchmarkMeanLargeWeighted(b *testing.B) {
s := RandomSlice(large)
wts := RandomSlice(large)
benchmarkMean(b, s, wts)
}
func BenchmarkMeanHugeWeighted(b *testing.B) {
s := RandomSlice(huge)
wts := RandomSlice(huge)
benchmarkMean(b, s, wts)
}
// varianceSink prevents dead-code elimination of the benchmarked call.
var varianceSink float64

// benchmarkVariance times Variance on s with weights wts (nil for the
// unweighted form).
func benchmarkVariance(b *testing.B, s, wts []float64) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		varianceSink = Variance(s, wts)
	}
}

func BenchmarkVarianceSmall(b *testing.B) {
	s := RandomSlice(small)
	benchmarkVariance(b, s, nil)
}

func BenchmarkVarianceMedium(b *testing.B) {
	s := RandomSlice(medium)
	benchmarkVariance(b, s, nil)
}

func BenchmarkVarianceLarge(b *testing.B) {
	s := RandomSlice(large)
	benchmarkVariance(b, s, nil)
}

func BenchmarkVarianceHuge(b *testing.B) {
	s := RandomSlice(huge)
	benchmarkVariance(b, s, nil)
}

func BenchmarkVarianceSmallWeighted(b *testing.B) {
	s := RandomSlice(small)
	wts := RandomSlice(small)
	benchmarkVariance(b, s, wts)
}

func BenchmarkVarianceMediumWeighted(b *testing.B) {
	s := RandomSlice(medium)
	wts := RandomSlice(medium)
	benchmarkVariance(b, s, wts)
}

func BenchmarkVarianceLargeWeighted(b *testing.B) {
	s := RandomSlice(large)
	wts := RandomSlice(large)
	benchmarkVariance(b, s, wts)
}

func BenchmarkVarianceHugeWeighted(b *testing.B) {
	s := RandomSlice(huge)
	wts := RandomSlice(huge)
	benchmarkVariance(b, s, wts)
}
// stdDevSink prevents dead-code elimination of the benchmarked call.
var stdDevSink float64

// benchmarkStdDev times StdDev on s with weights wts (nil for the
// unweighted form).
func benchmarkStdDev(b *testing.B, s, wts []float64) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		stdDevSink = StdDev(s, wts)
	}
}

func BenchmarkStdDevSmall(b *testing.B) {
	s := RandomSlice(small)
	benchmarkStdDev(b, s, nil)
}

func BenchmarkStdDevMedium(b *testing.B) {
	s := RandomSlice(medium)
	benchmarkStdDev(b, s, nil)
}

func BenchmarkStdDevLarge(b *testing.B) {
	s := RandomSlice(large)
	benchmarkStdDev(b, s, nil)
}

func BenchmarkStdDevHuge(b *testing.B) {
	s := RandomSlice(huge)
	benchmarkStdDev(b, s, nil)
}

func BenchmarkStdDevSmallWeighted(b *testing.B) {
	s := RandomSlice(small)
	wts := RandomSlice(small)
	benchmarkStdDev(b, s, wts)
}

func BenchmarkStdDevMediumWeighted(b *testing.B) {
	s := RandomSlice(medium)
	wts := RandomSlice(medium)
	benchmarkStdDev(b, s, wts)
}

func BenchmarkStdDevLargeWeighted(b *testing.B) {
	s := RandomSlice(large)
	wts := RandomSlice(large)
	benchmarkStdDev(b, s, wts)
}

func BenchmarkStdDevHugeWeighted(b *testing.B) {
	s := RandomSlice(huge)
	wts := RandomSlice(huge)
	benchmarkStdDev(b, s, wts)
}
// meanVarianceMeanSink and meanVarianceVarSink prevent dead-code
// elimination of the benchmarked call.
var meanVarianceMeanSink, meanVarianceVarSink float64

// benchmarkMeanVariance times MeanVariance on s with weights wts (nil for
// the unweighted form).
func benchmarkMeanVariance(b *testing.B, s, wts []float64) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		meanVarianceMeanSink, meanVarianceVarSink = MeanVariance(s, wts)
	}
}

func BenchmarkMeanVarianceSmall(b *testing.B) {
	s := RandomSlice(small)
	benchmarkMeanVariance(b, s, nil)
}

func BenchmarkMeanVarianceMedium(b *testing.B) {
	s := RandomSlice(medium)
	benchmarkMeanVariance(b, s, nil)
}

func BenchmarkMeanVarianceLarge(b *testing.B) {
	s := RandomSlice(large)
	benchmarkMeanVariance(b, s, nil)
}

func BenchmarkMeanVarianceHuge(b *testing.B) {
	s := RandomSlice(huge)
	benchmarkMeanVariance(b, s, nil)
}

func BenchmarkMeanVarianceSmallWeighted(b *testing.B) {
	s := RandomSlice(small)
	wts := RandomSlice(small)
	benchmarkMeanVariance(b, s, wts)
}

func BenchmarkMeanVarianceMediumWeighted(b *testing.B) {
	s := RandomSlice(medium)
	wts := RandomSlice(medium)
	benchmarkMeanVariance(b, s, wts)
}

func BenchmarkMeanVarianceLargeWeighted(b *testing.B) {
	s := RandomSlice(large)
	wts := RandomSlice(large)
	benchmarkMeanVariance(b, s, wts)
}

func BenchmarkMeanVarianceHugeWeighted(b *testing.B) {
	s := RandomSlice(huge)
	wts := RandomSlice(huge)
	benchmarkMeanVariance(b, s, wts)
}
// meanStdDevMeanSink and meanStdDevStdSink prevent dead-code elimination
// of the benchmarked call.
var meanStdDevMeanSink, meanStdDevStdSink float64

// benchmarkMeanStdDev times MeanStdDev on s with weights wts (nil for the
// unweighted form).
func benchmarkMeanStdDev(b *testing.B, s, wts []float64) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		meanStdDevMeanSink, meanStdDevStdSink = MeanStdDev(s, wts)
	}
}

func BenchmarkMeanStdDevSmall(b *testing.B) {
	s := RandomSlice(small)
	benchmarkMeanStdDev(b, s, nil)
}

func BenchmarkMeanStdDevMedium(b *testing.B) {
	s := RandomSlice(medium)
	benchmarkMeanStdDev(b, s, nil)
}

func BenchmarkMeanStdDevLarge(b *testing.B) {
	s := RandomSlice(large)
	benchmarkMeanStdDev(b, s, nil)
}

func BenchmarkMeanStdDevHuge(b *testing.B) {
	s := RandomSlice(huge)
	benchmarkMeanStdDev(b, s, nil)
}

func BenchmarkMeanStdDevSmallWeighted(b *testing.B) {
	s := RandomSlice(small)
	wts := RandomSlice(small)
	benchmarkMeanStdDev(b, s, wts)
}

func BenchmarkMeanStdDevMediumWeighted(b *testing.B) {
	s := RandomSlice(medium)
	wts := RandomSlice(medium)
	benchmarkMeanStdDev(b, s, wts)
}

func BenchmarkMeanStdDevLargeWeighted(b *testing.B) {
	s := RandomSlice(large)
	wts := RandomSlice(large)
	benchmarkMeanStdDev(b, s, wts)
}

func BenchmarkMeanStdDevHugeWeighted(b *testing.B) {
	s := RandomSlice(huge)
	wts := RandomSlice(huge)
	benchmarkMeanStdDev(b, s, wts)
}
// covarianceSink prevents dead-code elimination of the benchmarked call.
var covarianceSink float64

// benchmarkCovariance times Covariance on s1 and s2 with weights wts (nil
// for the unweighted form).
func benchmarkCovariance(b *testing.B, s1, s2, wts []float64) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		covarianceSink = Covariance(s1, s2, wts)
	}
}

func BenchmarkCovarianceSmall(b *testing.B) {
	s1 := RandomSlice(small)
	s2 := RandomSlice(small)
	benchmarkCovariance(b, s1, s2, nil)
}

func BenchmarkCovarianceMedium(b *testing.B) {
	s1 := RandomSlice(medium)
	s2 := RandomSlice(medium)
	benchmarkCovariance(b, s1, s2, nil)
}

func BenchmarkCovarianceLarge(b *testing.B) {
	s1 := RandomSlice(large)
	s2 := RandomSlice(large)
	benchmarkCovariance(b, s1, s2, nil)
}

func BenchmarkCovarianceHuge(b *testing.B) {
	s1 := RandomSlice(huge)
	s2 := RandomSlice(huge)
	benchmarkCovariance(b, s1, s2, nil)
}

func BenchmarkCovarianceSmallWeighted(b *testing.B) {
	s1 := RandomSlice(small)
	s2 := RandomSlice(small)
	wts := RandomSlice(small)
	benchmarkCovariance(b, s1, s2, wts)
}

func BenchmarkCovarianceMediumWeighted(b *testing.B) {
	s1 := RandomSlice(medium)
	s2 := RandomSlice(medium)
	wts := RandomSlice(medium)
	benchmarkCovariance(b, s1, s2, wts)
}

func BenchmarkCovarianceLargeWeighted(b *testing.B) {
	s1 := RandomSlice(large)
	s2 := RandomSlice(large)
	wts := RandomSlice(large)
	benchmarkCovariance(b, s1, s2, wts)
}

func BenchmarkCovarianceHugeWeighted(b *testing.B) {
	s1 := RandomSlice(huge)
	s2 := RandomSlice(huge)
	wts := RandomSlice(huge)
	benchmarkCovariance(b, s1, s2, wts)
}
// correlationSink prevents dead-code elimination of the benchmarked call.
var correlationSink float64

// benchmarkCorrelation times Correlation on s1 and s2 with weights wts
// (nil for the unweighted form).
func benchmarkCorrelation(b *testing.B, s1, s2, wts []float64) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		correlationSink = Correlation(s1, s2, wts)
	}
}

func BenchmarkCorrelationSmall(b *testing.B) {
	s1 := RandomSlice(small)
	s2 := RandomSlice(small)
	benchmarkCorrelation(b, s1, s2, nil)
}

func BenchmarkCorrelationMedium(b *testing.B) {
	s1 := RandomSlice(medium)
	s2 := RandomSlice(medium)
	benchmarkCorrelation(b, s1, s2, nil)
}

func BenchmarkCorrelationLarge(b *testing.B) {
	s1 := RandomSlice(large)
	s2 := RandomSlice(large)
	benchmarkCorrelation(b, s1, s2, nil)
}

func BenchmarkCorrelationHuge(b *testing.B) {
	s1 := RandomSlice(huge)
	s2 := RandomSlice(huge)
	benchmarkCorrelation(b, s1, s2, nil)
}

func BenchmarkCorrelationSmallWeighted(b *testing.B) {
	s1 := RandomSlice(small)
	s2 := RandomSlice(small)
	wts := RandomSlice(small)
	benchmarkCorrelation(b, s1, s2, wts)
}

func BenchmarkCorrelationMediumWeighted(b *testing.B) {
	s1 := RandomSlice(medium)
	s2 := RandomSlice(medium)
	wts := RandomSlice(medium)
	benchmarkCorrelation(b, s1, s2, wts)
}

func BenchmarkCorrelationLargeWeighted(b *testing.B) {
	s1 := RandomSlice(large)
	s2 := RandomSlice(large)
	wts := RandomSlice(large)
	benchmarkCorrelation(b, s1, s2, wts)
}

func BenchmarkCorrelationHugeWeighted(b *testing.B) {
	s1 := RandomSlice(huge)
	s2 := RandomSlice(huge)
	wts := RandomSlice(huge)
	benchmarkCorrelation(b, s1, s2, wts)
}
// skewSink prevents dead-code elimination of the benchmarked call.
var skewSink float64

// benchmarkSkew times Skew on s with weights wts (nil for the unweighted
// form).
func benchmarkSkew(b *testing.B, s, wts []float64) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		skewSink = Skew(s, wts)
	}
}

func BenchmarkSkewSmall(b *testing.B) {
	s := RandomSlice(small)
	benchmarkSkew(b, s, nil)
}

func BenchmarkSkewMedium(b *testing.B) {
	s := RandomSlice(medium)
	benchmarkSkew(b, s, nil)
}

func BenchmarkSkewLarge(b *testing.B) {
	s := RandomSlice(large)
	benchmarkSkew(b, s, nil)
}

func BenchmarkSkewHuge(b *testing.B) {
	s := RandomSlice(huge)
	benchmarkSkew(b, s, nil)
}

func BenchmarkSkewSmallWeighted(b *testing.B) {
	s := RandomSlice(small)
	wts := RandomSlice(small)
	benchmarkSkew(b, s, wts)
}

func BenchmarkSkewMediumWeighted(b *testing.B) {
	s := RandomSlice(medium)
	wts := RandomSlice(medium)
	benchmarkSkew(b, s, wts)
}

func BenchmarkSkewLargeWeighted(b *testing.B) {
	s := RandomSlice(large)
	wts := RandomSlice(large)
	benchmarkSkew(b, s, wts)
}

func BenchmarkSkewHugeWeighted(b *testing.B) {
	s := RandomSlice(huge)
	wts := RandomSlice(huge)
	benchmarkSkew(b, s, wts)
}
// exKurtosisSink prevents dead-code elimination of the benchmarked call.
var exKurtosisSink float64

// benchmarkExKurtosis times ExKurtosis on s with weights wts (nil for the
// unweighted form).
func benchmarkExKurtosis(b *testing.B, s, wts []float64) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		exKurtosisSink = ExKurtosis(s, wts)
	}
}

func BenchmarkExKurtosisSmall(b *testing.B) {
	s := RandomSlice(small)
	benchmarkExKurtosis(b, s, nil)
}

func BenchmarkExKurtosisMedium(b *testing.B) {
	s := RandomSlice(medium)
	benchmarkExKurtosis(b, s, nil)
}

func BenchmarkExKurtosisLarge(b *testing.B) {
	s := RandomSlice(large)
	benchmarkExKurtosis(b, s, nil)
}

func BenchmarkExKurtosisHuge(b *testing.B) {
	s := RandomSlice(huge)
	benchmarkExKurtosis(b, s, nil)
}

func BenchmarkExKurtosisSmallWeighted(b *testing.B) {
	s := RandomSlice(small)
	wts := RandomSlice(small)
	benchmarkExKurtosis(b, s, wts)
}

func BenchmarkExKurtosisMediumWeighted(b *testing.B) {
	s := RandomSlice(medium)
	wts := RandomSlice(medium)
	benchmarkExKurtosis(b, s, wts)
}

func BenchmarkExKurtosisLargeWeighted(b *testing.B) {
	s := RandomSlice(large)
	wts := RandomSlice(large)
	benchmarkExKurtosis(b, s, wts)
}

func BenchmarkExKurtosisHugeWeighted(b *testing.B) {
	s := RandomSlice(huge)
	wts := RandomSlice(huge)
	benchmarkExKurtosis(b, s, wts)
}
// momentSink prevents dead-code elimination of the benchmarked call.
var momentSink float64

// benchmarkMoment times the n'th Moment of s with weights wts (nil for
// the unweighted form).
func benchmarkMoment(b *testing.B, n float64, s, wts []float64) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		momentSink = Moment(n, s, wts)
	}
}

func BenchmarkMomentSmall(b *testing.B) {
	s := RandomSlice(small)
	benchmarkMoment(b, 5, s, nil)
}

func BenchmarkMomentMedium(b *testing.B) {
	s := RandomSlice(medium)
	benchmarkMoment(b, 5, s, nil)
}

func BenchmarkMomentLarge(b *testing.B) {
	s := RandomSlice(large)
	benchmarkMoment(b, 5, s, nil)
}

func BenchmarkMomentHuge(b *testing.B) {
	s := RandomSlice(huge)
	benchmarkMoment(b, 5, s, nil)
}

func BenchmarkMomentSmallWeighted(b *testing.B) {
	s := RandomSlice(small)
	wts := RandomSlice(small)
	benchmarkMoment(b, 5, s, wts)
}

func BenchmarkMomentMediumWeighted(b *testing.B) {
	s := RandomSlice(medium)
	wts := RandomSlice(medium)
	benchmarkMoment(b, 5, s, wts)
}

func BenchmarkMomentLargeWeighted(b *testing.B) {
	s := RandomSlice(large)
	wts := RandomSlice(large)
	benchmarkMoment(b, 5, s, wts)
}

func BenchmarkMomentHugeWeighted(b *testing.B) {
	s := RandomSlice(huge)
	wts := RandomSlice(huge)
	benchmarkMoment(b, 5, s, wts)
}
// momentAboutSink prevents dead-code elimination of the benchmarked call.
var momentAboutSink float64

// benchmarkMomentAbout times the n'th MomentAbout mean of s with weights
// wts (nil for the unweighted form).
func benchmarkMomentAbout(b *testing.B, n float64, s []float64, mean float64, wts []float64) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		momentAboutSink = MomentAbout(n, s, mean, wts)
	}
}

func BenchmarkMomentAboutSmall(b *testing.B) {
	s := RandomSlice(small)
	benchmarkMomentAbout(b, 5, s, 0, nil)
}

func BenchmarkMomentAboutMedium(b *testing.B) {
	s := RandomSlice(medium)
	benchmarkMomentAbout(b, 5, s, 0, nil)
}

func BenchmarkMomentAboutLarge(b *testing.B) {
	s := RandomSlice(large)
	benchmarkMomentAbout(b, 5, s, 0, nil)
}

func BenchmarkMomentAboutHuge(b *testing.B) {
	s := RandomSlice(huge)
	benchmarkMomentAbout(b, 5, s, 0, nil)
}

func BenchmarkMomentAboutSmallWeighted(b *testing.B) {
	s := RandomSlice(small)
	wts := RandomSlice(small)
	benchmarkMomentAbout(b, 5, s, 0, wts)
}

func BenchmarkMomentAboutMediumWeighted(b *testing.B) {
	s := RandomSlice(medium)
	wts := RandomSlice(medium)
	benchmarkMomentAbout(b, 5, s, 0, wts)
}

func BenchmarkMomentAboutLargeWeighted(b *testing.B) {
	s := RandomSlice(large)
	wts := RandomSlice(large)
	benchmarkMomentAbout(b, 5, s, 0, wts)
}

func BenchmarkMomentAboutHugeWeighted(b *testing.B) {
	s := RandomSlice(huge)
	wts := RandomSlice(huge)
	benchmarkMomentAbout(b, 5, s, 0, wts)
}

320
stat/pca_cca.go Normal file
View File

@@ -0,0 +1,320 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stat
import (
"errors"
"math"
"github.com/gonum/floats"
"github.com/gonum/matrix"
"github.com/gonum/matrix/mat64"
)
// PC is a type for computing and extracting the principal components of a
// matrix. The results of the principal components analysis are only valid
// if the call to PrincipalComponents was successful.
type PC struct {
	// n and d are the dimensions of the last factorized
	// matrix: n observations by d variables.
	n, d int
	// weights is a copy of the observation weights passed to
	// PrincipalComponents; nil for an unweighted analysis.
	weights []float64
	// svd holds the factorization of the centered data; it is
	// passed back to svdFactorizeCentered so its storage can be
	// reused between calls.
	svd *mat64.SVD
	// ok records whether the last factorization succeeded.
	ok bool
}
// PrincipalComponents performs a weighted principal components analysis on the
// matrix of the input data which is represented as an n×d matrix a where each
// row is an observation and each column is a variable.
//
// PrincipalComponents centers the variables but does not scale the variance.
//
// The weights slice is used to weight the observations. If weights is nil, each
// weight is considered to have a value of one, otherwise the length of weights
// must match the number of observations or PrincipalComponents will panic.
//
// PrincipalComponents returns whether the analysis was successful.
func (c *PC) PrincipalComponents(a mat64.Matrix, weights []float64) (ok bool) {
	c.n, c.d = a.Dims()
	if weights != nil && len(weights) != c.n {
		panic("stat: len(weights) != observations")
	}

	c.svd, c.ok = svdFactorizeCentered(c.svd, a, weights)
	if c.ok {
		if weights == nil {
			// Reset to nil rather than truncating: when the receiver is
			// reused after a weighted analysis, append(c.weights[:0])
			// would leave a non-nil zero-length slice behind, causing
			// Vars to take the weighted branch and compute a negative
			// normalization factor 1/(0-1).
			c.weights = nil
		} else {
			c.weights = append(c.weights[:0], weights...)
		}
	}
	return c.ok
}
// Vectors returns the component direction vectors of a principal components
// analysis in the columns of a d×min(n, d) matrix. A non-nil dst is used as
// the destination and must either be zero-sized or have d×min(n, d) shape;
// a nil dst causes a new mat64.Dense to be allocated.
func (c *PC) Vectors(dst *mat64.Dense) *mat64.Dense {
	if !c.ok {
		panic("stat: use of unsuccessful principal components analysis")
	}
	if dst == nil {
		dst = &mat64.Dense{}
	} else {
		d, n := dst.Dims()
		if (d != 0 || n != 0) && (d != c.d || n != min(c.n, c.d)) {
			panic(matrix.ErrShape)
		}
	}
	dst.VFromSVD(c.svd)
	return dst
}
// Vars returns the column variances of the principal component scores,
// b * vecs, where b is a matrix with centered columns. Variances are returned
// in descending order.
// If dst is not nil it is used to store the variances and returned.
// Vars will panic if the receiver has not successfully performed a principal
// components analysis or dst is not nil and the length of dst is not min(n, d).
func (c *PC) Vars(dst []float64) []float64 {
	if !c.ok {
		panic("stat: use of unsuccessful principal components analysis")
	}
	if dst != nil && len(dst) != min(c.n, c.d) {
		panic("stat: length of slice does not match analysis")
	}

	dst = c.svd.Values(dst)
	var f float64
	// Use a length test rather than a nil test so that an empty weights
	// slice (which can be left behind when the receiver is reused) is
	// treated as the unweighted case; floats.Sum of an empty slice would
	// otherwise yield a negative factor 1/(0-1).
	if len(c.weights) == 0 {
		f = 1 / float64(c.n-1)
	} else {
		f = 1 / (floats.Sum(c.weights) - 1)
	}
	// The singular values of the centered (and weight-scaled) data relate
	// to the score variances by var_i = f * s_i^2.
	for i, v := range dst {
		dst[i] = f * v * v
	}
	return dst
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
// CC is a type for computing the canonical correlations of a pair of matrices.
// The results of the canonical correlation analysis are only valid
// if the call to CanonicalCorrelations was successful.
type CC struct {
	// n is the number of observations used to
	// construct the canonical correlations.
	n int

	// xd and yd are used for size checks.
	xd, yd int

	// x and y hold the SVDs of the centered input data, and c
	// holds the SVD of the canonical correlation matrix built
	// from them.
	x, y, c *mat64.SVD

	// ok records whether the factorization of the canonical
	// correlation matrix succeeded.
	ok bool
}
// CanonicalCorrelations returns a CC which can provide the results of canonical
// correlation analysis of the input data x and y, columns of which should be
// interpretable as two sets of measurements on the same observations (rows).
// These observations are optionally weighted by weights.
//
// Canonical correlation analysis finds associations between two sets of
// variables on the same observations by finding linear combinations of the two
// sphered datasets that maximize the correlation between them.
//
// Some notation: let Xc and Yc denote the centered input data matrices x
// and y (column means subtracted from each column), let Sx and Sy denote the
// sample covariance matrices within x and y respectively, and let Sxy denote
// the covariance matrix between x and y. The sphered data can then be expressed
// as Xc * Sx^{-1/2} and Yc * Sy^{-1/2} respectively, and the correlation matrix
// between the sphered data is called the canonical correlation matrix,
// Sx^{-1/2} * Sxy * Sy^{-1/2}. In cases where S^{-1/2} is ambiguous for some
// covariance matrix S, S^{-1/2} is taken to be E * D^{-1/2} * E^T where S can
// be eigendecomposed as S = E * D * E^T.
//
// The canonical correlations are the correlations between the corresponding
// pairs of canonical variables and can be obtained with c.Corrs(). Canonical
// variables can be obtained by projecting the sphered data into the left and
// right eigenvectors of the canonical correlation matrix, and these
// eigenvectors can be obtained with c.Left(m, true) and c.Right(m, true)
// respectively. The canonical variables can also be obtained directly from the
// centered raw data by using the back-transformed eigenvectors which can be
// obtained with c.Left(m, false) and c.Right(m, false) respectively.
//
// The first pair of left and right eigenvectors of the canonical correlation
// matrix can be interpreted as directions into which the respective sphered
// data can be projected such that the correlation between the two projections
// is maximized. The second pair and onwards solve the same optimization but
// under the constraint that they are uncorrelated (orthogonal in sphered space)
// to previous projections.
//
// CanonicalCorrelations will panic if the inputs x and y do not have the same
// number of rows.
//
// The slice weights is used to weight the observations. If weights is nil, each
// weight is considered to have a value of one, otherwise the length of weights
// must match the number of observations (rows of both x and y) or
// CanonicalCorrelations will panic.
//
// More details can be found at
// https://en.wikipedia.org/wiki/Canonical_correlation
// or in Chapter 3 of
// Koch, Inge. Analysis of multivariate and high-dimensional data.
// Vol. 32. Cambridge University Press, 2013. ISBN: 9780521887939
func (c *CC) CanonicalCorrelations(x, y mat64.Matrix, weights []float64) error {
	var yn int
	c.n, c.xd = x.Dims()
	yn, c.yd = y.Dims()
	if c.n != yn {
		panic("stat: unequal number of observations")
	}
	if weights != nil && len(weights) != c.n {
		panic("stat: len(weights) != observations")
	}

	// Center and factorize x and y.
	c.x, c.ok = svdFactorizeCentered(c.x, x, weights)
	if !c.ok {
		return errors.New("stat: failed to factorize x")
	}
	c.y, c.ok = svdFactorizeCentered(c.y, y, weights)
	if !c.ok {
		return errors.New("stat: failed to factorize y")
	}
	var xu, xv, yu, yv mat64.Dense
	xu.UFromSVD(c.x)
	xv.VFromSVD(c.x)
	yu.UFromSVD(c.y)
	yv.VFromSVD(c.y)

	// Calculate and factorise the canonical correlation matrix,
	// Vx * Ux^T * Uy * Vy^T, built from the singular vectors of the
	// centered data. NOTE(review): this form presumably realizes the
	// Sx^{-1/2} * Sxy * Sy^{-1/2} construction described above —
	// confirm against Koch (2013), Chapter 3.
	var ccor mat64.Dense
	ccor.Product(&xv, xu.T(), &yu, yv.T())
	if c.c == nil {
		c.c = &mat64.SVD{}
	}
	c.ok = c.c.Factorize(&ccor, matrix.SVDThin)
	if !c.ok {
		return errors.New("stat: failed to factorize ccor")
	}
	return nil
}
// Corrs returns the canonical correlations, storing them into dst when dst
// is not nil. A non-nil dst whose length differs from the number of columns
// of the y input matrix causes a panic.
func (c *CC) Corrs(dst []float64) []float64 {
	if !c.ok {
		panic("stat: canonical correlations missing or invalid")
	}
	if dst == nil {
		return c.c.Values(nil)
	}
	if len(dst) != c.yd {
		panic("stat: length of destination does not match input dimension")
	}
	return c.c.Values(dst)
}
// Left returns the left eigenvectors of the canonical correlation matrix if
// spheredSpace is true. If spheredSpace is false it returns these eigenvectors
// back-transformed to the original data space.
// If dst is not nil it must either be zero-sized or be an xd×yd matrix where xd
// and yd are the number of variables in the input x and y matrices. dst will
// be used as the destination for the vector data. If dst is nil, a new
// mat64.Dense is allocated for the destination.
func (c *CC) Left(dst *mat64.Dense, spheredSpace bool) *mat64.Dense {
	if !c.ok || c.n < 2 {
		panic("stat: canonical correlations missing or invalid")
	}
	if dst == nil {
		dst = &mat64.Dense{}
	} else if d, n := dst.Dims(); (n != 0 || d != 0) && (n != c.yd || d != c.xd) {
		panic(matrix.ErrShape)
	}
	// The left eigenvectors in sphered space are the left singular
	// vectors of the canonical correlation matrix.
	dst.UFromSVD(c.c)
	if spheredSpace {
		return dst
	}

	// Back-transform to the original data space using the SVD of the
	// centered x data: scale the right singular vectors by the
	// reciprocal square roots of the singular values and rescale by
	// sqrt(n-1). NOTE(review): presumably this applies Sx^{-1/2} as
	// described on CanonicalCorrelations — confirm the scaling.
	var xv mat64.Dense
	xs := c.x.Values(nil)
	xv.VFromSVD(c.x)

	scaleColsReciSqrt(&xv, xs)

	dst.Product(&xv, xv.T(), dst)
	dst.Scale(math.Sqrt(float64(c.n-1)), dst)
	return dst
}
// Right returns the right eigenvectors of the canonical correlation matrix if
// spheredSpace is true. If spheredSpace is false it returns these eigenvectors
// back-transformed to the original data space.
// If dst is not nil it must either be zero-sized or be an yd×yd matrix where yd
// is the number of variables in the input y matrix. dst will
// be used as the destination for the vector data. If dst is nil, a new
// mat64.Dense is allocated for the destination.
func (c *CC) Right(dst *mat64.Dense, spheredSpace bool) *mat64.Dense {
	if !c.ok || c.n < 2 {
		panic("stat: canonical correlations missing or invalid")
	}
	if dst == nil {
		dst = &mat64.Dense{}
	} else if d, n := dst.Dims(); (n != 0 || d != 0) && (n != c.yd || d != c.yd) {
		panic(matrix.ErrShape)
	}
	// The right eigenvectors in sphered space are the right singular
	// vectors of the canonical correlation matrix.
	dst.VFromSVD(c.c)
	if spheredSpace {
		return dst
	}

	// Back-transform to the original data space; this mirrors the
	// construction in Left, using the SVD of the centered y data.
	var yv mat64.Dense
	ys := c.y.Values(nil)
	yv.VFromSVD(c.y)

	scaleColsReciSqrt(&yv, ys)

	dst.Product(&yv, yv.T(), dst)
	dst.Scale(math.Sqrt(float64(c.n-1)), dst)
	return dst
}
// svdFactorizeCentered returns the thin SVD factorization of m with the
// column means subtracted and, when weights is non-nil, each row scaled by
// the square root of its observation weight. A non-nil work is reused as
// the factorization destination to avoid reallocation.
func svdFactorizeCentered(work *mat64.SVD, m mat64.Matrix, weights []float64) (svd *mat64.SVD, ok bool) {
	n, d := m.Dims()
	centered := mat64.NewDense(n, d, nil)
	col := make([]float64, n)
	for j := 0; j < d; j++ {
		// Subtract the (weighted) mean from each column.
		mat64.Col(col, j, m)
		floats.AddConst(-Mean(col, weights), col)
		centered.SetCol(j, col)
	}
	// Scale each row by sqrt(w) so the factorization carries the
	// observation weights; ranging over a nil weights slice performs
	// no iterations, leaving the data unscaled.
	for i, w := range weights {
		floats.Scale(math.Sqrt(w), centered.RawRowView(i))
	}
	if work == nil {
		work = &mat64.SVD{}
	}
	ok = work.Factorize(centered, matrix.SVDThin)
	return work, ok
}
// scaleColsReciSqrt multiplies column j of cols by 1/sqrt(vals[j]) for
// every column. It panics if cols is nil or if the number of values does
// not match the number of columns.
func scaleColsReciSqrt(cols *mat64.Dense, vals []float64) {
	if cols == nil {
		panic("stat: input nil")
	}
	n, d := cols.Dims()
	if d != len(vals) {
		panic("stat: input length mismatch")
	}
	tmp := make([]float64, n)
	for j, v := range vals {
		mat64.Col(tmp, j, cols)
		floats.Scale(math.Sqrt(1/v), tmp)
		cols.SetCol(j, tmp)
	}
}

60
stat/pca_example_test.go Normal file
View File

@@ -0,0 +1,60 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stat_test
import (
"fmt"
"github.com/gonum/matrix/mat64"
"github.com/gonum/stat"
)
func ExamplePrincipalComponents() {
	// iris is a truncated sample of the Fisher's Iris dataset.
	n := 10
	d := 4
	iris := mat64.NewDense(n, d, []float64{
		5.1, 3.5, 1.4, 0.2,
		4.9, 3.0, 1.4, 0.2,
		4.7, 3.2, 1.3, 0.2,
		4.6, 3.1, 1.5, 0.2,
		5.0, 3.6, 1.4, 0.2,
		5.4, 3.9, 1.7, 0.4,
		4.6, 3.4, 1.4, 0.3,
		5.0, 3.4, 1.5, 0.2,
		4.4, 2.9, 1.4, 0.2,
		4.9, 3.1, 1.5, 0.1,
	})

	// Calculate the principal component direction vectors
	// and variances.
	var pc stat.PC
	ok := pc.PrincipalComponents(iris, nil)
	if !ok {
		return
	}
	fmt.Printf("variances = %.4f\n\n", pc.Vars(nil))

	// Project the data onto the first 2 principal components.
	// Slice selects the leading d×k block of the direction vectors.
	k := 2
	var proj mat64.Dense
	proj.Mul(iris, pc.Vectors(nil).Slice(0, d, 0, k))

	fmt.Printf("proj = %.4f", mat64.Formatted(&proj, mat64.Prefix("       ")))

	// Output:
	// variances = [0.1666 0.0207 0.0079 0.0019]
	//
	// proj = ⎡-6.1686  1.4659⎤
	//        ⎢-5.6767  1.6459⎥
	//        ⎢-5.6699  1.3642⎥
	//        ⎢-5.5643  1.3816⎥
	//        ⎢-6.1734  1.3309⎥
	//        ⎢-6.7278  1.4021⎥
	//        ⎢-5.7743  1.1498⎥
	//        ⎢-6.0466  1.4714⎥
	//        ⎢-5.2709  1.3570⎥
	//        ⎣-5.7533  1.6207⎦
}

183
stat/pca_test.go Normal file
View File

@@ -0,0 +1,183 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stat
import (
"testing"
"github.com/gonum/floats"
"github.com/gonum/matrix/mat64"
)
// TestPrincipalComponents checks PC against results verified using R,
// running each analysis twice to exercise receiver reuse.
func TestPrincipalComponents(t *testing.T) {
tests:
	for i, test := range []struct {
		data     mat64.Matrix
		weights  []float64
		wantVecs *mat64.Dense
		wantVars []float64
		epsilon  float64
	}{
		// Test results verified using R.
		{
			data: mat64.NewDense(3, 3, []float64{
				1, 2, 3,
				4, 5, 6,
				7, 8, 9,
			}),
			wantVecs: mat64.NewDense(3, 3, []float64{
				0.5773502691896258, 0.8164965809277261, 0,
				0.577350269189626, -0.4082482904638632, -0.7071067811865476,
				0.5773502691896258, -0.4082482904638631, 0.7071067811865475,
			}),
			wantVars: []float64{27, 0, 0},
			epsilon:  1e-12,
		},
		{ // Truncated iris data.
			data: mat64.NewDense(10, 4, []float64{
				5.1, 3.5, 1.4, 0.2,
				4.9, 3.0, 1.4, 0.2,
				4.7, 3.2, 1.3, 0.2,
				4.6, 3.1, 1.5, 0.2,
				5.0, 3.6, 1.4, 0.2,
				5.4, 3.9, 1.7, 0.4,
				4.6, 3.4, 1.4, 0.3,
				5.0, 3.4, 1.5, 0.2,
				4.4, 2.9, 1.4, 0.2,
				4.9, 3.1, 1.5, 0.1,
			}),
			wantVecs: mat64.NewDense(4, 4, []float64{
				-0.6681110197952722, 0.7064764857539533, -0.14026590216895132, -0.18666578956412125,
				-0.7166344774801547, -0.6427036135482664, -0.135650285905254, 0.23444848208629923,
				-0.164411275166307, 0.11898477441068218, 0.9136367900709548, 0.35224901970831746,
				-0.11415613655453069, -0.2714141920887426, 0.35664028439226514, -0.8866286823515034,
			}),
			wantVars: []float64{0.1665786313282786, 0.02065509475412993, 0.007944620317765855, 0.0019327647109368329},
			epsilon:  1e-12,
		},
		{ // Truncated iris data to form wide matrix.
			data: mat64.NewDense(3, 4, []float64{
				5.1, 3.5, 1.4, 0.2,
				4.9, 3.0, 1.4, 0.2,
				4.7, 3.2, 1.3, 0.2,
			}),
			wantVecs: mat64.NewDense(4, 3, []float64{
				-0.5705187254552365, -0.7505979435049239, 0.08084520834544455,
				-0.8166537769529318, 0.5615147645527523, -0.032338083338177705,
				-0.08709186238359454, -0.3482870890450082, -0.22636658336724505,
				0, 0, -0.9701425001453315,
			}),
			wantVars: []float64{0.0844692361537822, 0.022197430512884326, 0},
			epsilon:  1e-12,
		},
		{ // Truncated iris data transposed to check for operation on fat input.
			data: mat64.NewDense(10, 4, []float64{
				5.1, 3.5, 1.4, 0.2,
				4.9, 3.0, 1.4, 0.2,
				4.7, 3.2, 1.3, 0.2,
				4.6, 3.1, 1.5, 0.2,
				5.0, 3.6, 1.4, 0.2,
				5.4, 3.9, 1.7, 0.4,
				4.6, 3.4, 1.4, 0.3,
				5.0, 3.4, 1.5, 0.2,
				4.4, 2.9, 1.4, 0.2,
				4.9, 3.1, 1.5, 0.1,
			}).T(),
			wantVecs: mat64.NewDense(10, 4, []float64{
				-0.3366602459946619, -0.1373634006401213, 0.3465102523547623, -0.10290179303893479,
				-0.31381852053861975, 0.5197145790632827, 0.5567296129086686, -0.15923062170153618,
				-0.30857197637565165, -0.07670930360819002, 0.36159923003337235, 0.3342301027853355,
				-0.29527124351656137, 0.16885455995353074, -0.5056204762881208, 0.32580913261444344,
				-0.3327611073694004, -0.39365834489416474, 0.04900050959307464, 0.46812879383236555,
				-0.34445484362044815, -0.2985206914561878, -0.1009714701361799, -0.16803618186050803,
				-0.2986246350957691, -0.4222037823717799, -0.11838613462182519, -0.580283530375069,
				-0.325911246223126, 0.024366468758217238, -0.12082035131864265, 0.16756027181337868,
				-0.2814284432361538, 0.240812316260054, -0.24061437569068145, -0.365034616264623,
				-0.31906138507685167, 0.4423912824105986, -0.2906412122303604, 0.027551046870337714,
			}),
			wantVars: []float64{41.8851906634233, 0.07762619213464989, 0.010516477775373585, 0},
			epsilon:  1e-12,
		},
		{ // Truncated iris data unitary weights.
			data: mat64.NewDense(10, 4, []float64{
				5.1, 3.5, 1.4, 0.2,
				4.9, 3.0, 1.4, 0.2,
				4.7, 3.2, 1.3, 0.2,
				4.6, 3.1, 1.5, 0.2,
				5.0, 3.6, 1.4, 0.2,
				5.4, 3.9, 1.7, 0.4,
				4.6, 3.4, 1.4, 0.3,
				5.0, 3.4, 1.5, 0.2,
				4.4, 2.9, 1.4, 0.2,
				4.9, 3.1, 1.5, 0.1,
			}),
			weights: []float64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
			wantVecs: mat64.NewDense(4, 4, []float64{
				-0.6681110197952722, 0.7064764857539533, -0.14026590216895132, -0.18666578956412125,
				-0.7166344774801547, -0.6427036135482664, -0.135650285905254, 0.23444848208629923,
				-0.164411275166307, 0.11898477441068218, 0.9136367900709548, 0.35224901970831746,
				-0.11415613655453069, -0.2714141920887426, 0.35664028439226514, -0.8866286823515034,
			}),
			wantVars: []float64{0.1665786313282786, 0.02065509475412993, 0.007944620317765855, 0.0019327647109368329},
			epsilon:  1e-12,
		},
		{ // Truncated iris data non-unitary weights.
			data: mat64.NewDense(10, 4, []float64{
				5.1, 3.5, 1.4, 0.2,
				4.9, 3.0, 1.4, 0.2,
				4.7, 3.2, 1.3, 0.2,
				4.6, 3.1, 1.5, 0.2,
				5.0, 3.6, 1.4, 0.2,
				5.4, 3.9, 1.7, 0.4,
				4.6, 3.4, 1.4, 0.3,
				5.0, 3.4, 1.5, 0.2,
				4.4, 2.9, 1.4, 0.2,
				4.9, 3.1, 1.5, 0.1,
			}),
			weights: []float64{2, 3, 1, 1, 1, 1, 1, 1, 1, 2},
			wantVecs: mat64.NewDense(4, 4, []float64{
				-0.618936145422414, 0.763069301531647, 0.124857741232537, 0.138035623677211,
				-0.763958271606519, -0.603881770702898, 0.118267155321333, -0.194184052457746,
				-0.143552119754944, 0.090014599564871, -0.942209377020044, -0.289018426115945,
				-0.112599271966947, -0.212012782487076, -0.287515067921680, 0.927203898682805,
			}),
			wantVars: []float64{0.129621985550623, 0.022417487771598, 0.006454461065715, 0.002495076601075},
			epsilon:  1e-12,
		},
	} {
		var pc PC
		var vecs *mat64.Dense
		var vars []float64
		for j := 0; j < 2; j++ {
			ok := pc.PrincipalComponents(test.data, test.weights)
			// Check for success before extracting results: Vectors and
			// Vars panic on an unsuccessful analysis, so the previous
			// ordering could panic instead of reporting the failure.
			if !ok {
				t.Errorf("unexpected SVD failure for test %d use %d", i, j)
				continue tests
			}
			vecs = pc.Vectors(vecs)
			vars = pc.Vars(vars)
			if !mat64.EqualApprox(vecs, test.wantVecs, test.epsilon) {
				t.Errorf("%d use %d: unexpected PCA result got:\n%v\nwant:\n%v",
					i, j, mat64.Formatted(vecs), mat64.Formatted(test.wantVecs))
			}
			if !approxEqual(vars, test.wantVars, test.epsilon) {
				t.Errorf("%d use %d: unexpected variance result got:%v, want:%v",
					i, j, vars, test.wantVars)
			}
		}
	}
}
// approxEqual reports whether a and b have the same length and are
// element-wise equal to within epsilon (absolute or relative).
func approxEqual(a, b []float64, epsilon float64) bool {
	if len(a) != len(b) {
		return false
	}
	for i := range a {
		if !floats.EqualWithinAbsOrRel(a[i], b[i], epsilon, epsilon) {
			return false
		}
	}
	return true
}

121
stat/roc.go Normal file
View File

@@ -0,0 +1,121 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stat
import "sort"
// ROC returns paired false positive rate (FPR) and true positive rate
// (TPR) values corresponding to n cutoffs spanning the relative
// (or receiver) operator characteristic (ROC) curve obtained when y is
// treated as a binary classifier for classes with weights.
//
// Cutoffs are equally spaced from eps less than the minimum value of y
// to the maximum value of y, including both endpoints meaning that the
// resulting ROC curve will always begin at (0,0) and end at (1,1).
//
// The input y must be sorted, and SortWeightedLabeled can be used in
// order to sort y together with classes and weights.
//
// For a given cutoff value, observations corresponding to entries in y
// greater than the cutoff value are classified as false, while those
// below (or equal to) the cutoff value are classified as true. These
// assigned class labels are compared with the true values in the classes
// slice and used to calculate the FPR and TPR.
//
// If weights is nil, all weights are treated as 1.
//
// When n is zero all possible cutoffs are calculated, resulting
// in fpr and tpr having length one greater than the number of unique
// values in y. When n is greater than one fpr and tpr will be returned
// with length n. ROC will panic if n is equal to one or less than 0.
//
// More details about ROC curves are available at
// https://en.wikipedia.org/wiki/Receiver_operating_characteristic
func ROC(n int, y []float64, classes []bool, weights []float64) (tpr, fpr []float64) {
	if len(y) != len(classes) {
		panic("stat: slice length mismatch")
	}
	if weights != nil && len(y) != len(weights) {
		panic("stat: slice length mismatch")
	}
	if !sort.Float64sAreSorted(y) {
		panic("stat: input must be sorted")
	}

	var incWidth, tol float64
	if n == 0 {
		// All-cutoffs mode: one bin per unique value of y plus the
		// leading (0,0) bin; unused trailing bins are truncated after
		// the accumulation loop.
		if len(y) == 0 {
			return nil, nil
		}
		tpr = make([]float64, len(y)+1)
		fpr = make([]float64, len(y)+1)
	} else {
		if n < 2 {
			panic("stat: cannot calculate fewer than 2 points on a ROC curve")
		}
		if len(y) == 0 {
			return nil, nil
		}
		tpr = make([]float64, n)
		fpr = make([]float64, n)
		incWidth = (y[len(y)-1] - y[0]) / float64(n-1)
		tol = y[0] + incWidth
		if incWidth == 0 {
			// All y values are equal, so the curve jumps straight
			// from (0,0) to (1,1).
			tpr[n-1] = 1
			fpr[n-1] = 1
			return
		}
	}
	bin := 1 // the initial bin is known to have 0 fpr and 0 tpr
	var nPos, nNeg float64
	for i, u := range classes {
		// Each observation contributes weight 1 (or weights[i]) to
		// exactly one of the positive or negative totals.
		var posWeight, negWeight float64 = 0, 1
		if weights != nil {
			negWeight = weights[i]
		}
		if u {
			posWeight, negWeight = negWeight, posWeight
		}
		nPos += posWeight
		nNeg += negWeight
		tpr[bin] += posWeight
		fpr[bin] += negWeight

		// Assess if the bin needs to be updated. If n is zero,
		// the bin is always updated, unless consecutive y values
		// are equal. Otherwise, the bin must be updated until it
		// matches the next y value (skipping empty bins).
		if n == 0 {
			if i != (len(y)-1) && y[i] != y[i+1] {
				bin++
				tpr[bin] = tpr[bin-1]
				fpr[bin] = fpr[bin-1]
			}
		} else {
			for i != (len(y)-1) && y[i+1] > tol {
				tol += incWidth
				bin++
				tpr[bin] = tpr[bin-1]
				fpr[bin] = fpr[bin-1]
			}
		}
	}
	if n == 0 {
		// Drop the bins left unused by duplicate y values.
		tpr = tpr[:(bin + 1)]
		fpr = fpr[:(bin + 1)]
	}
	// Normalize the accumulated weights into rates, pinning the final
	// point of the curve to (1,1).
	invNeg := 1 / nNeg
	invPos := 1 / nPos
	for i := range tpr {
		tpr[i] *= invPos
		fpr[i] *= invNeg
	}
	tpr[len(tpr)-1] = 1
	fpr[len(fpr)-1] = 1

	return tpr, fpr
}

56
stat/roc_example_test.go Normal file
View File

@@ -0,0 +1,56 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stat_test
import (
"fmt"
"github.com/gonum/integrate"
"github.com/gonum/stat"
)
// ExampleROC_weighted demonstrates ROC with per-sample weights.
// (The names of these two examples were previously swapped: the
// "unweighted" example passed a weights slice and vice versa.)
func ExampleROC_weighted() {
	y := []float64{0, 3, 5, 6, 7.5, 8}
	classes := []bool{true, false, true, false, false, false}
	weights := []float64{4, 1, 6, 3, 2, 2}

	tpr, fpr := stat.ROC(0, y, classes, weights)
	fmt.Printf("true positive rate: %v\n", tpr)
	fmt.Printf("false positive rate: %v\n", fpr)

	// Output:
	// true positive rate: [0 0.4 0.4 1 1 1 1]
	// false positive rate: [0 0 0.125 0.125 0.5 0.75 1]
}

// ExampleROC_unweighted demonstrates ROC with uniform weights (nil weights).
func ExampleROC_unweighted() {
	y := []float64{0, 3, 5, 6, 7.5, 8}
	classes := []bool{true, false, true, false, false, false}

	tpr, fpr := stat.ROC(0, y, classes, nil)
	fmt.Printf("true positive rate: %v\n", tpr)
	fmt.Printf("false positive rate: %v\n", fpr)

	// Output:
	// true positive rate: [0 0.5 0.5 1 1 1 1]
	// false positive rate: [0 0 0.25 0.25 0.5 0.75 1]
}
// ExampleROC_aUC computes the area under a ROC curve by trapezoidal
// integration of the (fpr, tpr) points.
func ExampleROC_aUC() {
	y := []float64{0.1, 0.35, 0.4, 0.8}
	classes := []bool{true, false, true, false}

	tpr, fpr := stat.ROC(0, y, classes, nil)

	// Compute Area Under Curve.
	auc := integrate.Trapezoidal(fpr, tpr)
	fmt.Printf("true positive rate: %v\n", tpr)
	fmt.Printf("false positive rate: %v\n", fpr)
	fmt.Printf("auc: %v\n", auc)

	// Output:
	// true positive rate: [0 0.5 0.5 1 1]
	// false positive rate: [0 0 0.5 0.5 1]
	// auc: 0.75
}

178
stat/roc_test.go Normal file
View File

@@ -0,0 +1,178 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stat
import (
"testing"
"github.com/gonum/floats"
)
// Test cases were calculated manually.
//
// TestROC exercises ROC over unweighted and weighted inputs, fixed bin
// counts (n > 0), runs of equal y values, single-element and empty inputs.
func TestROC(t *testing.T) {
	cases := []struct {
		y       []float64
		c       []bool
		w       []float64 // nil means unweighted
		n       int       // 0 means one cutoff per distinct y value
		wantTPR []float64
		wantFPR []float64
	}{
		// Distinct y values, unweighted and weighted, with and without
		// an explicit number of bins.
		{
			y:       []float64{0, 3, 5, 6, 7.5, 8},
			c:       []bool{true, false, true, false, false, false},
			wantTPR: []float64{0, 0.5, 0.5, 1, 1, 1, 1},
			wantFPR: []float64{0, 0, 0.25, 0.25, 0.5, 0.75, 1},
		},
		{
			y:       []float64{0, 3, 5, 6, 7.5, 8},
			c:       []bool{true, false, true, false, false, false},
			w:       []float64{4, 1, 6, 3, 2, 2},
			wantTPR: []float64{0, 0.4, 0.4, 1, 1, 1, 1},
			wantFPR: []float64{0, 0, 0.125, 0.125, 0.5, 0.75, 1},
		},
		{
			y:       []float64{0, 3, 5, 6, 7.5, 8},
			c:       []bool{true, false, true, false, false, false},
			n:       int(5),
			wantTPR: []float64{0, 0.5, 0.5, 1, 1},
			wantFPR: []float64{0, 0, 0.25, 0.5, 1},
		},
		{
			y:       []float64{0, 3, 5, 6, 7.5, 8},
			c:       []bool{true, false, true, false, false, false},
			n:       int(9),
			wantTPR: []float64{0, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1},
			wantFPR: []float64{0, 0, 0, 0.25, 0.25, 0.25, 0.5, 0.5, 1},
		},
		{
			y:       []float64{0, 3, 5, 6, 7.5, 8},
			c:       []bool{true, false, true, false, false, false},
			w:       []float64{4, 1, 6, 3, 2, 2},
			n:       int(5),
			wantTPR: []float64{0, 0.4, 0.4, 1, 1},
			wantFPR: []float64{0, 0, 0.125, 0.5, 1},
		},
		{
			y:       []float64{0, 3, 5, 6, 7.5, 8},
			c:       []bool{true, false, true, false, false, false},
			w:       []float64{4, 1, 6, 3, 2, 2},
			n:       int(9),
			wantTPR: []float64{0, 0.4, 0.4, 0.4, 0.4, 1, 1, 1, 1},
			wantFPR: []float64{0, 0, 0, 0.125, 0.125, 0.125, 0.5, 0.5, 1},
		},
		// Repeated y values collapse into a single cutoff.
		{
			y:       []float64{0, 3, 6, 6, 6, 8},
			c:       []bool{true, false, true, false, false, false},
			wantTPR: []float64{0, 0.5, 0.5, 1, 1},
			wantFPR: []float64{0, 0, 0.25, 0.75, 1},
		},
		{
			y:       []float64{0, 3, 6, 6, 6, 8},
			c:       []bool{true, false, true, false, false, false},
			w:       []float64{4, 1, 6, 3, 2, 2},
			wantTPR: []float64{0, 0.4, 0.4, 1, 1},
			wantFPR: []float64{0, 0, 0.125, 0.75, 1},
		},
		{
			y:       []float64{0, 3, 6, 6, 6, 8},
			c:       []bool{true, false, true, false, false, false},
			n:       int(5),
			wantTPR: []float64{0, 0.5, 0.5, 1, 1},
			wantFPR: []float64{0, 0, 0.25, 0.75, 1},
		},
		{
			y:       []float64{0, 3, 6, 6, 6, 8},
			c:       []bool{true, false, true, false, false, false},
			n:       int(9),
			wantTPR: []float64{0, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1},
			wantFPR: []float64{0, 0, 0, 0.25, 0.25, 0.25, 0.75, 0.75, 1},
		},
		{
			y:       []float64{0, 3, 6, 6, 6, 8},
			c:       []bool{true, false, true, false, false, false},
			w:       []float64{4, 1, 6, 3, 2, 2},
			n:       int(5),
			wantTPR: []float64{0, 0.4, 0.4, 1, 1},
			wantFPR: []float64{0, 0, 0.125, 0.75, 1},
		},
		{
			y:       []float64{0, 3, 6, 6, 6, 8},
			c:       []bool{true, false, true, false, false, false},
			w:       []float64{4, 1, 6, 3, 2, 2},
			n:       int(9),
			wantTPR: []float64{0, 0.4, 0.4, 0.4, 0.4, 0.4, 1, 1, 1},
			wantFPR: []float64{0, 0, 0, 0.125, 0.125, 0.125, 0.75, 0.75, 1},
		},
		// Small inputs: two samples, one sample, all-positive, all-negative.
		{
			y:       []float64{1, 2},
			c:       []bool{true, true},
			wantTPR: []float64{0, 0.5, 1},
			wantFPR: []float64{0, 0, 1},
		},
		{
			y:       []float64{1, 2},
			c:       []bool{true, true},
			n:       int(2),
			wantTPR: []float64{0, 1},
			wantFPR: []float64{0, 1},
		},
		{
			y:       []float64{1, 2},
			c:       []bool{true, true},
			n:       int(7),
			wantTPR: []float64{0, 0.5, 0.5, 0.5, 0.5, 0.5, 1},
			wantFPR: []float64{0, 0, 0, 0, 0, 0, 1},
		},
		{
			y:       []float64{1},
			c:       []bool{true},
			wantTPR: []float64{0, 1},
			wantFPR: []float64{0, 1},
		},
		{
			y:       []float64{1},
			c:       []bool{true},
			n:       int(2),
			wantTPR: []float64{0, 1},
			wantFPR: []float64{0, 1},
		},
		{
			y:       []float64{1},
			c:       []bool{false},
			wantTPR: []float64{0, 1},
			wantFPR: []float64{0, 1},
		},
		// Wide value gap with a small fixed bin count.
		{
			y:       []float64{0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 10},
			c:       []bool{true, false, true, true, false, false, true},
			n:       int(5),
			wantTPR: []float64{0, 0.75, 0.75, 0.75, 1},
			wantFPR: []float64{0, 1, 1, 1, 1},
		},
		// Empty input returns nil slices regardless of n.
		{
			y:       []float64{},
			c:       []bool{},
			wantTPR: nil,
			wantFPR: nil,
		},
		{
			y:       []float64{},
			c:       []bool{},
			n:       int(5),
			wantTPR: nil,
			wantFPR: nil,
		},
	}
	for i, test := range cases {
		gotTPR, gotFPR := ROC(test.n, test.y, test.c, test.w)
		// floats.Same also distinguishes nil from empty slices.
		if !floats.Same(gotTPR, test.wantTPR) {
			t.Errorf("%d: unexpected TPR got:%v want:%v", i, gotTPR, test.wantTPR)
		}
		if !floats.Same(gotFPR, test.wantFPR) {
			t.Errorf("%d: unexpected FPR got:%v want:%v", i, gotFPR, test.wantFPR)
		}
	}
}

View File

@@ -0,0 +1,221 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package samplemv
import (
"math"
"math/rand"
"github.com/gonum/matrix/mat64"
"github.com/gonum/stat/distmv"
)
// Compile-time interface satisfaction check.
var _ Sampler = MetropolisHastingser{}

// MHProposal defines a proposal distribution for Metropolis Hastings.
type MHProposal interface {
	// ConditionalLogProb returns the probability of the first argument
	// conditioned on being at the second argument.
	//  p(x|y)
	// ConditionalLogProb panics if the input slices are not the same length.
	ConditionalLogProb(x, y []float64) (prob float64)

	// ConditionalRand generates a new random location conditioned on being
	// at the location y. If the first argument is nil, a new slice is
	// allocated and returned. Otherwise, the random location is stored
	// in-place into the first argument, and ConditionalRand will panic if
	// the input slice lengths differ.
	ConditionalRand(x, y []float64) []float64
}
// MetropolisHastingser is a wrapper around the MetropolisHastings sampling type.
//
// BurnIn sets the number of samples to discard before keeping the first sample.
// A properly set BurnIn rate will decorrelate the sampling chain from the initial
// location. The proper BurnIn value will depend on the mixing time of the
// Markov chain defined by the target and proposal distributions.
//
// Rate sets the number of samples to discard in between each kept sample. A
// higher rate will better approximate independently and identically distributed
// samples, while a lower rate will keep more information (at the cost of
// higher correlation between samples). If Rate is 0 it is defaulted to 1.
//
// The initial value is NOT changed during calls to Sample.
type MetropolisHastingser struct {
	Initial  []float64        // starting location of the Markov chain
	Target   distmv.LogProber // distribution to be sampled from
	Proposal MHProposal       // conditional proposal distribution
	Src      *rand.Rand       // random source; nil means the global source

	BurnIn int // number of initial samples to discard
	Rate   int // thinning rate; 0 is treated as 1
}
// Sample generates rows(batch) samples using the Metropolis Hastings sample
// generation method. The initial location is NOT updated during the call to Sample.
//
// The number of columns in batch must equal len(m.Initial), otherwise Sample
// will panic.
func (m MetropolisHastingser) Sample(batch *mat64.Dense) {
	burnIn := m.BurnIn
	rate := m.Rate
	if rate == 0 {
		rate = 1
	}
	r, c := batch.Dims()
	if len(m.Initial) != c {
		panic("metropolishastings: length mismatch")
	}

	// Use the optimal size for the temporary memory to allow the fewest calls
	// to MetropolisHastings. The case where tmp shadows samples must be
	// aligned with the logic after burn-in so that tmp does not shadow samples
	// during the rate portion.
	tmp := batch
	if rate > r {
		tmp = mat64.NewDense(rate, c, nil)
	}
	rTmp, _ := tmp.Dims()

	// Perform burn-in: generate and discard burnIn samples in chunks that
	// fit into tmp, carrying the chain's final location forward.
	remaining := burnIn
	initial := make([]float64, c)
	copy(initial, m.Initial)
	for remaining != 0 {
		newSamp := min(rTmp, remaining)
		MetropolisHastings(tmp.View(0, 0, newSamp, c).(*mat64.Dense), initial, m.Target, m.Proposal, m.Src)
		copy(initial, tmp.RawRowView(newSamp-1))
		remaining -= newSamp
	}

	if rate == 1 {
		// No thinning: every chain step is kept, so generate directly into batch.
		MetropolisHastings(batch, initial, m.Target, m.Proposal, m.Src)
		return
	}

	// tmp currently aliases batch when rate <= r; allocate scratch memory so
	// the thinning loop below does not overwrite kept samples.
	if rTmp <= r {
		tmp = mat64.NewDense(rate, c, nil)
	}

	// Take a single sample from the chain.
	MetropolisHastings(batch.View(0, 0, 1, c).(*mat64.Dense), initial, m.Target, m.Proposal, m.Src)
	copy(initial, batch.RawRowView(0))

	// For all of the other samples, first generate Rate samples and then actually
	// accept the last one.
	for i := 1; i < r; i++ {
		MetropolisHastings(tmp, initial, m.Target, m.Proposal, m.Src)
		v := tmp.RawRowView(rate - 1)
		batch.SetRow(i, v)
		copy(initial, v)
	}
}
// MetropolisHastings generates rows(batch) samples using the Metropolis Hastings
// algorithm (http://en.wikipedia.org/wiki/Metropolis%E2%80%93Hastings_algorithm),
// with the given target and proposal distributions, starting at the initial location
// and storing the results in-place into samples. If src != nil, it will be used to generate random
// numbers, otherwise rand.Float64 will be used.
//
// Metropolis-Hastings is a Markov-chain Monte Carlo algorithm that generates
// samples according to the distribution specified by target by using the Markov
// chain implicitly defined by the proposal distribution. At each
// iteration, a proposal point is generated randomly from the current location.
// This proposal point is accepted with probability
//  p = min(1, (target(new) * proposal(current|new)) / (target(current) * proposal(new|current)))
// If the new location is accepted, it is stored into batch and becomes the
// new current location. If it is rejected, the current location remains and
// is stored into samples. Thus, a location is stored into batch at every iteration.
//
// The samples in Metropolis Hastings are correlated with one another through the
// Markov chain. As a result, the initial value can have a significant influence
// on the early samples, and so, typically, the first samples generated by the chain
// are ignored. This is known as "burn-in", and can be accomplished with slicing.
// The best choice for burn-in length will depend on the sampling and target
// distributions.
//
// Many choose to have a sampling "rate" where a number of samples
// are ignored in between each kept sample. This helps decorrelate
// the samples from one another, but also reduces the number of available samples.
// A sampling rate can be implemented with successive calls to MetropolisHastings.
func MetropolisHastings(batch *mat64.Dense, initial []float64, target distmv.LogProber, proposal MHProposal, src *rand.Rand) {
	f64 := rand.Float64
	if src != nil {
		f64 = src.Float64
	}
	if len(initial) == 0 {
		panic("metropolishastings: zero length initial")
	}
	r, _ := batch.Dims()
	current := make([]float64, len(initial))
	copy(current, initial)
	proposed := make([]float64, len(initial))
	currentLogProb := target.LogProb(initial)
	for i := 0; i < r; i++ {
		proposal.ConditionalRand(proposed, current)
		proposedLogProb := target.LogProb(proposed)
		// Acceptance ratio computed in log space and exponentiated:
		//  (target(new) * proposal(current|new)) / (target(current) * proposal(new|current))
		probTo := proposal.ConditionalLogProb(proposed, current)
		probBack := proposal.ConditionalLogProb(current, proposed)
		accept := math.Exp(proposedLogProb + probBack - probTo - currentLogProb)
		if accept > f64() {
			copy(current, proposed)
			currentLogProb = proposedLogProb
		}
		// The current location (whether or not the chain moved) is
		// recorded every iteration.
		batch.SetRow(i, current)
	}
}
// ProposalNormal is a sampling distribution for Metropolis-Hastings. It has a
// fixed covariance matrix and changes the mean based on the current sampling
// location.
type ProposalNormal struct {
	normal *distmv.Normal
}

// NewProposalNormal constructs a new ProposalNormal for use as a proposal
// distribution for Metropolis-Hastings. ProposalNormal is a multivariate normal
// distribution (implemented by distmv.Normal) where the covariance matrix is fixed
// and the mean of the distribution changes.
//
// NewProposalNormal returns {nil, false} if the covariance matrix is not positive-definite.
func NewProposalNormal(sigma *mat64.SymDense, src *rand.Rand) (*ProposalNormal, bool) {
	// The zero mean is a placeholder; it is overwritten by SetMean on every
	// call to ConditionalLogProb and ConditionalRand.
	mu := make([]float64, sigma.Symmetric())
	normal, ok := distmv.NewNormal(mu, sigma, src)
	if !ok {
		return nil, false
	}
	p := &ProposalNormal{
		normal: normal,
	}
	return p, true
}
// ConditionalLogProb returns the probability of the first argument conditioned on
// being at the second argument.
//  p(x|y)
// ConditionalLogProb panics if the input slices are not the same length or
// are not equal to the dimension of the covariance matrix.
func (p *ProposalNormal) ConditionalLogProb(x, y []float64) (prob float64) {
	// Either SetMean or LogProb will panic if the slice lengths are inaccurate.
	p.normal.SetMean(y)
	return p.normal.LogProb(x)
}
// ConditionalRand generates a new random location conditioned on being at the
// location y. If the first argument is nil, a new slice is allocated and
// returned. Otherwise, the random location is stored in-place into the first
// argument, and ConditionalRand will panic if the input slice lengths differ or
// if they are not equal to the dimension of the covariance matrix.
func (p *ProposalNormal) ConditionalRand(x, y []float64) []float64 {
	if x == nil {
		x = make([]float64, p.normal.Dim())
	}
	if len(x) != len(y) {
		panic(badLengthMismatch)
	}
	// Center the fixed-covariance normal at y and draw a sample from it.
	p.normal.SetMean(y)
	p.normal.Rand(x)
	return x
}

View File

@@ -0,0 +1,280 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package samplemv
import (
"fmt"
"math"
"math/rand"
"testing"
"github.com/gonum/floats"
"github.com/gonum/matrix/mat64"
"github.com/gonum/stat"
"github.com/gonum/stat/distmv"
)
// lhDist is the set of distribution methods needed by TestLatinHypercube:
// quantile and CDF transforms plus the distribution's dimension.
type lhDist interface {
	Quantile(x, p []float64) []float64
	CDF(p, x []float64) []float64
	Dim() int
}
// TestLatinHypercube checks that Latin hypercube sampling places exactly one
// sample in each of the nSamples equally-probable CDF bins of every dimension.
func TestLatinHypercube(t *testing.T) {
	for _, nSamples := range []int{1, 2, 5, 10, 20} {
		for _, dist := range []lhDist{
			distmv.NewUniform([]distmv.Bound{{0, 3}}, nil),
			distmv.NewUniform([]distmv.Bound{{0, 3}, {-1, 5}, {-4, -1}}, nil),
		} {
			dim := dist.Dim()
			batch := mat64.NewDense(nSamples, dim, nil)
			LatinHypercube(batch, dist, nil)
			// Latin hypercube should have one entry per hyperrow.
			present := make([][]bool, nSamples)
			for i := range present {
				present[i] = make([]bool, dim)
			}
			cdf := make([]float64, dim)
			for i := 0; i < nSamples; i++ {
				dist.CDF(cdf, batch.RawRowView(i))
				for j := 0; j < dim; j++ {
					// Mark the CDF quadrant this sample falls into.
					p := cdf[j]
					quadrant := int(math.Floor(p * float64(nSamples)))
					present[quadrant][j] = true
				}
			}
			allPresent := true
			for i := 0; i < nSamples; i++ {
				for j := 0; j < dim; j++ {
					// Idiomatic boolean test (was `present[i][j] == false`).
					if !present[i][j] {
						allPresent = false
					}
				}
			}
			if !allPresent {
				t.Errorf("All quadrants not present")
			}
		}
	}
}
// TestImportance checks importance sampling by estimating the moments of a
// random multivariate normal target using a wider independent normal proposal.
func TestImportance(t *testing.T) {
	// Test by finding the expected value of a multi-variate normal.
	dim := 3
	target, ok := randomNormal(dim)
	if !ok {
		t.Fatal("bad test, sigma not pos def")
	}

	// Proposal: zero-mean independent Gaussian with inflated variance so it
	// covers the target.
	muImp := make([]float64, dim)
	sigmaImp := mat64.NewSymDense(dim, nil)
	for i := 0; i < dim; i++ {
		sigmaImp.SetSym(i, i, 3)
	}
	proposal, ok := distmv.NewNormal(muImp, sigmaImp, nil)
	if !ok {
		t.Fatal("bad test, sigma not pos def")
	}

	nSamples := 100000
	batch := mat64.NewDense(nSamples, dim, nil)
	weights := make([]float64, nSamples)
	Importance(batch, weights, target, proposal)

	// The weighted sample moments should match the target's moments.
	compareNormal(t, target, batch, weights)
}
// TestRejection checks rejection sampling by comparing the sample mean of
// draws from a random box uniform against the distribution's mean.
//
// Note: the previous version of this test allocated a weights slice but never
// filled it, so stat.Mean divided by a zero weight sum, returned NaN, and the
// comparison below could never fail. Rejection produces unweighted samples,
// so the mean is now computed with nil weights.
func TestRejection(t *testing.T) {
	// Test by finding the expected value of a uniform.
	dim := 3
	bounds := make([]distmv.Bound, dim)
	for i := 0; i < dim; i++ {
		min := rand.NormFloat64()
		max := rand.NormFloat64()
		if min > max {
			min, max = max, min
		}
		bounds[i].Min = min
		bounds[i].Max = max
	}
	target := distmv.NewUniform(bounds, nil)
	mu := target.Mean(nil)

	// Proposal: a wide independent Gaussian covering the box.
	muImp := make([]float64, dim)
	sigmaImp := mat64.NewSymDense(dim, nil)
	for i := 0; i < dim; i++ {
		sigmaImp.SetSym(i, i, 6)
	}
	proposal, ok := distmv.NewNormal(muImp, sigmaImp, nil)
	if !ok {
		t.Fatal("bad test, sigma not pos def")
	}

	nSamples := 1000
	batch := mat64.NewDense(nSamples, dim, nil)
	_, ok = Rejection(batch, target, proposal, 1000, nil)
	if !ok {
		t.Error("Bad test, nan samples")
	}

	for i := 0; i < dim; i++ {
		col := mat64.Col(nil, i, batch)
		// Rejection samples are unweighted; use the plain mean.
		ev := stat.Mean(col, nil)
		if math.Abs(ev-mu[i]) > 1e-2 {
			t.Errorf("Mean mismatch: Want %v, got %v", mu[i], ev)
		}
	}
}
// TestMetropolisHastings checks the MH chain against the moments of a random
// multivariate normal target, discarding an initial burn-in segment.
func TestMetropolisHastings(t *testing.T) {
	// Test by finding the expected value of a normal distribution.
	dim := 3
	target, ok := randomNormal(dim)
	if !ok {
		t.Fatal("bad test, sigma not pos def")
	}

	// Tight independent Gaussian proposal around the current location.
	sigmaImp := mat64.NewSymDense(dim, nil)
	for i := 0; i < dim; i++ {
		sigmaImp.SetSym(i, i, 0.25)
	}
	proposal, ok := NewProposalNormal(sigmaImp, nil)
	if !ok {
		t.Fatal("bad test, sigma not pos def")
	}

	nSamples := 1000000
	burnin := 5000
	batch := mat64.NewDense(nSamples, dim, nil)
	initial := make([]float64, dim)
	MetropolisHastings(batch, initial, target, proposal, nil)

	// Discard the burn-in segment before comparing moments.
	batch = batch.View(burnin, 0, nSamples-burnin, dim).(*mat64.Dense)
	compareNormal(t, target, batch, nil)
}
// randomNormal constructs a random Normal distribution with a random mean and
// a random positive semi-definite covariance built as an outer product.
func randomNormal(dim int) (*distmv.Normal, bool) {
	elems := make([]float64, dim*dim)
	for i := range elems {
		elems[i] = rand.Float64()
	}
	var cov mat64.SymDense
	cov.SymOuterK(1, mat64.NewDense(dim, dim, elems))
	mean := make([]float64, dim)
	for i := range mean {
		mean[i] = rand.NormFloat64()
	}
	return distmv.NewNormal(mean, &cov, nil)
}
// compareNormal checks that the (optionally weighted) sample mean and
// covariance of batch agree with the normal distribution want, to within
// loose tolerances suitable for Monte Carlo estimates.
func compareNormal(t *testing.T, want *distmv.Normal, batch *mat64.Dense, weights []float64) {
	dim := want.Dim()
	mu := want.Mean(nil)
	sigma := want.CovarianceMatrix(nil)
	n, _ := batch.Dims()
	if weights == nil {
		// nil weights means all samples are weighted equally.
		weights = make([]float64, n)
		for i := range weights {
			weights[i] = 1
		}
	}
	for i := 0; i < dim; i++ {
		col := mat64.Col(nil, i, batch)
		ev := stat.Mean(col, weights)
		if math.Abs(ev-mu[i]) > 1e-2 {
			t.Errorf("Mean mismatch: Want %v, got %v", mu[i], ev)
		}
	}

	cov := stat.CovarianceMatrix(nil, batch, weights)
	if !mat64.EqualApprox(cov, sigma, 1.5e-1) {
		t.Errorf("Covariance matrix mismatch")
	}
}
// TestMetropolisHastingser verifies that Sample with BurnIn and Rate produces
// exactly the thinned subsequence of the chain generated with no burn-in or
// thinning, by reseeding the global RNG and comparing the two runs.
func TestMetropolisHastingser(t *testing.T) {
	for seed, test := range []struct {
		dim, burnin, rate, samples int
	}{
		{3, 10, 1, 1},
		{3, 10, 2, 1},
		{3, 10, 1, 2},
		{3, 10, 3, 2},
		{3, 10, 7, 4},
		{3, 10, 7, 4},
		{3, 11, 51, 103},
		{3, 11, 103, 51},
		{3, 51, 11, 103},
		{3, 51, 103, 11},
		{3, 103, 11, 51},
		{3, 103, 51, 11},
	} {
		dim := test.dim

		initial := make([]float64, dim)
		target, ok := randomNormal(dim)
		if !ok {
			t.Fatal("bad test, sigma not pos def")
		}

		sigmaImp := mat64.NewSymDense(dim, nil)
		for i := 0; i < dim; i++ {
			sigmaImp.SetSym(i, i, 0.25)
		}
		proposal, ok := NewProposalNormal(sigmaImp, nil)
		if !ok {
			t.Fatal("bad test, sigma not pos def")
		}

		// Test the Metropolis Hastingser by generating all the samples, then generating
		// the same samples with a burnin and rate.
		rand.Seed(int64(seed))
		mh := MetropolisHastingser{
			Initial:  initial,
			Target:   target,
			Proposal: proposal,
			Src:      nil,
			BurnIn:   0,
			Rate:     0,
		}
		samples := test.samples
		burnin := test.burnin
		rate := test.rate
		// The full chain needs 1 kept sample plus burnin plus rate-1
		// discarded steps before each of the remaining samples.
		fullBatch := mat64.NewDense(1+burnin+rate*(samples-1), dim, nil)
		mh.Sample(fullBatch)

		mh = MetropolisHastingser{
			Initial:  initial,
			Target:   target,
			Proposal: proposal,
			Src:      nil,
			BurnIn:   burnin,
			Rate:     rate,
		}
		// Reseed so the thinned run consumes the identical random stream.
		rand.Seed(int64(seed))
		batch := mat64.NewDense(samples, dim, nil)
		mh.Sample(batch)

		same := true
		count := burnin
		for i := 0; i < samples; i++ {
			if !floats.Equal(batch.RawRowView(i), fullBatch.RawRowView(count)) {
				fmt.Println("sample ", i, "is different")
				same = false
				break
			}
			count += rate
		}

		if !same {
			fmt.Printf("%v\n", mat64.Formatted(batch))
			fmt.Printf("%v\n", mat64.Formatted(fullBatch))

			t.Errorf("sampling mismatch: dim = %v, burnin = %v, rate = %v, samples = %v", dim, burnin, rate, samples)
		}
	}
}

282
stat/samplemv/samplemv.go Normal file
View File

@@ -0,0 +1,282 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package samplemv implements advanced sampling routines from explicit and implicit
// probability distributions.
//
// Each sampling routine is implemented as a stateless function with a
// complementary wrapper type. The wrapper types allow the sampling routines
// to implement interfaces.
package samplemv
import (
"errors"
"math"
"math/rand"
"github.com/gonum/matrix/mat64"
"github.com/gonum/stat/distmv"
)
// badLengthMismatch is the panic message used throughout the package for
// mismatched slice/matrix dimensions.
var (
	badLengthMismatch = "samplemv: slice length mismatch"
)

// Compile-time interface satisfaction checks.
var (
	_ Sampler = LatinHypercuber{}
	_ Sampler = (*Rejectioner)(nil)
	_ Sampler = IIDer{}

	_ WeightedSampler = SampleUniformWeighted{}
	_ WeightedSampler = Importancer{}
)
// min returns the smaller of its two arguments.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
// Sampler generates a batch of samples according to the rule specified by the
// implementing type. The number of samples generated is equal to rows(batch),
// and the samples are stored in-place into the input.
type Sampler interface {
	Sample(batch *mat64.Dense)
}

// WeightedSampler generates a batch of samples and their relative weights
// according to the rule specified by the implementing type. The number of samples
// generated is equal to rows(batch), and the samples and weights
// are stored in-place into the inputs. The length of weights must equal
// rows(batch), otherwise SampleWeighted will panic.
type WeightedSampler interface {
	SampleWeighted(batch *mat64.Dense, weights []float64)
}
// SampleUniformWeighted wraps a Sampler type to create a WeightedSampler where all
// weights are equal.
type SampleUniformWeighted struct {
	Sampler
}

// SampleWeighted generates rows(batch) samples from the embedded Sampler type
// and sets all of the weights equal to 1. If rows(batch) and len(weights)
// of weights are not equal, SampleWeighted will panic.
func (w SampleUniformWeighted) SampleWeighted(batch *mat64.Dense, weights []float64) {
	r, _ := batch.Dims()
	if r != len(weights) {
		panic(badLengthMismatch)
	}
	w.Sample(batch)
	// Every sample from the embedded Sampler is equally weighted.
	for i := range weights {
		weights[i] = 1
	}
}
// LatinHypercuber is a wrapper around the LatinHypercube sampling generation
// method.
type LatinHypercuber struct {
	Q   distmv.Quantiler // quantile transform of the target distribution
	Src *rand.Rand       // random source; nil means the global source
}

// Sample generates rows(batch) samples using the LatinHypercube generation
// procedure.
func (l LatinHypercuber) Sample(batch *mat64.Dense) {
	LatinHypercube(batch, l.Q, l.Src)
}
// LatinHypercube generates rows(batch) samples using Latin hypercube sampling
// from the given distribution. If src is not nil, it will be used to generate
// random numbers, otherwise rand.Float64 will be used.
//
// Latin hypercube sampling divides the cumulative distribution function into equally
// spaced bins and guarantees that one sample is generated per bin. Within each bin,
// the location is randomly sampled. The distmv.NewUnitUniform function can be used
// for easy sampling from the unit hypercube.
func LatinHypercube(batch *mat64.Dense, q distmv.Quantiler, src *rand.Rand) {
	r, c := batch.Dims()
	var f64 func() float64
	var perm func(int) []int
	if src != nil {
		f64 = src.Float64
		perm = src.Perm
	} else {
		f64 = rand.Float64
		perm = rand.Perm
	}
	r64 := float64(r)
	for i := 0; i < c; i++ {
		// Draw one uniform value in each of the r equal CDF bins, then
		// scatter the bins across rows with a random permutation.
		p := perm(r)
		for j := 0; j < r; j++ {
			v := f64()/r64 + float64(j)/r64
			batch.Set(p[j], i, v)
		}
	}
	// Map the uniform hypercube samples through the quantile function to
	// obtain samples from the target distribution.
	p := make([]float64, c)
	for i := 0; i < r; i++ {
		copy(p, batch.RawRowView(i))
		q.Quantile(batch.RawRowView(i), p)
	}
}
// Importancer is a wrapper around the Importance sampling generation method.
type Importancer struct {
	Target   distmv.LogProber     // distribution whose expectation is sought
	Proposal distmv.RandLogProber // distribution samples are drawn from
}

// SampleWeighted generates rows(batch) samples using the Importance sampling
// generation procedure.
func (l Importancer) SampleWeighted(batch *mat64.Dense, weights []float64) {
	Importance(batch, weights, l.Target, l.Proposal)
}
// Importance sampling generates rows(batch) samples from the proposal distribution,
// and stores the locations and importance sampling weights in place.
//
// Importance sampling is a variance reduction technique where samples are
// generated from a proposal distribution, q(x), instead of the target distribution
// p(x). This allows relatively unlikely samples in p(x) to be generated more frequently.
//
// The importance sampling weight at x is given by p(x)/q(x). To reduce variance,
// a good proposal distribution will bound this sampling weight. This implies the
// support of q(x) should be at least as broad as p(x), and q(x) should be "fatter tailed"
// than p(x).
//
// If weights is nil, the weights are not stored. Otherwise the length of
// weights must equal the number of rows in batch, or Importance will panic.
func Importance(batch *mat64.Dense, weights []float64, target distmv.LogProber, proposal distmv.RandLogProber) {
	r, _ := batch.Dims()
	// A nil weights slice means the caller does not want the weights
	// (documented behavior); only check the length when storing them.
	if weights != nil && r != len(weights) {
		panic(badLengthMismatch)
	}
	for i := 0; i < r; i++ {
		v := batch.RawRowView(i)
		proposal.Rand(v)
		if weights != nil {
			// w = p(x)/q(x), computed in log space for numerical stability.
			weights[i] = math.Exp(target.LogProb(v) - proposal.LogProb(v))
		}
	}
}
// ErrRejection is returned when the constant in Rejection is not sufficiently high.
var ErrRejection = errors.New("rejection: acceptance ratio above 1")

// Rejectioner is a wrapper around the Rejection sampling generation procedure.
// If the rejection sampling fails during the call to Sample, all samples will
// be set to math.NaN() and a call to Err will return a non-nil value.
type Rejectioner struct {
	C        float64              // acceptance constant; must bound target/proposal
	Target   distmv.LogProber     // distribution to be sampled
	Proposal distmv.RandLogProber // distribution proposals are drawn from
	Src      *rand.Rand           // random source; nil means the global source

	err      error // outcome of the most recent Sample call
	proposed int   // proposals made during the most recent Sample call
}
// Err returns nil if the most recent call to sample was successful, and returns
// ErrRejection if it was not.
func (r *Rejectioner) Err() error {
	return r.err
}

// Proposed returns the number of samples proposed during the most recent call to
// Sample.
func (r *Rejectioner) Proposed() int {
	return r.proposed
}
// Sample generates rows(batch) using the Rejection sampling generation procedure.
// Rejection sampling may fail if the constant is insufficiently high, as described
// in the function comment for Rejection. If the generation fails, the samples
// are set to math.NaN(), and a call to Err will return a non-nil value.
func (r *Rejectioner) Sample(batch *mat64.Dense) {
	// Clear state from any previous call before sampling.
	r.err = nil
	r.proposed = 0
	proposed, ok := Rejection(batch, r.Target, r.Proposal, r.C, r.Src)
	if !ok {
		r.err = ErrRejection
	}
	r.proposed = proposed
}
// Rejection generates rows(batch) samples using the rejection sampling algorithm and
// stores them in place into samples.
// Sampling continues until batch is filled. Rejection returns the total number of proposed
// locations and a boolean indicating if the rejection sampling assumption is
// violated (see details below). If the returned boolean is false, all elements
// of samples are set to NaN. If src != nil, it will be used to generate random
// numbers, otherwise rand.Float64 will be used.
//
// Rejection sampling generates points from the target distribution by using
// the proposal distribution. At each step of the algorithm, the proposed point
// is accepted with probability
//  p = target(x) / (proposal(x) * c)
// where target(x) is the probability of the point according to the target distribution
// and proposal(x) is the probability according to the proposal distribution.
// The constant c must be chosen such that target(x) < proposal(x) * c for all x.
// The expected number of proposed samples is len(samples) * c.
//
// Target may return the true (log of) the probability of the location, or it may return
// a value that is proportional to the probability (logprob + constant). This is
// useful for cases where the probability distribution is only known up to a normalization
// constant.
func Rejection(batch *mat64.Dense, target distmv.LogProber, proposal distmv.RandLogProber, c float64, src *rand.Rand) (nProposed int, ok bool) {
	if c < 1 {
		// c == 1 is permitted by the check above, so the message says
		// "greater than or equal to" (for normalized distributions the
		// bound target <= c*proposal requires c >= 1).
		panic("rejection: acceptance constant must be greater than or equal to 1")
	}
	f64 := rand.Float64
	if src != nil {
		f64 = src.Float64
	}
	r, dim := batch.Dims()
	v := make([]float64, dim)
	var idx int
	for {
		nProposed++
		proposal.Rand(v)
		qx := proposal.LogProb(v)
		px := target.LogProb(v)
		accept := math.Exp(px-qx) / c
		if accept > 1 {
			// The constant was too small: the bounding assumption is
			// violated. Invalidate the whole result and return a failure.
			for i := 0; i < r; i++ {
				for j := 0; j < dim; j++ {
					batch.Set(i, j, math.NaN())
				}
			}
			return nProposed, false
		}
		if accept > f64() {
			batch.SetRow(idx, v)
			idx++
			if idx == r {
				break
			}
		}
	}
	return nProposed, true
}
// IIDer is a wrapper around the IID sample generation method.
type IIDer struct {
	Dist distmv.Rander
}

// Sample generates a set of identically and independently distributed samples.
func (iid IIDer) Sample(batch *mat64.Dense) {
	IID(batch, iid.Dist)
}

// IID generates a set of independently and identically distributed samples from
// the input distribution.
func IID(batch *mat64.Dense, d distmv.Rander) {
	r, _ := batch.Dims()
	// Each row is an independent draw, stored in place.
	for i := 0; i < r; i++ {
		d.Rand(batch.RawRowView(i))
	}
}

View File

@@ -0,0 +1,39 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sampleuv
import "github.com/gonum/stat/distuv"
// ProposalDist is a Metropolis-Hastings proposal distribution: a Gaussian
// centered at the conditioning location y with standard deviation Sigma.
type ProposalDist struct {
	Sigma float64
}

// ConditionalRand draws a sample from a normal distribution centered at y.
func (p ProposalDist) ConditionalRand(y float64) float64 {
	return distuv.Normal{Mu: y, Sigma: p.Sigma}.Rand()
}

// ConditionalLogProb returns the log-probability of x under a normal
// distribution centered at y.
func (p ProposalDist) ConditionalLogProb(x, y float64) float64 {
	return distuv.Normal{Mu: y, Sigma: p.Sigma}.LogProb(x)
}
// ExampleMetropolisHastings_burnin shows how to discard the initial,
// initialization-dependent portion of a Metropolis-Hastings chain by slicing.
func ExampleMetropolisHastings_burnin() {
	n := 1000    // The number of samples to generate.
	burnin := 50 // Number of samples to ignore at the start.
	var initial float64
	// target is the distribution from which we would like to sample.
	target := distuv.Weibull{K: 5, Lambda: 0.5}
	// proposal is the proposal distribution. Here, we are choosing
	// a tight Gaussian distribution around the current location. In
	// typical problems, if Sigma is too small, it takes a lot of samples
	// to move around the distribution. If Sigma is too large, it can be hard
	// to find acceptable samples.
	proposal := ProposalDist{Sigma: 0.2}

	samples := make([]float64, n+burnin)
	MetropolisHastings(samples, initial, target, proposal, nil)

	// Remove the initial samples through slicing.
	samples = samples[burnin:]
}

View File

@@ -0,0 +1,49 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sampleuv
import "github.com/gonum/stat/distuv"
// max returns the larger of its two arguments.
func max(a, b int) int {
	if a > b {
		return a
	}
	return b
}
// ExampleMetropolisHastings_samplingRate shows how to thin a
// Metropolis-Hastings chain, keeping only one sample in every `rate`.
func ExampleMetropolisHastings_samplingRate() {
	// See Burnin example for a description of these quantities.
	n := 1000
	burnin := 300
	var initial float64
	target := distuv.Weibull{K: 5, Lambda: 0.5}
	proposal := ProposalDist{Sigma: 0.2}

	// Successive samples are correlated with one another through the
	// Markov Chain defined by the proposal distribution. To get less
	// correlated samples, one may use a sampling rate, in which only
	// one sample from every few is accepted from the chain. This can
	// be accomplished through a for loop.
	rate := 50

	// tmp is reused for both the burn-in period and the discarded chain
	// steps between kept samples.
	tmp := make([]float64, max(rate, burnin))

	// First deal with burnin.
	tmp = tmp[:burnin]
	MetropolisHastings(tmp, initial, target, proposal, nil)
	// The final sample in tmp in the final point in the chain.
	// Use it as the new initial location.
	initial = tmp[len(tmp)-1]

	// Now, generate samples by using one every rate samples.
	tmp = tmp[:rate]
	samples := make([]float64, n)
	samples[0] = initial
	for i := 1; i < len(samples); i++ {
		MetropolisHastings(tmp, initial, target, proposal, nil)
		initial = tmp[len(tmp)-1]
		samples[i] = initial
	}
}

399
stat/sampleuv/sample.go Normal file
View File

@@ -0,0 +1,399 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package sampleuv implements advanced sampling routines from explicit and implicit
// probability distributions.
//
// Each sampling routine is implemented as a stateless function with a
// complementary wrapper type. The wrapper types allow the sampling routines
// to implement interfaces.
package sampleuv
import (
"errors"
"math"
"math/rand"
"github.com/gonum/stat/distuv"
)
// badLengthMismatch is the panic message used when paired slice arguments
// (for example batch and weights) do not have equal length.
var (
	badLengthMismatch = "sample: slice length mismatch"
)

// Compile-time checks that the wrapper types satisfy the sampling
// interfaces defined below.
var (
	_ Sampler = LatinHypercuber{}
	_ Sampler = MetropolisHastingser{}
	_ Sampler = (*Rejectioner)(nil)
	_ Sampler = IIDer{}

	_ WeightedSampler = SampleUniformWeighted{}
	_ WeightedSampler = Importancer{}
)
// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
// Sampler generates a batch of samples according to the rule specified by the
// implementing type. The number of samples generated is equal to len(batch),
// and the samples are stored in-place into the input.
// Implementations in this package include LatinHypercuber,
// MetropolisHastingser, Rejectioner and IIDer.
type Sampler interface {
	Sample(batch []float64)
}

// WeightedSampler generates a batch of samples and their relative weights
// according to the rule specified by the implementing type. The number of samples
// generated is equal to len(batch), and the samples and weights
// are stored in-place into the inputs. The length of weights must equal
// len(batch), otherwise SampleWeighted will panic.
// Implementations in this package include SampleUniformWeighted and Importancer.
type WeightedSampler interface {
	SampleWeighted(batch, weights []float64)
}
// SampleUniformWeighted wraps a Sampler type to create a WeightedSampler where all
// weights are equal.
type SampleUniformWeighted struct {
	Sampler
}

// SampleWeighted generates len(batch) samples from the embedded Sampler type
// and sets all of the weights equal to 1. If len(batch) and len(weights)
// are not equal, SampleWeighted will panic.
func (w SampleUniformWeighted) SampleWeighted(batch, weights []float64) {
	if len(weights) != len(batch) {
		panic(badLengthMismatch)
	}
	w.Sample(batch)
	// All samples carry unit weight by construction.
	for i := 0; i < len(weights); i++ {
		weights[i] = 1
	}
}
// LatinHypercuber is a wrapper around the LatinHypercube sampling generation
// method.
type LatinHypercuber struct {
	// Q is the quantile function of the distribution to sample from.
	Q distuv.Quantiler
	// Src is the random source; if nil the global rand functions are used.
	Src *rand.Rand
}

// Sample generates len(batch) samples using the LatinHypercube generation
// procedure.
func (l LatinHypercuber) Sample(batch []float64) {
	LatinHypercube(batch, l.Q, l.Src)
}
// LatinHypercube generates len(batch) samples using Latin hypercube sampling
// from the given distribution. If src != nil, it will be used to generate
// random numbers, otherwise rand.Float64 will be used.
//
// Latin hypercube sampling divides the cumulative distribution function into equally
// spaced bins and guarantees that one sample is generated per bin. Within each bin,
// the location is randomly sampled. The distuv.UnitUniform variable can be used
// for easy generation from the unit interval.
func LatinHypercube(batch []float64, q distuv.Quantiler, src *rand.Rand) {
	n := len(batch)
	rnd := rand.Float64
	var p []int
	if src == nil {
		p = rand.Perm(n)
	} else {
		rnd = src.Float64
		p = src.Perm(n)
	}
	for bin := 0; bin < n; bin++ {
		// Draw a point uniformly within CDF bin number `bin` and map
		// it back through the quantile function; the permutation
		// scatters the bins across the output slice.
		u := rnd()/float64(n) + float64(bin)/float64(n)
		batch[p[bin]] = q.Quantile(u)
	}
}
// Importancer is a wrapper around the Importance sampling generation method.
type Importancer struct {
	Target   distuv.LogProber
	Proposal distuv.RandLogProber
}

// SampleWeighted generates len(batch) samples using the Importance sampling
// generation procedure, storing the importance weights into weights.
func (im Importancer) SampleWeighted(batch, weights []float64) {
	Importance(batch, weights, im.Target, im.Proposal)
}
// Importance sampling generates len(batch) samples from the proposal distribution,
// and stores the locations and importance sampling weights in place.
//
// Importance sampling is a variance reduction technique where samples are
// generated from a proposal distribution, q(x), instead of the target distribution
// p(x). This allows relatively unlikely samples in p(x) to be generated more frequently.
//
// The importance sampling weight at x is given by p(x)/q(x). To reduce variance,
// a good proposal distribution will bound this sampling weight. This implies the
// support of q(x) should be at least as broad as p(x), and q(x) should be "fatter tailed"
// than p(x).
//
// If weights is nil, the weights are not stored. The length of weights must equal
// the length of batch, otherwise Importance will panic.
func Importance(batch, weights []float64, target distuv.LogProber, proposal distuv.RandLogProber) {
	// A nil weights slice means "do not store the weights" (see the doc
	// comment above); only enforce the length match when weights are
	// actually requested.
	if weights != nil && len(batch) != len(weights) {
		panic(badLengthMismatch)
	}
	for i := range batch {
		v := proposal.Rand()
		batch[i] = v
		if weights != nil {
			weights[i] = math.Exp(target.LogProb(v) - proposal.LogProb(v))
		}
	}
}
// ErrRejection is returned when the constant in Rejection is not sufficiently high.
var ErrRejection = errors.New("rejection: acceptance ratio above 1")

// Rejectioner is a wrapper around the Rejection sampling generation procedure.
// If the rejection sampling fails during the call to Sample, all samples will
// be set to math.NaN() and a call to Err will return a non-nil value.
type Rejectioner struct {
	// C is the rejection constant passed to Rejection; it must satisfy
	// target(x) <= proposal(x) * C for all x.
	C        float64
	Target   distuv.LogProber
	Proposal distuv.RandLogProber
	// Src is the random source; if nil the global rand functions are used.
	Src *rand.Rand

	// err records the outcome of the most recent call to Sample.
	err error
	// proposed counts the candidate draws made by the most recent Sample.
	proposed int
}

// Err returns nil if the most recent call to sample was successful, and returns
// ErrRejection if it was not.
func (r *Rejectioner) Err() error {
	return r.err
}

// Proposed returns the number of samples proposed during the most recent call to
// Sample.
func (r *Rejectioner) Proposed() int {
	return r.proposed
}
// Sample generates len(batch) using the Rejection sampling generation procedure.
// Rejection sampling may fail if the constant is insufficiently high, as described
// in the function comment for Rejection. If the generation fails, the samples
// are set to math.NaN(), and a call to Err will return a non-nil value.
func (r *Rejectioner) Sample(batch []float64) {
	// Clear the state of any previous call before sampling.
	r.err = nil
	r.proposed = 0
	nProp, ok := Rejection(batch, r.Target, r.Proposal, r.C, r.Src)
	r.proposed = nProp
	if !ok {
		r.err = ErrRejection
	}
}
// Rejection generates len(batch) samples using the rejection sampling algorithm
// and stores them in place into samples. Sampling continues until batch is
// filled. Rejection returns the total number of proposed locations and a boolean
// indicating if the rejection sampling assumption is violated (see details
// below). If the returned boolean is false, all elements of samples are set to
// NaN. If src is not nil, it will be used to generate random numbers, otherwise
// rand.Float64 will be used.
//
// Rejection sampling generates points from the target distribution by using
// the proposal distribution. At each step of the algorithm, the proposed point
// is accepted with probability
//  p = target(x) / (proposal(x) * c)
// where target(x) is the probability of the point according to the target distribution
// and proposal(x) is the probability according to the proposal distribution.
// The constant c must be chosen such that target(x) < proposal(x) * c for all x.
// The expected number of proposed samples is len(samples) * c.
//
// Target may return the true (log of) the probability of the location, or it may return
// a value that is proportional to the probability (logprob + constant). This is
// useful for cases where the probability distribution is only known up to a normalization
// constant.
func Rejection(batch []float64, target distuv.LogProber, proposal distuv.RandLogProber, c float64, src *rand.Rand) (nProposed int, ok bool) {
	if c < 1 {
		// c must be at least 1 for target(x) <= proposal(x)*c to be
		// satisfiable; the check deliberately permits c == 1.
		panic("rejection: acceptance constant must be at least 1")
	}
	// Guard the empty batch explicitly: the sampling loop below only
	// terminates when idx reaches len(batch) after a successful store,
	// which can never happen for a zero-length batch.
	if len(batch) == 0 {
		return 0, true
	}
	f64 := rand.Float64
	if src != nil {
		f64 = src.Float64
	}
	var idx int
	for {
		nProposed++
		v := proposal.Rand()
		qx := proposal.LogProb(v)
		px := target.LogProb(v)
		accept := math.Exp(px-qx) / c
		if accept > 1 {
			// The assumption target <= proposal*c is violated.
			// Invalidate the whole result and return a failure.
			for i := range batch {
				batch[i] = math.NaN()
			}
			return nProposed, false
		}
		if accept > f64() {
			batch[idx] = v
			idx++
			if idx == len(batch) {
				break
			}
		}
	}
	return nProposed, true
}
// MHProposal defines a proposal distribution for Metropolis Hastings.
type MHProposal interface {
	// ConditionalLogProb returns the log probability of the first argument
	// conditioned on being at the second argument,
	//  log p(x|y)
	ConditionalLogProb(x, y float64) (prob float64)

	// ConditionalRand generates a new random location conditioned being at the
	// location y.
	ConditionalRand(y float64) (x float64)
}
// MetropolisHastingser is a wrapper around the MetropolisHastings sampling type.
//
// BurnIn sets the number of samples to discard before keeping the first sample.
// A properly set BurnIn rate will decorrelate the sampling chain from the initial
// location. The proper BurnIn value will depend on the mixing time of the
// Markov chain defined by the target and proposal distributions.
//
// Rate sets the number of samples to discard in between each kept sample. A
// higher rate will better approximate independently and identically distributed
// samples, while a lower rate will keep more information (at the cost of
// higher correlation between samples). If Rate is 0 it is defaulted to 1.
//
// The initial value is NOT changed during calls to Sample.
type MetropolisHastingser struct {
	// Initial is the starting location of the Markov chain.
	Initial  float64
	Target   distuv.LogProber
	Proposal MHProposal
	// Src is the random source; if nil the global rand functions are used.
	Src *rand.Rand

	BurnIn int
	Rate   int
}
// Sample generates len(batch) samples using the Metropolis Hastings sample
// generation method. The initial location is NOT updated during the call to Sample.
func (m MetropolisHastingser) Sample(batch []float64) {
	burnIn := m.BurnIn
	rate := m.Rate
	if rate == 0 {
		rate = 1
	}

	// Use the optimal size for the temporary memory to allow the fewest calls
	// to MetropolisHastings. The case where tmp shadows samples must be
	// aligned with the logic after burn-in so that tmp does not shadow samples
	// during the rate portion.
	tmp := batch
	if rate > len(batch) {
		tmp = make([]float64, rate)
	}

	// Perform burn-in. Generate the discarded samples into the front of
	// tmp so the last generated sample, tmp[newSamp-1], is the current
	// head of the chain.
	remaining := burnIn
	initial := m.Initial
	for remaining != 0 {
		newSamp := min(len(tmp), remaining)
		MetropolisHastings(tmp[:newSamp], initial, m.Target, m.Proposal, m.Src)
		initial = tmp[newSamp-1]
		remaining -= newSamp
	}

	if rate == 1 {
		MetropolisHastings(batch, initial, m.Target, m.Proposal, m.Src)
		return
	}

	// If tmp is aliased to batch, allocate fresh scratch space so that the
	// rate-discarded samples do not overwrite the kept samples.
	if len(tmp) <= len(batch) {
		tmp = make([]float64, rate)
	}

	// Take a single sample from the chain.
	MetropolisHastings(batch[0:1], initial, m.Target, m.Proposal, m.Src)
	initial = batch[0]
	// For all of the other samples, first generate Rate samples and then actually
	// accept the last one.
	for i := 1; i < len(batch); i++ {
		MetropolisHastings(tmp, initial, m.Target, m.Proposal, m.Src)
		v := tmp[rate-1]
		batch[i] = v
		initial = v
	}
}
// MetropolisHastings generates len(batch) samples using the Metropolis Hastings
// algorithm (http://en.wikipedia.org/wiki/Metropolis%E2%80%93Hastings_algorithm),
// with the given target and proposal distributions, starting at the initial location
// and storing the results in-place into samples. If src != nil, it will be used to generate random
// numbers, otherwise rand.Float64 will be used.
//
// Metropolis-Hastings is a Markov-chain Monte Carlo algorithm that generates
// samples according to the distribution specified by target by using the Markov
// chain implicitly defined by the proposal distribution. At each
// iteration, a proposal point is generated randomly from the current location.
// This proposal point is accepted with probability
//  p = min(1, (target(new) * proposal(current|new)) / (target(current) * proposal(new|current)))
// If the new location is accepted, it is stored into batch and becomes the
// new current location. If it is rejected, the current location remains and
// is stored into samples. Thus, a location is stored into batch at every iteration.
//
// The samples in Metropolis Hastings are correlated with one another through the
// Markov chain. As a result, the initial value can have a significant influence
// on the early samples, and so, typically, the first samples generated by the chain
// are ignored. This is known as "burn-in", and can be accomplished with slicing.
// The best choice for burn-in length will depend on the sampling and target
// distributions.
//
// Many choose to have a sampling "rate" where a number of samples
// are ignored in between each kept sample. This helps decorrelate
// the samples from one another, but also reduces the number of available samples.
// A sampling rate can be implemented with successive calls to MetropolisHastings.
func MetropolisHastings(batch []float64, initial float64, target distuv.LogProber, proposal MHProposal, src *rand.Rand) {
	rnd := rand.Float64
	if src != nil {
		rnd = src.Float64
	}
	x := initial
	logP := target.LogProb(initial)
	for i := range batch {
		// Propose a candidate move and compute the (log) acceptance
		// ratio, including the forward/backward proposal correction.
		candidate := proposal.ConditionalRand(x)
		candLogP := target.LogProb(candidate)
		forward := proposal.ConditionalLogProb(candidate, x)
		backward := proposal.ConditionalLogProb(x, candidate)
		if math.Exp(candLogP+backward-forward-logP) > rnd() {
			x = candidate
			logP = candLogP
		}
		batch[i] = x
	}
}
// IIDer is a wrapper around the IID sample generation method.
type IIDer struct {
	// Dist is the distribution the samples are drawn from.
	Dist distuv.Rander
}

// Sample generates a set of identically and independently distributed samples.
func (iid IIDer) Sample(batch []float64) {
	IID(batch, iid.Dist)
}
// IID generates a set of independently and identically distributed samples from
// the input distribution.
func IID(batch []float64, d distuv.Rander) {
	// Each element is an independent draw from d.
	for i := 0; i < len(batch); i++ {
		batch[i] = d.Rand()
	}
}

View File

@@ -0,0 +1,99 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sampleuv
import (
"math"
"sort"
"testing"
"github.com/gonum/stat"
"github.com/gonum/stat/distuv"
)
// lhDist is the set of distribution methods the Latin hypercube test needs:
// Quantile to generate samples and CDF to validate bin membership.
type lhDist interface {
	Quantile(float64) float64
	CDF(float64) float64
}
func TestLatinHypercube(t *testing.T) {
	// Every sorted sample must land inside its own equally spaced CDF bin.
	for _, n := range []int{1, 2, 5, 10, 20} {
		batch := make([]float64, n)
		dists := []lhDist{
			distuv.Uniform{Min: 0, Max: 1},
			distuv.Uniform{Min: 0, Max: 10},
			distuv.Normal{Mu: 5, Sigma: 3},
		}
		for _, d := range dists {
			LatinHypercube(batch, d, nil)
			sort.Float64s(batch)
			for i, v := range batch {
				lo := float64(i) / float64(n)
				hi := float64(i+1) / float64(n)
				if p := d.CDF(v); p < lo || p > hi {
					t.Errorf("probability out of bounds")
				}
			}
		}
	}
}
func TestImportance(t *testing.T) {
	// Estimate the mean of a Normal via importance sampling and check
	// that it is recovered to within 1e-2.
	trueMean := 3.0
	target := distuv.Normal{Mu: trueMean, Sigma: 2}
	proposal := distuv.Normal{Mu: 0, Sigma: 5}

	nSamples := 100000
	x := make([]float64, nSamples)
	w := make([]float64, nSamples)
	Importance(x, w, target, proposal)

	if ev := stat.Mean(x, w); math.Abs(ev-trueMean) > 1e-2 {
		t.Errorf("Mean mismatch: Want %v, got %v", trueMean, ev)
	}
}
func TestRejection(t *testing.T) {
	// Estimate the mean of a Normal via rejection sampling and check
	// that it is recovered to within 1e-2.
	trueMean := 3.0
	target := distuv.Normal{Mu: trueMean, Sigma: 2}
	proposal := distuv.Normal{Mu: 0, Sigma: 5}

	nSamples := 100000
	x := make([]float64, nSamples)
	Rejection(x, target, proposal, 100, nil)

	if ev := stat.Mean(x, nil); math.Abs(ev-trueMean) > 1e-2 {
		t.Errorf("Mean mismatch: Want %v, got %v", trueMean, ev)
	}
}
// condNorm is a Metropolis-Hastings proposal that draws from a Normal
// centered at the current location with standard deviation Sigma.
type condNorm struct {
	Sigma float64
}

// ConditionalRand returns a draw from N(y, Sigma).
func (c condNorm) ConditionalRand(y float64) float64 {
	return distuv.Normal{Mu: y, Sigma: c.Sigma}.Rand()
}

// ConditionalLogProb returns the log probability of x under N(y, Sigma).
func (c condNorm) ConditionalLogProb(x, y float64) float64 {
	return distuv.Normal{Mu: y, Sigma: c.Sigma}.LogProb(x)
}
func TestMetropolisHastings(t *testing.T) {
	// Estimate the mean of a Normal with Metropolis-Hastings, discarding
	// a burn-in prefix to decorrelate from the (deliberately bad) start.
	trueMean := 3.0
	target := distuv.Normal{Mu: trueMean, Sigma: 2}
	proposal := condNorm{Sigma: 5}

	burnin := 500
	x := make([]float64, 100000+burnin)
	MetropolisHastings(x, 100, target, proposal, nil)

	// Remove burnin
	kept := x[burnin:]
	if ev := stat.Mean(kept, nil); math.Abs(ev-trueMean) > 1e-2 {
		t.Errorf("Mean mismatch: Want %v, got %v", trueMean, ev)
	}
}

137
stat/sampleuv/weighted.go Normal file
View File

@@ -0,0 +1,137 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this code is governed by a BSD-style
// license that can be found in the LICENSE file
package sampleuv
import (
"math/rand"
"github.com/gonum/floats"
)
// Weighted provides sampling without replacement from a collection of items with
// non-uniform probability.
type Weighted struct {
	// weights holds the current weight of each index;
	// a taken index has its weight set to zero.
	weights []float64
	// heap is a weight heap.
	//
	// It keeps a heap-organised sum of remaining
	// index weights that are available to be taken
	// from.
	//
	// Each element holds the sum of weights for
	// the corresponding index, plus the sum of
	// of its children's weights; the children
	// of an element i can be found at positions
	// 2*(i+1)-1 and 2*(i+1). The root of the
	// weight heap is at element 0.
	//
	// See comments in container/heap for an
	// explanation of the layout of a heap.
	heap []float64
	// src is the random source; when nil, Take falls
	// back to the global rand functions.
	src *rand.Rand
}
// NewWeighted returns a Weighted for the weights w. If src is nil, rand.Rand is
// used as the random source.
//
// Note that sampling from weights with a high variance or overall low absolute
// value sum may result in problems with numerical stability.
func NewWeighted(w []float64, src *rand.Rand) Weighted {
	s := Weighted{
		weights: make([]float64, len(w)),
		heap:    make([]float64, len(w)),
		// src must be stored so that Take draws from the caller's
		// source; a nil src makes Take use the global rand functions.
		src: src,
	}
	s.ReweightAll(w)
	return s
}
// Len returns the number of items held by the Weighted, including items
// already taken. The length is fixed at construction; ReweightAll panics
// on any slice of a different length.
func (s Weighted) Len() int { return len(s.weights) }
// Take returns an index from the Weighted with probability proportional
// to the weight of the item. The weight of the item is then set to zero.
// Take returns false if there are no items remaining.
//
// NOTE(review): s.heap[0] is read unconditionally, so Take panics on a
// zero-item Weighted — confirm callers never construct one.
func (s Weighted) Take() (idx int, ok bool) {
	// Treat a total remaining weight within small of zero as exhausted.
	const small = 1e-12
	if floats.EqualWithinAbsOrRel(s.heap[0], 0, small, small) {
		return -1, false
	}

	// Draw r uniformly in [0, total weight) and walk the heap to find
	// the item whose cumulative span contains r.
	var r float64
	if s.src == nil {
		r = s.heap[0] * rand.Float64()
	} else {
		r = s.heap[0] * s.src.Float64()
	}

	// i is a 1-based index into the implicit binary tree (children of
	// node i are 2i and 2i+1). last and left detect lack of progress,
	// which can arise from floating point error in the sums.
	i := 1
	last := -1
	left := len(s.weights)
	for {
		if r -= s.weights[i-1]; r <= 0 {
			break // Fall within item i-1.
		}
		i <<= 1 // Move to left child.
		if d := s.heap[i-1]; r > d {
			r -= d
			// If enough r to pass left child
			// move to right child state will
			// be caught at break above.
			i++
		}
		if i == last || left < 0 {
			// No progression.
			return -1, false
		}
		last = i
		left--
	}

	// Zero the chosen item's weight and subtract it from the cumulative
	// sums of every ancestor up to the root.
	w, idx := s.weights[i-1], i-1

	s.weights[i-1] = 0
	for i > 0 {
		s.heap[i-1] -= w
		// The following condition is necessary to
		// handle floating point error. If we see
		// a heap value below zero, we know we need
		// to rebuild it.
		if s.heap[i-1] < 0 {
			s.reset()
			return idx, true
		}
		i >>= 1
	}

	return idx, true
}
// Reweight sets the weight of item idx to w.
func (s Weighted) Reweight(idx int, w float64) {
	// delta is how much weight the item loses; subtract it from the
	// item's node and every ancestor in the heap.
	delta := s.weights[idx] - w
	s.weights[idx] = w
	for i := idx + 1; i > 0; i >>= 1 {
		s.heap[i-1] -= delta
	}
}
// ReweightAll sets the weight of all items in the Weighted. ReweightAll
// panics if len(w) != s.Len.
func (s Weighted) ReweightAll(w []float64) {
	if len(w) != s.Len() {
		// The panic message carries this package's prefix; the
		// previous "floats:" prefix belonged to a different package.
		panic("sampleuv: length of the slices do not match")
	}
	copy(s.weights, w)
	s.reset()
}
// reset rebuilds the heap of cumulative sums from the raw weights.
func (s Weighted) reset() {
	copy(s.heap, s.weights)
	// Accumulate each node's sum into its parent, walking from the
	// bottom of the tree up, so that heap[0] ends up with the total.
	for i := len(s.heap) - 1; i > 0; i-- {
		parent := (i - 1) / 2
		s.heap[parent] += s.heap[i]
	}
}

View File

@@ -0,0 +1,267 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this code is governed by a BSD-style
// license that can be found in the LICENSE file
package sampleuv
import (
"flag"
"math/rand"
"reflect"
"testing"
"time"
"github.com/gonum/floats"
)
// prob gates the time-seeded stochastic test below; it is expected to fail
// ≈5% of runs, so it must be requested explicitly.
var prob = flag.Bool("prob", false, "enables probabilistic testing of the random weighted sampler")

// sigChi2 is the χ² significance threshold for the tests below.
const sigChi2 = 16.92 // p = 0.05 df = 9

var (
	// newExp returns a fresh copy of the expected weights (powers of two);
	// the tests scale the returned slice in place.
	newExp = func() []float64 {
		return []float64{1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7, 1 << 8, 1 << 9}
	}
	exp = newExp()

	// obt is the golden selection count observed with the default rand seed.
	obt = []float64{973, 1937, 3898, 7897, 15769, 31284, 62176, 125408, 250295, 500363}
)
// newTestWeighted returns a Weighted holding the power-of-two weights
// 1, 2, 4, ..., 512 with the default random source.
func newTestWeighted() Weighted {
	weights := make([]float64, len(obt))
	w := 1.0
	for i := range weights {
		weights[i] = w
		w *= 2
	}
	return NewWeighted(weights, nil)
}
// TestWeightedUnseeded checks the heap layout of a freshly built Weighted
// and compares 1e6 single Takes against golden counts for the default seed.
func TestWeightedUnseeded(t *testing.T) {
	rand.Seed(0)

	// want encodes the expected cumulative-sum heap for the test weights.
	want := Weighted{
		weights: []float64{1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7, 1 << 8, 1 << 9},
		heap: []float64{
			exp[0] + exp[1] + exp[3] + exp[4] + exp[7] + exp[8] + exp[9] + exp[2] + exp[5] + exp[6],
			exp[1] + exp[3] + exp[4] + exp[7] + exp[8] + exp[9],
			exp[2] + exp[5] + exp[6],
			exp[3] + exp[7] + exp[8],
			exp[4] + exp[9],
			exp[5],
			exp[6],
			exp[7],
			exp[8],
			exp[9],
		},
	}

	ts := newTestWeighted()
	if !reflect.DeepEqual(ts, want) {
		t.Fatalf("unexpected new Weighted value:\ngot: %#v\nwant:%#v", ts, want)
	}

	// Count which item is selected first across many fresh samplers.
	f := make([]float64, len(obt))
	for i := 0; i < 1e6; i++ {
		item, ok := newTestWeighted().Take()
		if !ok {
			t.Fatal("Weighted unexpectedly empty")
		}
		f[item]++
	}

	exp := newExp()
	fac := floats.Sum(f) / floats.Sum(exp)
	for i := range f {
		exp[i] *= fac
	}
	if !reflect.DeepEqual(f, obt) {
		t.Fatalf("unexpected selection:\ngot: %#v\nwant:%#v", f, obt)
	}

	// Check that this is within statistical expectations - we know this is true for this set.
	X := chi2(f, exp)
	if X >= sigChi2 {
		t.Errorf("H₀: d(Sample) = d(Expect), H₁: d(S) ≠ d(Expect). df = %d, p = 0.05, X² threshold = %.2f, X² = %f", len(f)-1, sigChi2, X)
	}
}
// TestWeightedTimeSeeded repeats the χ² distribution check with a
// time-derived seed; it runs only when the -prob flag is set.
func TestWeightedTimeSeeded(t *testing.T) {
	if !*prob {
		t.Skip("probabilistic testing not requested")
	}
	t.Log("Note: This test is stochastic and is expected to fail with probability ≈ 0.05.")

	rand.Seed(time.Now().Unix())

	f := make([]float64, len(obt))
	for i := 0; i < 1e6; i++ {
		item, ok := newTestWeighted().Take()
		if !ok {
			t.Fatal("Weighted unexpectedly empty")
		}
		f[item]++
	}

	exp := newExp()
	fac := floats.Sum(f) / floats.Sum(exp)
	for i := range f {
		exp[i] *= fac
	}

	// Check that our obtained values are within statistical expectations for p = 0.05.
	// This will not be true approximately 1 in 20 tests.
	X := chi2(f, exp)
	if X >= sigChi2 {
		t.Errorf("H₀: d(Sample) = d(Expect), H₁: d(S) ≠ d(Expect). df = %d, p = 0.05, X² threshold = %.2f, X² = %f", len(f)-1, sigChi2, X)
	}
}
// TestWeightZero checks that reweighting an item to zero removes it from
// selection while the other items remain selectable.
func TestWeightZero(t *testing.T) {
	rand.Seed(0)

	want := Weighted{
		weights: []float64{1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 0, 1 << 7, 1 << 8, 1 << 9},
		heap: []float64{
			exp[0] + exp[1] + exp[3] + exp[4] + exp[7] + exp[8] + exp[9] + exp[2] + exp[5],
			exp[1] + exp[3] + exp[4] + exp[7] + exp[8] + exp[9],
			exp[2] + exp[5],
			exp[3] + exp[7] + exp[8],
			exp[4] + exp[9],
			exp[5],
			0,
			exp[7],
			exp[8],
			exp[9],
		},
	}

	ts := newTestWeighted()
	ts.Reweight(6, 0)
	if !reflect.DeepEqual(ts, want) {
		t.Fatalf("unexpected new Weighted value:\ngot: %#v\nwant:%#v", ts, want)
	}

	f := make([]float64, len(obt))
	for i := 0; i < 1e6; i++ {
		ts := newTestWeighted()
		ts.Reweight(6, 0)
		item, ok := ts.Take()
		if !ok {
			t.Fatal("Weighted unexpectedly empty")
		}
		f[item]++
	}

	exp := newExp()
	fac := floats.Sum(f) / floats.Sum(exp)
	for i := range f {
		exp[i] *= fac
	}
	if f[6] != 0 {
		t.Errorf("unexpected selection rate for zero-weighted item: got: %v want:%v", f[6], 0)
	}
	// These two checks must use Fatalf: t.Fatal does not interpret the
	// %v verbs in its arguments.
	if reflect.DeepEqual(f[:6], obt[:6]) {
		t.Fatalf("unexpected selection: too few elements chosen in range:\ngot: %v\nwant:%v",
			f[:6], obt[:6])
	}
	if reflect.DeepEqual(f[7:], obt[7:]) {
		t.Fatalf("unexpected selection: too few elements chosen in range:\ngot: %v\nwant:%v",
			f[7:], obt[7:])
	}
}
// TestWeightIncrease checks that raising an item's weight above all others
// makes it the most frequently selected item.
func TestWeightIncrease(t *testing.T) {
	rand.Seed(0)

	want := Weighted{
		weights: []float64{1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 9 * 2, 1 << 7, 1 << 8, 1 << 9},
		heap: []float64{
			exp[0] + exp[1] + exp[3] + exp[4] + exp[7] + exp[8] + exp[9] + exp[2] + exp[5] + exp[9]*2,
			exp[1] + exp[3] + exp[4] + exp[7] + exp[8] + exp[9],
			exp[2] + exp[5] + exp[9]*2,
			exp[3] + exp[7] + exp[8],
			exp[4] + exp[9],
			exp[5],
			exp[9] * 2,
			exp[7],
			exp[8],
			exp[9],
		},
	}

	ts := newTestWeighted()
	ts.Reweight(6, ts.weights[len(ts.weights)-1]*2)
	if !reflect.DeepEqual(ts, want) {
		t.Fatalf("unexpected new Weighted value:\ngot: %#v\nwant:%#v", ts, want)
	}

	f := make([]float64, len(obt))
	for i := 0; i < 1e6; i++ {
		ts := newTestWeighted()
		ts.Reweight(6, ts.weights[len(ts.weights)-1]*2)
		item, ok := ts.Take()
		if !ok {
			t.Fatal("Weighted unexpectedly empty")
		}
		f[item]++
	}

	exp := newExp()
	fac := floats.Sum(f) / floats.Sum(exp)
	for i := range f {
		exp[i] *= fac
	}
	if f[6] < f[9] {
		t.Errorf("unexpected selection rate for re-weighted item: got: %v want:%v", f[6], f[9])
	}
	// These two checks must use Fatalf: t.Fatal does not interpret the
	// %v verbs in its arguments.
	if reflect.DeepEqual(f[:6], obt[:6]) {
		t.Fatalf("unexpected selection: too many elements chosen in range:\ngot: %v\nwant:%v",
			f[:6], obt[:6])
	}
	if reflect.DeepEqual(f[7:], obt[7:]) {
		t.Fatalf("unexpected selection: too many elements chosen in range:\ngot: %v\nwant:%v",
			f[7:], obt[7:])
	}
}
// chi2 returns the χ² statistic Σ (ob-ex)²/ex for the observed and
// expected counts.
func chi2(ob, ex []float64) (sum float64) {
	for i, o := range ob {
		d := o - ex[i]
		sum += (d * d) / ex[i]
	}
	return sum
}
// TestWeightedNoResample checks that repeated Take calls exhaust every item
// exactly once per reweighting round, with no duplicate selections.
func TestWeightedNoResample(t *testing.T) {
	const (
		tries = 10
		// n must be an integer constant: the previous 10e5 is an
		// untyped float constant, which fails the %d verbs below.
		n = 1000000
	)
	ts := NewWeighted(make([]float64, n), nil)
	w := make([]float64, n)
	for i := 0; i < tries; i++ {
		for j := range w {
			w[j] = rand.Float64() * n
		}
		ts.ReweightAll(w)
		taken := make(map[int]struct{})
		var c int
		for {
			item, ok := ts.Take()
			if !ok {
				if c != n {
					t.Errorf("unexpected number of items: got: %d want: %d", c, n)
				}
				break
			}
			c++
			if _, exists := taken[item]; exists {
				t.Errorf("unexpected duplicate sample for item: %d", item)
			}
			taken[item] = struct{}{}
		}
	}
}

1161
stat/stat.go Normal file

File diff suppressed because it is too large Load Diff

1569
stat/stat_test.go Normal file

File diff suppressed because it is too large Load Diff

147
stat/statmat.go Normal file
View File

@@ -0,0 +1,147 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stat
import (
"math"
"github.com/gonum/floats"
"github.com/gonum/matrix"
"github.com/gonum/matrix/mat64"
)
// CovarianceMatrix returns the covariance matrix (also known as the
// variance-covariance matrix) calculated from a matrix of data, x, using
// a two-pass algorithm.
//
// If weights is not nil the weighted covariance of x is calculated. weights
// must have length equal to the number of rows in input data matrix and
// must not contain negative elements.
// If cov is not nil it must either be zero-sized or have the same number of
// columns as the input data matrix. cov will be used as the destination for
// the covariance data. If cov is nil, a new mat64.SymDense is allocated for
// the destination.
func CovarianceMatrix(cov *mat64.SymDense, x mat64.Matrix, weights []float64) *mat64.SymDense {
	// This is the matrix version of the two-pass algorithm. It doesn't use the
	// additional floating point error correction that the Covariance function uses
	// to reduce the impact of rounding during centering.

	r, c := x.Dims()

	if cov == nil {
		cov = mat64.NewSymDense(c, nil)
	} else if n := cov.Symmetric(); n != c && n != 0 {
		panic(matrix.ErrShape)
	}

	// Work on a transposed copy so each variable is a contiguous row.
	var xt mat64.Dense
	xt.Clone(x.T())
	// Subtract the mean of each of the columns.
	for i := 0; i < c; i++ {
		v := xt.RawRowView(i)
		// This will panic with ErrShape if len(weights) != len(v), so
		// we don't have to check the size later.
		mean := Mean(v, weights)
		floats.AddConst(-mean, v)
	}

	if weights == nil {
		// Calculate the normalization factor
		// scaled by the sample size.
		cov.SymOuterK(1/(float64(r)-1), &xt)
		return cov
	}

	// Multiply by the sqrt of the weights, so that multiplication is symmetric.
	sqrtwts := make([]float64, r)
	for i, w := range weights {
		if w < 0 {
			panic("stat: negative covariance matrix weights")
		}
		sqrtwts[i] = math.Sqrt(w)
	}
	// Weight the rows.
	for i := 0; i < c; i++ {
		v := xt.RawRowView(i)
		floats.Mul(v, sqrtwts)
	}

	// Calculate the normalization factor
	// scaled by the weighted sample size.
	cov.SymOuterK(1/(floats.Sum(weights)-1), &xt)
	return cov
}
// CorrelationMatrix returns the correlation matrix calculated from a matrix
// of data, x, using a two-pass algorithm.
//
// If weights is not nil the weighted correlation of x is calculated. weights
// must have length equal to the number of rows in input data matrix and
// must not contain negative elements.
// If corr is not nil it must either be zero-sized or have the same number of
// columns as the input data matrix. corr will be used as the destination for
// the correlation data. If corr is nil, a new mat64.SymDense is allocated for
// the destination.
func CorrelationMatrix(corr *mat64.SymDense, x mat64.Matrix, weights []float64) *mat64.SymDense {
	// CovarianceMatrix performs all size validation (it will panic if the
	// sizes don't match, or if weights is the wrong size); the covariance
	// is then normalized in place to a correlation.
	c := CovarianceMatrix(corr, x, weights)
	covToCorr(c)
	return c
}
// covToCorr converts a covariance matrix to a correlation matrix in place.
func covToCorr(c *mat64.SymDense) {
	n := c.Symmetric()

	// inv[i] holds 1/σ_i, the reciprocal standard deviation of column i.
	inv := make([]float64, n)
	for i := 0; i < n; i++ {
		inv[i] = 1 / math.Sqrt(c.At(i, i))
	}
	for i := 0; i < n; i++ {
		// Ensure that the diagonal has exactly ones.
		c.SetSym(i, i, 1)
		for j := i + 1; j < n; j++ {
			v := c.At(i, j)
			c.SetSym(i, j, v*inv[i]*inv[j])
		}
	}
}
// corrToCov converts a correlation matrix to a covariance matrix in place.
// The input sigma should be vector of standard deviations corresponding
// to the covariance. It will panic if len(sigma) is not equal to the
// number of rows in the correlation matrix.
func corrToCov(c *mat64.SymDense, sigma []float64) {
	// Use Symmetric for the dimension, matching covToCorr above.
	r := c.Symmetric()
	if r != len(sigma) {
		panic(matrix.ErrShape)
	}
	for i, sx := range sigma {
		// Ensure that the diagonal has exactly sigma squared.
		c.SetSym(i, i, sx*sx)
		for j := i + 1; j < r; j++ {
			v := c.At(i, j)
			c.SetSym(i, j, v*sx*sigma[j])
		}
	}
}
// Mahalanobis computes the Mahalanobis distance
//  D = sqrt((x-y)^T * Σ^-1 * (x-y))
// between the vectors x and y given the cholesky decomposition of Σ.
// Mahalanobis returns NaN if the linear solve fails.
//
// See https://en.wikipedia.org/wiki/Mahalanobis_distance for more information.
func Mahalanobis(x, y *mat64.Vector, chol *mat64.Cholesky) float64 {
	var diff mat64.Vector
	diff.SubVec(x, y)
	// tmp = Σ^-1 * (x-y), computed via the Cholesky factorization.
	var tmp mat64.Vector
	err := tmp.SolveCholeskyVec(chol, &diff)
	if err != nil {
		return math.NaN()
	}
	return math.Sqrt(mat64.Dot(&tmp, &diff))
}

463
stat/statmat_test.go Normal file
View File

@@ -0,0 +1,463 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stat
import (
"math"
"math/rand"
"testing"
"github.com/gonum/floats"
"github.com/gonum/matrix/mat64"
)
// TestCovarianceMatrix checks CovarianceMatrix against golden matrices,
// verifies the inputs are untouched, cross-checks every entry against the
// scalar Covariance function, and exercises the panic paths.
func TestCovarianceMatrix(t *testing.T) {
	// An alternative way to test this is to call the Variance and
	// Covariance functions and ensure that the results are identical.
	for i, test := range []struct {
		data    *mat64.Dense
		weights []float64
		ans     *mat64.Dense
	}{
		{
			data: mat64.NewDense(5, 2, []float64{
				-2, -4,
				-1, 2,
				0, 0,
				1, -2,
				2, 4,
			}),
			weights: nil,
			ans: mat64.NewDense(2, 2, []float64{
				2.5, 3,
				3, 10,
			}),
		}, {
			data: mat64.NewDense(3, 2, []float64{
				1, 1,
				2, 4,
				3, 9,
			}),
			weights: []float64{
				1,
				1.5,
				1,
			},
			ans: mat64.NewDense(2, 2, []float64{
				.8, 3.2,
				3.2, 13.142857142857146,
			}),
		},
	} {
		// Make a copy of the data to check that it isn't changing.
		r := test.data.RawMatrix()
		d := make([]float64, len(r.Data))
		copy(d, r.Data)

		w := make([]float64, len(test.weights))
		if test.weights != nil {
			copy(w, test.weights)
		}
		// Exercise both the allocating (nil) and reusing (zero-sized)
		// destination paths.
		for _, cov := range []*mat64.SymDense{nil, &mat64.SymDense{}} {
			c := CovarianceMatrix(cov, test.data, test.weights)
			if !mat64.Equal(c, test.ans) {
				t.Errorf("%d: expected cov %v, found %v", i, test.ans, c)
			}
			if !floats.Equal(d, r.Data) {
				t.Errorf("%d: data was modified during execution", i)
			}
			if !floats.Equal(w, test.weights) {
				t.Errorf("%d: weights was modified during execution", i)
			}

			// compare with call to Covariance
			_, cols := c.Dims()
			for ci := 0; ci < cols; ci++ {
				for cj := 0; cj < cols; cj++ {
					x := mat64.Col(nil, ci, test.data)
					y := mat64.Col(nil, cj, test.data)
					cov := Covariance(x, y, test.weights)
					if math.Abs(cov-c.At(ci, cj)) > 1e-14 {
						t.Errorf("CovMat does not match at (%v, %v). Want %v, got %v.", ci, cj, cov, c.At(ci, cj))
					}
				}
			}
		}
	}
	if !Panics(func() { CovarianceMatrix(nil, mat64.NewDense(5, 2, nil), []float64{}) }) {
		t.Errorf("CovarianceMatrix did not panic with weight size mismatch")
	}
	if !Panics(func() { CovarianceMatrix(mat64.NewSymDense(1, nil), mat64.NewDense(5, 2, nil), nil) }) {
		t.Errorf("CovarianceMatrix did not panic with preallocation size mismatch")
	}
	if !Panics(func() { CovarianceMatrix(nil, mat64.NewDense(2, 2, []float64{1, 2, 3, 4}), []float64{1, -1}) }) {
		t.Errorf("CovarianceMatrix did not panic with negative weights")
	}
}
// TestCorrelationMatrix checks CorrelationMatrix against precomputed
// answers for unweighted and weighted data, verifies that neither the
// data nor the weights are modified, and cross-checks every element of
// the result against the scalar Correlation function.
func TestCorrelationMatrix(t *testing.T) {
	for i, test := range []struct {
		data *mat64.Dense
		weights []float64
		ans *mat64.Dense
	}{
		{
			// The columns are perfectly linearly related, so every
			// pairwise correlation is exactly 1.
			data: mat64.NewDense(3, 3, []float64{
				1, 2, 3,
				3, 4, 5,
				5, 6, 7,
			}),
			weights: nil,
			ans: mat64.NewDense(3, 3, []float64{
				1, 1, 1,
				1, 1, 1,
				1, 1, 1,
			}),
		},
		{
			data: mat64.NewDense(5, 2, []float64{
				-2, -4,
				-1, 2,
				0, 0,
				1, -2,
				2, 4,
			}),
			weights: nil,
			ans: mat64.NewDense(2, 2, []float64{
				1, 0.6,
				0.6, 1,
			}),
		}, {
			// Weighted case.
			data: mat64.NewDense(3, 2, []float64{
				1, 1,
				2, 4,
				3, 9,
			}),
			weights: []float64{
				1,
				1.5,
				1,
			},
			ans: mat64.NewDense(2, 2, []float64{
				1, 0.9868703275903379,
				0.9868703275903379, 1,
			}),
		},
	} {
		// Make a copy of the data to check that it isn't changing.
		r := test.data.RawMatrix()
		d := make([]float64, len(r.Data))
		copy(d, r.Data)
		w := make([]float64, len(test.weights))
		if test.weights != nil {
			copy(w, test.weights)
		}
		// Run with both a nil destination (allocating) and an empty
		// SymDense (resizing/reusing) to exercise both code paths.
		for _, corr := range []*mat64.SymDense{nil, &mat64.SymDense{}} {
			c := CorrelationMatrix(corr, test.data, test.weights)
			if !mat64.Equal(c, test.ans) {
				t.Errorf("%d: expected corr %v, found %v", i, test.ans, c)
			}
			if !floats.Equal(d, r.Data) {
				t.Errorf("%d: data was modified during execution", i)
			}
			if !floats.Equal(w, test.weights) {
				t.Errorf("%d: weights was modified during execution", i)
			}
			// Compare each matrix element with a direct call to Correlation
			// on the corresponding column pair.
			_, cols := c.Dims()
			for ci := 0; ci < cols; ci++ {
				for cj := 0; cj < cols; cj++ {
					x := mat64.Col(nil, ci, test.data)
					y := mat64.Col(nil, cj, test.data)
					corr := Correlation(x, y, test.weights)
					if math.Abs(corr-c.At(ci, cj)) > 1e-14 {
						t.Errorf("CorrMat does not match at (%v, %v). Want %v, got %v.", ci, cj, corr, c.At(ci, cj))
					}
				}
			}
		}
	}
	// Invalid inputs must panic rather than return a wrong result.
	if !Panics(func() { CorrelationMatrix(nil, mat64.NewDense(5, 2, nil), []float64{}) }) {
		t.Errorf("CorrelationMatrix did not panic with weight size mismatch")
	}
	if !Panics(func() { CorrelationMatrix(mat64.NewSymDense(1, nil), mat64.NewDense(5, 2, nil), nil) }) {
		t.Errorf("CorrelationMatrix did not panic with preallocation size mismatch")
	}
	if !Panics(func() { CorrelationMatrix(nil, mat64.NewDense(2, 2, []float64{1, 2, 3, 4}), []float64{1, -1}) }) {
		t.Errorf("CorrelationMatrix did not panic with negative weights")
	}
}
// TestCorrCov tests both covToCorr and corrToCov by checking that each
// conversion reproduces the matrix computed directly from the data.
func TestCorrCov(t *testing.T) {
	for i, test := range []struct {
		data    *mat64.Dense
		weights []float64
	}{
		{
			data: mat64.NewDense(3, 3, []float64{
				1, 2, 3,
				3, 4, 5,
				5, 6, 7,
			}),
			weights: nil,
		},
		{
			data: mat64.NewDense(5, 2, []float64{
				-2, -4,
				-1, 2,
				0, 0,
				1, -2,
				2, 4,
			}),
			weights: nil,
		}, {
			data: mat64.NewDense(3, 2, []float64{
				1, 1,
				2, 4,
				3, 9,
			}),
			weights: []float64{
				1,
				1.5,
				1,
			},
		},
	} {
		corr := CorrelationMatrix(nil, test.data, test.weights)
		cov := CovarianceMatrix(nil, test.data, test.weights)
		n := cov.Symmetric()
		// The sigmas are the square roots of the diagonal elements of
		// the covariance matrix.
		sigmas := make([]float64, n)
		for j := range sigmas {
			sigmas[j] = math.Sqrt(cov.At(j, j))
		}
		// corrToCov should reconstruct the covariance matrix from the
		// correlation matrix and the sigmas.
		covFromCorr := mat64.NewSymDense(corr.Symmetric(), nil)
		covFromCorr.CopySym(corr)
		corrToCov(covFromCorr, sigmas)
		// covToCorr should reconstruct the correlation matrix from the
		// covariance matrix.
		corrFromCov := mat64.NewSymDense(cov.Symmetric(), nil)
		corrFromCov.CopySym(cov)
		covToCorr(corrFromCov)
		if !mat64.EqualApprox(corr, corrFromCov, 1e-14) {
			t.Errorf("%d: covToCorr did not match direct Correlation calculation. Want: %v, got: %v. ", i, corr, corrFromCov)
		}
		if !mat64.EqualApprox(cov, covFromCorr, 1e-14) {
			t.Errorf("%d: corrToCov did not match direct Covariance calculation. Want: %v, got: %v. ", i, cov, covFromCorr)
		}
	}
	// A sigma slice whose length does not match the matrix must panic.
	// This does not depend on the test cases, so check it once.
	if !Panics(func() { corrToCov(mat64.NewSymDense(2, nil), []float64{}) }) {
		t.Errorf("corrToCov did not panic with sigma size mismatch")
	}
}
// TestMahalanobis checks the Mahalanobis distance against a value
// computed with scipy.
func TestMahalanobis(t *testing.T) {
	// Comparison with scipy.
	for cas, test := range []struct {
		x, y  *mat64.Vector
		Sigma *mat64.SymDense
		ans   float64
	}{
		{
			x: mat64.NewVector(3, []float64{1, 2, 3}),
			y: mat64.NewVector(3, []float64{0.8, 1.1, -1}),
			Sigma: mat64.NewSymDense(3,
				[]float64{
					0.8, 0.3, 0.1,
					0.3, 0.7, -0.1,
					0.1, -0.1, 7}),
			ans: 1.9251757377680914,
		},
	} {
		var chol mat64.Cholesky
		ok := chol.Factorize(test.Sigma)
		if !ok {
			// The test Sigma must be positive definite for the
			// Cholesky factorization to succeed; failing here is a
			// broken test case, not a failure of Mahalanobis.
			t.Fatalf("Cas %d: bad test: Sigma is not positive definite", cas)
		}
		ans := Mahalanobis(test.x, test.y, &chol)
		if math.Abs(ans-test.ans) > 1e-14 {
			t.Errorf("Cas %d: got %v, want %v", cas, ans, test.ans)
		}
	}
}
// benchmarks
// randMat returns an r×c dense matrix filled with uniformly
// distributed random values in [0, 1).
func randMat(r, c int) mat64.Matrix {
	data := make([]float64, r*c)
	for i := 0; i < len(data); i++ {
		data[i] = rand.Float64()
	}
	return mat64.NewDense(r, c, data)
}
// benchmarkCovarianceMatrix times CovarianceMatrix on m with no
// weights and no preallocated destination.
func benchmarkCovarianceMatrix(b *testing.B, m mat64.Matrix) {
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		CovarianceMatrix(nil, m, nil)
	}
}
// benchmarkCovarianceMatrixWeighted times CovarianceMatrix on m with a
// uniform weight vector and no preallocated destination.
func benchmarkCovarianceMatrixWeighted(b *testing.B, m mat64.Matrix) {
	rows, _ := m.Dims()
	weights := make([]float64, rows)
	for i := range weights {
		weights[i] = 0.5
	}
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		CovarianceMatrix(nil, m, weights)
	}
}
// benchmarkCovarianceMatrixInPlace times CovarianceMatrix on m writing
// into a preallocated destination, with no weights.
func benchmarkCovarianceMatrixInPlace(b *testing.B, m mat64.Matrix) {
	_, cols := m.Dims()
	dst := mat64.NewSymDense(cols, nil)
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		CovarianceMatrix(dst, m, nil)
	}
}
func BenchmarkCovarianceMatrixSmallxSmall(b *testing.B) {
// 10 * 10 elements
x := randMat(small, small)
benchmarkCovarianceMatrix(b, x)
}
func BenchmarkCovarianceMatrixSmallxMedium(b *testing.B) {
// 10 * 1000 elements
x := randMat(small, medium)
benchmarkCovarianceMatrix(b, x)
}
func BenchmarkCovarianceMatrixMediumxSmall(b *testing.B) {
// 1000 * 10 elements
x := randMat(medium, small)
benchmarkCovarianceMatrix(b, x)
}
func BenchmarkCovarianceMatrixMediumxMedium(b *testing.B) {
// 1000 * 1000 elements
x := randMat(medium, medium)
benchmarkCovarianceMatrix(b, x)
}
func BenchmarkCovarianceMatrixLargexSmall(b *testing.B) {
// 1e5 * 10 elements
x := randMat(large, small)
benchmarkCovarianceMatrix(b, x)
}
func BenchmarkCovarianceMatrixHugexSmall(b *testing.B) {
// 1e7 * 10 elements
x := randMat(huge, small)
benchmarkCovarianceMatrix(b, x)
}
func BenchmarkCovarianceMatrixSmallxSmallWeighted(b *testing.B) {
// 10 * 10 elements
x := randMat(small, small)
benchmarkCovarianceMatrixWeighted(b, x)
}
func BenchmarkCovarianceMatrixSmallxMediumWeighted(b *testing.B) {
// 10 * 1000 elements
x := randMat(small, medium)
benchmarkCovarianceMatrixWeighted(b, x)
}
func BenchmarkCovarianceMatrixMediumxSmallWeighted(b *testing.B) {
// 1000 * 10 elements
x := randMat(medium, small)
benchmarkCovarianceMatrixWeighted(b, x)
}
func BenchmarkCovarianceMatrixMediumxMediumWeighted(b *testing.B) {
// 1000 * 1000 elements
x := randMat(medium, medium)
benchmarkCovarianceMatrixWeighted(b, x)
}
func BenchmarkCovarianceMatrixLargexSmallWeighted(b *testing.B) {
// 1e5 * 10 elements
x := randMat(large, small)
benchmarkCovarianceMatrixWeighted(b, x)
}
func BenchmarkCovarianceMatrixHugexSmallWeighted(b *testing.B) {
// 1e7 * 10 elements
x := randMat(huge, small)
benchmarkCovarianceMatrixWeighted(b, x)
}
func BenchmarkCovarianceMatrixSmallxSmallInPlace(b *testing.B) {
// 10 * 10 elements
x := randMat(small, small)
benchmarkCovarianceMatrixInPlace(b, x)
}
func BenchmarkCovarianceMatrixSmallxMediumInPlace(b *testing.B) {
// 10 * 1000 elements
x := randMat(small, medium)
benchmarkCovarianceMatrixInPlace(b, x)
}
func BenchmarkCovarianceMatrixMediumxSmallInPlace(b *testing.B) {
// 1000 * 10 elements
x := randMat(medium, small)
benchmarkCovarianceMatrixInPlace(b, x)
}
func BenchmarkCovarianceMatrixMediumxMediumInPlace(b *testing.B) {
// 1000 * 1000 elements
x := randMat(medium, medium)
benchmarkCovarianceMatrixInPlace(b, x)
}
func BenchmarkCovarianceMatrixLargexSmallInPlace(b *testing.B) {
// 1e5 * 10 elements
x := randMat(large, small)
benchmarkCovarianceMatrixInPlace(b, x)
}
func BenchmarkCovarianceMatrixHugexSmallInPlace(b *testing.B) {
// 1e7 * 10 elements
x := randMat(huge, small)
benchmarkCovarianceMatrixInPlace(b, x)
}
func BenchmarkCovToCorr(b *testing.B) {
// generate a 10x10 covariance matrix
m := randMat(small, small)
c := CovarianceMatrix(nil, m, nil)
cc := mat64.NewSymDense(c.Symmetric(), nil)
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
cc.CopySym(c)
b.StartTimer()
covToCorr(cc)
}
}
func BenchmarkCorrToCov(b *testing.B) {
// generate a 10x10 correlation matrix
m := randMat(small, small)
c := CorrelationMatrix(nil, m, nil)
cc := mat64.NewSymDense(c.Symmetric(), nil)
sigma := make([]float64, small)
for i := range sigma {
sigma[i] = 2
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
cc.CopySym(c)
b.StartTimer()
corrToCov(cc, sigma)
}
}