mirror of
https://github.com/gonum/gonum.git
synced 2025-10-23 15:13:31 +08:00
Initial commit of sample statistics functions to the stat package
This commit is contained in:
507
stat.go
Normal file
507
stat.go
Normal file
@@ -0,0 +1,507 @@
|
|||||||
|
package stat
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
"sort"
|
||||||
|
|
||||||
|
"github.com/gonum/floats"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Correlation returns the weighted correlation between the samples of x and y
|
||||||
|
// with the given means.
|
||||||
|
// sum_i {w_i (x_i - meanX) * (y_i - meanY)} / ((sum_j {w_j} - 1) * stdX * stdY)
|
||||||
|
// The lengths of x and y must be equal
|
||||||
|
// If weights is nil then all of the weights are 1
|
||||||
|
// If weights is not nil, then len(x) must equal len(weights)
|
||||||
|
func Correlation(x []float64, meanX, stdX float64, y []float64, meanY, stdY float64, weights []float64) float64 {
|
||||||
|
return Covariance(x, meanX, y, meanY, weights) / (stdX * stdY)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Covariance returns the weighted covariance between the samples of x and y
|
||||||
|
// with the given means.
|
||||||
|
// sum_i {w_i (x_i - meanX) * (y_i - meanY)} / (sum_j {w_j} - 1)
|
||||||
|
// The lengths of x and y must be equal
|
||||||
|
// If weights is nil then all of the weights are 1
|
||||||
|
// If weights is not nil, then len(x) must equal len(weights)
|
||||||
|
func Covariance(x []float64, meanX float64, y []float64, meanY float64, weights []float64) float64 {
|
||||||
|
if len(x) != len(y) {
|
||||||
|
panic("stat: slice length mismatch")
|
||||||
|
}
|
||||||
|
if weights == nil {
|
||||||
|
var s float64
|
||||||
|
for i, v := range x {
|
||||||
|
s += (v - meanX) * (y[i] - meanY)
|
||||||
|
}
|
||||||
|
s /= float64(len(x) - 1)
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
if weights != nil && len(weights) != len(x) {
|
||||||
|
panic("stat: slice length mismatch")
|
||||||
|
}
|
||||||
|
var s float64
|
||||||
|
var sumWeights float64
|
||||||
|
for i, v := range x {
|
||||||
|
s += weights[i] * (v - meanX) * (y[i] - meanY)
|
||||||
|
sumWeights += weights[i]
|
||||||
|
}
|
||||||
|
return s / (sumWeights - 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// CrossEntropy computes the cross-entropy between the two distributions specified
|
||||||
|
// in p and q.
|
||||||
|
func CrossEntropy(p, q []float64) float64 {
|
||||||
|
if len(p) != len(q) {
|
||||||
|
panic("stat: slice length mismatch")
|
||||||
|
}
|
||||||
|
var ce float64
|
||||||
|
for i, v := range p {
|
||||||
|
w := q[i]
|
||||||
|
if v == 0 && w == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
ce -= v * math.Log(w)
|
||||||
|
}
|
||||||
|
return ce
|
||||||
|
}
|
||||||
|
|
||||||
|
// Entropy computes the Shannon entropy of a distribution or the distance between
|
||||||
|
// two distributions. The natural logarithm is used.
|
||||||
|
// - sum_i (p_i * log_e(p_i))
|
||||||
|
func Entropy(p []float64) float64 {
|
||||||
|
var e float64
|
||||||
|
for _, v := range p {
|
||||||
|
if v != 0 { // Entropy needs 0 * log(0) == 0
|
||||||
|
e -= v * math.Log(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return e
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExKurtosis returns the population excess kurtosis of the sample.
|
||||||
|
// The kurtosis is defined by the 4th moment of the mean divided by the squared
|
||||||
|
// variance. The excess kurtosis subtracts 3.0 so that the excess kurtosis of
|
||||||
|
// the normal distribution is zero.
|
||||||
|
// If weights is nil then all of the weights are 1
|
||||||
|
// If weights is not nil, then len(x) must equal len(weights)
|
||||||
|
func ExKurtosis(x []float64, mean, stdev float64, weights []float64) float64 {
|
||||||
|
if weights == nil {
|
||||||
|
var e float64
|
||||||
|
for _, v := range x {
|
||||||
|
z := (v - mean) / stdev
|
||||||
|
e += z * z * z * z
|
||||||
|
}
|
||||||
|
mul, offset := kurtosisCorrection(float64(len(x)))
|
||||||
|
return e*mul - offset
|
||||||
|
}
|
||||||
|
if len(x) != len(weights) {
|
||||||
|
panic("stat: slice length mismatch")
|
||||||
|
}
|
||||||
|
var e float64
|
||||||
|
var sumWeights float64
|
||||||
|
for i, v := range x {
|
||||||
|
z := (v - mean) / stdev
|
||||||
|
e += weights[i] * z * z * z * z
|
||||||
|
sumWeights += weights[i]
|
||||||
|
}
|
||||||
|
mul, offset := kurtosisCorrection(sumWeights)
|
||||||
|
return e*mul - offset
|
||||||
|
}
|
||||||
|
|
||||||
|
// n is the number of samples
|
||||||
|
// see https://en.wikipedia.org/wiki/Kurtosis
|
||||||
|
func kurtosisCorrection(n float64) (mul, offset float64) {
|
||||||
|
return ((n + 1) / (n - 1)) * (n / (n - 2)) * (1 / (n - 3)), 3 * ((n - 1) / (n - 2)) * ((n - 1) / (n - 3))
|
||||||
|
}
|
||||||
|
|
||||||
|
// GeoMean returns the weighted geometric mean of the dataset
|
||||||
|
// \prod_i {x_i ^ w_i}
|
||||||
|
// This only applies with positive x and positive weights
|
||||||
|
// If weights is nil then all of the weights are 1
|
||||||
|
// If weights is not nil, then len(x) must equal len(weights)
|
||||||
|
func GeometricMean(x, weights []float64) float64 {
|
||||||
|
if weights == nil {
|
||||||
|
var s float64
|
||||||
|
for _, v := range x {
|
||||||
|
s += math.Log(v)
|
||||||
|
}
|
||||||
|
s /= float64(len(x))
|
||||||
|
return math.Exp(s)
|
||||||
|
}
|
||||||
|
if len(x) != len(weights) {
|
||||||
|
panic("stat: slice length mismatch")
|
||||||
|
}
|
||||||
|
var s float64
|
||||||
|
var sumWeights float64
|
||||||
|
for i, v := range x {
|
||||||
|
s += weights[i] * math.Log(v)
|
||||||
|
sumWeights += weights[i]
|
||||||
|
}
|
||||||
|
s /= sumWeights
|
||||||
|
return math.Exp(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GeoMean returns the weighted harmonic mean of the dataset
|
||||||
|
// \sum_i {w_i} / ( sum_i {w_i / x_i} )
|
||||||
|
// This only applies with positive x and positive weights
|
||||||
|
// If weights is nil then all of the weights are 1
|
||||||
|
// If weights is not nil, then len(x) must equal len(weights)
|
||||||
|
func HarmonicMean(x, weights []float64) float64 {
|
||||||
|
if weights != nil && len(x) != len(weights) {
|
||||||
|
panic("stat: slice length mismatch")
|
||||||
|
}
|
||||||
|
// TODO: Fix this to make it more efficient and avoid allocation
|
||||||
|
|
||||||
|
// This can be numerically unstable (for exapmle if x is very small)
|
||||||
|
// W = \sum_i {w_i}
|
||||||
|
// hm = exp(log(W) - log(\sum_i w_i / x_i))
|
||||||
|
|
||||||
|
logs := make([]float64, len(x))
|
||||||
|
var W float64
|
||||||
|
for i := range x {
|
||||||
|
if weights == nil {
|
||||||
|
logs[i] = -math.Log(x[i])
|
||||||
|
W += 1
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
logs[i] = math.Log(weights[i]) - math.Log(x[i])
|
||||||
|
W += weights[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sum all of the logs
|
||||||
|
v := floats.LogSumExp(logs) // this computes log(\sum_i { w_i / x_i})
|
||||||
|
return math.Exp(math.Log(W) - v)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Histogram sums up the weighted number of data points in each bin.
|
||||||
|
// The weight of data point x[i] will be placed into count[j] if
|
||||||
|
// dividers[j-1] <= x < dividers[j]. The "span" function in the floats package can assist
|
||||||
|
// with bin creation. The count variable must either be nil or have length of
|
||||||
|
// one less than dividers.
|
||||||
|
// If weights is nil then all of the weights are 1
|
||||||
|
// If weights is not nil, then len(x) must equal len(weights)
|
||||||
|
func Histogram(count, dividers, x, weights []float64) []float64 {
|
||||||
|
if weights != nil && len(x) != len(weights) {
|
||||||
|
panic("stat: slice length mismatch")
|
||||||
|
}
|
||||||
|
if count == nil {
|
||||||
|
count = make([]float64, len(dividers)+1)
|
||||||
|
}
|
||||||
|
if len(count) != len(dividers)+1 {
|
||||||
|
panic("histogram: bin count mismatch")
|
||||||
|
}
|
||||||
|
|
||||||
|
sortX, sortWeight := sortXandWeight(x, weights)
|
||||||
|
|
||||||
|
idx := 0
|
||||||
|
comp := dividers[idx]
|
||||||
|
if sortWeight == nil {
|
||||||
|
for _, v := range sortX {
|
||||||
|
if v < comp || idx == len(count)-1 {
|
||||||
|
// Still in the current bucket
|
||||||
|
count[idx] += 1
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Need to find the next divider where v is less than the divider
|
||||||
|
// or to set the maximum divider if no such exists
|
||||||
|
for j := idx + 1; j < len(count); j++ {
|
||||||
|
if j == len(dividers) {
|
||||||
|
idx = len(dividers)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if v < dividers[j] {
|
||||||
|
idx = j
|
||||||
|
comp = dividers[j]
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
count[idx] += 1
|
||||||
|
}
|
||||||
|
return count
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, v := range sortX {
|
||||||
|
if v < comp || idx == len(count)-1 {
|
||||||
|
// Still in the current bucket
|
||||||
|
count[idx] += sortWeight[i]
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Need to find the next divider where v is less than the divider
|
||||||
|
// or to set the maximum divider if no such exists
|
||||||
|
for j := idx + 1; j < len(count); j++ {
|
||||||
|
if j == len(dividers) {
|
||||||
|
idx = len(dividers)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if v < dividers[j] {
|
||||||
|
idx = j
|
||||||
|
comp = dividers[j]
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
count[idx] += sortWeight[i]
|
||||||
|
}
|
||||||
|
return count
|
||||||
|
|
||||||
|
return count
|
||||||
|
}
|
||||||
|
|
||||||
|
// KulbeckLeibler computes the Kulbeck-Leibler distance between the
|
||||||
|
// distributions p and q. The natural logarithm is used.
|
||||||
|
// sum_i(p_i * log(p_i / q_i))
|
||||||
|
// Note that the Kulbeck-Leibler distance is not symmetric;
|
||||||
|
// KulbeckLeibler(p,q) != KulbeckLeibler(q,p)
|
||||||
|
func KulbeckLeibler(p, q []float64) float64 {
|
||||||
|
if len(p) != len(q) {
|
||||||
|
panic("stat: slice length mismatch")
|
||||||
|
}
|
||||||
|
var kl float64
|
||||||
|
for i, v := range p {
|
||||||
|
if v != 0 { // Entropy needs 0 * log(0) == 0
|
||||||
|
kl += v * (math.Log(v) - math.Log(q[i]))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return kl
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mean computes the weighted mean of the data set.
|
||||||
|
// sum_i {w_i * x_i} / sum_i {w_i}
|
||||||
|
// If weights is nil then all of the weights are 1
|
||||||
|
// If weights is not nil, then len(x) must equal len(weights)
|
||||||
|
func Mean(x, weights []float64) float64 {
|
||||||
|
if weights == nil {
|
||||||
|
return floats.Sum(x) / float64(len(x))
|
||||||
|
}
|
||||||
|
if len(x) != len(weights) {
|
||||||
|
panic("stat: slice length mismatch")
|
||||||
|
}
|
||||||
|
var sumValues float64
|
||||||
|
var sumWeights float64
|
||||||
|
for i, w := range weights {
|
||||||
|
sumValues += w * x[i]
|
||||||
|
sumWeights += w
|
||||||
|
}
|
||||||
|
return sumValues / sumWeights
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mode returns the most common value in the dataset specified by x and the
|
||||||
|
// given weights. Strict float64 equality is used when comparing values, so users
|
||||||
|
// should take caution. If several values are the mode, any of them may be returned.
|
||||||
|
func Mode(x []float64, weights []float64) (val float64, count float64) {
|
||||||
|
if weights != nil && len(x) != len(weights) {
|
||||||
|
panic("stat: slice length mismatch")
|
||||||
|
}
|
||||||
|
if len(x) == 0 {
|
||||||
|
return 0, 0
|
||||||
|
}
|
||||||
|
m := make(map[float64]float64)
|
||||||
|
if weights == nil {
|
||||||
|
for _, v := range x {
|
||||||
|
m[v] += 1
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for i, v := range x {
|
||||||
|
m[v] += weights[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
var maxCount float64
|
||||||
|
var max float64
|
||||||
|
for val, count := range m {
|
||||||
|
if count > maxCount {
|
||||||
|
maxCount = count
|
||||||
|
max = val
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return max, maxCount
|
||||||
|
}
|
||||||
|
|
||||||
|
// Moment computes the weighted n^th moment of the samples,
|
||||||
|
// E[(x - μ)^N]
|
||||||
|
// No degrees of freedom correction is done.
|
||||||
|
// If weights is nil then all of the weights are 1
|
||||||
|
// If weights is not nil, then len(x) must equal len(weights)
|
||||||
|
func Moment(moment float64, x []float64, mean float64, weights []float64) float64 {
|
||||||
|
if weights == nil {
|
||||||
|
var m float64
|
||||||
|
for _, v := range x {
|
||||||
|
m += math.Pow(v-mean, moment)
|
||||||
|
}
|
||||||
|
m /= float64(len(x))
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
if len(weights) != len(x) {
|
||||||
|
panic("stat: slice length mismatch")
|
||||||
|
}
|
||||||
|
var m float64
|
||||||
|
var sumWeights float64
|
||||||
|
for i, v := range x {
|
||||||
|
m += weights[i] * math.Pow(v-mean, moment)
|
||||||
|
sumWeights += weights[i]
|
||||||
|
}
|
||||||
|
return m / sumWeights
|
||||||
|
}
|
||||||
|
|
||||||
|
// Percentile returns the lowest sample of x such that x is greater than or
|
||||||
|
// equal to the fraction p of samples. p should be a number between 0 and 1
|
||||||
|
// If no such sample exists, the lowest value is returned
|
||||||
|
// If weights is nil then all of the weights are 1
|
||||||
|
// If weights is not nil, then len(x) must equal len(weights)
|
||||||
|
func Percentile(p float64, x, weights []float64) float64 {
|
||||||
|
if p < 0 || p > 1 {
|
||||||
|
panic("stat: percentile out of bounds")
|
||||||
|
}
|
||||||
|
|
||||||
|
if weights != nil && len(x) != len(weights) {
|
||||||
|
panic("stat: slice length mismatch")
|
||||||
|
}
|
||||||
|
|
||||||
|
sortX, sortWeight := sortXandWeight(x, weights)
|
||||||
|
if weights == nil {
|
||||||
|
loc := p * float64(len(x))
|
||||||
|
idx := int(math.Floor(loc))
|
||||||
|
if (loc == float64(idx) && idx != 0) || idx == len(x) {
|
||||||
|
idx--
|
||||||
|
}
|
||||||
|
return sortX[idx]
|
||||||
|
}
|
||||||
|
|
||||||
|
idx := p * floats.Sum(weights)
|
||||||
|
var cumsum float64
|
||||||
|
for i, w := range sortWeight {
|
||||||
|
cumsum += w
|
||||||
|
if cumsum >= idx {
|
||||||
|
return sortX[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
panic("shouldn't be here")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Quantile returns the lowest number p such that q is >= the fraction p of samples
|
||||||
|
// It is the inverse of the Percentile function.
|
||||||
|
// If weights is nil then all of the weights are 1
|
||||||
|
// If weights is not nil, then len(x) must equal len(weights)
|
||||||
|
func Quantile(q float64, x, weights []float64) float64 {
|
||||||
|
if weights != nil && len(x) != len(weights) {
|
||||||
|
panic("stat: slice length mismatch")
|
||||||
|
}
|
||||||
|
sortX, sortWeight := sortXandWeight(x, weights)
|
||||||
|
|
||||||
|
// Find the first x that is greater than the supplied x
|
||||||
|
if q < sortX[0] {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
if q >= sortX[len(sortX)-1] {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if weights == nil {
|
||||||
|
for i, v := range sortX {
|
||||||
|
if v > q {
|
||||||
|
return float64(i) / float64(len(x))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sumWeights := floats.Sum(weights)
|
||||||
|
var w float64
|
||||||
|
for i, v := range sortX {
|
||||||
|
if v > q {
|
||||||
|
return w / sumWeights
|
||||||
|
}
|
||||||
|
w += sortWeight[i]
|
||||||
|
}
|
||||||
|
panic("Impossible. Maybe x contains NaNs.")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skew computes the skewness of the sample data
|
||||||
|
// If weights is nil then all of the weights are 1
|
||||||
|
// If weights is not nil, then len(x) must equal len(weights)
|
||||||
|
func Skew(x []float64, mean, stdev float64, weights []float64) float64 {
|
||||||
|
if weights == nil {
|
||||||
|
var s float64
|
||||||
|
for _, v := range x {
|
||||||
|
z := (v - mean) / stdev
|
||||||
|
s += z * z * z
|
||||||
|
}
|
||||||
|
return s * skewCorrection(float64(len(x)))
|
||||||
|
}
|
||||||
|
if len(x) != len(weights) {
|
||||||
|
panic("stat: slice length mismatch")
|
||||||
|
}
|
||||||
|
var s float64
|
||||||
|
var sumWeights float64
|
||||||
|
for i, v := range x {
|
||||||
|
z := (v - mean) / stdev
|
||||||
|
s += weights[i] * z * z * z
|
||||||
|
sumWeights += weights[i]
|
||||||
|
}
|
||||||
|
return s * skewCorrection(sumWeights)
|
||||||
|
}
|
||||||
|
|
||||||
|
func skewCorrection(n float64) float64 {
|
||||||
|
// http://www.amstat.org/publications/jse/v19n2/doane.pdf page 7
|
||||||
|
return (n / (n - 1)) * (1 / (n - 2))
|
||||||
|
}
|
||||||
|
|
||||||
|
// StdDev returns the population standard deviation with the provided mean
|
||||||
|
func StDev(x []float64, mean float64, weights []float64) float64 {
|
||||||
|
return math.Sqrt(Variance(x, mean, weights))
|
||||||
|
}
|
||||||
|
|
||||||
|
// StandardError returns the standard error in the mean with the given values
|
||||||
|
func StdErr(stdev, sampleSize float64) float64 {
|
||||||
|
return stdev / math.Sqrt(sampleSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
// StdScore returns the standard score (a.k.a. z-score, z-value) for the value x
|
||||||
|
// with the givem mean and variance, i.e.
|
||||||
|
// (x - mean) / variance
|
||||||
|
func StdScore(x, mean, variance float64) float64 {
|
||||||
|
return (x - mean) / variance
|
||||||
|
}
|
||||||
|
|
||||||
|
// Variance computes the weighted sample variance with the provided mean.
|
||||||
|
// \sum_i w_i (x_i - mean)^2 / (sum_i w_i - 1)
|
||||||
|
// If weights is nil, then all of the weights are 1.
|
||||||
|
// If weights in not nil, then len(x) must equal len(weights).
|
||||||
|
func Variance(x []float64, mean float64, weights []float64) float64 {
|
||||||
|
if weights == nil {
|
||||||
|
var s float64
|
||||||
|
for _, v := range x {
|
||||||
|
s += (v - mean) * (v - mean)
|
||||||
|
}
|
||||||
|
return s / float64(len(x)-1)
|
||||||
|
}
|
||||||
|
if len(x) != len(weights) {
|
||||||
|
panic("stat: slice length mismatch")
|
||||||
|
}
|
||||||
|
var ss float64
|
||||||
|
var sumWeights float64
|
||||||
|
for i, v := range x {
|
||||||
|
ss += weights[i] * (v - mean) * (v - mean)
|
||||||
|
sumWeights += weights[i]
|
||||||
|
}
|
||||||
|
return ss / (sumWeights - 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Quartile returns
|
||||||
|
//func Quartile(x []float64, weights []float64) float64 {}
|
||||||
|
|
||||||
|
func sortXandWeight(x, weights []float64) (sortX, sortWeight []float64) {
|
||||||
|
|
||||||
|
sorted := sort.Float64sAreSorted(x)
|
||||||
|
if !sorted {
|
||||||
|
sortX = make([]float64, len(x))
|
||||||
|
copy(sortX, x)
|
||||||
|
inds := make([]int, len(x))
|
||||||
|
floats.Argsort(sortX, inds)
|
||||||
|
if weights != nil {
|
||||||
|
sortWeight = make([]float64, len(x))
|
||||||
|
for i, v := range inds {
|
||||||
|
sortWeight[i] = weights[v]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
sortX = x
|
||||||
|
sortWeight = weights
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
627
stat_test.go
Normal file
627
stat_test.go
Normal file
@@ -0,0 +1,627 @@
|
|||||||
|
package stat
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/gonum/floats"
|
||||||
|
)
|
||||||
|
|
||||||
|
func ExampleCorrelation() {
|
||||||
|
x := []float64{8, -3, 7, 8, -4}
|
||||||
|
y := []float64{10, 5, 6, 3, -1}
|
||||||
|
w := []float64{2, 1.5, 3, 3, 2}
|
||||||
|
|
||||||
|
fmt.Println("Correlation computes the degree to which two datasets move together")
|
||||||
|
fmt.Println("about their mean. For example, x and y above move similarly.")
|
||||||
|
fmt.Println("Package can be used to compute the mean and standard deviation")
|
||||||
|
fmt.Println("or they can be supplied if they are known")
|
||||||
|
meanX := Mean(x, w)
|
||||||
|
meanY := Mean(x, w)
|
||||||
|
c := Correlation(x, meanX, 3, y, meanY, 4, w)
|
||||||
|
fmt.Printf("Correlation with set standard deviatons is %.5f\n", c)
|
||||||
|
stdX := StDev(x, meanX, w)
|
||||||
|
stdY := StDev(x, meanY, w)
|
||||||
|
c2 := Correlation(x, meanX, stdX, y, meanY, stdY, w)
|
||||||
|
fmt.Printf("Correlation with computed standard deviatons is %.5f\n", c2)
|
||||||
|
// Output:
|
||||||
|
// Correlation computes the degree to which two datasets move together
|
||||||
|
// about their mean. For example, x and y above move similarly.
|
||||||
|
// Package can be used to compute the mean and standard deviation
|
||||||
|
// or they can be supplied if they are known
|
||||||
|
// Correlation with set standard deviatons is 0.96894
|
||||||
|
// Correlation with computed standard deviatons is 0.39644
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCorrelation(t *testing.T) {
|
||||||
|
for i, test := range []struct {
|
||||||
|
x []float64
|
||||||
|
y []float64
|
||||||
|
w []float64
|
||||||
|
ans float64
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
x: []float64{8, -3, 7, 8, -4},
|
||||||
|
y: []float64{8, -3, 7, 8, -4},
|
||||||
|
w: nil,
|
||||||
|
ans: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
x: []float64{8, -3, 7, 8, -4},
|
||||||
|
y: []float64{8, -3, 7, 8, -4},
|
||||||
|
w: []float64{1, 1, 1, 1, 1},
|
||||||
|
ans: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
x: []float64{8, -3, 7, 8, -4},
|
||||||
|
y: []float64{8, -3, 7, 8, -4},
|
||||||
|
w: []float64{1, 6, 7, 0.8, 2.1},
|
||||||
|
ans: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
x: []float64{8, -3, 7, 8, -4},
|
||||||
|
y: []float64{10, 15, 4, 5, -1},
|
||||||
|
w: nil,
|
||||||
|
ans: 0.0093334660769059,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
x: []float64{8, -3, 7, 8, -4},
|
||||||
|
y: []float64{10, 15, 4, 5, -1},
|
||||||
|
w: nil,
|
||||||
|
ans: 0.0093334660769059,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
x: []float64{8, -3, 7, 8, -4},
|
||||||
|
y: []float64{10, 15, 4, 5, -1},
|
||||||
|
w: []float64{1, 3, 1, 2, 2},
|
||||||
|
ans: -0.13966633352689,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
meanX := Mean(test.x, test.w)
|
||||||
|
meanY := Mean(test.y, test.w)
|
||||||
|
stdX := StDev(test.x, meanX, test.w)
|
||||||
|
stdY := StDev(test.y, meanY, test.w)
|
||||||
|
c := Correlation(test.x, meanX, stdX, test.y, meanY, stdY, test.w)
|
||||||
|
if math.Abs(test.ans-c) > 1e-14 {
|
||||||
|
t.Errorf("Correlation mismatch case %d. Expected %v, Found %v", i, test.ans, c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleCovariance() {
|
||||||
|
fmt.Println("Covariance computes the degree to which datasets move together")
|
||||||
|
fmt.Println("about their mean.")
|
||||||
|
x := []float64{8, -3, 7, 8, -4}
|
||||||
|
y := []float64{10, 2, 2, 4, 1}
|
||||||
|
meanX := Mean(x, nil)
|
||||||
|
meanY := Mean(y, nil)
|
||||||
|
cov := Covariance(x, meanX, y, meanY, nil)
|
||||||
|
fmt.Printf("Cov = %.4f\n", cov)
|
||||||
|
fmt.Println("If datasets move perfectly together, the variance equals the covariance")
|
||||||
|
y2 := []float64{12, 1, 11, 12, 0}
|
||||||
|
meanY2 := Mean(y2, nil)
|
||||||
|
cov2 := Covariance(x, meanX, y2, meanY2, nil)
|
||||||
|
varX := Variance(x, meanX, nil)
|
||||||
|
fmt.Printf("Cov2 is %.4f, VarX is %.4f", cov2, varX)
|
||||||
|
// Output:
|
||||||
|
// Covariance computes the degree to which datasets move together
|
||||||
|
// about their mean.
|
||||||
|
// Cov = 13.8000
|
||||||
|
// If datasets move perfectly together, the variance equals the covariance
|
||||||
|
// Cov2 is 37.7000, VarX is 37.7000
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCrossEntropy(t *testing.T) {
|
||||||
|
for i, test := range []struct {
|
||||||
|
p []float64
|
||||||
|
q []float64
|
||||||
|
ans float64
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
p: []float64{0.75, 0.1, 0.05},
|
||||||
|
q: []float64{0.5, 0.25, 0.25},
|
||||||
|
ans: 0.7278045395879426,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
p: []float64{0.75, 0.1, 0.05, 0, 0, 0},
|
||||||
|
q: []float64{0.5, 0.25, 0.25, 0, 0, 0},
|
||||||
|
ans: 0.7278045395879426,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
p: []float64{0.75, 0.1, 0.05, 0, 0, 0.1},
|
||||||
|
q: []float64{0.5, 0.25, 0.25, 0, 0, 0},
|
||||||
|
ans: math.Inf(1),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
p: nil,
|
||||||
|
q: nil,
|
||||||
|
ans: 0,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
c := CrossEntropy(test.p, test.q)
|
||||||
|
if math.Abs(c-test.ans) > 1e-14 {
|
||||||
|
t.Errorf("Cross entropy mismatch case %d: Expected %v, Found %v", i, test.ans, c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleEntropy() {
|
||||||
|
|
||||||
|
p := []float64{0.05, 0.1, 0.9, 0.05}
|
||||||
|
entP := Entropy(p)
|
||||||
|
|
||||||
|
q := []float64{0.2, 0.4, 0.25, 0.15}
|
||||||
|
entQ := Entropy(q)
|
||||||
|
|
||||||
|
r := []float64{0.2, 0, 0, 0.5, 0, 0.2, 0.1, 0, 0, 0}
|
||||||
|
entR := Entropy(r)
|
||||||
|
|
||||||
|
s := []float64{0, 0, 1, 0}
|
||||||
|
entS := Entropy(s)
|
||||||
|
|
||||||
|
fmt.Println("Entropy is a measure of the amount of uncertainty in a distribution")
|
||||||
|
fmt.Printf("The second bin of p is very likely to occur. It's entropy is %.4f\n", entP)
|
||||||
|
fmt.Printf("The distribution of q is more spread out. It's entropy is %.4f\n", entQ)
|
||||||
|
fmt.Println("Adding buckets with zero probability does not change the entropy.")
|
||||||
|
fmt.Printf("The entropy of r is: %.4f\n", entR)
|
||||||
|
fmt.Printf("A distribution with no uncertainty has entropy %.4f\n", entS)
|
||||||
|
// Output:
|
||||||
|
// Entropy is a measure of the amount of uncertainty in a distribution
|
||||||
|
// The second bin of p is very likely to occur. It's entropy is 0.6247
|
||||||
|
// The distribution of q is more spread out. It's entropy is 1.3195
|
||||||
|
// Adding buckets with zero probability does not change the entropy.
|
||||||
|
// The entropy of r is: 1.2206
|
||||||
|
// A distribution with no uncertainty has entropy 0.0000
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleExKurtosis() {
|
||||||
|
fmt.Println(`Kurtosis is a measure of the 'peakedness' of a distribution, and the
|
||||||
|
excess kurtosis is the kurtosis above or below that of the standard normal
|
||||||
|
distribution`)
|
||||||
|
x := []float64{5, 4, -3, -2}
|
||||||
|
mean := Mean(x, nil)
|
||||||
|
stdev := StDev(x, mean, nil)
|
||||||
|
kurt := ExKurtosis(x, mean, stdev, nil)
|
||||||
|
fmt.Printf("ExKurtosis = %.5f\n", kurt)
|
||||||
|
weights := []float64{1, 2, 3, 5}
|
||||||
|
wMean := Mean(x, weights)
|
||||||
|
wStdev := StDev(x, wMean, weights)
|
||||||
|
wKurt := ExKurtosis(x, wMean, wStdev, weights)
|
||||||
|
fmt.Printf("Weighted ExKurtosis is %.4f", wKurt)
|
||||||
|
// Output:
|
||||||
|
// Kurtosis is a measure of the 'peakedness' of a distribution, and the
|
||||||
|
// excess kurtosis is the kurtosis above or below that of the standard normal
|
||||||
|
// distribution
|
||||||
|
// ExKurtosis = -5.41200
|
||||||
|
// Weighted ExKurtosis is -0.6779
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleGeometricMean() {
|
||||||
|
x := []float64{8, 2, 9, 15, 4}
|
||||||
|
weights := []float64{2, 2, 6, 7, 1}
|
||||||
|
mean := Mean(x, weights)
|
||||||
|
gmean := GeometricMean(x, weights)
|
||||||
|
|
||||||
|
logx := make([]float64, len(x))
|
||||||
|
for i, v := range x {
|
||||||
|
logx[i] = math.Log(v)
|
||||||
|
}
|
||||||
|
expMeanLog := math.Exp(Mean(logx, weights))
|
||||||
|
fmt.Printf("The arithmetic mean is %.4f, but the geometric mean is %.4f.\n", mean, gmean)
|
||||||
|
fmt.Printf("The exponential of the mean of the logs is %.4f\n", expMeanLog)
|
||||||
|
// Output:
|
||||||
|
// The arithmetic mean is 10.1667, but the geometric mean is 8.7637.
|
||||||
|
// The exponential of the mean of the logs is 8.7637
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleHarmonicMean() {
|
||||||
|
x := []float64{8, 2, 9, 15, 4}
|
||||||
|
weights := []float64{2, 2, 6, 7, 1}
|
||||||
|
mean := Mean(x, weights)
|
||||||
|
hmean := HarmonicMean(x, weights)
|
||||||
|
|
||||||
|
fmt.Printf("The arithmetic mean is %.5f, but the harmonic mean is %.4f.\n", mean, hmean)
|
||||||
|
// Output:
|
||||||
|
// The arithmetic mean is 10.16667, but the harmonic mean is 6.8354.
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHistogram(t *testing.T) {
|
||||||
|
for i, test := range []struct {
|
||||||
|
x []float64
|
||||||
|
weights []float64
|
||||||
|
dividers []float64
|
||||||
|
ans []float64
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
x: []float64{1, 3, 5, 6, 7, 8},
|
||||||
|
dividers: []float64{2, 4, 6, 7},
|
||||||
|
ans: []float64{1, 1, 1, 1, 2},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
x: []float64{1, 3, 5, 6, 7, 8},
|
||||||
|
dividers: []float64{2, 4, 6, 7},
|
||||||
|
weights: []float64{1, 2, 1, 1, 1, 2},
|
||||||
|
ans: []float64{1, 2, 1, 1, 3},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
x: []float64{1, 8},
|
||||||
|
dividers: []float64{2, 4, 6, 7},
|
||||||
|
weights: []float64{1, 2},
|
||||||
|
ans: []float64{1, 0, 0, 0, 2},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
x: []float64{1, 8},
|
||||||
|
dividers: []float64{2, 4, 6, 7},
|
||||||
|
ans: []float64{1, 0, 0, 0, 1},
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
hist := Histogram(nil, test.dividers, test.x, test.weights)
|
||||||
|
if !floats.Equal(hist, test.ans) {
|
||||||
|
t.Errorf("Hist mismatch case %d. Expected %v, Found %v", i, test.ans, hist)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleHistogram() {
|
||||||
|
x := make([]float64, 101)
|
||||||
|
for i := range x {
|
||||||
|
x[i] = 1.1 * float64(i) // x data ranges from 0 to 110
|
||||||
|
}
|
||||||
|
dividers := []float64{7, 20, 100, 1000}
|
||||||
|
fmt.Println(`Histogram counts the amount of data in the bins specified by
|
||||||
|
the dividers. In this data set, there are 7 data points less than 7 (dividers[0]),
|
||||||
|
12 data points between 7 and 20 (dividers[2] and dividers[1]), and 0 data points
|
||||||
|
above 1000. Since dividers has length 4, there will be 5 bins.`)
|
||||||
|
hist := Histogram(nil, dividers, x, nil)
|
||||||
|
fmt.Printf("Hist = %v\n", hist)
|
||||||
|
|
||||||
|
fmt.Println()
|
||||||
|
fmt.Println("For ease, the floats Span function can be used to set the dividers")
|
||||||
|
nBins := 10
|
||||||
|
// Create one fewer divider than bins, but add two to work with Span (see
|
||||||
|
// note below)
|
||||||
|
dividers = make([]float64, nBins+1)
|
||||||
|
min, _ := floats.Min(x)
|
||||||
|
max, _ := floats.Max(x)
|
||||||
|
floats.Span(dividers, min, max)
|
||||||
|
// Span includes the min and the max. Trim the dividers to create 10 buckets
|
||||||
|
dividers = dividers[1 : len(dividers)-1]
|
||||||
|
fmt.Println("len dividers = ", len(dividers))
|
||||||
|
hist = Histogram(nil, dividers, x, nil)
|
||||||
|
fmt.Printf("Hist = %v\n", hist)
|
||||||
|
fmt.Println()
|
||||||
|
fmt.Println(`Histogram also works with weighted data, and allows reusing of
|
||||||
|
the count field in order to avoid extra garbage`)
|
||||||
|
weights := make([]float64, len(x))
|
||||||
|
for i := range weights {
|
||||||
|
weights[i] = float64(i + 1)
|
||||||
|
}
|
||||||
|
Histogram(hist, dividers, x, weights)
|
||||||
|
fmt.Printf("Weighted Hist = %v\n", hist)
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// Histogram counts the amount of data in the bins specified by
|
||||||
|
// the dividers. In this data set, there are 7 data points less than 7 (dividers[0]),
|
||||||
|
// 12 data points between 7 and 20 (dividers[2] and dividers[1]), and 0 data points
|
||||||
|
// above 1000. Since dividers has length 4, there will be 5 bins.
|
||||||
|
// Hist = [7 12 72 10 0]
|
||||||
|
//
|
||||||
|
// For ease, the floats Span function can be used to set the dividers
|
||||||
|
// len dividers = 9
|
||||||
|
// Hist = [11 10 10 10 9 11 10 10 9 11]
|
||||||
|
//
|
||||||
|
// Histogram also works with weighted data, and allows reusing of
|
||||||
|
// the count field in order to avoid extra garbage
|
||||||
|
// Weighted Hist = [77 175 275 375 423 627 675 775 783 1067]
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleKulbeckLiebler() {
|
||||||
|
|
||||||
|
p := []float64{0.05, 0.1, 0.9, 0.05}
|
||||||
|
q := []float64{0.2, 0.4, 0.25, 0.15}
|
||||||
|
s := []float64{0, 0, 1, 0}
|
||||||
|
|
||||||
|
klPQ := KulbeckLeibler(p, q)
|
||||||
|
klPS := KulbeckLeibler(p, s)
|
||||||
|
klPP := KulbeckLeibler(p, p)
|
||||||
|
|
||||||
|
fmt.Println("Kulbeck-Liebler is one measure of the difference between two distributions")
|
||||||
|
fmt.Printf("The K-L distance between p and q is %.4f\n", klPQ)
|
||||||
|
fmt.Println("It is impossible for s and p to be the same distribution, because")
|
||||||
|
fmt.Println("the first bucket has zero probability in s and non-zero in p. Thus,")
|
||||||
|
fmt.Printf("the K-L distance between them is %.4f\n", klPS)
|
||||||
|
fmt.Printf("The K-L distance between identical distributions is %.4f\n", klPP)
|
||||||
|
|
||||||
|
// Kulbeck-Liebler is one measure of the difference between two distributions
|
||||||
|
// The K-L distance between p and q is 0.8900
|
||||||
|
// It is impossible for s and p to be the same distribution, because
|
||||||
|
// the first bucket has zero probability in s and non-zero in p. Thus,
|
||||||
|
// the K-L distance between them is +Inf
|
||||||
|
// The K-L distance between identical distributions is 0.0000
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleMean() {
|
||||||
|
x := []float64{8.2, -6, 5, 7}
|
||||||
|
mean := Mean(x, nil)
|
||||||
|
fmt.Printf("The mean of the samples is %.4f\n", mean)
|
||||||
|
w := []float64{2, 6, 3, 5}
|
||||||
|
weightedMean := Mean(x, w)
|
||||||
|
fmt.Printf("The weighted mean of the samples is %.4f\n", weightedMean)
|
||||||
|
x2 := []float64{8.2, 8.2, -6, -6, -6, -6, -6, -6, 5, 5, 5, 7, 7, 7, 7, 7}
|
||||||
|
mean2 := Mean(x2, nil)
|
||||||
|
fmt.Printf("The mean of x2 is %.4f\n", mean2)
|
||||||
|
fmt.Println("The weights act as if there were more samples of that number")
|
||||||
|
// Output:
|
||||||
|
// The mean of the samples is 3.5500
|
||||||
|
// The weighted mean of the samples is 1.9000
|
||||||
|
// The mean of x2 is 1.9000
|
||||||
|
// The weights act as if there were more samples of that number
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMode(t *testing.T) {
|
||||||
|
for i, test := range []struct {
|
||||||
|
x []float64
|
||||||
|
weights []float64
|
||||||
|
ans float64
|
||||||
|
count float64
|
||||||
|
}{
|
||||||
|
{},
|
||||||
|
{
|
||||||
|
x: []float64{1, 6, 1, 9, -2},
|
||||||
|
ans: 1,
|
||||||
|
count: 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
x: []float64{1, 6, 1, 9, -2},
|
||||||
|
weights: []float64{1, 7, 3, 5, 0},
|
||||||
|
ans: 6,
|
||||||
|
count: 7,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
m, count := Mode(test.x, test.weights)
|
||||||
|
if test.ans != m {
|
||||||
|
t.Errorf("Mode mismatch case %d. Expected %v, found %v", i, test.ans, m)
|
||||||
|
}
|
||||||
|
if test.count != count {
|
||||||
|
t.Errorf("Mode count mismatch case %d. Expected %v, found %v", i, test.count, count)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMoment(t *testing.T) {
|
||||||
|
for i, test := range []struct {
|
||||||
|
x []float64
|
||||||
|
weights []float64
|
||||||
|
moment float64
|
||||||
|
mean float64
|
||||||
|
ans float64
|
||||||
|
}{
|
||||||
|
{},
|
||||||
|
{
|
||||||
|
x: []float64{6, 2, 4, 8, 9},
|
||||||
|
mean: 3,
|
||||||
|
moment: 5,
|
||||||
|
ans: 2.2288e3,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
m := Moment(test.moment, test.x, test.mean, test.weights)
|
||||||
|
if math.Abs(test.ans-m) > 1e-14 {
|
||||||
|
t.Errorf("Moment mismatch case %d. Expected %v, found %v", i, test.ans, m)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPercentile(t *testing.T) {
|
||||||
|
for i, test := range []struct {
|
||||||
|
p []float64
|
||||||
|
x []float64
|
||||||
|
w []float64
|
||||||
|
ans []float64
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
p: []float64{0, 0.05, 0.1, 0.15, 0.45, 0.5, 0.55, 0.85, 0.9, 0.95, 1},
|
||||||
|
x: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
|
||||||
|
w: nil,
|
||||||
|
ans: []float64{1, 1, 1, 2, 5, 5, 6, 9, 9, 10, 10},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
p: []float64{0, 0.05, 0.1, 0.15, 0.45, 0.5, 0.55, 0.85, 0.9, 0.95, 1},
|
||||||
|
x: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
|
||||||
|
w: []float64{3, 3, 3, 3, 3, 3, 3, 3, 3, 3},
|
||||||
|
ans: []float64{1, 1, 1, 2, 5, 5, 6, 9, 9, 10, 10},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
p: []float64{0, 0.05, 0.1, 0.15, 0.45, 0.5, 0.55, 0.85, 0.9, 0.95, 1},
|
||||||
|
x: []float64{3, 10, 1, 2, 5, 9, 7, 8, 6, 4},
|
||||||
|
w: []float64{3, 3, 3, 3, 3, 3, 3, 3, 3, 3},
|
||||||
|
ans: []float64{1, 1, 1, 2, 5, 5, 6, 9, 9, 10, 10},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
p: []float64{0, 0.05, 0.1, 0.15, 0.45, 0.5, 0.55, 0.85, 0.9, 0.95, 1},
|
||||||
|
x: []float64{3, 10, 1, 2, 5, 9, 7, 8, 6, 4},
|
||||||
|
w: nil,
|
||||||
|
ans: []float64{1, 1, 1, 2, 5, 5, 6, 9, 9, 10, 10},
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
if len(test.p) != len(test.ans) {
|
||||||
|
panic("bad test")
|
||||||
|
}
|
||||||
|
copyX := make([]float64, len(test.x))
|
||||||
|
copy(copyX, test.x)
|
||||||
|
var copyW []float64
|
||||||
|
if test.w != nil {
|
||||||
|
copyW = make([]float64, len(test.w))
|
||||||
|
copy(copyW, test.w)
|
||||||
|
}
|
||||||
|
for j, p := range test.p {
|
||||||
|
v := Percentile(p, test.x, test.w)
|
||||||
|
if !floats.Equal(copyX, test.x) {
|
||||||
|
t.Errorf("x changed for case %d percentile %v", i, p)
|
||||||
|
}
|
||||||
|
if !floats.Equal(copyW, test.w) {
|
||||||
|
t.Errorf("x changed for case %d percentile %v", i, p)
|
||||||
|
}
|
||||||
|
if v != test.ans[j] {
|
||||||
|
t.Errorf("mismatch case %d percentile %v. Expected: %v, found: %v", i, p, test.ans[j], v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestQuantile(t *testing.T) {
|
||||||
|
for i, test := range []struct {
|
||||||
|
q []float64
|
||||||
|
x []float64
|
||||||
|
weights []float64
|
||||||
|
ans []float64
|
||||||
|
}{
|
||||||
|
{},
|
||||||
|
{
|
||||||
|
q: []float64{0, 0.9, 1, 1.1, 2.9, 3, 3.1, 4.9, 5, 5.1},
|
||||||
|
x: []float64{1, 2, 3, 4, 5},
|
||||||
|
ans: []float64{0, 0, 0.2, 0.2, 0.4, 0.6, 0.6, 0.8, 1, 1},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
q: []float64{0, 0.9, 1, 1.1, 2.9, 3, 3.1, 4.9, 5, 5.1},
|
||||||
|
x: []float64{5, 2, 3, 4, 1},
|
||||||
|
ans: []float64{0, 0, 0.2, 0.2, 0.4, 0.6, 0.6, 0.8, 1, 1},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
q: []float64{0, 0.9, 1, 1.1, 2.9, 3, 3.1, 4.9, 5, 5.1},
|
||||||
|
x: []float64{5, 2, 3, 4, 1},
|
||||||
|
weights: []float64{1, 1, 1, 1, 1},
|
||||||
|
ans: []float64{0, 0, 0.2, 0.2, 0.4, 0.6, 0.6, 0.8, 1, 1},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
q: []float64{0, 0.9, 1, 1.1, 2.9, 3, 3.1, 4.9, 5, 5.1},
|
||||||
|
x: []float64{5, 2, 3, 4, 1},
|
||||||
|
weights: []float64{1, 1, 2, 5, 1},
|
||||||
|
ans: []float64{0, 0, 0.1, 0.1, 0.2, 0.4, 0.4, 0.9, 1, 1},
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
copyX := make([]float64, len(test.x))
|
||||||
|
copy(copyX, test.x)
|
||||||
|
var copyW []float64
|
||||||
|
if test.weights != nil {
|
||||||
|
copyW = make([]float64, len(test.weights))
|
||||||
|
copy(copyW, test.weights)
|
||||||
|
}
|
||||||
|
for j, q := range test.q {
|
||||||
|
v := Quantile(q, test.x, test.weights)
|
||||||
|
if !floats.Equal(copyX, test.x) {
|
||||||
|
t.Errorf("x changed for case %d percentile %v", i, q)
|
||||||
|
}
|
||||||
|
if !floats.Equal(copyW, test.weights) {
|
||||||
|
t.Errorf("x changed for case %d percentile %v", i, q)
|
||||||
|
}
|
||||||
|
if v != test.ans[j] {
|
||||||
|
t.Errorf("mismatch case %d percentile %v. Expected: %v, found: %v", i, q, test.ans[j], v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleStDev() {
|
||||||
|
x := []float64{8, 2, -9, 15, 4}
|
||||||
|
mean := Mean(x, nil)
|
||||||
|
stdev := StDev(x, mean, nil)
|
||||||
|
fmt.Printf("The standard deviation of the samples is %.4f\n", stdev)
|
||||||
|
|
||||||
|
weights := []float64{2, 2, 6, 7, 1}
|
||||||
|
weightedMean := Mean(x, weights)
|
||||||
|
weightedStdev := StDev(x, weightedMean, weights)
|
||||||
|
fmt.Printf("The weighted standard deviation of the samples is %.4f\n", weightedStdev)
|
||||||
|
// Output:
|
||||||
|
// The standard deviation of the samples is 8.8034
|
||||||
|
// The weighted standard deviation of the samples is 10.5733
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleStdErr() {
|
||||||
|
x := []float64{8, 2, -9, 15, 4}
|
||||||
|
weights := []float64{2, 2, 6, 7, 1}
|
||||||
|
mean := Mean(x, weights)
|
||||||
|
stdev := StDev(x, mean, weights)
|
||||||
|
nSamples := floats.Sum(weights)
|
||||||
|
stdErr := StdErr(stdev, nSamples)
|
||||||
|
fmt.Printf("The standard deviation is %.4f and there are %g samples, so the mean\nis likely %.4f ± %.4f.", stdev, nSamples, mean, stdErr)
|
||||||
|
// Output:
|
||||||
|
// The standard deviation is 10.5733 and there are 18 samples, so the mean
|
||||||
|
// is likely 4.1667 ± 2.4921.
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSkew(t *testing.T) {
|
||||||
|
for i, test := range []struct {
|
||||||
|
x []float64
|
||||||
|
weights []float64
|
||||||
|
ans float64
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
x: []float64{8, 3, 7, 8, 4},
|
||||||
|
weights: nil,
|
||||||
|
ans: -0.581456499151665,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
x: []float64{8, 3, 7, 8, 4},
|
||||||
|
weights: []float64{1, 1, 1, 1, 1},
|
||||||
|
ans: -0.581456499151665,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
x: []float64{8, 3, 7, 8, 4},
|
||||||
|
weights: []float64{2, 1, 2, 1, 1},
|
||||||
|
ans: -1.12066646837198,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
mean := Mean(test.x, test.weights)
|
||||||
|
std := StDev(test.x, mean, test.weights)
|
||||||
|
skew := Skew(test.x, mean, std, test.weights)
|
||||||
|
if math.Abs(skew-test.ans) > 1e-14 {
|
||||||
|
t.Errorf("Skew mismatch case %d. Expected %v, Found %v", i, test.ans, skew)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVariance(t *testing.T) {
|
||||||
|
for i, test := range []struct {
|
||||||
|
x []float64
|
||||||
|
weights []float64
|
||||||
|
ans float64
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
x: []float64{8, -3, 7, 8, -4},
|
||||||
|
weights: nil,
|
||||||
|
ans: 37.7,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
x: []float64{8, -3, 7, 8, -4},
|
||||||
|
weights: []float64{1, 1, 1, 1, 1},
|
||||||
|
ans: 37.7,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
x: []float64{8, 3, 7, 8, 4},
|
||||||
|
weights: []float64{2, 1, 2, 1, 1},
|
||||||
|
ans: 4.2857142857142865,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
mean := Mean(test.x, test.weights)
|
||||||
|
variance := Variance(test.x, mean, test.weights)
|
||||||
|
if math.Abs(variance-test.ans) > 1e-14 {
|
||||||
|
t.Errorf("Variance mismatch case %d. Expected %v, Found %v", i, test.ans, variance)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleVariance() {
|
||||||
|
x := []float64{8, 2, -9, 15, 4}
|
||||||
|
mean := Mean(x, nil)
|
||||||
|
variance := Variance(x, mean, nil)
|
||||||
|
fmt.Printf("The variance of the samples is %.4f\n", variance)
|
||||||
|
|
||||||
|
weights := []float64{2, 2, 6, 7, 1}
|
||||||
|
weightedMean := Mean(x, weights)
|
||||||
|
weightedVariance := Variance(x, weightedMean, weights)
|
||||||
|
fmt.Printf("The weighted variance of the samples is %.4f\n", weightedVariance)
|
||||||
|
// Output:
|
||||||
|
// The variance of the samples is 77.5000
|
||||||
|
// The weighted variance of the samples is 111.7941
|
||||||
|
}
|
Reference in New Issue
Block a user