mirror of
https://github.com/gonum/gonum.git
synced 2025-10-08 00:20:11 +08:00
294 lines
8.3 KiB
Go
294 lines
8.3 KiB
Go
// Copyright ©2015 The Gonum Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package distuv
|
|
|
|
import (
|
|
"math"
|
|
"sort"
|
|
"testing"
|
|
|
|
"gonum.org/v1/gonum/floats"
|
|
"gonum.org/v1/gonum/integrate/quad"
|
|
"gonum.org/v1/gonum/stat"
|
|
)
|
|
|
|
type meaner interface {
|
|
Mean() float64
|
|
}
|
|
|
|
type quantiler interface {
|
|
Quantile(float64) float64
|
|
}
|
|
|
|
type medianer interface {
|
|
quantiler
|
|
Median() float64
|
|
}
|
|
|
|
type varStder interface {
|
|
StdDev() float64
|
|
Variance() float64
|
|
}
|
|
|
|
type entropyer interface {
|
|
LogProber
|
|
Entropy() float64
|
|
}
|
|
|
|
type exKurtosiser interface {
|
|
ExKurtosis() float64
|
|
Mean() float64
|
|
}
|
|
|
|
type skewnesser interface {
|
|
StdDev() float64
|
|
Mean() float64
|
|
Skewness() float64
|
|
}
|
|
|
|
type cumulanter interface {
|
|
Quantiler
|
|
CDF(x float64) float64
|
|
Survival(x float64) float64
|
|
}
|
|
|
|
func generateSamples(x []float64, r Rander) {
|
|
for i := range x {
|
|
x[i] = r.Rand()
|
|
}
|
|
}
|
|
|
|
type probLogprober interface {
|
|
Prob(x float64) float64
|
|
LogProb(x float64) float64
|
|
}
|
|
|
|
type cumulantProber interface {
|
|
cumulanter
|
|
probLogprober
|
|
}
|
|
|
|
func checkMean(t *testing.T, i int, x []float64, m meaner, tol float64) {
|
|
mean := stat.Mean(x, nil)
|
|
if !floats.EqualWithinAbsOrRel(mean, m.Mean(), tol, tol) {
|
|
t.Errorf("Mean mismatch case %v: want: %v, got: %v", i, mean, m.Mean())
|
|
}
|
|
}
|
|
|
|
func checkMedian(t *testing.T, i int, x []float64, m medianer, tol float64) {
|
|
median := stat.Quantile(0.5, stat.Empirical, x, nil)
|
|
if !floats.EqualWithinAbsOrRel(median, m.Median(), tol, tol) {
|
|
t.Errorf("Median mismatch case %v: want: %v, got: %v", i, median, m.Median())
|
|
}
|
|
}
|
|
|
|
func checkVarAndStd(t *testing.T, i int, x []float64, v varStder, tol float64) {
|
|
variance := stat.Variance(x, nil)
|
|
if !floats.EqualWithinAbsOrRel(variance, v.Variance(), tol, tol) {
|
|
t.Errorf("Variance mismatch case %v: want: %v, got: %v", i, variance, v.Variance())
|
|
}
|
|
std := math.Sqrt(variance)
|
|
if !floats.EqualWithinAbsOrRel(std, v.StdDev(), tol, tol) {
|
|
t.Errorf("StdDev mismatch case %v: want: %v, got: %v", i, std, v.StdDev())
|
|
}
|
|
}
|
|
|
|
func checkEntropy(t *testing.T, i int, x []float64, e entropyer, tol float64) {
|
|
tmp := make([]float64, len(x))
|
|
for i, v := range x {
|
|
tmp[i] = -e.LogProb(v)
|
|
}
|
|
entropy := stat.Mean(tmp, nil)
|
|
if !floats.EqualWithinAbsOrRel(entropy, e.Entropy(), tol, tol) {
|
|
t.Errorf("Entropy mismatch case %v: want: %v, got: %v", i, entropy, e.Entropy())
|
|
}
|
|
}
|
|
|
|
func checkExKurtosis(t *testing.T, i int, x []float64, e exKurtosiser, tol float64) {
|
|
mean := e.Mean()
|
|
tmp := make([]float64, len(x))
|
|
for i, x := range x {
|
|
tmp[i] = math.Pow(x-mean, 4)
|
|
}
|
|
variance := stat.Variance(x, nil)
|
|
mu4 := stat.Mean(tmp, nil)
|
|
kurtosis := mu4/(variance*variance) - 3
|
|
if !floats.EqualWithinAbsOrRel(kurtosis, e.ExKurtosis(), tol, tol) {
|
|
t.Errorf("ExKurtosis mismatch case %v: want: %v, got: %v", i, kurtosis, e.ExKurtosis())
|
|
}
|
|
}
|
|
|
|
func checkSkewness(t *testing.T, i int, x []float64, s skewnesser, tol float64) {
|
|
mean := s.Mean()
|
|
std := s.StdDev()
|
|
tmp := make([]float64, len(x))
|
|
for i, v := range x {
|
|
tmp[i] = math.Pow(v-mean, 3)
|
|
}
|
|
mu3 := stat.Mean(tmp, nil)
|
|
skewness := mu3 / math.Pow(std, 3)
|
|
if !floats.EqualWithinAbsOrRel(skewness, s.Skewness(), tol, tol) {
|
|
t.Errorf("Skewness mismatch case %v: want: %v, got: %v", i, skewness, s.Skewness())
|
|
}
|
|
}
|
|
|
|
func checkQuantileCDFSurvival(t *testing.T, i int, xs []float64, c cumulanter, tol float64) {
|
|
// Quantile, CDF, and survival check.
|
|
for i, p := range []float64{0.1, 0.25, 0.5, 0.75, 0.9} {
|
|
x := c.Quantile(p)
|
|
cdf := c.CDF(x)
|
|
estCDF := stat.CDF(x, stat.Empirical, xs, nil)
|
|
if !floats.EqualWithinAbsOrRel(cdf, estCDF, tol, tol) {
|
|
t.Errorf("CDF mismatch case %v: want: %v, got: %v", i, estCDF, cdf)
|
|
}
|
|
if !floats.EqualWithinAbsOrRel(cdf, p, tol, tol) {
|
|
t.Errorf("Quantile/CDF mismatch case %v: want: %v, got: %v", i, p, cdf)
|
|
}
|
|
if math.Abs(1-cdf-c.Survival(x)) > 1e-14 {
|
|
t.Errorf("Survival/CDF mismatch case %v: want: %v, got: %v", i, 1-cdf, c.Survival(x))
|
|
}
|
|
}
|
|
}
|
|
|
|
func checkProbContinuous(t *testing.T, i int, x []float64, p probLogprober, tol float64) {
|
|
// Check that the PDF is consistent (integrates to 1).
|
|
q := quad.Fixed(p.Prob, math.Inf(-1), math.Inf(1), 1000000, nil, 0)
|
|
if math.Abs(q-1) > tol {
|
|
t.Errorf("Probability distribution doesn't integrate to 1. Case %v: Got %v", i, q)
|
|
}
|
|
|
|
// Check that PDF and LogPDF are consistent.
|
|
for i, v := range x {
|
|
if math.Abs(math.Log(p.Prob(v))-p.LogProb(v)) > 1e-14 {
|
|
t.Errorf("Prob and LogProb mismatch case %v at %v: want %v, got %v", i, v, math.Log(v), p.LogProb(v))
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
// checkProbQuantContinuous checks that the Prob, Rand, and Quantile are all consistent.
|
|
// checkProbContinuous only checks that Prob is a valid distribution (integrates
|
|
// to 1 and greater than 0). However, this is also true if the PDF of a different
|
|
// distribution is used. This checks that PDF is also consistent with the
|
|
// CDF implementation and the random samples.
|
|
func checkProbQuantContinuous(t *testing.T, i int, xs []float64, c cumulantProber, tol float64) {
|
|
ps := make([]float64, 101)
|
|
floats.Span(ps, 0, 1)
|
|
|
|
var xp, x float64
|
|
for i, p := range ps {
|
|
x = c.Quantile(p)
|
|
if p == 0 {
|
|
xp = x
|
|
if floats.Min(xs) < x {
|
|
t.Errorf("Sample of x less than Quantile(0). Case %v.", i)
|
|
break
|
|
}
|
|
continue
|
|
}
|
|
if p == 1 {
|
|
if floats.Max(xs) > x {
|
|
t.Errorf("Sample of x greater than Quantile(1). Case %v.", i)
|
|
break
|
|
}
|
|
}
|
|
|
|
// The integral of the PDF between xp and x should be the difference in
|
|
// the quantiles.
|
|
q := quad.Fixed(c.Prob, xp, x, 1000, nil, 0)
|
|
if math.Abs(q-(p-ps[i-1])) > 1e-5 {
|
|
t.Errorf("Integral of PDF doesn't match quantile. Case %v. Want %v, got %v.", i, p-ps[i-1], q)
|
|
break
|
|
}
|
|
|
|
pEst := stat.CDF(x, stat.Empirical, xs, nil)
|
|
if math.Abs(pEst-p) > tol {
|
|
t.Errorf("Empirical CDF doesn't match quantile. Case %v.", i)
|
|
}
|
|
xp = x
|
|
}
|
|
}
|
|
|
|
// checkProbDiscrete confirms that PDF and Rand are consistent for discrete distributions.
|
|
func checkProbDiscrete(t *testing.T, i int, xs []float64, p probLogprober, tol float64) {
|
|
// Make a map of all of the unique samples.
|
|
m := make(map[float64]int)
|
|
for _, v := range xs {
|
|
m[v]++
|
|
}
|
|
for x, count := range m {
|
|
prob := float64(count) / float64(len(xs))
|
|
if math.Abs(prob-p.Prob(x)) > tol {
|
|
t.Errorf("PDF mismatch case %v at %v: want %v, got %v", i, x, prob, p.Prob(x))
|
|
}
|
|
if math.Abs(math.Log(p.Prob(x))-p.LogProb(x)) > 1e-14 {
|
|
t.Errorf("Prob and LogProb mismatch case %v at %v: want %v, got %v", i, x, math.Log(x), p.LogProb(x))
|
|
}
|
|
}
|
|
}
|
|
|
|
// dist is a type that implements the standard set of routines.
|
|
type fullDist interface {
|
|
CDF(x float64) float64
|
|
Entropy() float64
|
|
ExKurtosis() float64
|
|
LogProb(x float64) float64
|
|
Mean() float64
|
|
Median() float64
|
|
NumParameters() int
|
|
Prob(x float64) float64
|
|
Quantile(p float64) float64
|
|
Rand() float64
|
|
Skewness() float64
|
|
StdDev() float64
|
|
Survival(x float64) float64
|
|
Variance() float64
|
|
}
|
|
|
|
// testFullDist tests all of the functions of a fullDist.
|
|
func testFullDist(t *testing.T, f fullDist, i int, continuous bool) {
|
|
tol := 1e-2
|
|
const n = 1e6
|
|
x := make([]float64, n)
|
|
generateSamples(x, f)
|
|
sort.Float64s(x)
|
|
|
|
checkMean(t, i, x, f, tol)
|
|
checkVarAndStd(t, i, x, f, tol)
|
|
checkEntropy(t, i, x, f, tol)
|
|
checkExKurtosis(t, i, x, f, tol)
|
|
checkSkewness(t, i, x, f, tol)
|
|
if continuous {
|
|
// In a discrete distribution, the median may not have positive probability.
|
|
checkMedian(t, i, x, f, tol)
|
|
// In a discrete distribution, the CDF and Quantile may not be perfect mappings.
|
|
checkQuantileCDFSurvival(t, i, x, f, tol)
|
|
// Integrate over the PDF
|
|
checkProbContinuous(t, i, x, f, 1e-10)
|
|
checkProbQuantContinuous(t, i, x, f, tol)
|
|
} else {
|
|
// Check against empirical PDF.
|
|
checkProbDiscrete(t, i, x, f, tol)
|
|
}
|
|
}
|
|
|
|
// testRandLogProb tests that LogProb and Rand give consistent results. This
|
|
// can be used when the distribution does not implement CDF.
|
|
func testRandLogProbContinuous(t *testing.T, i int, min float64, x []float64, f LogProber, tol float64, bins int) {
|
|
for cdf := 1 / float64(bins); cdf <= 1-1/float64(bins); cdf += 1 / float64(bins) {
|
|
// Get the estimated CDF from the samples
|
|
pt := stat.Quantile(cdf, stat.Empirical, x, nil)
|
|
|
|
prob := func(x float64) float64 {
|
|
return math.Exp(f.LogProb(x))
|
|
}
|
|
// Integrate the PDF to find the CDF
|
|
estCDF := quad.Fixed(prob, min, pt, 1000, nil, 0)
|
|
if !floats.EqualWithinAbsOrRel(cdf, estCDF, tol, tol) {
|
|
t.Errorf("Mismatch between integral of PDF and empirical CDF. Case %v. Want %v, got %v", i, cdf, estCDF)
|
|
}
|
|
}
|
|
}
|