From 153d48337f8b51b73e55d47b61e87210ca2974cb Mon Sep 17 00:00:00 2001 From: kczimm Date: Fri, 8 Dec 2017 00:23:03 -0600 Subject: [PATCH] stat: implement Pareto (#309) --- stat/distuv/pareto.go | 122 ++++++++++++++++++++++++++++ stat/distuv/pareto_test.go | 162 +++++++++++++++++++++++++++++++++++++ 2 files changed, 284 insertions(+) create mode 100644 stat/distuv/pareto.go create mode 100644 stat/distuv/pareto_test.go diff --git a/stat/distuv/pareto.go b/stat/distuv/pareto.go new file mode 100644 index 00000000..c5aa8fc5 --- /dev/null +++ b/stat/distuv/pareto.go @@ -0,0 +1,122 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "math" + + "golang.org/x/exp/rand" +) + +// Pareto implements the Pareto (Type I) distribution, a one parameter distribution +// with support above the scale parameter. +// +// The density function is given by +// (α x_m^{α})/(x^{α+1}) for x >= x_m. +// +// For more information, see https://en.wikipedia.org/wiki/Pareto_distribution. +type Pareto struct { + // Xm is the scale parameter. + // Xm must be greater than 0. + Xm float64 + + // Alpha is the shape parameter. + // Alpha must be greater than 0. + Alpha float64 + + Src *rand.Rand +} + +// CDF computes the value of the cumulative density function at x. +func (p Pareto) CDF(x float64) float64 { + if x < p.Xm { + return 0 + } + return 1 - p.Survival(x) +} + +// Entropy returns the differential entropy of the distribution. +func (p Pareto) Entropy() float64 { + return math.Log(p.Xm) - math.Log(p.Alpha) + (1 + 1/p.Alpha) +} + +// ExKurtosis returns the excess kurtosis of the distribution. +func (p Pareto) ExKurtosis() float64 { + if p.Alpha <= 4 { + return 0 + } + return 6 * (p.Alpha*p.Alpha*p.Alpha + p.Alpha*p.Alpha - 6*p.Alpha - 2) / (p.Alpha * (p.Alpha - 3) * (p.Alpha - 4)) + +} + +// LogProb computes the natural logarithm of the value of the probability +// density function at x. +func (p Pareto) LogProb(x float64) float64 { + if x < p.Xm { + return math.Inf(-1) + } + return math.Log(p.Alpha) + p.Alpha*math.Log(p.Xm) - (p.Alpha+1)*math.Log(x) +} + +// Mean returns the mean of the probability distribution. +func (p Pareto) Mean() float64 { + if p.Alpha <= 1 { + return math.Inf(1) + } + return p.Alpha * p.Xm / (p.Alpha - 1) +} + +// Median returns the median of the pareto distribution. +func (p Pareto) Median() float64 { + return p.Xm * math.Pow(2, 1/p.Alpha) +} + +// Mode returns the mode of the distribution. +func (p Pareto) Mode() float64 { + return p.Xm +} + +// NumParameters returns the number of parameters in the distribution. +func (p Pareto) NumParameters() int { + return 2 +} + +// Prob computes the value of the probability density function at x. +func (p Pareto) Prob(x float64) float64 { + return math.Exp(p.LogProb(x)) +} + +// Rand returns a random sample drawn from the distribution. +func (p Pareto) Rand() float64 { + var rnd float64 + if p.Src == nil { + rnd = rand.ExpFloat64() + } else { + rnd = p.Src.ExpFloat64() + } + return math.Exp(math.Log(p.Xm) + 1/p.Alpha*rnd) +} + +// StdDev returns the standard deviation of the probability distribution. +func (p Pareto) StdDev() float64 { + return math.Sqrt(p.Variance()) +} + +// Survival returns the survival function (complementary CDF) at x. +func (p Pareto) Survival(x float64) float64 { + if x < p.Xm { + return 1 + } + return math.Exp(p.Alpha * (math.Log(p.Xm) - math.Log(x))) +} + +// Variance returns the variance of the probability distribution. +func (p Pareto) Variance() float64 { + if p.Alpha <= 2 { + return math.Inf(1) + } + am1 := p.Alpha - 1 + return p.Xm * p.Xm * p.Alpha / (am1 * am1 * (p.Alpha - 2)) +} diff --git a/stat/distuv/pareto_test.go b/stat/distuv/pareto_test.go new file mode 100644 index 00000000..37e1f2d7 --- /dev/null +++ b/stat/distuv/pareto_test.go @@ -0,0 +1,162 @@ +// Copyright ©2017 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "sort" + "testing" + + "golang.org/x/exp/rand" + "gonum.org/v1/gonum/floats" +) + +func TestParetoProb(t *testing.T) { + for _, test := range []struct { + x, xm, alpha, want float64 + }{ + {0, 1, 1, 0}, + {0.5, 1, 1, 0}, + {1, 1, 1, 1.0}, + {1.5, 1, 1, 0.444444444444444}, + {2, 1, 1, 0.25}, + {2.5, 1, 1, 0.16}, + {3, 1, 1, 0.1111111111111111}, + {3.5, 1, 1, 0.081632653061224}, + {4, 1, 1, 0.0625}, + {4.5, 1, 1, 0.049382716049383}, + {5, 1, 1, 0.04}, + + {0, 1, 2, 0}, + {0.5, 1, 2, 0}, + {1, 1, 2, 2}, + {1.5, 1, 2, 0.592592592592593}, + {2, 1, 2, 0.25}, + {2.5, 1, 2, 0.128}, + {3, 1, 2, 0.074074074074074}, + {3.5, 1, 2, 0.046647230320700}, + {4, 1, 2, 0.03125}, + {4.5, 1, 2, 0.021947873799726}, + {5, 1, 2, 0.016}, + + {0, 1, 3, 0}, + {0.5, 1, 3, 0}, + {1, 1, 3, 3.0}, + {1.5, 1, 3, 0.592592592592593}, + {2, 1, 3, 0.1875}, + {2.5, 1, 3, 0.0768}, + {3, 1, 3, 0.037037037037037}, + {3.5, 1, 3, 0.019991670137443}, + {4, 1, 3, 0.011718750000000}, + {4.5, 1, 3, 0.007315957933242}, + {5, 1, 3, 0.0048}, + } { + pdf := Pareto{test.xm, test.alpha, nil}.Prob(test.x) + if !floats.EqualWithinAbsOrRel(pdf, test.want, 1e-10, 1e-10) { + t.Errorf("Pdf mismatch, x = %v, xm = %v, alpha = %v. Got %v, want %v", test.x, test.xm, test.alpha, pdf, test.want) + } + } +} + +func TestParetoCDF(t *testing.T) { + for _, test := range []struct { + x, xm, alpha, want float64 + }{ + {0, 1, 1, 0}, + {0.5, 1, 1, 0}, + {1, 1, 1, 0}, + {1.5, 1, 1, 0.333333333333333}, + {2, 1, 1, 0.5}, + {2.5, 1, 1, 0.6}, + {3, 1, 1, 0.666666666666667}, + {3.5, 1, 1, 0.714285714285714}, + {4, 1, 1, 0.75}, + {4.5, 1, 1, 0.777777777777778}, + {5, 1, 1, 0.80}, + {5.5, 1, 1, 0.818181818181818}, + {6, 1, 1, 0.833333333333333}, + {6.5, 1, 1, 0.846153846153846}, + {7, 1, 1, 0.857142857142857}, + {7.5, 1, 1, 0.866666666666667}, + {8, 1, 1, 0.875}, + {8.5, 1, 1, 0.882352941176471}, + {9, 1, 1, 0.888888888888889}, + {9.5, 1, 1, 0.894736842105263}, + {10, 1, 1, 0.90}, + + {0, 1, 2, 0}, + {0.5, 1, 2, 0}, + {1, 1, 2, 0}, + {1.5, 1, 2, 0.555555555555556}, + {2, 1, 2, 0.75}, + {2.5, 1, 2, 0.84}, + {3, 1, 2, 0.888888888888889}, + {3.5, 1, 2, 0.918367346938776}, + {4, 1, 2, 0.9375}, + {4.5, 1, 2, 0.950617283950617}, + {5, 1, 2, 0.96}, + {5.5, 1, 2, 0.966942148760331}, + {6, 1, 2, 0.972222222222222}, + {6.5, 1, 2, 0.976331360946746}, + {7, 1, 2, 0.979591836734694}, + {7.5, 1, 2, 0.982222222222222}, + {8, 1, 2, 0.984375000000000}, + {8.5, 1, 2, 0.986159169550173}, + {9, 1, 2, 0.987654320987654}, + {9.5, 1, 2, 0.988919667590028}, + {10, 1, 2, 0.99}, + + {0, 1, 3, 0}, + {0.5, 1, 3, 0}, + {1, 1, 3, 0}, + {1.5, 1, 3, 0.703703703703704}, + {2, 1, 3, 0.875}, + {2.5, 1, 3, 0.936}, + {3, 1, 3, 0.962962962962963}, + {3.5, 1, 3, 0.976676384839650}, + {4, 1, 3, 0.984375000000000}, + {4.5, 1, 3, 0.989026063100137}, + {5, 1, 3, 0.992}, + {5.5, 1, 3, 0.993989481592787}, + {6, 1, 3, 0.995370370370370}, + {6.5, 1, 3, 0.996358670914884}, + {7, 1, 3, 0.997084548104956}, + {7.5, 1, 3, 0.997629629629630}, + {8, 1, 3, 0.998046875000000}, + {8.5, 1, 3, 0.998371667005903}, + {9, 1, 3, 0.998628257887517}, + {9.5, 1, 3, 0.998833649220003}, + {10, 1, 3, 0.999}, + } { + cdf := Pareto{test.xm, test.alpha, nil}.CDF(test.x) + if !floats.EqualWithinAbsOrRel(cdf, test.want, 1e-10, 1e-10) { + t.Errorf("CDF mismatch, x = %v, xm = %v, alpha = %v. Got %v, want %v", test.x, test.xm, test.alpha, cdf, test.want) + } + } +} + +func TestPareto(t *testing.T) { + src := rand.New(rand.NewSource(1)) + for i, p := range []Pareto{ + {1, 10, src}, + {1, 20, src}, + } { + testPareto(t, p, i) + } +} + +func testPareto(t *testing.T, p Pareto, i int) { + tol := 1e-2 + const n = 1e6 + const bins = 50 + x := make([]float64, n) + generateSamples(x, p) + sort.Float64s(x) + + testRandLogProbContinuous(t, i, 0, x, p, tol, bins) + checkMean(t, i, x, p, tol) + checkVarAndStd(t, i, x, p, tol) + checkExKurtosis(t, i, x, p, 7e-2) + checkProbContinuous(t, i, x, p, 1e-3) +}