stat: implement Pareto (#309)

This commit is contained in:
kczimm
2017-12-08 00:23:03 -06:00
committed by Brendan Tracey
parent 3883f53293
commit 153d48337f
2 changed files with 284 additions and 0 deletions

122
stat/distuv/pareto.go Normal file
View File

@@ -0,0 +1,122 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"math"
"golang.org/x/exp/rand"
)
// Pareto implements the Pareto (Type I) distribution, a one parameter distribution
// with support above the scale parameter.
//
// The density function is given by
// (α x_m^{α})/(x^{α+1}) for x >= x_m.
//
// For more information, see https://en.wikipedia.org/wiki/Pareto_distribution.
type Pareto struct {
// Xm is the scale parameter.
// Xm must be greater than 0.
Xm float64
// Alpha is the shape parameter.
// Alpha must be greater than 0.
Alpha float64
Src *rand.Rand
}
// CDF computes the value of the cumulative density function at x.
func (p Pareto) CDF(x float64) float64 {
if x < p.Xm {
return 0
}
return 1 - p.Survival(x)
}
// Entropy returns the differential entropy of the distribution.
func (p Pareto) Entropy() float64 {
return math.Log(p.Xm) - math.Log(p.Alpha) + (1 + 1/p.Alpha)
}
// ExKurtosis returns the excess kurtosis of the distribution.
func (p Pareto) ExKurtosis() float64 {
if p.Alpha <= 4 {
return 0
}
return 6 * (p.Alpha*p.Alpha*p.Alpha + p.Alpha*p.Alpha - 6*p.Alpha - 2) / (p.Alpha * (p.Alpha - 3) * (p.Alpha - 4))
}
// LogProb computes the natural logarithm of the value of the probability
// density function at x.
func (p Pareto) LogProb(x float64) float64 {
if x < p.Xm {
return math.Inf(-1)
}
return math.Log(p.Alpha) + p.Alpha*math.Log(p.Xm) - (p.Alpha+1)*math.Log(x)
}
// Mean returns the mean of the probability distribution.
func (p Pareto) Mean() float64 {
if p.Alpha <= 1 {
return math.Inf(1)
}
return p.Alpha * p.Xm / (p.Alpha - 1)
}
// Median returns the median of the pareto distribution.
func (p Pareto) Median() float64 {
return p.Xm * math.Pow(2, 1/p.Alpha)
}
// Mode returns the mode of the distribution.
func (p Pareto) Mode() float64 {
return p.Xm
}
// NumParameters returns the number of parameters in the distribution.
func (p Pareto) NumParameters() int {
return 2
}
// Prob computes the value of the probability density function at x.
func (p Pareto) Prob(x float64) float64 {
return math.Exp(p.LogProb(x))
}
// Rand returns a random sample drawn from the distribution.
func (p Pareto) Rand() float64 {
var rnd float64
if p.Src == nil {
rnd = rand.ExpFloat64()
} else {
rnd = p.Src.ExpFloat64()
}
return math.Exp(math.Log(p.Xm) + 1/p.Alpha*rnd)
}
// StdDev returns the standard deviation of the probability distribution.
func (p Pareto) StdDev() float64 {
return math.Sqrt(p.Variance())
}
// Survival returns the survival function (complementary CDF) at x.
func (p Pareto) Survival(x float64) float64 {
if x < p.Xm {
return 1
}
return math.Exp(p.Alpha * (math.Log(p.Xm) - math.Log(x)))
}
// Variance returns the variance of the probability distribution.
func (p Pareto) Variance() float64 {
if p.Alpha <= 2 {
return math.Inf(1)
}
am1 := p.Alpha - 1
return p.Xm * p.Xm * p.Alpha / (am1 * am1 * (p.Alpha - 2))
}

162
stat/distuv/pareto_test.go Normal file
View File

@@ -0,0 +1,162 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package distuv
import (
"sort"
"testing"
"golang.org/x/exp/rand"
"gonum.org/v1/gonum/floats"
)
func TestParetoProb(t *testing.T) {
for _, test := range []struct {
x, xm, alpha, want float64
}{
{0, 1, 1, 0},
{0.5, 1, 1, 0},
{1, 1, 1, 1.0},
{1.5, 1, 1, 0.444444444444444},
{2, 1, 1, 0.25},
{2.5, 1, 1, 0.16},
{3, 1, 1, 0.1111111111111111},
{3.5, 1, 1, 0.081632653061224},
{4, 1, 1, 0.0625},
{4.5, 1, 1, 0.049382716049383},
{5, 1, 1, 0.04},
{0, 1, 2, 0},
{0.5, 1, 2, 0},
{1, 1, 2, 2},
{1.5, 1, 2, 0.592592592592593},
{2, 1, 2, 0.25},
{2.5, 1, 2, 0.128},
{3, 1, 2, 0.074074074074074},
{3.5, 1, 2, 0.046647230320700},
{4, 1, 2, 0.03125},
{4.5, 1, 2, 0.021947873799726},
{5, 1, 2, 0.016},
{0, 1, 3, 0},
{0.5, 1, 3, 0},
{1, 1, 3, 3.0},
{1.5, 1, 3, 0.592592592592593},
{2, 1, 3, 0.1875},
{2.5, 1, 3, 0.0768},
{3, 1, 3, 0.037037037037037},
{3.5, 1, 3, 0.019991670137443},
{4, 1, 3, 0.011718750000000},
{4.5, 1, 3, 0.007315957933242},
{5, 1, 3, 0.0048},
} {
pdf := Pareto{test.xm, test.alpha, nil}.Prob(test.x)
if !floats.EqualWithinAbsOrRel(pdf, test.want, 1e-10, 1e-10) {
t.Errorf("Pdf mismatch, x = %v, xm = %v, alpha = %v. Got %v, want %v", test.x, test.xm, test.alpha, pdf, test.want)
}
}
}
func TestParetoCDF(t *testing.T) {
for _, test := range []struct {
x, xm, alpha, want float64
}{
{0, 1, 1, 0},
{0.5, 1, 1, 0},
{1, 1, 1, 0},
{1.5, 1, 1, 0.333333333333333},
{2, 1, 1, 0.5},
{2.5, 1, 1, 0.6},
{3, 1, 1, 0.666666666666667},
{3.5, 1, 1, 0.714285714285714},
{4, 1, 1, 0.75},
{4.5, 1, 1, 0.777777777777778},
{5, 1, 1, 0.80},
{5.5, 1, 1, 0.818181818181818},
{6, 1, 1, 0.833333333333333},
{6.5, 1, 1, 0.846153846153846},
{7, 1, 1, 0.857142857142857},
{7.5, 1, 1, 0.866666666666667},
{8, 1, 1, 0.875},
{8.5, 1, 1, 0.882352941176471},
{9, 1, 1, 0.888888888888889},
{9.5, 1, 1, 0.894736842105263},
{10, 1, 1, 0.90},
{0, 1, 2, 0},
{0.5, 1, 2, 0},
{1, 1, 2, 0},
{1.5, 1, 2, 0.555555555555556},
{2, 1, 2, 0.75},
{2.5, 1, 2, 0.84},
{3, 1, 2, 0.888888888888889},
{3.5, 1, 2, 0.918367346938776},
{4, 1, 2, 0.9375},
{4.5, 1, 2, 0.950617283950617},
{5, 1, 2, 0.96},
{5.5, 1, 2, 0.966942148760331},
{6, 1, 2, 0.972222222222222},
{6.5, 1, 2, 0.976331360946746},
{7, 1, 2, 0.979591836734694},
{7.5, 1, 2, 0.982222222222222},
{8, 1, 2, 0.984375000000000},
{8.5, 1, 2, 0.986159169550173},
{9, 1, 2, 0.987654320987654},
{9.5, 1, 2, 0.988919667590028},
{10, 1, 2, 0.99},
{0, 1, 3, 0},
{0.5, 1, 3, 0},
{1, 1, 3, 0},
{1.5, 1, 3, 0.703703703703704},
{2, 1, 3, 0.875},
{2.5, 1, 3, 0.936},
{3, 1, 3, 0.962962962962963},
{3.5, 1, 3, 0.976676384839650},
{4, 1, 3, 0.984375000000000},
{4.5, 1, 3, 0.989026063100137},
{5, 1, 3, 0.992},
{5.5, 1, 3, 0.993989481592787},
{6, 1, 3, 0.995370370370370},
{6.5, 1, 3, 0.996358670914884},
{7, 1, 3, 0.997084548104956},
{7.5, 1, 3, 0.997629629629630},
{8, 1, 3, 0.998046875000000},
{8.5, 1, 3, 0.998371667005903},
{9, 1, 3, 0.998628257887517},
{9.5, 1, 3, 0.998833649220003},
{10, 1, 3, 0.999},
} {
cdf := Pareto{test.xm, test.alpha, nil}.CDF(test.x)
if !floats.EqualWithinAbsOrRel(cdf, test.want, 1e-10, 1e-10) {
t.Errorf("CDF mismatch, x = %v, xm = %v, alpha = %v. Got %v, want %v", test.x, test.xm, test.alpha, cdf, test.want)
}
}
}
func TestPareto(t *testing.T) {
src := rand.New(rand.NewSource(1))
for i, p := range []Pareto{
{1, 10, src},
{1, 20, src},
} {
testPareto(t, p, i)
}
}
func testPareto(t *testing.T, p Pareto, i int) {
tol := 1e-2
const n = 1e6
const bins = 50
x := make([]float64, n)
generateSamples(x, p)
sort.Float64s(x)
testRandLogProbContinuous(t, i, 0, x, p, tol, bins)
checkMean(t, i, x, p, tol)
checkVarAndStd(t, i, x, p, tol)
checkExKurtosis(t, i, x, p, 7e-2)
checkProbContinuous(t, i, x, p, 1e-3)
}