From 435185761cc9b6f0fa86a46852ece34bc79f9e09 Mon Sep 17 00:00:00 2001 From: deas-mhumhna Date: Sat, 8 Dec 2018 13:09:48 -0800 Subject: [PATCH] stat/distuv: add binomial distribution --- AUTHORS | 1 + CONTRIBUTORS | 1 + stat/distuv/binomial.go | 188 +++++++++++++++++++++++++++++++++++ stat/distuv/binomial_test.go | 134 +++++++++++++++++++++++++ 4 files changed, 324 insertions(+) create mode 100644 stat/distuv/binomial.go create mode 100644 stat/distuv/binomial_test.go diff --git a/AUTHORS b/AUTHORS index cbfebfa5..04d4f70d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -21,6 +21,7 @@ Daniel Fireman David Samborski Davor Kapsa DeepMind Technologies +Dezmond Goff Egon Elbre Ekaterina Efimova Ethan Burns diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 6bb63dd6..f2dbbdda 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -28,6 +28,7 @@ Dan Kortschak Daniel Fireman David Samborski Davor Kapsa +Dezmond Goff Egon Elbre Ekaterina Efimova Ethan Burns diff --git a/stat/distuv/binomial.go b/stat/distuv/binomial.go new file mode 100644 index 00000000..b5f80134 --- /dev/null +++ b/stat/distuv/binomial.go @@ -0,0 +1,188 @@ +// Copyright ©2018 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "math" + + "golang.org/x/exp/rand" + + "gonum.org/v1/gonum/mathext" + "gonum.org/v1/gonum/stat/combin" +) + +// Binomial implements the binomial distribution, a discrete probability distribution +// that expresses the probability of a given number of successful Bernoulli trials +// out of a total of n, each with successs probability p. +// The binomial distribution has the density function: +// f(k) = (n choose k) p^k (1-p)^(n-k) +// For more information, see https://en.wikipedia.org/wiki/Binomial_distribution. +type Binomial struct { + // N is the total number of Bernoulli trials. N must be greater than 0. + N float64 + // P is the probablity of success in any given trial. P must be in [0, 1]. + P float64 + + Src rand.Source +} + +// CDF computes the value of the cumulative distribution function at x. +func (b Binomial) CDF(x float64) float64 { + if x < 0 { + return 0 + } + if x >= b.N { + return 1 + } + x = math.Floor(x) + return mathext.RegIncBeta(b.N-x, x+1, 1-b.P) +} + +// ExKurtosis returns the excess kurtosis of the distribution. +func (b Binomial) ExKurtosis() float64 { + v := b.P * (1 - b.P) + return (1 - 6*v) / (b.N * v) +} + +// LogProb computes the natural logarithm of the value of the probability +// density function at x. +func (b Binomial) LogProb(x float64) float64 { + if x < 0 || x > b.N || math.Floor(x) != x { + return math.Inf(-1) + } + lb := combin.LogGeneralizedBinomial(b.N, x) + return lb + x*math.Log(b.P) + (b.N-x)*math.Log(1-b.P) +} + +// Mean returns the mean of the probability distribution. +func (b Binomial) Mean() float64 { + return b.N * b.P +} + +// NumParameters returns the number of parameters in the distribution. +func (Binomial) NumParameters() int { + return 2 +} + +// Prob computes the value of the probability density function at x. +func (b Binomial) Prob(x float64) float64 { + return math.Exp(b.LogProb(x)) +} + +// Rand returns a random sample drawn from the distribution. +func (b Binomial) Rand() float64 { + // NUMERICAL RECIPES IN C: THE ART OF SCIENTIFIC COMPUTING (ISBN 0-521-43108-5) + // p. 295-6 + // http://www.aip.de/groups/soe/local/numres/bookcpdf/c7-3.pdf + + runif := rand.Float64 + rexp := rand.ExpFloat64 + if b.Src != nil { + rnd := rand.New(b.Src) + runif = rnd.Float64 + rexp = rnd.ExpFloat64 + } + + p := b.P + if p > 0.5 { + p = 1 - p + } + am := b.N * p + + if b.N < 25 { + // Use direct method. + bnl := 0.0 + for i := 0; i < int(b.N); i++ { + if runif() < p { + bnl++ + } + } + if p != b.P { + return b.N - bnl + } + return bnl + } + + if am < 1 { + // Use rejection method with Poisson proposal. + const logM = 2.6e-2 // constant for rejection sampling (https://en.wikipedia.org/wiki/Rejection_sampling) + var bnl float64 + z := -p + pclog := (1 + 0.5*z) * z / (1 + (1+1.0/6*z)*z) // Padé approximant of log(1 + x) + for { + bnl = 0.0 + t := 0.0 + for i := 0; i < int(b.N); i++ { + t += rexp() + if t >= am { + break + } + bnl++ + } + bnlc := b.N - bnl + z = -bnl / b.N + log1p := (1 + 0.5*z) * z / (1 + (1+1.0/6*z)*z) + t = (bnlc+0.5)*log1p + bnl - bnlc*pclog + 1/(12*bnlc) - am + logM // Uses Stirling's expansion of log(n!) + if rexp() >= t { + break + } + } + if p != b.P { + return b.N - bnl + } + return bnl + } + // Original algorithm samples from a Poisson distribution with the + // appropriate expected value. However, the Poisson approximation is + // asymptotic such that the absolute deviation in probability is O(1/n). + // Rejection sampling produces exact variates with at worst less than 3% + // rejection with miminal additional computation. + + // Use rejection method with Cauchy proposal. + g, _ := math.Lgamma(b.N + 1) + plog := math.Log(p) + pclog := math.Log1p(-p) + sq := math.Sqrt(2 * am * (1 - p)) + for { + var em, y float64 + for { + y = math.Tan(math.Pi * runif()) + em = sq*y + am + if em >= 0 && em < b.N+1 { + break + } + } + em = math.Floor(em) + lg1, _ := math.Lgamma(em + 1) + lg2, _ := math.Lgamma(b.N - em + 1) + t := 1.2 * sq * (1 + y*y) * math.Exp(g-lg1-lg2+em*plog+(b.N-em)*pclog) + if runif() <= t { + if p != b.P { + return b.N - em + } + return em + } + } +} + +// Skewness returns the skewness of the distribution. +func (b Binomial) Skewness() float64 { + return (1 - 2*b.P) / b.StdDev() +} + +// StdDev returns the standard deviation of the probability distribution. +func (b Binomial) StdDev() float64 { + return math.Sqrt(b.Variance()) +} + +// Survival returns the survival function (complementary CDF) at x. +func (b Binomial) Survival(x float64) float64 { + return 1 - b.CDF(x) +} + +// Variance returns the variance of the probability distribution. +func (b Binomial) Variance() float64 { + return b.N * b.P * (1 - b.P) +} diff --git a/stat/distuv/binomial_test.go b/stat/distuv/binomial_test.go new file mode 100644 index 00000000..977dd0e0 --- /dev/null +++ b/stat/distuv/binomial_test.go @@ -0,0 +1,134 @@ +// Copyright ©2018 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package distuv + +import ( + "sort" + "testing" + + "golang.org/x/exp/rand" + + "gonum.org/v1/gonum/floats" +) + +func TestBinomialProb(t *testing.T) { + const tol = 1e-10 + for i, tt := range []struct { + k float64 + n float64 + p float64 + want float64 + }{ + // Probabilities computed with Wolfram|Alpha (http://wwww.wolframalpha.com) + {0, 10, 0.5, 0.0009765625}, + {1, 10, 0.5, 0.009765625}, + {2, 10, 0.5, 0.0439453125}, + {3, 10, 0.5, 0.1171875}, + {4, 10, 0.5, 0.205078125}, + {5, 10, 0.75, 5.839920043945313e-02}, + {6, 10, 0.75, 0.1459980010986328}, + {7, 10, 0.75, 0.2502822875976563}, + {8, 10, 0.75, 0.2815675735473633}, + {9, 10, 0.75, 0.1877117156982422}, + {10, 10, 0.75, 5.6313514709472656e-02}, + + {0, 25, 0.25, 7.525434581650003e-04}, + {2, 25, 0.25, 2.508478193883334e-02}, + {5, 25, 0.25, 0.1645375881987921}, + {7, 25, 0.25, 0.1654081574485211}, + {10, 25, 0.25, 4.165835076481272e-02}, + {12, 25, 0.01, 4.563372575901533e-18}, + {15, 25, 0.01, 2.956207951505780e-24}, + {17, 25, 0.01, 9.980175928758777e-29}, + {20, 25, 0.99, 4.345539559454088e-06}, + {22, 25, 0.99, 1.843750355939806e-03}, + {25, 25, 0.99, 0.7778213593991468}, + + {0.5, 25, 0.5, 0}, + {1.5, 25, 0.5, 0}, + {2.5, 25, 0.5, 0}, + {3.5, 25, 0.5, 0}, + {4.5, 25, 0.5, 0}, + {5.5, 25, 0.5, 0}, + {6.5, 25, 0.5, 0}, + {7.5, 25, 0.5, 0}, + {8.5, 25, 0.5, 0}, + {9.5, 25, 0.5, 0}, + } { + b := Binomial{N: tt.n, P: tt.p} + got := b.Prob(tt.k) + if !floats.EqualWithinRel(got, tt.want, tol) { + t.Errorf("test-%d: got=%e. want=%e\n", i, got, tt.want) + } + } +} + +func TestBinomialCDF(t *testing.T) { + const tol = 1e-10 + for i, tt := range []struct { + k float64 + n float64 + p float64 + want float64 + }{ + // Cumulative probabilities computed with SciPy + {0, 10, 0.5, 9.765625e-04}, + {1, 10, 0.5, 1.0742187499999998e-02}, + {2, 10, 0.5, 5.468749999999999e-02}, + {3, 10, 0.5, 1.7187499999999994e-01}, + {4, 10, 0.5, 3.769531249999999e-01}, + {5, 10, 0.25, 9.802722930908203e-01}, + {6, 10, 0.25, 9.964942932128906e-01}, + {7, 10, 0.25, 9.995841979980469e-01}, + {8, 10, 0.25, 9.999704360961914e-01}, + {9, 10, 0.25, 9.999990463256836e-01}, + {10, 10, 0.25, 1.0}, + + {0, 25, 0.75, 8.881784197001252e-16}, + {2.5, 25, 0.75, 2.4655832930875472e-12}, + {5, 25, 0.75, 1.243460090449844e-08}, + {7.5, 25, 0.75, 1.060837565347583e-06}, + {10, 25, 0.75, 2.1451240486669576e-04}, + {12.5, 25, 0.01, 9.999999999999999e-01}, + {15, 25, 0.01, 9.999999999999999e-01}, + {17.5, 25, 0.01, 9.999999999999999e-01}, + {20, 25, 0.99, 4.495958469027147e-06}, + {22.5, 25, 0.99, 1.9506768897388268e-03}, + {25, 25, 0.99, 1.0}, + } { + b := Binomial{N: tt.n, P: tt.p} + got := b.CDF(tt.k) + if !floats.EqualWithinRel(got, tt.want, tol) { + t.Errorf("test-%d: got=%e. want=%e\n", i, got, tt.want) + } + } +} + +func TestBinomial(t *testing.T) { + src := rand.New(rand.NewSource(1)) + for i, b := range []Binomial{ + {100, 0.5, src}, + {15, 0.25, src}, + {10, 0.75, src}, + {9000, 0.102, src}, + {1e6, 0.001, src}, + {25, 0.02, src}, + {3, 0.8, src}, + } { + testBinomial(t, b, i) + } +} + +func testBinomial(t *testing.T, b Binomial, i int) { + const tol = 1e-2 + const n = 1e6 + x := make([]float64, n) + generateSamples(x, b) + sort.Float64s(x) + + checkMean(t, i, x, b, tol) + checkVarAndStd(t, i, x, b, tol) + checkExKurtosis(t, i, x, b, 7e-2) +}