stat/distuv: add binomial distribution

2025-10-25 00:00:24 +08:00 · 2018-12-08 13:09:48 -08:00
parent b71a28080e
commit 435185761c
4 changed files with 324 additions and 0 deletions
--- a/1
+++ b/1
@@ -21,6 +21,7 @@ Daniel Fireman <danielfireman@gmail.com>
 David Samborski <bloggingarrow@gmail.com>
 Davor Kapsa <davor.kapsa@gmail.com>
 DeepMind Technologies
 Dezmond Goff <goff.dezmond@gmail.com>
 Egon Elbre <egonelbre@gmail.com>
 Ekaterina Efimova <katerina.efimova@gmail.com>
 Ethan Burns <burns.ethan@gmail.com>
--- a/1
+++ b/1
@@ -28,6 +28,7 @@ Dan Kortschak <dan.kortschak@adelaide.edu.au> <dan@kortschak.io>
 Daniel Fireman <danielfireman@gmail.com>
 David Samborski <bloggingarrow@gmail.com>
 Davor Kapsa <davor.kapsa@gmail.com>
 Dezmond Goff <goff.dezmond@gmail.com>
 Egon Elbre <egonelbre@gmail.com>
 Ekaterina Efimova <katerina.efimova@gmail.com>
 Ethan Burns <burns.ethan@gmail.com>
--- a/stat/distuv/binomial.go
+++ b/stat/distuv/binomial.go
@@ -0,0 +1,188 @@
 // Copyright ©2018 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 package distuv
 import (
 	"math"
 	"golang.org/x/exp/rand"
 	"gonum.org/v1/gonum/mathext"
 	"gonum.org/v1/gonum/stat/combin"
 )
 // Binomial implements the binomial distribution, a discrete probability distribution
 // that expresses the probability of a given number of successful Bernoulli trials
 // out of a total of n, each with successs probability p.
 // The binomial distribution has the density function:
 //  f(k) = (n choose k) p^k (1-p)^(n-k)
 // For more information, see https://en.wikipedia.org/wiki/Binomial_distribution.
 type Binomial struct {
 	// N is the total number of Bernoulli trials. N must be greater than 0.
 	N float64
 	// P is the probablity of success in any given trial. P must be in [0, 1].
 	P float64
 	Src rand.Source
 }
 // CDF computes the value of the cumulative distribution function at x.
 func (b Binomial) CDF(x float64) float64 {
 	if x < 0 {
 		return 0
 	}
 	if x >= b.N {
 		return 1
 	}
 	x = math.Floor(x)
 	return mathext.RegIncBeta(b.N-x, x+1, 1-b.P)
 }
 // ExKurtosis returns the excess kurtosis of the distribution.
 func (b Binomial) ExKurtosis() float64 {
 	v := b.P * (1 - b.P)
 	return (1 - 6*v) / (b.N * v)
 }
 // LogProb computes the natural logarithm of the value of the probability
 // density function at x.
 func (b Binomial) LogProb(x float64) float64 {
 	if x < 0 || x > b.N || math.Floor(x) != x {
 		return math.Inf(-1)
 	}
 	lb := combin.LogGeneralizedBinomial(b.N, x)
 	return lb + x*math.Log(b.P) + (b.N-x)*math.Log(1-b.P)
 }
 // Mean returns the mean of the probability distribution.
 func (b Binomial) Mean() float64 {
 	return b.N * b.P
 }
 // NumParameters returns the number of parameters in the distribution.
 func (Binomial) NumParameters() int {
 	return 2
 }
 // Prob computes the value of the probability density function at x.
 func (b Binomial) Prob(x float64) float64 {
 	return math.Exp(b.LogProb(x))
 }
 // Rand returns a random sample drawn from the distribution.
 func (b Binomial) Rand() float64 {
 	// NUMERICAL RECIPES IN C: THE ART OF SCIENTIFIC COMPUTING (ISBN 0-521-43108-5)
 	// p. 295-6
 	// http://www.aip.de/groups/soe/local/numres/bookcpdf/c7-3.pdf
 	runif := rand.Float64
 	rexp := rand.ExpFloat64
 	if b.Src != nil {
 		rnd := rand.New(b.Src)
 		runif = rnd.Float64
 		rexp = rnd.ExpFloat64
 	}
 	p := b.P
 	if p > 0.5 {
 		p = 1 - p
 	}
 	am := b.N * p
 	if b.N < 25 {
 		// Use direct method.
 		bnl := 0.0
 		for i := 0; i < int(b.N); i++ {
 			if runif() < p {
 				bnl++
 			}
 		}
 		if p != b.P {
 			return b.N - bnl
 		}
 		return bnl
 	}
 	if am < 1 {
 		// Use rejection method with Poisson proposal.
 		const logM = 2.6e-2 // constant for rejection sampling (https://en.wikipedia.org/wiki/Rejection_sampling)
 		var bnl float64
 		z := -p
 		pclog := (1 + 0.5*z) * z / (1 + (1+1.0/6*z)*z) // Padé approximant of log(1 + x)
 		for {
 			bnl = 0.0
 			t := 0.0
 			for i := 0; i < int(b.N); i++ {
 				t += rexp()
 				if t >= am {
 					break
 				}
 				bnl++
 			}
 			bnlc := b.N - bnl
 			z = -bnl / b.N
 			log1p := (1 + 0.5*z) * z / (1 + (1+1.0/6*z)*z)
 			t = (bnlc+0.5)*log1p + bnl - bnlc*pclog + 1/(12*bnlc) - am + logM // Uses Stirling's expansion of log(n!)
 			if rexp() >= t {
 				break
 			}
 		}
 		if p != b.P {
 			return b.N - bnl
 		}
 		return bnl
 	}
 	// Original algorithm samples from a Poisson distribution with the
 	// appropriate expected value. However, the Poisson approximation is
 	// asymptotic such that the absolute deviation in probability is O(1/n).
 	// Rejection sampling produces exact variates with at worst less than 3%
 	// rejection with miminal additional computation.
 	// Use rejection method with Cauchy proposal.
 	g, _ := math.Lgamma(b.N + 1)
 	plog := math.Log(p)
 	pclog := math.Log1p(-p)
 	sq := math.Sqrt(2 * am * (1 - p))
 	for {
 		var em, y float64
 		for {
 			y = math.Tan(math.Pi * runif())
 			em = sq*y + am
 			if em >= 0 && em < b.N+1 {
 				break
 			}
 		}
 		em = math.Floor(em)
 		lg1, _ := math.Lgamma(em + 1)
 		lg2, _ := math.Lgamma(b.N - em + 1)
 		t := 1.2 * sq * (1 + y*y) * math.Exp(g-lg1-lg2+em*plog+(b.N-em)*pclog)
 		if runif() <= t {
 			if p != b.P {
 				return b.N - em
 			}
 			return em
 		}
 	}
 }
 // Skewness returns the skewness of the distribution.
 func (b Binomial) Skewness() float64 {
 	return (1 - 2*b.P) / b.StdDev()
 }
 // StdDev returns the standard deviation of the probability distribution.
 func (b Binomial) StdDev() float64 {
 	return math.Sqrt(b.Variance())
 }
 // Survival returns the survival function (complementary CDF) at x.
 func (b Binomial) Survival(x float64) float64 {
 	return 1 - b.CDF(x)
 }
 // Variance returns the variance of the probability distribution.
 func (b Binomial) Variance() float64 {
 	return b.N * b.P * (1 - b.P)
 }
--- a/stat/distuv/binomial_test.go
+++ b/stat/distuv/binomial_test.go
@@ -0,0 +1,134 @@
 // Copyright ©2018 The Gonum Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 package distuv
 import (
 	"sort"
 	"testing"
 	"golang.org/x/exp/rand"
 	"gonum.org/v1/gonum/floats"
 )
 func TestBinomialProb(t *testing.T) {
 	const tol = 1e-10
 	for i, tt := range []struct {
 		k    float64
 		n    float64
 		p    float64
 		want float64
 	}{
 		// Probabilities computed with Wolfram|Alpha (http://wwww.wolframalpha.com)
 		{0, 10, 0.5, 0.0009765625},
 		{1, 10, 0.5, 0.009765625},
 		{2, 10, 0.5, 0.0439453125},
 		{3, 10, 0.5, 0.1171875},
 		{4, 10, 0.5, 0.205078125},
 		{5, 10, 0.75, 5.839920043945313e-02},
 		{6, 10, 0.75, 0.1459980010986328},
 		{7, 10, 0.75, 0.2502822875976563},
 		{8, 10, 0.75, 0.2815675735473633},
 		{9, 10, 0.75, 0.1877117156982422},
 		{10, 10, 0.75, 5.6313514709472656e-02},
 		{0, 25, 0.25, 7.525434581650003e-04},
 		{2, 25, 0.25, 2.508478193883334e-02},
 		{5, 25, 0.25, 0.1645375881987921},
 		{7, 25, 0.25, 0.1654081574485211},
 		{10, 25, 0.25, 4.165835076481272e-02},
 		{12, 25, 0.01, 4.563372575901533e-18},
 		{15, 25, 0.01, 2.956207951505780e-24},
 		{17, 25, 0.01, 9.980175928758777e-29},
 		{20, 25, 0.99, 4.345539559454088e-06},
 		{22, 25, 0.99, 1.843750355939806e-03},
 		{25, 25, 0.99, 0.7778213593991468},
 		{0.5, 25, 0.5, 0},
 		{1.5, 25, 0.5, 0},
 		{2.5, 25, 0.5, 0},
 		{3.5, 25, 0.5, 0},
 		{4.5, 25, 0.5, 0},
 		{5.5, 25, 0.5, 0},
 		{6.5, 25, 0.5, 0},
 		{7.5, 25, 0.5, 0},
 		{8.5, 25, 0.5, 0},
 		{9.5, 25, 0.5, 0},
 	} {
 		b := Binomial{N: tt.n, P: tt.p}
 		got := b.Prob(tt.k)
 		if !floats.EqualWithinRel(got, tt.want, tol) {
 			t.Errorf("test-%d: got=%e. want=%e\n", i, got, tt.want)
 		}
 	}
 }
 func TestBinomialCDF(t *testing.T) {
 	const tol = 1e-10
 	for i, tt := range []struct {
 		k    float64
 		n    float64
 		p    float64
 		want float64
 	}{
 		// Cumulative probabilities computed with SciPy
 		{0, 10, 0.5, 9.765625e-04},
 		{1, 10, 0.5, 1.0742187499999998e-02},
 		{2, 10, 0.5, 5.468749999999999e-02},
 		{3, 10, 0.5, 1.7187499999999994e-01},
 		{4, 10, 0.5, 3.769531249999999e-01},
 		{5, 10, 0.25, 9.802722930908203e-01},
 		{6, 10, 0.25, 9.964942932128906e-01},
 		{7, 10, 0.25, 9.995841979980469e-01},
 		{8, 10, 0.25, 9.999704360961914e-01},
 		{9, 10, 0.25, 9.999990463256836e-01},
 		{10, 10, 0.25, 1.0},
 		{0, 25, 0.75, 8.881784197001252e-16},
 		{2.5, 25, 0.75, 2.4655832930875472e-12},
 		{5, 25, 0.75, 1.243460090449844e-08},
 		{7.5, 25, 0.75, 1.060837565347583e-06},
 		{10, 25, 0.75, 2.1451240486669576e-04},
 		{12.5, 25, 0.01, 9.999999999999999e-01},
 		{15, 25, 0.01, 9.999999999999999e-01},
 		{17.5, 25, 0.01, 9.999999999999999e-01},
 		{20, 25, 0.99, 4.495958469027147e-06},
 		{22.5, 25, 0.99, 1.9506768897388268e-03},
 		{25, 25, 0.99, 1.0},
 	} {
 		b := Binomial{N: tt.n, P: tt.p}
 		got := b.CDF(tt.k)
 		if !floats.EqualWithinRel(got, tt.want, tol) {
 			t.Errorf("test-%d: got=%e. want=%e\n", i, got, tt.want)
 		}
 	}
 }
 func TestBinomial(t *testing.T) {
 	src := rand.New(rand.NewSource(1))
 	for i, b := range []Binomial{
 		{100, 0.5, src},
 		{15, 0.25, src},
 		{10, 0.75, src},
 		{9000, 0.102, src},
 		{1e6, 0.001, src},
 		{25, 0.02, src},
 		{3, 0.8, src},
 	} {
 		testBinomial(t, b, i)
 	}
 }
 func testBinomial(t *testing.T, b Binomial, i int) {
 	const tol = 1e-2
 	const n = 1e6
 	x := make([]float64, n)
 	generateSamples(x, b)
 	sort.Float64s(x)
 	checkMean(t, i, x, b, tol)
 	checkVarAndStd(t, i, x, b, tol)
 	checkExKurtosis(t, i, x, b, 7e-2)
 }