mirror of
https://github.com/gonum/gonum.git
synced 2025-10-25 00:00:24 +08:00
stat/distuv: add binomial distribution
This commit is contained in:
committed by
Dan Kortschak
parent
b71a28080e
commit
435185761c
1
AUTHORS
1
AUTHORS
@@ -21,6 +21,7 @@ Daniel Fireman <danielfireman@gmail.com>
|
||||
David Samborski <bloggingarrow@gmail.com>
|
||||
Davor Kapsa <davor.kapsa@gmail.com>
|
||||
DeepMind Technologies
|
||||
Dezmond Goff <goff.dezmond@gmail.com>
|
||||
Egon Elbre <egonelbre@gmail.com>
|
||||
Ekaterina Efimova <katerina.efimova@gmail.com>
|
||||
Ethan Burns <burns.ethan@gmail.com>
|
||||
|
||||
@@ -28,6 +28,7 @@ Dan Kortschak <dan.kortschak@adelaide.edu.au> <dan@kortschak.io>
|
||||
Daniel Fireman <danielfireman@gmail.com>
|
||||
David Samborski <bloggingarrow@gmail.com>
|
||||
Davor Kapsa <davor.kapsa@gmail.com>
|
||||
Dezmond Goff <goff.dezmond@gmail.com>
|
||||
Egon Elbre <egonelbre@gmail.com>
|
||||
Ekaterina Efimova <katerina.efimova@gmail.com>
|
||||
Ethan Burns <burns.ethan@gmail.com>
|
||||
|
||||
188
stat/distuv/binomial.go
Normal file
188
stat/distuv/binomial.go
Normal file
@@ -0,0 +1,188 @@
|
||||
// Copyright ©2018 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package distuv
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"golang.org/x/exp/rand"
|
||||
|
||||
"gonum.org/v1/gonum/mathext"
|
||||
"gonum.org/v1/gonum/stat/combin"
|
||||
)
|
||||
|
||||
// Binomial implements the binomial distribution, a discrete probability distribution
|
||||
// that expresses the probability of a given number of successful Bernoulli trials
|
||||
// out of a total of n, each with successs probability p.
|
||||
// The binomial distribution has the density function:
|
||||
// f(k) = (n choose k) p^k (1-p)^(n-k)
|
||||
// For more information, see https://en.wikipedia.org/wiki/Binomial_distribution.
|
||||
type Binomial struct {
|
||||
// N is the total number of Bernoulli trials. N must be greater than 0.
|
||||
N float64
|
||||
// P is the probablity of success in any given trial. P must be in [0, 1].
|
||||
P float64
|
||||
|
||||
Src rand.Source
|
||||
}
|
||||
|
||||
// CDF computes the value of the cumulative distribution function at x.
|
||||
func (b Binomial) CDF(x float64) float64 {
|
||||
if x < 0 {
|
||||
return 0
|
||||
}
|
||||
if x >= b.N {
|
||||
return 1
|
||||
}
|
||||
x = math.Floor(x)
|
||||
return mathext.RegIncBeta(b.N-x, x+1, 1-b.P)
|
||||
}
|
||||
|
||||
// ExKurtosis returns the excess kurtosis of the distribution.
|
||||
func (b Binomial) ExKurtosis() float64 {
|
||||
v := b.P * (1 - b.P)
|
||||
return (1 - 6*v) / (b.N * v)
|
||||
}
|
||||
|
||||
// LogProb computes the natural logarithm of the value of the probability
|
||||
// density function at x.
|
||||
func (b Binomial) LogProb(x float64) float64 {
|
||||
if x < 0 || x > b.N || math.Floor(x) != x {
|
||||
return math.Inf(-1)
|
||||
}
|
||||
lb := combin.LogGeneralizedBinomial(b.N, x)
|
||||
return lb + x*math.Log(b.P) + (b.N-x)*math.Log(1-b.P)
|
||||
}
|
||||
|
||||
// Mean returns the mean of the probability distribution.
|
||||
func (b Binomial) Mean() float64 {
|
||||
return b.N * b.P
|
||||
}
|
||||
|
||||
// NumParameters returns the number of parameters in the distribution.
|
||||
func (Binomial) NumParameters() int {
|
||||
return 2
|
||||
}
|
||||
|
||||
// Prob computes the value of the probability density function at x.
|
||||
func (b Binomial) Prob(x float64) float64 {
|
||||
return math.Exp(b.LogProb(x))
|
||||
}
|
||||
|
||||
// Rand returns a random sample drawn from the distribution.
|
||||
func (b Binomial) Rand() float64 {
|
||||
// NUMERICAL RECIPES IN C: THE ART OF SCIENTIFIC COMPUTING (ISBN 0-521-43108-5)
|
||||
// p. 295-6
|
||||
// http://www.aip.de/groups/soe/local/numres/bookcpdf/c7-3.pdf
|
||||
|
||||
runif := rand.Float64
|
||||
rexp := rand.ExpFloat64
|
||||
if b.Src != nil {
|
||||
rnd := rand.New(b.Src)
|
||||
runif = rnd.Float64
|
||||
rexp = rnd.ExpFloat64
|
||||
}
|
||||
|
||||
p := b.P
|
||||
if p > 0.5 {
|
||||
p = 1 - p
|
||||
}
|
||||
am := b.N * p
|
||||
|
||||
if b.N < 25 {
|
||||
// Use direct method.
|
||||
bnl := 0.0
|
||||
for i := 0; i < int(b.N); i++ {
|
||||
if runif() < p {
|
||||
bnl++
|
||||
}
|
||||
}
|
||||
if p != b.P {
|
||||
return b.N - bnl
|
||||
}
|
||||
return bnl
|
||||
}
|
||||
|
||||
if am < 1 {
|
||||
// Use rejection method with Poisson proposal.
|
||||
const logM = 2.6e-2 // constant for rejection sampling (https://en.wikipedia.org/wiki/Rejection_sampling)
|
||||
var bnl float64
|
||||
z := -p
|
||||
pclog := (1 + 0.5*z) * z / (1 + (1+1.0/6*z)*z) // Padé approximant of log(1 + x)
|
||||
for {
|
||||
bnl = 0.0
|
||||
t := 0.0
|
||||
for i := 0; i < int(b.N); i++ {
|
||||
t += rexp()
|
||||
if t >= am {
|
||||
break
|
||||
}
|
||||
bnl++
|
||||
}
|
||||
bnlc := b.N - bnl
|
||||
z = -bnl / b.N
|
||||
log1p := (1 + 0.5*z) * z / (1 + (1+1.0/6*z)*z)
|
||||
t = (bnlc+0.5)*log1p + bnl - bnlc*pclog + 1/(12*bnlc) - am + logM // Uses Stirling's expansion of log(n!)
|
||||
if rexp() >= t {
|
||||
break
|
||||
}
|
||||
}
|
||||
if p != b.P {
|
||||
return b.N - bnl
|
||||
}
|
||||
return bnl
|
||||
}
|
||||
// Original algorithm samples from a Poisson distribution with the
|
||||
// appropriate expected value. However, the Poisson approximation is
|
||||
// asymptotic such that the absolute deviation in probability is O(1/n).
|
||||
// Rejection sampling produces exact variates with at worst less than 3%
|
||||
// rejection with miminal additional computation.
|
||||
|
||||
// Use rejection method with Cauchy proposal.
|
||||
g, _ := math.Lgamma(b.N + 1)
|
||||
plog := math.Log(p)
|
||||
pclog := math.Log1p(-p)
|
||||
sq := math.Sqrt(2 * am * (1 - p))
|
||||
for {
|
||||
var em, y float64
|
||||
for {
|
||||
y = math.Tan(math.Pi * runif())
|
||||
em = sq*y + am
|
||||
if em >= 0 && em < b.N+1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
em = math.Floor(em)
|
||||
lg1, _ := math.Lgamma(em + 1)
|
||||
lg2, _ := math.Lgamma(b.N - em + 1)
|
||||
t := 1.2 * sq * (1 + y*y) * math.Exp(g-lg1-lg2+em*plog+(b.N-em)*pclog)
|
||||
if runif() <= t {
|
||||
if p != b.P {
|
||||
return b.N - em
|
||||
}
|
||||
return em
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Skewness returns the skewness of the distribution.
|
||||
func (b Binomial) Skewness() float64 {
|
||||
return (1 - 2*b.P) / b.StdDev()
|
||||
}
|
||||
|
||||
// StdDev returns the standard deviation of the probability distribution.
|
||||
func (b Binomial) StdDev() float64 {
|
||||
return math.Sqrt(b.Variance())
|
||||
}
|
||||
|
||||
// Survival returns the survival function (complementary CDF) at x.
|
||||
func (b Binomial) Survival(x float64) float64 {
|
||||
return 1 - b.CDF(x)
|
||||
}
|
||||
|
||||
// Variance returns the variance of the probability distribution.
|
||||
func (b Binomial) Variance() float64 {
|
||||
return b.N * b.P * (1 - b.P)
|
||||
}
|
||||
134
stat/distuv/binomial_test.go
Normal file
134
stat/distuv/binomial_test.go
Normal file
@@ -0,0 +1,134 @@
|
||||
// Copyright ©2018 The Gonum Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package distuv
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/exp/rand"
|
||||
|
||||
"gonum.org/v1/gonum/floats"
|
||||
)
|
||||
|
||||
func TestBinomialProb(t *testing.T) {
|
||||
const tol = 1e-10
|
||||
for i, tt := range []struct {
|
||||
k float64
|
||||
n float64
|
||||
p float64
|
||||
want float64
|
||||
}{
|
||||
// Probabilities computed with Wolfram|Alpha (http://wwww.wolframalpha.com)
|
||||
{0, 10, 0.5, 0.0009765625},
|
||||
{1, 10, 0.5, 0.009765625},
|
||||
{2, 10, 0.5, 0.0439453125},
|
||||
{3, 10, 0.5, 0.1171875},
|
||||
{4, 10, 0.5, 0.205078125},
|
||||
{5, 10, 0.75, 5.839920043945313e-02},
|
||||
{6, 10, 0.75, 0.1459980010986328},
|
||||
{7, 10, 0.75, 0.2502822875976563},
|
||||
{8, 10, 0.75, 0.2815675735473633},
|
||||
{9, 10, 0.75, 0.1877117156982422},
|
||||
{10, 10, 0.75, 5.6313514709472656e-02},
|
||||
|
||||
{0, 25, 0.25, 7.525434581650003e-04},
|
||||
{2, 25, 0.25, 2.508478193883334e-02},
|
||||
{5, 25, 0.25, 0.1645375881987921},
|
||||
{7, 25, 0.25, 0.1654081574485211},
|
||||
{10, 25, 0.25, 4.165835076481272e-02},
|
||||
{12, 25, 0.01, 4.563372575901533e-18},
|
||||
{15, 25, 0.01, 2.956207951505780e-24},
|
||||
{17, 25, 0.01, 9.980175928758777e-29},
|
||||
{20, 25, 0.99, 4.345539559454088e-06},
|
||||
{22, 25, 0.99, 1.843750355939806e-03},
|
||||
{25, 25, 0.99, 0.7778213593991468},
|
||||
|
||||
{0.5, 25, 0.5, 0},
|
||||
{1.5, 25, 0.5, 0},
|
||||
{2.5, 25, 0.5, 0},
|
||||
{3.5, 25, 0.5, 0},
|
||||
{4.5, 25, 0.5, 0},
|
||||
{5.5, 25, 0.5, 0},
|
||||
{6.5, 25, 0.5, 0},
|
||||
{7.5, 25, 0.5, 0},
|
||||
{8.5, 25, 0.5, 0},
|
||||
{9.5, 25, 0.5, 0},
|
||||
} {
|
||||
b := Binomial{N: tt.n, P: tt.p}
|
||||
got := b.Prob(tt.k)
|
||||
if !floats.EqualWithinRel(got, tt.want, tol) {
|
||||
t.Errorf("test-%d: got=%e. want=%e\n", i, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBinomialCDF(t *testing.T) {
|
||||
const tol = 1e-10
|
||||
for i, tt := range []struct {
|
||||
k float64
|
||||
n float64
|
||||
p float64
|
||||
want float64
|
||||
}{
|
||||
// Cumulative probabilities computed with SciPy
|
||||
{0, 10, 0.5, 9.765625e-04},
|
||||
{1, 10, 0.5, 1.0742187499999998e-02},
|
||||
{2, 10, 0.5, 5.468749999999999e-02},
|
||||
{3, 10, 0.5, 1.7187499999999994e-01},
|
||||
{4, 10, 0.5, 3.769531249999999e-01},
|
||||
{5, 10, 0.25, 9.802722930908203e-01},
|
||||
{6, 10, 0.25, 9.964942932128906e-01},
|
||||
{7, 10, 0.25, 9.995841979980469e-01},
|
||||
{8, 10, 0.25, 9.999704360961914e-01},
|
||||
{9, 10, 0.25, 9.999990463256836e-01},
|
||||
{10, 10, 0.25, 1.0},
|
||||
|
||||
{0, 25, 0.75, 8.881784197001252e-16},
|
||||
{2.5, 25, 0.75, 2.4655832930875472e-12},
|
||||
{5, 25, 0.75, 1.243460090449844e-08},
|
||||
{7.5, 25, 0.75, 1.060837565347583e-06},
|
||||
{10, 25, 0.75, 2.1451240486669576e-04},
|
||||
{12.5, 25, 0.01, 9.999999999999999e-01},
|
||||
{15, 25, 0.01, 9.999999999999999e-01},
|
||||
{17.5, 25, 0.01, 9.999999999999999e-01},
|
||||
{20, 25, 0.99, 4.495958469027147e-06},
|
||||
{22.5, 25, 0.99, 1.9506768897388268e-03},
|
||||
{25, 25, 0.99, 1.0},
|
||||
} {
|
||||
b := Binomial{N: tt.n, P: tt.p}
|
||||
got := b.CDF(tt.k)
|
||||
if !floats.EqualWithinRel(got, tt.want, tol) {
|
||||
t.Errorf("test-%d: got=%e. want=%e\n", i, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBinomial(t *testing.T) {
|
||||
src := rand.New(rand.NewSource(1))
|
||||
for i, b := range []Binomial{
|
||||
{100, 0.5, src},
|
||||
{15, 0.25, src},
|
||||
{10, 0.75, src},
|
||||
{9000, 0.102, src},
|
||||
{1e6, 0.001, src},
|
||||
{25, 0.02, src},
|
||||
{3, 0.8, src},
|
||||
} {
|
||||
testBinomial(t, b, i)
|
||||
}
|
||||
}
|
||||
|
||||
func testBinomial(t *testing.T, b Binomial, i int) {
|
||||
const tol = 1e-2
|
||||
const n = 1e6
|
||||
x := make([]float64, n)
|
||||
generateSamples(x, b)
|
||||
sort.Float64s(x)
|
||||
|
||||
checkMean(t, i, x, b, tol)
|
||||
checkVarAndStd(t, i, x, b, tol)
|
||||
checkExKurtosis(t, i, x, b, 7e-2)
|
||||
}
|
||||
Reference in New Issue
Block a user