mirror of
https://github.com/gonum/gonum.git
synced 2025-10-25 00:00:24 +08:00
stat/distuv: add binomial distribution
This commit is contained in:
committed by
Dan Kortschak
parent
b71a28080e
commit
435185761c
1
AUTHORS
1
AUTHORS
@@ -21,6 +21,7 @@ Daniel Fireman <danielfireman@gmail.com>
|
|||||||
David Samborski <bloggingarrow@gmail.com>
|
David Samborski <bloggingarrow@gmail.com>
|
||||||
Davor Kapsa <davor.kapsa@gmail.com>
|
Davor Kapsa <davor.kapsa@gmail.com>
|
||||||
DeepMind Technologies
|
DeepMind Technologies
|
||||||
|
Dezmond Goff <goff.dezmond@gmail.com>
|
||||||
Egon Elbre <egonelbre@gmail.com>
|
Egon Elbre <egonelbre@gmail.com>
|
||||||
Ekaterina Efimova <katerina.efimova@gmail.com>
|
Ekaterina Efimova <katerina.efimova@gmail.com>
|
||||||
Ethan Burns <burns.ethan@gmail.com>
|
Ethan Burns <burns.ethan@gmail.com>
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ Dan Kortschak <dan.kortschak@adelaide.edu.au> <dan@kortschak.io>
|
|||||||
Daniel Fireman <danielfireman@gmail.com>
|
Daniel Fireman <danielfireman@gmail.com>
|
||||||
David Samborski <bloggingarrow@gmail.com>
|
David Samborski <bloggingarrow@gmail.com>
|
||||||
Davor Kapsa <davor.kapsa@gmail.com>
|
Davor Kapsa <davor.kapsa@gmail.com>
|
||||||
|
Dezmond Goff <goff.dezmond@gmail.com>
|
||||||
Egon Elbre <egonelbre@gmail.com>
|
Egon Elbre <egonelbre@gmail.com>
|
||||||
Ekaterina Efimova <katerina.efimova@gmail.com>
|
Ekaterina Efimova <katerina.efimova@gmail.com>
|
||||||
Ethan Burns <burns.ethan@gmail.com>
|
Ethan Burns <burns.ethan@gmail.com>
|
||||||
|
|||||||
188
stat/distuv/binomial.go
Normal file
188
stat/distuv/binomial.go
Normal file
@@ -0,0 +1,188 @@
|
|||||||
|
// Copyright ©2018 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package distuv
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"golang.org/x/exp/rand"
|
||||||
|
|
||||||
|
"gonum.org/v1/gonum/mathext"
|
||||||
|
"gonum.org/v1/gonum/stat/combin"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Binomial implements the binomial distribution, a discrete probability distribution
|
||||||
|
// that expresses the probability of a given number of successful Bernoulli trials
|
||||||
|
// out of a total of n, each with successs probability p.
|
||||||
|
// The binomial distribution has the density function:
|
||||||
|
// f(k) = (n choose k) p^k (1-p)^(n-k)
|
||||||
|
// For more information, see https://en.wikipedia.org/wiki/Binomial_distribution.
|
||||||
|
type Binomial struct {
|
||||||
|
// N is the total number of Bernoulli trials. N must be greater than 0.
|
||||||
|
N float64
|
||||||
|
// P is the probablity of success in any given trial. P must be in [0, 1].
|
||||||
|
P float64
|
||||||
|
|
||||||
|
Src rand.Source
|
||||||
|
}
|
||||||
|
|
||||||
|
// CDF computes the value of the cumulative distribution function at x.
|
||||||
|
func (b Binomial) CDF(x float64) float64 {
|
||||||
|
if x < 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
if x >= b.N {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
x = math.Floor(x)
|
||||||
|
return mathext.RegIncBeta(b.N-x, x+1, 1-b.P)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExKurtosis returns the excess kurtosis of the distribution.
|
||||||
|
func (b Binomial) ExKurtosis() float64 {
|
||||||
|
v := b.P * (1 - b.P)
|
||||||
|
return (1 - 6*v) / (b.N * v)
|
||||||
|
}
|
||||||
|
|
||||||
|
// LogProb computes the natural logarithm of the value of the probability
|
||||||
|
// density function at x.
|
||||||
|
func (b Binomial) LogProb(x float64) float64 {
|
||||||
|
if x < 0 || x > b.N || math.Floor(x) != x {
|
||||||
|
return math.Inf(-1)
|
||||||
|
}
|
||||||
|
lb := combin.LogGeneralizedBinomial(b.N, x)
|
||||||
|
return lb + x*math.Log(b.P) + (b.N-x)*math.Log(1-b.P)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mean returns the mean of the probability distribution.
|
||||||
|
func (b Binomial) Mean() float64 {
|
||||||
|
return b.N * b.P
|
||||||
|
}
|
||||||
|
|
||||||
|
// NumParameters returns the number of parameters in the distribution.
|
||||||
|
func (Binomial) NumParameters() int {
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prob computes the value of the probability density function at x.
|
||||||
|
func (b Binomial) Prob(x float64) float64 {
|
||||||
|
return math.Exp(b.LogProb(x))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rand returns a random sample drawn from the distribution.
|
||||||
|
func (b Binomial) Rand() float64 {
|
||||||
|
// NUMERICAL RECIPES IN C: THE ART OF SCIENTIFIC COMPUTING (ISBN 0-521-43108-5)
|
||||||
|
// p. 295-6
|
||||||
|
// http://www.aip.de/groups/soe/local/numres/bookcpdf/c7-3.pdf
|
||||||
|
|
||||||
|
runif := rand.Float64
|
||||||
|
rexp := rand.ExpFloat64
|
||||||
|
if b.Src != nil {
|
||||||
|
rnd := rand.New(b.Src)
|
||||||
|
runif = rnd.Float64
|
||||||
|
rexp = rnd.ExpFloat64
|
||||||
|
}
|
||||||
|
|
||||||
|
p := b.P
|
||||||
|
if p > 0.5 {
|
||||||
|
p = 1 - p
|
||||||
|
}
|
||||||
|
am := b.N * p
|
||||||
|
|
||||||
|
if b.N < 25 {
|
||||||
|
// Use direct method.
|
||||||
|
bnl := 0.0
|
||||||
|
for i := 0; i < int(b.N); i++ {
|
||||||
|
if runif() < p {
|
||||||
|
bnl++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if p != b.P {
|
||||||
|
return b.N - bnl
|
||||||
|
}
|
||||||
|
return bnl
|
||||||
|
}
|
||||||
|
|
||||||
|
if am < 1 {
|
||||||
|
// Use rejection method with Poisson proposal.
|
||||||
|
const logM = 2.6e-2 // constant for rejection sampling (https://en.wikipedia.org/wiki/Rejection_sampling)
|
||||||
|
var bnl float64
|
||||||
|
z := -p
|
||||||
|
pclog := (1 + 0.5*z) * z / (1 + (1+1.0/6*z)*z) // Padé approximant of log(1 + x)
|
||||||
|
for {
|
||||||
|
bnl = 0.0
|
||||||
|
t := 0.0
|
||||||
|
for i := 0; i < int(b.N); i++ {
|
||||||
|
t += rexp()
|
||||||
|
if t >= am {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
bnl++
|
||||||
|
}
|
||||||
|
bnlc := b.N - bnl
|
||||||
|
z = -bnl / b.N
|
||||||
|
log1p := (1 + 0.5*z) * z / (1 + (1+1.0/6*z)*z)
|
||||||
|
t = (bnlc+0.5)*log1p + bnl - bnlc*pclog + 1/(12*bnlc) - am + logM // Uses Stirling's expansion of log(n!)
|
||||||
|
if rexp() >= t {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if p != b.P {
|
||||||
|
return b.N - bnl
|
||||||
|
}
|
||||||
|
return bnl
|
||||||
|
}
|
||||||
|
// Original algorithm samples from a Poisson distribution with the
|
||||||
|
// appropriate expected value. However, the Poisson approximation is
|
||||||
|
// asymptotic such that the absolute deviation in probability is O(1/n).
|
||||||
|
// Rejection sampling produces exact variates with at worst less than 3%
|
||||||
|
// rejection with miminal additional computation.
|
||||||
|
|
||||||
|
// Use rejection method with Cauchy proposal.
|
||||||
|
g, _ := math.Lgamma(b.N + 1)
|
||||||
|
plog := math.Log(p)
|
||||||
|
pclog := math.Log1p(-p)
|
||||||
|
sq := math.Sqrt(2 * am * (1 - p))
|
||||||
|
for {
|
||||||
|
var em, y float64
|
||||||
|
for {
|
||||||
|
y = math.Tan(math.Pi * runif())
|
||||||
|
em = sq*y + am
|
||||||
|
if em >= 0 && em < b.N+1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
em = math.Floor(em)
|
||||||
|
lg1, _ := math.Lgamma(em + 1)
|
||||||
|
lg2, _ := math.Lgamma(b.N - em + 1)
|
||||||
|
t := 1.2 * sq * (1 + y*y) * math.Exp(g-lg1-lg2+em*plog+(b.N-em)*pclog)
|
||||||
|
if runif() <= t {
|
||||||
|
if p != b.P {
|
||||||
|
return b.N - em
|
||||||
|
}
|
||||||
|
return em
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skewness returns the skewness of the distribution.
|
||||||
|
func (b Binomial) Skewness() float64 {
|
||||||
|
return (1 - 2*b.P) / b.StdDev()
|
||||||
|
}
|
||||||
|
|
||||||
|
// StdDev returns the standard deviation of the probability distribution.
|
||||||
|
func (b Binomial) StdDev() float64 {
|
||||||
|
return math.Sqrt(b.Variance())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Survival returns the survival function (complementary CDF) at x.
|
||||||
|
func (b Binomial) Survival(x float64) float64 {
|
||||||
|
return 1 - b.CDF(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Variance returns the variance of the probability distribution.
|
||||||
|
func (b Binomial) Variance() float64 {
|
||||||
|
return b.N * b.P * (1 - b.P)
|
||||||
|
}
|
||||||
134
stat/distuv/binomial_test.go
Normal file
134
stat/distuv/binomial_test.go
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
// Copyright ©2018 The Gonum Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package distuv
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sort"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"golang.org/x/exp/rand"
|
||||||
|
|
||||||
|
"gonum.org/v1/gonum/floats"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestBinomialProb(t *testing.T) {
|
||||||
|
const tol = 1e-10
|
||||||
|
for i, tt := range []struct {
|
||||||
|
k float64
|
||||||
|
n float64
|
||||||
|
p float64
|
||||||
|
want float64
|
||||||
|
}{
|
||||||
|
// Probabilities computed with Wolfram|Alpha (http://wwww.wolframalpha.com)
|
||||||
|
{0, 10, 0.5, 0.0009765625},
|
||||||
|
{1, 10, 0.5, 0.009765625},
|
||||||
|
{2, 10, 0.5, 0.0439453125},
|
||||||
|
{3, 10, 0.5, 0.1171875},
|
||||||
|
{4, 10, 0.5, 0.205078125},
|
||||||
|
{5, 10, 0.75, 5.839920043945313e-02},
|
||||||
|
{6, 10, 0.75, 0.1459980010986328},
|
||||||
|
{7, 10, 0.75, 0.2502822875976563},
|
||||||
|
{8, 10, 0.75, 0.2815675735473633},
|
||||||
|
{9, 10, 0.75, 0.1877117156982422},
|
||||||
|
{10, 10, 0.75, 5.6313514709472656e-02},
|
||||||
|
|
||||||
|
{0, 25, 0.25, 7.525434581650003e-04},
|
||||||
|
{2, 25, 0.25, 2.508478193883334e-02},
|
||||||
|
{5, 25, 0.25, 0.1645375881987921},
|
||||||
|
{7, 25, 0.25, 0.1654081574485211},
|
||||||
|
{10, 25, 0.25, 4.165835076481272e-02},
|
||||||
|
{12, 25, 0.01, 4.563372575901533e-18},
|
||||||
|
{15, 25, 0.01, 2.956207951505780e-24},
|
||||||
|
{17, 25, 0.01, 9.980175928758777e-29},
|
||||||
|
{20, 25, 0.99, 4.345539559454088e-06},
|
||||||
|
{22, 25, 0.99, 1.843750355939806e-03},
|
||||||
|
{25, 25, 0.99, 0.7778213593991468},
|
||||||
|
|
||||||
|
{0.5, 25, 0.5, 0},
|
||||||
|
{1.5, 25, 0.5, 0},
|
||||||
|
{2.5, 25, 0.5, 0},
|
||||||
|
{3.5, 25, 0.5, 0},
|
||||||
|
{4.5, 25, 0.5, 0},
|
||||||
|
{5.5, 25, 0.5, 0},
|
||||||
|
{6.5, 25, 0.5, 0},
|
||||||
|
{7.5, 25, 0.5, 0},
|
||||||
|
{8.5, 25, 0.5, 0},
|
||||||
|
{9.5, 25, 0.5, 0},
|
||||||
|
} {
|
||||||
|
b := Binomial{N: tt.n, P: tt.p}
|
||||||
|
got := b.Prob(tt.k)
|
||||||
|
if !floats.EqualWithinRel(got, tt.want, tol) {
|
||||||
|
t.Errorf("test-%d: got=%e. want=%e\n", i, got, tt.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBinomialCDF(t *testing.T) {
|
||||||
|
const tol = 1e-10
|
||||||
|
for i, tt := range []struct {
|
||||||
|
k float64
|
||||||
|
n float64
|
||||||
|
p float64
|
||||||
|
want float64
|
||||||
|
}{
|
||||||
|
// Cumulative probabilities computed with SciPy
|
||||||
|
{0, 10, 0.5, 9.765625e-04},
|
||||||
|
{1, 10, 0.5, 1.0742187499999998e-02},
|
||||||
|
{2, 10, 0.5, 5.468749999999999e-02},
|
||||||
|
{3, 10, 0.5, 1.7187499999999994e-01},
|
||||||
|
{4, 10, 0.5, 3.769531249999999e-01},
|
||||||
|
{5, 10, 0.25, 9.802722930908203e-01},
|
||||||
|
{6, 10, 0.25, 9.964942932128906e-01},
|
||||||
|
{7, 10, 0.25, 9.995841979980469e-01},
|
||||||
|
{8, 10, 0.25, 9.999704360961914e-01},
|
||||||
|
{9, 10, 0.25, 9.999990463256836e-01},
|
||||||
|
{10, 10, 0.25, 1.0},
|
||||||
|
|
||||||
|
{0, 25, 0.75, 8.881784197001252e-16},
|
||||||
|
{2.5, 25, 0.75, 2.4655832930875472e-12},
|
||||||
|
{5, 25, 0.75, 1.243460090449844e-08},
|
||||||
|
{7.5, 25, 0.75, 1.060837565347583e-06},
|
||||||
|
{10, 25, 0.75, 2.1451240486669576e-04},
|
||||||
|
{12.5, 25, 0.01, 9.999999999999999e-01},
|
||||||
|
{15, 25, 0.01, 9.999999999999999e-01},
|
||||||
|
{17.5, 25, 0.01, 9.999999999999999e-01},
|
||||||
|
{20, 25, 0.99, 4.495958469027147e-06},
|
||||||
|
{22.5, 25, 0.99, 1.9506768897388268e-03},
|
||||||
|
{25, 25, 0.99, 1.0},
|
||||||
|
} {
|
||||||
|
b := Binomial{N: tt.n, P: tt.p}
|
||||||
|
got := b.CDF(tt.k)
|
||||||
|
if !floats.EqualWithinRel(got, tt.want, tol) {
|
||||||
|
t.Errorf("test-%d: got=%e. want=%e\n", i, got, tt.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBinomial(t *testing.T) {
|
||||||
|
src := rand.New(rand.NewSource(1))
|
||||||
|
for i, b := range []Binomial{
|
||||||
|
{100, 0.5, src},
|
||||||
|
{15, 0.25, src},
|
||||||
|
{10, 0.75, src},
|
||||||
|
{9000, 0.102, src},
|
||||||
|
{1e6, 0.001, src},
|
||||||
|
{25, 0.02, src},
|
||||||
|
{3, 0.8, src},
|
||||||
|
} {
|
||||||
|
testBinomial(t, b, i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func testBinomial(t *testing.T, b Binomial, i int) {
|
||||||
|
const tol = 1e-2
|
||||||
|
const n = 1e6
|
||||||
|
x := make([]float64, n)
|
||||||
|
generateSamples(x, b)
|
||||||
|
sort.Float64s(x)
|
||||||
|
|
||||||
|
checkMean(t, i, x, b, tol)
|
||||||
|
checkVarAndStd(t, i, x, b, tol)
|
||||||
|
checkExKurtosis(t, i, x, b, 7e-2)
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user