Mirror of https://github.com/gonum/gonum.git
stat/distmv: Add distance measures for Uniform, and fix Uniform source
@@ -54,6 +54,40 @@ func (Bhattacharyya) DistNormal(l, r *Normal) float64 {
 	return 0.125*mahalanobisSq + 0.5*ds - 0.25*dl - 0.25*dr
 }
 
+// DistUniform computes the Bhattacharyya distance between uniform distributions l and r.
+// The dimensions of the input distributions must match or DistUniform will panic.
+func (Bhattacharyya) DistUniform(l, r *Uniform) float64 {
+	if len(l.bounds) != len(r.bounds) {
+		panic(badSizeMismatch)
+	}
+	// BC = \int \sqrt(p(x)q(x)), which for uniform distributions is a constant
+	// over the volume where both distributions have positive probability.
+	// Compute the overlap and the value of sqrt(p(x)q(x)). The entropy is the
+	// negative log probability of the distribution (use instead of LogProb so
+	// it is not necessary to construct an x value).
+	//
+	// BC = volume * sqrt(p(x)q(x))
+	// logBC = log(volume) + 0.5*(logP + logQ)
+	// D_B = -logBC
+	return -unifLogVolOverlap(l.bounds, r.bounds) + 0.5*(l.Entropy()+r.Entropy())
+}
+
+// unifLogVolOverlap computes the log of the volume of the hyper-rectangle where
+// both uniform distributions have positive probability.
+func unifLogVolOverlap(b1, b2 []Bound) float64 {
+	var logVolOverlap float64
+	for dim, v1 := range b1 {
+		v2 := b2[dim]
+		// If the surfaces don't overlap, then the volume is 0
+		if v1.Max <= v2.Min || v2.Max <= v1.Min {
+			return math.Inf(-1)
+		}
+		vol := math.Min(v1.Max, v2.Max) - math.Max(v1.Min, v2.Min)
+		logVolOverlap += math.Log(vol)
+	}
+	return logVolOverlap
+}
+
 // CrossEntropy is a type for computing the cross-entropy between probability
 // distributions.
 //
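As a quick sanity check of the closed form above (an editorial sketch, not part of this commit; the github.com/gonum/stat/distmv import path and the math/rand-based construction are assumptions), take two one-dimensional uniforms U(0, 2) and U(1, 4): the overlap is [1, 2] with volume 1, the densities are 1/2 and 1/3, so BC = sqrt(1/6) and D_B = 0.5*log(6) ≈ 0.896.

	package main

	import (
		"fmt"
		"math"
		"math/rand"

		"github.com/gonum/stat/distmv" // import path assumed for this era of the code
	)

	func main() {
		rnd := rand.New(rand.NewSource(1))
		l := distmv.NewUniform([]distmv.Bound{{Min: 0, Max: 2}}, rnd)
		r := distmv.NewUniform([]distmv.Bound{{Min: 1, Max: 4}}, rnd)

		got := distmv.Bhattacharyya{}.DistUniform(l, r)
		want := 0.5 * math.Log(6) // overlap volume 1, densities 1/2 and 1/3
		fmt.Println(got, want)    // both ≈ 0.8959
	}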
@@ -139,6 +173,40 @@ func (KullbackLeibler) DistNormal(l, r *Normal) float64 {
 	return r.logSqrtDet - l.logSqrtDet + 0.5*(mahalanobisSq+tr-float64(l.dim))
 }
 
+// DistUniform returns the KullbackLeibler distance between uniform distributions
+// l and r. The dimensions of the input distributions must match or DistUniform
+// will panic.
+func (KullbackLeibler) DistUniform(l, r *Uniform) float64 {
+	bl := l.Bounds(nil)
+	br := r.Bounds(nil)
+	if len(bl) != len(br) {
+		panic(badSizeMismatch)
+	}
+
+	// The KL is ∞ if l is not completely contained within r, because then
+	// r(x) is zero when l(x) is non-zero for some x.
+	contained := true
+	for i, v := range bl {
+		if v.Min < br[i].Min || br[i].Max < v.Max {
+			contained = false
+			break
+		}
+	}
+	if !contained {
+		return math.Inf(1)
+	}
+
+	// The KL divergence is finite.
+	//
+	// KL defines 0*ln(0) = 0, so there is no contribution to KL where l(x) = 0.
+	// Inside the region, l(x) and r(x) are constant (uniform distribution), and
+	// this constant is integrated over l(x), which integrates out to one.
+	// The entropy is -log(p(x)).
+	logPx := -l.Entropy()
+	logQx := -r.Entropy()
+	return logPx - logQx
+}
+
 // Wasserstein is a type for computing the Wasserstein distance between two
 // probability distributions.
 //
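To make the containment rule concrete (an editorial sketch, not part of this commit; import path assumed as above): for l = U(0, 1) contained in r = U(0, 2), the divergence is log(vol_r) - log(vol_l) = log 2, while in the other direction r is not contained in l and the divergence is infinite.

	package main

	import (
		"fmt"
		"math"
		"math/rand"

		"github.com/gonum/stat/distmv" // import path assumed
	)

	func main() {
		rnd := rand.New(rand.NewSource(1))
		l := distmv.NewUniform([]distmv.Bound{{Min: 0, Max: 1}}, rnd)
		r := distmv.NewUniform([]distmv.Bound{{Min: 0, Max: 2}}, rnd)

		// l is contained in r: KL(l || r) = log(vol_r) - log(vol_l) = log 2.
		fmt.Println(distmv.KullbackLeibler{}.DistUniform(l, r), math.Log(2))

		// r is not contained in l, so the divergence is infinite.
		fmt.Println(distmv.KullbackLeibler{}.DistUniform(r, l)) // +Inf
	}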
@@ -38,24 +38,70 @@ func TestBhattacharyyaNormal(t *testing.T) {
 		if !ok {
 			panic("bad test")
 		}
-		lBhatt := make([]float64, test.samples)
-		x := make([]float64, a.Dim())
-		for i := 0; i < test.samples; i++ {
-			// Do importance sampling over a: \int sqrt(a*b)/a * a dx
-			a.Rand(x)
-			pa := a.LogProb(x)
-			pb := b.LogProb(x)
-			lBhatt[i] = 0.5*pb - 0.5*pa
-		}
-		logBc := floats.LogSumExp(lBhatt) - math.Log(float64(test.samples))
-		db := -logBc
+		want := bhattacharyyaSample(a.Dim(), test.samples, a, b)
 		got := Bhattacharyya{}.DistNormal(a, b)
-		if math.Abs(db-got) > test.tol {
-			t.Errorf("Bhattacharyya mismatch, case %d: got %v, want %v", cas, got, db)
+		if math.Abs(want-got) > test.tol {
+			t.Errorf("Bhattacharyya mismatch, case %d: got %v, want %v", cas, got, want)
 		}
+
+		// Bhattacharyya should be symmetric.
+		got2 := Bhattacharyya{}.DistNormal(b, a)
+		if math.Abs(got-got2) > 1e-14 {
+			t.Errorf("Bhattacharyya distance not symmetric")
+		}
 	}
 }
 
+func TestBhattacharyyaUniform(t *testing.T) {
+	rnd := rand.New(rand.NewSource(1))
+	for cas, test := range []struct {
+		a, b    *Uniform
+		samples int
+		tol     float64
+	}{
+		{
+			a:       NewUniform([]Bound{{-3, 2}, {-5, 8}}, rnd),
+			b:       NewUniform([]Bound{{-4, 1}, {-7, 10}}, rnd),
+			samples: 100000,
+			tol:     1e-2,
+		},
+		{
+			a:       NewUniform([]Bound{{-3, 2}, {-5, 8}}, rnd),
+			b:       NewUniform([]Bound{{-5, -4}, {-7, 10}}, rnd),
+			samples: 100000,
+			tol:     1e-2,
+		},
+	} {
+		a, b := test.a, test.b
+		want := bhattacharyyaSample(a.Dim(), test.samples, a, b)
+		got := Bhattacharyya{}.DistUniform(a, b)
+		if math.Abs(want-got) > test.tol {
+			t.Errorf("Bhattacharyya mismatch, case %d: got %v, want %v", cas, got, want)
+		}
+		// Bhattacharyya should be symmetric.
+		got2 := Bhattacharyya{}.DistUniform(b, a)
+		if math.Abs(got-got2) > 1e-14 {
+			t.Errorf("Bhattacharyya distance not symmetric")
+		}
+	}
+}
+
+// bhattacharyyaSample finds an estimate of the Bhattacharyya coefficient through
+// sampling.
+func bhattacharyyaSample(dim, samples int, l RandLogProber, r LogProber) float64 {
+	lBhatt := make([]float64, samples)
+	x := make([]float64, dim)
+	for i := 0; i < samples; i++ {
+		// Do importance sampling over l: \int sqrt(l*r)/l * l dx
+		l.Rand(x)
+		pa := l.LogProb(x)
+		pb := r.LogProb(x)
+		lBhatt[i] = 0.5*pb - 0.5*pa
+	}
+	logBc := floats.LogSumExp(lBhatt) - math.Log(float64(samples))
+	return -logBc
+}
+
 func TestCrossEntropyNormal(t *testing.T) {
 	for cas, test := range []struct {
 		am, bm []float64
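For reference, the identity that bhattacharyyaSample relies on, written out explicitly (editorial note, not part of the commit): sampling x_i from l and averaging sqrt(r(x_i)/l(x_i)) is an importance-sampling estimate of the Bhattacharyya coefficient, with the average taken in log space via LogSumExp for numerical stability.

	BC = \int \sqrt{l(x)\,r(x)}\,dx
	   = \int \sqrt{\tfrac{r(x)}{l(x)}}\; l(x)\,dx
	   = \mathbb{E}_{x \sim l}\!\left[ e^{\frac{1}{2}\left(\log r(x) - \log l(x)\right)} \right],
	\qquad
	\log\widehat{BC} = \operatorname{logsumexp}_i\!\left( \tfrac{1}{2}\log r(x_i) - \tfrac{1}{2}\log l(x_i) \right) - \log N,
	\qquad
	D_B = -\log\widehat{BC}.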
@@ -139,7 +185,7 @@ func TestHellingerNormal(t *testing.T) {
 	}
 }
 
-func TestKullbackLieblerNormal(t *testing.T) {
+func TestKullbackLeiblerNormal(t *testing.T) {
 	for cas, test := range []struct {
 		am, bm []float64
 		ac, bc *mat64.SymDense
@@ -164,18 +210,52 @@ func TestKullbackLieblerNormal(t *testing.T) {
 		if !ok {
 			panic("bad test")
 		}
-		var klmc float64
-		x := make([]float64, a.Dim())
-		for i := 0; i < test.samples; i++ {
-			a.Rand(x)
-			pa := a.LogProb(x)
-			pb := b.LogProb(x)
-			klmc += pa - pb
-		}
-		klmc /= float64(test.samples)
-		kl := KullbackLeibler{}.DistNormal(a, b)
-		if !floats.EqualWithinAbsOrRel(kl, klmc, test.tol, test.tol) {
-			t.Errorf("Case %d, KL mismatch: got %v, want %v", cas, kl, klmc)
+		want := klSample(a.Dim(), test.samples, a, b)
+		got := KullbackLeibler{}.DistNormal(a, b)
+		if !floats.EqualWithinAbsOrRel(want, got, test.tol, test.tol) {
+			t.Errorf("Case %d, KL mismatch: got %v, want %v", cas, got, want)
 		}
 	}
 }
+
+func TestKullbackLeiblerUniform(t *testing.T) {
+	rnd := rand.New(rand.NewSource(1))
+	for cas, test := range []struct {
+		a, b    *Uniform
+		samples int
+		tol     float64
+	}{
+		{
+			a:       NewUniform([]Bound{{-5, 2}, {-7, 12}}, rnd),
+			b:       NewUniform([]Bound{{-4, 1}, {-7, 10}}, rnd),
+			samples: 100000,
+			tol:     1e-2,
+		},
+		{
+			a:       NewUniform([]Bound{{-5, 2}, {-7, 12}}, rnd),
+			b:       NewUniform([]Bound{{-9, -6}, {-7, 10}}, rnd),
+			samples: 100000,
+			tol:     1e-2,
+		},
+	} {
+		a, b := test.a, test.b
+		want := klSample(a.Dim(), test.samples, a, b)
+		got := KullbackLeibler{}.DistUniform(a, b)
+		if math.Abs(want-got) > test.tol {
+			t.Errorf("Kullback-Leibler mismatch, case %d: got %v, want %v", cas, got, want)
+		}
+	}
+}
+
+// klSample finds an estimate of the Kullback-Leibler divergence through sampling.
+func klSample(dim, samples int, l RandLogProber, r LogProber) float64 {
+	var klmc float64
+	x := make([]float64, dim)
+	for i := 0; i < samples; i++ {
+		l.Rand(x)
+		pa := l.LogProb(x)
+		pb := r.LogProb(x)
+		klmc += pa - pb
+	}
+	return klmc / float64(samples)
+}
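klSample is the corresponding Monte Carlo estimator for the Kullback-Leibler divergence, again sampling from l (editorial note, not part of the commit):

	D_{KL}(l \,\|\, r) = \int l(x)\,\log\frac{l(x)}{r(x)}\,dx
	 = \mathbb{E}_{x \sim l}\bigl[\log l(x) - \log r(x)\bigr]
	 \approx \frac{1}{N}\sum_{i=1}^{N}\bigl(\log l(x_i) - \log r(x_i)\bigr),
	\qquad x_i \sim l.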
@@ -18,11 +18,11 @@ type Bound struct {
 type Uniform struct {
 	bounds []Bound
 	dim    int
-	src    *rand.Source
+	src    *rand.Rand
 }
 
 // NewUniform creates a new uniform distribution with the given bounds.
-func NewUniform(bnds []Bound, src *rand.Source) *Uniform {
+func NewUniform(bnds []Bound, src *rand.Rand) *Uniform {
 	dim := len(bnds)
 	if dim == 0 {
 		panic(badZeroDimension)
@@ -47,7 +47,7 @@ func NewUniform(bnds []Bound, src *rand.Source) *Uniform {
 // NewUnitUniform creates a new Uniform distribution over the dim-dimensional
 // unit hypercube. That is, a uniform distribution where each dimension has
 // Min = 0 and Max = 1.
-func NewUnitUniform(dim int, src *rand.Source) *Uniform {
+func NewUnitUniform(dim int, src *rand.Rand) *Uniform {
 	if dim <= 0 {
 		panic(nonPosDimension)
 	}
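These last two hunks are the "fix Uniform source" part of the commit: rand.Source is an interface in math/rand, so the old *rand.Source field was a pointer to an interface, which is presumably the bug being fixed, and the constructors now take a *rand.Rand directly. A minimal construction-and-sampling sketch under the post-fix signatures (the import path is an assumption; the calls mirror those in the diff):

	package main

	import (
		"fmt"
		"math/rand"

		"github.com/gonum/stat/distmv" // import path assumed
	)

	func main() {
		rnd := rand.New(rand.NewSource(42))
		u := distmv.NewUniform([]distmv.Bound{{Min: -3, Max: 2}, {Min: -5, Max: 8}}, rnd)

		x := make([]float64, u.Dim())
		u.Rand(x)
		fmt.Println(x) // one point drawn uniformly from the bounded rectangle
	}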