stat/distmv: Add distance measures for Uniform, and fix Uniform source

Brendan Tracey
2017-05-06 17:07:36 -06:00
parent cd3537419d
commit cb206ff047
3 changed files with 177 additions and 29 deletions


@@ -54,6 +54,40 @@ func (Bhattacharyya) DistNormal(l, r *Normal) float64 {
return 0.125*mahalanobisSq + 0.5*ds - 0.25*dl - 0.25*dr
}
// DistUniform computes the Bhattacharyya distance between uniform distributions l and r.
// The dimensions of the input distributions must match or DistUniform will panic.
func (Bhattacharyya) DistUniform(l, r *Uniform) float64 {
if len(l.bounds) != len(r.bounds) {
panic(badSizeMismatch)
}
// BC = \int \sqrt(p(x)q(x)) dx, and for uniform distributions the integrand is
// a constant over the volume where both distributions have positive probability.
// Compute that overlap volume and the value of sqrt(p(x)q(x)) on it. The entropy
// of a uniform distribution is the negative log probability, so it is used in
// place of LogProb to avoid constructing an x value.
//
// BC = volume * sqrt(p(x)q(x))
// logBC = log(volume) + 0.5*(logP + logQ)
// D_B = -logBC
return -unifLogVolOverlap(l.bounds, r.bounds) + 0.5*(l.Entropy()+r.Entropy())
}
// unifLogVolOverlap computes the log of the volume of the hyper-rectangle where
// both uniform distributions have positive probability.
func unifLogVolOverlap(b1, b2 []Bound) float64 {
var logVolOverlap float64
for dim, v1 := range b1 {
v2 := b2[dim]
// If the surfaces don't overlap, then the volume is 0
if v1.Max <= v2.Min || v2.Max <= v1.Min {
return math.Inf(-1)
}
vol := math.Min(v1.Max, v2.Max) - math.Max(v1.Min, v2.Min)
logVolOverlap += math.Log(vol)
}
return logVolOverlap
}
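For intuition, here is a minimal usage sketch of the closed form above, checked against a hand computation for one-dimensional uniforms. The import path is assumed from the gonum layout at the time and may need adjusting.

package main

import (
	"fmt"
	"math"
	"math/rand"

	"github.com/gonum/stat/distmv" // assumed import path; adjust to your gonum layout
)

func main() {
	rnd := rand.New(rand.NewSource(1))
	// l is uniform on [0,2] and r is uniform on [1,3]; they overlap on [1,2],
	// so the overlap volume is 1 and each entropy is log(2).
	l := distmv.NewUniform([]distmv.Bound{{Min: 0, Max: 2}}, rnd)
	r := distmv.NewUniform([]distmv.Bound{{Min: 1, Max: 3}}, rnd)
	// D_B = -log(1) + 0.5*(log(2)+log(2)) = log(2) ≈ 0.6931.
	fmt.Println(distmv.Bhattacharyya{}.DistUniform(l, r), math.Log(2))
}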
// CrossEntropy is a type for computing the cross-entropy between probability
// distributions.
//
@@ -139,6 +173,40 @@ func (KullbackLeibler) DistNormal(l, r *Normal) float64 {
return r.logSqrtDet - l.logSqrtDet + 0.5*(mahalanobisSq+tr-float64(l.dim))
}
// DistUniform returns the Kullback-Leibler divergence between uniform distributions
// l and r. The dimensions of the input distributions must match or DistUniform
// will panic.
func (KullbackLeibler) DistUniform(l, r *Uniform) float64 {
bl := l.Bounds(nil)
br := r.Bounds(nil)
if len(bl) != len(br) {
panic(badSizeMismatch)
}
// The KL is ∞ if l is not completely contained within r, because then
// r(x) is zero when l(x) is non-zero for some x.
contained := true
for i, v := range bl {
if v.Min < br[i].Min || br[i].Max < v.Max {
contained = false
break
}
}
if !contained {
return math.Inf(1)
}
// The KL divergence is finite.
//
// KL defines 0*ln(0) = 0, so the region where l(x) = 0 contributes nothing.
// Where l(x) > 0, both l(x) and r(x) are constant (uniform distributions), so
// the integrand log(l(x)) - log(r(x)) is a constant, and integrating it
// against l(x) leaves that constant because l integrates to one.
// For a uniform distribution the entropy is -log(p(x)).
logPx := -l.Entropy()
logQx := -r.Entropy()
return logPx - logQx
}
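In the contained case the divergence is simply the difference of entropies, i.e. the log of the volume ratio; otherwise it is +Inf. A minimal sketch exercising both branches (same assumed import path as above):

package main

import (
	"fmt"
	"math"
	"math/rand"

	"github.com/gonum/stat/distmv" // assumed import path; adjust to your gonum layout
)

func main() {
	rnd := rand.New(rand.NewSource(1))
	// l is uniform on [0,1] and is contained in r, which is uniform on [-1,3].
	l := distmv.NewUniform([]distmv.Bound{{Min: 0, Max: 1}}, rnd)
	r := distmv.NewUniform([]distmv.Bound{{Min: -1, Max: 3}}, rnd)
	// KL(l||r) = H(r) - H(l) = log(4) - log(1) ≈ 1.3863.
	fmt.Println(distmv.KullbackLeibler{}.DistUniform(l, r), math.Log(4))
	// Swapping the arguments gives +Inf, because r has mass where l(x) = 0.
	fmt.Println(distmv.KullbackLeibler{}.DistUniform(r, l))
}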
// Wasserstein is a type for computing the Wasserstein distance between two
// probability distributions.
//


@@ -38,24 +38,70 @@ func TestBhattacharyyaNormal(t *testing.T) {
if !ok {
panic("bad test")
}
lBhatt := make([]float64, test.samples)
x := make([]float64, a.Dim())
for i := 0; i < test.samples; i++ {
// Do importance sampling over a: \int sqrt(a*b)/a * a dx
a.Rand(x)
pa := a.LogProb(x)
pb := b.LogProb(x)
lBhatt[i] = 0.5*pb - 0.5*pa
}
logBc := floats.LogSumExp(lBhatt) - math.Log(float64(test.samples))
db := -logBc
want := bhattacharyyaSample(a.Dim(), test.samples, a, b)
got := Bhattacharyya{}.DistNormal(a, b)
if math.Abs(db-got) > test.tol {
t.Errorf("Bhattacharyya mismatch, case %d: got %v, want %v", cas, got, db)
if math.Abs(want-got) > test.tol {
t.Errorf("Bhattacharyya mismatch, case %d: got %v, want %v", cas, got, want)
}
// Bhattacharyya should be symmetric
got2 := Bhattacharyya{}.DistNormal(b, a)
if math.Abs(got-got2) > 1e-14 {
t.Errorf("Bhattacharyya distance not symmetric")
}
}
}
func TestBhattacharyyaUniform(t *testing.T) {
rnd := rand.New(rand.NewSource(1))
for cas, test := range []struct {
a, b *Uniform
samples int
tol float64
}{
{
a: NewUniform([]Bound{{-3, 2}, {-5, 8}}, rnd),
b: NewUniform([]Bound{{-4, 1}, {-7, 10}}, rnd),
samples: 100000,
tol: 1e-2,
},
{
a: NewUniform([]Bound{{-3, 2}, {-5, 8}}, rnd),
b: NewUniform([]Bound{{-5, -4}, {-7, 10}}, rnd),
samples: 100000,
tol: 1e-2,
},
} {
a, b := test.a, test.b
want := bhattacharyyaSample(a.Dim(), test.samples, a, b)
got := Bhattacharyya{}.DistUniform(a, b)
if math.Abs(want-got) > test.tol {
t.Errorf("Bhattacharyya mismatch, case %d: got %v, want %v", cas, got, want)
}
// Bhattacharyya should be symmetric
got2 := Bhattacharyya{}.DistUniform(b, a)
if math.Abs(got-got2) > 1e-14 {
t.Errorf("Bhattacharyya distance not symmetric")
}
}
}
// bhattacharyyaSample finds an estimate of the Bhattacharyya distance through
// importance sampling.
func bhattacharyyaSample(dim, samples int, l RandLogProber, r LogProber) float64 {
lBhatt := make([]float64, samples)
x := make([]float64, dim)
for i := 0; i < samples; i++ {
// Do importance sampling over l: \int sqrt(l*r)/l * l dx
l.Rand(x)
pa := l.LogProb(x)
pb := r.LogProb(x)
lBhatt[i] = 0.5*pb - 0.5*pa
}
logBc := floats.LogSumExp(lBhatt) - math.Log(float64(samples))
return -logBc
}
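The importance-sampling comment above is the key identity behind this estimator: the Bhattacharyya coefficient is rewritten as an expectation under l, and the log-sum-exp average then estimates its log in a numerically stable way:

BC = \int \sqrt(l(x) r(x)) dx = \int \sqrt(r(x)/l(x)) l(x) dx = E_{x~l}[exp(0.5*log r(x) - 0.5*log l(x))]

so with x_1, ..., x_n drawn from l,

log BC ≈ LogSumExp_i(0.5*log r(x_i) - 0.5*log l(x_i)) - log(n),  and D_B = -log BC.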
func TestCrossEntropyNormal(t *testing.T) {
for cas, test := range []struct {
am, bm []float64
@@ -139,7 +185,7 @@ func TestHellingerNormal(t *testing.T) {
}
}
func TestKullbackLieblerNormal(t *testing.T) {
func TestKullbackLeiblerNormal(t *testing.T) {
for cas, test := range []struct {
am, bm []float64
ac, bc *mat64.SymDense
@@ -164,18 +210,52 @@ func TestKullbackLieblerNormal(t *testing.T) {
if !ok {
panic("bad test")
}
var klmc float64
x := make([]float64, a.Dim())
for i := 0; i < test.samples; i++ {
a.Rand(x)
pa := a.LogProb(x)
pb := b.LogProb(x)
klmc += pa - pb
}
klmc /= float64(test.samples)
kl := KullbackLeibler{}.DistNormal(a, b)
if !floats.EqualWithinAbsOrRel(kl, klmc, test.tol, test.tol) {
t.Errorf("Case %d, KL mismatch: got %v, want %v", cas, kl, klmc)
want := klSample(a.Dim(), test.samples, a, b)
got := KullbackLeibler{}.DistNormal(a, b)
if !floats.EqualWithinAbsOrRel(want, got, test.tol, test.tol) {
t.Errorf("Case %d, KL mismatch: got %v, want %v", cas, got, want)
}
}
}
func TestKullbackLeiblerUniform(t *testing.T) {
rnd := rand.New(rand.NewSource(1))
for cas, test := range []struct {
a, b *Uniform
samples int
tol float64
}{
{
a: NewUniform([]Bound{{-5, 2}, {-7, 12}}, rnd),
b: NewUniform([]Bound{{-4, 1}, {-7, 10}}, rnd),
samples: 100000,
tol: 1e-2,
},
{
a: NewUniform([]Bound{{-5, 2}, {-7, 12}}, rnd),
b: NewUniform([]Bound{{-9, -6}, {-7, 10}}, rnd),
samples: 100000,
tol: 1e-2,
},
} {
a, b := test.a, test.b
want := klSample(a.Dim(), test.samples, a, b)
got := KullbackLeibler{}.DistUniform(a, b)
if math.Abs(want-got) > test.tol {
t.Errorf("Kullback-Leibler mismatch, case %d: got %v, want %v", cas, got, want)
}
}
}
// klSample finds an estimate of the Kullback-Leibler divergence through sampling.
func klSample(dim, samples int, l RandLogProber, r LogProber) float64 {
var klmc float64
x := make([]float64, dim)
for i := 0; i < samples; i++ {
l.Rand(x)
pa := l.LogProb(x)
pb := r.LogProb(x)
klmc += pa - pb
}
return klmc / float64(samples)
}
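klSample is the plain Monte Carlo analogue: the divergence is an expectation under l, so it is estimated by averaging log-density differences over samples drawn from l:

KL(l || r) = E_{x~l}[log l(x) - log r(x)] ≈ (1/n) \sum_{i=1}^{n} (log l(x_i) - log r(x_i)),  x_i drawn from l.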


@@ -18,11 +18,11 @@ type Bound struct {
type Uniform struct {
bounds []Bound
dim int
src *rand.Source
src *rand.Rand
}
// NewUniform creates a new uniform distribution with the given bounds.
func NewUniform(bnds []Bound, src *rand.Source) *Uniform {
func NewUniform(bnds []Bound, src *rand.Rand) *Uniform {
dim := len(bnds)
if dim == 0 {
panic(badZeroDimension)
@@ -47,7 +47,7 @@ func NewUniform(bnds []Bound, src *rand.Source) *Uniform {
// NewUnitUniform creates a new Uniform distribution over the dim-dimensional
// unit hypercube. That is, a uniform distribution where each dimension has
// Min = 0 and Max = 1.
func NewUnitUniform(dim int, src *rand.Source) *Uniform {
func NewUnitUniform(dim int, src *rand.Rand) *Uniform {
if dim <= 0 {
panic(nonPosDimension)
}