Files
gonum/stat/samplemv/samplemv.go
mewmew 7e53ef2fae all: use fixdocs tool to store package documentation in doc.go
Add copyright header to doc.go

Keep original comment style (e.g. line comments and block comments).

Fix doc comments containing multiple line comments.

Remove vanity imports from all files except doc.go.

Fixes #12.

The source code of fixdocs is located at:
https://play.golang.org/p/7RtYLzldsO
2017-08-16 14:26:00 +02:00

275 lines
8.6 KiB
Go

// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package samplemv
import (
"errors"
"math"
"math/rand"
"gonum.org/v1/gonum/mat"
"gonum.org/v1/gonum/stat/distmv"
)
// badLengthMismatch is the panic message used when the number of rows in a
// batch differs from the length of the accompanying weights slice. It is a
// constant so that the message cannot be reassigned at run time.
const badLengthMismatch = "samplemv: slice length mismatch"
// Compile-time checks that the concrete sampler types satisfy the interfaces
// they are meant to implement.
var (
	_ Sampler = IIDer{}
	_ Sampler = LatinHypercuber{}
	_ Sampler = (*Rejectioner)(nil)

	_ WeightedSampler = Importancer{}
	_ WeightedSampler = SampleUniformWeighted{}
)
// min reports the smaller of the two integers a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
// Sampler is implemented by types that fill a matrix with samples drawn
// according to a rule of their own. One sample is written to each row of
// batch, so rows(batch) samples are produced, and batch is overwritten
// in place.
type Sampler interface {
	Sample(batch *mat.Dense)
}
// WeightedSampler is implemented by types that produce samples together with
// a relative weight for each sample. One sample is written to each row of
// batch and its weight to the matching element of weights; both arguments are
// overwritten in place. len(weights) must equal rows(batch), otherwise
// SampleWeighted will panic.
type WeightedSampler interface {
	SampleWeighted(batch *mat.Dense, weights []float64)
}
// SampleUniformWeighted adapts a Sampler into a WeightedSampler by giving
// every generated sample the same weight.
type SampleUniformWeighted struct {
	Sampler
}
// SampleWeighted fills batch with rows(batch) samples drawn by the embedded
// Sampler and assigns the value 1 to every element of weights. It panics if
// len(weights) does not equal rows(batch).
func (w SampleUniformWeighted) SampleWeighted(batch *mat.Dense, weights []float64) {
	if nRows, _ := batch.Dims(); nRows != len(weights) {
		panic(badLengthMismatch)
	}
	w.Sampler.Sample(batch)
	for i := 0; i < len(weights); i++ {
		weights[i] = 1
	}
}
// LatinHypercuber packages the arguments of LatinHypercube so that Latin
// hypercube sampling can be used through the Sampler interface.
//
// Q is the quantile function handed to LatinHypercube, and Src is the random
// source handed to it; a nil Src makes LatinHypercube fall back to the
// top-level math/rand functions.
type LatinHypercuber struct {
	Q   distmv.Quantiler
	Src *rand.Rand
}
// Sample fills batch with rows(batch) samples by calling LatinHypercube with
// the receiver's Q and Src.
func (l LatinHypercuber) Sample(batch *mat.Dense) {
	LatinHypercube(batch, l.Q, l.Src)
}
// LatinHypercube fills batch with rows(batch) samples drawn from the given
// distribution using Latin hypercube sampling. Random numbers come from src
// when it is non-nil and from the top-level math/rand functions otherwise.
//
// Latin hypercube sampling splits the cumulative distribution function of
// every dimension into rows(batch) equally sized bins and guarantees that each
// bin receives exactly one sample, placed at a random location inside the bin.
// The distmv.NewUnitUniform function can be used for easy sampling from the
// unit hypercube.
func LatinHypercube(batch *mat.Dense, q distmv.Quantiler, src *rand.Rand) {
	nSamples, dim := batch.Dims()

	// Default to the global source and switch to src only when one is given.
	uniform, shuffle := rand.Float64, rand.Perm
	if src != nil {
		uniform, shuffle = src.Float64, src.Perm
	}

	// For every dimension, draw one point per bin and scatter the bins over
	// the rows in a random order.
	n := float64(nSamples)
	for col := 0; col < dim; col++ {
		order := shuffle(nSamples)
		for bin, row := range order {
			batch.Set(row, col, uniform()/n+float64(bin)/n)
		}
	}

	// Each row currently holds probabilities; map it through the quantile
	// function. The probabilities are copied out first because the row is
	// also the destination of Quantile.
	probs := make([]float64, dim)
	for row := 0; row < nSamples; row++ {
		sample := batch.RawRowView(row)
		copy(probs, sample)
		q.Quantile(sample, probs)
	}
}
// Importancer packages the arguments of Importance so that importance
// sampling can be used through the WeightedSampler interface.
//
// Target is the distribution of interest and Proposal is the distribution the
// samples are actually drawn from.
type Importancer struct {
	Target   distmv.LogProber
	Proposal distmv.RandLogProber
}
// SampleWeighted fills batch and weights by calling Importance with the
// receiver's Target and Proposal distributions.
func (l Importancer) SampleWeighted(batch *mat.Dense, weights []float64) {
	Importance(batch, weights, l.Target, l.Proposal)
}
// Importance generates rows(batch) samples from the proposal distribution and
// stores the sample locations in batch and the matching importance sampling
// weights in weights, both in place.
//
// Importance sampling is a variance reduction technique where samples are
// generated from a proposal distribution, q(x), instead of the target distribution
// p(x). This allows relatively unlikely samples in p(x) to be generated more frequently.
//
// The importance sampling weight at x is given by p(x)/q(x). To reduce variance,
// a good proposal distribution will bound this sampling weight. This implies the
// support of q(x) should be at least as broad as p(x), and q(x) should be "fatter tailed"
// than p(x).
//
// The length of weights must equal rows(batch), otherwise Importance will
// panic. The weights are always stored, so a nil weights slice is only
// accepted when batch has no rows.
func Importance(batch *mat.Dense, weights []float64, target distmv.LogProber, proposal distmv.RandLogProber) {
	r, _ := batch.Dims()
	if r != len(weights) {
		panic(badLengthMismatch)
	}
	for i := 0; i < r; i++ {
		// Draw the sample directly into the i-th row of batch.
		v := batch.RawRowView(i)
		proposal.Rand(v)
		// The weight p(x)/q(x) is computed from the log probabilities.
		weights[i] = math.Exp(target.LogProb(v) - proposal.LogProb(v))
	}
}
// ErrRejection signals that rejection sampling failed because an acceptance
// ratio larger than 1 was encountered, which means the constant supplied to
// Rejection was too small for the target and proposal distributions.
var ErrRejection = errors.New("rejection: acceptance ratio above 1")
// Rejectioner packages the arguments of Rejection so that rejection sampling
// can be used through the Sampler interface. When the sampling fails during a
// call to Sample, every element of the batch is set to math.NaN() and Err
// returns a non-nil error afterwards.
//
// C, Target, Proposal and Src are passed to Rejection unchanged. The
// unexported fields record the outcome of the most recent call to Sample.
type Rejectioner struct {
	C        float64
	Target   distmv.LogProber
	Proposal distmv.RandLogProber
	Src      *rand.Rand

	err      error
	proposed int
}
// Err reports the outcome of the most recent call to Sample: nil if it
// succeeded and ErrRejection if it did not.
func (r *Rejectioner) Err() error {
	return r.err
}
// Proposed reports how many candidate samples were proposed during the most
// recent call to Sample.
func (r *Rejectioner) Proposed() int {
	return r.proposed
}
// Sample fills batch with rows(batch) samples by calling Rejection with the
// receiver's Target, Proposal, C and Src. Rejection sampling may fail if the
// constant C is insufficiently high, as described in the function comment for
// Rejection. On failure the samples are set to math.NaN() and a subsequent
// call to Err returns a non-nil value.
func (r *Rejectioner) Sample(batch *mat.Dense) {
	// Clear the record of the previous call before sampling.
	r.err, r.proposed = nil, 0

	n, succeeded := Rejection(batch, r.Target, r.Proposal, r.C, r.Src)
	r.proposed = n
	if !succeeded {
		r.err = ErrRejection
	}
}
// Rejection generates rows(batch) samples using the rejection sampling algorithm
// and stores them in place into batch. Sampling continues until batch is filled.
// Rejection returns the total number of proposed locations and a boolean that is
// true when sampling succeeded and false when the rejection sampling assumption
// was violated (see details below). If the returned boolean is false, all elements
// of batch are set to NaN. If src != nil, it will be used to generate random
// numbers, otherwise rand.Float64 will be used.
//
// If batch has no rows there is nothing to fill, and Rejection returns (0, true)
// without proposing any location. Rejection panics if c < 1.
//
// Rejection sampling generates points from the target distribution by using
// the proposal distribution. At each step of the algorithm, the proposed point
// is accepted with probability
//
//	p = target(x) / (proposal(x) * c)
//
// where target(x) is the probability of the point according to the target distribution
// and proposal(x) is the probability according to the proposal distribution.
// The constant c must be chosen such that target(x) does not exceed proposal(x) * c
// for any x. The expected number of proposed samples is rows(batch) * c.
//
// Target may return the true (log of) the probability of the location, or it may return
// a value that is proportional to the probability (logprob + constant). This is
// useful for cases where the probability distribution is only known up to a normalization
// constant.
func Rejection(batch *mat.Dense, target distmv.LogProber, proposal distmv.RandLogProber, c float64, src *rand.Rand) (nProposed int, ok bool) {
	if c < 1 {
		panic("rejection: acceptance constant must be greater than 1")
	}
	r, dim := batch.Dims()
	if r == 0 {
		// Nothing to fill. Without this guard the loop below could never
		// reach idx == r and would either write out of range or never end.
		return 0, true
	}
	f64 := rand.Float64
	if src != nil {
		f64 = src.Float64
	}
	v := make([]float64, dim)
	var idx int
	for {
		nProposed++
		proposal.Rand(v)
		qx := proposal.LogProb(v)
		px := target.LogProb(v)
		accept := math.Exp(px-qx) / c
		if accept > 1 {
			// The constant c is too small for this point. Invalidate the
			// whole result and return a failure.
			for i := 0; i < r; i++ {
				for j := 0; j < dim; j++ {
					batch.Set(i, j, math.NaN())
				}
			}
			return nProposed, false
		}
		if accept > f64() {
			// Accepted: store the point in the next free row.
			batch.SetRow(idx, v)
			idx++
			if idx == r {
				break
			}
		}
	}
	return nProposed, true
}
// IIDer packages the argument of IID so that independent sampling from Dist
// can be used through the Sampler interface.
type IIDer struct {
	Dist distmv.Rander
}
// Sample fills batch with independent and identically distributed samples by
// calling IID with the receiver's Dist.
func (iid IIDer) Sample(batch *mat.Dense) {
	IID(batch, iid.Dist)
}
// IID fills every row of batch with a sample drawn from the distribution d,
// producing rows(batch) independent and identically distributed samples.
func IID(batch *mat.Dense, d distmv.Rander) {
	nRows, _ := batch.Dims()
	for row := 0; row < nRows; row++ {
		// Rand writes the new sample directly into the row's storage.
		sample := batch.RawRowView(row)
		d.Rand(sample)
	}
}