// Copyright ©2015 The Gonum Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package sampleuv import ( "errors" "math" "golang.org/x/exp/rand" "gonum.org/v1/gonum/stat/distuv" ) const badLengthMismatch = "sample: slice length mismatch" var ( _ Sampler = LatinHypercube{} _ Sampler = MetropolisHastings{} _ Sampler = (*Rejection)(nil) _ Sampler = IIDer{} _ WeightedSampler = SampleUniformWeighted{} _ WeightedSampler = Importance{} ) // Sampler generates a batch of samples according to the rule specified by the // implementing type. The number of samples generated is equal to len(batch), // and the samples are stored in-place into the input. type Sampler interface { Sample(batch []float64) } // WeightedSampler generates a batch of samples and their relative weights // according to the rule specified by the implementing type. The number of samples // generated is equal to len(batch), and the samples and weights // are stored in-place into the inputs. The length of weights must equal // len(batch), otherwise SampleWeighted will panic. type WeightedSampler interface { SampleWeighted(batch, weights []float64) } // SampleUniformWeighted wraps a Sampler type to create a WeightedSampler where all // weights are equal. type SampleUniformWeighted struct { Sampler } // SampleWeighted generates len(batch) samples from the embedded Sampler type // and sets all of the weights equal to 1. If len(batch) and len(weights) // are not equal, SampleWeighted will panic. func (w SampleUniformWeighted) SampleWeighted(batch, weights []float64) { if len(batch) != len(weights) { panic(badLengthMismatch) } w.Sample(batch) for i := range weights { weights[i] = 1 } } // LatinHypercube is a type for sampling using Latin hypercube sampling // from the given distribution. If src is not nil, it will be used to generate // random numbers, otherwise rand.Float64 will be used. // // Latin hypercube sampling divides the cumulative distribution function into equally // spaced bins and guarantees that one sample is generated per bin. Within each bin, // the location is randomly sampled. The distuv.UnitUniform variable can be used // for easy sampling from the unit hypercube. type LatinHypercube struct { Q distuv.Quantiler Src rand.Source } // Sample generates len(batch) samples using the LatinHypercube generation // procedure. func (l LatinHypercube) Sample(batch []float64) { latinHypercube(batch, l.Q, l.Src) } func latinHypercube(batch []float64, q distuv.Quantiler, src rand.Source) { n := len(batch) var perm []int var f64 func() float64 if src != nil { r := rand.New(src) f64 = r.Float64 perm = r.Perm(n) } else { f64 = rand.Float64 perm = rand.Perm(n) } for i := range batch { v := f64()/float64(n) + float64(i)/float64(n) batch[perm[i]] = q.Quantile(v) } } // Importance is a type for performing importance sampling using the given // Target and Proposal distributions. // // Importance sampling is a variance reduction technique where samples are // generated from a proposal distribution, q(x), instead of the target distribution // p(x). This allows relatively unlikely samples in p(x) to be generated more frequently. // // The importance sampling weight at x is given by p(x)/q(x). To reduce variance, // a good proposal distribution will bound this sampling weight. This implies the // support of q(x) should be at least as broad as p(x), and q(x) should be "fatter tailed" // than p(x). type Importance struct { Target distuv.LogProber Proposal distuv.RandLogProber } // SampleWeighted generates len(batch) samples using the Importance sampling // generation procedure. // // The length of weights must equal the length of batch, otherwise Importance will panic. func (l Importance) SampleWeighted(batch, weights []float64) { importance(batch, weights, l.Target, l.Proposal) } func importance(batch, weights []float64, target distuv.LogProber, proposal distuv.RandLogProber) { if len(batch) != len(weights) { panic(badLengthMismatch) } for i := range batch { v := proposal.Rand() batch[i] = v weights[i] = math.Exp(target.LogProb(v) - proposal.LogProb(v)) } } // ErrRejection is returned when the constant in Rejection is not sufficiently high. var ErrRejection = errors.New("rejection: acceptance ratio above 1") // Rejection is a type for sampling using the rejection sampling algorithm. // // Rejection sampling generates points from the target distribution by using // the proposal distribution. At each step of the algorithm, the proposed point // is accepted with probability // // p = target(x) / (proposal(x) * c) // // where target(x) is the probability of the point according to the target distribution // and proposal(x) is the probability according to the proposal distribution. // The constant c must be chosen such that target(x) < proposal(x) * c for all x. // The expected number of proposed samples is len(samples) * c. // // The number of proposed locations during sampling can be found with a call to // Proposed. If there was an error during sampling, all elements of samples are // set to NaN and the error can be accessed with the Err method. If src != nil, // it will be used to generate random numbers, otherwise rand.Float64 will be used. // // Target may return the true (log of) the probability of the location, or it may return // a value that is proportional to the probability (logprob + constant). This is // useful for cases where the probability distribution is only known up to a normalization // constant. type Rejection struct { C float64 Target distuv.LogProber Proposal distuv.RandLogProber Src rand.Source err error proposed int } // Err returns nil if the most recent call to sample was successful, and returns // ErrRejection if it was not. func (r *Rejection) Err() error { return r.err } // Proposed returns the number of samples proposed during the most recent call to // Sample. func (r *Rejection) Proposed() int { return r.proposed } // Sample generates len(batch) using the Rejection sampling generation procedure. // Rejection sampling may fail if the constant is insufficiently high, as described // in the type comment for Rejection. If the generation fails, the samples // are set to math.NaN(), and a call to Err will return a non-nil value. func (r *Rejection) Sample(batch []float64) { r.err = nil r.proposed = 0 proposed, ok := rejection(batch, r.Target, r.Proposal, r.C, r.Src) if !ok { r.err = ErrRejection } r.proposed = proposed } func rejection(batch []float64, target distuv.LogProber, proposal distuv.RandLogProber, c float64, src rand.Source) (nProposed int, ok bool) { if c < 1 { panic("rejection: acceptance constant must be greater than 1") } f64 := rand.Float64 if src != nil { f64 = rand.New(src).Float64 } var idx int for { nProposed++ v := proposal.Rand() qx := proposal.LogProb(v) px := target.LogProb(v) accept := math.Exp(px-qx) / c if accept > 1 { // Invalidate the whole result and return a failure. for i := range batch { batch[i] = math.NaN() } return nProposed, false } if accept > f64() { batch[idx] = v idx++ if idx == len(batch) { break } } } return nProposed, true } // MHProposal defines a proposal distribution for Metropolis Hastings. type MHProposal interface { // ConditionalDist returns the probability of the first argument conditioned on // being at the second argument // p(x|y) ConditionalLogProb(x, y float64) (prob float64) // ConditionalRand generates a new random location conditioned being at the // location y. ConditionalRand(y float64) (x float64) } // MetropolisHastings is a type for generating samples using the Metropolis Hastings // algorithm (http://en.wikipedia.org/wiki/Metropolis%E2%80%93Hastings_algorithm), // with the given target and proposal distributions, starting at the location // specified by Initial. If src != nil, it will be used to generate random // numbers, otherwise rand.Float64 will be used. // // Metropolis-Hastings is a Markov-chain Monte Carlo algorithm that generates // samples according to the distribution specified by target using the Markov // chain implicitly defined by the proposal distribution. At each // iteration, a proposal point is generated randomly from the current location. // This proposal point is accepted with probability // // p = min(1, (target(new) * proposal(current|new)) / (target(current) * proposal(new|current))) // // If the new location is accepted, it becomes the new current location. // If it is rejected, the current location remains. This is the sample stored in // batch, ignoring BurnIn and Rate (discussed below). // // The samples in Metropolis Hastings are correlated with one another through the // Markov chain. As a result, the initial value can have a significant influence // on the early samples, and so, typically, the first samples generated by the chain // are ignored. This is known as "burn-in", and the number of samples ignored // at the beginning is specified by BurnIn. The proper BurnIn value will depend // on the mixing time of the Markov chain defined by the target and proposal // distributions. // // Many choose to have a sampling "rate" where a number of samples // are ignored in between each kept sample. This helps decorrelate // the samples from one another, but also reduces the number of available samples. // This value is specified by Rate. If Rate is 0 it is defaulted to 1 (keep // every sample). // // The initial value is NOT changed during calls to Sample. type MetropolisHastings struct { Initial float64 Target distuv.LogProber Proposal MHProposal Src rand.Source BurnIn int Rate int } // Sample generates len(batch) samples using the Metropolis Hastings sample // generation method. The initial location is NOT updated during the call to Sample. func (m MetropolisHastings) Sample(batch []float64) { burnIn := m.BurnIn rate := m.Rate if rate == 0 { rate = 1 } // Use the optimal size for the temporary memory to allow the fewest calls // to MetropolisHastings. The case where tmp shadows samples must be // aligned with the logic after burn-in so that tmp does not shadow samples // during the rate portion. tmp := batch if rate > len(batch) { tmp = make([]float64, rate) } // Perform burn-in. remaining := burnIn initial := m.Initial for remaining != 0 { newSamp := min(len(tmp), remaining) metropolisHastings(tmp[newSamp:], initial, m.Target, m.Proposal, m.Src) initial = tmp[newSamp-1] remaining -= newSamp } if rate == 1 { metropolisHastings(batch, initial, m.Target, m.Proposal, m.Src) return } if len(tmp) <= len(batch) { tmp = make([]float64, rate) } // Take a single sample from the chain metropolisHastings(batch[0:1], initial, m.Target, m.Proposal, m.Src) initial = batch[0] // For all of the other samples, first generate Rate samples and then actually // accept the last one. for i := 1; i < len(batch); i++ { metropolisHastings(tmp, initial, m.Target, m.Proposal, m.Src) v := tmp[rate-1] batch[i] = v initial = v } } func metropolisHastings(batch []float64, initial float64, target distuv.LogProber, proposal MHProposal, src rand.Source) { f64 := rand.Float64 if src != nil { f64 = rand.New(src).Float64 } current := initial currentLogProb := target.LogProb(initial) for i := range batch { proposed := proposal.ConditionalRand(current) proposedLogProb := target.LogProb(proposed) probTo := proposal.ConditionalLogProb(proposed, current) probBack := proposal.ConditionalLogProb(current, proposed) accept := math.Exp(proposedLogProb + probBack - probTo - currentLogProb) if accept > f64() { current = proposed currentLogProb = proposedLogProb } batch[i] = current } } // IIDer generates a set of independently and identically distributed samples from // the input distribution. type IIDer struct { Dist distuv.Rander } // Sample generates a set of identically and independently distributed samples. func (iid IIDer) Sample(batch []float64) { for i := range batch { batch[i] = iid.Dist.Rand() } }