optimize: imported optimize as a subtree

Brendan Tracey
2017-05-23 00:02:57 -06:00
38 changed files with 8830 additions and 0 deletions

optimize/.travis.yml

@@ -0,0 +1,23 @@
sudo: false
language: go
# Versions of go that are explicitly supported by gonum.
go:
- 1.5.4
- 1.6.3
- 1.7.3
# Required for coverage.
before_install:
- go get golang.org/x/tools/cmd/cover
- go get github.com/mattn/goveralls
# Get deps, build, test, and ensure the code is gofmt'ed.
# If we are building as gonum, then we have access to the coveralls api key, so we can run coverage as well.
script:
- go get -d -t -v ./...
- go build -v ./...
- go test -v ./...
- test -z "$(gofmt -d .)"
- if [[ $TRAVIS_SECURE_ENV_VARS = "true" ]]; then bash ./.travis/test-coverage.sh; fi


@@ -0,0 +1,35 @@
#!/bin/bash
PROFILE_OUT=$PWD/profile.out
ACC_OUT=$PWD/acc.out
testCover() {
# set the return value to 0 (successful)
retval=0
# get the directory to check from the parameter. Default to '.'
d=${1:-.}
# skip if there are no Go files here
ls $d/*.go &> /dev/null || return $retval
# switch to the directory to check
pushd $d > /dev/null
# create the coverage profile
coverageresult=`go test -v -coverprofile=$PROFILE_OUT`
# output the result so we can check the shell output
echo ${coverageresult}
# append the results to acc.out if coverage didn't fail, else set the retval to 1 (failed)
( [[ ${coverageresult} == *FAIL* ]] && retval=1 ) || ( [ -f $PROFILE_OUT ] && grep -v "mode: set" $PROFILE_OUT >> $ACC_OUT )
# return to our working dir
popd > /dev/null
# return our return value
return $retval
}
# Init acc.out
echo "mode: set" > $ACC_OUT
# Run test coverage on all directories containing go files
find . -maxdepth 10 -type d | while read d; do testCover $d || exit; done
# Upload the coverage profile to coveralls.io
[ -n "$COVERALLS_TOKEN" ] && goveralls -coverprofile=$ACC_OUT -service=travis-ci -repotoken $COVERALLS_TOKEN

optimize/README.md

@@ -0,0 +1,13 @@
# Gonum Optimize [![Build Status](https://travis-ci.org/gonum/optimize.svg?branch=master)](https://travis-ci.org/gonum/optimize) [![Coverage Status](https://coveralls.io/repos/gonum/optimize/badge.svg?branch=master&service=github)](https://coveralls.io/github/gonum/optimize?branch=master) [![GoDoc](https://godoc.org/github.com/gonum/optimize?status.svg)](https://godoc.org/github.com/gonum/optimize)
This is an optimization package for the Go language. More documentation is available at godoc.org/github.com/gonum/optimize
## Issues
If you find any bugs, feel free to file an issue on the GitHub issue tracker. Discussions on API changes, added features, code review, or similar requests are preferred on the gonum-dev Google Group.
https://groups.google.com/forum/#!forum/gonum-dev
## License
Please see github.com/gonum/license for general license information, contributors, authors, etc. on the Gonum suite of packages.

optimize/backtracking.go

@@ -0,0 +1,82 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
const (
defaultBacktrackingContraction = 0.5
defaultBacktrackingDecrease = 1e-4
minimumBacktrackingStepSize = 1e-20
)
// Backtracking is a Linesearcher that uses backtracking to find a point that
// satisfies the Armijo condition with the given decrease factor. If the Armijo
// condition has not been met, the step size is decreased by ContractionFactor.
//
// The Armijo condition only requires the gradient at the beginning of each
// major iteration (not at successive step locations), and so Backtracking may
// be a good linesearch for functions with expensive gradients. Backtracking is
// not appropriate for optimizers that require the Wolfe conditions to be met,
// such as BFGS.
//
// Both DecreaseFactor and ContractionFactor must be between zero and one, and
// Backtracking will panic otherwise. If either DecreaseFactor or
// ContractionFactor is zero, it will be set to a reasonable default.
type Backtracking struct {
DecreaseFactor float64 // Constant factor in the sufficient decrease (Armijo) condition.
ContractionFactor float64 // Step size multiplier at each iteration (step *= ContractionFactor).
stepSize float64
initF float64
initG float64
lastOp Operation
}
func (b *Backtracking) Init(f, g float64, step float64) Operation {
if step <= 0 {
panic("backtracking: bad step size")
}
if g >= 0 {
panic("backtracking: initial derivative is non-negative")
}
if b.ContractionFactor == 0 {
b.ContractionFactor = defaultBacktrackingContraction
}
if b.DecreaseFactor == 0 {
b.DecreaseFactor = defaultBacktrackingDecrease
}
if b.ContractionFactor <= 0 || b.ContractionFactor >= 1 {
panic("backtracking: ContractionFactor must be between 0 and 1")
}
if b.DecreaseFactor <= 0 || b.DecreaseFactor >= 1 {
panic("backtracking: DecreaseFactor must be between 0 and 1")
}
b.stepSize = step
b.initF = f
b.initG = g
b.lastOp = FuncEvaluation
return b.lastOp
}
func (b *Backtracking) Iterate(f, _ float64) (Operation, float64, error) {
if b.lastOp != FuncEvaluation {
panic("backtracking: Init has not been called")
}
if ArmijoConditionMet(f, b.initF, b.initG, b.stepSize, b.DecreaseFactor) {
b.lastOp = MajorIteration
return b.lastOp, b.stepSize, nil
}
b.stepSize *= b.ContractionFactor
if b.stepSize < minimumBacktrackingStepSize {
b.lastOp = NoOperation
return b.lastOp, b.stepSize, ErrLinesearcherFailure
}
b.lastOp = FuncEvaluation
return b.lastOp, b.stepSize, nil
}
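
The Init/Iterate protocol above is easiest to see with a small driver. The sketch below is illustrative only and not part of this commit: it minimizes the one-dimensional projection phi(step) = (step-2)^2, whose value and derivative at step 0 are 4 and -4, and deliberately starts with an oversized step so the contraction is visible.

package main

import (
	"fmt"
	"log"

	"github.com/gonum/optimize"
)

func main() {
	// phi is the objective projected onto the search direction (made up for the example).
	phi := func(step float64) float64 { return (step - 2) * (step - 2) }

	var ls optimize.Backtracking
	step := 4.0
	// Init receives the function value and directional derivative at step 0.
	op := ls.Init(phi(0), -4, step)
	for op == optimize.FuncEvaluation {
		var err error
		// Backtracking never requests a gradient, so the second argument is unused.
		op, step, err = ls.Iterate(phi(step), 0)
		if err != nil {
			log.Fatal(err)
		}
	}
	if op == optimize.MajorIteration {
		fmt.Println("accepted step:", step) // 2, after one contraction from 4
	}
}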

optimize/bfgs.go

@@ -0,0 +1,160 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import (
"math"
"github.com/gonum/matrix/mat64"
)
// BFGS implements the Broyden-Fletcher-Goldfarb-Shanno optimization method. It
// is a quasi-Newton method that performs successive rank-one updates to an
// estimate of the inverse Hessian of the objective function. It exhibits
// super-linear convergence when in proximity to a local minimum. It has memory
// cost that is O(n^2) relative to the input dimension.
type BFGS struct {
// Linesearcher selects suitable steps along the descent direction.
// Accepted steps should satisfy the strong Wolfe conditions.
// If Linesearcher == nil, an appropriate default is chosen.
Linesearcher Linesearcher
ls *LinesearchMethod
dim int
x mat64.Vector // Location of the last major iteration.
grad mat64.Vector // Gradient at the last major iteration.
s mat64.Vector // Difference between locations in this and the previous iteration.
y mat64.Vector // Difference between gradients in this and the previous iteration.
tmp mat64.Vector
invHess *mat64.SymDense
first bool // Indicator of the first iteration.
}
func (b *BFGS) Init(loc *Location) (Operation, error) {
if b.Linesearcher == nil {
b.Linesearcher = &Bisection{}
}
if b.ls == nil {
b.ls = &LinesearchMethod{}
}
b.ls.Linesearcher = b.Linesearcher
b.ls.NextDirectioner = b
return b.ls.Init(loc)
}
func (b *BFGS) Iterate(loc *Location) (Operation, error) {
return b.ls.Iterate(loc)
}
func (b *BFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) {
dim := len(loc.X)
b.dim = dim
b.first = true
x := mat64.NewVector(dim, loc.X)
grad := mat64.NewVector(dim, loc.Gradient)
b.x.CloneVec(x)
b.grad.CloneVec(grad)
b.y.Reset()
b.s.Reset()
b.tmp.Reset()
if b.invHess == nil || cap(b.invHess.RawSymmetric().Data) < dim*dim {
b.invHess = mat64.NewSymDense(dim, nil)
} else {
b.invHess = mat64.NewSymDense(dim, b.invHess.RawSymmetric().Data[:dim*dim])
}
// The values of the inverse Hessian are initialized in the first call to
// NextDirection.
// Initial direction is just negative of the gradient because the Hessian
// is an identity matrix.
d := mat64.NewVector(dim, dir)
d.ScaleVec(-1, grad)
return 1 / mat64.Norm(d, 2)
}
func (b *BFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
dim := b.dim
if len(loc.X) != dim {
panic("bfgs: unexpected size mismatch")
}
if len(loc.Gradient) != dim {
panic("bfgs: unexpected size mismatch")
}
if len(dir) != dim {
panic("bfgs: unexpected size mismatch")
}
x := mat64.NewVector(dim, loc.X)
grad := mat64.NewVector(dim, loc.Gradient)
// s = x_{k+1} - x_{k}
b.s.SubVec(x, &b.x)
// y = g_{k+1} - g_{k}
b.y.SubVec(grad, &b.grad)
sDotY := mat64.Dot(&b.s, &b.y)
if b.first {
// Rescale the initial Hessian.
// From: Nocedal, J., Wright, S.: Numerical Optimization (2nd ed).
// Springer (2006), page 143, eq. 6.20.
yDotY := mat64.Dot(&b.y, &b.y)
scale := sDotY / yDotY
for i := 0; i < dim; i++ {
for j := i; j < dim; j++ {
if i == j {
b.invHess.SetSym(i, i, scale)
} else {
b.invHess.SetSym(i, j, 0)
}
}
}
b.first = false
}
if math.Abs(sDotY) != 0 {
// Update the inverse Hessian according to the formula
//
// B_{k+1}^-1 = B_k^-1
// + (s_k^T y_k + y_k^T B_k^-1 y_k) / (s_k^T y_k)^2 * (s_k s_k^T)
// - (B_k^-1 y_k s_k^T + s_k y_k^T B_k^-1) / (s_k^T y_k).
//
// Note that y_k^T B_k^-1 y_k is a scalar, and that the third term is a
// rank-two update where B_k^-1 y_k is one vector and s_k is the other.
yBy := mat64.Inner(&b.y, b.invHess, &b.y)
b.tmp.MulVec(b.invHess, &b.y)
scale := (1 + yBy/sDotY) / sDotY
b.invHess.SymRankOne(b.invHess, scale, &b.s)
b.invHess.RankTwo(b.invHess, -1/sDotY, &b.tmp, &b.s)
}
// Update the stored BFGS data.
b.x.CopyVec(x)
b.grad.CopyVec(grad)
// New direction is stored in dir.
d := mat64.NewVector(dim, dir)
d.MulVec(b.invHess, grad)
d.ScaleVec(-1, d)
return 1
}
func (*BFGS) Needs() struct {
Gradient bool
Hessian bool
} {
return struct {
Gradient bool
Hessian bool
}{true, false}
}
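
The inverse-Hessian update in NextDirection can be checked numerically: whatever s_k and y_k are, the updated matrix must satisfy the secant condition B_{k+1}^-1 y_k = s_k. The sketch below is illustrative only (made-up s and y, not part of this commit) and applies the same SymRankOne/RankTwo sequence to a 2×2 identity.

package main

import (
	"fmt"

	"github.com/gonum/matrix/mat64"
)

func main() {
	s := mat64.NewVector(2, []float64{0.3, -0.1}) // x_{k+1} - x_k (made up)
	y := mat64.NewVector(2, []float64{0.5, 0.2})  // ∇f_{k+1} - ∇f_k (made up)
	invHess := mat64.NewSymDense(2, []float64{1, 0, 0, 1})

	sDotY := mat64.Dot(s, y)
	yBy := mat64.Inner(y, invHess, y)
	var tmp mat64.Vector
	tmp.MulVec(invHess, y) // B_k^-1 y_k, computed before the update

	// Same update as in NextDirection.
	scale := (1 + yBy/sDotY) / sDotY
	invHess.SymRankOne(invHess, scale, s)
	invHess.RankTwo(invHess, -1/sDotY, &tmp, s)

	// The secant condition: B_{k+1}^-1 y_k must reproduce s_k.
	var check mat64.Vector
	check.MulVec(invHess, y)
	fmt.Println(check.At(0, 0), check.At(1, 0)) // ≈ 0.3 -0.1
}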

optimize/bisection.go

@@ -0,0 +1,146 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import "math"
const (
defaultBisectionCurvature = 0.9
)
// Bisection is a Linesearcher that uses a bisection to find a point that
// satisfies the strong Wolfe conditions with the given curvature factor and
// a decrease factor of zero.
type Bisection struct {
// CurvatureFactor is the constant factor in the curvature condition.
// Smaller values result in a more exact line search.
// A set value must be in the interval (0, 1), otherwise Init will panic.
// If it is zero, it will be defaulted to 0.9.
CurvatureFactor float64
minStep float64
maxStep float64
currStep float64
initF float64
minF float64
maxF float64
lastF float64
initGrad float64
lastOp Operation
}
func (b *Bisection) Init(f, g float64, step float64) Operation {
if step <= 0 {
panic("bisection: bad step size")
}
if g >= 0 {
panic("bisection: initial derivative is non-negative")
}
if b.CurvatureFactor == 0 {
b.CurvatureFactor = defaultBisectionCurvature
}
if b.CurvatureFactor <= 0 || b.CurvatureFactor >= 1 {
panic("bisection: CurvatureFactor not between 0 and 1")
}
b.minStep = 0
b.maxStep = math.Inf(1)
b.currStep = step
b.initF = f
b.minF = f
b.maxF = math.NaN()
b.initGrad = g
// Only evaluate the gradient when necessary.
b.lastOp = FuncEvaluation
return b.lastOp
}
func (b *Bisection) Iterate(f, g float64) (Operation, float64, error) {
if b.lastOp != FuncEvaluation && b.lastOp != GradEvaluation {
panic("bisection: Init has not been called")
}
minF := b.initF
if b.maxF < minF {
minF = b.maxF
}
if b.minF < minF {
minF = b.minF
}
if b.lastOp == FuncEvaluation {
// See if the function value is good enough to make progress. If it is,
// evaluate the gradient. If not, set it to the upper bound if the bound
// has not yet been found, otherwise iterate toward the minimum location.
if f <= minF {
b.lastF = f
b.lastOp = GradEvaluation
return b.lastOp, b.currStep, nil
}
if math.IsInf(b.maxStep, 1) {
b.maxStep = b.currStep
b.maxF = f
return b.nextStep((b.minStep + b.maxStep) / 2)
}
if b.minF <= b.maxF {
b.maxStep = b.currStep
b.maxF = f
} else {
b.minStep = b.currStep
b.minF = f
}
return b.nextStep((b.minStep + b.maxStep) / 2)
}
f = b.lastF
// The function value was lower. Check if this location is sufficient to
// converge the linesearch, otherwise iterate.
if StrongWolfeConditionsMet(f, g, minF, b.initGrad, b.currStep, 0, b.CurvatureFactor) {
b.lastOp = MajorIteration
return b.lastOp, b.currStep, nil
}
if math.IsInf(b.maxStep, 1) {
// The function value is lower. If the gradient is positive, an upper bound
// of the minimum has been found. If the gradient is negative, search farther
// in that direction.
if g > 0 {
b.maxStep = b.currStep
b.maxF = f
return b.nextStep((b.minStep + b.maxStep) / 2)
}
b.minStep = b.currStep
b.minF = f
return b.nextStep(b.currStep * 2)
}
// The interval has been bounded, and we have found a new lowest value. Use
// the gradient to decide which direction.
if g < 0 {
b.minStep = b.currStep
b.minF = f
} else {
b.maxStep = b.currStep
b.maxF = f
}
return b.nextStep((b.minStep + b.maxStep) / 2)
}
// nextStep checks if the new step is equal to the old step.
// This can happen if min and max are the same, or if the step size is infinity,
// both of which indicate the minimization must stop. If the steps are different,
// it sets the new step size and returns the evaluation type and the step. If the steps
// are the same, it returns an error.
func (b *Bisection) nextStep(step float64) (Operation, float64, error) {
if b.currStep == step {
b.lastOp = NoOperation
return b.lastOp, b.currStep, ErrLinesearcherFailure
}
b.currStep = step
b.lastOp = FuncEvaluation
return b.lastOp, b.currStep, nil
}
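
Unlike Backtracking, Bisection also issues GradEvaluation requests, so a driver must supply derivative values as well. Another illustrative sketch (not part of this commit), again on the projection phi(step) = (step-2)^2 with phi'(step) = 2(step-2); for simplicity it evaluates both the value and the derivative at every trial step.

package main

import (
	"fmt"
	"log"

	"github.com/gonum/optimize"
)

func main() {
	phi := func(step float64) float64 { return (step - 2) * (step - 2) }
	dphi := func(step float64) float64 { return 2 * (step - 2) }

	var ls optimize.Bisection
	step := 1.0
	op := ls.Init(phi(0), dphi(0), step)
	for op == optimize.FuncEvaluation || op == optimize.GradEvaluation {
		var err error
		// Bisection reads only the value it asked for, so passing both is safe.
		op, step, err = ls.Iterate(phi(step), dphi(step))
		if err != nil {
			log.Fatal(err)
		}
	}
	fmt.Println("step satisfying the strong Wolfe conditions:", step)
}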

optimize/cg.go

@@ -0,0 +1,313 @@
package optimize
import (
"math"
"github.com/gonum/floats"
)
const (
iterationRestartFactor = 6
angleRestartThreshold = -0.9
)
// CGVariant calculates the scaling parameter, β, used for updating the
// conjugate direction in the nonlinear conjugate gradient (CG) method.
type CGVariant interface {
// Init is called at the first iteration and provides a way to initialize
// any internal state.
Init(loc *Location)
// Beta returns the value of the scaling parameter that is computed
// according to the particular variant of the CG method.
Beta(grad, gradPrev, dirPrev []float64) float64
}
// CG implements the nonlinear conjugate gradient method for solving nonlinear
// unconstrained optimization problems. It is a line search method that
// generates the search directions d_k according to the formula
// d_{k+1} = -∇f_{k+1} + β_k*d_k, d_0 = -∇f_0.
// Variants of the conjugate gradient method differ in the choice of the
// parameter β_k. The conjugate gradient method usually requires fewer function
// evaluations than the gradient descent method and no matrix storage, but
// L-BFGS is usually more efficient.
//
// CG implements a restart strategy that takes the steepest descent direction
// (i.e., d_{k+1} = -∇f_{k+1}) whenever any of the following conditions holds:
//
// - A certain number of iterations has elapsed without a restart. This number
// is controllable via IterationRestartFactor and if equal to 0, it is set to
// a reasonable default based on the problem dimension.
// - The angle between the gradients at two consecutive iterations ∇f_k and
// ∇f_{k+1} is too large.
// - The direction d_{k+1} is not a descent direction.
// - β_k returned from CGVariant.Beta is equal to zero.
//
// The line search for CG must yield step sizes that satisfy the strong Wolfe
// conditions at every iteration, otherwise the generated search direction
// might fail to be a descent direction. The line search should be more
// stringent compared with those for Newton-like methods, which can be achieved
// by setting the gradient constant in the strong Wolfe conditions to a small
// value.
//
// See also William Hager, Hongchao Zhang, A survey of nonlinear conjugate
// gradient methods. Pacific Journal of Optimization, 2 (2006), pp. 35-58, and
// references therein.
type CG struct {
// Linesearcher must satisfy the strong Wolfe conditions at every iteration.
// If Linesearcher == nil, an appropriate default is chosen.
Linesearcher Linesearcher
// Variant implements the particular CG formula for computing β_k.
// If Variant is nil, an appropriate default is chosen.
Variant CGVariant
// InitialStep estimates the initial line search step size, because the CG
// method does not generate well-scaled search directions.
// If InitialStep is nil, an appropriate default is chosen.
InitialStep StepSizer
// IterationRestartFactor determines the frequency of restarts based on the
// problem dimension. The negative gradient direction is taken whenever
// ceil(IterationRestartFactor*(problem dimension)) iterations have elapsed
// without a restart. For medium- and large-scale problems
// IterationRestartFactor should be set to 1; for low-dimensional problems a
// larger value should be chosen. Note that if the ceil function returns 1,
// CG will be identical to gradient descent.
// If IterationRestartFactor is 0, it will be set to 6.
// CG will panic if IterationRestartFactor is negative.
IterationRestartFactor float64
// AngleRestartThreshold sets the threshold angle for restart. The method
// is restarted if the cosine of the angle between two consecutive
// gradients is smaller than or equal to AngleRestartThreshold, that is, if
// ∇f_k·∇f_{k+1} / (|∇f_k| |∇f_{k+1}|) <= AngleRestartThreshold.
// A value of AngleRestartThreshold closer to -1 (successive gradients in
// exact opposite directions) will tend to reduce the number of restarts.
// If AngleRestartThreshold is 0, it will be set to -0.9.
// CG will panic if AngleRestartThreshold is not in the interval [-1, 0].
AngleRestartThreshold float64
ls *LinesearchMethod
restartAfter int
iterFromRestart int
dirPrev []float64
gradPrev []float64
gradPrevNorm float64
}
func (cg *CG) Init(loc *Location) (Operation, error) {
if cg.IterationRestartFactor < 0 {
panic("cg: IterationRestartFactor is negative")
}
if cg.AngleRestartThreshold < -1 || cg.AngleRestartThreshold > 0 {
panic("cg: AngleRestartThreshold not in [-1, 0]")
}
if cg.Linesearcher == nil {
cg.Linesearcher = &MoreThuente{CurvatureFactor: 0.1}
}
if cg.Variant == nil {
cg.Variant = &HestenesStiefel{}
}
if cg.InitialStep == nil {
cg.InitialStep = &FirstOrderStepSize{}
}
if cg.IterationRestartFactor == 0 {
cg.IterationRestartFactor = iterationRestartFactor
}
if cg.AngleRestartThreshold == 0 {
cg.AngleRestartThreshold = angleRestartThreshold
}
if cg.ls == nil {
cg.ls = &LinesearchMethod{}
}
cg.ls.Linesearcher = cg.Linesearcher
cg.ls.NextDirectioner = cg
return cg.ls.Init(loc)
}
func (cg *CG) Iterate(loc *Location) (Operation, error) {
return cg.ls.Iterate(loc)
}
func (cg *CG) InitDirection(loc *Location, dir []float64) (stepSize float64) {
dim := len(loc.X)
cg.restartAfter = int(math.Ceil(cg.IterationRestartFactor * float64(dim)))
cg.iterFromRestart = 0
// The initial direction is always the negative gradient.
copy(dir, loc.Gradient)
floats.Scale(-1, dir)
cg.dirPrev = resize(cg.dirPrev, dim)
copy(cg.dirPrev, dir)
cg.gradPrev = resize(cg.gradPrev, dim)
copy(cg.gradPrev, loc.Gradient)
cg.gradPrevNorm = floats.Norm(loc.Gradient, 2)
cg.Variant.Init(loc)
return cg.InitialStep.Init(loc, dir)
}
func (cg *CG) NextDirection(loc *Location, dir []float64) (stepSize float64) {
copy(dir, loc.Gradient)
floats.Scale(-1, dir)
cg.iterFromRestart++
var restart bool
if cg.iterFromRestart == cg.restartAfter {
// Restart because too many iterations have been taken without a restart.
restart = true
}
gDot := floats.Dot(loc.Gradient, cg.gradPrev)
gNorm := floats.Norm(loc.Gradient, 2)
if gDot <= cg.AngleRestartThreshold*gNorm*cg.gradPrevNorm {
// Restart because the angle between the last two gradients is too large.
restart = true
}
// Compute the scaling factor β_k even when restarting, because cg.Variant
// may be keeping an inner state that needs to be updated at every iteration.
beta := cg.Variant.Beta(loc.Gradient, cg.gradPrev, cg.dirPrev)
if beta == 0 {
// β_k == 0 means that the steepest descent direction will be taken, so
// indicate that the method is in fact being restarted.
restart = true
}
if !restart {
// The method is not being restarted, so update the descent direction.
floats.AddScaled(dir, beta, cg.dirPrev)
if floats.Dot(loc.Gradient, dir) >= 0 {
// Restart because the new direction is not a descent direction.
restart = true
copy(dir, loc.Gradient)
floats.Scale(-1, dir)
}
}
// Get the initial line search step size from the StepSizer even if the
// method was restarted, because StepSizers need to see every iteration.
stepSize = cg.InitialStep.StepSize(loc, dir)
if restart {
// The method was restarted and since the steepest descent direction is
// not related to the previous direction, discard the estimated step
// size from cg.InitialStep and use a step size of 1 instead.
stepSize = 1
// Reset to 0 the counter of iterations taken since the last restart.
cg.iterFromRestart = 0
}
copy(cg.gradPrev, loc.Gradient)
copy(cg.dirPrev, dir)
cg.gradPrevNorm = gNorm
return stepSize
}
func (*CG) Needs() struct {
Gradient bool
Hessian bool
} {
return struct {
Gradient bool
Hessian bool
}{true, false}
}
// FletcherReeves implements the Fletcher-Reeves variant of the CG method that
// computes the scaling parameter β_k according to the formula
// β_k = |∇f_{k+1}|^2 / |∇f_k|^2.
type FletcherReeves struct {
prevNorm float64
}
func (fr *FletcherReeves) Init(loc *Location) {
fr.prevNorm = floats.Norm(loc.Gradient, 2)
}
func (fr *FletcherReeves) Beta(grad, _, _ []float64) (beta float64) {
norm := floats.Norm(grad, 2)
beta = (norm / fr.prevNorm) * (norm / fr.prevNorm)
fr.prevNorm = norm
return beta
}
// PolakRibierePolyak implements the Polak-Ribiere-Polyak variant of the CG
// method that computes the scaling parameter β_k according to the formula
// β_k = max(0, ∇f_{k+1}·y_k / |∇f_k|^2),
// where y_k = ∇f_{k+1} - ∇f_k.
type PolakRibierePolyak struct {
prevNorm float64
}
func (pr *PolakRibierePolyak) Init(loc *Location) {
pr.prevNorm = floats.Norm(loc.Gradient, 2)
}
func (pr *PolakRibierePolyak) Beta(grad, gradPrev, _ []float64) (beta float64) {
norm := floats.Norm(grad, 2)
dot := floats.Dot(grad, gradPrev)
beta = (norm*norm - dot) / (pr.prevNorm * pr.prevNorm)
pr.prevNorm = norm
return math.Max(0, beta)
}
// HestenesStiefel implements the Hestenes-Stiefel variant of the CG method
// that computes the scaling parameter β_k according to the formula
// β_k = max(0, ∇f_{k+1}·y_k / d_k·y_k),
// where y_k = ∇f_{k+1} - ∇f_k.
type HestenesStiefel struct {
y []float64
}
func (hs *HestenesStiefel) Init(loc *Location) {
hs.y = resize(hs.y, len(loc.Gradient))
}
func (hs *HestenesStiefel) Beta(grad, gradPrev, dirPrev []float64) (beta float64) {
floats.SubTo(hs.y, grad, gradPrev)
beta = floats.Dot(grad, hs.y) / floats.Dot(dirPrev, hs.y)
return math.Max(0, beta)
}
// DaiYuan implements the Dai-Yuan variant of the CG method that computes the
// scaling parameter β_k according to the formula
// β_k = |∇f_{k+1}|^2 / d_k·y_k,
// where y_k = ∇f_{k+1} - ∇f_k.
type DaiYuan struct {
y []float64
}
func (dy *DaiYuan) Init(loc *Location) {
dy.y = resize(dy.y, len(loc.Gradient))
}
func (dy *DaiYuan) Beta(grad, gradPrev, dirPrev []float64) (beta float64) {
floats.SubTo(dy.y, grad, gradPrev)
norm := floats.Norm(grad, 2)
return norm * norm / floats.Dot(dirPrev, dy.y)
}
// HagerZhang implements the Hager-Zhang variant of the CG method that computes the
// scaling parameter β_k according to the formula
// β_k = (y_k - 2 d_k |y_k|^2/(d_k·y_k))·∇f_{k+1} / (d_k·y_k),
// where y_k = ∇f_{k+1} - ∇f_k.
type HagerZhang struct {
y []float64
}
func (hz *HagerZhang) Init(loc *Location) {
hz.y = resize(hz.y, len(loc.Gradient))
}
func (hz *HagerZhang) Beta(grad, gradPrev, dirPrev []float64) (beta float64) {
floats.SubTo(hz.y, grad, gradPrev)
dirDotY := floats.Dot(dirPrev, hz.y)
gDotY := floats.Dot(grad, hz.y)
gDotDir := floats.Dot(grad, dirPrev)
yNorm := floats.Norm(hz.y, 2)
return (gDotY - 2*gDotDir*yNorm*yNorm/dirDotY) / dirDotY
}
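
The CGVariant implementations differ only in how β_k is computed from the current gradient, the previous gradient, and the previous direction. A small sketch with made-up gradients (illustrative only, not part of this commit) comparing two of the variants:

package main

import (
	"fmt"

	"github.com/gonum/optimize"
)

func main() {
	gradPrev := []float64{1, -2} // ∇f_k (made up)
	grad := []float64{0.5, 0.5}  // ∇f_{k+1} (made up)
	dirPrev := []float64{-1, 2}  // d_k; ignored by these two variants

	loc := &optimize.Location{Gradient: gradPrev}

	var fr optimize.FletcherReeves
	fr.Init(loc)
	// β_FR = |∇f_{k+1}|^2 / |∇f_k|^2 = 0.5 / 5 = 0.1
	fmt.Println("Fletcher-Reeves:", fr.Beta(grad, gradPrev, dirPrev))

	var pr optimize.PolakRibierePolyak
	pr.Init(loc)
	// β_PRP = max(0, ∇f_{k+1}·y_k / |∇f_k|^2) = (0.5 - (-0.5)) / 5 = 0.2
	fmt.Println("Polak-Ribiere-Polyak:", pr.Beta(grad, gradPrev, dirPrev))
}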


@@ -0,0 +1,141 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lp
import (
"github.com/gonum/floats"
"github.com/gonum/matrix/mat64"
)
// TODO(btracey): Have some sort of preprocessing step for helping to fix A to make it
// full rank?
// TODO(btracey): Reduce rows? Get rid of all zeros, places where only one variable
// is there, etc. Could be implemented with a Reduce function.
// TODO(btracey): Provide method of artificial variables for help when problem
// is infeasible?
// TODO(btracey): Add an lp.Solve that solves an LP in non-standard form.
// Convert converts a General-form LP into a standard form LP.
// The general form of an LP is:
// minimize c^T * x
// s.t. G * x <= h
// A * x = b
// And the standard form is:
// minimize cNew^T * x
// s.t aNew * x = bNew
// x >= 0
// If there are no constraints of the given type, the inputs may be nil.
func Convert(c []float64, g mat64.Matrix, h []float64, a mat64.Matrix, b []float64) (cNew []float64, aNew *mat64.Dense, bNew []float64) {
nVar := len(c)
nIneq := len(h)
// Check input sizes.
if g == nil {
if nIneq != 0 {
panic(badShape)
}
} else {
gr, gc := g.Dims()
if gr != nIneq {
panic(badShape)
}
if gc != nVar {
panic(badShape)
}
}
nEq := len(b)
if a == nil {
if nEq != 0 {
panic(badShape)
}
} else {
ar, ac := a.Dims()
if ar != nEq {
panic(badShape)
}
if ac != nVar {
panic(badShape)
}
}
// Convert the general form LP.
// Derivation:
// 0. Start with general form
// min. c^T * x
// s.t. G * x <= h
// A * x = b
// 1. Introduce slack variables for each constraint
// min. c^T * x
// s.t. G * x + s = h
// A * x = b
// s >= 0
// 2. Add non-negativity constraints for x by splitting x
// into positive and negative components.
// x = xp - xn
// xp >= 0, xn >= 0
// This makes the LP
// min. c^T * xp - c^T xn
// s.t. G * xp - G * xn + s = h
// A * xp - A * xn = b
// xp >= 0, xn >= 0, s >= 0
// 3. Write the above in standard form:
// xt = [xp
// xn
// s ]
// min. [c^T, -c^T, 0] xt
// s.t. [G, -G, I] xt = h
// [A, -A, 0] xt = b
// xt >= 0
// In summary:
// Original LP:
// min. c^T * x
// s.t. G * x <= h
// A * x = b
// Standard Form:
// xt = [xp; xn; s]
// min. [c^T, -c^T, 0] xt
// s.t. [G, -G, I] xt = h
// [A, -A, 0] xt = b
// xt >= 0
// New size of x is [xp, xn, s]
nNewVar := nVar + nVar + nIneq
// Construct cNew = [c; -c; 0]
cNew = make([]float64, nNewVar)
copy(cNew, c)
copy(cNew[nVar:], c)
floats.Scale(-1, cNew[nVar:2*nVar])
// New number of equality constraints is the number of total constraints.
nNewEq := nIneq + nEq
// Construct bNew = [h, b].
bNew = make([]float64, nNewEq)
copy(bNew, h)
copy(bNew[nIneq:], b)
// Construct aNew = [G, -G, I; A, -A, 0].
aNew = mat64.NewDense(nNewEq, nNewVar, nil)
if nIneq != 0 {
aView := (aNew.View(0, 0, nIneq, nVar)).(*mat64.Dense)
aView.Copy(g)
aView = (aNew.View(0, nVar, nIneq, nVar)).(*mat64.Dense)
aView.Scale(-1, g)
aView = (aNew.View(0, 2*nVar, nIneq, nIneq)).(*mat64.Dense)
for i := 0; i < nIneq; i++ {
aView.Set(i, i, 1)
}
}
if nEq != 0 {
aView := (aNew.View(nIneq, 0, nEq, nVar)).(*mat64.Dense)
aView.Copy(a)
aView = (aNew.View(nIneq, nVar, nEq, nVar)).(*mat64.Dense)
aView.Scale(-1, a)
}
return cNew, aNew, bNew
}
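
To make the conversion concrete, the sketch below (a made-up two-variable LP, not part of this commit) runs Convert on one inequality and one equality constraint. The standard form gains a split x = xp - xn plus one slack, so it has 2+2+1 = 5 variables and 2 equality constraints.

package main

import (
	"fmt"

	"github.com/gonum/matrix/mat64"
	"github.com/gonum/optimize/convex/lp"
)

func main() {
	// minimize x0 + x1   s.t.  x0 + 2*x1 <= 4,  x0 - x1 = 1.
	c := []float64{1, 1}
	g := mat64.NewDense(1, 2, []float64{1, 2})
	h := []float64{4}
	a := mat64.NewDense(1, 2, []float64{1, -1})
	b := []float64{1}

	cNew, aNew, bNew := lp.Convert(c, g, h, a, b)
	fmt.Println("cNew:", cNew) // [1 1 -1 -1 0]
	fmt.Println("bNew:", bNew) // [4 1]
	fmt.Printf("aNew:\n%v\n", mat64.Formatted(aNew))
	// rows: [1 2 -1 -2 1] and [1 -1 -1 1 0]
}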


@@ -0,0 +1,619 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package lp implements routines for solving linear programs.
package lp
import (
"errors"
"fmt"
"math"
"github.com/gonum/floats"
"github.com/gonum/matrix/mat64"
)
// TODO(btracey): Could have a solver structure with an abstract factorizer. With
// this transformation the same high-level code could handle both Dense and Sparse.
// TODO(btracey): Need to improve error handling. Only want to panic if condition number inf.
// TODO(btracey): Performance enhancements. There are currently lots of linear
// solves that can be improved by doing rank-one updates. For example, the swap
// step is just a rank-one update.
// TODO(btracey): Better handling on the linear solve errors. If the condition
// number is not inf and the equation solved "well", should keep moving.
var (
ErrBland = errors.New("lp: bland: all replacements are negative or cause ill-conditioned ab")
ErrInfeasible = errors.New("lp: problem is infeasible")
ErrLinSolve = errors.New("lp: linear solve failure")
ErrUnbounded = errors.New("lp: problem is unbounded")
ErrSingular = errors.New("lp: A is singular")
ErrZeroColumn = errors.New("lp: A has a column of all zeros")
ErrZeroRow = errors.New("lp: A has a row of all zeros")
)
var (
badShape = "lp: size mismatch"
)
// TODO(btracey): Should these tolerances be part of a settings struct?
const (
// initPosTol is the tolerance on the initial condition being feasible. Strictly,
// the x should be positive, but instead it must be greater than -initPosTol.
initPosTol = 1e-13
// blandNegTol is the tolerance on the value being greater than 0 in the bland test.
blandNegTol = 1e-14
// rRoundTol is the tolerance for rounding values to zero when testing if
// constraints are met.
rRoundTol = 1e-13
// dRoundTol is the tolerance for testing if values are zero for the problem
// being unbounded.
dRoundTol = 1e-13
// phaseIZeroTol tests if the Phase I problem returned a feasible solution.
phaseIZeroTol = 1e-12
// blandZeroTol is the tolerance on testing if the bland solution can move.
blandZeroTol = 1e-12
)
// Simplex solves a linear program in standard form using Dantzig's Simplex
// algorithm. The standard form of a linear program is:
// minimize c^T x
// s.t. A*x = b
// x >= 0 .
// The input tol sets how close to optimal the returned solution must be (specifically,
// the iteration stops when the maximal reduced cost is below tol). An error will be returned if the
// problem is infeasible or unbounded. In rare cases, numeric errors can cause
// the Simplex to fail. In this case, an error will be returned along with the
// most recently found feasible solution.
//
// The Convert function can be used to transform a general LP into standard form.
//
// The input matrix A must have full rank and may not contain any columns with
// all zeros. Furthermore, len(c) must equal the number of columns of A, and len(b)
// must equal the number of rows of A. Simplex will panic if these conditions are
// not met.
//
// initialBasic can be used to set the initial set of indices for a feasible
// solution to the LP. If an initial feasible solution is not known, initialBasic
// may be nil. If initialBasic is non-nil, len(initialBasic) must equal the number
// of rows of A and must be an actual feasible solution to the LP, otherwise
// Simplex will panic.
//
// A description of the Simplex algorithm can be found in Ch. 8 of
// Strang, Gilbert. "Linear Algebra and Applications." Academic, New York (1976).
// For a detailed video introduction, see lectures 11-13 of UC Math 352
// https://www.youtube.com/watch?v=ESzYPFkY3og&index=11&list=PLh464gFUoJWOmBYla3zbZbc4nv2AXez6X.
func Simplex(c []float64, A mat64.Matrix, b []float64, tol float64, initialBasic []int) (optF float64, optX []float64, err error) {
ans, x, _, err := simplex(initialBasic, c, A, b, tol)
return ans, x, err
}
func simplex(initialBasic []int, c []float64, A mat64.Matrix, b []float64, tol float64) (float64, []float64, []int, error) {
err := verifyInputs(initialBasic, c, A, b)
if err != nil {
if err == ErrUnbounded {
return math.Inf(-1), nil, nil, ErrUnbounded
}
return math.NaN(), nil, nil, err
}
m, n := A.Dims()
// There is at least one optimal solution to the LP which is at the intersection
// to a set of constraint boundaries. For a standard form LP with n variables
// and m equality constraints, at least n-m elements of x must equal zero
// at optimality. The Simplex algorithm solves the standard-form LP by starting
// at an initial constraint vertex and successively moving to adjacent constraint
// vertices. At every vertex, the set of non-zero x values is the "basic
// feasible solution". The list of non-zero x's are maintained in basicIdxs,
// the respective columns of A are in ab, and the actual non-zero values of
// x are in xb.
//
// The LP is equality constrained such that A * x = b. This can be expanded
// to
// ab * xb + an * xn = b
// where ab are the columns of A in the basic set, and an are all of the
// other columns. Since each element of xn is zero by definition, this means
// that for all feasible solutions xb = ab^-1 * b.
//
// Before the simplex algorithm can start, an initial feasible solution must
// be found. If initialBasic is non-nil a feasible solution has been supplied.
// Otherwise the "Phase I" problem must be solved to find an initial feasible
// solution.
var basicIdxs []int // The indices of the non-zero x values.
var ab *mat64.Dense // The subset of columns of A listed in basicIdxs.
var xb []float64 // The non-zero elements of x. xb = ab^-1 b
if initialBasic != nil {
// InitialBasic supplied. Panic if incorrect length or infeasible.
if len(initialBasic) != m {
panic("lp: incorrect number of initial vectors")
}
ab = mat64.NewDense(m, len(initialBasic), nil)
extractColumns(ab, A, initialBasic)
xb = make([]float64, m)
err = initializeFromBasic(xb, ab, b)
if err != nil {
panic(err)
}
basicIdxs = make([]int, len(initialBasic))
copy(basicIdxs, initialBasic)
} else {
// No initial basis supplied. Solve the Phase I problem.
basicIdxs, ab, xb, err = findInitialBasic(A, b)
if err != nil {
return math.NaN(), nil, nil, err
}
}
// basicIdxs contains the indexes for an initial feasible solution,
// ab contains the extracted columns of A, and xb contains the feasible
// solution. All x not in the basic set are 0 by construction.
// nonBasicIdx is the set of nonbasic variables.
nonBasicIdx := make([]int, 0, n-m)
inBasic := make(map[int]struct{})
for _, v := range basicIdxs {
inBasic[v] = struct{}{}
}
for i := 0; i < n; i++ {
_, ok := inBasic[i]
if !ok {
nonBasicIdx = append(nonBasicIdx, i)
}
}
// cb is the subset of c for the basic variables. an and cn
// are the equivalents to ab and cb but for the nonbasic variables.
cb := make([]float64, len(basicIdxs))
for i, idx := range basicIdxs {
cb[i] = c[idx]
}
cn := make([]float64, len(nonBasicIdx))
for i, idx := range nonBasicIdx {
cn[i] = c[idx]
}
an := mat64.NewDense(m, len(nonBasicIdx), nil)
extractColumns(an, A, nonBasicIdx)
bVec := mat64.NewVector(len(b), b)
cbVec := mat64.NewVector(len(cb), cb)
// Temporary data needed each iteration. (Described later)
r := make([]float64, n-m)
move := make([]float64, m)
// Solve the linear program starting from the initial feasible set. This is
// the "Phase 2" problem.
//
// Algorithm:
// 1) Compute the "reduced costs" for the non-basic variables. The reduced
// costs are the Lagrange multipliers of the constraints.
// r = cn - an^T * ab^-T * cb
// 2) If all of the reduced costs are positive, no improvement is possible,
// and the solution is optimal (xn can only increase because of
// non-negativity constraints). Otherwise, the solution can be improved and
// one element will be exchanged in the basic set.
// 3) Choose the x_n with the most negative value of r. Call this value xe.
// This variable will be swapped into the basic set.
// 4) Increase xe until the next constraint boundary is met. This will happen
// when the first element in xb becomes 0. The distance xe can increase before
// a given element in xb becomes negative can be found from
// xb = Ab^-1 b - Ab^-1 An xn
// = Ab^-1 b - Ab^-1 Ae xe
// = bhat + d x_e
// xe = bhat_i / - d_i
// where Ae is the column of A corresponding to xe.
// The constraining basic index is the first index for which this is true,
// so remove the element which is min_i (bhat_i / -d_i), assuming d_i is negative.
// If no d_i is less than 0, then the problem is unbounded.
// 5) If the new xe is 0 (that is, bhat_i == 0), then this location is at
// the intersection of several constraints. Use the Bland rule instead
// of the rule in step 4 to avoid cycling.
for {
// Compute reduced costs -- r = cn - an^T ab^-T cb
var tmp mat64.Vector
err = tmp.SolveVec(ab.T(), cbVec)
if err != nil {
break
}
data := make([]float64, n-m)
tmp2 := mat64.NewVector(n-m, data)
tmp2.MulVec(an.T(), &tmp)
floats.SubTo(r, cn, data)
// Replace the most negative element in the simplex. If there are no
// negative entries then the optimal solution has been found.
minIdx := floats.MinIdx(r)
if r[minIdx] >= -tol {
break
}
for i, v := range r {
if math.Abs(v) < rRoundTol {
r[i] = 0
}
}
// Compute the moving distance.
err = computeMove(move, minIdx, A, ab, xb, nonBasicIdx)
if err != nil {
if err == ErrUnbounded {
return math.Inf(-1), nil, nil, ErrUnbounded
}
break
}
// Replace the basic index along the tightest constraint.
replace := floats.MinIdx(move)
if move[replace] <= 0 {
replace, minIdx, err = replaceBland(A, ab, xb, basicIdxs, nonBasicIdx, r, move)
if err != nil {
if err == ErrUnbounded {
return math.Inf(-1), nil, nil, ErrUnbounded
}
break
}
}
// Replace the constrained basicIdx with the newIdx.
basicIdxs[replace], nonBasicIdx[minIdx] = nonBasicIdx[minIdx], basicIdxs[replace]
cb[replace], cn[minIdx] = cn[minIdx], cb[replace]
tmpCol1 := mat64.Col(nil, replace, ab)
tmpCol2 := mat64.Col(nil, minIdx, an)
ab.SetCol(replace, tmpCol2)
an.SetCol(minIdx, tmpCol1)
// Compute the new xb.
xbVec := mat64.NewVector(len(xb), xb)
err = xbVec.SolveVec(ab, bVec)
if err != nil {
break
}
}
// Found the optimum successfully or died trying. The basic variables get
// their values, and the non-basic variables are all zero.
opt := floats.Dot(cb, xb)
xopt := make([]float64, n)
for i, v := range basicIdxs {
xopt[v] = xb[i]
}
return opt, xopt, basicIdxs, err
}
// computeMove computes, for each basic variable, how far the entering variable
// can be increased before that basic variable becomes negative. The results
// are stored into move.
func computeMove(move []float64, minIdx int, A mat64.Matrix, ab *mat64.Dense, xb []float64, nonBasicIdx []int) error {
// Find ae.
col := mat64.Col(nil, nonBasicIdx[minIdx], A)
aCol := mat64.NewVector(len(col), col)
// d = - Ab^-1 Ae
nb, _ := ab.Dims()
d := make([]float64, nb)
dVec := mat64.NewVector(nb, d)
err := dVec.SolveVec(ab, aCol)
if err != nil {
return ErrLinSolve
}
floats.Scale(-1, d)
for i, v := range d {
if math.Abs(v) < dRoundTol {
d[i] = 0
}
}
// If no di < 0, then problem is unbounded.
if floats.Min(d) >= 0 {
return ErrUnbounded
}
// move = bhat_i / - d_i, assuming d is negative.
bHat := xb // ab^-1 b
for i, v := range d {
if v >= 0 {
move[i] = math.Inf(1)
} else {
move[i] = bHat[i] / math.Abs(v)
}
}
return nil
}
// replaceBland uses the Bland rule to find the indices to swap if the minimum
// move is 0. The indices to be swapped are replace and minIdx (following the
// nomenclature in the main routine).
func replaceBland(A mat64.Matrix, ab *mat64.Dense, xb []float64, basicIdxs, nonBasicIdx []int, r, move []float64) (replace, minIdx int, err error) {
m, _ := A.Dims()
// Use the traditional Bland rule, except don't replace a constraint which
// causes the new ab to be singular.
for i, v := range r {
if v > -blandNegTol {
continue
}
minIdx = i
err = computeMove(move, minIdx, A, ab, xb, nonBasicIdx)
if err != nil {
// Either unbounded or something went wrong.
return -1, -1, err
}
replace = floats.MinIdx(move)
if math.Abs(move[replace]) > blandZeroTol {
// Large enough that it shouldn't be a problem
return replace, minIdx, nil
}
// Find a zero index where replacement is non-singular.
biCopy := make([]int, len(basicIdxs))
for replace, v := range move {
if v > blandZeroTol {
continue
}
copy(biCopy, basicIdxs)
biCopy[replace] = nonBasicIdx[minIdx]
abTmp := mat64.NewDense(m, len(biCopy), nil)
extractColumns(abTmp, A, biCopy)
// If the condition number is reasonable, use this index.
if mat64.Cond(abTmp, 1) < 1e16 {
return replace, minIdx, nil
}
}
}
return -1, -1, ErrBland
}
func verifyInputs(initialBasic []int, c []float64, A mat64.Matrix, b []float64) error {
m, n := A.Dims()
if len(c) != n {
panic("lp: c vector incorrect length")
}
if len(b) != m {
panic("lp: b vector incorrect length")
}
if len(c) != n {
panic("lp: c vector incorrect length")
}
if len(initialBasic) != 0 && len(initialBasic) != m {
panic("lp: initialBasic incorrect length")
}
// Do some sanity checks so that ab does not become singular during the
// simplex solution. If the ZeroRow checks are removed then the code for
// finding a set of linearly independent columns must be improved.
// Check that if a row of A contains only zero elements, then the corresponding
// element of b is also zero, otherwise the problem is infeasible.
// If the row and b[i] are both zero, return ErrZeroRow.
for i := 0; i < m; i++ {
isZero := true
for j := 0; j < n; j++ {
if A.At(i, j) != 0 {
isZero = false
break
}
}
if isZero && b[i] != 0 {
// Infeasible
return ErrInfeasible
} else if isZero {
return ErrZeroRow
}
}
// Check that if a column of A contains only zero elements, then the corresponding
// element of c is non-negative (otherwise the problem is unbounded); if so, return ErrZeroColumn.
for j := 0; j < n; j++ {
isZero := true
for i := 0; i < m; i++ {
if A.At(i, j) != 0 {
isZero = false
break
}
}
if isZero && c[j] < 0 {
return ErrUnbounded
} else if isZero {
return ErrZeroColumn
}
}
return nil
}
// initializeFromBasic initializes the basic feasible solution given a set of
// basic indices. It extracts the columns of A specified by basicIdxs and finds
// the x values at that location. These are stored into xb.
//
// If the columns of A are not linearly independent or if the initial set is not
// feasible, an error is returned.
func initializeFromBasic(xb []float64, ab *mat64.Dense, b []float64) error {
m, _ := ab.Dims()
if len(xb) != m {
panic("simplex: bad xb length")
}
xbMat := mat64.NewVector(m, xb)
err := xbMat.SolveVec(ab, mat64.NewVector(m, b))
if err != nil {
return errors.New("lp: subcolumns of A for supplied initial basic singular")
}
// The solve ensures that the equality constraints are met (ab * xb = b).
// Thus, the solution is feasible if and only if all of the x's are positive.
allPos := true
for _, v := range xb {
if v < -initPosTol {
allPos = false
break
}
}
if !allPos {
return errors.New("lp: supplied subcolumns not a feasible solution")
}
return nil
}
// extractColumns copies the columns specified by cols into the columns of dst.
func extractColumns(dst *mat64.Dense, A mat64.Matrix, cols []int) {
r, c := dst.Dims()
ra, _ := A.Dims()
if ra != r {
panic("simplex: row mismatch")
}
if c != len(cols) {
panic("simplex: column mismatch")
}
col := make([]float64, r)
for j, idx := range cols {
mat64.Col(col, idx, A)
dst.SetCol(j, col)
}
}
// findInitialBasic finds an initial basic solution, and returns the basic
// indices, ab, and xb.
func findInitialBasic(A mat64.Matrix, b []float64) ([]int, *mat64.Dense, []float64, error) {
m, n := A.Dims()
basicIdxs := findLinearlyIndependent(A)
if len(basicIdxs) != m {
return nil, nil, nil, ErrSingular
}
// It may be that this linearly independent basis is also a feasible set. If
// so, the Phase I problem can be avoided.
ab := mat64.NewDense(m, len(basicIdxs), nil)
extractColumns(ab, A, basicIdxs)
xb := make([]float64, m)
err := initializeFromBasic(xb, ab, b)
if err == nil {
return basicIdxs, ab, xb, nil
}
// This set was not feasible. Instead the "Phase I" problem must be solved
// to find an initial feasible set of basis.
//
// Method: Construct an LP whose optimal solution is a feasible solution
// to the original LP.
// 1) Introduce an artificial variable x_{n+1}.
// 2) Let x_j be the most negative element of x_b (largest constraint violation).
// 3) Add the artificial variable to A with:
// a_{n+1} = b - \sum_{i in basicIdxs} a_i + a_j
// swap j with n+1 in the basicIdxs.
// 4) Define a new LP:
// minimize x_{n+1}
// subject to [A A_{n+1}][x_1 ... x_{n+1}] = b
// x, x_{n+1} >= 0
// 5) Solve this LP. If x_{n+1} != 0, then the problem is infeasible, otherwise
// the found basis can be used as an initial basis for phase II.
//
// The extra column in Step 3 is defined such that the vector of 1s is an
// initial feasible solution.
// Find the largest constraint violator.
// Compute a_{n+1} = b - \sum{i in basicIdxs}a_i + a_j. j is in basicIDx, so
// instead just subtract the basicIdx columns that are not minIDx.
minIdx := floats.MinIdx(xb)
aX1 := make([]float64, m)
copy(aX1, b)
col := make([]float64, m)
for i, v := range basicIdxs {
if i == minIdx {
continue
}
mat64.Col(col, v, A)
floats.Sub(aX1, col)
}
// Construct the new LP.
// aNew = [A, a_{n+1}]
// bNew = b
// cNew = 1 for x_{n+1}
aNew := mat64.NewDense(m, n+1, nil)
aNew.Copy(A)
aNew.SetCol(n, aX1)
basicIdxs[minIdx] = n // swap minIdx with n in the basic set.
c := make([]float64, n+1)
c[n] = 1
// Solve the Phase I linear program.
_, xOpt, newBasic, err := simplex(basicIdxs, c, aNew, b, 1e-10)
if err != nil {
return nil, nil, nil, fmt.Errorf("lp: error finding feasible basis: %s", err)
}
// The original LP is infeasible if the added variable has non-zero value
// in the optimal solution to the Phase I problem.
if math.Abs(xOpt[n]) > phaseIZeroTol {
return nil, nil, nil, ErrInfeasible
}
// The basis found in Phase I is a feasible solution to the original LP if
// the added variable is not in the basis.
addedIdx := -1
for i, v := range newBasic {
if v == n {
addedIdx = i
}
xb[i] = xOpt[v]
}
if addedIdx == -1 {
extractColumns(ab, A, newBasic)
return newBasic, ab, xb, nil
}
// The value of the added variable is in the basis, but it has a zero value.
// See if exchanging another variable into the basic set finds a feasible
// solution.
basicMap := make(map[int]struct{})
for _, v := range newBasic {
basicMap[v] = struct{}{}
}
var set bool
for i := range xOpt {
if _, inBasic := basicMap[i]; inBasic {
continue
}
newBasic[addedIdx] = i
if set {
mat64.Col(col, i, A)
ab.SetCol(addedIdx, col)
} else {
extractColumns(ab, A, newBasic)
set = true
}
err := initializeFromBasic(xb, ab, b)
if err == nil {
return newBasic, ab, xb, nil
}
}
return nil, nil, nil, ErrInfeasible
}
// findLinearlyIndependent finds a set of linearly independent columns of A, and
// returns the column indexes of the linearly independent columns.
func findLinearlyIndependent(A mat64.Matrix) []int {
m, n := A.Dims()
idxs := make([]int, 0, m)
columns := mat64.NewDense(m, m, nil)
newCol := make([]float64, m)
// Walk in reverse order because slack variables are typically the last columns
// of A.
for i := n - 1; i >= 0; i-- {
if len(idxs) == m {
break
}
mat64.Col(newCol, i, A)
columns.SetCol(len(idxs), newCol)
if len(idxs) == 0 {
// A column is linearly independent from the null set.
// If all-zero columns of A are allowed, this code needs to be adjusted.
idxs = append(idxs, i)
continue
}
if mat64.Cond(columns.View(0, 0, m, len(idxs)+1), 1) > 1e12 {
// Not linearly independent.
continue
}
idxs = append(idxs, i)
}
return idxs
}



@@ -0,0 +1,29 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lp_test
import (
"fmt"
"log"
"github.com/gonum/matrix/mat64"
"github.com/gonum/optimize/convex/lp"
)
func ExampleSimplex() {
c := []float64{-1, -2, 0, 0}
A := mat64.NewDense(2, 4, []float64{-1, 2, 1, 0, 3, 1, 0, 1})
b := []float64{4, 9}
opt, x, err := lp.Simplex(c, A, b, 0, nil)
if err != nil {
log.Fatal(err)
}
fmt.Printf("opt: %v\n", opt)
fmt.Printf("x: %v\n", x)
// Output:
// opt: -8
// x: [2 3 0 0]
}
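
The ExampleSimplex above starts from a problem already in standard form. Combining it with Convert gives a rough recipe for general-form problems; this is an illustrative sketch (not an API added by this commit), solving the same LP written with inequality constraints and recovering x = xp - xn at the end.

package main

import (
	"fmt"
	"log"

	"github.com/gonum/matrix/mat64"
	"github.com/gonum/optimize/convex/lp"
)

func main() {
	// minimize -x0 - 2*x1   s.t.  -x0 + 2*x1 <= 4,  3*x0 + x1 <= 9.
	c := []float64{-1, -2}
	g := mat64.NewDense(2, 2, []float64{-1, 2, 3, 1})
	h := []float64{4, 9}

	cNew, aNew, bNew := lp.Convert(c, g, h, nil, nil)
	opt, xStd, err := lp.Simplex(cNew, aNew, bNew, 1e-10, nil)
	if err != nil {
		log.Fatal(err)
	}
	// Recover the original variables from the split representation.
	x := make([]float64, len(c))
	for i := range x {
		x[i] = xStd[i] - xStd[len(c)+i]
	}
	fmt.Println("opt:", opt) // -8
	fmt.Println("x:", x)     // [2 3]
}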

optimize/doc.go

@@ -0,0 +1,6 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package optimize implements algorithms for finding the optimum value of functions.
package optimize

optimize/errors.go

@@ -0,0 +1,72 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import (
"errors"
"fmt"
"math"
)
var (
// ErrZeroDimensional signifies an optimization was called with an input of length 0.
ErrZeroDimensional = errors.New("optimize: zero dimensional input")
// ErrLinesearcherFailure signifies that a Linesearcher has iterated too
// many times. This may occur if the gradient tolerance is set too low.
ErrLinesearcherFailure = errors.New("linesearch: failed to converge")
// ErrNonDescentDirection signifies that LinesearchMethod has received a
// search direction from a NextDirectioner in which the function is not
// decreasing.
ErrNonDescentDirection = errors.New("linesearch: non-descent search direction")
// ErrNoProgress signifies that LinesearchMethod cannot make further
// progress because there is no change in location after Linesearcher step
// due to floating-point arithmetic.
ErrNoProgress = errors.New("linesearch: no change in location after Linesearcher step")
// ErrLinesearcherBound signifies that a Linesearcher reached a step that
// lies out of allowed bounds.
ErrLinesearcherBound = errors.New("linesearch: step out of bounds")
)
// ErrFunc is returned when an initial function value is invalid. The error
// state may be either +Inf or NaN. ErrFunc satisfies the error interface.
type ErrFunc float64
func (err ErrFunc) Error() string {
switch {
case math.IsInf(float64(err), 1):
return "optimize: initial function value is infinite"
case math.IsNaN(float64(err)):
return "optimize: initial function value is NaN"
default:
panic("optimize: bad ErrFunc")
}
}
// ErrGrad is returned when an initial gradient is invalid. The error gradient
// may be either ±Inf or NaN. ErrGrad satisfies the error interface.
type ErrGrad struct {
Grad float64 // Grad is the invalid gradient value.
Index int // Index is the position at which the invalid gradient was found.
}
func (err ErrGrad) Error() string {
switch {
case math.IsInf(err.Grad, 0):
return fmt.Sprintf("optimize: initial gradient is infinite at position %d", err.Index)
case math.IsNaN(err.Grad):
return fmt.Sprintf("optimize: initial gradient is NaN at position %d", err.Index)
default:
panic("optimize: bad ErrGrad")
}
}
// List of shared panic strings
var (
badProblem = "optimize: objective function is undefined"
)


@@ -0,0 +1,40 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import "math"
// FunctionConverge tests for the convergence of function values. See comment
// in Settings.
type FunctionConverge struct {
Absolute float64
Relative float64
Iterations int
best float64
iter int
}
func (fc *FunctionConverge) Init(f float64) {
fc.best = f
fc.iter = 0
}
func (fc *FunctionConverge) FunctionConverged(f float64) Status {
if fc.Iterations == 0 {
return NotTerminated
}
maxAbs := math.Max(math.Abs(f), math.Abs(fc.best))
if f < fc.best && fc.best-f > fc.Relative*maxAbs+fc.Absolute {
fc.best = f
fc.iter = 0
return NotTerminated
}
fc.iter++
if fc.iter < fc.Iterations {
return NotTerminated
}
return FunctionConvergence
}
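
A quick sketch of the behaviour (made-up values, not part of this commit): the best value is updated whenever an improvement exceeds Absolute + Relative*max(|f|, |best|), and FunctionConvergence is returned once Iterations successive calls fail to improve it.

package main

import (
	"fmt"

	"github.com/gonum/optimize"
)

func main() {
	fc := optimize.FunctionConverge{
		Absolute:   1e-6,
		Iterations: 2,
	}
	fc.Init(10)
	fmt.Println(fc.FunctionConverged(9))        // NotTerminated: improvement, counter reset
	fmt.Println(fc.FunctionConverged(9))        // NotTerminated: one stale iteration
	fmt.Println(fc.FunctionConverged(9 + 1e-9)) // FunctionConvergence: two stale iterations
}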



@@ -0,0 +1,326 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package functions
import "testing"
func TestBeale(t *testing.T) {
tests := []funcTest{
{
X: []float64{1, 1},
F: 14.203125,
Gradient: []float64{0, 27.75},
},
{
X: []float64{1, 4},
F: 4624.453125,
Gradient: []float64{8813.25, 6585},
},
}
testFunction(Beale{}, tests, t)
}
func TestBiggsEXP2(t *testing.T) {
tests := []funcTest{
{
X: []float64{1, 2},
F: 32.26255055084012,
Gradient: []float64{8.308203800550878, -25.32607145221645},
},
}
testFunction(BiggsEXP2{}, tests, t)
}
func TestBiggsEXP3(t *testing.T) {
tests := []funcTest{
{
X: []float64{1, 2, 1},
F: 1.598844540607779,
Gradient: []float64{1.0633795027631927, -0.5196392672262664, -0.3180919155433357},
},
}
testFunction(BiggsEXP3{}, tests, t)
}
func TestBiggsEXP4(t *testing.T) {
tests := []funcTest{
{
X: []float64{1, 2, 1, 1},
F: 1.598844540607779,
Gradient: []float64{1.0633795027631927, -0.5196392672262664,
-0.44245622408151464, -0.3180919155433357},
},
}
testFunction(BiggsEXP4{}, tests, t)
}
func TestBiggsEXP5(t *testing.T) {
tests := []funcTest{
{
X: []float64{1, 2, 1, 1, 1},
F: 13.386420552801937,
Gradient: []float64{-6.54665204477596, 3.5259856535515293,
14.36984212995392, -9.522506150695783, -19.639956134327882},
},
}
testFunction(BiggsEXP5{}, tests, t)
}
func TestBiggsEXP6(t *testing.T) {
tests := []funcTest{
{
X: []float64{1, 2, 1, 1, 1, 1},
F: 0.77907007565597,
Gradient: []float64{-0.149371887533426, -0.183163468182936,
-1.483958013575642, 1.428277503849742, -0.149371887533426,
-1.483958013575642},
},
}
testFunction(BiggsEXP6{}, tests, t)
}
func TestBox3D(t *testing.T) {
tests := []funcTest{
{
X: []float64{0, 10, 20},
F: 1031.1538106093985,
Gradient: []float64{98.22343149849218, -2.11937420675874, 112.38817362220350},
},
}
testFunction(Box3D{}, tests, t)
}
func TestBrownBadlyScaled(t *testing.T) {
tests := []funcTest{
{
X: []float64{1, 1},
F: 999998000003,
Gradient: []float64{-2e+6, -4e-6},
},
}
testFunction(BrownBadlyScaled{}, tests, t)
}
// TODO(vladimir-ch): The minimum of BrownAndDennis is not known accurately
// enough, which would force defaultGradTol to be unnecessarily large for the
// tests to pass. This is the only function that causes problems, so disable
// this test until the minimum is more accurate.
// func TestBrownAndDennis(t *testing.T) {
// tests := []funcTest{
// {
// X: []float64{25, 5, -5, -1},
// F: 7926693.33699744,
// Gradient: []float64{1149322.836365895, 1779291.674339785, -254579.585463521, -173400.429253115},
// },
// }
// testFunction(BrownAndDennis{}, tests, t)
// }
func TestExtendedPowellSingular(t *testing.T) {
tests := []funcTest{
{
X: []float64{3, -1, 0, 3},
F: 95,
Gradient: []float64{-14, -144, -22, 30},
},
{
X: []float64{3, -1, 0, 3, 3, -1, 0, 3},
F: 190,
Gradient: []float64{-14, -144, -22, 30, -14, -144, -22, 30},
},
}
testFunction(ExtendedPowellSingular{}, tests, t)
}
func TestExtendedRosenbrock(t *testing.T) {
tests := []funcTest{
{
X: []float64{-1.2, 1},
F: 24.2,
Gradient: []float64{-215.6, -88},
},
{
X: []float64{-1.2, 1, -1.2},
F: 508.2,
Gradient: []float64{-215.6, 792, -440},
},
{
X: []float64{-1.2, 1, -1.2, 1},
F: 532.4,
Gradient: []float64{-215.6, 792, -655.6, -88},
},
}
testFunction(ExtendedRosenbrock{}, tests, t)
}
func TestGaussian(t *testing.T) {
tests := []funcTest{
{
X: []float64{0.4, 1, 0},
F: 3.88810699116688e-06,
Gradient: []float64{7.41428466839991e-03, -7.44126392165149e-04, -5.30189685421989e-20},
},
}
testFunction(Gaussian{}, tests, t)
}
func TestGulfResearchAndDevelopment(t *testing.T) {
tests := []funcTest{
{
X: []float64{5, 2.5, 0.15},
F: 12.11070582556949,
Gradient: []float64{2.0879783574289799, 0.0345792619697154, -39.6766801029386400},
},
}
testFunction(GulfResearchAndDevelopment{}, tests, t)
}
func TestHelicalValley(t *testing.T) {
tests := []funcTest{
{
X: []float64{-1, 0, 0},
F: 2500,
Gradient: []float64{0, -1.59154943091895e+03, -1e+03},
},
}
testFunction(HelicalValley{}, tests, t)
}
func TestPenaltyI(t *testing.T) {
tests := []funcTest{
{
X: []float64{1, 2, 3, 4},
F: 885.06264,
Gradient: []float64{119, 238.00002, 357.00004, 476.00006},
},
{
X: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
F: 148032.56535,
Gradient: []float64{1539, 3078.00002, 4617.00004, 6156.00006,
7695.00008, 9234.0001, 10773.00012, 12312.00014, 13851.00016, 15390.00018},
},
}
testFunction(PenaltyI{}, tests, t)
}
func TestPenaltyII(t *testing.T) {
tests := []funcTest{
{
X: []float64{0.5, 0.5, 0.5, 0.5},
F: 2.34000880546302,
Gradient: []float64{12.59999952896435, 8.99999885134508,
5.99999776830493, 2.99999875380719},
},
{
X: []float64{0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5},
F: 162.65277656596712,
Gradient: []float64{255.5999995289644, 229.4999988513451,
203.9999977683049, 178.4999965713605, 152.9999952485322,
127.4999937865809, 101.9999921708749, 76.4999903852436,
50.9999884118158, 25.4999938418451},
},
}
testFunction(PenaltyII{}, tests, t)
}
func TestPowellBadlyScaled(t *testing.T) {
tests := []funcTest{
{
X: []float64{0, 1},
F: 1.13526171734838,
Gradient: []float64{-2.00007355588823e+04, -2.70596990584991e-01},
},
}
testFunction(PowellBadlyScaled{}, tests, t)
}
func TestTrigonometric(t *testing.T) {
tests := []funcTest{
{
X: []float64{0.5, 0.5},
F: 0.0126877761614045,
Gradient: []float64{-0.00840962732040673, -0.09606967736232540},
},
{
X: []float64{0.2, 0.2, 0.2, 0.2, 0.2},
F: 0.0116573789904718,
Gradient: []float64{0.04568602319608119, -0.00896259022885634,
-0.04777056509084983, -0.07073790138989976, -0.07786459912600564},
},
{
X: []float64{0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1},
F: 0.00707575946622261,
Gradient: []float64{0.03562782195259399, 0.01872017956076182,
0.00380754216611998, -0.00911009023133202, -0.02003271763159338,
-0.02896034003466506, -0.03589295744054654, -0.04083056984923782,
-0.04377317726073873, -0.04472077967504980},
},
}
testFunction(Trigonometric{}, tests, t)
}
func TestVariablyDimensioned(t *testing.T) {
tests := []funcTest{
{
X: []float64{0.5, 0},
F: 46.5625,
Gradient: []float64{-68.5, -137},
},
{
X: []float64{2.0 / 3, 1.0 / 3, 0},
F: 497.60493827160514,
Gradient: []float64{-416.518518518519, -833.037037037037, -1249.555555555556},
},
{
X: []float64{0.75, 0.5, 0.25, 0},
F: 3222.1875,
Gradient: []float64{-1703, -3406, -5109, -6812},
},
}
testFunction(VariablyDimensioned{}, tests, t)
}
func TestWatson(t *testing.T) {
tests := []funcTest{
{
X: []float64{0, 0},
F: 30,
Gradient: []float64{0, -60},
},
{
X: []float64{0, 0, 0, 0, 0, 0},
F: 30,
Gradient: []float64{0, -60, -60, -61.034482758620697,
-62.068965517241381, -63.114928861371936},
},
{
X: []float64{0, 0, 0, 0, 0, 0, 0, 0, 0},
F: 30,
Gradient: []float64{0, -60, -60, -61.034482758620697,
-62.068965517241381, -63.114928861371936, -64.172372791012350,
-65.241283655050239, -66.321647802373235},
},
{
X: []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
F: 30,
Gradient: []float64{0, -60, -60, -61.034482758620697,
-62.068965517241381, -63.114928861371936, -64.172372791012350,
-65.241283655050239, -66.321647802373235, -67.413448880864095,
-68.516667837400661, -69.631282933991471},
},
}
testFunction(Watson{}, tests, t)
}
func TestWood(t *testing.T) {
tests := []funcTest{
{
X: []float64{-3, -1, -3, -1},
F: 19192,
Gradient: []float64{-12008, -2080, -10808, -1880},
},
}
testFunction(Wood{}, tests, t)
}

View File

@@ -0,0 +1,252 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package functions
import (
"fmt"
"math"
)
// MinimalSurface implements a finite element approximation to a minimal
// surface problem: determine the surface with minimal area and given boundary
// values in a unit square centered at the origin.
//
// References:
// Averick, M.B., Carter, R.G., Moré, J.J., Xue, G.-L.: The Minpack-2 Test
// Problem Collection. Preprint MCS-P153-0692, Argonne National Laboratory (1992)
type MinimalSurface struct {
bottom, top []float64
left, right []float64
origin, step [2]float64
}
// NewMinimalSurface creates a new discrete minimal surface problem and
// precomputes its boundary values. The problem is discretized on a rectilinear
// grid with nx×ny nodes, which means that the problem dimension is (nx-2)(ny-2).
func NewMinimalSurface(nx, ny int) *MinimalSurface {
ms := &MinimalSurface{
bottom: make([]float64, nx),
top: make([]float64, nx),
left: make([]float64, ny),
right: make([]float64, ny),
origin: [2]float64{-0.5, -0.5},
step: [2]float64{1 / float64(nx-1), 1 / float64(ny-1)},
}
ms.initBoundary(ms.bottom, ms.origin[0], ms.origin[1], ms.step[0], 0)
startY := ms.origin[1] + float64(ny-1)*ms.step[1]
ms.initBoundary(ms.top, ms.origin[0], startY, ms.step[0], 0)
ms.initBoundary(ms.left, ms.origin[0], ms.origin[1], 0, ms.step[1])
startX := ms.origin[0] + float64(nx-1)*ms.step[0]
ms.initBoundary(ms.right, startX, ms.origin[1], 0, ms.step[1])
return ms
}
// Func returns the area of the surface represented by the vector x.
func (ms *MinimalSurface) Func(x []float64) (area float64) {
nx, ny := ms.Dims()
if len(x) != (nx-2)*(ny-2) {
panic("functions: problem size mismatch")
}
hx, hy := ms.Steps()
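// Loop over the grid cells. Each cell is split into a lower and an upper
// triangle, the gradient of the surface over each triangle is approximated by
// one-sided differences, and each triangle contributes sqrt(1 + |∇v|²) to the
// sum, which is scaled by the cell area factor 0.5*hx*hy below.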
for j := 0; j < ny-1; j++ {
for i := 0; i < nx-1; i++ {
vLL := ms.at(i, j, x)
vLR := ms.at(i+1, j, x)
vUL := ms.at(i, j+1, x)
vUR := ms.at(i+1, j+1, x)
dvLdx := (vLR - vLL) / hx
dvLdy := (vUL - vLL) / hy
dvUdx := (vUR - vUL) / hx
dvUdy := (vUR - vLR) / hy
fL := math.Sqrt(1 + dvLdx*dvLdx + dvLdy*dvLdy)
fU := math.Sqrt(1 + dvUdx*dvUdx + dvUdy*dvUdy)
area += fL + fU
}
}
area *= 0.5 * hx * hy
return area
}
// Grad evaluates the gradient of the surface area with respect to the vector x
// and stores it in grad.
func (ms *MinimalSurface) Grad(grad, x []float64) {
nx, ny := ms.Dims()
if len(x) != (nx-2)*(ny-2) {
panic("functions: problem size mismatch")
}
if grad != nil && len(x) != len(grad) {
panic("functions: unexpected size mismatch")
}
for i := range grad {
grad[i] = 0
}
hx, hy := ms.Steps()
for j := 0; j < ny-1; j++ {
for i := 0; i < nx-1; i++ {
vLL := ms.at(i, j, x)
vLR := ms.at(i+1, j, x)
vUL := ms.at(i, j+1, x)
vUR := ms.at(i+1, j+1, x)
dvLdx := (vLR - vLL) / hx
dvLdy := (vUL - vLL) / hy
dvUdx := (vUR - vUL) / hx
dvUdy := (vUR - vLR) / hy
fL := math.Sqrt(1 + dvLdx*dvLdx + dvLdy*dvLdy)
fU := math.Sqrt(1 + dvUdx*dvUdx + dvUdy*dvUdy)
if grad != nil {
if i > 0 {
if j > 0 {
grad[ms.index(i, j)] -= (dvLdx/hx + dvLdy/hy) / fL
}
if j < ny-2 {
grad[ms.index(i, j+1)] += (dvLdy/hy)/fL - (dvUdx/hx)/fU
}
}
if i < nx-2 {
if j > 0 {
grad[ms.index(i+1, j)] += (dvLdx/hx)/fL - (dvUdy/hy)/fU
}
if j < ny-2 {
grad[ms.index(i+1, j+1)] += (dvUdx/hx + dvUdy/hy) / fU
}
}
}
}
}
cellSize := 0.5 * hx * hy
for i := range grad {
grad[i] *= cellSize
}
}
// InitX returns a starting location for the minimization problem. Length of
// the returned slice is (nx-2)(ny-2).
func (ms *MinimalSurface) InitX() []float64 {
nx, ny := ms.Dims()
x := make([]float64, (nx-2)*(ny-2))
for j := 1; j < ny-1; j++ {
for i := 1; i < nx-1; i++ {
x[ms.index(i, j)] = (ms.left[j] + ms.bottom[i]) / 2
}
}
return x
}
// ExactX returns the exact solution to the _continuous_ minimization problem
// projected on the interior nodes of the grid. Length of the returned slice is
// (nx-2)(ny-2).
func (ms *MinimalSurface) ExactX() []float64 {
nx, ny := ms.Dims()
v := make([]float64, (nx-2)*(ny-2))
for j := 1; j < ny-1; j++ {
for i := 1; i < nx-1; i++ {
v[ms.index(i, j)] = ms.ExactSolution(ms.x(i), ms.y(j))
}
}
return v
}
// ExactSolution returns the value of the exact solution to the minimal surface
// problem at (x,y). The exact solution is
// F_exact(x,y) = U^2(x,y) - V^2(x,y),
// where U and V are the unique solutions to the equations
// x = u + uv^2 - u^3/3,
// y = -v - u^2v + v^3/3.
func (ms *MinimalSurface) ExactSolution(x, y float64) float64 {
var u = [2]float64{x, -y}
var f [2]float64
var jac [2][2]float64
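// Solve the coupled cubic equations for (u, v) = (u[0], u[1]) by Newton's
// method: evaluate the residual f and the 2×2 Jacobian jac, then apply the
// update u -= jac⁻¹·f (via Cramer's rule) until the residual norm is small.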
for k := 0; k < 100; k++ {
f[0] = u[0] + u[0]*u[1]*u[1] - u[0]*u[0]*u[0]/3 - x
f[1] = -u[1] - u[0]*u[0]*u[1] + u[1]*u[1]*u[1]/3 - y
fNorm := math.Hypot(f[0], f[1])
if fNorm < 1e-13 {
break
}
jac[0][0] = 1 + u[1]*u[1] - u[0]*u[0]
jac[0][1] = 2 * u[0] * u[1]
jac[1][0] = -2 * u[0] * u[1]
jac[1][1] = -1 - u[0]*u[0] + u[1]*u[1]
det := jac[0][0]*jac[1][1] - jac[0][1]*jac[1][0]
u[0] -= (jac[1][1]*f[0] - jac[0][1]*f[1]) / det
u[1] -= (jac[0][0]*f[1] - jac[1][0]*f[0]) / det
}
return u[0]*u[0] - u[1]*u[1]
}
// Dims returns the size of the underlying rectilinear grid.
func (ms *MinimalSurface) Dims() (nx, ny int) {
return len(ms.bottom), len(ms.left)
}
// Steps returns the spatial step sizes of the underlying rectilinear grid.
func (ms *MinimalSurface) Steps() (hx, hy float64) {
return ms.step[0], ms.step[1]
}
func (ms *MinimalSurface) x(i int) float64 {
return ms.origin[0] + float64(i)*ms.step[0]
}
func (ms *MinimalSurface) y(j int) float64 {
return ms.origin[1] + float64(j)*ms.step[1]
}
func (ms *MinimalSurface) at(i, j int, x []float64) float64 {
nx, ny := ms.Dims()
if i < 0 || i >= nx {
panic(fmt.Sprintf("node [%v,%v] not on grid", i, j))
}
if j < 0 || j >= ny {
panic(fmt.Sprintf("node [%v,%v] not on grid", i, j))
}
if i == 0 {
return ms.left[j]
}
if j == 0 {
return ms.bottom[i]
}
if i == nx-1 {
return ms.right[j]
}
if j == ny-1 {
return ms.top[i]
}
return x[ms.index(i, j)]
}
// index maps an interior grid node (i, j) to a one-dimensional index and
// returns it.
func (ms *MinimalSurface) index(i, j int) int {
nx, ny := ms.Dims()
if i <= 0 || i >= nx-1 {
panic(fmt.Sprintf("[%v,%v] is not an interior node", i, j))
}
if j <= 0 || j >= ny-1 {
panic(fmt.Sprintf("[%v,%v] is not an interior node", i, j))
}
return i - 1 + (j-1)*(nx-2)
}
// initBoundary initializes the boundary b with the exact solution. The i-th
// element b[i] is located at [startX+i×hx, startY+i×hy].
func (ms *MinimalSurface) initBoundary(b []float64, startX, startY, hx, hy float64) {
for i := range b {
x := startX + float64(i)*hx
y := startY + float64(i)*hy
b[i] = ms.ExactSolution(x, y)
}
}
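// Illustrative usage sketch (not part of this file): MinimalSurface pairs
// naturally with the optimize package. The grid size below is an arbitrary
// choice; the assumed imports are "fmt", "log", "github.com/gonum/optimize"
// and this functions package.
//
//	ms := functions.NewMinimalSurface(20, 20)
//	p := optimize.Problem{Func: ms.Func, Grad: ms.Grad}
//	// nil settings and method select the defaults (BFGS, since Grad is non-nil).
//	result, err := optimize.Local(p, ms.InitX(), nil, nil)
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Println("approximate minimal surface area:", result.F)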

View File

@@ -0,0 +1,48 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package functions
import (
"math"
"testing"
"github.com/gonum/diff/fd"
"github.com/gonum/floats"
)
func TestMinimalSurface(t *testing.T) {
for _, size := range [][2]int{
{20, 30},
{30, 30},
{50, 40},
} {
f := NewMinimalSurface(size[0], size[1])
x0 := f.InitX()
grad := make([]float64, len(x0))
f.Grad(grad, x0)
fdGrad := fd.Gradient(nil, f.Func, x0, &fd.Settings{Formula: fd.Central})
// Test that the numerical and analytical gradients agree.
dist := floats.Distance(grad, fdGrad, math.Inf(1))
if dist > 1e-9 {
t.Errorf("grid %v x %v: numerical and analytical gradient do not match. |fdGrad - grad|_∞ = %v",
size[0], size[1], dist)
}
// Test that the gradient at the minimum is small enough.
// In some sense this test is not completely correct because ExactX
// returns the exact solution to the continuous problem projected on the
// grid, not the exact solution to the discrete problem which we are
// solving. This is the reason why a relatively loose tolerance 1e-4
// must be used.
xSol := f.ExactX()
f.Grad(grad, xSol)
norm := floats.Norm(grad, math.Inf(1))
if norm > 1e-4 {
t.Errorf("grid %v x %v: gradient at the minimum not small enough. |grad|_∞ = %v",
size[0], size[1], norm)
}
}
}

View File

@@ -0,0 +1,133 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package functions provides objective functions for testing optimization
// algorithms.
//
// We encourage outside contributions of additional test functions that exhibit
// properties not already covered in the testing suite or that have
// significance due to prior use as benchmark cases.
package functions
import (
"math"
"testing"
"github.com/gonum/diff/fd"
"github.com/gonum/floats"
)
// function represents an objective function.
type function interface {
Func(x []float64) float64
}
type gradient interface {
Grad(grad, x []float64)
}
// minimumer is an objective function that can also provide information about
// its minima.
type minimumer interface {
function
// Minima returns _known_ minima of the function.
Minima() []Minimum
}
// Minimum represents information about an optimal location of a function.
type Minimum struct {
// X is the location of the minimum. X may not be nil.
X []float64
// F is the value of the objective function at X.
F float64
// Global indicates if the location is a global minimum.
Global bool
}
type funcTest struct {
X []float64
// F is the expected function value at X.
F float64
// Gradient is the expected gradient at X. If nil, it is not evaluated.
Gradient []float64
}
// TODO(vladimir-ch): Decide and implement an exported testing function:
// func Test(f Function, ??? ) ??? {
// }
const (
defaultTol = 1e-12
defaultGradTol = 1e-9
defaultFDGradTol = 1e-5
)
// testFunction checks that the function can evaluate itself (and its gradient)
// correctly.
func testFunction(f function, ftests []funcTest, t *testing.T) {
// Make a copy of tests because we may append to the slice.
tests := make([]funcTest, len(ftests))
copy(tests, ftests)
// Get information about the function.
fMinima, isMinimumer := f.(minimumer)
fGradient, isGradient := f.(gradient)
// If the function is a Minimumer, append its minima to the tests.
if isMinimumer {
for _, minimum := range fMinima.Minima() {
// Allocate gradient only if the function can evaluate it.
var grad []float64
if isGradient {
grad = make([]float64, len(minimum.X))
}
tests = append(tests, funcTest{
X: minimum.X,
F: minimum.F,
Gradient: grad,
})
}
}
for i, test := range tests {
F := f.Func(test.X)
// Check that the function value is as expected.
if math.Abs(F-test.F) > defaultTol {
t.Errorf("Test #%d: function value given by Func is incorrect. Want: %v, Got: %v",
i, test.F, F)
}
if test.Gradient == nil {
continue
}
// Evaluate the finite difference gradient.
fdGrad := fd.Gradient(nil, f.Func, test.X, &fd.Settings{
Formula: fd.Central,
Step: 1e-6,
})
// Check that the finite difference and expected gradients match.
if !floats.EqualApprox(fdGrad, test.Gradient, defaultFDGradTol) {
dist := floats.Distance(fdGrad, test.Gradient, math.Inf(1))
t.Errorf("Test #%d: numerical and expected gradients do not match. |fdGrad - WantGrad|_∞ = %v",
i, dist)
}
// If the function is a Gradient, check that it computes the gradient correctly.
if isGradient {
grad := make([]float64, len(test.Gradient))
fGradient.Grad(grad, test.X)
if !floats.EqualApprox(grad, test.Gradient, defaultGradTol) {
dist := floats.Distance(grad, test.Gradient, math.Inf(1))
t.Errorf("Test #%d: gradient given by Grad is incorrect. |grad - WantGrad|_∞ = %v",
i, dist)
}
}
}
}

240
optimize/global.go Normal file
View File

@@ -0,0 +1,240 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import (
"math"
"sync"
"time"
)
// GlobalMethod is a global optimizer. Global methods typically require more
// function evaluations than local methods and have no sense of local convergence.
type GlobalMethod interface {
// InitGlobal tells the method the problem dimension and the maximum number of
// concurrent tasks, and the method returns the number of tasks it will use.
// This is needed to synchronize the goroutines launched by Global with those inside the method.
InitGlobal(dim, tasks int) int
// IterateGlobal performs one iteration for the given task and returns the next
// Operation. The method may assume that the same task id is always associated
// with the same Location pointer.
IterateGlobal(task int, loc *Location) (Operation, error)
Needser
// Done communicates to the optimization method that the optimization has
// concluded to allow for shutdown.
Done()
}
// Global uses a global optimizer to search for the global minimum of a
// function. A maximization problem can be transformed into a
// minimization problem by multiplying the function by -1.
//
// The first argument represents the problem to be minimized. Its fields are
// routines that evaluate the objective function, gradient, and other
// quantities related to the problem. The objective function, p.Func, must not
// be nil. The optimization method used may require other fields to be non-nil
// as specified by method.Needs. Global will panic if these are not met. The
// method can be determined automatically from the supplied problem, as
// described below.
//
// If p.Status is not nil, it is called before every evaluation. If the
// returned Status is not NotTerminated or the error is not nil, the
// optimization run is terminated.
//
// The third argument contains the settings for the minimization. The
// DefaultGlobalSettings function can be called for a Settings struct with the
// default values initialized. If settings == nil, the default settings are used.
// Global optimization methods typically do not make assumptions about the number
// and location of local minima. Thus, the only convergence metric used is the
// function values found at major iterations of the optimization. Bounds on the
// length of optimization are obeyed, such as the number of allowed function
// evaluations.
//
// The final argument is the optimization method to use. If method == nil, then
// an appropriate default is chosen based on the properties of the other arguments
// (dimension, gradient-free or gradient-based, etc.).
//
// If method implements Statuser, method.Status is called before every call
// to method.Iterate. If the returned Status is not NotTerminated or the
// error is non-nil, the optimization run is terminated.
//
// Global returns a Result struct and any error that occurred. See the
// documentation of Result for more information.
//
// Be aware that the default behavior of Global is to find the minimum.
// For certain functions and optimization methods, this process can take many
// function evaluations. If you would like to put limits on this, for example
// maximum runtime or maximum function evaluations, modify the Settings
// input struct.
//
// Note that Global cannot guarantee strict bounds on function evaluations,
// iterations, etc. in the presence of concurrency.
func Global(p Problem, dim int, settings *Settings, method GlobalMethod) (*Result, error) {
startTime := time.Now()
if method == nil {
method = &GuessAndCheck{}
}
if settings == nil {
settings = DefaultSettingsGlobal()
}
stats := &Stats{}
err := checkOptimization(p, dim, method, settings.Recorder)
if err != nil {
return nil, err
}
optLoc := newLocation(dim, method)
optLoc.F = math.Inf(1)
if settings.FunctionConverge != nil {
settings.FunctionConverge.Init(optLoc.F)
}
stats.Runtime = time.Since(startTime)
// Send initial location to Recorder
if settings.Recorder != nil {
err = settings.Recorder.Record(optLoc, InitIteration, stats)
if err != nil {
return nil, err
}
}
// Run optimization
var status Status
status, err = minimizeGlobal(&p, method, settings, stats, optLoc, startTime)
// Cleanup and collect results
if settings.Recorder != nil && err == nil {
err = settings.Recorder.Record(optLoc, PostIteration, stats)
}
stats.Runtime = time.Since(startTime)
return &Result{
Location: *optLoc,
Stats: *stats,
Status: status,
}, err
}
func minimizeGlobal(p *Problem, method GlobalMethod, settings *Settings, stats *Stats, optLoc *Location, startTime time.Time) (status Status, err error) {
dim := len(optLoc.X)
statuser, _ := method.(Statuser)
gs := &globalStatus{
mux: &sync.RWMutex{},
stats: stats,
status: NotTerminated,
p: p,
startTime: startTime,
optLoc: optLoc,
settings: settings,
statuser: statuser,
}
nTasks := settings.Concurrent
nTasks = method.InitGlobal(dim, nTasks)
// Launch optimization workers
var wg sync.WaitGroup
for task := 0; task < nTasks; task++ {
wg.Add(1)
go func(task int) {
defer wg.Done()
loc := newLocation(dim, method)
x := make([]float64, dim)
globalWorker(task, method, gs, loc, x)
}(task)
}
wg.Wait()
method.Done()
return gs.status, gs.err
}
type globalStatus struct {
mux *sync.RWMutex
stats *Stats
status Status
p *Problem
startTime time.Time
optLoc *Location
settings *Settings
method GlobalMethod
statuser Statuser
err error
}
func globalWorker(task int, m GlobalMethod, g *globalStatus, loc *Location, x []float64) {
for {
// Find Evaluation location
op, err := m.IterateGlobal(task, loc)
if err != nil {
// TODO(btracey): Figure out how to handle errors properly. Shut
// everything down? Pass to globalStatus so it can shut everything down?
g.mux.Lock()
g.err = err
g.status = Failure
g.mux.Unlock()
break
}
// Evaluate location and/or update stats.
status := g.globalOperation(op, loc, x)
if status != NotTerminated {
break
}
}
}
// globalOperation handles the operation returned by an individual worker and
// updates the shared optimization state. It uses a mutex to protect updates where necessary.
func (g *globalStatus) globalOperation(op Operation, loc *Location, x []float64) Status {
// Do a quick check to see if one of the other workers converged in the meantime.
var status Status
var err error
g.mux.RLock()
status = g.status
g.mux.RUnlock()
if status != NotTerminated {
return status
}
switch op {
case NoOperation:
case InitIteration:
panic("optimize: Method returned InitIteration")
case PostIteration:
panic("optimize: Method returned PostIteration")
case MajorIteration:
g.mux.Lock()
g.stats.MajorIterations++
copyLocation(g.optLoc, loc)
g.mux.Unlock()
g.mux.RLock()
status = checkConvergence(g.optLoc, g.settings, false)
g.mux.RUnlock()
default: // Any of the Evaluation operations.
status, err = evaluate(g.p, loc, op, x)
g.mux.Lock()
updateStats(g.stats, op)
g.mux.Unlock()
}
g.mux.Lock()
status, err = iterCleanup(status, err, g.stats, g.settings, g.statuser, g.startTime, loc, op)
// Update the termination status if it hasn't already terminated.
if g.status == NotTerminated {
g.status = status
g.err = err
}
g.mux.Unlock()
return status
}
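// DefaultSettingsGlobal returns the default settings for Global optimization.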
func DefaultSettingsGlobal() *Settings {
return &Settings{
FunctionThreshold: math.Inf(-1),
FunctionConverge: &FunctionConverge{
Absolute: 1e-10,
Iterations: 100,
},
}
}
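// Illustrative usage sketch (not part of this file), mirroring the pattern in
// guessandcheck_test.go: Global is driven with a GlobalMethod such as
// GuessAndCheck together with a Rander describing the sampling distribution.
// The dimension and iteration limit are arbitrary; assumed imports are
// "github.com/gonum/matrix/mat64", "github.com/gonum/stat/distmv" and
// "github.com/gonum/optimize/functions".
//
//	dim := 10
//	p := Problem{Func: functions.ExtendedRosenbrock{}.Func}
//	mu := make([]float64, dim)
//	sigma := mat64.NewSymDense(dim, nil)
//	for i := 0; i < dim; i++ {
//		sigma.SetSym(i, i, 1)
//	}
//	src, ok := distmv.NewNormal(mu, sigma, nil)
//	if !ok {
//		panic("bad sampling distribution")
//	}
//	settings := DefaultSettingsGlobal()
//	settings.MajorIterations = 100
//	result, err := Global(p, dim, settings, &GuessAndCheck{Rander: src})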

View File

@@ -0,0 +1,63 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import "github.com/gonum/floats"
// GradientDescent implements the steepest descent optimization method that
// performs successive steps along the direction of the negative gradient.
type GradientDescent struct {
// Linesearcher selects suitable steps along the descent direction.
// If Linesearcher is nil, a reasonable default will be chosen.
Linesearcher Linesearcher
// StepSizer determines the initial step size along each direction.
// If StepSizer is nil, a reasonable default will be chosen.
StepSizer StepSizer
ls *LinesearchMethod
}
func (g *GradientDescent) Init(loc *Location) (Operation, error) {
if g.Linesearcher == nil {
g.Linesearcher = &Backtracking{}
}
if g.StepSizer == nil {
g.StepSizer = &QuadraticStepSize{}
}
if g.ls == nil {
g.ls = &LinesearchMethod{}
}
g.ls.Linesearcher = g.Linesearcher
g.ls.NextDirectioner = g
return g.ls.Init(loc)
}
func (g *GradientDescent) Iterate(loc *Location) (Operation, error) {
return g.ls.Iterate(loc)
}
func (g *GradientDescent) InitDirection(loc *Location, dir []float64) (stepSize float64) {
copy(dir, loc.Gradient)
floats.Scale(-1, dir)
return g.StepSizer.Init(loc, dir)
}
func (g *GradientDescent) NextDirection(loc *Location, dir []float64) (stepSize float64) {
copy(dir, loc.Gradient)
floats.Scale(-1, dir)
return g.StepSizer.StepSize(loc, dir)
}
func (*GradientDescent) Needs() struct {
Gradient bool
Hessian bool
} {
return struct {
Gradient bool
Hessian bool
}{true, false}
}
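// Illustrative usage sketch (not part of this file): GradientDescent is passed
// as the Method argument to Local; its zero value selects the default
// Backtracking linesearch and QuadraticStepSize. The problem and starting
// point are borrowed from local_example_test.go.
//
//	p := optimize.Problem{
//		Func: functions.ExtendedRosenbrock{}.Func,
//		Grad: functions.ExtendedRosenbrock{}.Grad,
//	}
//	x0 := []float64{1.3, 0.7, 0.8, 1.9, 1.2}
//	result, err := optimize.Local(p, x0, nil, &optimize.GradientDescent{})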

60
optimize/guessandcheck.go Normal file
View File

@@ -0,0 +1,60 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import (
"math"
"sync"
"github.com/gonum/stat/distmv"
)
// GuessAndCheck is a global optimizer that evaluates the function at random
// locations. It is not a good optimizer, but it is useful for comparison and debugging.
type GuessAndCheck struct {
Rander distmv.Rander
eval []bool
mux *sync.Mutex
bestF float64
bestX []float64
}
func (g *GuessAndCheck) Needs() struct{ Gradient, Hessian bool } {
return struct{ Gradient, Hessian bool }{false, false}
}
func (g *GuessAndCheck) Done() {
// No cleanup needed
}
func (g *GuessAndCheck) InitGlobal(dim, tasks int) int {
g.eval = make([]bool, tasks)
g.bestF = math.Inf(1)
g.bestX = resize(g.bestX, dim)
g.mux = &sync.Mutex{}
return tasks
}
func (g *GuessAndCheck) IterateGlobal(task int, loc *Location) (Operation, error) {
// g.eval[task] is true if loc contains a new function evaluation from this task.
if g.eval[task] {
g.eval[task] = false
g.mux.Lock()
if loc.F < g.bestF {
g.bestF = loc.F
copy(g.bestX, loc.X)
} else {
loc.F = g.bestF
copy(loc.X, g.bestX)
}
g.mux.Unlock()
return MajorIteration, nil
}
g.eval[task] = true
g.Rander.Rand(loc.X)
return FuncEvaluation, nil
}

View File

@@ -0,0 +1,34 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import (
"testing"
"github.com/gonum/matrix/mat64"
"github.com/gonum/optimize/functions"
"github.com/gonum/stat/distmv"
)
func TestGuessAndCheck(t *testing.T) {
dim := 3000
problem := Problem{
Func: functions.ExtendedRosenbrock{}.Func,
}
mu := make([]float64, dim)
sigma := mat64.NewSymDense(dim, nil)
for i := 0; i < dim; i++ {
sigma.SetSym(i, i, 1)
}
d, ok := distmv.NewNormal(mu, sigma, nil)
if !ok {
panic("bad test")
}
Global(problem, dim, nil, &GuessAndCheck{Rander: d})
settings := DefaultSettingsGlobal()
settings.Concurrent = 5
settings.MajorIterations = 15
Global(problem, dim, settings, &GuessAndCheck{Rander: d})
}

130
optimize/interfaces.go Normal file
View File

@@ -0,0 +1,130 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
// A Method can optimize an objective function.
//
// It uses a reverse-communication interface between the optimization method
// and the caller. Method acts as a client that asks the caller to perform
// needed operations via Operation returned from Init and Iterate methods.
// This makes the optimization algorithm independent of the user-supplied data
// and its representation, and enables automation of common operations
// like checking for (various types of) convergence and maintaining statistics.
//
// A Method can command an Evaluation, a MajorIteration, or a NoOperation operation.
//
// An evaluation operation is one or more of the Evaluation operations
// (FuncEvaluation, GradEvaluation, etc.) which can be combined with
// the bitwise or operator. In an evaluation operation, the requested fields of
// Problem will be evaluated at the point specified in Location.X.
// The corresponding fields of Location will be filled with the results that
// can be retrieved upon the next call to Iterate. The Method interface
// requires that entries of Location are not modified aside from the commanded
// evaluations. Thus, the type implementing Method may use multiple Operations
// to set the Location fields at a particular x value.
//
// Instead of an Evaluation, a Method may declare MajorIteration. In
// a MajorIteration, the values in the fields of Location are treated as
// a potential optimizer. The convergence of the optimization routine
// (GradientThreshold, etc.) is checked at this new best point. In
// a MajorIteration, the fields of Location must be valid and consistent.
//
// A Method must not return InitIteration and PostIteration operations. These are
// reserved for the clients to be passed to Recorders. A Method must also not
// combine the Evaluation operations with the Iteration operations.
type Method interface {
// Init initializes the method based on the initial data in loc, updates it
// and returns the first operation to be carried out by the caller.
// The initial location must be valid as specified by Needs.
Init(loc *Location) (Operation, error)
// Iterate retrieves data from loc, performs one iteration of the method,
// updates loc and returns the next operation.
Iterate(loc *Location) (Operation, error)
Needser
}
type Needser interface {
// Needs specifies information about the objective function needed by the
// optimizer beyond just the function value. The information is used
// internally for initialization and must match evaluation types returned
// by Init and Iterate during the optimization process.
Needs() struct {
Gradient bool
Hessian bool
}
}
// Statuser can report the status and any error. It is intended for methods as
// an additional error reporting mechanism apart from the errors returned from
// Init and Iterate.
type Statuser interface {
Status() (Status, error)
}
// Linesearcher is a type that can perform a line search. It tries to find an
// (approximate) minimum of the objective function along the search direction
// dir_k starting at the most recent location x_k, i.e., it tries to minimize
// the function
// φ(step) := f(x_k + step * dir_k) where step > 0.
// Typically, a Linesearcher will be used in conjunction with LinesearchMethod
// for performing gradient-based optimization through sequential line searches.
type Linesearcher interface {
// Init initializes the Linesearcher and a new line search. Value and
// derivative contain φ(0) and φ'(0), respectively, and step contains the
// first trial step length. It returns an Operation that must be one of
// FuncEvaluation, GradEvaluation, FuncEvaluation|GradEvaluation. The
// caller must evaluate φ(step), φ'(step), or both, respectively, and pass
// the result to Linesearcher in value and derivative arguments to Iterate.
Init(value, derivative float64, step float64) Operation
// Iterate takes in the values of φ and φ' evaluated at the previous step
// and returns the next operation.
//
// If op is one of FuncEvaluation, GradEvaluation,
// FuncEvaluation|GradEvaluation, the caller must evaluate φ(step),
// φ'(step), or both, respectively, and pass the result to Linesearcher in
// value and derivative arguments on the next call to Iterate.
//
// If op is MajorIteration, a sufficiently accurate minimum of φ has been
// found at the previous step and the line search has concluded. Init must
// be called again to initialize a new line search.
//
// If err is nil, op must not specify another operation. If err is not nil,
// the values of op and step are undefined.
Iterate(value, derivative float64) (op Operation, step float64, err error)
}
// NextDirectioner implements a strategy for computing a new line search
// direction at each major iteration. Typically, a NextDirectioner will be
// used in conjunction with LinesearchMethod for performing gradient-based
// optimization through sequential line searches.
type NextDirectioner interface {
// InitDirection initializes the NextDirectioner at the given starting location,
// putting the initial direction in place into dir, and returning the initial
// step size. InitDirection must not modify Location.
InitDirection(loc *Location, dir []float64) (step float64)
// NextDirection updates the search direction and step size. Location is
// the location seen at the conclusion of the most recent linesearch. The
// next search direction is put in place into dir, and the next step size
// is returned. NextDirection must not modify Location.
NextDirection(loc *Location, dir []float64) (step float64)
}
// StepSizer can set the next step size of the optimization given the last Location.
// The returned step size must be positive.
type StepSizer interface {
Init(loc *Location, dir []float64) float64
StepSize(loc *Location, dir []float64) float64
}
// A Recorder can record the progress of the optimization, for example to print
// the progress to standard output or to a log file. A Recorder must not modify any data.
type Recorder interface {
Init() error
Record(*Location, Operation, *Stats) error
}
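// Illustrative sketch (not part of this file) of the reverse-communication
// loop that a caller runs against a Method; error handling, statistics and
// convergence checks are omitted. See minimize in local.go for the full version.
//
//	op, err := method.Init(loc)
//	for err == nil {
//		switch {
//		case op == MajorIteration:
//			// loc holds a new candidate optimum; check convergence here.
//		case op.isEvaluation():
//			// Evaluate the quantities requested by op at loc.X and store
//			// the results back into loc.
//		}
//		op, err = method.Iterate(loc)
//	}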

167
optimize/lbfgs.go Normal file
View File

@@ -0,0 +1,167 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import (
"github.com/gonum/floats"
)
// LBFGS implements the limited-memory BFGS method for gradient-based
// unconstrained minimization.
//
// It stores a modified version of the inverse Hessian approximation H
// implicitly from the last Store iterations while the normal BFGS method
// stores and manipulates H directly as a dense matrix. Therefore LBFGS is more
// appropriate than BFGS for large problems as the cost of LBFGS scales as
// O(Store * dim) while BFGS scales as O(dim^2). The "forgetful" nature of
// LBFGS may also make it perform better than BFGS for functions with Hessians
// that vary rapidly spatially.
type LBFGS struct {
// Linesearcher selects suitable steps along the descent direction.
// Accepted steps should satisfy the strong Wolfe conditions.
// If Linesearcher is nil, a reasonable default will be chosen.
Linesearcher Linesearcher
// Store is the size of the limited-memory storage.
// If Store is 0, it will be defaulted to 15.
Store int
ls *LinesearchMethod
dim int // Dimension of the problem
x []float64 // Location at the last major iteration
grad []float64 // Gradient at the last major iteration
// History
oldest int // Index of the oldest element of the history
y [][]float64 // Last Store values of y
s [][]float64 // Last Store values of s
rho []float64 // Last Store values of rho
a []float64 // Cache of Hessian updates
}
func (l *LBFGS) Init(loc *Location) (Operation, error) {
if l.Linesearcher == nil {
l.Linesearcher = &Bisection{}
}
if l.Store == 0 {
l.Store = 15
}
if l.ls == nil {
l.ls = &LinesearchMethod{}
}
l.ls.Linesearcher = l.Linesearcher
l.ls.NextDirectioner = l
return l.ls.Init(loc)
}
func (l *LBFGS) Iterate(loc *Location) (Operation, error) {
return l.ls.Iterate(loc)
}
func (l *LBFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) {
dim := len(loc.X)
l.dim = dim
l.oldest = 0
l.a = resize(l.a, l.Store)
l.rho = resize(l.rho, l.Store)
l.y = l.initHistory(l.y)
l.s = l.initHistory(l.s)
l.x = resize(l.x, dim)
copy(l.x, loc.X)
l.grad = resize(l.grad, dim)
copy(l.grad, loc.Gradient)
copy(dir, loc.Gradient)
floats.Scale(-1, dir)
return 1 / floats.Norm(dir, 2)
}
func (l *LBFGS) initHistory(hist [][]float64) [][]float64 {
c := cap(hist)
if c < l.Store {
n := make([][]float64, l.Store-c)
hist = append(hist[:c], n...)
}
hist = hist[:l.Store]
for i := range hist {
hist[i] = resize(hist[i], l.dim)
for j := range hist[i] {
hist[i][j] = 0
}
}
return hist
}
func (l *LBFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
// Uses two-loop correction as described in
// Nocedal, J., Wright, S.: Numerical Optimization (2nd ed). Springer (2006), chapter 7, page 178.
if len(loc.X) != l.dim {
panic("lbfgs: unexpected size mismatch")
}
if len(loc.Gradient) != l.dim {
panic("lbfgs: unexpected size mismatch")
}
if len(dir) != l.dim {
panic("lbfgs: unexpected size mismatch")
}
y := l.y[l.oldest]
floats.SubTo(y, loc.Gradient, l.grad)
s := l.s[l.oldest]
floats.SubTo(s, loc.X, l.x)
sDotY := floats.Dot(s, y)
l.rho[l.oldest] = 1 / sDotY
l.oldest = (l.oldest + 1) % l.Store
copy(l.x, loc.X)
copy(l.grad, loc.Gradient)
copy(dir, loc.Gradient)
// Start with the most recent element and go backward,
for i := 0; i < l.Store; i++ {
idx := l.oldest - i - 1
if idx < 0 {
idx += l.Store
}
l.a[idx] = l.rho[idx] * floats.Dot(l.s[idx], dir)
floats.AddScaled(dir, -l.a[idx], l.y[idx])
}
// Scale the initial Hessian.
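// gamma = sᵀy / yᵀy estimates the scale of the true Hessian along the most
// recent step and is a standard choice for the initial inverse Hessian
// approximation H₀ = gamma*I in L-BFGS.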
gamma := sDotY / floats.Dot(y, y)
floats.Scale(gamma, dir)
// Start with the oldest element and go forward.
for i := 0; i < l.Store; i++ {
idx := i + l.oldest
if idx >= l.Store {
idx -= l.Store
}
beta := l.rho[idx] * floats.Dot(l.y[idx], dir)
floats.AddScaled(dir, l.a[idx]-beta, l.s[idx])
}
// dir contains H^{-1} * g, so flip the direction for minimization.
floats.Scale(-1, dir)
return 1
}
func (*LBFGS) Needs() struct {
Gradient bool
Hessian bool
} {
return struct {
Gradient bool
Hessian bool
}{true, false}
}

218
optimize/linesearch.go Normal file
View File

@@ -0,0 +1,218 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import (
"math"
"github.com/gonum/floats"
)
// LinesearchMethod represents an abstract optimization method in which a
// function is optimized through successive line search optimizations.
type LinesearchMethod struct {
// NextDirectioner specifies the search direction of each linesearch.
NextDirectioner NextDirectioner
// Linesearcher performs a linesearch along the search direction.
Linesearcher Linesearcher
x []float64 // Starting point for the current iteration.
dir []float64 // Search direction for the current iteration.
first bool // Indicator of the first iteration.
nextMajor bool // Indicates that MajorIteration must be commanded at the next call to Iterate.
eval Operation // Indicator of valid fields in Location.
lastStep float64 // Step taken from x in the previous call to Iterate.
lastOp Operation // Operation returned from the previous call to Iterate.
}
func (ls *LinesearchMethod) Init(loc *Location) (Operation, error) {
if loc.Gradient == nil {
panic("linesearch: gradient is nil")
}
dim := len(loc.X)
ls.x = resize(ls.x, dim)
ls.dir = resize(ls.dir, dim)
ls.first = true
ls.nextMajor = false
// Indicate that all fields of loc are valid.
ls.eval = FuncEvaluation | GradEvaluation
if loc.Hessian != nil {
ls.eval |= HessEvaluation
}
ls.lastStep = math.NaN()
ls.lastOp = NoOperation
return ls.initNextLinesearch(loc)
}
func (ls *LinesearchMethod) Iterate(loc *Location) (Operation, error) {
switch ls.lastOp {
case NoOperation:
// TODO(vladimir-ch): Either Init has not been called, or the caller is
// trying to resume the optimization run after Iterate previously
// returned with an error. Decide what is the proper thing to do. See also #125.
case MajorIteration:
// The previous updated location did not converge the full
// optimization. Initialize a new Linesearch.
return ls.initNextLinesearch(loc)
default:
// Update the indicator of valid fields of loc.
ls.eval |= ls.lastOp
if ls.nextMajor {
ls.nextMajor = false
// Linesearcher previously finished, and the invalid fields of loc
// have now been validated. Announce MajorIteration.
ls.lastOp = MajorIteration
return ls.lastOp, nil
}
}
// Continue the linesearch.
f := math.NaN()
if ls.eval&FuncEvaluation != 0 {
f = loc.F
}
projGrad := math.NaN()
if ls.eval&GradEvaluation != 0 {
projGrad = floats.Dot(loc.Gradient, ls.dir)
}
op, step, err := ls.Linesearcher.Iterate(f, projGrad)
if err != nil {
return ls.error(err)
}
switch op {
case MajorIteration:
// Linesearch has been finished.
ls.lastOp = complementEval(loc, ls.eval)
if ls.lastOp == NoOperation {
// loc is complete, MajorIteration can be declared directly.
ls.lastOp = MajorIteration
} else {
// Declare MajorIteration on the next call to Iterate.
ls.nextMajor = true
}
case FuncEvaluation, GradEvaluation, FuncEvaluation | GradEvaluation:
if step != ls.lastStep {
// We are moving to a new location, and not, say, evaluating extra
// information at the current location.
// Compute the next evaluation point and store it in loc.X.
floats.AddScaledTo(loc.X, ls.x, step, ls.dir)
if floats.Equal(ls.x, loc.X) {
// Step size has become so small that the next evaluation point is
// indistinguishable from the starting point for the current
// iteration due to rounding errors.
return ls.error(ErrNoProgress)
}
ls.lastStep = step
ls.eval = NoOperation // Indicate all invalid fields of loc.
}
ls.lastOp = op
default:
panic("linesearch: Linesearcher returned invalid operation")
}
return ls.lastOp, nil
}
func (ls *LinesearchMethod) error(err error) (Operation, error) {
ls.lastOp = NoOperation
return ls.lastOp, err
}
// initNextLinesearch initializes the next linesearch using the previous
// complete location stored in loc. It fills loc.X and returns an evaluation
// to be performed at loc.X.
func (ls *LinesearchMethod) initNextLinesearch(loc *Location) (Operation, error) {
copy(ls.x, loc.X)
var step float64
if ls.first {
ls.first = false
step = ls.NextDirectioner.InitDirection(loc, ls.dir)
} else {
step = ls.NextDirectioner.NextDirection(loc, ls.dir)
}
projGrad := floats.Dot(loc.Gradient, ls.dir)
if projGrad >= 0 {
return ls.error(ErrNonDescentDirection)
}
op := ls.Linesearcher.Init(loc.F, projGrad, step)
switch op {
case FuncEvaluation, GradEvaluation, FuncEvaluation | GradEvaluation:
default:
panic("linesearch: Linesearcher returned invalid operation")
}
floats.AddScaledTo(loc.X, ls.x, step, ls.dir)
if floats.Equal(ls.x, loc.X) {
// Step size is so small that the next evaluation point is
// indistinguishable from the starting point for the current iteration
// due to rounding errors.
return ls.error(ErrNoProgress)
}
ls.lastStep = step
ls.eval = NoOperation // Invalidate all fields of loc.
ls.lastOp = op
return ls.lastOp, nil
}
// ArmijoConditionMet returns true if the Armijo condition (aka sufficient
// decrease) has been met. Under normal conditions, the following should be
// true, though this is not enforced:
// - initGrad < 0
// - step > 0
// - 0 < decrease < 1
func ArmijoConditionMet(currObj, initObj, initGrad, step, decrease float64) bool {
return currObj <= initObj+decrease*step*initGrad
}
// StrongWolfeConditionsMet returns true if the strong Wolfe conditions have been met.
// The strong Wolfe conditions ensure sufficient decrease in the function
// value, and sufficient decrease in the magnitude of the projected gradient.
// Under normal conditions, the following should be true, though this is not
// enforced:
// - initGrad < 0
// - step > 0
// - 0 <= decrease < curvature < 1
func StrongWolfeConditionsMet(currObj, currGrad, initObj, initGrad, step, decrease, curvature float64) bool {
if currObj > initObj+decrease*step*initGrad {
return false
}
return math.Abs(currGrad) < curvature*math.Abs(initGrad)
}
// WeakWolfeConditionsMet returns true if the weak Wolfe conditions have been met.
// The weak Wolfe conditions ensure sufficient decrease in the function value,
// and sufficient decrease in the value of the projected gradient. Under normal
// conditions, the following should be true, though this is not enforced:
// - initGrad < 0
// - step > 0
// - 0 <= decrease < curvature < 1
func WeakWolfeConditionsMet(currObj, currGrad, initObj, initGrad, step, decrease, curvature float64) bool {
if currObj > initObj+decrease*step*initGrad {
return false
}
return currGrad >= curvature*initGrad
}
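// Illustrative sketch (not part of this file): checking a trial step against
// the conditions above; the numeric values are arbitrary.
//
//	initObj, initGrad := 10.0, -2.0 // φ(0), φ'(0)
//	step := 0.5
//	currObj, currGrad := 8.9, -0.5 // φ(step), φ'(step)
//	sufficient := ArmijoConditionMet(currObj, initObj, initGrad, step, 1e-4)                      // true
//	strongWolfe := StrongWolfeConditionsMet(currObj, currGrad, initObj, initGrad, step, 1e-4, 0.9) // true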

View File

@@ -0,0 +1,136 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import (
"fmt"
"math"
"reflect"
"testing"
"github.com/gonum/optimize/functions"
)
func TestMoreThuente(t *testing.T) {
d := 0.001
c := 0.001
ls := &MoreThuente{
DecreaseFactor: d,
CurvatureFactor: c,
}
testLinesearcher(t, ls, d, c, true)
}
func TestBisection(t *testing.T) {
c := 0.1
ls := &Bisection{
CurvatureFactor: c,
}
testLinesearcher(t, ls, 0, c, true)
}
func TestBacktracking(t *testing.T) {
d := 0.001
ls := &Backtracking{
DecreaseFactor: d,
}
testLinesearcher(t, ls, d, 0, false)
}
type funcGrader interface {
Func([]float64) float64
Grad([]float64, []float64)
}
type linesearcherTest struct {
name string
f func(float64) float64
g func(float64) float64
}
func newLinesearcherTest(name string, fg funcGrader) linesearcherTest {
grad := make([]float64, 1)
return linesearcherTest{
name: name,
f: func(x float64) float64 {
return fg.Func([]float64{x})
},
g: func(x float64) float64 {
fg.Grad(grad, []float64{x})
return grad[0]
},
}
}
func testLinesearcher(t *testing.T, ls Linesearcher, decrease, curvature float64, strongWolfe bool) {
for i, prob := range []linesearcherTest{
newLinesearcherTest("Concave-to-the-right function", functions.ConcaveRight{}),
newLinesearcherTest("Concave-to-the-left function", functions.ConcaveLeft{}),
newLinesearcherTest("Plassmann wiggly function (l=39, beta=0.01)", functions.Plassmann{39, 0.01}),
newLinesearcherTest("Yanai-Ozawa-Kaneko function (beta1=0.001, beta2=0.001)", functions.YanaiOzawaKaneko{0.001, 0.001}),
newLinesearcherTest("Yanai-Ozawa-Kaneko function (beta1=0.01, beta2=0.001)", functions.YanaiOzawaKaneko{0.01, 0.001}),
newLinesearcherTest("Yanai-Ozawa-Kaneko function (beta1=0.001, beta2=0.01)", functions.YanaiOzawaKaneko{0.001, 0.01}),
} {
for _, initStep := range []float64{0.001, 0.1, 1, 10, 1000} {
prefix := fmt.Sprintf("test %d (%v started from %v)", i, prob.name, initStep)
f0 := prob.f(0)
g0 := prob.g(0)
if g0 >= 0 {
panic("bad test function")
}
op := ls.Init(f0, g0, initStep)
if !op.isEvaluation() {
t.Errorf("%v: Linesearcher.Init returned non-evaluating operation %v", prefix, op)
continue
}
var (
err error
k int
f, g float64
step float64
)
loop:
for {
switch op {
case MajorIteration:
if f > f0+step*decrease*g0 {
t.Errorf("%v: %v found step %v that does not satisfy the sufficient decrease condition",
prefix, reflect.TypeOf(ls), step)
}
if strongWolfe && math.Abs(g) > curvature*(-g0) {
t.Errorf("%v: %v found step %v that does not satisfy the curvature condition",
prefix, reflect.TypeOf(ls), step)
}
break loop
case FuncEvaluation:
f = prob.f(step)
case GradEvaluation:
g = prob.g(step)
case FuncEvaluation | GradEvaluation:
f = prob.f(step)
g = prob.g(step)
default:
t.Errorf("%v: Linesearcher returned an invalid operation %v", prefix, op)
break loop
}
k++
if k == 1000 {
t.Errorf("%v: %v did not finish", prefix, reflect.TypeOf(ls))
break
}
op, step, err = ls.Iterate(f, g)
if err != nil {
t.Errorf("%v: %v failed at step %v with %v", prefix, reflect.TypeOf(ls), step, err)
break
}
}
}
}
}

226
optimize/local.go Normal file
View File

@@ -0,0 +1,226 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import (
"math"
"time"
)
// Local finds a local minimum of a minimization problem using a sequential
// algorithm. A maximization problem can be transformed into a minimization
// problem by multiplying the function by -1.
//
// The first argument represents the problem to be minimized. Its fields are
// routines that evaluate the objective function, gradient, and other
// quantities related to the problem. The objective function, p.Func, must not
// be nil. The optimization method used may require other fields to be non-nil
// as specified by method.Needs. Local will panic if these are not met. The
// method can be determined automatically from the supplied problem which is
// described below.
//
// If p.Status is not nil, it is called before every evaluation. If the
// returned Status is not NotTerminated or the error is not nil, the
// optimization run is terminated.
//
// The second argument is the initial location at which to start the minimization.
// The initial location must be supplied, and must have a length equal to the
// problem dimension.
//
// The third argument contains the settings for the minimization. It is here that
// gradient tolerance, etc. are specified. The DefaultSettings function
// can be called for a Settings struct with the default values initialized.
// If settings == nil, the default settings are used. See the documentation
// for the Settings structure for more information. The optimization Method used
// may also contain settings, see documentation for the appropriate optimizer.
//
// The final argument is the optimization method to use. If method == nil, then
// an appropriate default is chosen based on the properties of the other arguments
// (dimension, gradient-free or gradient-based, etc.). The optimization
// methods in this package are designed such that reasonable defaults occur
// if options are not specified explicitly. For example, the code
// method := &optimize.BFGS{}
// creates a pointer to a new BFGS struct. When Local is called, the settings
// in the method will be populated with default values. The methods are also
// designed such that they can be reused in future calls to Local.
//
// If method implements Statuser, method.Status is called before every call
// to method.Iterate. If the returned Status is not NotTerminated or the
// error is non-nil, the optimization run is terminated.
//
// Local returns a Result struct and any error that occurred. See the
// documentation of Result for more information.
//
// Be aware that the default behavior of Local is to find the minimum.
// For certain functions and optimization methods, this process can take many
// function evaluations. If you would like to put limits on this, for example
// maximum runtime or maximum function evaluations, modify the Settings
// input struct.
func Local(p Problem, initX []float64, settings *Settings, method Method) (*Result, error) {
startTime := time.Now()
dim := len(initX)
if method == nil {
method = getDefaultMethod(&p)
}
if settings == nil {
settings = DefaultSettings()
}
stats := &Stats{}
err := checkOptimization(p, dim, method, settings.Recorder)
if err != nil {
return nil, err
}
optLoc, err := getStartingLocation(&p, method, initX, stats, settings)
if err != nil {
return nil, err
}
if settings.FunctionConverge != nil {
settings.FunctionConverge.Init(optLoc.F)
}
stats.Runtime = time.Since(startTime)
// Send initial location to Recorder
if settings.Recorder != nil {
err = settings.Recorder.Record(optLoc, InitIteration, stats)
if err != nil {
return nil, err
}
}
// Check if the starting location satisfies the convergence criteria.
status := checkConvergence(optLoc, settings, true)
// Run optimization
if status == NotTerminated && err == nil {
// The starting location is not good enough, we need to perform a
// minimization. The optimal location will be stored in-place in
// optLoc.
status, err = minimize(&p, method, settings, stats, optLoc, startTime)
}
// Cleanup and collect results
if settings.Recorder != nil && err == nil {
// Send the optimal location to Recorder.
err = settings.Recorder.Record(optLoc, PostIteration, stats)
}
stats.Runtime = time.Since(startTime)
return &Result{
Location: *optLoc,
Stats: *stats,
Status: status,
}, err
}
func minimize(p *Problem, method Method, settings *Settings, stats *Stats, optLoc *Location, startTime time.Time) (status Status, err error) {
loc := &Location{}
copyLocation(loc, optLoc)
x := make([]float64, len(loc.X))
statuser, _ := method.(Statuser)
var op Operation
op, err = method.Init(loc)
if err != nil {
status = Failure
return
}
for {
// Sequentially call method.Iterate, performing the operations it has
// commanded, until convergence.
switch op {
case NoOperation:
case InitIteration:
panic("optimize: Method returned InitIteration")
case PostIteration:
panic("optimize: Method returned PostIteration")
case MajorIteration:
copyLocation(optLoc, loc)
stats.MajorIterations++
status = checkConvergence(optLoc, settings, true)
default: // Any of the Evaluation operations.
status, err = evaluate(p, loc, op, x)
updateStats(stats, op)
}
status, err = iterCleanup(status, err, stats, settings, statuser, startTime, loc, op)
if status != NotTerminated || err != nil {
return
}
op, err = method.Iterate(loc)
if err != nil {
status = Failure
return
}
}
panic("optimize: unreachable")
}
func getDefaultMethod(p *Problem) Method {
if p.Grad != nil {
return &BFGS{}
}
return &NelderMead{}
}
// getStartingLocation allocates and initializes the starting location for the minimization.
func getStartingLocation(p *Problem, method Method, initX []float64, stats *Stats, settings *Settings) (*Location, error) {
dim := len(initX)
loc := newLocation(dim, method)
copy(loc.X, initX)
if settings.UseInitialData {
loc.F = settings.InitialValue
if loc.Gradient != nil {
initG := settings.InitialGradient
if initG == nil {
panic("optimize: initial gradient is nil")
}
if len(initG) != dim {
panic("optimize: initial gradient size mismatch")
}
copy(loc.Gradient, initG)
}
if loc.Hessian != nil {
initH := settings.InitialHessian
if initH == nil {
panic("optimize: initial Hessian is nil")
}
if initH.Symmetric() != dim {
panic("optimize: initial Hessian size mismatch")
}
loc.Hessian.CopySym(initH)
}
} else {
eval := FuncEvaluation
if loc.Gradient != nil {
eval |= GradEvaluation
}
if loc.Hessian != nil {
eval |= HessEvaluation
}
x := make([]float64, len(loc.X))
evaluate(p, loc, eval, x)
updateStats(stats, eval)
}
if math.IsInf(loc.F, 1) || math.IsNaN(loc.F) {
return loc, ErrFunc(loc.F)
}
for i, v := range loc.Gradient {
if math.IsInf(v, 0) || math.IsNaN(v) {
return loc, ErrGrad{Grad: v, Index: i}
}
}
return loc, nil
}

View File

@@ -0,0 +1,43 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize_test
import (
"fmt"
"log"
"github.com/gonum/optimize"
"github.com/gonum/optimize/functions"
)
func ExampleLocal() {
p := optimize.Problem{
Func: functions.ExtendedRosenbrock{}.Func,
Grad: functions.ExtendedRosenbrock{}.Grad,
}
x := []float64{1.3, 0.7, 0.8, 1.9, 1.2}
settings := optimize.DefaultSettings()
settings.Recorder = nil
settings.GradientThreshold = 1e-12
settings.FunctionConverge = nil
result, err := optimize.Local(p, x, settings, &optimize.BFGS{})
if err != nil {
log.Fatal(err)
}
if err = result.Status.Err(); err != nil {
log.Fatal(err)
}
fmt.Printf("result.Status: %v\n", result.Status)
fmt.Printf("result.X: %v\n", result.X)
fmt.Printf("result.F: %v\n", result.F)
fmt.Printf("result.Stats.FuncEvaluations: %d\n", result.Stats.FuncEvaluations)
// Output:
// result.Status: GradientThreshold
// result.X: [1 1 1 1 1]
// result.F: 0
// result.Stats.FuncEvaluations: 35
}

202
optimize/minimize.go Normal file
View File

@@ -0,0 +1,202 @@
// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import (
"fmt"
"math"
"time"
"github.com/gonum/floats"
"github.com/gonum/matrix/mat64"
)
// newLocation allocates a new location structure of the appropriate size. It
// allocates memory based on the dimension and the values in Needs. The initial
// function value is set to math.Inf(1).
func newLocation(dim int, method Needser) *Location {
// TODO(btracey): combine this with Local.
loc := &Location{
X: make([]float64, dim),
}
loc.F = math.Inf(1)
if method.Needs().Gradient {
loc.Gradient = make([]float64, dim)
}
if method.Needs().Hessian {
loc.Hessian = mat64.NewSymDense(dim, nil)
}
return loc
}
func copyLocation(dst, src *Location) {
dst.X = resize(dst.X, len(src.X))
copy(dst.X, src.X)
dst.F = src.F
dst.Gradient = resize(dst.Gradient, len(src.Gradient))
copy(dst.Gradient, src.Gradient)
if src.Hessian != nil {
if dst.Hessian == nil || dst.Hessian.Symmetric() != len(src.X) {
dst.Hessian = mat64.NewSymDense(len(src.X), nil)
}
dst.Hessian.CopySym(src.Hessian)
}
}
func checkOptimization(p Problem, dim int, method Needser, recorder Recorder) error {
if p.Func == nil {
panic("optimize: objective function is undefined")
}
if dim <= 0 {
panic("optimize: impossible problem dimension")
}
if err := p.satisfies(method); err != nil {
return err
}
if p.Status != nil {
_, err := p.Status()
if err != nil {
return err
}
}
if recorder != nil {
err := recorder.Init()
if err != nil {
return err
}
}
return nil
}
// evaluate evaluates the routines specified by the Operation at loc.X, and stores
// the answer into loc. loc.X is copied into x before
// evaluating in order to prevent the routines from modifying it.
func evaluate(p *Problem, loc *Location, op Operation, x []float64) (Status, error) {
if !op.isEvaluation() {
panic(fmt.Sprintf("optimize: invalid evaluation %v", op))
}
if p.Status != nil {
status, err := p.Status()
if err != nil || status != NotTerminated {
return status, err
}
}
copy(x, loc.X)
if op&FuncEvaluation != 0 {
loc.F = p.Func(x)
}
if op&GradEvaluation != 0 {
p.Grad(loc.Gradient, x)
}
if op&HessEvaluation != 0 {
p.Hess(loc.Hessian, x)
}
return NotTerminated, nil
}
// checkConvergence returns NotTerminated if the Location does not satisfy the
// convergence criteria given by settings. Otherwise a corresponding status is
// returned.
// Unlike checkLimits, checkConvergence is called only at MajorIterations.
//
// If local is true, gradient convergence is also checked.
func checkConvergence(loc *Location, settings *Settings, local bool) Status {
if local && loc.Gradient != nil {
norm := floats.Norm(loc.Gradient, math.Inf(1))
if norm < settings.GradientThreshold {
return GradientThreshold
}
}
if loc.F < settings.FunctionThreshold {
return FunctionThreshold
}
if settings.FunctionConverge != nil {
return settings.FunctionConverge.FunctionConverged(loc.F)
}
return NotTerminated
}
// updateStats updates the statistics based on the operation.
func updateStats(stats *Stats, op Operation) {
if op&FuncEvaluation != 0 {
stats.FuncEvaluations++
}
if op&GradEvaluation != 0 {
stats.GradEvaluations++
}
if op&HessEvaluation != 0 {
stats.HessEvaluations++
}
}
// checkLimits returns NotTerminated status if the various limits given by
// settings have not been reached. Otherwise it returns a corresponding status.
// Unlike checkConvergence, checkLimits is called by Local and Global at _every_
// iteration.
func checkLimits(loc *Location, stats *Stats, settings *Settings) Status {
// Check the objective function value for negative infinity because it
// could break the linesearches and -inf is the best we can do anyway.
if math.IsInf(loc.F, -1) {
return FunctionNegativeInfinity
}
if settings.MajorIterations > 0 && stats.MajorIterations >= settings.MajorIterations {
return IterationLimit
}
if settings.FuncEvaluations > 0 && stats.FuncEvaluations >= settings.FuncEvaluations {
return FunctionEvaluationLimit
}
if settings.GradEvaluations > 0 && stats.GradEvaluations >= settings.GradEvaluations {
return GradientEvaluationLimit
}
if settings.HessEvaluations > 0 && stats.HessEvaluations >= settings.HessEvaluations {
return HessianEvaluationLimit
}
// TODO(vladimir-ch): It would be nice to update Runtime here.
if settings.Runtime > 0 && stats.Runtime >= settings.Runtime {
return RuntimeLimit
}
return NotTerminated
}
// TODO(btracey): better name
func iterCleanup(status Status, err error, stats *Stats, settings *Settings, statuser Statuser, startTime time.Time, loc *Location, op Operation) (Status, error) {
if status != NotTerminated || err != nil {
return status, err
}
if settings.Recorder != nil {
stats.Runtime = time.Since(startTime)
err = settings.Recorder.Record(loc, op, stats)
if err != nil {
if status == NotTerminated {
status = Failure
}
return status, err
}
}
stats.Runtime = time.Since(startTime)
status = checkLimits(loc, stats, settings)
if status != NotTerminated {
return status, nil
}
if statuser != nil {
status, err = statuser.Status()
if err != nil || status != NotTerminated {
return status, err
}
}
return status, nil
}
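// The sketch below is illustrative only and is not part of the original file.
// It shows how the helpers above are typically composed by a driver loop: an
// evaluation is performed, the statistics are updated, and then the limit and
// convergence checks decide whether the run should stop. The helper name and
// the hard-coded operation are hypothetical.
func exampleEvaluationCycle(p *Problem, loc *Location, stats *Stats, settings *Settings) Status {
	op := FuncEvaluation | GradEvaluation // A Method would normally request this.
	// loc.Gradient must be allocated when GradEvaluation is requested.
	x := make([]float64, len(loc.X))
	status, err := evaluate(p, loc, op, x)
	if err != nil || status != NotTerminated {
		return status
	}
	updateStats(stats, op)
	// checkLimits runs at every iteration; checkConvergence only at major iterations.
	if status = checkLimits(loc, stats, settings); status != NotTerminated {
		return status
	}
	return checkConvergence(loc, settings, true)
}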

385
optimize/morethuente.go Normal file

@@ -0,0 +1,385 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import "math"
// MoreThuente is a Linesearcher that finds steps that satisfy both the
// sufficient decrease and curvature conditions (the strong Wolfe conditions).
//
// References:
// - More, J.J. and D.J. Thuente: Line Search Algorithms with Guaranteed Sufficient
// Decrease. ACM Transactions on Mathematical Software 20(3) (1994), 286-307
type MoreThuente struct {
// DecreaseFactor is the constant factor in the sufficient decrease
// (Armijo) condition.
// It must be in the interval [0, 1). The default value is 0.
DecreaseFactor float64
// CurvatureFactor is the constant factor in the Wolfe conditions. Smaller
// values result in a more exact line search.
// A set value must be in the interval (0, 1). If it is zero, it will be
// defaulted to 0.9.
CurvatureFactor float64
// StepTolerance sets the minimum acceptable width for the linesearch
// interval. If the relative interval length is less than this value,
// ErrLinesearcherFailure is returned.
// It must be non-negative. If it is zero, it will be defaulted to 1e-10.
StepTolerance float64
// MinimumStep is the minimum step that the linesearcher will take.
// It must be non-negative and less than MaximumStep. Defaults to no
// minimum (a value of 0).
MinimumStep float64
// MaximumStep is the maximum step that the linesearcher will take.
// It must be greater than MinimumStep. If it is zero, it will be defaulted
// to 1e20.
MaximumStep float64
bracketed bool // Indicates if a minimum has been bracketed.
fInit float64 // Function value at step = 0.
gInit float64 // Derivative value at step = 0.
// When stage is 1, the algorithm updates the interval given by x and y
// so that it contains a minimizer of the modified function
// psi(step) = f(step) - f(0) - DecreaseFactor * step * f'(0).
// When stage is 2, the interval is updated so that it contains a minimizer
// of f.
stage int
step float64 // Current step.
lower, upper float64 // Lower and upper bounds on the next step.
x float64 // Endpoint of the interval with a lower function value.
fx, gx float64 // Data at x.
y float64 // The other endpoint.
fy, gy float64 // Data at y.
width [2]float64 // Width of the interval at two previous iterations.
}
const (
mtMinGrowthFactor float64 = 1.1
mtMaxGrowthFactor float64 = 4
)
func (mt *MoreThuente) Init(f, g float64, step float64) Operation {
// Based on the original Fortran code that is available, for example, from
// http://ftp.mcs.anl.gov/pub/MINPACK-2/csrch/
// as part of
// MINPACK-2 Project. November 1993.
// Argonne National Laboratory and University of Minnesota.
// Brett M. Averick, Richard G. Carter, and Jorge J. Moré.
if g >= 0 {
panic("morethuente: initial derivative is non-negative")
}
if step <= 0 {
panic("morethuente: invalid initial step")
}
if mt.CurvatureFactor == 0 {
mt.CurvatureFactor = 0.9
}
if mt.StepTolerance == 0 {
mt.StepTolerance = 1e-10
}
if mt.MaximumStep == 0 {
mt.MaximumStep = 1e20
}
if mt.MinimumStep < 0 {
panic("morethuente: minimum step is negative")
}
if mt.MaximumStep <= mt.MinimumStep {
panic("morethuente: maximum step is not greater than minimum step")
}
if mt.DecreaseFactor < 0 || mt.DecreaseFactor >= 1 {
panic("morethuente: invalid decrease factor")
}
if mt.CurvatureFactor <= 0 || mt.CurvatureFactor >= 1 {
panic("morethuente: invalid curvature factor")
}
if mt.StepTolerance <= 0 {
panic("morethuente: step tolerance is not positive")
}
if step < mt.MinimumStep {
step = mt.MinimumStep
}
if step > mt.MaximumStep {
step = mt.MaximumStep
}
mt.bracketed = false
mt.stage = 1
mt.fInit = f
mt.gInit = g
mt.x, mt.fx, mt.gx = 0, f, g
mt.y, mt.fy, mt.gy = 0, f, g
mt.lower = 0
mt.upper = step + mtMaxGrowthFactor*step
mt.width[0] = mt.MaximumStep - mt.MinimumStep
mt.width[1] = 2 * mt.width[0]
mt.step = step
return FuncEvaluation | GradEvaluation
}
func (mt *MoreThuente) Iterate(f, g float64) (Operation, float64, error) {
if mt.stage == 0 {
panic("morethuente: Init has not been called")
}
gTest := mt.DecreaseFactor * mt.gInit
fTest := mt.fInit + mt.step*gTest
if mt.bracketed {
if mt.step <= mt.lower || mt.step >= mt.upper || mt.upper-mt.lower <= mt.StepTolerance*mt.upper {
// step contains the best step found (see below).
return NoOperation, mt.step, ErrLinesearcherFailure
}
}
if mt.step == mt.MaximumStep && f <= fTest && g <= gTest {
return NoOperation, mt.step, ErrLinesearcherBound
}
if mt.step == mt.MinimumStep && (f > fTest || g >= gTest) {
return NoOperation, mt.step, ErrLinesearcherFailure
}
// Test for convergence.
if f <= fTest && math.Abs(g) <= mt.CurvatureFactor*(-mt.gInit) {
mt.stage = 0
return MajorIteration, mt.step, nil
}
if mt.stage == 1 && f <= fTest && g >= 0 {
mt.stage = 2
}
if mt.stage == 1 && f <= mt.fx && f > fTest {
// A lower function value, but the decrease is not sufficient.
// Compute values and derivatives of the modified function at step, x, y.
fm := f - mt.step*gTest
fxm := mt.fx - mt.x*gTest
fym := mt.fy - mt.y*gTest
gm := g - gTest
gxm := mt.gx - gTest
gym := mt.gy - gTest
// Update x, y and step.
mt.nextStep(fxm, gxm, fym, gym, fm, gm)
// Recover values and derivatives of the non-modified function at x and y.
mt.fx = fxm + mt.x*gTest
mt.fy = fym + mt.y*gTest
mt.gx = gxm + gTest
mt.gy = gym + gTest
} else {
// Update x, y and step.
mt.nextStep(mt.fx, mt.gx, mt.fy, mt.gy, f, g)
}
if mt.bracketed {
// Monitor the length of the bracketing interval. If the interval has
// not been reduced sufficiently after two steps, use bisection to
// force its length to zero.
width := mt.y - mt.x
if math.Abs(width) >= 2.0/3*mt.width[1] {
mt.step = mt.x + 0.5*width
}
mt.width[0], mt.width[1] = math.Abs(width), mt.width[0]
}
if mt.bracketed {
mt.lower = math.Min(mt.x, mt.y)
mt.upper = math.Max(mt.x, mt.y)
} else {
mt.lower = mt.step + mtMinGrowthFactor*(mt.step-mt.x)
mt.upper = mt.step + mtMaxGrowthFactor*(mt.step-mt.x)
}
// Force the step to be in [MinimumStep, MaximumStep].
mt.step = math.Max(mt.MinimumStep, math.Min(mt.step, mt.MaximumStep))
if mt.bracketed {
if mt.step <= mt.lower || mt.step >= mt.upper || mt.upper-mt.lower <= mt.StepTolerance*mt.upper {
// If further progress is not possible, set step to the best step
// obtained during the search.
mt.step = mt.x
}
}
return FuncEvaluation | GradEvaluation, mt.step, nil
}
// nextStep computes the next safeguarded step and updates the interval that
// contains a step that satisfies the sufficient decrease and curvature
// conditions.
func (mt *MoreThuente) nextStep(fx, gx, fy, gy, f, g float64) {
x := mt.x
y := mt.y
step := mt.step
gNeg := g < 0
if gx < 0 {
gNeg = !gNeg
}
var next float64
var bracketed bool
switch {
case f > fx:
// A higher function value. The minimum is bracketed between x and step.
// We want the next step to be closer to x because the function value
// there is lower.
theta := 3*(fx-f)/(step-x) + gx + g
s := math.Max(math.Abs(gx), math.Abs(g))
s = math.Max(s, math.Abs(theta))
gamma := s * math.Sqrt((theta/s)*(theta/s)-(gx/s)*(g/s))
if step < x {
gamma *= -1
}
p := gamma - gx + theta
q := gamma - gx + gamma + g
r := p / q
stpc := x + r*(step-x)
stpq := x + gx/((fx-f)/(step-x)+gx)/2*(step-x)
if math.Abs(stpc-x) < math.Abs(stpq-x) {
// The cubic step is closer to x than the quadratic step.
// Take the cubic step.
next = stpc
} else {
// If f is much larger than fx, then the quadratic step may be too
// close to x. Therefore heuristically take the average of the
// cubic and quadratic steps.
next = stpc + (stpq-stpc)/2
}
bracketed = true
case gNeg:
// A lower function value and derivatives of opposite sign. The minimum
// is bracketed between x and step. If we choose a step that is far
// from step, the next iteration will also likely fall in this case.
theta := 3*(fx-f)/(step-x) + gx + g
s := math.Max(math.Abs(gx), math.Abs(g))
s = math.Max(s, math.Abs(theta))
gamma := s * math.Sqrt((theta/s)*(theta/s)-(gx/s)*(g/s))
if step > x {
gamma *= -1
}
p := gamma - g + theta
q := gamma - g + gamma + gx
r := p / q
stpc := step + r*(x-step)
stpq := step + g/(g-gx)*(x-step)
if math.Abs(stpc-step) > math.Abs(stpq-step) {
// The cubic step is farther from x than the quadratic step.
// Take the cubic step.
next = stpc
} else {
// Take the quadratic step.
next = stpq
}
bracketed = true
case math.Abs(g) < math.Abs(gx):
// A lower function value, derivatives of the same sign, and the
// magnitude of the derivative decreases. Extrapolate function values
// at x and step so that the next step lies between step and y.
theta := 3*(fx-f)/(step-x) + gx + g
s := math.Max(math.Abs(gx), math.Abs(g))
s = math.Max(s, math.Abs(theta))
gamma := s * math.Sqrt(math.Max(0, (theta/s)*(theta/s)-(gx/s)*(g/s)))
if step > x {
gamma *= -1
}
p := gamma - g + theta
q := gamma + gx - g + gamma
r := p / q
var stpc float64
switch {
case r < 0 && gamma != 0:
stpc = step + r*(x-step)
case step > x:
stpc = mt.upper
default:
stpc = mt.lower
}
stpq := step + g/(g-gx)*(x-step)
if mt.bracketed {
// We are extrapolating so be cautious and take the step that
// is closer to step.
if math.Abs(stpc-step) < math.Abs(stpq-step) {
next = stpc
} else {
next = stpq
}
// Modify next if it is close to or beyond y.
if step > x {
next = math.Min(step+2.0/3*(y-step), next)
} else {
next = math.Max(step+2.0/3*(y-step), next)
}
} else {
// Minimum has not been bracketed so take the larger step...
if math.Abs(stpc-step) > math.Abs(stpq-step) {
next = stpc
} else {
next = stpq
}
// ...but within reason.
next = math.Max(mt.lower, math.Min(next, mt.upper))
}
default:
// A lower function value, derivatives of the same sign, and the
// magnitude of the derivative does not decrease. The function seems to
// decrease rapidly in the direction of the step.
switch {
case mt.bracketed:
theta := 3*(f-fy)/(y-step) + gy + g
s := math.Max(math.Abs(gy), math.Abs(g))
s = math.Max(s, math.Abs(theta))
gamma := s * math.Sqrt((theta/s)*(theta/s)-(gy/s)*(g/s))
if step > y {
gamma *= -1
}
p := gamma - g + theta
q := gamma - g + gamma + gy
r := p / q
next = step + r*(y-step)
case step > x:
next = mt.upper
default:
next = mt.lower
}
}
if f > fx {
// x is still the best step.
mt.y = step
mt.fy = f
mt.gy = g
} else {
// step is the new best step.
if gNeg {
mt.y = x
mt.fy = fx
mt.gy = gx
}
mt.x = step
mt.fx = f
mt.gx = g
}
mt.bracketed = bracketed
mt.step = next
}
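// The sketch below is illustrative only and is not part of the original file.
// It drives MoreThuente by hand on the one-dimensional function
// phi(step) = (step-1)^2, whose derivative at step = 0 is negative as Init
// requires. In normal use the Linesearcher is driven by a line-search Method
// rather than called directly; the names here are hypothetical.
func exampleMoreThuenteSketch() float64 {
	phi := func(step float64) float64 { return (step - 1) * (step - 1) }
	dphi := func(step float64) float64 { return 2 * (step - 1) }

	mt := &MoreThuente{}
	step := 0.5
	op := mt.Init(phi(0), dphi(0), step)
	for op&(FuncEvaluation|GradEvaluation) != 0 {
		var err error
		op, step, err = mt.Iterate(phi(step), dphi(step))
		if err != nil {
			return math.NaN()
		}
	}
	// op is now MajorIteration and step satisfies the strong Wolfe conditions.
	return step
}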

314
optimize/neldermead.go Normal file

@@ -0,0 +1,314 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import (
"sort"
"github.com/gonum/floats"
)
// nmIterType describes the kind of the last Nelder-Mead iteration.
type nmIterType int
const (
nmReflected = iota
nmExpanded
nmContractedInside
nmContractedOutside
nmInitialize
nmShrink
nmMajor
)
type nmVertexSorter struct {
vertices [][]float64
values []float64
}
func (n nmVertexSorter) Len() int {
return len(n.values)
}
func (n nmVertexSorter) Less(i, j int) bool {
return n.values[i] < n.values[j]
}
func (n nmVertexSorter) Swap(i, j int) {
n.values[i], n.values[j] = n.values[j], n.values[i]
n.vertices[i], n.vertices[j] = n.vertices[j], n.vertices[i]
}
// NelderMead is an implementation of the Nelder-Mead simplex algorithm for
// gradient-free nonlinear optimization (not to be confused with Dantzig's
// simplex algorithm for linear programming). The implementation follows the
// algorithm described in
//
// http://epubs.siam.org/doi/pdf/10.1137/S1052623496303470
//
// If an initial simplex is provided, it is used and the initial location is
// ignored. If InitialVertices and InitialValues are both nil, an initial simplex
// will be generated automatically, using the initial location as one vertex and
// generating each additional vertex SimplexSize away along one dimension.
//
// If the simplex update parameters (Reflection, etc.)
// are zero, they will be set automatically based on the dimension according to
// the recommendations in
//
// http://www.webpages.uidaho.edu/~fuchang/res/ANMS.pdf
type NelderMead struct {
InitialVertices [][]float64
InitialValues []float64
Reflection float64 // Reflection parameter (>0)
Expansion float64 // Expansion parameter (>1)
Contraction float64 // Contraction parameter (>0, <1)
Shrink float64 // Shrink parameter (>0, <1)
SimplexSize float64 // size of auto-constructed initial simplex
reflection float64
expansion float64
contraction float64
shrink float64
vertices [][]float64 // location of the vertices sorted in ascending f
values []float64 // function values at the vertices sorted in ascending f
centroid []float64 // centroid of all but the worst vertex
fillIdx int // index for filling the simplex during initialization and shrinking
lastIter nmIterType // Last iteration
reflectedPoint []float64 // Storage of the reflected point location
reflectedValue float64 // Value at the last reflection point
}
func (n *NelderMead) Init(loc *Location) (Operation, error) {
dim := len(loc.X)
if cap(n.vertices) < dim+1 {
n.vertices = make([][]float64, dim+1)
}
n.vertices = n.vertices[:dim+1]
for i := range n.vertices {
n.vertices[i] = resize(n.vertices[i], dim)
}
n.values = resize(n.values, dim+1)
n.centroid = resize(n.centroid, dim)
n.reflectedPoint = resize(n.reflectedPoint, dim)
if n.SimplexSize == 0 {
n.SimplexSize = 0.05
}
// Default parameter choices are chosen in a dimension-dependent way
// from http://www.webpages.uidaho.edu/~fuchang/res/ANMS.pdf
n.reflection = n.Reflection
if n.reflection == 0 {
n.reflection = 1
}
n.expansion = n.Expansion
if n.expansion == 0 {
n.expansion = 1 + 2/float64(dim)
}
n.contraction = n.Contraction
if n.contraction == 0 {
n.contraction = 0.75 - 1/(2*float64(dim))
}
n.shrink = n.Shrink
if n.shrink == 0 {
n.shrink = 1 - 1/float64(dim)
}
if n.InitialVertices != nil {
// Initial simplex provided. Copy the locations and values, and sort them.
if len(n.InitialVertices) != dim+1 {
panic("neldermead: incorrect number of vertices in initial simplex")
}
if len(n.InitialValues) != dim+1 {
panic("neldermead: incorrect number of values in initial simplex")
}
for i := range n.InitialVertices {
if len(n.InitialVertices[i]) != dim {
panic("neldermead: vertex size mismatch")
}
copy(n.vertices[i], n.InitialVertices[i])
}
copy(n.values, n.InitialValues)
sort.Sort(nmVertexSorter{n.vertices, n.values})
computeCentroid(n.vertices, n.centroid)
return n.returnNext(nmMajor, loc)
}
// No simplex provided. Begin building the initial simplex. The first simplex
// entry is the initial location; each remaining vertex is generated by stepping
// SimplexSize along one dimension.
copy(n.vertices[dim], loc.X)
n.values[dim] = loc.F
n.fillIdx = 0
loc.X[n.fillIdx] += n.SimplexSize
n.lastIter = nmInitialize
return FuncEvaluation, nil
}
// computeCentroid computes the centroid of all the simplex vertices except the
// final one.
func computeCentroid(vertices [][]float64, centroid []float64) {
dim := len(centroid)
for i := range centroid {
centroid[i] = 0
}
for i := 0; i < dim; i++ {
vertex := vertices[i]
for j, v := range vertex {
centroid[j] += v
}
}
for i := range centroid {
centroid[i] /= float64(dim)
}
}
func (n *NelderMead) Iterate(loc *Location) (Operation, error) {
dim := len(loc.X)
switch n.lastIter {
case nmInitialize:
n.values[n.fillIdx] = loc.F
copy(n.vertices[n.fillIdx], loc.X)
n.fillIdx++
if n.fillIdx == dim {
// Successfully finished building initial simplex.
sort.Sort(nmVertexSorter{n.vertices, n.values})
computeCentroid(n.vertices, n.centroid)
return n.returnNext(nmMajor, loc)
}
copy(loc.X, n.vertices[dim])
loc.X[n.fillIdx] += n.SimplexSize
return FuncEvaluation, nil
case nmMajor:
// Nelder-Mead iterations start with a reflection step.
return n.returnNext(nmReflected, loc)
case nmReflected:
n.reflectedValue = loc.F
switch {
case loc.F >= n.values[0] && loc.F < n.values[dim-1]:
n.replaceWorst(loc.X, loc.F)
return n.returnNext(nmMajor, loc)
case loc.F < n.values[0]:
return n.returnNext(nmExpanded, loc)
default:
if loc.F < n.values[dim] {
return n.returnNext(nmContractedOutside, loc)
}
return n.returnNext(nmContractedInside, loc)
}
case nmExpanded:
if loc.F < n.reflectedValue {
n.replaceWorst(loc.X, loc.F)
} else {
n.replaceWorst(n.reflectedPoint, n.reflectedValue)
}
return n.returnNext(nmMajor, loc)
case nmContractedOutside:
if loc.F <= n.reflectedValue {
n.replaceWorst(loc.X, loc.F)
return n.returnNext(nmMajor, loc)
}
n.fillIdx = 1
return n.returnNext(nmShrink, loc)
case nmContractedInside:
if loc.F < n.values[dim] {
n.replaceWorst(loc.X, loc.F)
return n.returnNext(nmMajor, loc)
}
n.fillIdx = 1
return n.returnNext(nmShrink, loc)
case nmShrink:
copy(n.vertices[n.fillIdx], loc.X)
n.values[n.fillIdx] = loc.F
n.fillIdx++
if n.fillIdx != dim+1 {
return n.returnNext(nmShrink, loc)
}
sort.Sort(nmVertexSorter{n.vertices, n.values})
computeCentroid(n.vertices, n.centroid)
return n.returnNext(nmMajor, loc)
default:
panic("unreachable")
}
}
// returnNext updates the location based on the iteration type and the current
// simplex, and returns the next operation.
func (n *NelderMead) returnNext(iter nmIterType, loc *Location) (Operation, error) {
n.lastIter = iter
switch iter {
case nmMajor:
// Fill loc with the current best point and value,
// and command a convergence check.
copy(loc.X, n.vertices[0])
loc.F = n.values[0]
return MajorIteration, nil
case nmReflected, nmExpanded, nmContractedOutside, nmContractedInside:
// x_new = x_centroid + scale * (x_centroid - x_worst)
var scale float64
switch iter {
case nmReflected:
scale = n.reflection
case nmExpanded:
scale = n.reflection * n.expansion
case nmContractedOutside:
scale = n.reflection * n.contraction
case nmContractedInside:
scale = -n.contraction
}
dim := len(loc.X)
floats.SubTo(loc.X, n.centroid, n.vertices[dim])
floats.Scale(scale, loc.X)
floats.Add(loc.X, n.centroid)
if iter == nmReflected {
copy(n.reflectedPoint, loc.X)
}
return FuncEvaluation, nil
case nmShrink:
// x_shrink = x_best + delta * (x_i - x_best)
floats.SubTo(loc.X, n.vertices[n.fillIdx], n.vertices[0])
floats.Scale(n.shrink, loc.X)
floats.Add(loc.X, n.vertices[0])
return FuncEvaluation, nil
default:
panic("unreachable")
}
}
// replaceWorst removes the worst location in the simplex and adds the new
// {x, f} pair maintaining sorting.
func (n *NelderMead) replaceWorst(x []float64, f float64) {
dim := len(x)
if f >= n.values[dim] {
panic("increase in simplex value")
}
copy(n.vertices[dim], x)
n.values[dim] = f
// Sort the newly-added value.
for i := dim - 1; i >= 0; i-- {
if n.values[i] < f {
break
}
n.vertices[i], n.vertices[i+1] = n.vertices[i+1], n.vertices[i]
n.values[i], n.values[i+1] = n.values[i+1], n.values[i]
}
// Update the location of the centroid. Only one point has been replaced, so
// subtract the worst point and add the new one.
floats.AddScaled(n.centroid, -1/float64(dim), n.vertices[dim])
floats.AddScaled(n.centroid, 1/float64(dim), x)
}
func (*NelderMead) Needs() struct {
Gradient bool
Hessian bool
} {
return struct {
Gradient bool
Hessian bool
}{false, false}
}
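// The sketch below is illustrative only and is not part of the original file.
// It runs NelderMead by hand on a two-dimensional bowl, evaluating the
// objective whenever a FuncEvaluation is requested and stopping after a fixed
// budget of MajorIterations. In normal use the method is driven by the
// package's optimization loop; the names and budget here are hypothetical.
func exampleNelderMeadSketch() ([]float64, float64) {
	f := func(x []float64) float64 { return (x[0]-2)*(x[0]-2) + 4*(x[1]+1)*(x[1]+1) }

	// loc.F must hold the function value at the initial location before Init.
	loc := &Location{X: []float64{0, 0}}
	loc.F = f(loc.X)
	best := &Location{X: []float64{0, 0}, F: loc.F}

	nm := &NelderMead{}
	op, err := nm.Init(loc)
	for majors := 0; err == nil && majors < 100; {
		switch {
		case op&FuncEvaluation != 0:
			loc.F = f(loc.X)
		case op == MajorIteration:
			// loc holds the current best vertex; a real driver would run its
			// convergence checks here.
			copy(best.X, loc.X)
			best.F = loc.F
			majors++
		}
		op, err = nm.Iterate(loc)
	}
	return best.X, best.F
}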

147
optimize/newton.go Normal file

@@ -0,0 +1,147 @@
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import (
"math"
"github.com/gonum/matrix/mat64"
)
const maxNewtonModifications = 20
// Newton implements a modified Newton's method for Hessian-based unconstrained
// minimization. It applies regularization when the Hessian is not positive
// definite, and it can converge to a local minimum from any starting point.
//
// Newton iteratively forms a quadratic model to the objective function f and
// tries to minimize this approximate model. It generates a sequence of
// locations x_k by means of
// solve H_k d_k = -∇f_k for d_k,
// x_{k+1} = x_k + α_k d_k,
// where H_k is the Hessian matrix of f at x_k and α_k is a step size found by
// a line search.
//
// Away from a minimizer H_k may not be positive definite and d_k may not be a
// descent direction. Newton implements a Hessian modification strategy that
// adds successively larger multiples of identity to H_k until it becomes
// positive definite. Note that the repeated trial factorization of the
// modified Hessian involved in this process can be computationally expensive.
//
// If the Hessian matrix cannot be formed explicitly or if the computational
// cost of its factorization is prohibitive, BFGS or L-BFGS quasi-Newton method
// can be used instead.
type Newton struct {
// Linesearcher is used for selecting suitable steps along the descent
// direction d. Accepted steps should satisfy at least one of the Wolfe,
// Goldstein or Armijo conditions.
// If Linesearcher == nil, an appropriate default is chosen.
Linesearcher Linesearcher
// Increase is the factor by which a scalar tau is successively increased
// so that (H + tau*I) is positive definite. Larger values reduce the
// number of trial Hessian factorizations, but also reduce the second-order
// information in H.
// Increase must be greater than 1. If Increase is 0, it is defaulted to 5.
Increase float64
ls *LinesearchMethod
hess *mat64.SymDense // Storage for a copy of the Hessian matrix.
chol mat64.Cholesky // Storage for the Cholesky factorization.
tau float64
}
func (n *Newton) Init(loc *Location) (Operation, error) {
if n.Increase == 0 {
n.Increase = 5
}
if n.Increase <= 1 {
panic("optimize: Newton.Increase must be greater than 1")
}
if n.Linesearcher == nil {
n.Linesearcher = &Bisection{}
}
if n.ls == nil {
n.ls = &LinesearchMethod{}
}
n.ls.Linesearcher = n.Linesearcher
n.ls.NextDirectioner = n
return n.ls.Init(loc)
}
func (n *Newton) Iterate(loc *Location) (Operation, error) {
return n.ls.Iterate(loc)
}
func (n *Newton) InitDirection(loc *Location, dir []float64) (stepSize float64) {
dim := len(loc.X)
n.hess = resizeSymDense(n.hess, dim)
n.tau = 0
return n.NextDirection(loc, dir)
}
func (n *Newton) NextDirection(loc *Location, dir []float64) (stepSize float64) {
// This method implements Algorithm 3.3 (Cholesky with Added Multiple of
// the Identity) from Nocedal, Wright (2006), 2nd edition.
dim := len(loc.X)
d := mat64.NewVector(dim, dir)
grad := mat64.NewVector(dim, loc.Gradient)
n.hess.CopySym(loc.Hessian)
// Find the smallest diagonal entry of the Hessian.
minA := n.hess.At(0, 0)
for i := 1; i < dim; i++ {
a := n.hess.At(i, i)
if a < minA {
minA = a
}
}
// If the smallest diagonal entry is positive, the Hessian may be positive
// definite, and so first attempt to apply the Cholesky factorization to
// the un-modified Hessian. If the smallest entry is negative, use the
// final tau from the last iteration if regularization was needed,
// otherwise guess an appropriate value for tau.
if minA > 0 {
n.tau = 0
} else if n.tau == 0 {
n.tau = -minA + 0.001
}
for k := 0; k < maxNewtonModifications; k++ {
if n.tau != 0 {
// Add a multiple of identity to the Hessian.
for i := 0; i < dim; i++ {
n.hess.SetSym(i, i, loc.Hessian.At(i, i)+n.tau)
}
}
// Try to apply the Cholesky factorization.
pd := n.chol.Factorize(n.hess)
if pd {
// Store the solution in d's backing array, dir.
d.SolveCholeskyVec(&n.chol, grad)
d.ScaleVec(-1, d)
return 1
}
// Modified Hessian is not PD, so increase tau.
n.tau = math.Max(n.Increase*n.tau, 0.001)
}
// Hessian modification failed to get a PD matrix. Return the negative
// gradient as the descent direction.
d.ScaleVec(-1, grad)
return 1
}
func (n *Newton) Needs() struct {
Gradient bool
Hessian bool
} {
return struct {
Gradient bool
Hessian bool
}{true, true}
}
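// The sketch below is illustrative only and is not part of the original file.
// It sets up a Problem with an analytic gradient and Hessian for a simple
// convex quadratic, which is the kind of input Newton expects; the zero value
// of Newton then selects the documented defaults (Increase = 5 and a default
// line search). The problem definition is hypothetical, and in normal use it
// is handed to the package's optimization entry point rather than to the
// method directly.
var exampleNewtonProblem = Problem{
	// f(x) = x0^2 + 10*x1^2
	Func: func(x []float64) float64 {
		return x[0]*x[0] + 10*x[1]*x[1]
	},
	Grad: func(grad, x []float64) {
		grad[0] = 2 * x[0]
		grad[1] = 20 * x[1]
	},
	Hess: func(hess mat64.MutableSymmetric, x []float64) {
		hess.SetSym(0, 0, 2)
		hess.SetSym(0, 1, 0)
		hess.SetSym(1, 1, 20)
	},
}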

106
optimize/printer.go Normal file

@@ -0,0 +1,106 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import (
"fmt"
"io"
"math"
"os"
"time"
"github.com/gonum/floats"
)
var printerHeadings = [...]string{
"Iter",
"Runtime",
"FuncEvals",
"Func",
"GradEvals",
"|Gradient|∞",
"HessEvals",
}
const (
printerBaseTmpl = "%9v %16v %9v %22v" // Base template for headings and values that are always printed.
printerGradTmpl = " %9v %22v" // Appended to base template when loc.Gradient != nil.
printerHessTmpl = " %9v" // Appended to base template when loc.Hessian != nil.
)
// Printer writes column-format output to the specified writer as the optimization
// progresses. By default, it writes to os.Stdout.
type Printer struct {
Writer io.Writer
HeadingInterval int
ValueInterval time.Duration
lastHeading int
lastValue time.Time
}
func NewPrinter() *Printer {
return &Printer{
Writer: os.Stdout,
HeadingInterval: 30,
ValueInterval: 500 * time.Millisecond,
}
}
func (p *Printer) Init() error {
p.lastHeading = p.HeadingInterval // So the headings are printed the first time.
p.lastValue = time.Now().Add(-p.ValueInterval) // So the values are printed the first time.
return nil
}
func (p *Printer) Record(loc *Location, op Operation, stats *Stats) error {
if op != MajorIteration && op != InitIteration && op != PostIteration {
return nil
}
// Print values always on PostIteration or when ValueInterval has elapsed.
printValues := time.Since(p.lastValue) > p.ValueInterval || op == PostIteration
if !printValues {
// Return early if not printing anything.
return nil
}
// Print heading when HeadingInterval lines have been printed, but never on PostIteration.
printHeading := p.lastHeading >= p.HeadingInterval && op != PostIteration
if printHeading {
p.lastHeading = 1
} else {
p.lastHeading++
}
if printHeading {
headings := "\n" + fmt.Sprintf(printerBaseTmpl, printerHeadings[0], printerHeadings[1], printerHeadings[2], printerHeadings[3])
if loc.Gradient != nil {
headings += fmt.Sprintf(printerGradTmpl, printerHeadings[4], printerHeadings[5])
}
if loc.Hessian != nil {
headings += fmt.Sprintf(printerHessTmpl, printerHeadings[6])
}
_, err := fmt.Fprintln(p.Writer, headings)
if err != nil {
return err
}
}
values := fmt.Sprintf(printerBaseTmpl, stats.MajorIterations, stats.Runtime, stats.FuncEvaluations, loc.F)
if loc.Gradient != nil {
values += fmt.Sprintf(printerGradTmpl, stats.GradEvaluations, floats.Norm(loc.Gradient, math.Inf(1)))
}
if loc.Hessian != nil {
values += fmt.Sprintf(printerHessTmpl, stats.HessEvaluations)
}
_, err := fmt.Fprintln(p.Writer, values)
if err != nil {
return err
}
p.lastValue = time.Now()
return nil
}
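// The sketch below is illustrative only and is not part of the original file.
// It shows the intended way to hook a Printer into a run: Printer is a
// Recorder, so it is attached through the Settings that are passed to the
// optimization entry point. The helper name is hypothetical.
func exampleAttachPrinter() *Settings {
	p := NewPrinter()
	p.Writer = os.Stderr        // Defaults to os.Stdout; any io.Writer works.
	p.ValueInterval = time.Second // Print values at most once per second.

	settings := DefaultSettings()
	settings.Recorder = p
	return settings
}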

185
optimize/stepsizers.go Normal file

@@ -0,0 +1,185 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import (
"math"
"github.com/gonum/floats"
)
const (
initialStepFactor = 1
quadraticMinimumStepSize = 1e-3
quadraticMaximumStepSize = 1
quadraticThreshold = 1e-12
firstOrderMinimumStepSize = quadraticMinimumStepSize
firstOrderMaximumStepSize = quadraticMaximumStepSize
)
// ConstantStepSize is a StepSizer that returns the same step size for
// every iteration.
type ConstantStepSize struct {
Size float64
}
func (c ConstantStepSize) Init(_ *Location, _ []float64) float64 {
return c.Size
}
func (c ConstantStepSize) StepSize(_ *Location, _ []float64) float64 {
return c.Size
}
// QuadraticStepSize estimates the initial line search step size as the minimum
// of a quadratic that interpolates f(x_{k-1}), f(x_k) and ∇f_k⋅p_k.
// This is useful for line search methods that do not produce well-scaled
// descent directions, such as gradient descent or conjugate gradient methods.
// The step size is bounded away from zero.
type QuadraticStepSize struct {
// Threshold determines that the initial step size should be estimated by
// quadratic interpolation when the relative change in the objective
// function is larger than Threshold. Otherwise the initial step size is
// set to 2*previous step size.
// If Threshold is zero, it will be set to 1e-12.
Threshold float64
// InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞.
// If InitialStepFactor is zero, it will be set to one.
InitialStepFactor float64
// MinStepSize is the lower bound on the estimated step size.
// MinStepSize times GradientAbsTol should always be greater than machine epsilon.
// If MinStepSize is zero, it will be set to 1e-3.
MinStepSize float64
// MaxStepSize is the upper bound on the estimated step size.
// If MaxStepSize is zero, it will be set to 1.
MaxStepSize float64
fPrev float64
dirPrevNorm float64
projGradPrev float64
xPrev []float64
}
func (q *QuadraticStepSize) Init(loc *Location, dir []float64) (stepSize float64) {
if q.Threshold == 0 {
q.Threshold = quadraticThreshold
}
if q.InitialStepFactor == 0 {
q.InitialStepFactor = initialStepFactor
}
if q.MinStepSize == 0 {
q.MinStepSize = quadraticMinimumStepSize
}
if q.MaxStepSize == 0 {
q.MaxStepSize = quadraticMaximumStepSize
}
if q.MaxStepSize <= q.MinStepSize {
panic("optimize: MinStepSize not smaller than MaxStepSize")
}
gNorm := floats.Norm(loc.Gradient, math.Inf(1))
stepSize = math.Max(q.MinStepSize, math.Min(q.InitialStepFactor/gNorm, q.MaxStepSize))
q.fPrev = loc.F
q.dirPrevNorm = floats.Norm(dir, 2)
q.projGradPrev = floats.Dot(loc.Gradient, dir)
q.xPrev = resize(q.xPrev, len(loc.X))
copy(q.xPrev, loc.X)
return stepSize
}
func (q *QuadraticStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) {
stepSizePrev := floats.Distance(loc.X, q.xPrev, 2) / q.dirPrevNorm
projGrad := floats.Dot(loc.Gradient, dir)
stepSize = 2 * stepSizePrev
if !floats.EqualWithinRel(q.fPrev, loc.F, q.Threshold) {
// Two consecutive function values are not relatively equal, so
// computing the minimum of a quadratic interpolant might make sense
df := (loc.F - q.fPrev) / stepSizePrev
quadTest := df - q.projGradPrev
if quadTest > 0 {
// There is a chance of approximating the function well by a
// quadratic only if the finite difference (f_k-f_{k-1})/stepSizePrev
// is larger than ∇f_{k-1}⋅p_{k-1}
// Set the step size to the minimizer of the quadratic function that
// interpolates f_{k-1}, ∇f_{k-1}⋅p_{k-1} and f_k
stepSize = -q.projGradPrev * stepSizePrev / quadTest / 2
}
}
// Bound the step size to lie in [MinStepSize, MaxStepSize]
stepSize = math.Max(q.MinStepSize, math.Min(stepSize, q.MaxStepSize))
q.fPrev = loc.F
q.dirPrevNorm = floats.Norm(dir, 2)
q.projGradPrev = projGrad
copy(q.xPrev, loc.X)
return stepSize
}
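// The sketch below is illustrative only and is not part of the original file.
// It works through the quadratic-interpolation rule used by
// QuadraticStepSize.StepSize in isolation: the minimizer of the quadratic q
// with q(0) = fPrev, q'(0) = projGradPrev and q(stepPrev) = f is
//  s = -projGradPrev * stepPrev / (2 * ((f-fPrev)/stepPrev - projGradPrev)),
// which is then clamped to [MinStepSize, MaxStepSize]. The numbers and the
// helper name are hypothetical.
func exampleQuadraticInterpolationStep() float64 {
	const (
		fPrev        = 1.0  // f(x_{k-1})
		f            = 0.6  // f(x_k)
		projGradPrev = -1.0 // ∇f_{k-1}⋅p_{k-1}
		stepPrev     = 0.5  // previous step size
	)
	quadTest := (f-fPrev)/stepPrev - projGradPrev // 0.2 > 0, so interpolation is sensible.
	step := -projGradPrev * stepPrev / quadTest / 2
	// step is 1.25 here, and the clamp below reduces it to the default maximum of 1.
	return math.Max(quadraticMinimumStepSize, math.Min(step, quadraticMaximumStepSize))
}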
// FirstOrderStepSize estimates the initial line search step size based on the
// assumption that the first-order change in the function will be the same as
// that obtained at the previous iteration. That is, the initial step size s^0_k
// is chosen so that
// s^0_k ∇f_k⋅p_k = s_{k-1} ∇f_{k-1}⋅p_{k-1}
// This is useful for line search methods that do not produce well-scaled
// descent directions, such as gradient descent or conjugate gradient methods.
type FirstOrderStepSize struct {
// InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞.
// If InitialStepFactor is zero, it will be set to one.
InitialStepFactor float64
// MinStepSize is the lower bound on the estimated step size.
// MinStepSize times GradientAbsTol should always be greater than machine epsilon.
// If MinStepSize is zero, it will be set to 1e-3.
MinStepSize float64
// MaxStepSize is the upper bound on the estimated step size.
// If MaxStepSize is zero, it will be set to 1.
MaxStepSize float64
dirPrevNorm float64
projGradPrev float64
xPrev []float64
}
func (fo *FirstOrderStepSize) Init(loc *Location, dir []float64) (stepSize float64) {
if fo.InitialStepFactor == 0 {
fo.InitialStepFactor = initialStepFactor
}
if fo.MinStepSize == 0 {
fo.MinStepSize = firstOrderMinimumStepSize
}
if fo.MaxStepSize == 0 {
fo.MaxStepSize = firstOrderMaximumStepSize
}
if fo.MaxStepSize <= fo.MinStepSize {
panic("optimize: MinStepSize not smaller than MaxStepSize")
}
gNorm := floats.Norm(loc.Gradient, math.Inf(1))
stepSize = math.Max(fo.MinStepSize, math.Min(fo.InitialStepFactor/gNorm, fo.MaxStepSize))
fo.dirPrevNorm = floats.Norm(dir, 2)
fo.projGradPrev = floats.Dot(loc.Gradient, dir)
fo.xPrev = resize(fo.xPrev, len(loc.X))
copy(fo.xPrev, loc.X)
return stepSize
}
func (fo *FirstOrderStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) {
stepSizePrev := floats.Distance(loc.X, fo.xPrev, 2) / fo.dirPrevNorm
projGrad := floats.Dot(loc.Gradient, dir)
stepSize = stepSizePrev * fo.projGradPrev / projGrad
stepSize = math.Max(fo.MinStepSize, math.Min(stepSize, fo.MaxStepSize))
fo.dirPrevNorm = floats.Norm(dir, 2)
fo.projGradPrev = floats.Dot(loc.Gradient, dir)
copy(fo.xPrev, loc.X)
return stepSize
}
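// The sketch below is illustrative only and is not part of the original file.
// It isolates the rule used by FirstOrderStepSize: the new step is chosen so
// that s_k ∇f_k⋅p_k equals s_{k-1} ∇f_{k-1}⋅p_{k-1}, and is then clamped to
// [MinStepSize, MaxStepSize]. The numbers and the helper name are hypothetical.
func exampleFirstOrderStep() float64 {
	const (
		stepPrev     = 0.5  // previous step size
		projGradPrev = -2.0 // ∇f_{k-1}⋅p_{k-1}
		projGrad     = -0.5 // ∇f_k⋅p_k
	)
	step := stepPrev * projGradPrev / projGrad // 2, then clamped to the default maximum of 1.
	return math.Max(firstOrderMinimumStepSize, math.Min(step, firstOrderMaximumStepSize))
}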

119
optimize/termination.go Normal file

@@ -0,0 +1,119 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import "errors"
// Status represents the status of the optimization. Programs
// should not rely on the underlying numeric value of the Status being constant.
type Status int
const (
NotTerminated Status = iota
Success
FunctionThreshold
FunctionConvergence
GradientThreshold
StepConvergence
FunctionNegativeInfinity
Failure
IterationLimit
RuntimeLimit
FunctionEvaluationLimit
GradientEvaluationLimit
HessianEvaluationLimit
)
func (s Status) String() string {
return statuses[s].name
}
// Early returns true if the status indicates that the optimization ended before
// a minimum was found. For example, if the iteration limit was reached a minimum
// was not found, whereas if the gradient threshold was met a minimum was found.
func (s Status) Early() bool {
return statuses[s].early
}
// Err returns the error associated with an early ending to the minimization. If
// Early returns false, Err will return nil.
func (s Status) Err() error {
return statuses[s].err
}
var statuses = []struct {
name string
early bool
err error
}{
{
name: "NotTerminated",
},
{
name: "Success",
},
{
name: "FunctionThreshold",
},
{
name: "FunctionConvergence",
},
{
name: "GradientThreshold",
},
{
name: "StepConvergence",
},
{
name: "FunctionNegativeInfinity",
},
{
name: "Failure",
early: true,
err: errors.New("optimize: termination ended in failure"),
},
{
name: "IterationLimit",
early: true,
err: errors.New("optimize: maximum number of major iterations reached"),
},
{
name: "RuntimeLimit",
early: true,
err: errors.New("optimize: maximum runtime reached"),
},
{
name: "FunctionEvaluationLimit",
early: true,
err: errors.New("optimize: maximum number of function evaluations reached"),
},
{
name: "GradientEvaluationLimit",
early: true,
err: errors.New("optimize: maximum number of gradient evaluations reached"),
},
{
name: "HessianEvaluationLimit",
early: true,
err: errors.New("optimize: maximum number of Hessian evaluations reached"),
},
}
// NewStatus returns a unique Status variable to represent a custom status.
// NewStatus is intended to be called only during package initialization, and
// calls to NewStatus are not thread safe.
//
// NewStatus takes three arguments: the string to be returned by Status.String,
// a boolean indicating whether the status signals an early end to the
// optimization, and the error to be returned by Err (if any).
func NewStatus(name string, early bool, err error) Status {
statuses = append(statuses, struct {
name string
early bool
err error
}{name, early, err})
return Status(len(statuses) - 1)
}
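// The sketch below is illustrative only and is not part of the original file.
// It shows how a caller would register a custom Status during package
// initialization, for example to be reported from a Problem's Status function.
// The status name and error text are hypothetical.
var statusBudgetExhausted = NewStatus(
	"BudgetExhausted",
	true, // The optimization ends before a minimum is found.
	errors.New("optimize: external evaluation budget exhausted"),
)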

262
optimize/types.go Normal file

@@ -0,0 +1,262 @@
// Copyright ©2014 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import (
"errors"
"fmt"
"math"
"time"
"github.com/gonum/matrix/mat64"
)
const defaultGradientAbsTol = 1e-6
// Operation represents the set of operations commanded by Method at each
// iteration. It is a bitmap of various Iteration and Evaluation constants.
// Individual constants must NOT be combined together by the binary OR operator
// except for the Evaluation operations.
type Operation uint64
// Supported Operations.
const (
// NoOperation specifies that no evaluation or convergence check should
// take place.
NoOperation Operation = 0
// InitIteration is sent to Recorder to indicate the initial location.
// All fields of the location to record must be valid.
// Method must not return it.
InitIteration Operation = 1 << (iota - 1)
// PostIteration is sent to Recorder to indicate the final location
// reached during an optimization run.
// All fields of the location to record must be valid.
// Method must not return it.
PostIteration
// MajorIteration indicates that the next candidate location for
// an optimum has been found and convergence should be checked.
MajorIteration
// FuncEvaluation specifies that the objective function
// should be evaluated.
FuncEvaluation
// GradEvaluation specifies that the gradient
// of the objective function should be evaluated.
GradEvaluation
// HessEvaluation specifies that the Hessian
// of the objective function should be evaluated.
HessEvaluation
// Mask for the evaluating operations.
evalMask = FuncEvaluation | GradEvaluation | HessEvaluation
)
func (op Operation) isEvaluation() bool {
return op&evalMask != 0 && op&^evalMask == 0
}
func (op Operation) String() string {
if op&evalMask != 0 {
return fmt.Sprintf("Evaluation(Func: %t, Grad: %t, Hess: %t, Extra: 0b%b)",
op&FuncEvaluation != 0,
op&GradEvaluation != 0,
op&HessEvaluation != 0,
op&^(evalMask))
}
s, ok := operationNames[op]
if ok {
return s
}
return fmt.Sprintf("Operation(%d)", op)
}
var operationNames = map[Operation]string{
NoOperation: "NoOperation",
InitIteration: "InitIteration",
MajorIteration: "MajorIteration",
PostIteration: "PostIteration",
}
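// The sketch below is illustrative only and is not part of the original file.
// It shows how Operation values are combined and inspected: only the
// evaluation operations may be OR'd together, and callers test individual bits
// to decide what to compute. The helper name is hypothetical.
func exampleOperationBits() {
	op := FuncEvaluation | GradEvaluation // Valid: both are evaluation operations.

	if op.isEvaluation() {
		if op&FuncEvaluation != 0 {
			// Evaluate the objective function.
		}
		if op&GradEvaluation != 0 {
			// Evaluate the gradient.
		}
	}

	// MajorIteration, by contrast, must be returned on its own and is never
	// OR'd with the evaluation bits.
	_ = MajorIteration
}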
// Location represents a location in the optimization procedure.
type Location struct {
X []float64
F float64
Gradient []float64
Hessian *mat64.SymDense
}
// Result represents the outcome of an optimization run. It contains the optimum
// location as well as the Status at convergence and the statistics gathered
// during the run.
type Result struct {
Location
Stats
Status Status
}
// Stats contains the statistics of the run.
type Stats struct {
MajorIterations int // Total number of major iterations
FuncEvaluations int // Number of evaluations of Func
GradEvaluations int // Number of evaluations of Grad
HessEvaluations int // Number of evaluations of Hess
Runtime time.Duration // Total runtime of the optimization
}
// complementEval returns an evaluating operation that evaluates fields of loc
// not evaluated by eval.
func complementEval(loc *Location, eval Operation) (complEval Operation) {
if eval&FuncEvaluation == 0 {
complEval = FuncEvaluation
}
if loc.Gradient != nil && eval&GradEvaluation == 0 {
complEval |= GradEvaluation
}
if loc.Hessian != nil && eval&HessEvaluation == 0 {
complEval |= HessEvaluation
}
return complEval
}
// Problem describes the optimization problem to be solved.
type Problem struct {
// Func evaluates the objective function at the given location. Func
// must not modify x.
Func func(x []float64) float64
// Grad evaluates the gradient at x and stores the result in-place in grad.
// Grad must not modify x.
Grad func(grad []float64, x []float64)
// Hess evaluates the Hessian at x and stores the result in-place in hess.
// Hess must not modify x.
Hess func(hess mat64.MutableSymmetric, x []float64)
// Status reports the status of the objective function being optimized and any
// error. This can be used to terminate early, for example when the function is
// not able to evaluate itself. The user can use one of the pre-provided Status
// constants, or may call NewStatus to create a custom Status value.
Status func() (Status, error)
}
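// The sketch below is illustrative only and is not part of the original file.
// It defines a gradient-only Problem, highlighting the in-place convention for
// Grad and the optional Status hook that lets the objective terminate the run
// itself. The objective function is hypothetical.
var exampleGradientProblem = Problem{
	Func: func(x []float64) float64 {
		return math.Exp(x[0]) + x[0]*x[0]
	},
	Grad: func(grad, x []float64) {
		// Store the gradient into grad in place; do not allocate a new slice.
		grad[0] = math.Exp(x[0]) + 2*x[0]
	},
	Status: func() (Status, error) {
		// A real problem might report Failure or a custom Status here, for
		// example when an external resource becomes unavailable. This problem
		// never terminates itself.
		return NotTerminated, nil
	},
}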
// TODO(btracey): Think about making this an exported function when the
// constraint interface is designed.
func (p Problem) satisfies(method Needser) error {
if method.Needs().Gradient && p.Grad == nil {
return errors.New("optimize: problem does not provide needed Grad function")
}
if method.Needs().Hessian && p.Hess == nil {
return errors.New("optimize: problem does not provide needed Hess function")
}
return nil
}
// Settings represents settings of the optimization run. It contains initial
// settings, convergence information, and Recorder information. In general, users
// should use DefaultSettings rather than constructing a Settings literal.
//
// If UseInitialData is true, InitialValue, InitialGradient and InitialHessian
// specify function information at the initial location.
//
// If Recorder is nil, no information will be recorded.
type Settings struct {
UseInitialData bool // Use supplied information about the conditions at the initial x.
InitialValue float64 // Function value at the initial x.
InitialGradient []float64 // Gradient at the initial x.
InitialHessian *mat64.SymDense // Hessian at the initial x.
// FunctionThreshold is the threshold for acceptably small values of the
// objective function. FunctionThreshold status is returned if
// the objective function is less than this value.
// The default value is -inf.
FunctionThreshold float64
// GradientThreshold determines the accuracy to which the minimum is found.
// GradientThreshold status is returned if the infinity norm of
// the gradient is less than this value.
// Has no effect if gradient information is not used.
// The default value is 1e-6.
GradientThreshold float64
// FunctionConverge tests that the function value decreases by a
// significant amount over the specified number of iterations.
//
// If f < f_best and
// f_best - f > FunctionConverge.Relative * maxabs(f, f_best) + FunctionConverge.Absolute
// then a significant decrease has occurred, and f_best is updated.
//
// If there is no significant decrease for FunctionConverge.Iterations
// major iterations, FunctionConvergence status is returned.
//
// If this is nil or if FunctionConverge.Iterations == 0, it has no effect.
FunctionConverge *FunctionConverge
// MajorIterations is the maximum number of iterations allowed.
// IterationLimit status is returned if the number of major iterations
// equals or exceeds this value.
// If it equals zero, this setting has no effect.
// The default value is 0.
MajorIterations int
// Runtime is the maximum runtime allowed. RuntimeLimit status is returned
// if the duration of the run is longer than this value. Runtime is only
// checked at iterations of the Method.
// If it equals zero, this setting has no effect.
// The default value is 0.
Runtime time.Duration
// FuncEvaluations is the maximum allowed number of function evaluations.
// FunctionEvaluationLimit status is returned if the total number of calls
// to Func equals or exceeds this number.
// If it equals zero, this setting has no effect.
// The default value is 0.
FuncEvaluations int
// GradEvaluations is the maximum allowed number of gradient evaluations.
// GradientEvaluationLimit status is returned if the total number of calls
// to Grad equals or exceeds this number.
// If it equals zero, this setting has no effect.
// The default value is 0.
GradEvaluations int
// HessEvaluations is the maximum allowed number of Hessian evaluations.
// HessianEvaluationLimit status is returned if the total number of calls
// to Hess equals or exceeds this number.
// If it equals zero, this setting has no effect.
// The default value is 0.
HessEvaluations int
Recorder Recorder
// Concurrent represents how many concurrent evaluations are possible.
Concurrent int
}
// DefaultSettings returns a new Settings struct containing the default settings.
func DefaultSettings() *Settings {
return &Settings{
GradientThreshold: defaultGradientAbsTol,
FunctionThreshold: math.Inf(-1),
FunctionConverge: &FunctionConverge{
Absolute: 1e-10,
Iterations: 20,
},
}
}
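// The sketch below is illustrative only and is not part of the original file.
// It builds on DefaultSettings and tightens the run with explicit budgets and
// a function-convergence test, using the FunctionConverge fields described in
// the Settings documentation above. The particular numbers are hypothetical.
func exampleSettingsSketch() *Settings {
	settings := DefaultSettings()
	settings.MajorIterations = 200
	settings.FuncEvaluations = 1000
	settings.Runtime = 30 * time.Second
	settings.GradientThreshold = 1e-8
	settings.FunctionConverge = &FunctionConverge{
		Relative:   1e-12,
		Absolute:   1e-10,
		Iterations: 50,
	}
	return settings
}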
// resize takes x and returns a slice of length dim. It returns a resliced x
// if cap(x) >= dim, and a new slice otherwise.
func resize(x []float64, dim int) []float64 {
if dim > cap(x) {
return make([]float64, dim)
}
return x[:dim]
}
func resizeSymDense(m *mat64.SymDense, dim int) *mat64.SymDense {
if m == nil || cap(m.RawSymmetric().Data) < dim*dim {
return mat64.NewSymDense(dim, nil)
}
return mat64.NewSymDense(dim, m.RawSymmetric().Data[:dim*dim])
}

File diff suppressed because it is too large