// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/mat"
)

var (
	_ Method          = (*BFGS)(nil)
	_ localMethod     = (*BFGS)(nil)
	_ NextDirectioner = (*BFGS)(nil)
)
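
// (Editorial note, not in the original source: the blank-identifier
// assignments above are compile-time assertions that *BFGS satisfies the
// Method, localMethod, and NextDirectioner interfaces.)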

// BFGS implements the Broyden–Fletcher–Goldfarb–Shanno optimization method. It
// is a quasi-Newton method that performs successive rank-two updates to an
// estimate of the inverse Hessian of the objective function. It exhibits
// super-linear convergence near a local minimum, and its memory cost is
// O(n^2) in the input dimension n.
type BFGS struct {
	// Linesearcher selects suitable steps along the descent direction.
	// Accepted steps should satisfy the strong Wolfe conditions.
	// If Linesearcher == nil, an appropriate default is chosen.
	Linesearcher Linesearcher
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	ls *LinesearchMethod

	status Status
	err    error

	dim  int
	x    mat.VecDense // Location of the last major iteration.
	grad mat.VecDense // Gradient at the last major iteration.
	s    mat.VecDense // Difference between locations in this and the previous iteration.
	y    mat.VecDense // Difference between gradients in this and the previous iteration.
	tmp  mat.VecDense

	invHess *mat.SymDense

	first bool // Indicator of the first iteration.
}
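
// A hedged configuration sketch (editorial, not part of the original source):
// the zero value of BFGS is ready to use with the default line search, and a
// custom Linesearcher satisfying the strong Wolfe conditions may be supplied
// instead, for example
//
//	method := &optimize.BFGS{
//		Linesearcher: &optimize.MoreThuente{},
//	}
//
// where MoreThuente is another Linesearcher provided by this package.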

func (b *BFGS) Status() (Status, error) {
	return b.status, b.err
}

func (*BFGS) Uses(has Available) (uses Available, err error) {
	return has.gradient()
}

func (b *BFGS) Init(dim, tasks int) int {
	b.status = NotTerminated
	b.err = nil
	return 1
}

func (b *BFGS) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	b.status, b.err = localOptimizer{}.run(b, b.GradStopThreshold, operation, result, tasks)
	close(operation)
}

func (b *BFGS) initLocal(loc *Location) (Operation, error) {
	if b.Linesearcher == nil {
		b.Linesearcher = &Bisection{}
	}
	if b.ls == nil {
		b.ls = &LinesearchMethod{}
	}
	b.ls.Linesearcher = b.Linesearcher
	b.ls.NextDirectioner = b

	return b.ls.Init(loc)
}

func (b *BFGS) iterateLocal(loc *Location) (Operation, error) {
	return b.ls.Iterate(loc)
}

func (b *BFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	b.dim = dim
	b.first = true

	x := mat.NewVecDense(dim, loc.X)
	grad := mat.NewVecDense(dim, loc.Gradient)
	b.x.CloneFromVec(x)
	b.grad.CloneFromVec(grad)

	b.y.Reset()
	b.s.Reset()
	b.tmp.Reset()

	if b.invHess == nil || cap(b.invHess.RawSymmetric().Data) < dim*dim {
		b.invHess = mat.NewSymDense(dim, nil)
	} else {
		b.invHess = mat.NewSymDense(dim, b.invHess.RawSymmetric().Data[:dim*dim])
	}
	// The values of the inverse Hessian are initialized in the first call to
	// NextDirection.

	// Initial direction is just negative of the gradient because the Hessian
	// is an identity matrix.
	d := mat.NewVecDense(dim, dir)
	d.ScaleVec(-1, grad)
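	// Editorial note (not in the original source): the returned step size
	// normalizes the first trial step, stepSize*dir, to unit Euclidean length.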
	return 1 / mat.Norm(d, 2)
}

func (b *BFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := b.dim
	if len(loc.X) != dim {
		panic("bfgs: unexpected size mismatch")
	}
	if len(loc.Gradient) != dim {
		panic("bfgs: unexpected size mismatch")
	}
	if len(dir) != dim {
		panic("bfgs: unexpected size mismatch")
	}

	x := mat.NewVecDense(dim, loc.X)
	grad := mat.NewVecDense(dim, loc.Gradient)

	// s = x_{k+1} - x_{k}
	b.s.SubVec(x, &b.x)
	// y = g_{k+1} - g_{k}
	b.y.SubVec(grad, &b.grad)

	sDotY := mat.Dot(&b.s, &b.y)

	if b.first {
		// Rescale the initial Hessian.
		// From: Nocedal, J., Wright, S.: Numerical Optimization (2nd ed).
		// Springer (2006), page 143, eq. 6.20.
		yDotY := mat.Dot(&b.y, &b.y)
		scale := sDotY / yDotY
		for i := 0; i < dim; i++ {
			for j := i; j < dim; j++ {
				if i == j {
					b.invHess.SetSym(i, i, scale)
				} else {
					b.invHess.SetSym(i, j, 0)
				}
			}
		}
		b.first = false
	}

	if math.Abs(sDotY) != 0 {
		// Update the inverse Hessian according to the formula
		//
		//  B_{k+1}^-1 = B_k^-1
		//             + (s_kᵀ y_k + y_kᵀ B_k^-1 y_k) / (s_kᵀ y_k)^2 * (s_k s_kᵀ)
		//             - (B_k^-1 y_k s_kᵀ + s_k y_kᵀ B_k^-1) / (s_kᵀ y_k).
		//
		// Note that y_kᵀ B_k^-1 y_k is a scalar, and that the third term is a
		// rank-two update where B_k^-1 y_k is one vector and s_k is the other.
		yBy := mat.Inner(&b.y, b.invHess, &b.y)
		b.tmp.MulVec(b.invHess, &b.y)
		scale := (1 + yBy/sDotY) / sDotY
		b.invHess.SymRankOne(b.invHess, scale, &b.s)
		b.invHess.RankTwo(b.invHess, -1/sDotY, &b.tmp, &b.s)
	}

	// Update the stored BFGS data.
	b.x.CopyVec(x)
	b.grad.CopyVec(grad)

	// New direction is stored in dir.
	d := mat.NewVecDense(dim, dir)
	d.MulVec(b.invHess, grad)
	d.ScaleVec(-1, d)

	return 1
}
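
// Editorial note (not in the original source): the three-term formula applied
// above is the expanded form of the standard BFGS inverse-Hessian update
//
//	H_{k+1} = (I - ρ_k s_k y_kᵀ) H_k (I - ρ_k y_k s_kᵀ) + ρ_k s_k s_kᵀ,
//
// with ρ_k = 1/(y_kᵀ s_k) (Nocedal & Wright, Numerical Optimization, 2nd ed.,
// Section 6.1). Multiplying out the product and grouping the symmetric terms
// yields exactly the SymRankOne and RankTwo calls used in the code.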

func (*BFGS) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, false}
}
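
// A minimal usage sketch (editorial illustration, not part of the original
// file; real client code lives in its own package and imports
// gonum.org/v1/gonum/optimize):
//
//	problem := optimize.Problem{
//		Func: func(x []float64) float64 {
//			return x[0]*x[0] + 10*x[1]*x[1] // a simple convex quadratic
//		},
//		Grad: func(grad, x []float64) {
//			grad[0] = 2 * x[0]
//			grad[1] = 20 * x[1]
//		},
//	}
//	result, err := optimize.Minimize(problem, []float64{1, 1}, nil, &optimize.BFGS{})
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Println(result.X) // expected to be close to [0 0]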