// Copyright ©2014 The Gonum Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package optimize import ( "gonum.org/v1/gonum/floats" ) var ( _ Method = (*LBFGS)(nil) _ localMethod = (*LBFGS)(nil) _ NextDirectioner = (*LBFGS)(nil) ) // LBFGS implements the limited-memory BFGS method for gradient-based // unconstrained minimization. // // It stores a modified version of the inverse Hessian approximation H // implicitly from the last Store iterations while the normal BFGS method // stores and manipulates H directly as a dense matrix. Therefore LBFGS is more // appropriate than BFGS for large problems as the cost of LBFGS scales as // O(Store * dim) while BFGS scales as O(dim^2). The "forgetful" nature of // LBFGS may also make it perform better than BFGS for functions with Hessians // that vary rapidly spatially. type LBFGS struct { // Linesearcher selects suitable steps along the descent direction. // Accepted steps should satisfy the strong Wolfe conditions. // If Linesearcher is nil, a reasonable default will be chosen. Linesearcher Linesearcher // Store is the size of the limited-memory storage. // If Store is 0, it will be defaulted to 15. Store int // GradStopThreshold sets the threshold for stopping if the gradient norm // gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and // if it is NaN the setting is not used. GradStopThreshold float64 status Status err error ls *LinesearchMethod dim int // Dimension of the problem x []float64 // Location at the last major iteration grad []float64 // Gradient at the last major iteration // History oldest int // Index of the oldest element of the history y [][]float64 // Last Store values of y s [][]float64 // Last Store values of s rho []float64 // Last Store values of rho a []float64 // Cache of Hessian updates } func (l *LBFGS) Status() (Status, error) { return l.status, l.err } func (*LBFGS) Uses(has Available) (uses Available, err error) { return has.gradient() } func (l *LBFGS) Init(dim, tasks int) int { l.status = NotTerminated l.err = nil return 1 } func (l *LBFGS) Run(operation chan<- Task, result <-chan Task, tasks []Task) { l.status, l.err = localOptimizer{}.run(l, l.GradStopThreshold, operation, result, tasks) close(operation) } func (l *LBFGS) initLocal(loc *Location) (Operation, error) { if l.Linesearcher == nil { l.Linesearcher = &Bisection{} } if l.Store == 0 { l.Store = 15 } if l.ls == nil { l.ls = &LinesearchMethod{} } l.ls.Linesearcher = l.Linesearcher l.ls.NextDirectioner = l return l.ls.Init(loc) } func (l *LBFGS) iterateLocal(loc *Location) (Operation, error) { return l.ls.Iterate(loc) } func (l *LBFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) { dim := len(loc.X) l.dim = dim l.oldest = 0 l.a = resize(l.a, l.Store) l.rho = resize(l.rho, l.Store) l.y = l.initHistory(l.y) l.s = l.initHistory(l.s) l.x = resize(l.x, dim) copy(l.x, loc.X) l.grad = resize(l.grad, dim) copy(l.grad, loc.Gradient) copy(dir, loc.Gradient) floats.Scale(-1, dir) return 1 / floats.Norm(dir, 2) } func (l *LBFGS) initHistory(hist [][]float64) [][]float64 { c := cap(hist) if c < l.Store { n := make([][]float64, l.Store-c) hist = append(hist[:c], n...) } hist = hist[:l.Store] for i := range hist { hist[i] = resize(hist[i], l.dim) for j := range hist[i] { hist[i][j] = 0 } } return hist } func (l *LBFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) { // Uses two-loop correction as described in // Nocedal, J., Wright, S.: Numerical Optimization (2nd ed). Springer (2006), chapter 7, page 178. if len(loc.X) != l.dim { panic("lbfgs: unexpected size mismatch") } if len(loc.Gradient) != l.dim { panic("lbfgs: unexpected size mismatch") } if len(dir) != l.dim { panic("lbfgs: unexpected size mismatch") } y := l.y[l.oldest] floats.SubTo(y, loc.Gradient, l.grad) s := l.s[l.oldest] floats.SubTo(s, loc.X, l.x) sDotY := floats.Dot(s, y) l.rho[l.oldest] = 1 / sDotY l.oldest = (l.oldest + 1) % l.Store copy(l.x, loc.X) copy(l.grad, loc.Gradient) copy(dir, loc.Gradient) // Start with the most recent element and go backward, for i := 0; i < l.Store; i++ { idx := l.oldest - i - 1 if idx < 0 { idx += l.Store } l.a[idx] = l.rho[idx] * floats.Dot(l.s[idx], dir) floats.AddScaled(dir, -l.a[idx], l.y[idx]) } // Scale the initial Hessian. gamma := sDotY / floats.Dot(y, y) floats.Scale(gamma, dir) // Start with the oldest element and go forward. for i := 0; i < l.Store; i++ { idx := i + l.oldest if idx >= l.Store { idx -= l.Store } beta := l.rho[idx] * floats.Dot(l.y[idx], dir) floats.AddScaled(dir, l.a[idx]-beta, l.s[idx]) } // dir contains H^{-1} * g, so flip the direction for minimization. floats.Scale(-1, dir) return 1 } func (*LBFGS) needs() struct { Gradient bool Hessian bool } { return struct { Gradient bool Hessian bool }{true, false} }