// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/mat"
)

// maxNewtonModifications is the maximum number of times tau is increased
// while searching for a positive definite modified Hessian before falling
// back to the steepest descent direction.
const maxNewtonModifications = 20

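// Compile-time checks that Newton satisfies the Method, localMethod and
// NextDirectioner interfaces.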
var (
	_ Method          = (*Newton)(nil)
	_ localMethod     = (*Newton)(nil)
	_ NextDirectioner = (*Newton)(nil)
)

// Newton implements a modified Newton's method for Hessian-based unconstrained
// minimization. It applies regularization when the Hessian is not positive
// definite, and it can converge to a local minimum from any starting point.
//
// Newton iteratively forms a quadratic model of the objective function f and
// tries to minimize this approximate model. It generates a sequence of
// locations x_k by means of
//
//	solve H_k d_k = -∇f_k for d_k,
//	x_{k+1} = x_k + α_k d_k,
//
// where H_k is the Hessian matrix of f at x_k and α_k is a step size found by
// a line search.
//
// Away from a minimizer H_k may not be positive definite and d_k may not be a
// descent direction. Newton implements a Hessian modification strategy that
// adds successively larger multiples of the identity to H_k until it becomes
// positive definite. Note that the repeated trial factorization of the
// modified Hessian involved in this process can be computationally expensive.
//
// If the Hessian matrix cannot be formed explicitly or if the computational
// cost of its factorization is prohibitive, the BFGS or L-BFGS quasi-Newton
// methods can be used instead.
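//
// A minimal usage sketch is shown below. It assumes the package's Problem and
// Minimize entry points; the quadratic objective is only an illustration:
//
//	p := optimize.Problem{
//		Func: func(x []float64) float64 { return x[0]*x[0] + 2*x[1]*x[1] },
//		Grad: func(grad, x []float64) {
//			grad[0] = 2 * x[0]
//			grad[1] = 4 * x[1]
//		},
//		Hess: func(hess *mat.SymDense, x []float64) {
//			hess.SetSym(0, 0, 2)
//			hess.SetSym(0, 1, 0)
//			hess.SetSym(1, 1, 4)
//		},
//	}
//	result, err := optimize.Minimize(p, []float64{5, 5}, nil, &optimize.Newton{})
//	// On success, result.X holds the minimizer and result.F the minimum value.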
type Newton struct {
	// Linesearcher is used for selecting suitable steps along the descent
	// direction d. Accepted steps should satisfy at least one of the Wolfe,
	// Goldstein or Armijo conditions.
	// If Linesearcher == nil, an appropriate default is chosen.
	Linesearcher Linesearcher
	// Increase is the factor by which a scalar tau is successively increased
	// so that (H + tau*I) is positive definite. Larger values reduce the
	// number of trial Hessian factorizations, but also reduce the second-order
	// information in H.
	// Increase must be greater than 1. If Increase is 0, it is defaulted to 5.
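	// For example, with the default Increase of 5, a tau of 0.001 grows to
	// 0.005, 0.025, 0.125, ... on successive failed factorizations.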
	Increase float64
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0, it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	status Status
	err    error

	ls *LinesearchMethod

	hess *mat.SymDense // Storage for a copy of the Hessian matrix.
	chol mat.Cholesky  // Storage for the Cholesky factorization.
	tau  float64
}

// Status returns the current status of the method and any error that has
// occurred.
func (n *Newton) Status() (Status, error) {
	return n.status, n.err
}

// Uses reports the problem information that Newton requires: both gradient
// and Hessian information must be available.
func (*Newton) Uses(has Available) (uses Available, err error) {
	return has.hessian()
}

// Init initializes the method for optimization and returns the number of
// concurrent tasks, which is always 1 for Newton.
func (n *Newton) Init(dim, tasks int) int {
	n.status = NotTerminated
	n.err = nil
	return 1
}

// Run runs the optimization, delegating the iteration loop to the shared
// local optimizer driver.
func (n *Newton) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	n.status, n.err = localOptimizer{}.run(n, n.GradStopThreshold, operation, result, tasks)
	close(operation)
}

func (n *Newton) initLocal(loc *Location) (Operation, error) {
	if n.Increase == 0 {
		n.Increase = 5
	}
	if n.Increase <= 1 {
		panic("optimize: Newton.Increase must be greater than 1")
	}
	if n.Linesearcher == nil {
		n.Linesearcher = &Bisection{}
	}
	if n.ls == nil {
		n.ls = &LinesearchMethod{}
	}
	n.ls.Linesearcher = n.Linesearcher
	n.ls.NextDirectioner = n
	return n.ls.Init(loc)
}

func (n *Newton) iterateLocal(loc *Location) (Operation, error) {
	return n.ls.Iterate(loc)
}

func (n *Newton) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	n.hess = resizeSymDense(n.hess, dim)
	n.tau = 0
	return n.NextDirection(loc, dir)
}

func (n *Newton) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	// This method implements Algorithm 3.3 (Cholesky with Added Multiple of
	// the Identity) from Nocedal, Wright (2006), 2nd edition.

	dim := len(loc.X)
	d := mat.NewVecDense(dim, dir)
	grad := mat.NewVecDense(dim, loc.Gradient)
	n.hess.CopySym(loc.Hessian)

	// Find the smallest diagonal entry of the Hessian.
	minA := n.hess.At(0, 0)
	for i := 1; i < dim; i++ {
		a := n.hess.At(i, i)
		if a < minA {
			minA = a
		}
	}
	// If the smallest diagonal entry is positive, the Hessian may be positive
	// definite, and so first attempt to apply the Cholesky factorization to
	// the unmodified Hessian. If the smallest entry is negative, use the
	// final tau from the last iteration if regularization was needed,
	// otherwise guess an appropriate value for tau.
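	// For example, if minA = -2, the initial guess is tau = 2.001, which
	// makes every modified diagonal entry at least 0.001.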
	if minA > 0 {
		n.tau = 0
	} else if n.tau == 0 {
		n.tau = -minA + 0.001
	}

	for k := 0; k < maxNewtonModifications; k++ {
		if n.tau != 0 {
			// Add a multiple of identity to the Hessian.
			for i := 0; i < dim; i++ {
				n.hess.SetSym(i, i, loc.Hessian.At(i, i)+n.tau)
			}
		}
		// Try to apply the Cholesky factorization.
		pd := n.chol.Factorize(n.hess)
		if pd {
			// Store the solution in d's backing array, dir.
			err := n.chol.SolveVecTo(d, grad)
			if err == nil {
				d.ScaleVec(-1, d)
				return 1
			}
		}
		// The modified Hessian is not positive definite, so increase tau.
		n.tau = math.Max(n.Increase*n.tau, 0.001)
	}

	// Hessian modification failed to produce a positive definite matrix.
	// Return the negative gradient as the descent direction.
	d.ScaleVec(-1, grad)
	return 1
}

// needs reports that Newton requires both gradient and Hessian information.
func (n *Newton) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, true}
}