mirror of https://github.com/gonum/gonum.git
optimize: Refactor gradient convergence and remove DefaultSettings (#772)
* optimize: Refactor gradient convergence and remove DefaultSettings

The current API design makes it easy to make the mistake of not using DefaultSettings. This change makes the zero value of Settings do the right thing.

The one behavior DefaultSettings still controlled is the gradient threshold. Gradient-based Local methods (BFGS, LBFGS, CG, etc.) typically _define_ convergence by the value of the gradient, while Global methods (CMAES, GuessAndCheck) are defined by _not_ converging when the gradient is small. The problem is that two completely different default behaviors are needed without knowing the Method.

The solution is to treat a very small value of the gradient as a method-based convergence, in the same way that a small spread of data is a convergence of CMAES. The default behavior, from the perspective of Settings, is therefore never to converge based on the gradient, but all of the Local methods will converge when a value close to the minimum is found. This method-level default (1e-12) is so small that users should never want a smaller value, and a more practical convergence value can still be set through Settings.

Fixes #677.
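In practice the refactor means a bare Minimize call now behaves sensibly with nil settings. A minimal sketch of post-change usage, borrowing the ExtendedRosenbrock problem that the package's own example uses (the sketch itself is not part of this commit):

package main

import (
	"fmt"
	"log"

	"gonum.org/v1/gonum/optimize"
	"gonum.org/v1/gonum/optimize/functions"
)

func main() {
	p := optimize.Problem{
		Func: functions.ExtendedRosenbrock{}.Func,
		Grad: functions.ExtendedRosenbrock{}.Grad,
	}
	x := []float64{1.3, 0.7, 0.8, 1.9, 1.2}

	// With this change, nil settings (or a zero-value &Settings{}) need no
	// DefaultSettings call; BFGS itself stops once the gradient norm falls
	// below its GradStopThreshold default of 1e-12.
	result, err := optimize.Minimize(p, x, nil, &optimize.BFGS{})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(result.Status, result.F)
}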
@@ -20,6 +20,10 @@ type BFGS struct {
 	// Accepted steps should satisfy the strong Wolfe conditions.
 	// If Linesearcher == nil, an appropriate default is chosen.
 	Linesearcher Linesearcher
+	// GradStopThreshold sets the threshold for stopping if the gradient norm
+	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
+	// if it is NaN the setting is not used.
+	GradStopThreshold float64

 	ls *LinesearchMethod

@@ -49,7 +53,7 @@ func (b *BFGS) Init(dim, tasks int) int {
 }

 func (b *BFGS) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
-	b.status, b.err = localOptimizer{}.run(b, operation, result, tasks)
+	b.status, b.err = localOptimizer{}.run(b, b.GradStopThreshold, operation, result, tasks)
 	close(operation)
 	return
 }
@@ -87,6 +87,10 @@ type CG struct {
 	// If AngleRestartThreshold is 0, it will be set to -0.9.
 	// CG will panic if AngleRestartThreshold is not in the interval [-1, 0].
 	AngleRestartThreshold float64
+	// GradStopThreshold sets the threshold for stopping if the gradient norm
+	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
+	// if it is NaN the setting is not used.
+	GradStopThreshold float64

 	ls *LinesearchMethod

@@ -112,7 +116,7 @@ func (cg *CG) Init(dim, tasks int) int {
 }

 func (cg *CG) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
-	cg.status, cg.err = localOptimizer{}.run(cg, operation, result, tasks)
+	cg.status, cg.err = localOptimizer{}.run(cg, cg.GradStopThreshold, operation, result, tasks)
 	close(operation)
 	return
 }
@@ -16,6 +16,19 @@ import (
 	"gonum.org/v1/gonum/optimize/functions"
 )

+type functionThresholdConverger struct {
+	Threshold float64
+}
+
+func (functionThresholdConverger) Init(dim int) {}
+
+func (f functionThresholdConverger) Converged(loc *Location) Status {
+	if loc.F < f.Threshold {
+		return FunctionThreshold
+	}
+	return NotTerminated
+}
+
 type cmaTestCase struct {
 	dim     int
 	problem Problem
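The functionThresholdConverger added above is test scaffolding, but it also shows the shape of a user-defined convergence criterion: any type with Init(dim int) and Converged(loc *Location) Status can be assigned to Settings.Converger. A sketch of wiring such a converger into a CMA-ES run (the problem and threshold choices here are illustrative, not from the commit):

package main

import (
	"fmt"
	"log"

	"gonum.org/v1/gonum/optimize"
	"gonum.org/v1/gonum/optimize/functions"
)

// stopBelow mirrors the test's functionThresholdConverger: stop once the
// objective value drops below Threshold.
type stopBelow struct{ Threshold float64 }

func (stopBelow) Init(dim int) {}

func (s stopBelow) Converged(loc *optimize.Location) optimize.Status {
	if loc.F < s.Threshold {
		return optimize.FunctionThreshold
	}
	return optimize.NotTerminated
}

func main() {
	p := optimize.Problem{Func: functions.ExtendedRosenbrock{}.Func}
	settings := &optimize.Settings{Converger: stopBelow{Threshold: 1e-6}}
	result, err := optimize.Minimize(p, []float64{1.3, 0.7}, settings, &optimize.CmaEsChol{})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(result.Status, result.F)
}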
@@ -41,8 +54,7 @@ func cmaTestCases() []cmaTestCase {
 				StopLogDet: math.NaN(),
 			},
 			settings: &Settings{
-				FunctionThreshold: 0.01,
-				Converger:         NeverTerminate{},
+				Converger: functionThresholdConverger{0.01},
 			},
 			good: func(result *Result, err error, concurrent int) error {
 				if result.Status != FunctionThreshold {
@@ -63,7 +75,6 @@ func cmaTestCases() []cmaTestCase {
 			},
 			method: &CmaEsChol{},
 			settings: &Settings{
-				FunctionThreshold: math.Inf(-1),
 				Converger: NeverTerminate{},
 			},
 			good: func(result *Result, err error, concurrent int) error {
@@ -88,7 +99,6 @@ func cmaTestCases() []cmaTestCase {
 				ForgetBest: true, // Otherwise may get an update at the end.
 			},
 			settings: &Settings{
-				FunctionThreshold: math.Inf(-1),
 				MajorIterations: 10,
 				Converger:       NeverTerminate{},
 			},
@@ -118,7 +128,6 @@ func cmaTestCases() []cmaTestCase {
 				Population: 100,
 			},
 			settings: &Settings{
-				FunctionThreshold: math.Inf(-1),
 				FuncEvaluations: 250, // Somewhere in the middle of an iteration.
 				Converger:       NeverTerminate{},
 			},
@@ -150,7 +159,6 @@ func cmaTestCases() []cmaTestCase {
 				Population: 100, // Increase the population size to reduce noise.
 			},
 			settings: &Settings{
-				FunctionThreshold: math.Inf(-1),
 				Converger: NeverTerminate{},
 			},
 			good: func(result *Result, err error, concurrent int) error {
@@ -176,7 +184,6 @@ func cmaTestCases() []cmaTestCase {
 				ForgetBest: true, // So that if it accidentally finds a better place we still converge to the minimum.
 			},
 			settings: &Settings{
-				FunctionThreshold: math.Inf(-1),
 				Converger: NeverTerminate{},
 			},
 			good: func(result *Result, err error, concurrent int) error {
@@ -1,20 +0,0 @@
-// Copyright ©2016 The Gonum Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package optimize
-
-import (
-	"math"
-)
-
-// DefaultSettingsGlobal returns the default settings for a global optimization.
-func DefaultSettingsGlobal() *Settings {
-	return &Settings{
-		FunctionThreshold: math.Inf(-1),
-		Converger: &FunctionConverge{
-			Absolute:   1e-10,
-			Iterations: 100,
-		},
-	}
-}
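Callers that relied on the removed DefaultSettingsGlobal can spell out the equivalent Settings themselves. A rough sketch; note that Minimize now installs this same FunctionConverge whenever Settings.Converger is nil (see defaultFunctionConverge below), so a nil *Settings is usually enough:

package main

import "gonum.org/v1/gonum/optimize"

// globalStyleSettings is roughly equivalent to the removed
// DefaultSettingsGlobal: it spells out the FunctionConverge that the old
// helper installed. The FunctionThreshold of -inf is no longer needed,
// since thresholds are now opt-in.
func globalStyleSettings() *optimize.Settings {
	return &optimize.Settings{
		Converger: &optimize.FunctionConverge{
			Absolute:   1e-10,
			Iterations: 100,
		},
	}
}

func main() { _ = globalStyleSettings() }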
@@ -15,6 +15,10 @@ type GradientDescent struct {
 	// StepSizer determines the initial step size along each direction.
 	// If StepSizer is nil, a reasonable default will be chosen.
 	StepSizer StepSizer
+	// GradStopThreshold sets the threshold for stopping if the gradient norm
+	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
+	// if it is NaN the setting is not used.
+	GradStopThreshold float64

 	ls *LinesearchMethod

@@ -33,7 +37,7 @@ func (g *GradientDescent) Init(dim, tasks int) int {
 }

 func (g *GradientDescent) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
-	g.status, g.err = localOptimizer{}.run(g, operation, result, tasks)
+	g.status, g.err = localOptimizer{}.run(g, g.GradStopThreshold, operation, result, tasks)
 	close(operation)
 	return
 }
@@ -29,7 +29,7 @@ func TestGuessAndCheck(t *testing.T) {
 	initX := make([]float64, dim)
 	Minimize(problem, initX, nil, &GuessAndCheck{Rander: d})

-	settings := DefaultSettingsGlobal()
+	settings := &Settings{}
 	settings.Concurrent = 5
 	settings.MajorIterations = 15
 	Minimize(problem, initX, settings, &GuessAndCheck{Rander: d})
@@ -26,6 +26,10 @@ type LBFGS struct {
 	// Store is the size of the limited-memory storage.
 	// If Store is 0, it will be defaulted to 15.
 	Store int
+	// GradStopThreshold sets the threshold for stopping if the gradient norm
+	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
+	// if it is NaN the setting is not used.
+	GradStopThreshold float64

 	status Status
 	err    error
@@ -55,7 +59,7 @@ func (l *LBFGS) Init(dim, tasks int) int {
 }

 func (l *LBFGS) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
-	l.status, l.err = localOptimizer{}.run(l, operation, result, tasks)
+	l.status, l.err = localOptimizer{}.run(l, l.GradStopThreshold, operation, result, tasks)
 	close(operation)
 	return
 }
@@ -6,6 +6,8 @@ package optimize

 import (
+	"math"
+
 	"gonum.org/v1/gonum/floats"
 )

 // localOptimizer is a helper type for running an optimization using a LocalMethod.
@@ -15,7 +17,7 @@ type localOptimizer struct{}
 // must close the operation channel at the conclusion of the optimization. This
 // provides a happens before relationship between the return of status and the
 // closure of operation, and thus a call to method.Status (if necessary).
-func (l localOptimizer) run(method localMethod, operation chan<- Task, result <-chan Task, tasks []Task) (Status, error) {
+func (l localOptimizer) run(method localMethod, gradThresh float64, operation chan<- Task, result <-chan Task, tasks []Task) (Status, error) {
 	// Local methods start with a fully-specified initial location.
 	task := tasks[0]
 	task = l.initialLocation(operation, result, task, method)
@@ -23,7 +25,7 @@ func (l localOptimizer) run(method localMethod, operation chan<- Task, result <-
 		l.finish(operation, result)
 		return NotTerminated, nil
 	}
-	status, err := l.checkStartingLocation(task)
+	status, err := l.checkStartingLocation(task, gradThresh)
 	if err != nil {
 		l.finishMethodDone(operation, result, task)
 		return status, err
@@ -51,6 +53,14 @@ Loop:
 		switch r.Op {
 		case PostIteration:
 			break Loop
+		case MajorIteration:
+			// The last operation was a MajorIteration. Check if the gradient
+			// is below the threshold.
+			if status := l.checkGradientConvergence(r.Gradient, gradThresh); status != NotTerminated {
+				l.finishMethodDone(operation, result, task)
+				return GradientThreshold, nil
+			}
+			fallthrough
 		default:
 			op, err := method.iterateLocal(r.Location)
 			if err != nil {
@@ -91,7 +101,7 @@ func (l localOptimizer) initialLocation(operation chan<- Task, result <-chan Tas
 	return <-result
 }

-func (localOptimizer) checkStartingLocation(task Task) (Status, error) {
+func (l localOptimizer) checkStartingLocation(task Task, gradThresh float64) (Status, error) {
 	if math.IsInf(task.F, 1) || math.IsNaN(task.F) {
 		return Failure, ErrFunc(task.F)
 	}
@@ -100,7 +110,21 @@ func (localOptimizer) checkStartingLocation(task Task) (Status, error) {
 			return Failure, ErrGrad{Grad: v, Index: i}
 		}
 	}
-	return NotTerminated, nil
+	status := l.checkGradientConvergence(task.Gradient, gradThresh)
+	return status, nil
+}
+
+func (localOptimizer) checkGradientConvergence(gradient []float64, gradThresh float64) Status {
+	if gradient == nil || math.IsNaN(gradThresh) {
+		return NotTerminated
+	}
+	if gradThresh == 0 {
+		gradThresh = defaultGradientAbsTol
+	}
+	if norm := floats.Norm(gradient, math.Inf(1)); norm < gradThresh {
+		return GradientThreshold
+	}
+	return NotTerminated
 }

 // finish completes the channel operations to finish an optimization.
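The new checkGradientConvergence gives gradThresh three-way semantics: 0 falls back to the 1e-12 default, NaN disables the check entirely, and any other value is used as-is. A self-contained sketch of the same logic outside the package (converged is a hypothetical stand-in for the unexported helper):

package main

import (
	"fmt"
	"math"
)

const defaultGradientAbsTol = 1e-12

// converged restates the checkGradientConvergence logic from the diff:
// a nil gradient or NaN threshold disables the check, a zero threshold
// falls back to the 1e-12 default, and otherwise the infinity norm of
// the gradient is compared against the threshold.
func converged(gradient []float64, gradThresh float64) bool {
	if gradient == nil || math.IsNaN(gradThresh) {
		return false
	}
	if gradThresh == 0 {
		gradThresh = defaultGradientAbsTol
	}
	norm := math.Inf(-1)
	for _, v := range gradient {
		norm = math.Max(norm, math.Abs(v)) // infinity norm
	}
	return norm < gradThresh
}

func main() {
	g := []float64{1e-13, -2e-14}
	fmt.Println(converged(g, 0))          // true: falls back to 1e-12
	fmt.Println(converged(g, math.NaN())) // false: check disabled
	fmt.Println(converged(g, 1e-15))      // false: explicit threshold wins
}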
@@ -19,12 +19,7 @@ func ExampleMinimize() {
 	}

 	x := []float64{1.3, 0.7, 0.8, 1.9, 1.2}
-	settings := optimize.DefaultSettingsLocal()
-	settings.Recorder = nil
-	settings.GradientThreshold = 1e-12
-	settings.Converger = optimize.NeverTerminate{}
-
-	result, err := optimize.Minimize(p, x, settings, &optimize.BFGS{})
+	result, err := optimize.Minimize(p, x, nil, nil)
 	if err != nil {
 		log.Fatal(err)
 	}
@@ -101,15 +101,11 @@ type Method interface {
 // Some Methods do not require an initial location, but initX must still be
 // specified for the dimension of the optimization problem.
 //
-// The third argument contains the settings for the minimization. The
-// DefaultSettingsLocal and DefaultSettingsGlobal functions can be called for
-// different default settings depending on the optimization method. If
-// settings is nil, DefaultSettingsLocal is used. All settings will be honored
-// for all Methods, even if that setting is counter-productive to the method.
-// However, the information used to check the Settings, and the times at which
-// they are checked, are controlled by the Method. For example, if the Method
-// never evaluates the gradient of the function then GradientThreshold will not
-// be checked. Minimize cannot guarantee strict adherence to the bounds
+// The third argument contains the settings for the minimization. If settings
+// is nil, the zero value will be used, see the documentation of the Settings
+// type for more information, and see the warning below. All settings will be
+// honored for all Methods, even if that setting is counter-productive to the
+// method. Minimize cannot guarantee strict adherence to the evaluation bounds
 // specified when performing concurrent evaluations and updates.
 //
 // The final argument is the optimization method to use. If method == nil, then
@@ -131,7 +127,7 @@ func Minimize(p Problem, initX []float64, settings *Settings, method Method) (*R
 		method = getDefaultMethod(&p)
 	}
 	if settings == nil {
-		settings = DefaultSettingsLocal()
+		settings = &Settings{}
 	}
 	stats := &Stats{}
 	dim := len(initX)
@@ -147,10 +143,7 @@ func Minimize(p Problem, initX []float64, settings *Settings, method Method) (*R

 	converger := settings.Converger
 	if converger == nil {
-		converger = &FunctionConverge{
-			Absolute:   1e-10,
-			Iterations: 100,
-		}
+		converger = defaultFunctionConverge()
 	}
 	converger.Init(dim)

@@ -370,6 +363,13 @@ func minimize(prob *Problem, method Method, settings *Settings, converger Conver
 	return finalStatus, finalError
 }

+func defaultFunctionConverge() *FunctionConverge {
+	return &FunctionConverge{
+		Absolute:   1e-10,
+		Iterations: 100,
+	}
+}
+
 // newLocation allocates a new location structure of the appropriate size. It
 // allocates memory based on the dimension and the values in Needs.
 func newLocation(dim int, method Needser) *Location {
@@ -513,15 +513,12 @@ func checkLocationConvergence(loc *Location, settings *Settings, converger Conve
 	if math.IsInf(loc.F, -1) {
 		return FunctionNegativeInfinity
 	}
-	if loc.Gradient != nil {
+	if loc.Gradient != nil && settings.GradientThreshold > 0 {
 		norm := floats.Norm(loc.Gradient, math.Inf(1))
 		if norm < settings.GradientThreshold {
 			return GradientThreshold
 		}
 	}
 	if loc.F < settings.FunctionThreshold {
 		return FunctionThreshold
 	}
 	return converger.Converged(loc)
 }
@@ -5,6 +5,7 @@
 package optimize

 import (
+	"math"
 	"sort"

 	"gonum.org/v1/gonum/floats"
@@ -96,7 +97,7 @@ func (n *NelderMead) Init(dim, tasks int) int {
 }

 func (n *NelderMead) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
-	n.status, n.err = localOptimizer{}.run(n, operation, result, tasks)
+	n.status, n.err = localOptimizer{}.run(n, math.NaN(), operation, result, tasks)
 	close(operation)
 	return
 }
@@ -45,6 +45,10 @@ type Newton struct {
 	// information in H.
 	// Increase must be greater than 1. If Increase is 0, it is defaulted to 5.
 	Increase float64
+	// GradStopThreshold sets the threshold for stopping if the gradient norm
+	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
+	// if it is NaN the setting is not used.
+	GradStopThreshold float64

 	status Status
 	err    error
@@ -67,7 +71,7 @@ func (n *Newton) Init(dim, tasks int) int {
 }

 func (n *Newton) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
-	n.status, n.err = localOptimizer{}.run(n, operation, result, tasks)
+	n.status, n.err = localOptimizer{}.run(n, n.GradStopThreshold, operation, result, tasks)
 	close(operation)
 	return
 }
@@ -7,13 +7,12 @@ package optimize
 import (
 	"errors"
 	"fmt"
-	"math"
 	"time"

 	"gonum.org/v1/gonum/mat"
 )

-const defaultGradientAbsTol = 1e-6
+const defaultGradientAbsTol = 1e-12

 // Operation represents the set of operations commanded by Method at each
 // iteration. It is a bitmap of various Iteration and Evaluation constants.
@@ -161,10 +160,9 @@ func (p Problem) satisfies(method Needser) error {
 }

 // Settings represents settings of the optimization run. It contains initial
-// settings, convergence information, and Recorder information. In general, users
-// should use DefaultSettings rather than constructing a Settings literal.
-//
-// If Recorder is nil, no information will be recorded.
+// settings, convergence information, and Recorder information. Convergence
+// settings are only checked at MajorIterations, while Evaluation thresholds
+// are checked at every Operation. See the field comments for default values.
 type Settings struct {
 	// InitValues specifies properties (function value, gradient, etc.) known
 	// at the initial location passed to Minimize. If InitValues is non-nil, then
@@ -172,17 +170,13 @@ type Settings struct {
 	// and other fields may be specified.
 	InitValues *Location

 	// FunctionThreshold is the threshold for acceptably small values of the
 	// objective function. FunctionThreshold status is returned if
 	// the objective function is less than this value.
+	// The default value is -inf.
 	FunctionThreshold float64

-	// GradientThreshold determines the accuracy to which the minimum is found.
-	// GradientThreshold status is returned if the infinity norm of
-	// the gradient is less than this value.
-	// Has no effect if gradient information is not used.
-	// The default value is 1e-6.
+	// GradientThreshold stops optimization with GradientThreshold status if the
+	// infinity norm of the gradient is less than this value. This defaults to
+	// a value of 0 (and so gradient convergence is not checked), however note
+	// that many Methods (LBFGS, CG, etc.) will converge with a small value of
+	// the gradient, and so to fully disable this setting the Method may need to
+	// be modified.
+	// This setting has no effect if the gradient is not used by the Method.
 	GradientThreshold float64

 	// Converger checks if the optimization has converged based on the (history
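The comments above describe a two-level scheme: Settings.GradientThreshold is an opt-in check applied at MajorIterations, while each Method's GradStopThreshold controls the method's own near-minimum stop. A sketch of the combinations a caller might choose (the values are illustrative):

package main

import (
	"math"

	"gonum.org/v1/gonum/optimize"
)

func main() {
	// Zero value: Settings applies no gradient check; a Local method such
	// as BFGS still stops via its own GradStopThreshold default of 1e-12.
	s1 := &optimize.Settings{}

	// Opt in to a Settings-level gradient check at MajorIterations.
	s2 := &optimize.Settings{GradientThreshold: 1e-6}

	// To fully disable gradient-based stopping, the Method must also be
	// told: NaN turns off its GradStopThreshold check.
	m := &optimize.BFGS{GradStopThreshold: math.NaN()}

	_, _, _ = s1, s2, m
}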
@@ -239,19 +233,6 @@ type Settings struct {
 	Concurrent int
 }

-// DefaultSettingsLocal returns a new Settings struct that contains default settings
-// for running a local optimization.
-func DefaultSettingsLocal() *Settings {
-	return &Settings{
-		GradientThreshold: defaultGradientAbsTol,
-		FunctionThreshold: math.Inf(-1),
-		Converger: &FunctionConverge{
-			Absolute:   1e-10,
-			Iterations: 20,
-		},
-	}
-}
-
 // resize takes x and returns a slice of length dim. It returns a resliced x
 // if cap(x) >= dim, and a new slice otherwise.
 func resize(x []float64, dim int) []float64 {
@@ -1160,8 +1160,8 @@ func testLocal(t *testing.T, tests []unconstrainedTest, method Method) {
 			continue
 		}

-		settings := DefaultSettingsLocal()
-		settings.Recorder = nil
+		settings := &Settings{}
+		settings.Converger = defaultFunctionConverge()
 		if method != nil && method.Needs().Gradient {
 			// Turn off function convergence checks for gradient-based methods.
 			settings.Converger = NeverTerminate{}
@@ -1289,11 +1289,10 @@ func TestIssue76(t *testing.T) {
 	// Location very close to the minimum.
 	x := []float64{-11.594439904886773, 13.203630051265385, -0.40343948776868443, 0.2367787746745986}
 	s := &Settings{
-		FunctionThreshold: math.Inf(-1),
-		GradientThreshold: 1e-14,
 		MajorIterations: 1000000,
 	}
 	m := &GradientDescent{
+		GradStopThreshold: 1e-14,
 		Linesearcher: &Backtracking{},
 	}
 	// We are not interested in the error, only in the returned status.
@@ -1312,7 +1311,7 @@ func TestNelderMeadOneD(t *testing.T) {
 	}
 	x := []float64{10}
 	m := &NelderMead{}
-	s := DefaultSettingsLocal()
+	var s *Settings
 	result, err := Minimize(p, x, s, m)
 	if err != nil {
 		t.Errorf(err.Error())