mirror of
				https://github.com/gonum/gonum.git
				synced 2025-10-31 10:36:30 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			386 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			386 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright ©2015 The Gonum Authors. All rights reserved.
 | |
| // Use of this source code is governed by a BSD-style
 | |
| // license that can be found in the LICENSE file.
 | |
| 
 | |
| package optimize
 | |
| 
 | |
| import "math"
 | |
| 
 | |
| // MoreThuente is a Linesearcher that finds steps that satisfy both the
 | |
| // sufficient decrease and curvature conditions (the strong Wolfe conditions).
 | |
| //
 | |
| // References:
 | |
| //  - More, J.J. and D.J. Thuente: Line Search Algorithms with Guaranteed Sufficient
 | |
| //    Decrease. ACM Transactions on Mathematical Software 20(3) (1994), 286-307
 | |
| type MoreThuente struct {
 | |
| 	// DecreaseFactor is the constant factor in the sufficient decrease
 | |
| 	// (Armijo) condition.
 | |
| 	// It must be in the interval [0, 1). The default value is 0.
 | |
| 	DecreaseFactor float64
 | |
| 	// CurvatureFactor is the constant factor in the Wolfe conditions. Smaller
 | |
| 	// values result in a more exact line search.
 | |
| 	// A set value must be in the interval (0, 1). If it is zero, it will be
 | |
| 	// defaulted to 0.9.
 | |
| 	CurvatureFactor float64
 | |
| 	// StepTolerance sets the minimum acceptable width for the linesearch
 | |
| 	// interval. If the relative interval length is less than this value,
 | |
| 	// ErrLinesearcherFailure is returned.
 | |
| 	// It must be non-negative. If it is zero, it will be defaulted to 1e-10.
 | |
| 	StepTolerance float64
 | |
| 
 | |
| 	// MinimumStep is the minimum step that the linesearcher will take.
 | |
| 	// It must be non-negative and less than MaximumStep. Defaults to no
 | |
| 	// minimum (a value of 0).
 | |
| 	MinimumStep float64
 | |
| 	// MaximumStep is the maximum step that the linesearcher will take.
 | |
| 	// It must be greater than MinimumStep. If it is zero, it will be defaulted
 | |
| 	// to 1e20.
 | |
| 	MaximumStep float64
 | |
| 
 | |
| 	bracketed bool    // Indicates if a minimum has been bracketed.
 | |
| 	fInit     float64 // Function value at step = 0.
 | |
| 	gInit     float64 // Derivative value at step = 0.
 | |
| 
 | |
| 	// When stage is 1, the algorithm updates the interval given by x and y
 | |
| 	// so that it contains a minimizer of the modified function
 | |
| 	//  psi(step) = f(step) - f(0) - DecreaseFactor * step * f'(0).
 | |
| 	// When stage is 2, the interval is updated so that it contains a minimizer
 | |
| 	// of f.
 | |
| 	stage int
 | |
| 
 | |
| 	step         float64    // Current step.
 | |
| 	lower, upper float64    // Lower and upper bounds on the next step.
 | |
| 	x            float64    // Endpoint of the interval with a lower function value.
 | |
| 	fx, gx       float64    // Data at x.
 | |
| 	y            float64    // The other endpoint.
 | |
| 	fy, gy       float64    // Data at y.
 | |
| 	width        [2]float64 // Width of the interval at two previous iterations.
 | |
| }
 | |
| 
 | |
| const (
 | |
| 	mtMinGrowthFactor float64 = 1.1
 | |
| 	mtMaxGrowthFactor float64 = 4
 | |
| )
 | |
| 
 | |
| func (mt *MoreThuente) Init(f, g float64, step float64) Operation {
 | |
| 	// Based on the original Fortran code that is available, for example, from
 | |
| 	//  http://ftp.mcs.anl.gov/pub/MINPACK-2/csrch/
 | |
| 	// as part of
 | |
| 	//  MINPACK-2 Project. November 1993.
 | |
| 	//  Argonne National Laboratory and University of Minnesota.
 | |
| 	//  Brett M. Averick, Richard G. Carter, and Jorge J. Moré.
 | |
| 
 | |
| 	if g >= 0 {
 | |
| 		panic("morethuente: initial derivative is non-negative")
 | |
| 	}
 | |
| 	if step <= 0 {
 | |
| 		panic("morethuente: invalid initial step")
 | |
| 	}
 | |
| 
 | |
| 	if mt.CurvatureFactor == 0 {
 | |
| 		mt.CurvatureFactor = 0.9
 | |
| 	}
 | |
| 	if mt.StepTolerance == 0 {
 | |
| 		mt.StepTolerance = 1e-10
 | |
| 	}
 | |
| 	if mt.MaximumStep == 0 {
 | |
| 		mt.MaximumStep = 1e20
 | |
| 	}
 | |
| 
 | |
| 	if mt.MinimumStep < 0 {
 | |
| 		panic("morethuente: minimum step is negative")
 | |
| 	}
 | |
| 	if mt.MaximumStep <= mt.MinimumStep {
 | |
| 		panic("morethuente: maximum step is not greater than minimum step")
 | |
| 	}
 | |
| 	if mt.DecreaseFactor < 0 || mt.DecreaseFactor >= 1 {
 | |
| 		panic("morethuente: invalid decrease factor")
 | |
| 	}
 | |
| 	if mt.CurvatureFactor <= 0 || mt.CurvatureFactor >= 1 {
 | |
| 		panic("morethuente: invalid curvature factor")
 | |
| 	}
 | |
| 	if mt.StepTolerance <= 0 {
 | |
| 		panic("morethuente: step tolerance is not positive")
 | |
| 	}
 | |
| 
 | |
| 	if step < mt.MinimumStep {
 | |
| 		step = mt.MinimumStep
 | |
| 	}
 | |
| 	if step > mt.MaximumStep {
 | |
| 		step = mt.MaximumStep
 | |
| 	}
 | |
| 
 | |
| 	mt.bracketed = false
 | |
| 	mt.stage = 1
 | |
| 	mt.fInit = f
 | |
| 	mt.gInit = g
 | |
| 
 | |
| 	mt.x, mt.fx, mt.gx = 0, f, g
 | |
| 	mt.y, mt.fy, mt.gy = 0, f, g
 | |
| 
 | |
| 	mt.lower = 0
 | |
| 	mt.upper = step + mtMaxGrowthFactor*step
 | |
| 
 | |
| 	mt.width[0] = mt.MaximumStep - mt.MinimumStep
 | |
| 	mt.width[1] = 2 * mt.width[0]
 | |
| 
 | |
| 	mt.step = step
 | |
| 	return FuncEvaluation | GradEvaluation
 | |
| }
 | |
| 
 | |
| func (mt *MoreThuente) Iterate(f, g float64) (Operation, float64, error) {
 | |
| 	if mt.stage == 0 {
 | |
| 		panic("morethuente: Init has not been called")
 | |
| 	}
 | |
| 
 | |
| 	gTest := mt.DecreaseFactor * mt.gInit
 | |
| 	fTest := mt.fInit + mt.step*gTest
 | |
| 
 | |
| 	if mt.bracketed {
 | |
| 		if mt.step <= mt.lower || mt.step >= mt.upper || mt.upper-mt.lower <= mt.StepTolerance*mt.upper {
 | |
| 			// step contains the best step found (see below).
 | |
| 			return NoOperation, mt.step, ErrLinesearcherFailure
 | |
| 		}
 | |
| 	}
 | |
| 	if mt.step == mt.MaximumStep && f <= fTest && g <= gTest {
 | |
| 		return NoOperation, mt.step, ErrLinesearcherBound
 | |
| 	}
 | |
| 	if mt.step == mt.MinimumStep && (f > fTest || g >= gTest) {
 | |
| 		return NoOperation, mt.step, ErrLinesearcherFailure
 | |
| 	}
 | |
| 
 | |
| 	// Test for convergence.
 | |
| 	if f <= fTest && math.Abs(g) <= mt.CurvatureFactor*(-mt.gInit) {
 | |
| 		mt.stage = 0
 | |
| 		return MajorIteration, mt.step, nil
 | |
| 	}
 | |
| 
 | |
| 	if mt.stage == 1 && f <= fTest && g >= 0 {
 | |
| 		mt.stage = 2
 | |
| 	}
 | |
| 
 | |
| 	if mt.stage == 1 && f <= mt.fx && f > fTest {
 | |
| 		// Lower function value but the decrease is not sufficient .
 | |
| 
 | |
| 		// Compute values and derivatives of the modified function at step, x, y.
 | |
| 		fm := f - mt.step*gTest
 | |
| 		fxm := mt.fx - mt.x*gTest
 | |
| 		fym := mt.fy - mt.y*gTest
 | |
| 		gm := g - gTest
 | |
| 		gxm := mt.gx - gTest
 | |
| 		gym := mt.gy - gTest
 | |
| 		// Update x, y and step.
 | |
| 		mt.nextStep(fxm, gxm, fym, gym, fm, gm)
 | |
| 		// Recover values and derivates of the non-modified function at x and y.
 | |
| 		mt.fx = fxm + mt.x*gTest
 | |
| 		mt.fy = fym + mt.y*gTest
 | |
| 		mt.gx = gxm + gTest
 | |
| 		mt.gy = gym + gTest
 | |
| 	} else {
 | |
| 		// Update x, y and step.
 | |
| 		mt.nextStep(mt.fx, mt.gx, mt.fy, mt.gy, f, g)
 | |
| 	}
 | |
| 
 | |
| 	if mt.bracketed {
 | |
| 		// Monitor the length of the bracketing interval. If the interval has
 | |
| 		// not been reduced sufficiently after two steps, use bisection to
 | |
| 		// force its length to zero.
 | |
| 		width := mt.y - mt.x
 | |
| 		if math.Abs(width) >= 2.0/3*mt.width[1] {
 | |
| 			mt.step = mt.x + 0.5*width
 | |
| 		}
 | |
| 		mt.width[0], mt.width[1] = math.Abs(width), mt.width[0]
 | |
| 	}
 | |
| 
 | |
| 	if mt.bracketed {
 | |
| 		mt.lower = math.Min(mt.x, mt.y)
 | |
| 		mt.upper = math.Max(mt.x, mt.y)
 | |
| 	} else {
 | |
| 		mt.lower = mt.step + mtMinGrowthFactor*(mt.step-mt.x)
 | |
| 		mt.upper = mt.step + mtMaxGrowthFactor*(mt.step-mt.x)
 | |
| 	}
 | |
| 
 | |
| 	// Force the step to be in [MinimumStep, MaximumStep].
 | |
| 	mt.step = math.Max(mt.MinimumStep, math.Min(mt.step, mt.MaximumStep))
 | |
| 
 | |
| 	if mt.bracketed {
 | |
| 		if mt.step <= mt.lower || mt.step >= mt.upper || mt.upper-mt.lower <= mt.StepTolerance*mt.upper {
 | |
| 			// If further progress is not possible, set step to the best step
 | |
| 			// obtained during the search.
 | |
| 			mt.step = mt.x
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return FuncEvaluation | GradEvaluation, mt.step, nil
 | |
| }
 | |
| 
 | |
| // nextStep computes the next safeguarded step and updates the interval that
 | |
| // contains a step that satisfies the sufficient decrease and curvature
 | |
| // conditions.
 | |
| func (mt *MoreThuente) nextStep(fx, gx, fy, gy, f, g float64) {
 | |
| 	x := mt.x
 | |
| 	y := mt.y
 | |
| 	step := mt.step
 | |
| 
 | |
| 	gNeg := g < 0
 | |
| 	if gx < 0 {
 | |
| 		gNeg = !gNeg
 | |
| 	}
 | |
| 
 | |
| 	var next float64
 | |
| 	var bracketed bool
 | |
| 	switch {
 | |
| 	case f > fx:
 | |
| 		// A higher function value. The minimum is bracketed between x and step.
 | |
| 		// We want the next step to be closer to x because the function value
 | |
| 		// there is lower.
 | |
| 
 | |
| 		theta := 3*(fx-f)/(step-x) + gx + g
 | |
| 		s := math.Max(math.Abs(gx), math.Abs(g))
 | |
| 		s = math.Max(s, math.Abs(theta))
 | |
| 		gamma := s * math.Sqrt((theta/s)*(theta/s)-(gx/s)*(g/s))
 | |
| 		if step < x {
 | |
| 			gamma *= -1
 | |
| 		}
 | |
| 		p := gamma - gx + theta
 | |
| 		q := gamma - gx + gamma + g
 | |
| 		r := p / q
 | |
| 		stpc := x + r*(step-x)
 | |
| 		stpq := x + gx/((fx-f)/(step-x)+gx)/2*(step-x)
 | |
| 
 | |
| 		if math.Abs(stpc-x) < math.Abs(stpq-x) {
 | |
| 			// The cubic step is closer to x than the quadratic step.
 | |
| 			// Take the cubic step.
 | |
| 			next = stpc
 | |
| 		} else {
 | |
| 			// If f is much larger than fx, then the quadratic step may be too
 | |
| 			// close to x. Therefore heuristically take the average of the
 | |
| 			// cubic and quadratic steps.
 | |
| 			next = stpc + (stpq-stpc)/2
 | |
| 		}
 | |
| 		bracketed = true
 | |
| 
 | |
| 	case gNeg:
 | |
| 		// A lower function value and derivatives of opposite sign. The minimum
 | |
| 		// is bracketed between x and step. If we choose a step that is far
 | |
| 		// from step, the next iteration will also likely fall in this case.
 | |
| 
 | |
| 		theta := 3*(fx-f)/(step-x) + gx + g
 | |
| 		s := math.Max(math.Abs(gx), math.Abs(g))
 | |
| 		s = math.Max(s, math.Abs(theta))
 | |
| 		gamma := s * math.Sqrt((theta/s)*(theta/s)-(gx/s)*(g/s))
 | |
| 		if step > x {
 | |
| 			gamma *= -1
 | |
| 		}
 | |
| 		p := gamma - g + theta
 | |
| 		q := gamma - g + gamma + gx
 | |
| 		r := p / q
 | |
| 		stpc := step + r*(x-step)
 | |
| 		stpq := step + g/(g-gx)*(x-step)
 | |
| 
 | |
| 		if math.Abs(stpc-step) > math.Abs(stpq-step) {
 | |
| 			// The cubic step is farther from x than the quadratic step.
 | |
| 			// Take the cubic step.
 | |
| 			next = stpc
 | |
| 		} else {
 | |
| 			// Take the quadratic step.
 | |
| 			next = stpq
 | |
| 		}
 | |
| 		bracketed = true
 | |
| 
 | |
| 	case math.Abs(g) < math.Abs(gx):
 | |
| 		// A lower function value, derivatives of the same sign, and the
 | |
| 		// magnitude of the derivative decreases. Extrapolate function values
 | |
| 		// at x and step so that the next step lies between step and y.
 | |
| 
 | |
| 		theta := 3*(fx-f)/(step-x) + gx + g
 | |
| 		s := math.Max(math.Abs(gx), math.Abs(g))
 | |
| 		s = math.Max(s, math.Abs(theta))
 | |
| 		gamma := s * math.Sqrt(math.Max(0, (theta/s)*(theta/s)-(gx/s)*(g/s)))
 | |
| 		if step > x {
 | |
| 			gamma *= -1
 | |
| 		}
 | |
| 		p := gamma - g + theta
 | |
| 		q := gamma + gx - g + gamma
 | |
| 		r := p / q
 | |
| 		var stpc float64
 | |
| 		switch {
 | |
| 		case r < 0 && gamma != 0:
 | |
| 			stpc = step + r*(x-step)
 | |
| 		case step > x:
 | |
| 			stpc = mt.upper
 | |
| 		default:
 | |
| 			stpc = mt.lower
 | |
| 		}
 | |
| 		stpq := step + g/(g-gx)*(x-step)
 | |
| 
 | |
| 		if mt.bracketed {
 | |
| 			// We are extrapolating so be cautious and take the step that
 | |
| 			// is closer to step.
 | |
| 			if math.Abs(stpc-step) < math.Abs(stpq-step) {
 | |
| 				next = stpc
 | |
| 			} else {
 | |
| 				next = stpq
 | |
| 			}
 | |
| 			// Modify next if it is close to or beyond y.
 | |
| 			if step > x {
 | |
| 				next = math.Min(step+2.0/3*(y-step), next)
 | |
| 			} else {
 | |
| 				next = math.Max(step+2.0/3*(y-step), next)
 | |
| 			}
 | |
| 		} else {
 | |
| 			// Minimum has not been bracketed so take the larger step...
 | |
| 			if math.Abs(stpc-step) > math.Abs(stpq-step) {
 | |
| 				next = stpc
 | |
| 			} else {
 | |
| 				next = stpq
 | |
| 			}
 | |
| 			// ...but within reason.
 | |
| 			next = math.Max(mt.lower, math.Min(next, mt.upper))
 | |
| 		}
 | |
| 
 | |
| 	default:
 | |
| 		// A lower function value, derivatives of the same sign, and the
 | |
| 		// magnitude of the derivative does not decrease. The function seems to
 | |
| 		// decrease rapidly in the direction of the step.
 | |
| 
 | |
| 		switch {
 | |
| 		case mt.bracketed:
 | |
| 			theta := 3*(f-fy)/(y-step) + gy + g
 | |
| 			s := math.Max(math.Abs(gy), math.Abs(g))
 | |
| 			s = math.Max(s, math.Abs(theta))
 | |
| 			gamma := s * math.Sqrt((theta/s)*(theta/s)-(gy/s)*(g/s))
 | |
| 			if step > y {
 | |
| 				gamma *= -1
 | |
| 			}
 | |
| 			p := gamma - g + theta
 | |
| 			q := gamma - g + gamma + gy
 | |
| 			r := p / q
 | |
| 			next = step + r*(y-step)
 | |
| 		case step > x:
 | |
| 			next = mt.upper
 | |
| 		default:
 | |
| 			next = mt.lower
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if f > fx {
 | |
| 		// x is still the best step.
 | |
| 		mt.y = step
 | |
| 		mt.fy = f
 | |
| 		mt.gy = g
 | |
| 	} else {
 | |
| 		// step is the new best step.
 | |
| 		if gNeg {
 | |
| 			mt.y = x
 | |
| 			mt.fy = fx
 | |
| 			mt.gy = gx
 | |
| 		}
 | |
| 		mt.x = step
 | |
| 		mt.fx = f
 | |
| 		mt.gx = g
 | |
| 	}
 | |
| 	mt.bracketed = bracketed
 | |
| 	mt.step = next
 | |
| }
 | 
