// Copyright ©2015 The Gonum Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package optimize import "math" // MoreThuente is a Linesearcher that finds steps that satisfy both the // sufficient decrease and curvature conditions (the strong Wolfe conditions). // // References: // - More, J.J. and D.J. Thuente: Line Search Algorithms with Guaranteed Sufficient // Decrease. ACM Transactions on Mathematical Software 20(3) (1994), 286-307 type MoreThuente struct { // DecreaseFactor is the constant factor in the sufficient decrease // (Armijo) condition. // It must be in the interval [0, 1). The default value is 0. DecreaseFactor float64 // CurvatureFactor is the constant factor in the Wolfe conditions. Smaller // values result in a more exact line search. // A set value must be in the interval (0, 1). If it is zero, it will be // defaulted to 0.9. CurvatureFactor float64 // StepTolerance sets the minimum acceptable width for the linesearch // interval. If the relative interval length is less than this value, // ErrLinesearcherFailure is returned. // It must be non-negative. If it is zero, it will be defaulted to 1e-10. StepTolerance float64 // MinimumStep is the minimum step that the linesearcher will take. // It must be non-negative and less than MaximumStep. Defaults to no // minimum (a value of 0). MinimumStep float64 // MaximumStep is the maximum step that the linesearcher will take. // It must be greater than MinimumStep. If it is zero, it will be defaulted // to 1e20. MaximumStep float64 bracketed bool // Indicates if a minimum has been bracketed. fInit float64 // Function value at step = 0. gInit float64 // Derivative value at step = 0. // When stage is 1, the algorithm updates the interval given by x and y // so that it contains a minimizer of the modified function // psi(step) = f(step) - f(0) - DecreaseFactor * step * f'(0). // When stage is 2, the interval is updated so that it contains a minimizer // of f. stage int step float64 // Current step. lower, upper float64 // Lower and upper bounds on the next step. x float64 // Endpoint of the interval with a lower function value. fx, gx float64 // Data at x. y float64 // The other endpoint. fy, gy float64 // Data at y. width [2]float64 // Width of the interval at two previous iterations. } const ( mtMinGrowthFactor float64 = 1.1 mtMaxGrowthFactor float64 = 4 ) func (mt *MoreThuente) Init(f, g float64, step float64) Operation { // Based on the original Fortran code that is available, for example, from // http://ftp.mcs.anl.gov/pub/MINPACK-2/csrch/ // as part of // MINPACK-2 Project. November 1993. // Argonne National Laboratory and University of Minnesota. // Brett M. Averick, Richard G. Carter, and Jorge J. Moré. if g >= 0 { panic("morethuente: initial derivative is non-negative") } if step <= 0 { panic("morethuente: invalid initial step") } if mt.CurvatureFactor == 0 { mt.CurvatureFactor = 0.9 } if mt.StepTolerance == 0 { mt.StepTolerance = 1e-10 } if mt.MaximumStep == 0 { mt.MaximumStep = 1e20 } if mt.MinimumStep < 0 { panic("morethuente: minimum step is negative") } if mt.MaximumStep <= mt.MinimumStep { panic("morethuente: maximum step is not greater than minimum step") } if mt.DecreaseFactor < 0 || mt.DecreaseFactor >= 1 { panic("morethuente: invalid decrease factor") } if mt.CurvatureFactor <= 0 || mt.CurvatureFactor >= 1 { panic("morethuente: invalid curvature factor") } if mt.StepTolerance <= 0 { panic("morethuente: step tolerance is not positive") } if step < mt.MinimumStep { step = mt.MinimumStep } if step > mt.MaximumStep { step = mt.MaximumStep } mt.bracketed = false mt.stage = 1 mt.fInit = f mt.gInit = g mt.x, mt.fx, mt.gx = 0, f, g mt.y, mt.fy, mt.gy = 0, f, g mt.lower = 0 mt.upper = step + mtMaxGrowthFactor*step mt.width[0] = mt.MaximumStep - mt.MinimumStep mt.width[1] = 2 * mt.width[0] mt.step = step return FuncEvaluation | GradEvaluation } func (mt *MoreThuente) Iterate(f, g float64) (Operation, float64, error) { if mt.stage == 0 { panic("morethuente: Init has not been called") } gTest := mt.DecreaseFactor * mt.gInit fTest := mt.fInit + mt.step*gTest if mt.bracketed { if mt.step <= mt.lower || mt.step >= mt.upper || mt.upper-mt.lower <= mt.StepTolerance*mt.upper { // step contains the best step found (see below). return NoOperation, mt.step, ErrLinesearcherFailure } } if mt.step == mt.MaximumStep && f <= fTest && g <= gTest { return NoOperation, mt.step, ErrLinesearcherBound } if mt.step == mt.MinimumStep && (f > fTest || g >= gTest) { return NoOperation, mt.step, ErrLinesearcherFailure } // Test for convergence. if f <= fTest && math.Abs(g) <= mt.CurvatureFactor*(-mt.gInit) { mt.stage = 0 return MajorIteration, mt.step, nil } if mt.stage == 1 && f <= fTest && g >= 0 { mt.stage = 2 } if mt.stage == 1 && f <= mt.fx && f > fTest { // Lower function value but the decrease is not sufficient . // Compute values and derivatives of the modified function at step, x, y. fm := f - mt.step*gTest fxm := mt.fx - mt.x*gTest fym := mt.fy - mt.y*gTest gm := g - gTest gxm := mt.gx - gTest gym := mt.gy - gTest // Update x, y and step. mt.nextStep(fxm, gxm, fym, gym, fm, gm) // Recover values and derivates of the non-modified function at x and y. mt.fx = fxm + mt.x*gTest mt.fy = fym + mt.y*gTest mt.gx = gxm + gTest mt.gy = gym + gTest } else { // Update x, y and step. mt.nextStep(mt.fx, mt.gx, mt.fy, mt.gy, f, g) } if mt.bracketed { // Monitor the length of the bracketing interval. If the interval has // not been reduced sufficiently after two steps, use bisection to // force its length to zero. width := mt.y - mt.x if math.Abs(width) >= 2.0/3*mt.width[1] { mt.step = mt.x + 0.5*width } mt.width[0], mt.width[1] = math.Abs(width), mt.width[0] } if mt.bracketed { mt.lower = math.Min(mt.x, mt.y) mt.upper = math.Max(mt.x, mt.y) } else { mt.lower = mt.step + mtMinGrowthFactor*(mt.step-mt.x) mt.upper = mt.step + mtMaxGrowthFactor*(mt.step-mt.x) } // Force the step to be in [MinimumStep, MaximumStep]. mt.step = math.Max(mt.MinimumStep, math.Min(mt.step, mt.MaximumStep)) if mt.bracketed { if mt.step <= mt.lower || mt.step >= mt.upper || mt.upper-mt.lower <= mt.StepTolerance*mt.upper { // If further progress is not possible, set step to the best step // obtained during the search. mt.step = mt.x } } return FuncEvaluation | GradEvaluation, mt.step, nil } // nextStep computes the next safeguarded step and updates the interval that // contains a step that satisfies the sufficient decrease and curvature // conditions. func (mt *MoreThuente) nextStep(fx, gx, fy, gy, f, g float64) { x := mt.x y := mt.y step := mt.step gNeg := g < 0 if gx < 0 { gNeg = !gNeg } var next float64 var bracketed bool switch { case f > fx: // A higher function value. The minimum is bracketed between x and step. // We want the next step to be closer to x because the function value // there is lower. theta := 3*(fx-f)/(step-x) + gx + g s := math.Max(math.Abs(gx), math.Abs(g)) s = math.Max(s, math.Abs(theta)) gamma := s * math.Sqrt((theta/s)*(theta/s)-(gx/s)*(g/s)) if step < x { gamma *= -1 } p := gamma - gx + theta q := gamma - gx + gamma + g r := p / q stpc := x + r*(step-x) stpq := x + gx/((fx-f)/(step-x)+gx)/2*(step-x) if math.Abs(stpc-x) < math.Abs(stpq-x) { // The cubic step is closer to x than the quadratic step. // Take the cubic step. next = stpc } else { // If f is much larger than fx, then the quadratic step may be too // close to x. Therefore heuristically take the average of the // cubic and quadratic steps. next = stpc + (stpq-stpc)/2 } bracketed = true case gNeg: // A lower function value and derivatives of opposite sign. The minimum // is bracketed between x and step. If we choose a step that is far // from step, the next iteration will also likely fall in this case. theta := 3*(fx-f)/(step-x) + gx + g s := math.Max(math.Abs(gx), math.Abs(g)) s = math.Max(s, math.Abs(theta)) gamma := s * math.Sqrt((theta/s)*(theta/s)-(gx/s)*(g/s)) if step > x { gamma *= -1 } p := gamma - g + theta q := gamma - g + gamma + gx r := p / q stpc := step + r*(x-step) stpq := step + g/(g-gx)*(x-step) if math.Abs(stpc-step) > math.Abs(stpq-step) { // The cubic step is farther from x than the quadratic step. // Take the cubic step. next = stpc } else { // Take the quadratic step. next = stpq } bracketed = true case math.Abs(g) < math.Abs(gx): // A lower function value, derivatives of the same sign, and the // magnitude of the derivative decreases. Extrapolate function values // at x and step so that the next step lies between step and y. theta := 3*(fx-f)/(step-x) + gx + g s := math.Max(math.Abs(gx), math.Abs(g)) s = math.Max(s, math.Abs(theta)) gamma := s * math.Sqrt(math.Max(0, (theta/s)*(theta/s)-(gx/s)*(g/s))) if step > x { gamma *= -1 } p := gamma - g + theta q := gamma + gx - g + gamma r := p / q var stpc float64 switch { case r < 0 && gamma != 0: stpc = step + r*(x-step) case step > x: stpc = mt.upper default: stpc = mt.lower } stpq := step + g/(g-gx)*(x-step) if mt.bracketed { // We are extrapolating so be cautious and take the step that // is closer to step. if math.Abs(stpc-step) < math.Abs(stpq-step) { next = stpc } else { next = stpq } // Modify next if it is close to or beyond y. if step > x { next = math.Min(step+2.0/3*(y-step), next) } else { next = math.Max(step+2.0/3*(y-step), next) } } else { // Minimum has not been bracketed so take the larger step... if math.Abs(stpc-step) > math.Abs(stpq-step) { next = stpc } else { next = stpq } // ...but within reason. next = math.Max(mt.lower, math.Min(next, mt.upper)) } default: // A lower function value, derivatives of the same sign, and the // magnitude of the derivative does not decrease. The function seems to // decrease rapidly in the direction of the step. switch { case mt.bracketed: theta := 3*(f-fy)/(y-step) + gy + g s := math.Max(math.Abs(gy), math.Abs(g)) s = math.Max(s, math.Abs(theta)) gamma := s * math.Sqrt((theta/s)*(theta/s)-(gy/s)*(g/s)) if step > y { gamma *= -1 } p := gamma - g + theta q := gamma - g + gamma + gy r := p / q next = step + r*(y-step) case step > x: next = mt.upper default: next = mt.lower } } if f > fx { // x is still the best step. mt.y = step mt.fy = f mt.gy = g } else { // step is the new best step. if gNeg { mt.y = x mt.fy = fx mt.gy = gx } mt.x = step mt.fx = f mt.gx = g } mt.bracketed = bracketed mt.step = next }