// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/floats"
)

const (
	initialStepFactor = 1

	quadraticMinimumStepSize = 1e-3
	quadraticMaximumStepSize = 1
	quadraticThreshold       = 1e-12

	firstOrderMinimumStepSize = quadraticMinimumStepSize
	firstOrderMaximumStepSize = quadraticMaximumStepSize
)

// ConstantStepSize is a StepSizer that returns the same step size for
// every iteration.
type ConstantStepSize struct {
	Size float64
}

func (c ConstantStepSize) Init(_ *Location, _ []float64) float64 {
	return c.Size
}

func (c ConstantStepSize) StepSize(_ *Location, _ []float64) float64 {
	return c.Size
}
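// The function below is an illustrative sketch, not part of the solver: it
// shows how a fixed step size would plug into the GradientDescent method of
// this package via its StepSizer field. The name exampleConstantStepSize and
// the size 0.05 are arbitrary choices for the sketch.
func exampleConstantStepSize() *GradientDescent {
	// Init and StepSize both return 0.05 unconditionally, so every line
	// search starts from the same trial step regardless of the location or
	// the descent direction.
	return &GradientDescent{
		StepSizer: ConstantStepSize{Size: 0.05},
	}
}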
// QuadraticStepSize estimates the initial line search step size as the minimum
// of a quadratic that interpolates f(x_{k-1}), f(x_k) and ∇f_{k-1}⋅p_{k-1}.
// This is useful for line search methods that do not produce well-scaled
// descent directions, such as gradient descent or conjugate gradient methods.
// The step size is bounded away from zero.
type QuadraticStepSize struct {
	// Threshold determines when the initial step size is estimated by
	// quadratic interpolation: if the relative change in the objective
	// function is larger than Threshold, interpolation is used, otherwise
	// the initial step size is set to 2*previous step size.
	// If Threshold is zero, it will be set to 1e-12.
	Threshold float64
	// InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞.
	// If InitialStepFactor is zero, it will be set to one.
	InitialStepFactor float64
	// MinStepSize is the lower bound on the estimated step size.
	// MinStepSize times GradientAbsTol should always be greater than machine epsilon.
	// If MinStepSize is zero, it will be set to 1e-3.
	MinStepSize float64
	// MaxStepSize is the upper bound on the estimated step size.
	// If MaxStepSize is zero, it will be set to 1.
	MaxStepSize float64

	fPrev        float64
	dirPrevNorm  float64
	projGradPrev float64
	xPrev        []float64
}

func (q *QuadraticStepSize) Init(loc *Location, dir []float64) (stepSize float64) {
	if q.Threshold == 0 {
		q.Threshold = quadraticThreshold
	}
	if q.InitialStepFactor == 0 {
		q.InitialStepFactor = initialStepFactor
	}
	if q.MinStepSize == 0 {
		q.MinStepSize = quadraticMinimumStepSize
	}
	if q.MaxStepSize == 0 {
		q.MaxStepSize = quadraticMaximumStepSize
	}
	if q.MaxStepSize <= q.MinStepSize {
		panic("optimize: MinStepSize not smaller than MaxStepSize")
	}

	gNorm := floats.Norm(loc.Gradient, math.Inf(1))
	stepSize = math.Max(q.MinStepSize, math.Min(q.InitialStepFactor/gNorm, q.MaxStepSize))

	q.fPrev = loc.F
	q.dirPrevNorm = floats.Norm(dir, 2)
	q.projGradPrev = floats.Dot(loc.Gradient, dir)
	q.xPrev = resize(q.xPrev, len(loc.X))
	copy(q.xPrev, loc.X)
	return stepSize
}

func (q *QuadraticStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) {
	stepSizePrev := floats.Distance(loc.X, q.xPrev, 2) / q.dirPrevNorm
	projGrad := floats.Dot(loc.Gradient, dir)

	stepSize = 2 * stepSizePrev
	if !floats.EqualWithinRel(q.fPrev, loc.F, q.Threshold) {
		// Two consecutive function values are not relatively equal, so
		// computing the minimum of a quadratic interpolant might make sense.
		df := (loc.F - q.fPrev) / stepSizePrev
		quadTest := df - q.projGradPrev
		if quadTest > 0 {
			// There is a chance of approximating the function well by a
			// quadratic only if the finite difference (f_k-f_{k-1})/stepSizePrev
			// is larger than ∇f_{k-1}⋅p_{k-1}.
			// Set the step size to the minimizer of the quadratic function that
			// interpolates f_{k-1}, ∇f_{k-1}⋅p_{k-1} and f_k.
			stepSize = -q.projGradPrev * stepSizePrev / quadTest / 2
		}
	}
	// Bound the step size to lie in [MinStepSize, MaxStepSize].
	stepSize = math.Max(q.MinStepSize, math.Min(stepSize, q.MaxStepSize))

	q.fPrev = loc.F
	q.dirPrevNorm = floats.Norm(dir, 2)
	q.projGradPrev = projGrad
	copy(q.xPrev, loc.X)
	return stepSize
}
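// The helper below is a hypothetical sketch, not used by the package: it
// works the interpolation in StepSize by hand for the one-dimensional model
// φ(s) = (s-1)², whose exact minimizer along the line is s = 1.
func exampleQuadraticInterpolation() float64 {
	const (
		fPrev        = 1.0  // φ(0) = (0-1)²: previous function value
		projGradPrev = -2.0 // φ'(0) = 2(0-1): previous directional derivative
		stepSizePrev = 0.5  // previously accepted step size
		f            = 0.25 // φ(0.5) = (0.5-1)²: current function value
	)
	df := (f - fPrev) / stepSizePrev // finite-difference slope: -1.5
	quadTest := df - projGradPrev    // 0.5 > 0, so the quadratic has a minimum
	// Minimizer of the interpolating quadratic, exactly as computed in
	// StepSize above: -(-2)*0.5/0.5/2 = 1, recovering the true minimizer.
	return -projGradPrev * stepSizePrev / quadTest / 2
}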
// FirstOrderStepSize estimates the initial line search step size based on the
// assumption that the first-order change in the function will be the same as
// that obtained at the previous iteration. That is, the initial step size s^0_k
// is chosen so that
//  s^0_k ∇f_k⋅p_k = s_{k-1} ∇f_{k-1}⋅p_{k-1}
// This is useful for line search methods that do not produce well-scaled
// descent directions, such as gradient descent or conjugate gradient methods.
type FirstOrderStepSize struct {
	// InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞.
	// If InitialStepFactor is zero, it will be set to one.
	InitialStepFactor float64
	// MinStepSize is the lower bound on the estimated step size.
	// MinStepSize times GradientAbsTol should always be greater than machine epsilon.
	// If MinStepSize is zero, it will be set to 1e-3.
	MinStepSize float64
	// MaxStepSize is the upper bound on the estimated step size.
	// If MaxStepSize is zero, it will be set to 1.
	MaxStepSize float64

	dirPrevNorm  float64
	projGradPrev float64
	xPrev        []float64
}

func (fo *FirstOrderStepSize) Init(loc *Location, dir []float64) (stepSize float64) {
	if fo.InitialStepFactor == 0 {
		fo.InitialStepFactor = initialStepFactor
	}
	if fo.MinStepSize == 0 {
		fo.MinStepSize = firstOrderMinimumStepSize
	}
	if fo.MaxStepSize == 0 {
		fo.MaxStepSize = firstOrderMaximumStepSize
	}
	if fo.MaxStepSize <= fo.MinStepSize {
		panic("optimize: MinStepSize not smaller than MaxStepSize")
	}

	gNorm := floats.Norm(loc.Gradient, math.Inf(1))
	stepSize = math.Max(fo.MinStepSize, math.Min(fo.InitialStepFactor/gNorm, fo.MaxStepSize))

	fo.dirPrevNorm = floats.Norm(dir, 2)
	fo.projGradPrev = floats.Dot(loc.Gradient, dir)
	fo.xPrev = resize(fo.xPrev, len(loc.X))
	copy(fo.xPrev, loc.X)
	return stepSize
}

func (fo *FirstOrderStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) {
	stepSizePrev := floats.Distance(loc.X, fo.xPrev, 2) / fo.dirPrevNorm
	projGrad := floats.Dot(loc.Gradient, dir)

	stepSize = stepSizePrev * fo.projGradPrev / projGrad
	stepSize = math.Max(fo.MinStepSize, math.Min(stepSize, fo.MaxStepSize))

	fo.dirPrevNorm = floats.Norm(dir, 2)
	fo.projGradPrev = projGrad
	copy(fo.xPrev, loc.X)
	return stepSize
}
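// The helper below is a hypothetical sketch, not used by the package: it
// evaluates the first-order scaling rule above for concrete numbers. If the
// previous step of 0.4 saw a directional derivative of -2 and the new
// direction has a directional derivative of -0.5, the trial step grows so
// that the predicted first-order decrease s⋅(∇f⋅p) stays the same.
func exampleFirstOrderScaling() float64 {
	const (
		stepSizePrev = 0.4  // s_{k-1}
		projGradPrev = -2.0 // ∇f_{k-1}⋅p_{k-1}
		projGrad     = -0.5 // ∇f_k⋅p_k
	)
	// s^0_k = s_{k-1} ∇f_{k-1}⋅p_{k-1} / (∇f_k⋅p_k) = 0.4*(-2)/(-0.5) = 1.6.
	// StepSize would then clamp this to MaxStepSize (1 by default).
	return stepSizePrev * projGradPrev / projGrad
}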