diff --git a/.travis.yml b/.travis.yml
index e7bc78bc5..31e995163 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,8 +3,8 @@
 os:
   - linux
   - osx
 julia:
-  - 0.4
   - 0.5
+  - 0.6
   - nightly
 notifications:
   email: false
diff --git a/NEWS.md b/NEWS.md
index 122f2c0bd..9ba8ffd8d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,9 @@
+# Optim v0.8.0 release notes
+* Significant changes to the Non-, Once-, and TwiceDifferentiable setup; these now hold temporaries relevant to the evaluation of objectives, gradients, and Hessians. They also hold f-, g-, and h_calls counters
+* Refactor tests
+* Drop v0.4 support
+* Add limits to f-, g-, and h_calls
+
 # Optim v0.7.6 release notes
 * Fix deprecations for *Function constructors
 * Fix depwarns on Julia master (v0.6)
diff --git a/REQUIRE b/REQUIRE
index facbd9d36..65bac343e 100644
--- a/REQUIRE
+++ b/REQUIRE
@@ -1,4 +1,4 @@
-julia 0.4
+julia 0.5
 Calculus
 ForwardDiff 0.3.0 0.4.0
 PositiveFactorizations
diff --git a/docs/src/algo/autodiff.md b/docs/src/algo/autodiff.md
index ac3455946..8602d8a92 100644
--- a/docs/src/algo/autodiff.md
+++ b/docs/src/algo/autodiff.md
@@ -47,8 +47,7 @@ julia> Optim.minimizer(optimize(f, g!, h!, initial_x, Newton()))
 1.0
 1.0
 ```
-This is indeed the case. Now let us use finite differences for BFGS (we cannot
- get finite difference Hessians in Optim).
+This is indeed the case. Now let us use finite differences for BFGS.
 ```jlcon
 julia> Optim.minimizer(optimize(f, initial_x, BFGS()))
 2-element Array{Float64,1}:
@@ -56,15 +55,19 @@ julia> Optim.minimizer(optimize(f, initial_x, BFGS()))
 1.0
 ```
 Still looks good. Returning to automatic differentiation, let us try both solvers using this
-method. We enable automatic differentiation by adding `autodiff = true` to our
-`Optim.Options`.
+method. To use automatic differentiation, we have to construct an `OnceDifferentiable`
+instance and use the `autodiff` keyword.
 ```jlcon
-julia> Optim.minimizer(optimize(f, initial_x, BFGS(), Optim.Options(autodiff = true)))
+julia> od = OnceDifferentiable(f, initial_x; autodiff = :forwarddiff);
+
+julia> Optim.minimizer(optimize(od, initial_x, BFGS()))
 2-element Array{Float64,1}:
 1.0
 1.0

-julia> Optim.minimizer(optimize(f, initial_x, Newton(), Optim.Options(autodiff = true)))
+julia> td = TwiceDifferentiable(f, initial_x; autodiff = :forwarddiff);
+
+julia> Optim.minimizer(optimize(td, initial_x, Newton()))
 2-element Array{Float64,1}:
 1.0
 1.0
diff --git a/docs/src/dev/contributing.md b/docs/src/dev/contributing.md
index 240aa5278..55416abdd 100644
--- a/docs/src/dev/contributing.md
+++ b/docs/src/dev/contributing.md
@@ -32,7 +32,6 @@ Minim(; linesearch = LineSearches.hagerzhang!, minim_parameter = 1.0) =
 type MinimState{T,N,G}
     @add_generic_fields()
     x_previous::Array{T,N}
-    g::G
     f_x_previous::T
     s::Array{T,N}
     @add_linesearch_fields()
diff --git a/docs/src/user/config.md b/docs/src/user/config.md
index a9757df40..5a1641b78 100644
--- a/docs/src/user/config.md
+++ b/docs/src/user/config.md
@@ -40,12 +40,17 @@ In addition to the solver, you can alter the behavior of the Optim package by us
 * `x_tol`: What is the threshold for determining convergence in the input vector? Defaults to `1e-32`.
 * `f_tol`: What is the threshold for determining convergence in the objective value? Defaults to `1e-32`.
 * `g_tol`: What is the threshold for determining convergence in the gradient? Defaults to `1e-8`. For gradient free methods, this will control the main convergence tolerance, which is solver specific.
+* `f_calls_limit`: A soft upper limit on the number of objective calls. Defaults to `0` (unlimited).
+* `g_calls_limit`: A soft upper limit on the number of gradient calls. Defaults to `0` (unlimited).
+* `h_calls_limit`: A soft upper limit on the number of Hessian calls. Defaults to `0` (unlimited).
+* `allow_f_increases`: Allow steps that increase the objective value. Defaults to `false`.
 * `iterations`: How many iterations will run before the algorithm gives up? Defaults to `1_000`.
 * `store_trace`: Should a trace of the optimization algorithm's state be stored? Defaults to `false`.
 * `show_trace`: Should a trace of the optimization algorithm's state be shown on `STDOUT`? Defaults to `false`.
-* `extended_trace`: Save additional information. Solver dependent.
-* `autodiff`: When only an objective function is provided, use automatic differentiation to compute exact numerical gradients. If not, finite differencing will be used. This functionality is experimental. Defaults to `false`.
+* `extended_trace`: Save additional information. Solver dependent. Defaults to `false`.
 * `show_every`: Trace output is printed every `show_every`th iteration.
+* `callback`: A function to be called during tracing. A return value of `true` stops the `optimize` call.
+* `time_limit`: A soft upper limit on the total run time. Defaults to `NaN` (unlimited).
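+
+As a quick, illustrative sketch (the numbers below are arbitrary), the new
+limits can be combined in a single `Optim.Options` call:
+```jl
+# stop early after roughly 100 objective calls, 50 gradient calls, or 10 seconds;
+# the limits are soft, so they are only checked once per iteration
+Optim.Options(f_calls_limit = 100, g_calls_limit = 50, time_limit = 10.0)
+```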

 We currently recommend the statically dispatched interface by using the `Optim.Options` constructor:
diff --git a/docs/src/user/minimization.md b/docs/src/user/minimization.md
index 45311a2a9..3042f7556 100644
--- a/docs/src/user/minimization.md
+++ b/docs/src/user/minimization.md
@@ -25,11 +25,6 @@ using central finite differencing:
 ```jl
 optimize(f, [0.0, 0.0], LBFGS())
 ```
-Alternatively, the `autodiff` keyword will use atomatic differentiation to construct
-the gradient.
-```jl
-optimize(f, [0.0, 0.0], LBFGS(), Optim.Options(autodiff = true))
-```
 For better performance and greater precision, you can pass your own gradient function. For the Rosenbrock example, the analytical gradient can be shown to be:
 ```jl
 function g!(x, storage)
diff --git a/src/Optim.jl b/src/Optim.jl
index 50ff4967f..9bd74f4b0 100644
--- a/src/Optim.jl
+++ b/src/Optim.jl
@@ -44,6 +44,7 @@ module Optim
     # Types
     include("types.jl")
+    include("objective_types.jl")

     # API
     include("api.jl")
@@ -80,9 +81,6 @@ module Optim
     # Constrained optimization
     include("fminbox.jl")

-    # trust region methods
-    include("levenberg_marquardt.jl")
-
     # Heuristic Optimization Methods
     include("nelder_mead.jl")
     include("simulated_annealing.jl")
diff --git a/src/accelerated_gradient_descent.jl b/src/accelerated_gradient_descent.jl
index 75692ebcc..6932c5c13 100644
--- a/src/accelerated_gradient_descent.jl
+++ b/src/accelerated_gradient_descent.jl
@@ -14,16 +14,13 @@ end
 AcceleratedGradientDescent(; linesearch = LineSearches.hagerzhang!) =
   AcceleratedGradientDescent(linesearch)
 =#
-function AcceleratedGradientDescent(; linesearch! = nothing,
-                                    linesearch = LineSearches.hagerzhang!)
-    linesearch = get_linesearch(linesearch!, linesearch)
+function AcceleratedGradientDescent(; linesearch = LineSearches.hagerzhang!)
AcceleratedGradientDescent(linesearch) end -type AcceleratedGradientDescentState{T,N,G} +type AcceleratedGradientDescentState{T,N} @add_generic_fields() x_previous::Array{T,N} - g::G f_x_previous::T iteration::Int y::Array{T,N} @@ -33,18 +30,12 @@ type AcceleratedGradientDescentState{T,N,G} end function initial_state{T}(method::AcceleratedGradientDescent, options, d, initial_x::Array{T}) - g = similar(initial_x) - f_x = d.fg!(initial_x, g) + value_grad!(d, initial_x) AcceleratedGradientDescentState("Accelerated Gradient Descent", length(initial_x), copy(initial_x), # Maintain current state in state.x - f_x, # Store current f in state.f_x - 1, # Track f calls in state.f_calls - 1, # Track g calls in state.g_calls - 0, # Track h calls in state.h_calls copy(initial_x), # Maintain previous state in state.x_previous - g, # Store current gradient in state.g T(NaN), # Store previous f in state.f_x_previous 0, # Iteration copy(initial_x), # Maintain intermediary current state in state.y @@ -57,7 +48,7 @@ function update_state!{T}(d, state::AcceleratedGradientDescentState{T}, method:: state.iteration += 1 # Search direction is always the negative gradient @simd for i in 1:state.n - @inbounds state.s[i] = -state.g[i] + @inbounds state.s[i] = -gradient(d, i) end # Determine the distance of movement along the search line diff --git a/src/api.jl b/src/api.jl index 414f1c0b7..fa1ca238a 100644 --- a/src/api.jl +++ b/src/api.jl @@ -34,9 +34,17 @@ g_norm_trace(r::OptimizationResults) = error("g_norm_trace is not implemented fo g_norm_trace(r::MultivariateOptimizationResults) = [ state.g_norm for state in trace(r) ] f_calls(r::OptimizationResults) = r.f_calls +f_calls(d) = d.f_calls[1] g_calls(r::OptimizationResults) = error("g_calls is not implemented for $(method(r)).") g_calls(r::MultivariateOptimizationResults) = r.g_calls +g_calls(d::NonDifferentiable) = 0 +g_calls(d) = d.g_calls[1] + +h_calls(r::OptimizationResults) = error("h_calls is not implemented for $(method(r)).") +h_calls(r::MultivariateOptimizationResults) = r.h_calls +h_calls(d::Union{NonDifferentiable, OnceDifferentiable}) = 0 +h_calls(d) = d.h_calls[1] converged(r::UnivariateOptimizationResults) = r.converged converged(r::MultivariateOptimizationResults) = r.x_converged || r.f_converged || r.g_converged diff --git a/src/bfgs.jl b/src/bfgs.jl index c42718f65..2da2c705e 100644 --- a/src/bfgs.jl +++ b/src/bfgs.jl @@ -10,18 +10,15 @@ end BFGS(; linesearch = LineSearches.hagerzhang!, initial_invH = x -> eye(eltype(x), length(x))) = BFGS(linesearch, initial_invH) =# -function BFGS(; linesearch! 
= nothing, - linesearch = LineSearches.hagerzhang!, - initial_invH = x -> eye(eltype(x), length(x)), - resetalpha = true) - linesearch = get_linesearch(linesearch!, linesearch) +function BFGS(; linesearch = LineSearches.hagerzhang!, + initial_invH = x -> eye(eltype(x), length(x)), + resetalpha = true) BFGS(linesearch, initial_invH, resetalpha) end type BFGSState{T,N,G} @add_generic_fields() x_previous::Array{T,N} - g::G g_previous::G f_x_previous::T dx::Array{T,N} @@ -34,20 +31,14 @@ end function initial_state{T}(method::BFGS, options, d, initial_x::Array{T}) n = length(initial_x) - g = similar(initial_x) - f_x = d.fg!(initial_x, g) + value_grad!(d, initial_x) # Maintain a cache for line search results # Trace the history of states visited BFGSState("BFGS", n, copy(initial_x), # Maintain current state in state.x - f_x, # Store current f in state.f_x - 1, # Track f calls in state.f_calls - 1, # Track g calls in state.g_calls - 0, # Track h calls in state.h_calls similar(initial_x), # Maintain previous state in state.x_previous - g, # Store current gradient in state.g - copy(g), # Store previous gradient in state.g_previous + copy(gradient(d)), # Store previous gradient in state.g_previous T(NaN), # Store previous f in state.f_x_previous similar(initial_x), # Store changes in position in state.dx similar(initial_x), # Store changes in gradient in state.dg @@ -61,7 +52,7 @@ end function update_state!{T}(d, state::BFGSState{T}, method::BFGS) # Set the search direction # Search direction is the negative gradient divided by the approximate Hessian - A_mul_B!(state.s, state.invH, state.g) + A_mul_B!(state.s, state.invH, gradient(d)) scale!(state.s, -1) # Determine the distance of movement along the search line @@ -79,14 +70,14 @@ function update_state!{T}(d, state::BFGSState{T}, method::BFGS) end # Maintain a record of the previous gradient - copy!(state.g_previous, state.g) + copy!(state.g_previous, gradient(d)) lssuccess == false # break on linesearch error end -function update_h!(d, state, mehtod::BFGS) +function update_h!(d, state, method::BFGS) # Measure the change in the gradient @simd for i in 1:state.n - @inbounds state.dg[i] = state.g[i] - state.g_previous[i] + @inbounds state.dg[i] = gradient(d, i) - state.g_previous[i] end # Update the inverse Hessian approximation using Sherman-Morrison diff --git a/src/cg.jl b/src/cg.jl index c588e1415..6b4e5bd69 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -70,15 +70,11 @@ function ConjugateGradient(; linesearch) end =# -function ConjugateGradient(; linesearch! = nothing, - linesearch = LineSearches.hagerzhang!, +function ConjugateGradient(; linesearch = LineSearches.hagerzhang!, eta::Real = 0.4, P::Any = nothing, - precondprep! 
= nothing, precondprep = (P, x) -> nothing) - linesearch = get_linesearch(linesearch!, linesearch) - precondprep = get_precondprep(precondprep!, precondprep) ConjugateGradient(Float64(eta), P, precondprep, linesearch) @@ -87,7 +83,6 @@ end type ConjugateGradientState{T,N,G} @add_generic_fields() x_previous::Array{T,N} - g::G g_previous::G f_x_previous::T y::Array{T,N} @@ -99,17 +94,16 @@ end function initial_state{T}(method::ConjugateGradient, options, d, initial_x::Array{T}) - g = similar(initial_x) - f_x = d.fg!(initial_x, g) - pg = copy(g) - @assert typeof(f_x) == T + value_grad!(d, initial_x) + pg = copy(gradient(d)) + @assert typeof(value(d)) == T # Output messages - if !isfinite(f_x) + if !isfinite(value(d)) error("Must have finite starting value") end - if !all(isfinite, g) - @show g - @show find(!isfinite.(g)) + if !all(isfinite, gradient(d)) + @show gradient(d) + @show find(!isfinite.(gradient(d))) error("Gradient must have all finite values at starting point") end @@ -117,18 +111,13 @@ function initial_state{T}(method::ConjugateGradient, options, d, initial_x::Arra # if we don't precondition, then this is an extra superfluous copy # TODO: consider allowing a reference for pg instead of a copy method.precondprep!(method.P, initial_x) - A_ldiv_B!(pg, method.P, g) + A_ldiv_B!(pg, method.P, gradient(d)) ConjugateGradientState("Conjugate Gradient", length(initial_x), copy(initial_x), # Maintain current state in state.x - f_x, # Store current f in state.f_x - 1, # Track f calls in state.f_calls - 1, # Track g calls in state.g_calls - 0, # Track h calls in state.h_calls similar(initial_x), # Maintain previous state in state.x_previous - g, # Store current gradient in state.g - similar(g), # Store previous gradient in state.g_previous + similar(gradient(d)), # Store previous gradient in state.g_previous T(NaN), # Store previous f in state.f_x_previous similar(initial_x), # Intermediate value in CG calculation similar(initial_x), # Preconditioned intermediate value in CG calculation @@ -137,11 +126,11 @@ function initial_state{T}(method::ConjugateGradient, options, d, initial_x::Arra @initial_linesearch()...) 
# Maintain a cache for line search results in state.lsr end -function update_state!{T}(df, state::ConjugateGradientState{T}, method::ConjugateGradient) +function update_state!{T}(d, state::ConjugateGradientState{T}, method::ConjugateGradient) # Search direction is predetermined # Determine the distance of movement along the search line - lssuccess = perform_linesearch!(state, method, df) + lssuccess = perform_linesearch!(state, method, d) # Maintain a record of previous position copy!(state.x_previous, state.x) @@ -150,15 +139,15 @@ function update_state!{T}(df, state::ConjugateGradientState{T}, method::Conjugat LinAlg.axpy!(state.alpha, state.s, state.x) # Maintain a record of the previous gradient - copy!(state.g_previous, state.g) + copy!(state.g_previous, gradient(d)) # Update the function value and gradient - state.f_x_previous, state.f_x = state.f_x, df.fg!(state.x, state.g) - state.f_calls, state.g_calls = state.f_calls + 1, state.g_calls + 1 + state.f_x_previous = value(d) + value_grad!(d, state.x) # Check sanity of function and gradient - if !isfinite(state.f_x) - error("Function must finite function values") + if !isfinite(value(d)) + error("Function value must be finite") end # Determine the next search direction using HZ's CG rule @@ -174,15 +163,15 @@ function update_state!{T}(df, state::ConjugateGradientState{T}, method::Conjugat dPd = dot(state.s, method.P, state.s) etak::T = method.eta * vecdot(state.s, state.g_previous) / dPd @simd for i in 1:state.n - @inbounds state.y[i] = state.g[i] - state.g_previous[i] + @inbounds state.y[i] = gradient(d, i) - state.g_previous[i] end ydots = vecdot(state.y, state.s) copy!(state.py, state.pg) # below, store pg - pg_previous in py - A_ldiv_B!(state.pg, method.P, state.g) + A_ldiv_B!(state.pg, method.P, gradient(d)) @simd for i in 1:state.n # py = pg - py @inbounds state.py[i] = state.pg[i] - state.py[i] end - betak = (vecdot(state.y, state.pg) - vecdot(state.y, state.py) * vecdot(state.g, state.s) / ydots) / ydots + betak = (vecdot(state.y, state.pg) - vecdot(state.y, state.py) * vecdot(gradient(d), state.s) / ydots) / ydots beta = max(betak, etak) @simd for i in 1:state.n @inbounds state.s[i] = beta * state.s[i] - state.pg[i] diff --git a/src/deprecate.jl b/src/deprecate.jl index 0d6bc2b0f..e69de29bb 100644 --- a/src/deprecate.jl +++ b/src/deprecate.jl @@ -1,79 +0,0 @@ -const has_deprecated_linesearch! = Ref(false) -const has_deprecated_neighbor! = Ref(false) -const has_deprecated_precondprep! 
= Ref(false) -const has_deprecated_levenberg_marquardt = Ref(false) - -@deprecate MultivariateOptimizationResults(method, - initial_x, minimizer, minimum, iterations, - iteration_converged, x_converged, x_tol, - f_converged, f_tol, g_converged, g_tol, - trace, f_calls, g_calls) MultivariateOptimizationResults(method, - initial_x, minimizer, minimum, iterations, - iteration_converged, x_converged, x_tol, - f_converged, f_tol, g_converged, g_tol, false, - trace, f_calls, g_calls, 0) - -@deprecate MultivariateOptimizationResults(method, - initial_x, minimizer, minimum, iterations, - iteration_converged, x_converged, x_tol, - f_converged, f_tol, g_converged, g_tol, - trace, f_calls, g_calls, h_calls) MultivariateOptimizationResults(method, - initial_x, minimizer, minimum, iterations, - iteration_converged, x_converged, x_tol, - f_converged, f_tol, g_converged, g_tol, false, - trace, f_calls, g_calls, h_calls) - -# LineSearches deprecation -for name in names(LineSearches) - if name in (:LineSearches, :hz_linesearch!, :mt_linesearch!, - :interpolating_linesearch!, :backtracking_linesearch!, - :interpbacktracking_linesearch!) - continue - end - eval(:(@deprecate $name LineSearches.$name)) -end - -@deprecate hz_linesearch! LineSearches.hagerzhang! -@deprecate mt_linesearch! LineSearches.morethuente! -@deprecate interpolating_linesearch! LineSearches.strongwolfe! -@deprecate backtracking_linesearch! LineSearches.backtracking! -@deprecate interpbacktracking_linesearch! LineSearches.interpbacktracking! - -@deprecate OptimizationOptions(args...; kwargs...) Optim.Options(args...; kwargs...) - -function get_linesearch(linesearch!, linesearch) - if linesearch! != nothing - if !has_deprecated_linesearch![] - warn("linesearch! keyword is deprecated, use linesearch instead (without !)") - has_deprecated_linesearch![] = true - end - return linesearch! - end - linesearch -end - -function get_precondprep(precondprep!, precondprep) - if precondprep! != nothing - if !has_deprecated_precondprep![] - warn("precondprep! keyword is deprecated, use precondprep instead (without !)") - has_deprecated_precondprep![] = true - end - return precondprep! - end - precondprep -end - -function get_neighbor(neighbor!, neighbor) - if neighbor! != nothing - if !has_deprecated_neighbor![] - warn("neighbor! keyword is deprecated, use neighbor instead (without !)") - has_deprecated_neighbor![] = true - end - return neighbor! - end - neighbor -end - -@deprecate NonDifferentiableFunction(args...) NonDifferentiable(args...) -@deprecate DifferentiableFunction(args...) OnceDifferentiable(args...) -@deprecate TwiceDifferentiableFunction(args...) TwiceDifferentiable(args...) diff --git a/src/fminbox.jl b/src/fminbox.jl index 7e2fb6dcc..797ea2c77 100644 --- a/src/fminbox.jl +++ b/src/fminbox.jl @@ -118,12 +118,10 @@ function optimize{T<:AbstractFloat}( extended_trace::Bool = false, callback = nothing, show_every::Integer = 1, - linesearch! = nothing, linesearch = LineSearches.hagerzhang!, eta::Real = convert(T,0.4), mu0::T = convert(T, NaN), mufactor::T = convert(T, 0.001), - precondprep! = nothing, precondprep = (P, x, l, u, mu) -> precondprepbox!(P, x, l, u, mu), optimizer = ConjugateGradient, optimizer_o = Options(store_trace = store_trace, @@ -131,11 +129,6 @@ function optimize{T<:AbstractFloat}( extended_trace = extended_trace), nargs...) 
- # remove in v0.8.0 - - linesearch = get_linesearch(linesearch!, linesearch) - precondprep = get_precondprep(precondprep!, precondprep) - optimizer == Newton && warning("Newton is not supported as the inner optimizer. Defaulting to ConjugateGradient.") x = copy(initial_x) fbarrier = (x, gbarrier) -> barrier_box(x, gbarrier, l, u) @@ -183,7 +176,7 @@ function optimize{T<:AbstractFloat}( # Optimize with current setting of mu funcc = (x, g) -> barrier_combined(x, g, gfunc, gbarrier, fb, mu) fval0 = funcc(x, nothing) - dfbox = OnceDifferentiable(x->funcc(x,nothing), (x,g)->(funcc(x,g); g), funcc) + dfbox = OnceDifferentiable(x->funcc(x,nothing), (x,g)->(funcc(x,g); g), funcc, initial_x) if show_trace > 0 println("#### Calling optimizer with mu = ", mu, " ####") end diff --git a/src/gradient_descent.jl b/src/gradient_descent.jl index c672c760c..34087f5b1 100644 --- a/src/gradient_descent.jl +++ b/src/gradient_descent.jl @@ -9,38 +9,27 @@ GradientDescent(; linesearch = LineSearches.hagerzhang!, P = nothing, precondprep = (P, x) -> nothing) = GradientDescent(linesearch, P, precondprep) =# -function GradientDescent(; linesearch! = nothing, - linesearch = LineSearches.hagerzhang!, +function GradientDescent(; linesearch = LineSearches.hagerzhang!, P = nothing, - precondprep! = nothing, precondprep = (P, x) -> nothing) - linesearch = get_linesearch(linesearch!, linesearch) - precondprep = get_precondprep(precondprep!, precondprep) GradientDescent(linesearch, P, precondprep) end -type GradientDescentState{T,N,G} +type GradientDescentState{T,N} @add_generic_fields() x_previous::Array{T,N} - g::G f_x_previous::T s::Array{T,N} @add_linesearch_fields() end function initial_state{T}(method::GradientDescent, options, d, initial_x::Array{T}) - g = similar(initial_x) - f_x = d.fg!(initial_x, g) + value_grad!(d, initial_x) GradientDescentState("Gradient Descent", length(initial_x), copy(initial_x), # Maintain current state in state.x - f_x, # Store current f in state.f_x - 1, # Track f calls in state.f_calls - 1, # Track g calls in state.g_calls - 0, # Track h calls in state.h_calls similar(initial_x), # Maintain previous state in state.x_previous - g, # Store current gradient in state.g T(NaN), # Store previous f in state.f_x_previous similar(initial_x), # Maintain current search direction in state.s @initial_linesearch()...) # Maintain a cache for line search results in state.lsr @@ -49,7 +38,7 @@ end function update_state!{T}(d, state::GradientDescentState{T}, method::GradientDescent) # Search direction is always the negative preconditioned gradient method.precondprep!(method.P, state.x) - A_ldiv_B!(state.s, method.P, state.g) + A_ldiv_B!(state.s, method.P, gradient(d)) @simd for i in 1:state.n @inbounds state.s[i] = -state.s[i] end diff --git a/src/l_bfgs.jl b/src/l_bfgs.jl index 9cdccc9ef..78a9b6034 100644 --- a/src/l_bfgs.jl +++ b/src/l_bfgs.jl @@ -75,24 +75,19 @@ LBFGS(; m::Integer = 10, linesearch = LineSearches.hagerzhang!, LBFGS(Int(m), linesearch, P, precondprep, extrapolate, snap2one) =# -function LBFGS(; linesearch! = nothing, - m::Integer = 10, +function LBFGS(; m::Integer = 10, linesearch = LineSearches.hagerzhang!, P=nothing, - precondprep! 
= nothing, precondprep = (P, x) -> nothing, extrapolate::Bool=false, snap2one = (0.75, Inf)) - linesearch = get_linesearch(linesearch!, linesearch) - precondprep = get_precondprep(precondprep!, precondprep) LBFGS(Int(m), linesearch, P, precondprep, extrapolate, snap2one) end type LBFGSState{T,N,M} @add_generic_fields() x_previous::Array{T,N} - g::Array{T,N} g_previous::Array{T,N} rho::Array{T,N} dx_history::Array{T,M} @@ -110,20 +105,12 @@ end function initial_state{T}(method::LBFGS, options, d, initial_x::Array{T}) n = length(initial_x) - g = similar(initial_x) - f_x = d.fg!(initial_x, g) - # Maintain a cache for line search results - # Trace the history of states visited + value_grad!(d, initial_x) LBFGSState("L-BFGS", n, copy(initial_x), # Maintain current state in state.x - f_x, # Store current f in state.f_x - 1, # Track f calls in state.f_calls - 1, # Track g calls in state.g_calls - 0, # Track h calls in state.h_calls similar(initial_x), # Maintain previous state in state.x_previous - g, # Store current gradient in state.g - similar(g), # Store previous gradient in state.g_previous + similar(gradient(d)), # Store previous gradient in state.g_previous Array{T}(method.m), # state.rho Array{T}(n, method.m), # Store changes in position in state.dx_history Array{T}(n, method.m), # Store changes in gradient in state.dg_history @@ -147,7 +134,7 @@ function update_state!{T}(d, state::LBFGSState{T}, method::LBFGS) method.precondprep!(method.P, state.x) # Determine the L-BFGS search direction # FIXME just pass state and method? - twoloop!(state.s, state.g, state.rho, state.dx_history, state.dg_history, + twoloop!(state.s, gradient(d), state.rho, state.dx_history, state.dg_history, method.m, state.pseudo_iteration, state.twoloop_alpha, state.twoloop_q, method.P) @@ -165,8 +152,8 @@ function update_state!{T}(d, state::LBFGSState{T}, method::LBFGS) end # Save old f and g values to prepare for update_g! call - state.f_x_previous = state.f_x - copy!(state.g_previous, state.g) + state.f_x_previous = value(d) + copy!(state.g_previous, gradient(d)) lssuccess == false # break on linesearch error end @@ -174,7 +161,7 @@ end function update_h!(d, state, method::LBFGS) # Measure the change in the gradient @simd for i in 1:state.n - @inbounds state.dg[i] = state.g[i] - state.g_previous[i] + @inbounds state.dg[i] = gradient(d, i) - state.g_previous[i] end # Update the L-BFGS history of positions and gradients diff --git a/src/levenberg_marquardt.jl b/src/levenberg_marquardt.jl deleted file mode 100644 index fa89e611e..000000000 --- a/src/levenberg_marquardt.jl +++ /dev/null @@ -1,174 +0,0 @@ -immutable LevenbergMarquardt <: Optimizer end - -""" - `levenberg_marquardt(f, g, initial_x; ` - -Returns the argmin over x of `sum(f(x).^2)` using the Levenberg-Marquardt algorithm, and an -estimate of the Jacobian of `f` at x. - -The function `f` should take an input vector of length n and return an output vector of length m. -The function `g` is the Jacobian of f, and should be an m x n matrix. -`initial_x` is an initial guess for the solution. - -Implements box constraints as described in Kanzow, Yamashita, Fukushima (2004; J Comp & Applied Math). 
- -# Keyword arguments -* `tolX::Real=1e-8`: search tolerance in x -* `tolG::Real=1e-12`: search tolerance in gradient -* `maxIter::Integer=100`: maximum number of iterations -* `lambda::Real=10.0`: (inverse of) initial trust region radius -* `show_trace::Bool=false`: print a status summary on each iteration if true -* `lower,upper=[]`: bound solution to these limits -""" -function levenberg_marquardt{F<:Function, G<:Function, T}(f::F, g::G, initial_x::AbstractVector{T}; - tolX::Real = 1e-8, tolG::Real = 1e-12, maxIter::Integer = 100, - lambda::Real = 10.0, show_trace::Bool = false, lower::Vector{T} = Array{T}(0), upper::Vector{T} = Array{T}(0)) - - if !has_deprecated_levenberg_marquardt[] - warn("levenberg_marquardt has been moved out of Optim.jl and into LsqFit.jl. Please adjust your code, and change your dependency to match this migration.") - has_deprecated_levenberg_marquardt[] = true - end - # check parameters - ((isempty(lower) || length(lower)==length(initial_x)) && (isempty(upper) || length(upper)==length(initial_x))) || - throw(ArgumentError("Bounds must either be empty or of the same length as the number of parameters.")) - ((isempty(lower) || all(initial_x .>= lower)) && (isempty(upper) || all(initial_x .<= upper))) || - throw(ArgumentError("Initial guess must be within bounds.")) - - # other constants - const MAX_LAMBDA = 1e16 # minimum trust region radius - const MIN_LAMBDA = 1e-16 # maximum trust region radius - const MIN_STEP_QUALITY = 1e-3 - const GOOD_STEP_QUALITY = 0.75 - const MIN_DIAGONAL = 1e-6 # lower bound on values of diagonal matrix used to regularize the trust region step - - - converged = false - x_converged = false - g_converged = false - need_jacobian = true - iterCt = 0 - x = copy(initial_x) - delta_x = copy(initial_x) - f_calls = 0 - g_calls = 0 - - fcur = f(x) - f_calls += 1 - residual = sum(abs2,fcur) - - # Create buffers - m = length(fcur) - n = length(x) - JJ = Matrix{T}(n, n) - n_buffer = Vector{T}(n) - m_buffer = Vector{T}(m) - - # Maintain a trace of the system. - tr = OptimizationTrace{LevenbergMarquardt}() - if show_trace - d = Dict("lambda" => lambda) - os = OptimizationState{LevenbergMarquardt}(iterCt, sum(abs2,fcur), NaN, d) - push!(tr, os) - println(os) - end - - while (~converged && iterCt < maxIter) - if need_jacobian - J = g(x) - g_calls += 1 - need_jacobian = false - end - # we want to solve: - # argmin 0.5*||J(x)*delta_x + f(x)||^2 + lambda*||diagm(J'*J)*delta_x||^2 - # Solving for the minimum gives: - # (J'*J + lambda*diagm(DtD)) * delta_x == -J^T * f(x), where DtD = sum(abs2,J,1) - # Where we have used the equivalence: diagm(J'*J) = diagm(sum(abs2,J,1)) - # It is additionally useful to bound the elements of DtD below to help - # prevent "parameter evaporation". 
- DtD = vec(sum(abs2,J, 1)) - for i in 1:length(DtD) - if DtD[i] <= MIN_DIAGONAL - DtD[i] = MIN_DIAGONAL - end - end - # delta_x = ( J'*J + lambda * diagm(DtD) ) \ ( -J'*fcur ) - At_mul_B!(JJ, J, J) - @simd for i in 1:n - @inbounds JJ[i, i] += lambda * DtD[i] - end - At_mul_B!(n_buffer, J, fcur) - scale!(n_buffer, -1) - delta_x = JJ \ n_buffer - - # apply box constraints - if !isempty(lower) - @simd for i in 1:n - @inbounds delta_x[i] = max(x[i] + delta_x[i], lower[i]) - x[i] - end - end - if !isempty(upper) - @simd for i in 1:n - @inbounds delta_x[i] = min(x[i] + delta_x[i], upper[i]) - x[i] - end - end - - # if the linear assumption is valid, our new residual should be: - # predicted_residual = sum(abs2,J*delta_x + fcur) - A_mul_B!(m_buffer, J, delta_x) - LinAlg.axpy!(1, fcur, m_buffer) - predicted_residual = sum(abs2,m_buffer) - # check for numerical problems in solving for delta_x by ensuring that the predicted residual is smaller - # than the current residual - if predicted_residual > residual + 2max(eps(predicted_residual),eps(residual)) - warn("""Problem solving for delta_x: predicted residual increase. - $predicted_residual (predicted_residual) > - $residual (residual) + $(eps(predicted_residual)) (eps)""") - end - # try the step and compute its quality - @simd for i in 1:n - @inbounds n_buffer[i] = x[i] + delta_x[i] - end - trial_f = f(n_buffer) - f_calls += 1 - trial_residual = sum(abs2,trial_f) - # step quality = residual change / predicted residual change - rho = (trial_residual - residual) / (predicted_residual - residual) - if rho > MIN_STEP_QUALITY - x += delta_x - fcur = trial_f - residual = trial_residual - if rho > GOOD_STEP_QUALITY - # increase trust region radius - lambda = max(0.1*lambda, MIN_LAMBDA) - end - need_jacobian = true - else - # decrease trust region radius - lambda = min(10*lambda, MAX_LAMBDA) - end - iterCt += 1 - - # show state - if show_trace - At_mul_B!(n_buffer, J, fcur) - g_norm = norm(n_buffer, Inf) - d = @compat Dict("g(x)" => g_norm, "dx" => delta_x, "lambda" => lambda) - os = OptimizationState{LevenbergMarquardt}(iterCt, sum(abs2,fcur), g_norm, d) - push!(tr, os) - println(os) - end - - # check convergence criteria: - # 1. Small gradient: norm(J^T * fcur, Inf) < tolG - # 2. Small step size: norm(delta_x) < tolX - At_mul_B!(n_buffer, J, fcur) - if norm(n_buffer, Inf) < tolG - g_converged = true - elseif norm(delta_x) < tolX*(tolX + norm(x)) - x_converged = true - end - converged = g_converged | x_converged - end - - MultivariateOptimizationResults("Levenberg-Marquardt", initial_x, x, sum(abs2,fcur), iterCt, !converged, x_converged, 0.0, false, 0.0, g_converged, tolG, false, tr, f_calls, g_calls, 0) -end diff --git a/src/momentum_gradient_descent.jl b/src/momentum_gradient_descent.jl index bb866e651..74e126b1e 100644 --- a/src/momentum_gradient_descent.jl +++ b/src/momentum_gradient_descent.jl @@ -11,34 +11,25 @@ MomentumGradientDescent(; mu::Real = 0.01, linesearch = LineSearches.hagerzhang! MomentumGradientDescent(Float64(mu), linesearch) =# -function MomentumGradientDescent(; mu::Real = 0.01, linesearch! = nothing, - linesearch = LineSearches.hagerzhang!) - linesearch = get_linesearch(linesearch!, linesearch) +function MomentumGradientDescent(; mu::Real = 0.01, linesearch = LineSearches.hagerzhang!) 
MomentumGradientDescent(Float64(mu), linesearch) end -type MomentumGradientDescentState{T,N,G} +type MomentumGradientDescentState{T,N} @add_generic_fields() x_previous::Array{T,N} - g::G f_x_previous::T s::Array{T,N} @add_linesearch_fields() end function initial_state{T}(method::MomentumGradientDescent, options, d, initial_x::Array{T}) - g = similar(initial_x) - f_x = d.fg!(initial_x, g) + value_grad!(d, initial_x) MomentumGradientDescentState("Momentum Gradient Descent", length(initial_x), copy(initial_x), # Maintain current state in state.x - f_x, # Store current f in state.f_x - 1, # Track f calls in state.f_calls - 1, # Track g calls in state.g_calls - 0, # Track h calls in state.h_calls copy(initial_x), # Maintain previous state in state.x_previous - g, # Store current gradient in state.g T(NaN), # Store previous f in state.f_x_previous similar(initial_x), # Maintain current search direction in state.s @initial_linesearch()...) # Maintain a cache for line search results in state.lsr @@ -47,7 +38,7 @@ end function update_state!{T}(d, state::MomentumGradientDescentState{T}, method::MomentumGradientDescent) # Search direction is always the negative gradient @simd for i in 1:state.n - @inbounds state.s[i] = -state.g[i] + @inbounds state.s[i] = -gradient(d, i) end # Determine the distance of movement along the search line diff --git a/src/nelder_mead.jl b/src/nelder_mead.jl index ca4e9fb66..d5705ad03 100644 --- a/src/nelder_mead.jl +++ b/src/nelder_mead.jl @@ -124,14 +124,13 @@ type NelderMeadState{T, N} step_type::String end -initial_state(method::NelderMead, options, d, initial_x::Array) = initial_state(method, options, d.f, initial_x) -function initial_state{F<:Function, T}(method::NelderMead, options, f::F, initial_x::Array{T}) +function initial_state{F, T}(method::NelderMead, options, f::F, initial_x::Array{T}) n = length(initial_x) m = n + 1 simplex = simplexer(method.initial_simplex, initial_x) f_simplex = zeros(T, m) @inbounds for i in 1:length(simplex) - f_simplex[i] = f(simplex[i]) + f_simplex[i] = value(f, simplex[i]) end # Get the indeces that correspond to the ordering of the f values # at the vertices. 
i_order[1] is the index in the simplex of the vertex @@ -144,10 +143,6 @@ function initial_state{F<:Function, T}(method::NelderMead, options, f::F, initia NelderMeadState("Nelder-Mead", n, # Dimensionality of the problem similar(initial_x), # Variable to hold final minimizer value for MultivariateOptimizationResults - f_simplex[i_order[1]], # Store Nelder Mead objective in state.f_x = state.f_lowest - m, - 0, - 0, m, # Number of vertices in the simplex simplex, # Maintain simplex in state.simplex centroid(simplex, i_order[m]), # Maintain centroid in state.centroid @@ -167,8 +162,7 @@ NelderMeadState("Nelder-Mead", "initial") end -update_state!(d, state::NelderMeadState, method::NelderMead) = update_state!(d.f, state, method) -function update_state!{F<:Function, T}(f::F, state::NelderMeadState{T}, method::NelderMead) +function update_state!{F, T}(f::F, state::NelderMeadState{T}, method::NelderMead) # Augment the iteration counter shrink = false n, m = state.n, state.m @@ -186,15 +180,13 @@ function update_state!{F<:Function, T}(f::F, state::NelderMeadState{T}, method:: state.x_reflect[j] = state.x_centroid[j] + state.α * (state.x_centroid[j]-state.x_highest[j]) end - f_reflect = f(state.x_reflect) - state.f_calls += 1 + f_reflect = value(f, state.x_reflect) if f_reflect < state.f_lowest # Compute an expansion @inbounds for j in 1:n state.x_cache[j] = state.x_centroid[j] + state.β *(state.x_reflect[j] - state.x_centroid[j]) end - f_expand = f(state.x_cache) - state.f_calls += 1 + f_expand = value(f, state.x_cache) if f_expand < f_reflect copy!(state.simplex[state.i_order[m]], state.x_cache) @@ -222,7 +214,7 @@ function update_state!{F<:Function, T}(f::F, state::NelderMeadState{T}, method:: @simd for j in 1:n @inbounds state.x_cache[j] = state.x_centroid[j] + state.γ * (state.x_reflect[j]-state.x_centroid[j]) end - f_outside_contraction = f(state.x_cache) + f_outside_contraction = value(f, state.x_cache) if f_outside_contraction < f_reflect copy!(state.simplex[state.i_order[m]], state.x_cache) @inbounds state.f_simplex[state.i_order[m]] = f_outside_contraction @@ -237,7 +229,7 @@ function update_state!{F<:Function, T}(f::F, state::NelderMeadState{T}, method:: @simd for j in 1:n @inbounds state.x_cache[j] = state.x_centroid[j] - γ *(state.x_reflect[j] - state.x_centroid[j]) end - f_inside_contraction = f(state.x_cache) + f_inside_contraction = value(f, state.x_cache) if f_inside_contraction < f_highest copy!(state.simplex[state.i_order[m]], state.x_cache) @inbounds state.f_simplex[state.i_order[m]] = f_inside_contraction @@ -253,7 +245,7 @@ function update_state!{F<:Function, T}(f::F, state::NelderMeadState{T}, method:: for i = 2:m ord = state.i_order[i] copy!(state.simplex[ord], state.x_lowest + state.δ*(state.simplex[ord]-state.x_lowest)) - state.f_simplex[ord] = f(state.simplex[ord]) + state.f_simplex[ord] = value(f, state.simplex[ord]) end step_type = "shrink" sortperm!(state.i_order, state.f_simplex) @@ -263,18 +255,16 @@ function update_state!{F<:Function, T}(f::F, state::NelderMeadState{T}, method:: false end -after_while!(d, state, method::NelderMead, options) = after_while!(d.f, state, method::NelderMead, options) -function after_while!{F<:Function}(f::F, state, method::NelderMead, options) +function after_while!{F}(f::F, state, method::NelderMead, options) sortperm!(state.i_order, state.f_simplex) x_centroid_min = centroid(state.simplex, state.i_order[state.m]) - f_centroid_min = f(x_centroid_min) - state.f_calls += 1 + f_centroid_min = value(f, x_centroid_min) f_min, i_f_min = 
findmin(state.f_simplex) x_min = state.simplex[i_f_min] if f_centroid_min < f_min x_min = x_centroid_min f_min = f_centroid_min end - state.f_x = f_min + f.f_x = f_min state.x[:] = x_min end diff --git a/src/newton.jl b/src/newton.jl index f95a0077c..6bff2cdd6 100644 --- a/src/newton.jl +++ b/src/newton.jl @@ -1,23 +1,19 @@ -immutable Newton <: Optimizer - linesearch!::Function +immutable Newton{L} <: Optimizer + linesearch!::L resetalpha::Bool end #= uncomment v0.8.0 Newton(; linesearch::Function = LineSearches.hagerzhang!) = Newton(linesearch) =# -function Newton(; linesearch! = nothing, linesearch::Function = LineSearches.hagerzhang!, - resetalpha = true) - linesearch = get_linesearch(linesearch!, linesearch) +function Newton(; linesearch::Function = LineSearches.hagerzhang!, resetalpha = true) Newton(linesearch,resetalpha) end type NewtonState{T, N, F<:Base.LinAlg.Cholesky, Thd} @add_generic_fields() x_previous::Array{T, N} - g::Array{T, N} f_x_previous::T - H::Matrix{T} F::F Hd::Thd s::Array{T, N} @@ -27,26 +23,15 @@ end function initial_state{T}(method::Newton, options, d, initial_x::Array{T}) n = length(initial_x) # Maintain current gradient in gr - g = similar(initial_x) - s = similar(g) - x_ls, g_ls = similar(g), similar(g) - f_x_previous, f_x = NaN, d.fg!(initial_x, g) - f_calls, g_calls = 1, 1 - H = Array{T}(n, n) - d.h!(initial_x, H) - h_calls = 1 - + s = similar(initial_x) + x_ls, g_ls = similar(initial_x), similar(initial_x) + value_grad!(d, initial_x) + hessian!(d, initial_x) NewtonState("Newton's Method", length(initial_x), copy(initial_x), # Maintain current state in state.x - f_x, # Store current f in state.f_x - f_calls, # Track f calls in state.f_calls - g_calls, # Track g calls in state.g_calls - h_calls, similar(initial_x), # Maintain previous state in state.x_previous - g, # Store current gradient in state.g T(NaN), # Store previous f in state.f_x_previous - H, Base.LinAlg.Cholesky(Matrix{T}(0, 0), :U), Vector{Int8}(), similar(initial_x), # Maintain current search direction in state.s @@ -59,8 +44,8 @@ function update_state!{T}(d, state::NewtonState{T}, method::Newton) # represented by H. It deviates from the usual "add a scaled # identity matrix" version of the modified Newton method. More # information can be found in the discussion at issue #153. 
- state.F, state.Hd = ldltfact!(Positive, state.H) - state.s[:] = -(state.F\state.g) + state.F, state.Hd = ldltfact!(Positive, hessian(d)) + state.s[:] = -(state.F\gradient(d)) # Determine the distance of movement along the search line lssuccess = perform_linesearch!(state, method, d) diff --git a/src/newton_trust_region.jl b/src/newton_trust_region.jl index 6daef8dcd..9479f06d0 100644 --- a/src/newton_trust_region.jl +++ b/src/newton_trust_region.jl @@ -208,11 +208,9 @@ NewtonTrustRegion(; initial_delta::Real = 1.0, type NewtonTrustRegionState{T,N,G} @add_generic_fields() x_previous::Array{T,N} - g::G g_previous::G f_x_previous::T s::Array{T,N} - H::Matrix{T} hard_case::Bool reached_subproblem_solution::Bool interior::Bool @@ -238,33 +236,22 @@ function initial_state{T}(method::NewtonTrustRegion, options, d, initial_x::Arra reached_subproblem_solution = true interior = true lambda = NaN - g = similar(initial_x) - f_x_previous, f_x = NaN, d.fg!(initial_x, g) - f_calls, g_calls = 1, 1 - H = Array{T}(n, n) - d.h!(initial_x, H) - h_calls = 1 - + value_grad!(d, initial_x) + hessian!(d, initial_x) NewtonTrustRegionState("Newton's Method (Trust Region)", # Store string with model name in state.method length(initial_x), copy(initial_x), # Maintain current state in state.x - f_x, # Store current f in state.f_x - f_calls, # Track f calls in state.f_calls - g_calls, # Track g calls in state.g_calls - h_calls, similar(initial_x), # Maintain previous state in state.x_previous - g, # Store current gradient in state.g - similar(g), # Store previous gradient in state.g_previous + similar(gradient(d)), # Store previous gradient in state.g_previous T(NaN), # Store previous f in state.f_x_previous similar(initial_x), # Maintain current search direction in state.s - H, hard_case, reached_subproblem_solution, interior, T(delta), lambda, method.eta, # eta - zero(T)) # Maintain a cache for line search results in state.lsr + zero(T)) # rho end @@ -273,24 +260,25 @@ function update_state!{T}(d, state::NewtonTrustRegionState{T}, method::NewtonTru # Find the next step direction. m, state.interior, state.lambda, state.hard_case, state.reached_subproblem_solution = - solve_tr_subproblem!(state.g, state.H, state.delta, state.s) + solve_tr_subproblem!(gradient(d), hessian(d), state.delta, state.s) # Maintain a record of previous position copy!(state.x_previous, state.x) # Update current position - for i in 1:state.n + @simd for i in 1:state.n @inbounds state.x[i] = state.x[i] + state.s[i] end # Update the function value and gradient - copy!(state.g_previous, state.g) - state.f_x_previous, state.f_x = state.f_x, d.fg!(state.x, state.g) - state.f_calls, state.g_calls = state.f_calls + 1, state.g_calls + 1 + copy!(state.g_previous, gradient(d)) + state.f_x_previous = value(d) + value_grad!(d, state.x) + # Update the trust region size based on the discrepancy between # the predicted and actual function values. (Algorithm 4.1 in N&W) - f_x_diff = state.f_x_previous - state.f_x + f_x_diff = state.f_x_previous - value(d) if abs(m) <= eps(T) # This should only happen when the step is very small, in which case # we should accept the step and assess_convergence(). 
@@ -322,9 +310,9 @@ function update_state!{T}(d, state::NewtonTrustRegionState{T}, method::NewtonTru x_diff = state.x - state.x_previous delta = 0.25 * sqrt(vecdot(x_diff, x_diff)) - state.f_x = state.f_x_previous + d.f_x = state.f_x_previous copy!(state.x, state.x_previous) - copy!(state.g, state.g_previous) + copy!(gradient(d), state.g_previous) end false diff --git a/src/objective_types.jl b/src/objective_types.jl new file mode 100644 index 000000000..6abe630c9 --- /dev/null +++ b/src/objective_types.jl @@ -0,0 +1,275 @@ +@compat abstract type AbstractObjective end +type NonDifferentiable{T} <: AbstractObjective + f + f_x::T + last_x_f::Array{T} + f_calls::Vector{Int} +end +NonDifferentiable{T}(f, x_seed::Array{T}) = NonDifferentiable(f, f(x_seed), copy(x_seed), [1]) + +type OnceDifferentiable{T, Tgrad} <: AbstractObjective + f + g! + fg! + f_x::T + g::Tgrad + last_x_f::Array{T} + last_x_g::Array{T} + f_calls::Vector{Int} + g_calls::Vector{Int} +end +function OnceDifferentiable(f, g!, fg!, x_seed) + g = similar(x_seed) + g!(x_seed, g) + OnceDifferentiable(f, g!, fg!, f(x_seed), g, copy(x_seed), copy(x_seed), [1], [1]) +end +function OnceDifferentiable(f, g!, x_seed) + function fg!(x, storage) + g!(x, storage) + return f(x) + end + return OnceDifferentiable(f, g!, fg!, x_seed) +end +function OnceDifferentiable{T}(f, x_seed::Vector{T}; autodiff = :finitediff) + n_x = length(x_seed) + f_calls = [1] + g_calls = [1] + if autodiff == :finitediff + function g!(x, storage) + Calculus.finite_difference!(x->(f_calls[1]+=1;f(x)), x, storage, :central) + return + end + function fg!(x, storage) + g!(x, storage) + return f(x) + end + elseif autodiff == :forwarddiff + gcfg = ForwardDiff.GradientConfig(x_seed) + g! = (x, out) -> ForwardDiff.gradient!(out, f, x, gcfg) + + fg! = (x, out) -> begin + gr_res = DiffBase.DiffResult(zero(T), out) + ForwardDiff.gradient!(gr_res, f, x, gcfg) + DiffBase.value(gr_res) + end + end + g = similar(x_seed) + g!(x_seed, g) + return OnceDifferentiable(f, g!, fg!, f(x_seed), g, copy(x_seed), copy(x_seed), f_calls, g_calls) +end + +type TwiceDifferentiable{T<:Real} <: AbstractObjective + f + g! + fg! + h! + f_x::T + g::Vector{T} + H::Matrix{T} + last_x_f::Vector{T} + last_x_g::Vector{T} + last_x_h::Vector{T} + f_calls::Vector{Int} + g_calls::Vector{Int} + h_calls::Vector{Int} +end +function TwiceDifferentiable{T}(f, g!, fg!, h!, x_seed::Array{T}) + n_x = length(x_seed) + g = similar(x_seed) + H = Array{T}(n_x, n_x) + g!(x_seed, g) + h!(x_seed, H) + TwiceDifferentiable(f, g!, fg!, h!, f(x_seed), + g, H, copy(x_seed), + copy(x_seed), copy(x_seed), [1], [1], [1]) +end + +function TwiceDifferentiable{T}(f, + g!, + h!, + x_seed::Array{T}) + function fg!(x::Vector, storage::Vector) + g!(x, storage) + return f(x) + end + return TwiceDifferentiable(f, g!, fg!, h!, x_seed) +end +function TwiceDifferentiable{T}(f, x_seed::Vector{T}; autodiff = :finitediff) + n_x = length(x_seed) + f_calls = [1] + g_calls = [1] + h_calls = [1] + if autodiff == :finitediff + function g!(x::Vector, storage::Vector) + Calculus.finite_difference!(x->(f_calls[1]+=1;f(x)), x, storage, :central) + return + end + function fg!(x::Vector, storage::Vector) + g!(x, storage) + return f(x) + end + function h!(x::Vector, storage::Matrix) + Calculus.finite_difference_hessian!(x->(f_calls[1]+=1;f(x)), x, storage) + return + end + elseif autodiff == :forwarddiff + gcfg = ForwardDiff.GradientConfig(x_seed) + g! = (x, out) -> ForwardDiff.gradient!(out, f, x, gcfg) + + fg! 
= (x, out) -> begin
+            gr_res = DiffBase.DiffResult(zero(T), out)
+            ForwardDiff.gradient!(gr_res, f, x, gcfg)
+            DiffBase.value(gr_res)
+        end
+
+        hcfg = ForwardDiff.HessianConfig(x_seed)
+        h! = (x, out) -> ForwardDiff.hessian!(out, f, x, hcfg)
+    end
+    g = similar(x_seed)
+    H = Array{T}(n_x, n_x)
+    g!(x_seed, g)
+    h!(x_seed, H)
+    return TwiceDifferentiable(f, g!, fg!, h!, f(x_seed),
+                               g, H, copy(x_seed),
+                               copy(x_seed), copy(x_seed), f_calls, g_calls, h_calls)
+end
+
+
+function TwiceDifferentiable{T}(f, g!, x_seed::Array{T}; autodiff = :finitediff)
+    n_x = length(x_seed)
+    f_calls = [1]
+    function fg!(x, storage)
+        g!(x, storage)
+        return f(x)
+    end
+    if autodiff == :finitediff
+        function h!(x, storage)
+            Calculus.finite_difference_hessian!(x->(f_calls[1]+=1;f(x)), x, storage)
+            return
+        end
+    elseif autodiff == :forwarddiff
+        hcfg = ForwardDiff.HessianConfig(similar(x_seed))
+        h! = (x, out) -> ForwardDiff.hessian!(out, f, x, hcfg)
+    end
+    g = similar(x_seed)
+    H = Array{T}(n_x, n_x)
+    g!(x_seed, g)
+    h!(x_seed, H)
+    return TwiceDifferentiable(f, g!, fg!, h!, f(x_seed),
+                               g, H, copy(x_seed),
+                               copy(x_seed), copy(x_seed), f_calls, [1], [1])
+end
+#=
+function TwiceDifferentiable{T}(f, g!, fg!, x_seed::Array{T}; autodiff = :finitediff)
+    n_x = length(x_seed)
+    f_calls = [1]
+    if autodiff == :finitediff
+        function h!(x::Vector, storage::Matrix)
+            Calculus.finite_difference_hessian!(x->(f_calls[1]+=1;f(x)), x, storage)
+            return
+        end
+    elseif autodiff == :forwarddiff
+        hcfg = ForwardDiff.HessianConfig(similar(x_seed))
+        h! = (x, out) -> ForwardDiff.hessian!(out, f, x, hcfg)
+    end
+    g = similar(x_seed)
+    g!(x_seed, g)
+    return TwiceDifferentiable(f, g!, fg!, h!, f(x_seed),
+                               g, Array{T}(n_x, n_x), copy(x_seed), f_calls, [1], [0])
+end
+=#
+function TwiceDifferentiable(d::OnceDifferentiable; autodiff = :finitediff)
+    n_x = length(d.last_x_f)
+    T = eltype(d.last_x_f)
+    if autodiff == :finitediff
+        function h!(x::Vector, storage::Matrix)
+            Calculus.finite_difference_hessian!(x->(d.f_calls[1]+=1;d.f(x)), x, storage)
+            return
+        end
+    elseif autodiff == :forwarddiff
+        hcfg = ForwardDiff.HessianConfig(similar(gradient(d)))
+        h! = (x, out) -> ForwardDiff.hessian!(out, d.f, x, hcfg)
+    end
+    H = Array{T}(n_x, n_x)
+    h!(d.last_x_g, H)
+    return TwiceDifferentiable(d.f, d.g!, d.fg!, h!, d.f_x,
+                               gradient(d), H, d.last_x_f,
+                               d.last_x_g, copy(d.last_x_g), d.f_calls, d.g_calls, [1])
+end
+
+function _unchecked_value!(obj, x)
+    obj.f_calls .+= 1
+    copy!(obj.last_x_f, x)
+    obj.f_x = obj.f(x)
+end
+function value(obj, x)
+    if x != obj.last_x_f
+        obj.f_calls .+= 1
+        return obj.f(x)
+    end
+    obj.f_x
+end
+function value!(obj, x)
+    if x != obj.last_x_f
+        _unchecked_value!(obj, x)
+    end
+    obj.f_x
+end
+
+
+function _unchecked_grad!(obj, x)
+    obj.g_calls .+= 1
+    copy!(obj.last_x_g, x)
+    obj.g!(x, obj.g)
+end
+function gradient!(obj, x)
+    if x != obj.last_x_g
+        _unchecked_grad!(obj, x)
+    end
+end
+
+function value_grad!(obj, x)
+    if x != obj.last_x_f && x != obj.last_x_g
+        obj.f_calls .+= 1
+        obj.g_calls .+= 1
+        obj.last_x_f[:], obj.last_x_g[:] = copy(x), copy(x)
+        obj.f_x = obj.fg!(x, obj.g)
+    elseif x != obj.last_x_f
+        _unchecked_value!(obj, x)
+    elseif x != obj.last_x_g
+        _unchecked_grad!(obj, x)
+    end
+    obj.f_x
+end
+
+function _unchecked_hessian!(obj, x)
+    obj.h_calls .+= 1
+    copy!(obj.last_x_h, x)
+    obj.h!(x, obj.H)
+end
+function hessian!(obj, x)
+    if x != obj.last_x_h
+        _unchecked_hessian!(obj, x)
+    end
+end
+
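+# Usage sketch (illustrative, not exported API): the last_x_* caches above mean
+# that re-evaluating at an unchanged point does not recount calls, e.g.
+#
+#     od = OnceDifferentiable(f, g!, fg!, x0)
+#     value_grad!(od, x)   # evaluates fg! once; bumps f_calls and g_calls
+#     value_grad!(od, x)   # x matches last_x_f and last_x_g, so the cached f_x is returned
+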
+# Getters are without ! and accept only an objective and index or just an objective
+value(obj) = obj.f_x
+gradient(obj) = obj.g
+gradient(obj, i::Integer) = obj.g[i]
+hessian(obj) = obj.H
+#=
+# This can be used when LineSearches switches to value_grad! and family
+# Remember to change all fg!'s to "nothing" for finite differences
+# and when f and g! are passed but no fg!. Can potentially avoid more calls
+# than current setup.
+function value_grad!(obj::Union{OnceDifferentiable{Void}, TwiceDifferentiable{Void}}, x)
+    if x != obj.last_x_f
+        _unchecked_value!(obj, x)
+    end
+    if x != obj.last_x_g
+        _unchecked_grad!(obj, x)
+    end
+    obj.f_x
+end
+=#
diff --git a/src/optimize.jl b/src/optimize.jl
index 8bc8e6d05..223b4ecd9 100644
--- a/src/optimize.jl
+++ b/src/optimize.jl
@@ -46,23 +46,24 @@ function optimize(d::TwiceDifferentiable, initial_x::Array; kwargs...)
     optimize(d, initial_x, method, Options(;kwargs...))
 end

-optimize(d::Function, initial_x, options::Options) = optimize(d, initial_x, NelderMead(), options)
-optimize(d::OnceDifferentiable, initial_x, options::Options) = optimize(d, initial_x, BFGS(), options)
-optimize(d::TwiceDifferentiable, initial_x, options::Options) = optimize(d, initial_x, Newton(), options)
+optimize(f::Function, initial_x::Array, options::Options) = optimize(NonDifferentiable(f, initial_x), initial_x, NelderMead(), options)
+optimize(f::Function, initial_x::Array, method::Optimizer, options::Options = Options()) = optimize(NonDifferentiable(f, initial_x), initial_x, method, options)
+optimize(d::OnceDifferentiable, initial_x::Array, options::Options) = optimize(d, initial_x, BFGS(), options)
+optimize(d::TwiceDifferentiable, initial_x::Array, options::Options) = optimize(d, initial_x, Newton(), options)

 function optimize{F<:Function, G<:Function}(f::F, g!::G,
                                             initial_x::Array,
                                             method::Optimizer,
                                             options::Options = Options())
-    d = OnceDifferentiable(f, g!)
+    d = OnceDifferentiable(f, g!, initial_x)
     optimize(d, initial_x, method, options)
 end
 function optimize{F<:Function, G<:Function}(f::F, g!::G,
                                             initial_x::Array,
                                             options::Options)
-    d = OnceDifferentiable(f, g!)
+    d = OnceDifferentiable(f, g!, initial_x)
     optimize(d, initial_x, BFGS(), options)
 end

@@ -72,7 +73,7 @@ function optimize{F<:Function, G<:Function, H<:Function}(f::F,
                                                          initial_x::Array,
                                                          method::Optimizer,
                                                          options::Options = Options())
-    d = TwiceDifferentiable(f, g!, h!)
+    d = TwiceDifferentiable(f, g!, h!, initial_x)
     optimize(d, initial_x, method, options)
 end
 function optimize{F<:Function, G<:Function, H<:Function}(f::F,
@@ -80,7 +81,7 @@ function optimize{F<:Function, G<:Function, H<:Function}(f::F,
                                                          g!::G,
                                                          h!::H,
                                                          initial_x::Array,
                                                          options)
-    d = TwiceDifferentiable(f, g!, h!)
+    d = TwiceDifferentiable(f, g!, h!, initial_x)
     optimize(d, initial_x, Newton(), options)
 end

@@ -88,79 +89,34 @@ function optimize{F<:Function, T, M <: Union{FirstOrderSolver, SecondOrderSolver
                                             initial_x::Array{T},
                                             method::M,
                                             options::Options)
-    if !options.autodiff
-        if M <: FirstOrderSolver
-            d = OnceDifferentiable(f)
-        else
-            error("No gradient or Hessian was provided. Either provide a gradient and Hessian, set autodiff = true in the Options if applicable, or choose a solver that doesn't require a Hessian.")
-        end
+    if M <: FirstOrderSolver
+        d = OnceDifferentiable(f, initial_x)
     else
-        gcfg = ForwardDiff.GradientConfig(initial_x)
-        g! = (x, out) -> ForwardDiff.gradient!(out, f, x, gcfg)
-
-        fg! = (x, out) -> begin
-            gr_res = DiffBase.DiffResult(zero(T), out)
-            ForwardDiff.gradient!(gr_res, f, x, gcfg)
-            DiffBase.value(gr_res)
-        end
-
-        if M <: FirstOrderSolver
-            d = OnceDifferentiable(f, g!, fg!)
-        else
-            hcfg = ForwardDiff.HessianConfig(initial_x)
-            h! = (x, out) -> ForwardDiff.hessian!(out, f, x, hcfg)
-            d = TwiceDifferentiable(f, g!, fg!, h!)
-        end
+        d = TwiceDifferentiable(f, initial_x)
     end
-
     optimize(d, initial_x, method, options)
 end

 function optimize(d::OnceDifferentiable,
                   initial_x::Array,
-                  method::Newton,
-                  options::Options)
-    if !options.autodiff
-        error("No Hessian was provided. Either provide a Hessian, set autodiff = true in the Options if applicable, or choose a solver that doesn't require a Hessian.")
-    else
-        hcfg = ForwardDiff.HessianConfig(initial_x)
-        h! = (x, out) -> ForwardDiff.hessian!(out, d.f, x, hcfg)
-    end
-    optimize(TwiceDifferentiable(d.f, d.g!, d.fg!, h!), initial_x, method, options)
-end
-
-function optimize(d::OnceDifferentiable,
-                  initial_x::Array,
-                  method::NewtonTrustRegion,
+                  method::SecondOrderSolver,
                   options::Options)
-    if !options.autodiff
-        error("No Hessian was provided. Either provide a Hessian, set autodiff = true in the Options if applicable, or choose a solver that doesn't require a Hessian.")
-    else
-        hcfg = ForwardDiff.HessianConfig(initial_x)
-        h! = (x, out) -> ForwardDiff.hessian!(out, d.f, x, hcfg)
-    end
-    optimize(TwiceDifferentiable(d.f, d.g!, d.fg!, h!), initial_x, method, options)
+    optimize(TwiceDifferentiable(d), initial_x, method, options)
 end

 update_g!(d, state, method) = nothing
-
 function update_g!{M<:Union{FirstOrderSolver, Newton}}(d, state, method::M)
     # Update the function value and gradient
-    state.f_x_previous, state.f_x = state.f_x, d.fg!(state.x, state.g)
-    state.f_calls, state.g_calls = state.f_calls + 1, state.g_calls + 1
+    value_grad!(d, state.x)
 end

-update_h!(d, state, method) = nothing
-
 # Update the Hessian
-function update_h!(d, state, method::SecondOrderSolver)
-    d.h!(state.x, state.H)
-    state.h_calls += 1
-end
+update_h!(d, state, method) = nothing
+update_h!(d, state, method::SecondOrderSolver) = hessian!(d, state.x)

 after_while!(d, state, method, options) = nothing

-function optimize{T, M<:Optimizer}(d, initial_x::Array{T}, method::M,
+function optimize{D<:AbstractObjective, T, M<:Optimizer}(d::D, initial_x::Array{T}, method::M,
                                    options::Options = Options())
     t0 = time() # Initial time stamp used to control early stopping by options.time_limit
@@ -168,27 +124,26 @@ function optimize{T, M<:Optimizer}(d, initial_x::Array{T}, method::M,
         error("Use optimize(f, scalar, scalar) for 1D problems")
     end

     state = initial_state(method, options, d, initial_x)
     tr = OptimizationTrace{typeof(method)}()
     tracing = options.store_trace || options.show_trace || options.extended_trace || options.callback != nothing
     stopped, stopped_by_callback, stopped_by_time_limit = false, false, false
-
+    f_limit_reached, g_limit_reached, h_limit_reached = false, false, false
     x_converged, f_converged, f_increased = false, false, false
     g_converged = if typeof(method) <: NelderMead
         nmobjective(state.f_simplex, state.m, state.n) < options.g_tol
     elseif typeof(method) <: ParticleSwarm || typeof(method) <: SimulatedAnnealing
         g_converged = false
     else
-        vecnorm(state.g, Inf) < options.g_tol
+        vecnorm(gradient(d), Inf) < options.g_tol
     end
     converged = g_converged
     iteration = 0

     options.show_trace && print_header(method)
-    trace!(tr, state, iteration, method, options)
+    trace!(tr, d, state, iteration, method, options)

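+    # Note: f_calls_limit, g_calls_limit, h_calls_limit and time_limit are soft
+    # limits; the loop below only checks them once per iteration, after the
+    # iteration's work is done.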
diff --git a/src/particle_swarm.jl b/src/particle_swarm.jl
index 48c0785bd..a9e5a4f17 100644
--- a/src/particle_swarm.jl
+++ b/src/particle_swarm.jl
@@ -26,9 +26,7 @@ type ParticleSwarmState{T,N}
     iterations::Int
 end
 
-initial_state(method::ParticleSwarm, options, d, initial_x::Array) = initial_state(method, options, d.f, initial_x)
-
-function initial_state{T}(method::ParticleSwarm, options, f::Function, initial_x::Array{T})
+function initial_state{T}(method::ParticleSwarm, options, f, initial_x::Array{T})
     #=
     Variable X represents the whole swarm of solutions with
@@ -83,10 +81,8 @@ function initial_state{T}(method::ParticleSwarm, options, f, initial_x::Array{T})
     best_score = zeros(T, n_particles)
     x_learn = zeros(T, n)
 
-    f_calls = 0
     current_state = 0
-    f_x = f(initial_x)
-    f_calls += 1
+    value!(f, initial_x)
 
     # if search space is limited, spread the initial population
     # uniformly over the whole search space
@@ -123,10 +119,6 @@ function initial_state{T}(method::ParticleSwarm, options, f, initial_x::Array{T})
     ParticleSwarmState("Particle Swarm",
                        n,
                        x,
-                       f_x,
-                       f_calls, # f call
-                       0, # g calls
-                       0, # h calls
                        0,
                        lower,
                        upper,
@@ -145,24 +137,22 @@ function initial_state{T}(method::ParticleSwarm, options, f, initial_x::Array{T})
                        options.iterations)
 end
 
-update_state!(d, state::ParticleSwarmState, method::ParticleSwarm) = update_state!(d.f, state, method)
-function update_state!{T}(f::Function, state::ParticleSwarmState{T}, method::ParticleSwarm)
+function update_state!{T}(f, state::ParticleSwarmState{T}, method::ParticleSwarm)
     if state.limit_search_space
         limit_X!(state.X, state.lower, state.upper, state.n_particles, state.n)
     end
     compute_cost!(f, state.n_particles, state.X, state.score)
-    state.f_calls += state.n_particles
 
     if state.iteration == 0
         copy!(state.best_score, state.score)
-        state.f_x = Base.minimum(state.score)
+        f.f_x = Base.minimum(state.score)
     end
-    state.f_x = housekeeping!(state.score,
+    f.f_x = housekeeping!(state.score,
                               state.best_score,
                               state.X,
                               state.X_best,
                               state.x,
-                              state.f_x,
+                              value(f),
                               state.n_particles)
     # Elitist Learning:
     # find a new solution named 'x_learn' which is the current best
@@ -195,9 +185,8 @@ function update_state!{T}(f, state::ParticleSwarmState{T}, method::ParticleSwarm)
         end
     end
 
-    score_learn = f(state.x_learn)
-    state.f_calls += 1
-    if score_learn < state.f_x
-        state.f_x = score_learn * 1.0
+    score_learn = value(f, state.x_learn)
+    if score_learn < f.f_x
+        f.f_x = score_learn * 1.0
         for j in 1:state.n
             state.X_best[j, i_worst] = state.x_learn[j]
@@ -440,13 +429,13 @@ function limit_X!(X, lower, upper, n_particles, n)
     nothing
 end
 
-function compute_cost!(f::Function,
+function compute_cost!(f,
                        n_particles::Int,
                        X::Matrix,
                        score::Vector)
 
     for i in 1:n_particles
-        score[i] = f(X[:, i])
+        score[i] = value(f, X[:, i])
     end
     nothing
 end
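The swarm code above no longer tallies `f_calls` by hand; evaluation and bookkeeping are fused on the objective. A hedged sketch of that protocol (the `NonDifferentiable` constructor and the exact counting behavior are assumptions inferred from the calls above, since `objective_types.jl` is not shown in this diff):

```jl
using Optim

f(x) = sum(abs2, x)
x0 = [1.0, 2.0]

nd = Optim.NonDifferentiable(f, x0)  # assumed constructor mirroring Once/TwiceDifferentiable
Optim.value!(nd, x0)                 # evaluates f(x0), caches it in nd.f_x, bumps the counter
Optim.value(nd)                      # returns the cached value, no extra objective call
Optim.value(nd, [0.0, 0.0])          # evaluates at a new point, one more f-call
Optim.f_calls(nd)                    # == 2, assuming value(nd, x) counts
```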
diff --git a/src/simulated_annealing.jl b/src/simulated_annealing.jl
index a217f041a..71aee7997 100644
--- a/src/simulated_annealing.jl
+++ b/src/simulated_annealing.jl
@@ -16,11 +16,10 @@ immutable SimulatedAnnealing{Tn, Ttemp} <: Optimizer
     keep_best::Bool # not used!?
 end
 
-SimulatedAnnealing(;neighbor! = nothing,
-                    neighbor = default_neighbor!,
+SimulatedAnnealing(;neighbor = default_neighbor!,
                     temperature = log_temperature,
                     keep_best::Bool = true) =
-    SimulatedAnnealing(get_neighbor(neighbor!, neighbor), temperature, keep_best)
+    SimulatedAnnealing(neighbor, temperature, keep_best)
 
 type SimulatedAnnealingState{T, N}
     @add_generic_fields()
@@ -31,24 +30,17 @@ type SimulatedAnnealingState{T, N}
     f_proposal::T
 end
 
-initial_state(method::SimulatedAnnealing, options, d, initial_x::Array) = initial_state(method, options, d.f, initial_x)
-
-function initial_state{T}(method::SimulatedAnnealing, options, f::Function, initial_x::Array{T})
+function initial_state{T}(method::SimulatedAnnealing, options, f, initial_x::Array{T})
     # Count number of parameters
     n = length(initial_x)
-
-    # Store f(x) in f_x
-    f_x = f(initial_x)
-    f_calls = 1
+    value!(f, initial_x)
 
     # Store the best state ever visited
     best_x = copy(initial_x)
-    best_f_x = f_x
-    SimulatedAnnealingState("Simulated Annealing", n, copy(best_x), f_x, f_calls, 0, 0, 1, best_x, similar(initial_x), best_f_x, f_x)
+    SimulatedAnnealingState("Simulated Annealing", n, copy(best_x), 1, best_x, similar(initial_x), f.f_x, f.f_x)
 end
 
-update_state!(d, state::SimulatedAnnealingState, method::SimulatedAnnealing) = update_state!(d.f, state, method)
-function update_state!{T}(f::Function, state::SimulatedAnnealingState{T}, method::SimulatedAnnealing)
+function update_state!{T}(nd, state::SimulatedAnnealingState{T}, method::SimulatedAnnealing)
     # Determine the temperature for current iteration
     t = method.temperature(state.iteration)
@@ -57,8 +49,7 @@ function update_state!{T}(nd, state::SimulatedAnnealingState{T}, method::SimulatedAnnealing)
     method.neighbor!(state.x_current, state.x_proposal)
 
     # Evaluate the cost function at the proposed state
-    state.f_proposal = f(state.x_proposal)
-    state.f_calls += 1
+    state.f_proposal = value(nd, state.x_proposal)
 
     if state.f_proposal <= state.f_x_current
         # If proposal is superior, we always move to it
@@ -66,8 +57,8 @@ function update_state!{T}(nd, state::SimulatedAnnealingState{T}, method::SimulatedAnnealing)
         state.f_x_current = state.f_proposal
 
         # If the new state is the best state yet, keep a record of it
-        if state.f_proposal < state.f_x
-            state.f_x = state.f_proposal
+        if state.f_proposal < nd.f_x
+            nd.f_x = state.f_proposal
             copy!(state.x, state.x_proposal)
         end
     else
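Since the deprecated `neighbor!` keyword is gone, a custom proposal function now goes through `neighbor` alone. A small sketch of the surviving interface:

```jl
using Optim

f(x) = sum(abs2, x)
x0 = [1.0, 1.0]

# mutating proposal: fill x_proposal with a random step away from x_current
function my_neighbor!(x_current, x_proposal)
    for i in eachindex(x_current)
        x_proposal[i] = x_current[i] + 0.1 * randn()
    end
end

res = optimize(f, x0, SimulatedAnnealing(neighbor = my_neighbor!),
               Optim.Options(iterations = 100_000))
```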
diff --git a/src/types.jl b/src/types.jl
index b19640e1a..b18cbd860 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -3,12 +3,14 @@ immutable Options{TCallback <: Union{Void, Function}}
     x_tol::Float64
     f_tol::Float64
     g_tol::Float64
+    f_calls_limit::Int
+    g_calls_limit::Int
+    h_calls_limit::Int
     allow_f_increases::Bool
     iterations::Int
     store_trace::Bool
    show_trace::Bool
     extended_trace::Bool
-    autodiff::Bool
     show_every::Int
     callback::TCallback
     time_limit::Float64
@@ -18,12 +20,14 @@ function Options(;
         x_tol::Real = 1e-32,
         f_tol::Real = 1e-32,
         g_tol::Real = 1e-8,
+        f_calls_limit::Int = 0,
+        g_calls_limit::Int = 0,
+        h_calls_limit::Int = 0,
         allow_f_increases::Bool = false,
         iterations::Integer = 1_000,
         store_trace::Bool = false,
         show_trace::Bool = false,
         extended_trace::Bool = false,
-        autodiff::Bool = false,
         show_every::Integer = 1,
         callback = nothing,
         time_limit = NaN)
@@ -32,9 +36,9 @@ function Options(;
         show_trace = true
     end
     Options{typeof(callback)}(
-        Float64(x_tol), Float64(f_tol), Float64(g_tol), allow_f_increases, Int(iterations),
-        store_trace, show_trace, extended_trace, autodiff, Int(show_every),
-        callback, time_limit)
+        Float64(x_tol), Float64(f_tol), Float64(g_tol), f_calls_limit, g_calls_limit, h_calls_limit,
+        allow_f_increases, Int(iterations), store_trace, show_trace, extended_trace,
+        Int(show_every), callback, time_limit)
 end
 
 function print_header(options::Options)
@@ -93,23 +97,6 @@ type UnivariateOptimizationResults{T,M} <: OptimizationResults
     f_calls::Int
 end
 
-immutable NonDifferentiable
-    f::Function
-end
-
-immutable OnceDifferentiable
-    f::Function
-    g!::Function
-    fg!::Function
-end
-
-immutable TwiceDifferentiable
-    f::Function
-    g!::Function
-    fg!::Function
-    h!::Function
-end
-
 function Base.show(io::IO, t::OptimizationState)
     @printf io "%6d %14e %14e\n" t.iteration t.value t.g_norm
     if !isempty(t.metadata)
@@ -154,10 +141,13 @@ function Base.show(io::IO, r::MultivariateOptimizationResults)
         @printf io " * f(x) > f(x'): %s\n" f_increased(r)
     end
     @printf io " * Reached Maximum Number of Iterations: %s\n" iteration_limit_reached(r)
-    @printf io " * Objective Function Calls: %d" f_calls(r)
+    @printf io " * Objective Calls: %d" f_calls(r)
     if !(r.method in ("Nelder-Mead", "Simulated Annealing"))
         @printf io "\n * Gradient Calls: %d" g_calls(r)
     end
+    if r.method in ("Newton's Method", "Newton's Method (Trust Region)")
+        @printf io "\n * Hessian Calls: %d" h_calls(r)
+    end
     return
 end
@@ -185,50 +175,3 @@ function Base.append!(a::MultivariateOptimizationResults, b::MultivariateOptimizationResults)
     a.f_calls += f_calls(b)
     a.g_calls += g_calls(b)
 end
-
-# TODO: Expose ability to do forward and backward differencing
-function OnceDifferentiable(f::Function)
-    function g!(x::Array, storage::Array)
-        Calculus.finite_difference!(f, x, storage, :central)
-        return
-    end
-    function fg!(x::Array, storage::Array)
-        g!(x, storage)
-        return f(x)
-    end
-    return OnceDifferentiable(f, g!, fg!)
-end
-
-function OnceDifferentiable(f::Function, g!::Function)
-    function fg!(x::Array, storage::Array)
-        g!(x, storage)
-        return f(x)
-    end
-    return OnceDifferentiable(f, g!, fg!)
-end
-
-function TwiceDifferentiable(f::Function)
-    function g!(x::Vector, storage::Vector)
-        Calculus.finite_difference!(f, x, storage, :central)
-        return
-    end
-    function fg!(x::Vector, storage::Vector)
-        g!(x, storage)
-        return f(x)
-    end
-    function h!(x::Vector, storage::Matrix)
-        Calculus.finite_difference_hessian!(f, x, storage)
-        return
-    end
-    return TwiceDifferentiable(f, g!, fg!, h!)
-end
-
-function TwiceDifferentiable(f::Function,
-                             g!::Function,
-                             h!::Function)
-    function fg!(x::Vector, storage::Vector)
-        g!(x, storage)
-        return f(x)
-    end
-    return TwiceDifferentiable(f, g!, fg!, h!)
-end
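The three new counters give `Optim.Options` soft budgets to pair with `time_limit`; a value of zero keeps a budget unlimited. A sketch of the constructor as it now reads:

```jl
using Optim

f(x) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2

opts = Optim.Options(f_calls_limit = 500,      # soft cap, checked once per iteration
                     g_calls_limit = 200,
                     time_limit = 10.0,        # seconds; NaN means unlimited
                     allow_f_increases = true)
res = optimize(f, zeros(2), BFGS(), opts)
Optim.f_calls(res)   # may land slightly above the cap, hence "soft"
```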
diff --git a/src/utilities/assess_convergence.jl b/src/utilities/assess_convergence.jl
index 3bee1f3f1..cd09d3a35 100644
--- a/src/utilities/assess_convergence.jl
+++ b/src/utilities/assess_convergence.jl
@@ -33,7 +33,7 @@ function assess_convergence(x::Array,
 end
 
-function assess_convergence(state, options)
+function assess_convergence(state, d, options)
     x_converged, f_converged, f_increased, g_converged = false, false, false, false
 
     if maxdiff(state.x, state.x_previous) < options.x_tol
@@ -43,15 +43,15 @@ function assess_convergence(state, d, options)
     # Absolute Tolerance
     # if abs(f_x - f_x_previous) < f_tol
     # Relative Tolerance
-    if abs(state.f_x - state.f_x_previous) < max(options.f_tol * (abs(state.f_x) + options.f_tol), eps(abs(state.f_x)+abs(state.f_x_previous)))
+    if abs(value(d) - state.f_x_previous) < max(options.f_tol * (abs(value(d)) + options.f_tol), eps(abs(value(d))+abs(state.f_x_previous)))
         f_converged = true
     end
 
-    if state.f_x > state.f_x_previous
+    if value(d) > state.f_x_previous
         f_increased = true
     end
 
-    if vecnorm(state.g, Inf) < options.g_tol
+    if vecnorm(gradient(d), Inf) < options.g_tol
         g_converged = true
     end
@@ -60,19 +60,19 @@ function assess_convergence(state, d, options)
     return x_converged, f_converged, g_converged, converged, f_increased
 end
 
-function assess_convergence(state::NelderMeadState, options)
+function assess_convergence(state::NelderMeadState, d, options)
     g_converged = state.nm_x <= options.g_tol # Hijack g_converged for NM stopping criterion
     return false, false, g_converged, g_converged, false
 end
 
-function assess_convergence(state::Union{ParticleSwarmState, SimulatedAnnealingState}, options)
+function assess_convergence(state::Union{ParticleSwarmState, SimulatedAnnealingState}, d, options)
     false, false, false, false, false
 end
 
-function assess_convergence(state::NewtonTrustRegionState, options)
+function assess_convergence(state::NewtonTrustRegionState, d, options)
     x_converged, f_converged, g_converged, converged, f_increased = false, false, false, false, false
     if state.rho > state.eta
         # Accept the point and check convergence
@@ -82,9 +82,9 @@ function assess_convergence(state::NewtonTrustRegionState, d, options)
             assess_convergence(state.x,
                                state.x_previous,
-                               state.f_x,
+                               value(d),
                                state.f_x_previous,
-                               state.g,
+                               gradient(d),
                                options.x_tol,
                                options.f_tol,
                                options.g_tol)
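For reference, the relative `f_tol` test above reads as a small standalone predicate; a sketch of the same rule:

```jl
# true when the change in objective value is below the relative tolerance,
# with an eps() floor so the test stays meaningful near zero
rel_f_converged(fx, fx_prev, f_tol) =
    abs(fx - fx_prev) < max(f_tol * (abs(fx) + f_tol),
                            eps(abs(fx) + abs(fx_prev)))

rel_f_converged(1.0000001, 1.0, 1e-8)      # false: relative change is about 1e-7
rel_f_converged(1.00000000001, 1.0, 1e-8)  # true
```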
diff --git a/src/utilities/generic.jl b/src/utilities/generic.jl
index c89b54083..d186db578 100644
--- a/src/utilities/generic.jl
+++ b/src/utilities/generic.jl
@@ -11,10 +11,6 @@ end
     method_string::String
     n::Int
     x::Array{T,N}
-    f_x::T
-    f_calls::Int
-    g_calls::Int
-    h_calls::Int
 end
 
 @def add_linesearch_fields begin
@@ -28,7 +24,7 @@ end
 @def initial_linesearch begin
     (similar(initial_x), # Buffer of x for line search in state.x_ls
      similar(initial_x), # Buffer of g for line search in state.g_ls
-     LineSearches.alphainit(one(T), initial_x, g, f_x), # Keep track of step size in state.alpha
+     LineSearches.alphainit(one(T), initial_x, gradient(d), value(d)), # Keep track of step size in state.alpha
     false, # state.mayterminate
     LineSearches.LineSearchResults(T))
 end
diff --git a/src/utilities/perform_linesearch.jl b/src/utilities/perform_linesearch.jl
index 2d4f25f08..1e4e8a314 100644
--- a/src/utilities/perform_linesearch.jl
+++ b/src/utilities/perform_linesearch.jl
@@ -1,7 +1,7 @@
-checked_dphi0!(state, method) = vecdot(state.g, state.s)
-function checked_dphi0!{M<:Union{BFGS, LBFGS}}(state, method::M)
+checked_dphi0!(state, d, method) = vecdot(gradient(d), state.s)
+function checked_dphi0!{M<:Union{BFGS, LBFGS}}(state, d, method::M)
     # If invH is not positive definite, reset it
-    dphi0 = vecdot(state.g, state.s)
+    dphi0 = vecdot(gradient(d), state.s)
     if dphi0 > 0.0
         # "reset" Hessian approximation
         if M <: BFGS
@@ -12,32 +12,32 @@ function checked_dphi0!{M<:Union{BFGS, LBFGS}}(state, d, method::M)
         # Re-calculate direction
         @simd for i in 1:state.n
-            @inbounds state.s[i] = -state.g[i]
+            @inbounds state.s[i] = -gradient(d, i)
         end
-        dphi0 = vecdot(state.g, state.s)
+        dphi0 = vecdot(gradient(d), state.s)
     end
     return dphi0
 end
 
-function checked_dphi0!(state, method::ConjugateGradient)
+function checked_dphi0!(state, d, method::ConjugateGradient)
     # Reset the search direction if it becomes corrupted
-    dphi0 = vecdot(state.g, state.s)
+    dphi0 = vecdot(gradient(d), state.s)
     if dphi0 >= 0
         @simd for i in 1:state.n
             @inbounds state.s[i] = -state.pg[i]
         end
-        dphi0 = vecdot(state.g, state.s)
+        dphi0 = vecdot(gradient(d), state.s)
     end
     dphi0
 end
 
-alphaguess!(state, method, dphi0, df) = nothing
-function alphaguess!(state, method::LBFGS, dphi0, df)
+alphaguess!(state, method, dphi0, d) = nothing
+function alphaguess!(state, method::LBFGS, dphi0, d)
     # compute an initial guess for the linesearch based on
     # Nocedal/Wright, 2nd ed, (3.60)
     # TODO: this is a temporary fix, but should eventually be split off into
     # a separate type and possibly live in LineSearches; see #294
     if method.extrapolate && state.pseudo_iteration > 1
-        alphaguess = 2.0 * (state.f_x - state.f_x_previous) / dphi0
+        alphaguess = 2.0 * (value(d) - state.f_x_previous) / dphi0
         alphaguess = max(alphaguess, state.alpha/4.0) # not too much reduction
         # if alphaguess ≈ 1, then make it 1 (Newton-type behaviour)
         if method.snap2one[1] < alphaguess < method.snap2one[2]
@@ -47,24 +47,24 @@ function alphaguess!(state, method::LBFGS, dphi0, d)
     end
 end
 # Reset to 1 for BFGS and Newton if resetalpha
-function alphaguess!(state, method::Union{BFGS, Newton}, dphi0, df)
+function alphaguess!(state, method::Union{BFGS, Newton}, dphi0, d)
     if method.resetalpha == true
         state.alpha = one(state.alpha)
     end
 end
 
-function alphaguess!(state, method::ConjugateGradient, dphi0, df)
+function alphaguess!(state, method::ConjugateGradient, dphi0, d)
     # Pick the initial step size (HZ #I1-I2)
     state.alpha, state.mayterminate, f_update, g_update =
-        LineSearches.alphatry(state.alpha, df, state.x, state.s, state.x_ls, state.g_ls, state.lsr)
-    state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update
+        LineSearches.alphatry(state.alpha, d, state.x, state.s, state.x_ls, state.g_ls, state.lsr)
+    d.f_calls, d.g_calls = d.f_calls + f_update, d.g_calls + g_update
 end
 
 function perform_linesearch!{M}(state, method::M, d)
     # Calculate search direction dphi0
-    dphi0 = checked_dphi0!(state, method)
+    dphi0 = checked_dphi0!(state, d, method)
     # Refresh the line search cache
     LineSearches.clear!(state.lsr)
-    push!(state.lsr, zero(state.f_x), state.f_x, dphi0)
+    push!(state.lsr, zero(value(d)), value(d), dphi0)
 
     # Guess an alpha
     alphaguess!(state, method, dphi0, d)
@@ -74,11 +74,11 @@ function perform_linesearch!{M}(state, method::M, d)
         state.alpha, f_update, g_update =
             method.linesearch!(d, state.x, state.s, state.x_ls, state.g_ls, state.lsr,
                                state.alpha, state.mayterminate)
-        state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update
+        d.f_calls, d.g_calls = d.f_calls + f_update, d.g_calls + g_update
         return true
     catch ex
        if isa(ex, LineSearches.LineSearchException)
-            state.f_calls, state.g_calls = state.f_calls + ex.f_update, state.g_calls + ex.g_update
+            d.f_calls, d.g_calls = d.f_calls + ex.f_update, d.g_calls + ex.g_update
             state.alpha = ex.alpha
             Base.warn("Linesearch failed, using alpha = $(state.alpha) and exiting optimization.")
             return false #lssuccess = false
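Because line searches now charge their evaluations to `d.f_calls` and `d.g_calls`, the objective is the single source of truth for work done. A sketch of inspecting it after a solve:

```jl
using Optim

f(x) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
function g!(x, storage)
    storage[1] = -2.0 * (1.0 - x[1]) - 400.0 * (x[2] - x[1]^2) * x[1]
    storage[2] = 200.0 * (x[2] - x[1]^2)
end
x0 = zeros(2)

od = OnceDifferentiable(f, g!, x0)
res = optimize(od, x0, BFGS())
Optim.f_calls(od), Optim.g_calls(od)  # includes every evaluation made inside line searches
```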
diff --git a/src/utilities/trace.jl b/src/utilities/trace.jl
index 556cb297c..93abf4a1a 100644
--- a/src/utilities/trace.jl
+++ b/src/utilities/trace.jl
@@ -1,4 +1,4 @@
-function trace!(tr, state, iteration, method::NelderMead, options)
+function trace!(tr, d, state, iteration, method::NelderMead, options)
     dt = Dict()
     if options.extended_trace
         dt["centroid"] = state.x_centroid
@@ -16,14 +16,14 @@ function trace!(tr, d, state, iteration, method::NelderMead, options)
 end
 
-function trace!(tr, state, iteration, method::Union{ParticleSwarm, SimulatedAnnealing}, options)
+function trace!(tr, d, state, iteration, method::Union{ParticleSwarm, SimulatedAnnealing}, options)
     dt = Dict()
     if options.extended_trace
         dt["x"] = copy(state.x)
     end
     update!(tr,
             state.iteration,
-            state.f_x,
+            d.f_x,
             NaN,
             dt,
             options.store_trace,
@@ -32,18 +32,18 @@ function trace!(tr, d, state, iteration, method::Union{ParticleSwarm, SimulatedAnnealing}, options)
             options.callback)
 end
 
-function trace!(tr, state, iteration, method::BFGS, options)
+function trace!(tr, d, state, iteration, method::BFGS, options)
     dt = Dict()
     if options.extended_trace
         dt["x"] = copy(state.x)
-        dt["g(x)"] = copy(state.g)
+        dt["g(x)"] = copy(gradient(d))
         dt["~inv(H)"] = copy(state.invH)
         dt["Current step size"] = state.alpha
     end
-    g_norm = vecnorm(state.g, Inf)
+    g_norm = vecnorm(gradient(d), Inf)
     update!(tr,
             iteration,
-            state.f_x,
+            value(d),
             g_norm,
             dt,
             options.store_trace,
@@ -52,17 +52,17 @@ function trace!(tr, d, state, iteration, method::BFGS, options)
             options.callback)
 end
 
-function trace!(tr, state, iteration, method::Union{LBFGS, AcceleratedGradientDescent, GradientDescent, MomentumGradientDescent, ConjugateGradient}, options)
+function trace!(tr, d, state, iteration, method::Union{LBFGS, AcceleratedGradientDescent, GradientDescent, MomentumGradientDescent, ConjugateGradient}, options)
     dt = Dict()
     if options.extended_trace
         dt["x"] = copy(state.x)
-        dt["g(x)"] = copy(state.g)
+        dt["g(x)"] = copy(gradient(d))
         dt["Current step size"] = state.alpha
     end
-    g_norm = vecnorm(state.g, Inf)
+    g_norm = vecnorm(gradient(d), Inf)
     update!(tr,
             iteration,
-            state.f_x,
+            value(d),
             g_norm,
             dt,
             options.store_trace,
@@ -71,17 +71,17 @@ function trace!(tr, d, state, iteration, method::Union{LBFGS, AcceleratedGradientDescent, GradientDescent, MomentumGradientDescent, ConjugateGradient}, options)
             options.callback)
 end
 
-function trace!(tr, state, iteration, method::Newton, options)
+function trace!(tr, d, state, iteration, method::Newton, options)
     dt = Dict()
     if options.extended_trace
         dt["x"] = copy(state.x)
-        dt["g(x)"] = copy(state.g)
-        dt["h(x)"] = copy(state.H)
+        dt["g(x)"] = copy(gradient(d))
+        dt["h(x)"] = copy(hessian(d))
     end
-    g_norm = vecnorm(state.g, Inf)
+    g_norm = vecnorm(gradient(d), Inf)
     update!(tr,
             iteration,
-            state.f_x,
+            value(d),
             g_norm,
             dt,
             options.store_trace,
@@ -90,22 +90,22 @@ function trace!(tr, d, state, iteration, method::Newton, options)
             options.callback)
 end
 
-function trace!(tr, state, iteration, method::NewtonTrustRegion, options)
+function trace!(tr, d, state, iteration, method::NewtonTrustRegion, options)
     dt = Dict()
     if options.extended_trace
         dt["x"] = copy(state.x)
-        dt["g(x)"] = copy(state.g)
-        dt["h(x)"] = copy(state.H)
+        dt["g(x)"] = copy(gradient(d))
+        dt["h(x)"] = copy(hessian(d))
         dt["delta"] = copy(state.delta)
         dt["interior"] = state.interior
         dt["hard case"] = state.hard_case
         dt["reached_subproblem_solution"] = state.reached_subproblem_solution
         dt["lambda"] = state.lambda
     end
-    g_norm = norm(state.g, Inf)
+    g_norm = norm(gradient(d), Inf)
     update!(tr,
             iteration,
-            state.f_x,
+            value(d),
             g_norm,
             dt,
             options.store_trace,
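All `trace!` methods now read values through the objective, so callbacks see the same numbers the solver does. A sketch of an early-stopping callback (we assume, as in earlier releases, that the callback receives the latest `OptimizationState` when the full trace is not stored):

```jl
using Optim

f(x) = sum(abs2, x)

# stop as soon as the traced objective value drops below a threshold
cb = os -> os.value < 1e-12

# a callback alone is enough to turn tracing on
res = optimize(f, [1.0, 1.0], BFGS(), Optim.Options(callback = cb))
```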
diff --git a/test/accelerated_gradient_descent.jl b/test/accelerated_gradient_descent.jl
index fc2519ac7..16762c396 100644
--- a/test/accelerated_gradient_descent.jl
+++ b/test/accelerated_gradient_descent.jl
@@ -10,15 +10,8 @@
     results = Optim.optimize(f, g!, initial_x, AcceleratedGradientDescent(), options)
     @test norm(Optim.minimum(results)) < 1e-6
 
-    for (name, prob) in Optim.UnconstrainedProblems.examples
-        if prob.isdifferentiable
-            if !(name in ("Large Polynomial", "Parabola"))
-                results = Optim.optimize(prob.f, prob.g!, prob.initial_x, AcceleratedGradientDescent(), Optim.Options(allow_f_increases=true))
-                if !(name in ("Rosenbrock", "Polynomial", "Powell"))
-                    @test Optim.converged(results)
-                end
-                @test norm(Optim.minimizer(results) - prob.solutions) < 1e-2
-            end
-        end
-    end
+    run_optim_tests(AcceleratedGradientDescent(); skip = ("Large Polynomial","Parabola"),
+                    convergence_exceptions = (("Rosenbrock", 1),("Rosenbrock", 2)),
+                    iteration_exceptions = (("Powell", 1100),
+                                            ("Polynomial", 1500)))
 end
diff --git a/test/api.jl b/test/api.jl
index 43a725d90..bfa4fd126 100644
--- a/test/api.jl
+++ b/test/api.jl
@@ -6,9 +6,9 @@
     h! = rosenbrock.h!
     initial_x = rosenbrock.initial_x
 
-    d1 = OnceDifferentiable(f)
-    d2 = OnceDifferentiable(f, g!)
-    d3 = TwiceDifferentiable(f, g!, h!)
+    d1 = OnceDifferentiable(f, initial_x)
+    d2 = OnceDifferentiable(f, g!, initial_x)
+    d3 = TwiceDifferentiable(f, g!, h!, initial_x)
 
     Optim.optimize(f, initial_x, BFGS())
     Optim.optimize(f, g!, initial_x, BFGS())
diff --git a/test/bfgs.jl b/test/bfgs.jl
index ed54523bb..c5abadd91 100644
--- a/test/bfgs.jl
+++ b/test/bfgs.jl
@@ -1,14 +1,3 @@
 @testset "BFGS" begin
-    for use_autodiff in (false, true)
-        for (name, prob) in Optim.UnconstrainedProblems.examples
-            opt_allow_f_increases = name == "Polynomial" ? true : false
-            if prob.isdifferentiable
-                results = Optim.optimize(prob.f, prob.initial_x, BFGS(), Optim.Options(autodiff = use_autodiff, allow_f_increases = opt_allow_f_increases))
-                if name != "Polynomial" && use_autodiff == false
-                    @test Optim.converged(results)
-                end
-                @test norm(Optim.minimizer(results) - prob.solutions) < 1e-2
-            end
-        end
-    end
+    run_optim_tests(BFGS(); convergence_exceptions = (("Polynomial",1),))
 end
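The api.jl hunk above shows the one breaking change most users will hit: every objective wrapper now takes the initial point so it can size its internal caches. In short:

```jl
using Optim

prob = Optim.UnconstrainedProblems.examples["Rosenbrock"]
x0 = prob.initial_x

d1 = OnceDifferentiable(prob.f, x0)                    # gradient generated for you
d2 = OnceDifferentiable(prob.f, prob.g!, x0)           # your gradient
d3 = TwiceDifferentiable(prob.f, prob.g!, prob.h!, x0) # your gradient and Hessian
```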
diff --git a/test/callbacks.jl b/test/callbacks.jl
index 08ad8d3ec..a087fed30 100644
--- a/test/callbacks.jl
+++ b/test/callbacks.jl
@@ -5,8 +5,8 @@
     g! = problem.g!
     h! = problem.h!
     initial_x = problem.initial_x
-    d2 = OnceDifferentiable(f, g!)
-    d3 = TwiceDifferentiable(f, g!, h!)
+    d2 = OnceDifferentiable(f, g!, initial_x)
+    d3 = TwiceDifferentiable(f, g!, h!, initial_x)
 
     for method in (NelderMead(), SimulatedAnnealing())
         ot_run = false
diff --git a/test/cg.jl b/test/cg.jl
index 82739a0e0..9499de960 100644
--- a/test/cg.jl
+++ b/test/cg.jl
@@ -1,11 +1,7 @@
 @testset "Conjugate Gradient" begin
     # Test Optim.cg for all differentiable functions in Optim.UnconstrainedProblems.examples
-    for (name, prob) in Optim.UnconstrainedProblems.examples
-        if prob.isdifferentiable
-            res = Optim.optimize(prob.f, prob.g!, prob.initial_x, ConjugateGradient())
-            @test norm(Optim.minimizer(res) - prob.solutions) < 1e-2
-        end
-    end
+    run_optim_tests(ConjugateGradient(),
+                    convergence_exceptions = (("Powell", 1), ("Powell", 2), ("Polynomial", 1)))
 
     @testset "matrix input" begin
         objective(X, B) = sum((X.-B).^2)/2
@@ -18,8 +14,7 @@
         srand(1)
         B = rand(2,2)
 
-        df = Optim.OnceDifferentiable(X -> objective(X, B), (X, G) -> objective_gradient!(X, G, B))
-        results = Optim.optimize(df, rand(2,2), ConjugateGradient())
+        results = Optim.optimize(X -> objective(X, B), (X, G) -> objective_gradient!(X, G, B), rand(2,2), ConjugateGradient())
         @test Optim.converged(results)
         @test Optim.minimum(results) < 1e-8
     end
diff --git a/test/constrained.jl b/test/constrained.jl
index 7e6a65e2d..ba5ec9763 100644
--- a/test/constrained.jl
+++ b/test/constrained.jl
@@ -26,7 +26,7 @@
     initial_x = randn(N)
     tmp = similar(initial_x)
     func = (x, g) -> quadratic!(x, g, AtA, A'*b, tmp)
-    objective = Optim.OnceDifferentiable(x->func(x, nothing), (x,g)->func(x,g), func)
+    objective = Optim.OnceDifferentiable(x->func(x, nothing), (x,g)->func(x,g), func, initial_x)
     results = Optim.optimize(objective, initial_x, ConjugateGradient())
     results = Optim.optimize(objective, Optim.minimizer(results), ConjugateGradient()) # restart to ensure high-precision convergence
     @test Optim.converged(results)
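The matrix-input test keeps working because `optimize` is generic over array shape. A hedged sketch of the same pattern outside the test suite:

```jl
using Optim

srand(1)
B = rand(2, 2)

objective(X) = sum((X .- B).^2) / 2
function objective_gradient!(X, G)
    copy!(G, X .- B)  # the gradient of 0.5*norm(X - B)^2 is X - B, written into G
end

res = optimize(objective, objective_gradient!, rand(2, 2), ConjugateGradient())
Optim.minimizer(res)  # a 2x2 matrix, same shape as the input
```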
diff --git a/test/gradient_descent.jl b/test/gradient_descent.jl
index 3660e9223..f6097edb8 100644
--- a/test/gradient_descent.jl
+++ b/test/gradient_descent.jl
@@ -1,26 +1,9 @@
 @testset "Gradient Descent" begin
-    for use_autodiff in (false, true)
-        for (name, prob) in Optim.UnconstrainedProblems.examples
-            opt_allow_f_increases = name == "Hosaki" ? true : false
-            if prob.isdifferentiable
-                iterations = if name == "Rosenbrock"
-                    10000 # Zig-zagging
-                elseif name == "Powell"
-                    80000 # Zig-zagging as the problem is (intentionally) ill-conditioned
-                else
-                    1000
-                end
-                results = Optim.optimize(prob.f, prob.initial_x, GradientDescent(),
-                                         Optim.Options(autodiff = use_autodiff,
-                                                       iterations = iterations,
-                                                       allow_f_increases = opt_allow_f_increases))
-                if !(name in ("Rosenbrock", "Polynomial", "Powell"))
-                    @test Optim.converged(results)
-                end
-                @test norm(Optim.minimizer(results) - prob.solutions, Inf) < 1e-2
-            end
-        end
-    end
+    run_optim_tests(GradientDescent(),
+                    f_increase_exceptions = ("Hosaki",),
+                    convergence_exceptions = (("Polynomial", 1), ("Polynomial", 2), ("Rosenbrock", 1), ("Rosenbrock", 2)),
+                    iteration_exceptions = (("Rosenbrock", 10000),),
+                    skip = ("Powell",))
 
     function f_gd_1(x)
         (x[1] - 5.0)^2
@@ -32,7 +15,7 @@
 
     initial_x = [0.0]
 
-    d = OnceDifferentiable(f_gd_1, g_gd_1)
+    d = OnceDifferentiable(f_gd_1, g_gd_1, initial_x)
 
     results = Optim.optimize(d, initial_x, GradientDescent())
     @test_throws ErrorException Optim.x_trace(results)
@@ -50,7 +33,7 @@
         storage[2] = eta * x[2]
     end
 
-    d = OnceDifferentiable(f_gd_2, g_gd_2)
+    d = OnceDifferentiable(f_gd_2, g_gd_2, [1.0, 1.0])
 
     results = Optim.optimize(d, [1.0, 1.0], GradientDescent())
     @test_throws ErrorException Optim.x_trace(results)
diff --git a/test/l_bfgs.jl b/test/l_bfgs.jl
index 1540c6b4a..020edd433 100644
--- a/test/l_bfgs.jl
+++ b/test/l_bfgs.jl
@@ -1,13 +1,3 @@
 @testset "L-BFGS" begin
-    for use_autodiff in (false, true)
-        for (name, prob) in Optim.UnconstrainedProblems.examples
-            if prob.isdifferentiable
-                results = Optim.optimize(prob.f, prob.initial_x, LBFGS(), Optim.Options(autodiff = use_autodiff))
-                if !(name in ("Rosenbrock", "Polynomial"))
-                    @test Optim.converged(results)
-                end
-                @test norm(Optim.minimizer(results) - prob.solutions) < 1e-2
-            end
-        end
-    end
+    run_optim_tests(LBFGS(), convergence_exceptions = (("Rosenbrock", 1), ("Polynomial", 1)))
 end
diff --git a/test/levenberg_marquardt.jl b/test/levenberg_marquardt.jl
deleted file mode 100644
index 80443f537..000000000
--- a/test/levenberg_marquardt.jl
+++ /dev/null
@@ -1,97 +0,0 @@
-@testset "Levenberg Marquardt" begin
-    function f_lm(x)
-        [x[1], 2.0 - x[2]]
-    end
-    function g_lm(x)
-        [1.0 0.0; 0.0 -1.0]
-    end
-
-    initial_x = [100.0, 100.0]
-
-    results = Optim.levenberg_marquardt(f_lm, g_lm, initial_x)
-    @test norm(Optim.minimizer(results) - [0.0, 2.0]) < 0.01
-
-
-    function rosenbrock_res(x, r)
-        r[1] = 10.0 * (x[2] - x[1]^2 )
-        r[2] = 1.0 - x[1]
-        return r
-    end
-
-    function rosenbrock_jac(x, j)
-        j[1, 1] = -20.0 * x[1]
-        j[1, 2] = 10.0
-        j[2, 1] = -1.0
-        j[2, 2] = 0.0
-        return j
-    end
-
-    r = zeros(2)
-    j = zeros(2,2)
-
-    frb(x) = rosenbrock_res(x, r)
-    grb(x) = rosenbrock_jac(x, j)
-
-    initial_xrb = [-1.2, 1.0]
-
-    results = Optim.levenberg_marquardt(frb, grb, initial_xrb)
-
-    @test norm(Optim.minimizer(results) - [1.0, 1.0]) < 0.01
-
-    # check estimate is within the bound PR #278
-    result = Optim.levenberg_marquardt(frb, grb, [150.0, 150.0]; lower = [10.0, 10.0], upper = [200.0, 200.0])
-    @test Optim.minimizer(result)[1] >= 10.0
-    @test Optim.minimizer(result)[2] >= 10.0
-
-
-    # tests for #178, taken from LsqFit.jl, but stripped
-    let
-        srand(12345)
-
-        model(x, p) = p[1]*map(exp,-x.*p[2])
-
-        xdata = linspace(0,10,20)
-        ydata = model(xdata, [1.0 2.0]) + 0.01*randn(length(xdata))
-
-        f_lsq = p -> p[1]*map(exp, -xdata.*p[2])-ydata
-        g_lsq = Calculus.jacobian(f_lsq)
-        results = Optim.levenberg_marquardt(f_lsq, g_lsq, [0.5, 0.5])
-
-        @test norm(Optim.minimizer(results) - [1.0, 2.0]) < 0.05
-    end
-
-    let
-        srand(12345)
-
-        model(x, p) = p[1]*map(exp, -x./p[2])+p[3]
-
-        xdata = 1:100
-        ydata = model(xdata, [10.0, 10.0, 10.0]) + 0.1*randn(length(xdata))
-
-        f_lsq = p -> model(xdata,p)-ydata
-        g_lsq = Calculus.jacobian(f_lsq)
-
-        # tests for box constraints, PR #196
-        @test_throws ArgumentError Optim.levenberg_marquardt(f_lsq, g_lsq, [15.0, 15.0, 15.0], lower=[5.0, 11.0])
-        @test_throws ArgumentError Optim.levenberg_marquardt(f_lsq, g_lsq, [5.0, 5.0, 5.0], upper=[15.0, 9.0])
-        @test_throws ArgumentError Optim.levenberg_marquardt(f_lsq, g_lsq, [15.0, 10.0, 15.0], lower=[5.0, 11.0, 5.0])
-        @test_throws ArgumentError Optim.levenberg_marquardt(f_lsq, g_lsq, [5.0, 10.0, 5.0], upper=[15.0, 9.0, 15.0])
-
-        lower=[5.0, 11.0, 5.0]
-        results = Optim.levenberg_marquardt(f_lsq, g_lsq, [15.0, 15.0, 15.0], lower=lower)
-        Optim.minimizer(results)
-        @test Optim.converged(results)
-        @test all(Optim.minimizer(results) .>= lower)
-
-        upper=[15.0, 9.0, 15.0]
-        results = Optim.levenberg_marquardt(f_lsq, g_lsq, [5.0, 5.0, 5.0], upper=upper)
-        Optim.minimizer(results)
-        @test Optim.converged(results)
-        @test all(Optim.minimizer(results) .<= upper)
-
-        # tests for PR #267
-        Optim.levenberg_marquardt(f_lsq, g_lsq, [15.0, 15.0, 15.0], show_trace=true)
-    end
-end
diff --git a/test/lsthrow.jl b/test/lsthrow.jl
index e02895037..2c9e710d7 100644
--- a/test/lsthrow.jl
+++ b/test/lsthrow.jl
@@ -8,8 +8,7 @@
     for optimizer in (ConjugateGradient, GradientDescent, LBFGS, BFGS, Newton,
                       AcceleratedGradientDescent, MomentumGradientDescent)
         println("Testing $(string(optimizer))")
         prob = Optim.UnconstrainedProblems.examples["Exponential"]
-        optimize(prob.f, prob.initial_x, optimizer(linesearch = ls),
-                 Optim.Options(autodiff = true))
+        optimize(prob.f, prob.initial_x, optimizer(linesearch = ls))
         # TODO: How can I verify that the call to optimize gives a warning?
     end
 end
diff --git a/test/momentum_gradient_descent.jl b/test/momentum_gradient_descent.jl
index d7e15b9cb..e429b278c 100644
--- a/test/momentum_gradient_descent.jl
+++ b/test/momentum_gradient_descent.jl
@@ -1,13 +1,7 @@
 @testset "Momentum Gradient Descent" begin
-    for use_autodiff in (false, true)
-        for (name, prob) in Optim.UnconstrainedProblems.examples
-            if prob.isdifferentiable && !(name in ("Polynomial", "Large Polynomial", "Himmelblau")) # it goes in a direction of ascent -> f_converged == true
-                iterations = name == "Powell" ? 2000 : 1000
-                res = Optim.optimize(prob.f, prob.initial_x, MomentumGradientDescent(),
-                                     Optim.Options(autodiff = use_autodiff,
-                                                   iterations = iterations))
-                @test norm(Optim.minimizer(res) - prob.solutions, Inf) < 1e-2
-            end
-        end
-    end
+    run_optim_tests(MomentumGradientDescent(),
+                    skip = ("Powell", "Rosenbrock"),
+                    convergence_exceptions = (("Large Polynomial",1),
+                                              ("Himmelblau",1), ("Powell", 1)),
+                    minimizer_exceptions = (("Powell", 2),))
 end
diff --git a/test/nelder_mead.jl b/test/nelder_mead.jl
index 3ed74b1d2..f3e7aed03 100644
--- a/test/nelder_mead.jl
+++ b/test/nelder_mead.jl
@@ -1,15 +1,9 @@
 @testset "Nelder Mead" begin
     # Test Optim.nelder_mead for all functions except Large Polynomials in Optim.UnconstrainedProblems.examples
-    for (name, prob) in Optim.UnconstrainedProblems.examples
-        res = Optim.optimize(prob.f, prob.initial_x, NelderMead(), Optim.Options(iterations = 10000))
-        if name == "Powell"
-            res = Optim.optimize(prob.f, prob.initial_x, NelderMead(), Optim.Options(g_tol = 1e-12))
-        elseif name == "Large Polynomial"
-            # TODO do this only when a "run all" flag checked
-            # res = Optim.optimize(prob.f, prob.initial_x, method=NelderMead(initial_simplex = Optim.AffineSimplexer(1.,1.)), iterations = 450_000)
-        end
-        !(name == "Large Polynomial") && @test norm(Optim.minimizer(res) - prob.solutions) < 1e-2
-    end
+
+    run_optim_tests(NelderMead(),
+                    convergence_exceptions = (("Powell", 1),),
+                    skip = (("Large Polynomial"),))
 
     # Test if the trace is correctly stored.
     prob = Optim.UnconstrainedProblems.examples["Rosenbrock"]
diff --git a/test/newton.jl b/test/newton.jl
index 32fb25f69..7dd3051fe 100644
--- a/test/newton.jl
+++ b/test/newton.jl
@@ -10,12 +10,12 @@
     function h!_1(x::Vector, storage::Matrix)
         storage[1, 1] = 12.0 * (x[1] - 5.0)^2
     end
+    initial_x = [0.0]
 
-    # Need to specify autodiff!
-    @test_throws ErrorException Optim.optimize(OnceDifferentiable(f_1, g!_1), [0.0], Newton())
-    Optim.optimize(OnceDifferentiable(f_1, g!_1), [0.0], Newton(), Optim.Options(autodiff = true))
+    Optim.optimize(OnceDifferentiable(f_1, g!_1, initial_x), [0.0], Newton())
 
     results = Optim.optimize(f_1, g!_1, h!_1, [0.0], Newton())
+
     @test_throws ErrorException Optim.x_trace(results)
     @test Optim.g_converged(results)
     @test norm(Optim.minimizer(results) - [5.0]) < 0.01
@@ -43,30 +43,13 @@
     @test Optim.g_converged(results)
     @test norm(Optim.minimizer(results) - [0.0, 0.0]) < 0.01
 
-    # Test Optim.newton for all twice differentiable functions in Optim.UnconstrainedProblems.examples
-    @testset "Optim problems" begin
-        for (name, prob) in Optim.UnconstrainedProblems.examples
-            if prob.istwicedifferentiable
-                res = Optim.optimize(prob.f, prob.g!, prob.h!, prob.initial_x, Newton())
-                @test norm(Optim.minimizer(res) - prob.solutions) < 1e-2
-            end
-        end
-    end
-
     @testset "newton in concave region" begin
         prob=Optim.UnconstrainedProblems.examples["Himmelblau"]
         res = optimize(prob.f, prob.g!, prob.h!, [0., 0.], Newton())
         @test norm(Optim.minimizer(res) - prob.solutions) < 1e-9
     end
 
-    @testset "Optim problems (ForwardDiff)" begin
-        for (name, prob) in Optim.UnconstrainedProblems.examples
-            if prob.istwicedifferentiable
-                res = Optim.optimize(prob.f, prob.initial_x, Newton(), Optim.Options(autodiff = true))
-                @test norm(Optim.minimizer(res) - prob.solutions) < 1e-2
-                res = Optim.optimize(prob.f, prob.g!, prob.initial_x, Newton(), Optim.Options(autodiff = true))
-                @test norm(Optim.minimizer(res) - prob.solutions) < 1e-2
-            end
-        end
+    @testset "Optim problems" begin
+        run_optim_tests(Newton())
     end
 end
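As the test above shows, `Newton()` no longer throws when the Hessian is missing; the `OnceDifferentiable` is simply promoted. A sketch using the same quartic as the test:

```jl
using Optim

f(x) = (x[1] - 5.0)^4
g!(x, storage) = (storage[1] = 4.0 * (x[1] - 5.0)^3)
x0 = [0.0]

od = OnceDifferentiable(f, g!, x0)
res = optimize(od, x0, Newton())  # Hessian is generated during promotion
norm(Optim.minimizer(res) - [5.0]) < 0.01
```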
diff --git a/test/newton_trust_region.jl b/test/newton_trust_region.jl
index a01c2b598..1c389f9fb 100644
--- a/test/newton_trust_region.jl
+++ b/test/newton_trust_region.jl
@@ -148,7 +148,7 @@ end
         storage[1, 1] = 12.0 * (x[1] - 5.0)^2
     end
 
-    d = TwiceDifferentiable(f, g!, h!)
+    d = TwiceDifferentiable(f, g!, h!, [0.0])
 
     results = Optim.optimize(d, [0.0], NewtonTrustRegion())
     @test length(results.trace) == 0
@@ -173,7 +173,7 @@ end
         storage[2, 2] = eta
     end
 
-    d = TwiceDifferentiable(f_2, g!_2, h!_2)
+    d = TwiceDifferentiable(f_2, g!_2, h!_2, Float64[127, 921])
 
     results = Optim.optimize(d, Float64[127, 921], NewtonTrustRegion())
     @test results.g_converged
@@ -181,16 +181,8 @@ end
 
     # Test Optim.newton for all twice differentiable functions in
     # Optim.UnconstrainedProblems.examples
-    for (name, prob) in Optim.UnconstrainedProblems.examples
-        if prob.istwicedifferentiable
-            ddf = OnceDifferentiable(prob.f, prob.g!)
-            res = Optim.optimize(ddf, prob.initial_x, NewtonTrustRegion(), Optim.Options(autodiff = true))
-            @test norm(Optim.minimizer(res) - prob.solutions) < 1e-2
-            res = Optim.optimize(ddf.f, prob.initial_x, NewtonTrustRegion(), Optim.Options(autodiff = true))
-            @test norm(Optim.minimizer(res) - prob.solutions) < 1e-2
-            res = Optim.optimize(ddf.f, ddf.g!, prob.initial_x, NewtonTrustRegion(), Optim.Options(autodiff = true))
-            @test norm(Optim.minimizer(res) - prob.solutions) < 1e-2
-        end
+    @testset "Optim problems" begin
+        run_optim_tests(NewtonTrustRegion())
     end
 end
diff --git a/test/optimize.jl b/test/optimize.jl
index 522cfcd5a..06d2b6215 100644
--- a/test/optimize.jl
+++ b/test/optimize.jl
@@ -29,7 +29,7 @@
     @test Optim.g_converged(results)
     @test norm(Optim.minimizer(results) - [0.0, 0.0]) < 0.01
 
-    results = optimize(f1, [127.0, 921.0], Optim.Options(autodiff = true))
+    results = optimize(f1, [127.0, 921.0])
     @test Optim.g_converged(results)
     @test norm(Optim.minimizer(results) - [0.0, 0.0]) < 0.01
diff --git a/test/runtests.jl b/test/runtests.jl
index b901d33a3..cf011986d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -31,11 +31,49 @@ my_tests = [
     "precon.jl",
     "initial_convergence.jl",
     "extrapolate.jl",
-    "levenberg_marquardt.jl",
     "lsthrow.jl",
     "api.jl",
 ]
 
+differentiability_condition(method, prob) = true
+differentiability_condition(method::Optim.FirstOrderSolver, prob) = prob.isdifferentiable
+differentiability_condition(method::Optim.SecondOrderSolver, prob) = prob.istwicedifferentiable
+
+input_tuple(method, prob) = ((prob.f,),)
+input_tuple(method::Optim.FirstOrderSolver, prob) = ((prob.f,), (prob.f, prob.g!))
+input_tuple(method::Optim.SecondOrderSolver, prob) = ((prob.f,), (prob.f, prob.g!), (prob.f, prob.g!, prob.h!))
+
+function run_optim_tests(method; convergence_exceptions = (),
+                                 minimizer_exceptions = (),
+                                 f_increase_exceptions = (),
+                                 iteration_exceptions = (),
+                                 skip = (),
+                                 show_name = false)
+    # Loop over unconstrained problems
+    for (name, prob) in Optim.UnconstrainedProblems.examples
+        show_name && print_with_color(:green, "Problem: ", name, "\n")
+        # Look for name in the first elements of the iteration_exceptions tuples
+        iter_id = find(n[1] == name for n in iteration_exceptions)
+        # If name wasn't found, use default 1000 iterations, else use provided number
+        iters = length(iter_id) == 0 ? 1000 : iteration_exceptions[iter_id[1]][2]
+        # Construct options
+        options = Optim.Options(allow_f_increases = name in f_increase_exceptions, iterations = iters)
+        # Check if once or twice differentiable
+        if differentiability_condition(method, prob) && !(name in skip)
+            # Loop over appropriate input combinations of f, g!, and h!
+            for (i, input) in enumerate(input_tuple(method, prob))
+                results = Optim.optimize(input..., prob.initial_x, method, options)
+                if !((name, i) in convergence_exceptions)
+                    @test Optim.converged(results)
+                end
+                if !((name, i) in minimizer_exceptions)
+                    @test norm(Optim.minimizer(results) - prob.solutions) < 1e-2
+                end
+            end
+        end
+    end
+end
+
 for my_test in my_tests
     @time include(my_test)
 end
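`run_optim_tests` replaces the per-file problem loops; exceptions are keyed by `(problem name, input combination)`, where combination 1 passes `f` only, 2 adds `g!`, and 3 adds `h!`. A hypothetical new solver's test file then reduces to:

```jl
# test/my_solver.jl -- MySolver is a placeholder, not part of this commit
@testset "My Solver" begin
    run_optim_tests(MySolver();
                    skip = ("Large Polynomial",),                  # never run these problems
                    convergence_exceptions = (("Rosenbrock", 1),), # no converged() check for f-only input
                    iteration_exceptions = (("Powell", 5000),),    # give Powell more iterations
                    show_name = true)
end
```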
diff --git a/test/types.jl b/test/types.jl
index 25c723944..c768e7fa8 100644
--- a/test/types.jl
+++ b/test/types.jl
@@ -30,14 +30,14 @@ import Compat.String
         if res.method == "Nelder-Mead"
             @test startswith(lines[8], " * √(Σ(yᵢ-ȳ)²)/n < 1.0e-08: ")
             @test startswith(lines[9], " * Reached Maximum Number of Iterations: ")
-            @test startswith(lines[10], " * Objective Function Calls: ")
+            @test startswith(lines[10], " * Objective Calls: ")
         elseif res.method == "Simulated Annealing"
             @test startswith(lines[8], " * |x - x'| < ")
             @test startswith(lines[9], " * |f(x) - f(x')| / |f(x)| < ")
             @test startswith(lines[10], " * |g(x)| < ")
             @test startswith(lines[11], " * f(x) > f(x')")
             @test startswith(lines[12], " * Reached Maximum Number of Iterations: ")
-            @test startswith(lines[13], " * Objective Function Calls: ")
+            @test startswith(lines[13], " * Objective Calls: ")
         end
     end
@@ -60,6 +60,9 @@ import Compat.String
         @test startswith(lines[10], " * |g(x)| < ")
         @test startswith(lines[11], " * f(x) > f(x')")
         @test startswith(lines[12], " * Reached Maximum Number of Iterations: ")
-        @test startswith(lines[13], " * Objective Function Calls: ")
+        @test startswith(lines[13], " * Objective Calls: ")
         @test startswith(lines[14], " * Gradient Calls: ")
+        if res.method in ("Newton's Method", "Newton's Method (Trust Region)")
+            @test startswith(lines[15], " * Hessian Calls: ")
+        end
     end