[RFC/WIP] Rework *DifferentiableFunction #337

Merged: 15 commits (Mar 11, 2017)
2 changes: 1 addition & 1 deletion .travis.yml
@@ -3,8 +3,8 @@ os:
- linux
- osx
julia:
- 0.4
- 0.5
- 0.6
- nightly
notifications:
email: false
6 changes: 6 additions & 0 deletions NEWS.md
@@ -1,3 +1,9 @@
# Optim v1.0.0
* Significant changes to the Non-, Once-, and TwiceDifferentiable setup; these now hold temporaries relevant to the evaluation of objectives, gradients, and Hessians. They also hold f-, g-, and h_calls counters
* Refactor tests
* Drop v0.4 support
* Add limits to f-, g-, and h_calls

# Optim v0.7.6 release notes
* Fix deprecations for *Function constructors
* Fix depwarns on Julia master (v0.6)
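To make the first entry concrete, here is a minimal sketch of the reworked setup. The Rosenbrock `f` below is an assumption (it is the example used throughout Optim's docs); the constructor and accessor names are taken from the diffs further down this page.
```jl
using Optim

f(x) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
initial_x = zeros(2)

# The objective wrapper now owns its temporaries and call counters.
od = OnceDifferentiable(f, initial_x; autodiff = :forwarddiff)

# Evaluating through the wrapper caches the value and gradient
# and bumps the counters.
value_grad!(od, initial_x)

Optim.f_calls(od)  # => 1, per the new accessor in src/api.jl
Optim.g_calls(od)  # => 1
```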
2 changes: 1 addition & 1 deletion REQUIRE
@@ -1,4 +1,4 @@
julia 0.4
julia 0.5
Calculus
ForwardDiff 0.3.0 0.4.0
PositiveFactorizations
15 changes: 9 additions & 6 deletions docs/src/algo/autodiff.md
@@ -47,24 +47,27 @@ julia> Optim.minimizer(optimize(f, g!, h!, initial_x, Newton()))
1.0
1.0
```
This is indeed the case. Now let us use finite differences for BFGS (we cannot
get finite difference Hessians in Optim).
This is indeed the case. Now let us use finite differences for BFGS.
```jlcon
julia> Optim.minimizer(optimize(f, initial_x, BFGS()))
2-element Array{Float64,1}:
1.0
1.0
```
Still looks good. Returning to automatic differentiation, let us try both solvers using this
method. We enable automatic differentiation by adding `autodiff = true` to our
`Optim.Options`.
method. To use automatic differentiation, we have to construct an `OnceDifferentiable`
instance and use the `autodiff` keyword.
```jlcon
julia> Optim.minimizer(optimize(f, initial_x, BFGS(), Optim.Options(autodiff = true)))
julia> od = OnceDifferentiable(f, initial_x; autodiff = :forwarddiff);

julia> Optim.minimizer(optimize(od, initial_x, BFGS()))
2-element Array{Float64,1}:
1.0
1.0

julia> Optim.minimizer(optimize(f, initial_x, Newton(), Optim.Options(autodiff = true)))
julia> td = TwiceDifferentiable(f, initial_x; autodiff = :forwarddiff)

julia> Optim.minimizer(optimize(td, initial_x, Newton()))
2-element Array{Float64,1}:
1.0
1.0
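For readers following along, a self-contained version of the docs snippet above might look like the following. `f` and `initial_x` are defined further up the docs page; they are reproduced here as assumptions.
```jl
using Optim

f(x) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
initial_x = zeros(2)

od = OnceDifferentiable(f, initial_x; autodiff = :forwarddiff)
Optim.minimizer(optimize(od, initial_x, BFGS()))

td = TwiceDifferentiable(f, initial_x; autodiff = :forwarddiff)
Optim.minimizer(optimize(td, initial_x, Newton()))
```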
1 change: 0 additions & 1 deletion docs/src/dev/contributing.md
@@ -32,7 +32,6 @@ Minim(; linesearch = LineSearches.hagerzhang!, minim_parameter = 1.0) =
type MinimState{T,N,G}
@add_generic_fields()
x_previous::Array{T,N}
g::G
f_x_previous::T
s::Array{T,N}
@add_linesearch_fields()
9 changes: 7 additions & 2 deletions docs/src/user/config.md
@@ -40,12 +40,17 @@ In addition to the solver, you can alter the behavior of the Optim package by using…
* `x_tol`: What is the threshold for determining convergence in the input vector? Defaults to `1e-32`.
* `f_tol`: What is the threshold for determining convergence in the objective value? Defaults to `1e-32`.
* `g_tol`: What is the threshold for determining convergence in the gradient? Defaults to `1e-8`. For gradient free methods, this will control the main convergence tolerance, which is solver specific.
* `f_calls_limit`: A soft upper limit on the number of objective calls. Defaults to `0` (unlimited).
* `g_calls_limit`: A soft upper limit on the number of gradient calls. Defaults to `0` (unlimited).
* `h_calls_limit`: A soft upper limit on the number of Hessian calls. Defaults to `0` (unlimited).
* `allow_f_increases`: Allow steps that increase the objective value. Defaults to `false`.
* `iterations`: How many iterations will run before the algorithm gives up? Defaults to `1_000`.
* `store_trace`: Should a trace of the optimization algorithm's state be stored? Defaults to `false`.
* `show_trace`: Should a trace of the optimization algorithm's state be shown on `STDOUT`? Defaults to `false`.
* `extended_trace`: Save additional information. Solver dependent.
* `autodiff`: When only an objective function is provided, use automatic differentiation to compute exact numerical gradients. If not, finite differencing will be used. This functionality is experimental. Defaults to `false`.
* `extended_trace`: Save additional information. Solver dependent. Defaults to `false`.
* `show_every`: Trace output is printed every `show_every`th iteration.
* `callback`: A function to be called during tracing. A return value of `true` stops the `optimize` call.
* `time_limit`: A soft upper limit on the total run time. Defaults to `NaN` (unlimited).

We currently recommend the statically dispatched interface by using the `Optim.Options`
constructor:
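A sketch of the recommended constructor combining a few of the keywords listed above; the values are illustrative only.
```jl
res = optimize(f, initial_x, BFGS(),
               Optim.Options(g_tol = 1e-8,
                             f_calls_limit = 500,  # soft limit; 0 means unlimited
                             iterations = 1_000,
                             store_trace = true,
                             show_trace = false))
```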
5 changes: 0 additions & 5 deletions docs/src/user/minimization.md
@@ -25,11 +25,6 @@ using central finite differencing:
```jl
optimize(f, [0.0, 0.0], LBFGS())
```
Alternatively, the `autodiff` keyword will use atomatic differentiation to construct
the gradient.
```jl
optimize(f, [0.0, 0.0], LBFGS(), Optim.Options(autodiff = true))
```
For better performance and greater precision, you can pass your own gradient function. For the Rosenbrock example, the analytical gradient can be shown to be:
```jl
function g!(x, storage)
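The docs page goes on to define the gradient, which the diff view truncates here. For reference, the analytical Rosenbrock gradient in the mutating `(x, storage)` form used by this version of the docs would be roughly:
```jl
function g!(x, storage)
    # d/dx1 of (1 - x1)^2 + 100 (x2 - x1^2)^2
    storage[1] = -2.0 * (1.0 - x[1]) - 400.0 * (x[2] - x[1]^2) * x[1]
    # d/dx2 of the same expression
    storage[2] = 200.0 * (x[2] - x[1]^2)
end

optimize(f, g!, [0.0, 0.0], LBFGS())
```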
4 changes: 1 addition & 3 deletions src/Optim.jl
@@ -44,6 +44,7 @@ module Optim

# Types
include("types.jl")
include("objective_types.jl")

# API
include("api.jl")
@@ -80,9 +81,6 @@ module Optim
# Constrained optimization
include("fminbox.jl")

# trust region methods
include("levenberg_marquardt.jl")

# Heuristic Optimization Methods
include("nelder_mead.jl")
include("simulated_annealing.jl")
17 changes: 4 additions & 13 deletions src/accelerated_gradient_descent.jl
@@ -14,16 +14,13 @@ end
AcceleratedGradientDescent(; linesearch = LineSearches.hagerzhang!) =
AcceleratedGradientDescent(linesearch)
=#
function AcceleratedGradientDescent(; linesearch! = nothing,
linesearch = LineSearches.hagerzhang!)
linesearch = get_linesearch(linesearch!, linesearch)
function AcceleratedGradientDescent(; linesearch = LineSearches.hagerzhang!)
AcceleratedGradientDescent(linesearch)
end

type AcceleratedGradientDescentState{T,N,G}
type AcceleratedGradientDescentState{T,N}
@add_generic_fields()
x_previous::Array{T,N}
g::G
f_x_previous::T
iteration::Int
y::Array{T,N}
@@ -33,18 +30,12 @@ type AcceleratedGradientDescentState{T,N,G}
end

function initial_state{T}(method::AcceleratedGradientDescent, options, d, initial_x::Array{T})
g = similar(initial_x)
f_x = d.fg!(initial_x, g)
value_grad!(d, initial_x)

AcceleratedGradientDescentState("Accelerated Gradient Descent",
length(initial_x),
copy(initial_x), # Maintain current state in state.x
f_x, # Store current f in state.f_x
1, # Track f calls in state.f_calls
1, # Track g calls in state.g_calls
0, # Track h calls in state.h_calls
copy(initial_x), # Maintain previous state in state.x_previous
g, # Store current gradient in state.g
T(NaN), # Store previous f in state.f_x_previous
0, # Iteration
copy(initial_x), # Maintain intermediary current state in state.y
@@ -57,7 +48,7 @@ function update_state!{T}(d, state::AcceleratedGradientDescentState{T}, method::AcceleratedGradientDescent)
state.iteration += 1
# Search direction is always the negative gradient
@simd for i in 1:state.n
@inbounds state.s[i] = -state.g[i]
@inbounds state.s[i] = -gradient(d, i)
end

# Determine the distance of movement along the search line
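The `value(d)`, `gradient(d)`, and `value_grad!(d, x)` calls used throughout these solver diffs come from the new `src/objective_types.jl`, which is not shown in this view. A plausible sketch of their semantics, with the field names as assumptions:
```jl
# Hypothetical accessor sketch; the real definitions live in src/objective_types.jl.
value(d) = d.f_x            # cached objective value at the last evaluation point
gradient(d) = d.g           # cached gradient vector
gradient(d, i) = d.g[i]     # a single component of the cached gradient
# value_grad!(d, x) evaluates the objective and gradient at x, stores both
# in d, and increments the d.f_calls / d.g_calls counters.
```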
8 changes: 8 additions & 0 deletions src/api.jl
@@ -34,9 +34,17 @@ g_norm_trace(r::OptimizationResults) = error("g_norm_trace is not implemented for $(method(r)).")
g_norm_trace(r::MultivariateOptimizationResults) = [ state.g_norm for state in trace(r) ]

f_calls(r::OptimizationResults) = r.f_calls
f_calls(d) = d.f_calls[1]

g_calls(r::OptimizationResults) = error("g_calls is not implemented for $(method(r)).")
g_calls(r::MultivariateOptimizationResults) = r.g_calls
g_calls(d::NonDifferentiable) = 0
g_calls(d) = d.g_calls[1]

h_calls(r::OptimizationResults) = error("h_calls is not implemented for $(method(r)).")
h_calls(r::MultivariateOptimizationResults) = r.h_calls
h_calls(d::Union{NonDifferentiable, OnceDifferentiable}) = 0
h_calls(d) = d.h_calls[1]

converged(r::UnivariateOptimizationResults) = r.converged
converged(r::MultivariateOptimizationResults) = r.x_converged || r.f_converged || r.g_converged
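A short usage sketch for the new counter accessors (hypothetical REPL session; `od` and `initial_x` as in the autodiff docs above):
```jl
res = optimize(od, initial_x, BFGS())

Optim.f_calls(res)  # objective calls recorded in the results object
Optim.g_calls(od)   # gradient calls stored on the objective itself
Optim.h_calls(od)   # == 0 for a OnceDifferentiable, per the methods above
```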
27 changes: 9 additions & 18 deletions src/bfgs.jl
@@ -10,18 +10,15 @@ end
BFGS(; linesearch = LineSearches.hagerzhang!, initial_invH = x -> eye(eltype(x), length(x))) =
BFGS(linesearch, initial_invH)
=#
function BFGS(; linesearch! = nothing,
linesearch = LineSearches.hagerzhang!,
initial_invH = x -> eye(eltype(x), length(x)),
resetalpha = true)
linesearch = get_linesearch(linesearch!, linesearch)
function BFGS(; linesearch = LineSearches.hagerzhang!,
initial_invH = x -> eye(eltype(x), length(x)),
resetalpha = true)
BFGS(linesearch, initial_invH, resetalpha)
end

type BFGSState{T,N,G}
@add_generic_fields()
x_previous::Array{T,N}
g::G
g_previous::G
f_x_previous::T
dx::Array{T,N}
@@ -34,20 +31,14 @@ end

function initial_state{T}(method::BFGS, options, d, initial_x::Array{T})
n = length(initial_x)
g = similar(initial_x)
f_x = d.fg!(initial_x, g)
value_grad!(d, initial_x)
# Maintain a cache for line search results
# Trace the history of states visited
BFGSState("BFGS",
n,
copy(initial_x), # Maintain current state in state.x
f_x, # Store current f in state.f_x
1, # Track f calls in state.f_calls
1, # Track g calls in state.g_calls
0, # Track h calls in state.h_calls
similar(initial_x), # Maintain previous state in state.x_previous
g, # Store current gradient in state.g
copy(g), # Store previous gradient in state.g_previous
copy(gradient(d)), # Store previous gradient in state.g_previous
T(NaN), # Store previous f in state.f_x_previous
similar(initial_x), # Store changes in position in state.dx
similar(initial_x), # Store changes in gradient in state.dg
@@ -61,7 +52,7 @@ end
function update_state!{T}(d, state::BFGSState{T}, method::BFGS)
# Set the search direction
# Search direction is the negative gradient divided by the approximate Hessian
A_mul_B!(state.s, state.invH, state.g)
A_mul_B!(state.s, state.invH, gradient(d))
scale!(state.s, -1)

# Determine the distance of movement along the search line
@@ -79,14 +70,14 @@ function update_state!{T}(d, state::BFGSState{T}, method::BFGS)
end

# Maintain a record of the previous gradient
copy!(state.g_previous, state.g)
copy!(state.g_previous, gradient(d))
lssuccess == false # break on linesearch error
end

function update_h!(d, state, mehtod::BFGS)
function update_h!(d, state, method::BFGS)
# Measure the change in the gradient
@simd for i in 1:state.n
@inbounds state.dg[i] = state.g[i] - state.g_previous[i]
@inbounds state.dg[i] = gradient(d, i) - state.g_previous[i]
end

# Update the inverse Hessian approximation using Sherman-Morrison
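For reference, the update that `update_h!` applies via the Sherman-Morrison(-Woodbury) identity is the standard BFGS inverse-Hessian formula, with `state.dx` and `state.dg` playing the roles of ``s_k`` and ``y_k``:
```math
H_{k+1} = \left(I - \rho_k s_k y_k^\top\right) H_k \left(I - \rho_k y_k s_k^\top\right)
          + \rho_k s_k s_k^\top,
\qquad \rho_k = \frac{1}{y_k^\top s_k},
\quad s_k = x_{k+1} - x_k, \quad y_k = \nabla f_{k+1} - \nabla f_k
```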
51 changes: 20 additions & 31 deletions src/cg.jl
@@ -70,15 +70,11 @@ function ConjugateGradient(;
linesearch)
end
=#
function ConjugateGradient(; linesearch! = nothing,
linesearch = LineSearches.hagerzhang!,
function ConjugateGradient(; linesearch = LineSearches.hagerzhang!,
eta::Real = 0.4,
P::Any = nothing,
precondprep! = nothing,
precondprep = (P, x) -> nothing)

linesearch = get_linesearch(linesearch!, linesearch)
precondprep = get_precondprep(precondprep!, precondprep)
ConjugateGradient(Float64(eta),
P, precondprep,
linesearch)
@@ -87,7 +83,6 @@ end
type ConjugateGradientState{T,N,G}
@add_generic_fields()
x_previous::Array{T,N}
g::G
g_previous::G
f_x_previous::T
y::Array{T,N}
@@ -99,36 +94,30 @@ end


function initial_state{T}(method::ConjugateGradient, options, d, initial_x::Array{T})
g = similar(initial_x)
f_x = d.fg!(initial_x, g)
pg = copy(g)
@assert typeof(f_x) == T
value_grad!(d, initial_x)
pg = copy(gradient(d))
@assert typeof(value(d)) == T
# Output messages
if !isfinite(f_x)
if !isfinite(value(d))
error("Must have finite starting value")
end
if !all(isfinite, g)
@show g
@show find(!isfinite.(g))
if !all(isfinite, gradient(d))
@show gradient(d)
@show find(!isfinite.(gradient(d)))
error("Gradient must have all finite values at starting point")
end

# Determine the initial search direction
# if we don't precondition, then this is an extra superfluous copy
# TODO: consider allowing a reference for pg instead of a copy
method.precondprep!(method.P, initial_x)
A_ldiv_B!(pg, method.P, g)
A_ldiv_B!(pg, method.P, gradient(d))

ConjugateGradientState("Conjugate Gradient",
length(initial_x),
copy(initial_x), # Maintain current state in state.x
f_x, # Store current f in state.f_x
1, # Track f calls in state.f_calls
1, # Track g calls in state.g_calls
0, # Track h calls in state.h_calls
similar(initial_x), # Maintain previous state in state.x_previous
g, # Store current gradient in state.g
similar(g), # Store previous gradient in state.g_previous
similar(gradient(d)), # Store previous gradient in state.g_previous
T(NaN), # Store previous f in state.f_x_previous
similar(initial_x), # Intermediate value in CG calculation
similar(initial_x), # Preconditioned intermediate value in CG calculation
@@ -137,11 +126,11 @@ function initial_state{T}(method::ConjugateGradient, options, d, initial_x::Array{T})
@initial_linesearch()...) # Maintain a cache for line search results in state.lsr
end

function update_state!{T}(df, state::ConjugateGradientState{T}, method::ConjugateGradient)
function update_state!{T}(d, state::ConjugateGradientState{T}, method::ConjugateGradient)
# Search direction is predetermined

# Determine the distance of movement along the search line
lssuccess = perform_linesearch!(state, method, df)
lssuccess = perform_linesearch!(state, method, d)

# Maintain a record of previous position
copy!(state.x_previous, state.x)
@@ -150,15 +139,15 @@ function update_state!{T}(df, state::ConjugateGradientState{T}, method::ConjugateGradient)
LinAlg.axpy!(state.alpha, state.s, state.x)

# Maintain a record of the previous gradient
copy!(state.g_previous, state.g)
copy!(state.g_previous, gradient(d))

# Update the function value and gradient
state.f_x_previous, state.f_x = state.f_x, df.fg!(state.x, state.g)
state.f_calls, state.g_calls = state.f_calls + 1, state.g_calls + 1
state.f_x_previous = value(d)
value_grad!(d, state.x)

# Check sanity of function and gradient
if !isfinite(state.f_x)
error("Function must finite function values")
if !isfinite(value(d))
error("Function value must be finite")
end

# Determine the next search direction using HZ's CG rule
@@ -174,15 +163,15 @@ function update_state!{T}(df, state::ConjugateGradientState{T}, method::ConjugateGradient)
dPd = dot(state.s, method.P, state.s)
etak::T = method.eta * vecdot(state.s, state.g_previous) / dPd
@simd for i in 1:state.n
@inbounds state.y[i] = state.g[i] - state.g_previous[i]
@inbounds state.y[i] = gradient(d, i) - state.g_previous[i]
end
ydots = vecdot(state.y, state.s)
copy!(state.py, state.pg) # below, store pg - pg_previous in py
A_ldiv_B!(state.pg, method.P, state.g)
A_ldiv_B!(state.pg, method.P, gradient(d))
@simd for i in 1:state.n # py = pg - py
@inbounds state.py[i] = state.pg[i] - state.py[i]
end
betak = (vecdot(state.y, state.pg) - vecdot(state.y, state.py) * vecdot(state.g, state.s) / ydots) / ydots
betak = (vecdot(state.y, state.pg) - vecdot(state.y, state.py) * vecdot(gradient(d), state.s) / ydots) / ydots
beta = max(betak, etak)
@simd for i in 1:state.n
@inbounds state.s[i] = beta * state.s[i] - state.pg[i]
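Transcribing the search-direction update above into formulas (notation: ``g`` the current gradient, ``g_-`` the previous gradient, ``y = g - g_-``, ``s`` the search direction, ``P`` the preconditioner), the code computes a preconditioned variant of the Hager-Zhang rule with the ``\eta`` lower bound:
```math
\beta = \max\left(
  \frac{y^\top P^{-1} g - \left(y^\top (P^{-1} g - P^{-1} g_-)\right)
        \frac{g^\top s}{y^\top s}}{y^\top s},
  \;\eta \, \frac{s^\top g_-}{s^\top P s}
\right), \qquad s \leftarrow \beta s - P^{-1} g
```
With ``P = I`` this collapses to a Hager-Zhang-style ``\beta``; the exact constants are as implemented here and may differ from the published formula.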