diff --git a/.travis.yml b/.travis.yml
index e7bc78bc5..31e995163 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,8 +3,8 @@
 os:
   - linux
   - osx
 julia:
-  - 0.4
   - 0.5
+  - 0.6
   - nightly
 notifications:
   email: false
diff --git a/NEWS.md b/NEWS.md
index 122f2c0bd..9ba8ffd8d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,9 @@
+# Optim v0.8.0 release notes
+* Significant changes to the Non-, Once-, and TwiceDifferentiable setup; these now hold temporaries relevant to the evaluation of objectives, gradients, and Hessians. They also hold f-, g-, and h_calls counters
+* Refactor tests
+* Drop v0.4 support
+* Add limits to f-, g-, and h_calls
+
 # Optim v0.7.6 release notes
 * Fix deprecations for *Function constructors
 * Fix depwarns on Julia master (v0.6)
diff --git a/REQUIRE b/REQUIRE
index facbd9d36..65bac343e 100644
--- a/REQUIRE
+++ b/REQUIRE
@@ -1,4 +1,4 @@
-julia 0.4
+julia 0.5
 Calculus
 ForwardDiff 0.3.0 0.4.0
 PositiveFactorizations
diff --git a/docs/src/algo/autodiff.md b/docs/src/algo/autodiff.md
index ac3455946..8602d8a92 100644
--- a/docs/src/algo/autodiff.md
+++ b/docs/src/algo/autodiff.md
@@ -47,8 +47,7 @@ julia> Optim.minimizer(optimize(f, g!, h!, initial_x, Newton()))
 1.0
 1.0
 ```
-This is indeed the case. Now let us use finite differences for BFGS (we cannot
- get finite difference Hessians in Optim).
+This is indeed the case. Now let us use finite differences for BFGS.
 ```jlcon
 julia> Optim.minimizer(optimize(f, initial_x, BFGS()))
 2-element Array{Float64,1}:
@@ -56,15 +55,19 @@ julia> Optim.minimizer(optimize(f, initial_x, BFGS()))
 1.0
 ```
 Still looks good. Returning to automatic differentiation, let us try both solvers using this
-method. We enable automatic differentiation by adding `autodiff = true` to our
-`Optim.Options`.
+method. To use automatic differentiation, we have to construct an `OnceDifferentiable`
+instance and use the `autodiff` keyword.
 ```jlcon
-julia> Optim.minimizer(optimize(f, initial_x, BFGS(), Optim.Options(autodiff = true)))
+julia> od = OnceDifferentiable(f, initial_x; autodiff = :forwarddiff);
+
+julia> Optim.minimizer(optimize(od, initial_x, BFGS()))
 2-element Array{Float64,1}:
 1.0
 1.0

-julia> Optim.minimizer(optimize(f, initial_x, Newton(), Optim.Options(autodiff = true)))
+julia> td = TwiceDifferentiable(f, initial_x; autodiff = :forwarddiff);
+
+julia> Optim.minimizer(optimize(td, initial_x, Newton()))
 2-element Array{Float64,1}:
 1.0
 1.0
diff --git a/docs/src/dev/contributing.md b/docs/src/dev/contributing.md
index 240aa5278..55416abdd 100644
--- a/docs/src/dev/contributing.md
+++ b/docs/src/dev/contributing.md
@@ -32,7 +32,6 @@ Minim(; linesearch = LineSearches.hagerzhang!, minim_parameter = 1.0) =
 type MinimState{T,N,G}
     @add_generic_fields()
     x_previous::Array{T,N}
-    g::G
     f_x_previous::T
     s::Array{T,N}
     @add_linesearch_fields()
diff --git a/docs/src/user/config.md b/docs/src/user/config.md
index a9757df40..5a1641b78 100644
--- a/docs/src/user/config.md
+++ b/docs/src/user/config.md
@@ -40,12 +40,17 @@ In addition to the solver, you can alter the behavior of the Optim package by us
 * `x_tol`: What is the threshold for determining convergence in the input vector? Defaults to `1e-32`.
 * `f_tol`: What is the threshold for determining convergence in the objective value? Defaults to `1e-32`.
 * `g_tol`: What is the threshold for determining convergence in the gradient? Defaults to `1e-8`. For gradient free methods, this will control the main convergence tolerance, which is solver specific.
+* `f_calls_limit`: A soft upper limit on the number of objective calls. Defaults to `0` (unlimited).
+* `g_calls_limit`: A soft upper limit on the number of gradient calls. Defaults to `0` (unlimited).
+* `h_calls_limit`: A soft upper limit on the number of Hessian calls. Defaults to `0` (unlimited).
+* `allow_f_increases`: Allow steps that increase the objective value. Defaults to `false`.
 * `iterations`: How many iterations will run before the algorithm gives up? Defaults to `1_000`.
 * `store_trace`: Should a trace of the optimization algorithm's state be stored? Defaults to `false`.
 * `show_trace`: Should a trace of the optimization algorithm's state be shown on `STDOUT`? Defaults to `false`.
-* `extended_trace`: Save additional information. Solver dependent.
-* `autodiff`: When only an objective function is provided, use automatic differentiation to compute exact numerical gradients. If not, finite differencing will be used. This functionality is experimental. Defaults to `false`.
+* `extended_trace`: Save additional information. Solver dependent. Defaults to `false`.
 * `show_every`: Trace output is printed every `show_every`th iteration.
+* `callback`: A function to be called during tracing. A return value of `true` stops the `optimize` call.
+* `time_limit`: A soft upper limit on the total run time. Defaults to `NaN` (unlimited).
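+
+As a quick, illustrative sketch (the numbers below are arbitrary), the new
+limits can be combined in a single `Optim.Options` call:
+```jl
+# stop early after roughly 100 objective calls, 50 gradient calls, or 10 seconds;
+# the limits are soft, so they are only checked once per iteration
+Optim.Options(f_calls_limit = 100, g_calls_limit = 50, time_limit = 10.0)
+```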

 We currently recommend the statically dispatched interface by using the `Optim.Options` constructor:
diff --git a/docs/src/user/minimization.md b/docs/src/user/minimization.md
index 45311a2a9..3042f7556 100644
--- a/docs/src/user/minimization.md
+++ b/docs/src/user/minimization.md
@@ -25,11 +25,6 @@ using central finite differencing:
 ```jl
 optimize(f, [0.0, 0.0], LBFGS())
 ```
-Alternatively, the `autodiff` keyword will use atomatic differentiation to construct
-the gradient.
-```jl
-optimize(f, [0.0, 0.0], LBFGS(), Optim.Options(autodiff = true))
-```
 For better performance and greater precision, you can pass your own gradient function. For the Rosenbrock example, the analytical gradient can be shown to be:
 ```jl
 function g!(x, storage)
diff --git a/src/Optim.jl b/src/Optim.jl
index 50ff4967f..9bd74f4b0 100644
--- a/src/Optim.jl
+++ b/src/Optim.jl
@@ -44,6 +44,7 @@ module Optim
     # Types
     include("types.jl")
+    include("objective_types.jl")

     # API
     include("api.jl")
@@ -80,9 +81,6 @@ module Optim
     # Constrained optimization
     include("fminbox.jl")

-    # trust region methods
-    include("levenberg_marquardt.jl")
-
     # Heuristic Optimization Methods
     include("nelder_mead.jl")
     include("simulated_annealing.jl")
diff --git a/src/accelerated_gradient_descent.jl b/src/accelerated_gradient_descent.jl
index 75692ebcc..6932c5c13 100644
--- a/src/accelerated_gradient_descent.jl
+++ b/src/accelerated_gradient_descent.jl
@@ -14,16 +14,13 @@ end
 AcceleratedGradientDescent(; linesearch = LineSearches.hagerzhang!) =
   AcceleratedGradientDescent(linesearch)
 =#
-function AcceleratedGradientDescent(; linesearch! = nothing,
-                                    linesearch = LineSearches.hagerzhang!)
-    linesearch = get_linesearch(linesearch!, linesearch)
+function AcceleratedGradientDescent(; linesearch = LineSearches.hagerzhang!)
AcceleratedGradientDescent(linesearch) end -type AcceleratedGradientDescentState{T,N,G} +type AcceleratedGradientDescentState{T,N} @add_generic_fields() x_previous::Array{T,N} - g::G f_x_previous::T iteration::Int y::Array{T,N} @@ -33,18 +30,12 @@ type AcceleratedGradientDescentState{T,N,G} end function initial_state{T}(method::AcceleratedGradientDescent, options, d, initial_x::Array{T}) - g = similar(initial_x) - f_x = d.fg!(initial_x, g) + value_grad!(d, initial_x) AcceleratedGradientDescentState("Accelerated Gradient Descent", length(initial_x), copy(initial_x), # Maintain current state in state.x - f_x, # Store current f in state.f_x - 1, # Track f calls in state.f_calls - 1, # Track g calls in state.g_calls - 0, # Track h calls in state.h_calls copy(initial_x), # Maintain previous state in state.x_previous - g, # Store current gradient in state.g T(NaN), # Store previous f in state.f_x_previous 0, # Iteration copy(initial_x), # Maintain intermediary current state in state.y @@ -57,7 +48,7 @@ function update_state!{T}(d, state::AcceleratedGradientDescentState{T}, method:: state.iteration += 1 # Search direction is always the negative gradient @simd for i in 1:state.n - @inbounds state.s[i] = -state.g[i] + @inbounds state.s[i] = -gradient(d, i) end # Determine the distance of movement along the search line diff --git a/src/api.jl b/src/api.jl index 414f1c0b7..fa1ca238a 100644 --- a/src/api.jl +++ b/src/api.jl @@ -34,9 +34,17 @@ g_norm_trace(r::OptimizationResults) = error("g_norm_trace is not implemented fo g_norm_trace(r::MultivariateOptimizationResults) = [ state.g_norm for state in trace(r) ] f_calls(r::OptimizationResults) = r.f_calls +f_calls(d) = d.f_calls[1] g_calls(r::OptimizationResults) = error("g_calls is not implemented for $(method(r)).") g_calls(r::MultivariateOptimizationResults) = r.g_calls +g_calls(d::NonDifferentiable) = 0 +g_calls(d) = d.g_calls[1] + +h_calls(r::OptimizationResults) = error("h_calls is not implemented for $(method(r)).") +h_calls(r::MultivariateOptimizationResults) = r.h_calls +h_calls(d::Union{NonDifferentiable, OnceDifferentiable}) = 0 +h_calls(d) = d.h_calls[1] converged(r::UnivariateOptimizationResults) = r.converged converged(r::MultivariateOptimizationResults) = r.x_converged || r.f_converged || r.g_converged diff --git a/src/bfgs.jl b/src/bfgs.jl index c42718f65..2da2c705e 100644 --- a/src/bfgs.jl +++ b/src/bfgs.jl @@ -10,18 +10,15 @@ end BFGS(; linesearch = LineSearches.hagerzhang!, initial_invH = x -> eye(eltype(x), length(x))) = BFGS(linesearch, initial_invH) =# -function BFGS(; linesearch! 
= nothing, - linesearch = LineSearches.hagerzhang!, - initial_invH = x -> eye(eltype(x), length(x)), - resetalpha = true) - linesearch = get_linesearch(linesearch!, linesearch) +function BFGS(; linesearch = LineSearches.hagerzhang!, + initial_invH = x -> eye(eltype(x), length(x)), + resetalpha = true) BFGS(linesearch, initial_invH, resetalpha) end type BFGSState{T,N,G} @add_generic_fields() x_previous::Array{T,N} - g::G g_previous::G f_x_previous::T dx::Array{T,N} @@ -34,20 +31,14 @@ end function initial_state{T}(method::BFGS, options, d, initial_x::Array{T}) n = length(initial_x) - g = similar(initial_x) - f_x = d.fg!(initial_x, g) + value_grad!(d, initial_x) # Maintain a cache for line search results # Trace the history of states visited BFGSState("BFGS", n, copy(initial_x), # Maintain current state in state.x - f_x, # Store current f in state.f_x - 1, # Track f calls in state.f_calls - 1, # Track g calls in state.g_calls - 0, # Track h calls in state.h_calls similar(initial_x), # Maintain previous state in state.x_previous - g, # Store current gradient in state.g - copy(g), # Store previous gradient in state.g_previous + copy(gradient(d)), # Store previous gradient in state.g_previous T(NaN), # Store previous f in state.f_x_previous similar(initial_x), # Store changes in position in state.dx similar(initial_x), # Store changes in gradient in state.dg @@ -61,7 +52,7 @@ end function update_state!{T}(d, state::BFGSState{T}, method::BFGS) # Set the search direction # Search direction is the negative gradient divided by the approximate Hessian - A_mul_B!(state.s, state.invH, state.g) + A_mul_B!(state.s, state.invH, gradient(d)) scale!(state.s, -1) # Determine the distance of movement along the search line @@ -79,14 +70,14 @@ function update_state!{T}(d, state::BFGSState{T}, method::BFGS) end # Maintain a record of the previous gradient - copy!(state.g_previous, state.g) + copy!(state.g_previous, gradient(d)) lssuccess == false # break on linesearch error end -function update_h!(d, state, mehtod::BFGS) +function update_h!(d, state, method::BFGS) # Measure the change in the gradient @simd for i in 1:state.n - @inbounds state.dg[i] = state.g[i] - state.g_previous[i] + @inbounds state.dg[i] = gradient(d, i) - state.g_previous[i] end # Update the inverse Hessian approximation using Sherman-Morrison diff --git a/src/cg.jl b/src/cg.jl index c588e1415..6b4e5bd69 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -70,15 +70,11 @@ function ConjugateGradient(; linesearch) end =# -function ConjugateGradient(; linesearch! = nothing, - linesearch = LineSearches.hagerzhang!, +function ConjugateGradient(; linesearch = LineSearches.hagerzhang!, eta::Real = 0.4, P::Any = nothing, - precondprep! 
= nothing, precondprep = (P, x) -> nothing) - linesearch = get_linesearch(linesearch!, linesearch) - precondprep = get_precondprep(precondprep!, precondprep) ConjugateGradient(Float64(eta), P, precondprep, linesearch) @@ -87,7 +83,6 @@ end type ConjugateGradientState{T,N,G} @add_generic_fields() x_previous::Array{T,N} - g::G g_previous::G f_x_previous::T y::Array{T,N} @@ -99,17 +94,16 @@ end function initial_state{T}(method::ConjugateGradient, options, d, initial_x::Array{T}) - g = similar(initial_x) - f_x = d.fg!(initial_x, g) - pg = copy(g) - @assert typeof(f_x) == T + value_grad!(d, initial_x) + pg = copy(gradient(d)) + @assert typeof(value(d)) == T # Output messages - if !isfinite(f_x) + if !isfinite(value(d)) error("Must have finite starting value") end - if !all(isfinite, g) - @show g - @show find(!isfinite.(g)) + if !all(isfinite, gradient(d)) + @show gradient(d) + @show find(!isfinite.(gradient(d))) error("Gradient must have all finite values at starting point") end @@ -117,18 +111,13 @@ function initial_state{T}(method::ConjugateGradient, options, d, initial_x::Arra # if we don't precondition, then this is an extra superfluous copy # TODO: consider allowing a reference for pg instead of a copy method.precondprep!(method.P, initial_x) - A_ldiv_B!(pg, method.P, g) + A_ldiv_B!(pg, method.P, gradient(d)) ConjugateGradientState("Conjugate Gradient", length(initial_x), copy(initial_x), # Maintain current state in state.x - f_x, # Store current f in state.f_x - 1, # Track f calls in state.f_calls - 1, # Track g calls in state.g_calls - 0, # Track h calls in state.h_calls similar(initial_x), # Maintain previous state in state.x_previous - g, # Store current gradient in state.g - similar(g), # Store previous gradient in state.g_previous + similar(gradient(d)), # Store previous gradient in state.g_previous T(NaN), # Store previous f in state.f_x_previous similar(initial_x), # Intermediate value in CG calculation similar(initial_x), # Preconditioned intermediate value in CG calculation @@ -137,11 +126,11 @@ function initial_state{T}(method::ConjugateGradient, options, d, initial_x::Arra @initial_linesearch()...) 
# Maintain a cache for line search results in state.lsr end -function update_state!{T}(df, state::ConjugateGradientState{T}, method::ConjugateGradient) +function update_state!{T}(d, state::ConjugateGradientState{T}, method::ConjugateGradient) # Search direction is predetermined # Determine the distance of movement along the search line - lssuccess = perform_linesearch!(state, method, df) + lssuccess = perform_linesearch!(state, method, d) # Maintain a record of previous position copy!(state.x_previous, state.x) @@ -150,15 +139,15 @@ function update_state!{T}(df, state::ConjugateGradientState{T}, method::Conjugat LinAlg.axpy!(state.alpha, state.s, state.x) # Maintain a record of the previous gradient - copy!(state.g_previous, state.g) + copy!(state.g_previous, gradient(d)) # Update the function value and gradient - state.f_x_previous, state.f_x = state.f_x, df.fg!(state.x, state.g) - state.f_calls, state.g_calls = state.f_calls + 1, state.g_calls + 1 + state.f_x_previous = value(d) + value_grad!(d, state.x) # Check sanity of function and gradient - if !isfinite(state.f_x) - error("Function must finite function values") + if !isfinite(value(d)) + error("Function value must be finite") end # Determine the next search direction using HZ's CG rule @@ -174,15 +163,15 @@ function update_state!{T}(df, state::ConjugateGradientState{T}, method::Conjugat dPd = dot(state.s, method.P, state.s) etak::T = method.eta * vecdot(state.s, state.g_previous) / dPd @simd for i in 1:state.n - @inbounds state.y[i] = state.g[i] - state.g_previous[i] + @inbounds state.y[i] = gradient(d, i) - state.g_previous[i] end ydots = vecdot(state.y, state.s) copy!(state.py, state.pg) # below, store pg - pg_previous in py - A_ldiv_B!(state.pg, method.P, state.g) + A_ldiv_B!(state.pg, method.P, gradient(d)) @simd for i in 1:state.n # py = pg - py @inbounds state.py[i] = state.pg[i] - state.py[i] end - betak = (vecdot(state.y, state.pg) - vecdot(state.y, state.py) * vecdot(state.g, state.s) / ydots) / ydots + betak = (vecdot(state.y, state.pg) - vecdot(state.y, state.py) * vecdot(gradient(d), state.s) / ydots) / ydots beta = max(betak, etak) @simd for i in 1:state.n @inbounds state.s[i] = beta * state.s[i] - state.pg[i] diff --git a/src/deprecate.jl b/src/deprecate.jl index 0d6bc2b0f..e69de29bb 100644 --- a/src/deprecate.jl +++ b/src/deprecate.jl @@ -1,79 +0,0 @@ -const has_deprecated_linesearch! = Ref(false) -const has_deprecated_neighbor! = Ref(false) -const has_deprecated_precondprep! 
= Ref(false) -const has_deprecated_levenberg_marquardt = Ref(false) - -@deprecate MultivariateOptimizationResults(method, - initial_x, minimizer, minimum, iterations, - iteration_converged, x_converged, x_tol, - f_converged, f_tol, g_converged, g_tol, - trace, f_calls, g_calls) MultivariateOptimizationResults(method, - initial_x, minimizer, minimum, iterations, - iteration_converged, x_converged, x_tol, - f_converged, f_tol, g_converged, g_tol, false, - trace, f_calls, g_calls, 0) - -@deprecate MultivariateOptimizationResults(method, - initial_x, minimizer, minimum, iterations, - iteration_converged, x_converged, x_tol, - f_converged, f_tol, g_converged, g_tol, - trace, f_calls, g_calls, h_calls) MultivariateOptimizationResults(method, - initial_x, minimizer, minimum, iterations, - iteration_converged, x_converged, x_tol, - f_converged, f_tol, g_converged, g_tol, false, - trace, f_calls, g_calls, h_calls) - -# LineSearches deprecation -for name in names(LineSearches) - if name in (:LineSearches, :hz_linesearch!, :mt_linesearch!, - :interpolating_linesearch!, :backtracking_linesearch!, - :interpbacktracking_linesearch!) - continue - end - eval(:(@deprecate $name LineSearches.$name)) -end - -@deprecate hz_linesearch! LineSearches.hagerzhang! -@deprecate mt_linesearch! LineSearches.morethuente! -@deprecate interpolating_linesearch! LineSearches.strongwolfe! -@deprecate backtracking_linesearch! LineSearches.backtracking! -@deprecate interpbacktracking_linesearch! LineSearches.interpbacktracking! - -@deprecate OptimizationOptions(args...; kwargs...) Optim.Options(args...; kwargs...) - -function get_linesearch(linesearch!, linesearch) - if linesearch! != nothing - if !has_deprecated_linesearch![] - warn("linesearch! keyword is deprecated, use linesearch instead (without !)") - has_deprecated_linesearch![] = true - end - return linesearch! - end - linesearch -end - -function get_precondprep(precondprep!, precondprep) - if precondprep! != nothing - if !has_deprecated_precondprep![] - warn("precondprep! keyword is deprecated, use precondprep instead (without !)") - has_deprecated_precondprep![] = true - end - return precondprep! - end - precondprep -end - -function get_neighbor(neighbor!, neighbor) - if neighbor! != nothing - if !has_deprecated_neighbor![] - warn("neighbor! keyword is deprecated, use neighbor instead (without !)") - has_deprecated_neighbor![] = true - end - return neighbor! - end - neighbor -end - -@deprecate NonDifferentiableFunction(args...) NonDifferentiable(args...) -@deprecate DifferentiableFunction(args...) OnceDifferentiable(args...) -@deprecate TwiceDifferentiableFunction(args...) TwiceDifferentiable(args...) diff --git a/src/fminbox.jl b/src/fminbox.jl index 7e2fb6dcc..797ea2c77 100644 --- a/src/fminbox.jl +++ b/src/fminbox.jl @@ -118,12 +118,10 @@ function optimize{T<:AbstractFloat}( extended_trace::Bool = false, callback = nothing, show_every::Integer = 1, - linesearch! = nothing, linesearch = LineSearches.hagerzhang!, eta::Real = convert(T,0.4), mu0::T = convert(T, NaN), mufactor::T = convert(T, 0.001), - precondprep! = nothing, precondprep = (P, x, l, u, mu) -> precondprepbox!(P, x, l, u, mu), optimizer = ConjugateGradient, optimizer_o = Options(store_trace = store_trace, @@ -131,11 +129,6 @@ function optimize{T<:AbstractFloat}( extended_trace = extended_trace), nargs...) 
- # remove in v0.8.0 - - linesearch = get_linesearch(linesearch!, linesearch) - precondprep = get_precondprep(precondprep!, precondprep) - optimizer == Newton && warning("Newton is not supported as the inner optimizer. Defaulting to ConjugateGradient.") x = copy(initial_x) fbarrier = (x, gbarrier) -> barrier_box(x, gbarrier, l, u) @@ -183,7 +176,7 @@ function optimize{T<:AbstractFloat}( # Optimize with current setting of mu funcc = (x, g) -> barrier_combined(x, g, gfunc, gbarrier, fb, mu) fval0 = funcc(x, nothing) - dfbox = OnceDifferentiable(x->funcc(x,nothing), (x,g)->(funcc(x,g); g), funcc) + dfbox = OnceDifferentiable(x->funcc(x,nothing), (x,g)->(funcc(x,g); g), funcc, initial_x) if show_trace > 0 println("#### Calling optimizer with mu = ", mu, " ####") end diff --git a/src/gradient_descent.jl b/src/gradient_descent.jl index c672c760c..34087f5b1 100644 --- a/src/gradient_descent.jl +++ b/src/gradient_descent.jl @@ -9,38 +9,27 @@ GradientDescent(; linesearch = LineSearches.hagerzhang!, P = nothing, precondprep = (P, x) -> nothing) = GradientDescent(linesearch, P, precondprep) =# -function GradientDescent(; linesearch! = nothing, - linesearch = LineSearches.hagerzhang!, +function GradientDescent(; linesearch = LineSearches.hagerzhang!, P = nothing, - precondprep! = nothing, precondprep = (P, x) -> nothing) - linesearch = get_linesearch(linesearch!, linesearch) - precondprep = get_precondprep(precondprep!, precondprep) GradientDescent(linesearch, P, precondprep) end -type GradientDescentState{T,N,G} +type GradientDescentState{T,N} @add_generic_fields() x_previous::Array{T,N} - g::G f_x_previous::T s::Array{T,N} @add_linesearch_fields() end function initial_state{T}(method::GradientDescent, options, d, initial_x::Array{T}) - g = similar(initial_x) - f_x = d.fg!(initial_x, g) + value_grad!(d, initial_x) GradientDescentState("Gradient Descent", length(initial_x), copy(initial_x), # Maintain current state in state.x - f_x, # Store current f in state.f_x - 1, # Track f calls in state.f_calls - 1, # Track g calls in state.g_calls - 0, # Track h calls in state.h_calls similar(initial_x), # Maintain previous state in state.x_previous - g, # Store current gradient in state.g T(NaN), # Store previous f in state.f_x_previous similar(initial_x), # Maintain current search direction in state.s @initial_linesearch()...) # Maintain a cache for line search results in state.lsr @@ -49,7 +38,7 @@ end function update_state!{T}(d, state::GradientDescentState{T}, method::GradientDescent) # Search direction is always the negative preconditioned gradient method.precondprep!(method.P, state.x) - A_ldiv_B!(state.s, method.P, state.g) + A_ldiv_B!(state.s, method.P, gradient(d)) @simd for i in 1:state.n @inbounds state.s[i] = -state.s[i] end diff --git a/src/l_bfgs.jl b/src/l_bfgs.jl index 9cdccc9ef..78a9b6034 100644 --- a/src/l_bfgs.jl +++ b/src/l_bfgs.jl @@ -75,24 +75,19 @@ LBFGS(; m::Integer = 10, linesearch = LineSearches.hagerzhang!, LBFGS(Int(m), linesearch, P, precondprep, extrapolate, snap2one) =# -function LBFGS(; linesearch! = nothing, - m::Integer = 10, +function LBFGS(; m::Integer = 10, linesearch = LineSearches.hagerzhang!, P=nothing, - precondprep! 
= nothing, precondprep = (P, x) -> nothing, extrapolate::Bool=false, snap2one = (0.75, Inf)) - linesearch = get_linesearch(linesearch!, linesearch) - precondprep = get_precondprep(precondprep!, precondprep) LBFGS(Int(m), linesearch, P, precondprep, extrapolate, snap2one) end type LBFGSState{T,N,M} @add_generic_fields() x_previous::Array{T,N} - g::Array{T,N} g_previous::Array{T,N} rho::Array{T,N} dx_history::Array{T,M} @@ -110,20 +105,12 @@ end function initial_state{T}(method::LBFGS, options, d, initial_x::Array{T}) n = length(initial_x) - g = similar(initial_x) - f_x = d.fg!(initial_x, g) - # Maintain a cache for line search results - # Trace the history of states visited + value_grad!(d, initial_x) LBFGSState("L-BFGS", n, copy(initial_x), # Maintain current state in state.x - f_x, # Store current f in state.f_x - 1, # Track f calls in state.f_calls - 1, # Track g calls in state.g_calls - 0, # Track h calls in state.h_calls similar(initial_x), # Maintain previous state in state.x_previous - g, # Store current gradient in state.g - similar(g), # Store previous gradient in state.g_previous + similar(gradient(d)), # Store previous gradient in state.g_previous Array{T}(method.m), # state.rho Array{T}(n, method.m), # Store changes in position in state.dx_history Array{T}(n, method.m), # Store changes in gradient in state.dg_history @@ -147,7 +134,7 @@ function update_state!{T}(d, state::LBFGSState{T}, method::LBFGS) method.precondprep!(method.P, state.x) # Determine the L-BFGS search direction # FIXME just pass state and method? - twoloop!(state.s, state.g, state.rho, state.dx_history, state.dg_history, + twoloop!(state.s, gradient(d), state.rho, state.dx_history, state.dg_history, method.m, state.pseudo_iteration, state.twoloop_alpha, state.twoloop_q, method.P) @@ -165,8 +152,8 @@ function update_state!{T}(d, state::LBFGSState{T}, method::LBFGS) end # Save old f and g values to prepare for update_g! call - state.f_x_previous = state.f_x - copy!(state.g_previous, state.g) + state.f_x_previous = value(d) + copy!(state.g_previous, gradient(d)) lssuccess == false # break on linesearch error end @@ -174,7 +161,7 @@ end function update_h!(d, state, method::LBFGS) # Measure the change in the gradient @simd for i in 1:state.n - @inbounds state.dg[i] = state.g[i] - state.g_previous[i] + @inbounds state.dg[i] = gradient(d, i) - state.g_previous[i] end # Update the L-BFGS history of positions and gradients diff --git a/src/levenberg_marquardt.jl b/src/levenberg_marquardt.jl deleted file mode 100644 index fa89e611e..000000000 --- a/src/levenberg_marquardt.jl +++ /dev/null @@ -1,174 +0,0 @@ -immutable LevenbergMarquardt <: Optimizer end - -""" - `levenberg_marquardt(f, g, initial_x; ` - -Returns the argmin over x of `sum(f(x).^2)` using the Levenberg-Marquardt algorithm, and an -estimate of the Jacobian of `f` at x. - -The function `f` should take an input vector of length n and return an output vector of length m. -The function `g` is the Jacobian of f, and should be an m x n matrix. -`initial_x` is an initial guess for the solution. - -Implements box constraints as described in Kanzow, Yamashita, Fukushima (2004; J Comp & Applied Math). 
- -# Keyword arguments -* `tolX::Real=1e-8`: search tolerance in x -* `tolG::Real=1e-12`: search tolerance in gradient -* `maxIter::Integer=100`: maximum number of iterations -* `lambda::Real=10.0`: (inverse of) initial trust region radius -* `show_trace::Bool=false`: print a status summary on each iteration if true -* `lower,upper=[]`: bound solution to these limits -""" -function levenberg_marquardt{F<:Function, G<:Function, T}(f::F, g::G, initial_x::AbstractVector{T}; - tolX::Real = 1e-8, tolG::Real = 1e-12, maxIter::Integer = 100, - lambda::Real = 10.0, show_trace::Bool = false, lower::Vector{T} = Array{T}(0), upper::Vector{T} = Array{T}(0)) - - if !has_deprecated_levenberg_marquardt[] - warn("levenberg_marquardt has been moved out of Optim.jl and into LsqFit.jl. Please adjust your code, and change your dependency to match this migration.") - has_deprecated_levenberg_marquardt[] = true - end - # check parameters - ((isempty(lower) || length(lower)==length(initial_x)) && (isempty(upper) || length(upper)==length(initial_x))) || - throw(ArgumentError("Bounds must either be empty or of the same length as the number of parameters.")) - ((isempty(lower) || all(initial_x .>= lower)) && (isempty(upper) || all(initial_x .<= upper))) || - throw(ArgumentError("Initial guess must be within bounds.")) - - # other constants - const MAX_LAMBDA = 1e16 # minimum trust region radius - const MIN_LAMBDA = 1e-16 # maximum trust region radius - const MIN_STEP_QUALITY = 1e-3 - const GOOD_STEP_QUALITY = 0.75 - const MIN_DIAGONAL = 1e-6 # lower bound on values of diagonal matrix used to regularize the trust region step - - - converged = false - x_converged = false - g_converged = false - need_jacobian = true - iterCt = 0 - x = copy(initial_x) - delta_x = copy(initial_x) - f_calls = 0 - g_calls = 0 - - fcur = f(x) - f_calls += 1 - residual = sum(abs2,fcur) - - # Create buffers - m = length(fcur) - n = length(x) - JJ = Matrix{T}(n, n) - n_buffer = Vector{T}(n) - m_buffer = Vector{T}(m) - - # Maintain a trace of the system. - tr = OptimizationTrace{LevenbergMarquardt}() - if show_trace - d = Dict("lambda" => lambda) - os = OptimizationState{LevenbergMarquardt}(iterCt, sum(abs2,fcur), NaN, d) - push!(tr, os) - println(os) - end - - while (~converged && iterCt < maxIter) - if need_jacobian - J = g(x) - g_calls += 1 - need_jacobian = false - end - # we want to solve: - # argmin 0.5*||J(x)*delta_x + f(x)||^2 + lambda*||diagm(J'*J)*delta_x||^2 - # Solving for the minimum gives: - # (J'*J + lambda*diagm(DtD)) * delta_x == -J^T * f(x), where DtD = sum(abs2,J,1) - # Where we have used the equivalence: diagm(J'*J) = diagm(sum(abs2,J,1)) - # It is additionally useful to bound the elements of DtD below to help - # prevent "parameter evaporation". 
- DtD = vec(sum(abs2,J, 1)) - for i in 1:length(DtD) - if DtD[i] <= MIN_DIAGONAL - DtD[i] = MIN_DIAGONAL - end - end - # delta_x = ( J'*J + lambda * diagm(DtD) ) \ ( -J'*fcur ) - At_mul_B!(JJ, J, J) - @simd for i in 1:n - @inbounds JJ[i, i] += lambda * DtD[i] - end - At_mul_B!(n_buffer, J, fcur) - scale!(n_buffer, -1) - delta_x = JJ \ n_buffer - - # apply box constraints - if !isempty(lower) - @simd for i in 1:n - @inbounds delta_x[i] = max(x[i] + delta_x[i], lower[i]) - x[i] - end - end - if !isempty(upper) - @simd for i in 1:n - @inbounds delta_x[i] = min(x[i] + delta_x[i], upper[i]) - x[i] - end - end - - # if the linear assumption is valid, our new residual should be: - # predicted_residual = sum(abs2,J*delta_x + fcur) - A_mul_B!(m_buffer, J, delta_x) - LinAlg.axpy!(1, fcur, m_buffer) - predicted_residual = sum(abs2,m_buffer) - # check for numerical problems in solving for delta_x by ensuring that the predicted residual is smaller - # than the current residual - if predicted_residual > residual + 2max(eps(predicted_residual),eps(residual)) - warn("""Problem solving for delta_x: predicted residual increase. - $predicted_residual (predicted_residual) > - $residual (residual) + $(eps(predicted_residual)) (eps)""") - end - # try the step and compute its quality - @simd for i in 1:n - @inbounds n_buffer[i] = x[i] + delta_x[i] - end - trial_f = f(n_buffer) - f_calls += 1 - trial_residual = sum(abs2,trial_f) - # step quality = residual change / predicted residual change - rho = (trial_residual - residual) / (predicted_residual - residual) - if rho > MIN_STEP_QUALITY - x += delta_x - fcur = trial_f - residual = trial_residual - if rho > GOOD_STEP_QUALITY - # increase trust region radius - lambda = max(0.1*lambda, MIN_LAMBDA) - end - need_jacobian = true - else - # decrease trust region radius - lambda = min(10*lambda, MAX_LAMBDA) - end - iterCt += 1 - - # show state - if show_trace - At_mul_B!(n_buffer, J, fcur) - g_norm = norm(n_buffer, Inf) - d = @compat Dict("g(x)" => g_norm, "dx" => delta_x, "lambda" => lambda) - os = OptimizationState{LevenbergMarquardt}(iterCt, sum(abs2,fcur), g_norm, d) - push!(tr, os) - println(os) - end - - # check convergence criteria: - # 1. Small gradient: norm(J^T * fcur, Inf) < tolG - # 2. Small step size: norm(delta_x) < tolX - At_mul_B!(n_buffer, J, fcur) - if norm(n_buffer, Inf) < tolG - g_converged = true - elseif norm(delta_x) < tolX*(tolX + norm(x)) - x_converged = true - end - converged = g_converged | x_converged - end - - MultivariateOptimizationResults("Levenberg-Marquardt", initial_x, x, sum(abs2,fcur), iterCt, !converged, x_converged, 0.0, false, 0.0, g_converged, tolG, false, tr, f_calls, g_calls, 0) -end diff --git a/src/momentum_gradient_descent.jl b/src/momentum_gradient_descent.jl index bb866e651..74e126b1e 100644 --- a/src/momentum_gradient_descent.jl +++ b/src/momentum_gradient_descent.jl @@ -11,34 +11,25 @@ MomentumGradientDescent(; mu::Real = 0.01, linesearch = LineSearches.hagerzhang! MomentumGradientDescent(Float64(mu), linesearch) =# -function MomentumGradientDescent(; mu::Real = 0.01, linesearch! = nothing, - linesearch = LineSearches.hagerzhang!) - linesearch = get_linesearch(linesearch!, linesearch) +function MomentumGradientDescent(; mu::Real = 0.01, linesearch = LineSearches.hagerzhang!) 
MomentumGradientDescent(Float64(mu), linesearch) end -type MomentumGradientDescentState{T,N,G} +type MomentumGradientDescentState{T,N} @add_generic_fields() x_previous::Array{T,N} - g::G f_x_previous::T s::Array{T,N} @add_linesearch_fields() end function initial_state{T}(method::MomentumGradientDescent, options, d, initial_x::Array{T}) - g = similar(initial_x) - f_x = d.fg!(initial_x, g) + value_grad!(d, initial_x) MomentumGradientDescentState("Momentum Gradient Descent", length(initial_x), copy(initial_x), # Maintain current state in state.x - f_x, # Store current f in state.f_x - 1, # Track f calls in state.f_calls - 1, # Track g calls in state.g_calls - 0, # Track h calls in state.h_calls copy(initial_x), # Maintain previous state in state.x_previous - g, # Store current gradient in state.g T(NaN), # Store previous f in state.f_x_previous similar(initial_x), # Maintain current search direction in state.s @initial_linesearch()...) # Maintain a cache for line search results in state.lsr @@ -47,7 +38,7 @@ end function update_state!{T}(d, state::MomentumGradientDescentState{T}, method::MomentumGradientDescent) # Search direction is always the negative gradient @simd for i in 1:state.n - @inbounds state.s[i] = -state.g[i] + @inbounds state.s[i] = -gradient(d, i) end # Determine the distance of movement along the search line diff --git a/src/nelder_mead.jl b/src/nelder_mead.jl index ca4e9fb66..d5705ad03 100644 --- a/src/nelder_mead.jl +++ b/src/nelder_mead.jl @@ -124,14 +124,13 @@ type NelderMeadState{T, N} step_type::String end -initial_state(method::NelderMead, options, d, initial_x::Array) = initial_state(method, options, d.f, initial_x) -function initial_state{F<:Function, T}(method::NelderMead, options, f::F, initial_x::Array{T}) +function initial_state{F, T}(method::NelderMead, options, f::F, initial_x::Array{T}) n = length(initial_x) m = n + 1 simplex = simplexer(method.initial_simplex, initial_x) f_simplex = zeros(T, m) @inbounds for i in 1:length(simplex) - f_simplex[i] = f(simplex[i]) + f_simplex[i] = value(f, simplex[i]) end # Get the indeces that correspond to the ordering of the f values # at the vertices. 
i_order[1] is the index in the simplex of the vertex @@ -144,10 +143,6 @@ function initial_state{F<:Function, T}(method::NelderMead, options, f::F, initia NelderMeadState("Nelder-Mead", n, # Dimensionality of the problem similar(initial_x), # Variable to hold final minimizer value for MultivariateOptimizationResults - f_simplex[i_order[1]], # Store Nelder Mead objective in state.f_x = state.f_lowest - m, - 0, - 0, m, # Number of vertices in the simplex simplex, # Maintain simplex in state.simplex centroid(simplex, i_order[m]), # Maintain centroid in state.centroid @@ -167,8 +162,7 @@ NelderMeadState("Nelder-Mead", "initial") end -update_state!(d, state::NelderMeadState, method::NelderMead) = update_state!(d.f, state, method) -function update_state!{F<:Function, T}(f::F, state::NelderMeadState{T}, method::NelderMead) +function update_state!{F, T}(f::F, state::NelderMeadState{T}, method::NelderMead) # Augment the iteration counter shrink = false n, m = state.n, state.m @@ -186,15 +180,13 @@ function update_state!{F<:Function, T}(f::F, state::NelderMeadState{T}, method:: state.x_reflect[j] = state.x_centroid[j] + state.α * (state.x_centroid[j]-state.x_highest[j]) end - f_reflect = f(state.x_reflect) - state.f_calls += 1 + f_reflect = value(f, state.x_reflect) if f_reflect < state.f_lowest # Compute an expansion @inbounds for j in 1:n state.x_cache[j] = state.x_centroid[j] + state.β *(state.x_reflect[j] - state.x_centroid[j]) end - f_expand = f(state.x_cache) - state.f_calls += 1 + f_expand = value(f, state.x_cache) if f_expand < f_reflect copy!(state.simplex[state.i_order[m]], state.x_cache) @@ -222,7 +214,7 @@ function update_state!{F<:Function, T}(f::F, state::NelderMeadState{T}, method:: @simd for j in 1:n @inbounds state.x_cache[j] = state.x_centroid[j] + state.γ * (state.x_reflect[j]-state.x_centroid[j]) end - f_outside_contraction = f(state.x_cache) + f_outside_contraction = value(f, state.x_cache) if f_outside_contraction < f_reflect copy!(state.simplex[state.i_order[m]], state.x_cache) @inbounds state.f_simplex[state.i_order[m]] = f_outside_contraction @@ -237,7 +229,7 @@ function update_state!{F<:Function, T}(f::F, state::NelderMeadState{T}, method:: @simd for j in 1:n @inbounds state.x_cache[j] = state.x_centroid[j] - γ *(state.x_reflect[j] - state.x_centroid[j]) end - f_inside_contraction = f(state.x_cache) + f_inside_contraction = value(f, state.x_cache) if f_inside_contraction < f_highest copy!(state.simplex[state.i_order[m]], state.x_cache) @inbounds state.f_simplex[state.i_order[m]] = f_inside_contraction @@ -253,7 +245,7 @@ function update_state!{F<:Function, T}(f::F, state::NelderMeadState{T}, method:: for i = 2:m ord = state.i_order[i] copy!(state.simplex[ord], state.x_lowest + state.δ*(state.simplex[ord]-state.x_lowest)) - state.f_simplex[ord] = f(state.simplex[ord]) + state.f_simplex[ord] = value(f, state.simplex[ord]) end step_type = "shrink" sortperm!(state.i_order, state.f_simplex) @@ -263,18 +255,16 @@ function update_state!{F<:Function, T}(f::F, state::NelderMeadState{T}, method:: false end -after_while!(d, state, method::NelderMead, options) = after_while!(d.f, state, method::NelderMead, options) -function after_while!{F<:Function}(f::F, state, method::NelderMead, options) +function after_while!{F}(f::F, state, method::NelderMead, options) sortperm!(state.i_order, state.f_simplex) x_centroid_min = centroid(state.simplex, state.i_order[state.m]) - f_centroid_min = f(x_centroid_min) - state.f_calls += 1 + f_centroid_min = value(f, x_centroid_min) f_min, i_f_min = 
findmin(state.f_simplex) x_min = state.simplex[i_f_min] if f_centroid_min < f_min x_min = x_centroid_min f_min = f_centroid_min end - state.f_x = f_min + f.f_x = f_min state.x[:] = x_min end diff --git a/src/newton.jl b/src/newton.jl index f95a0077c..6bff2cdd6 100644 --- a/src/newton.jl +++ b/src/newton.jl @@ -1,23 +1,19 @@ -immutable Newton <: Optimizer - linesearch!::Function +immutable Newton{L} <: Optimizer + linesearch!::L resetalpha::Bool end #= uncomment v0.8.0 Newton(; linesearch::Function = LineSearches.hagerzhang!) = Newton(linesearch) =# -function Newton(; linesearch! = nothing, linesearch::Function = LineSearches.hagerzhang!, - resetalpha = true) - linesearch = get_linesearch(linesearch!, linesearch) +function Newton(; linesearch::Function = LineSearches.hagerzhang!, resetalpha = true) Newton(linesearch,resetalpha) end type NewtonState{T, N, F<:Base.LinAlg.Cholesky, Thd} @add_generic_fields() x_previous::Array{T, N} - g::Array{T, N} f_x_previous::T - H::Matrix{T} F::F Hd::Thd s::Array{T, N} @@ -27,26 +23,15 @@ end function initial_state{T}(method::Newton, options, d, initial_x::Array{T}) n = length(initial_x) # Maintain current gradient in gr - g = similar(initial_x) - s = similar(g) - x_ls, g_ls = similar(g), similar(g) - f_x_previous, f_x = NaN, d.fg!(initial_x, g) - f_calls, g_calls = 1, 1 - H = Array{T}(n, n) - d.h!(initial_x, H) - h_calls = 1 - + s = similar(initial_x) + x_ls, g_ls = similar(initial_x), similar(initial_x) + value_grad!(d, initial_x) + hessian!(d, initial_x) NewtonState("Newton's Method", length(initial_x), copy(initial_x), # Maintain current state in state.x - f_x, # Store current f in state.f_x - f_calls, # Track f calls in state.f_calls - g_calls, # Track g calls in state.g_calls - h_calls, similar(initial_x), # Maintain previous state in state.x_previous - g, # Store current gradient in state.g T(NaN), # Store previous f in state.f_x_previous - H, Base.LinAlg.Cholesky(Matrix{T}(0, 0), :U), Vector{Int8}(), similar(initial_x), # Maintain current search direction in state.s @@ -59,8 +44,8 @@ function update_state!{T}(d, state::NewtonState{T}, method::Newton) # represented by H. It deviates from the usual "add a scaled # identity matrix" version of the modified Newton method. More # information can be found in the discussion at issue #153. 
- state.F, state.Hd = ldltfact!(Positive, state.H) - state.s[:] = -(state.F\state.g) + state.F, state.Hd = ldltfact!(Positive, hessian(d)) + state.s[:] = -(state.F\gradient(d)) # Determine the distance of movement along the search line lssuccess = perform_linesearch!(state, method, d) diff --git a/src/newton_trust_region.jl b/src/newton_trust_region.jl index 6daef8dcd..9479f06d0 100644 --- a/src/newton_trust_region.jl +++ b/src/newton_trust_region.jl @@ -208,11 +208,9 @@ NewtonTrustRegion(; initial_delta::Real = 1.0, type NewtonTrustRegionState{T,N,G} @add_generic_fields() x_previous::Array{T,N} - g::G g_previous::G f_x_previous::T s::Array{T,N} - H::Matrix{T} hard_case::Bool reached_subproblem_solution::Bool interior::Bool @@ -238,33 +236,22 @@ function initial_state{T}(method::NewtonTrustRegion, options, d, initial_x::Arra reached_subproblem_solution = true interior = true lambda = NaN - g = similar(initial_x) - f_x_previous, f_x = NaN, d.fg!(initial_x, g) - f_calls, g_calls = 1, 1 - H = Array{T}(n, n) - d.h!(initial_x, H) - h_calls = 1 - + value_grad!(d, initial_x) + hessian!(d, initial_x) NewtonTrustRegionState("Newton's Method (Trust Region)", # Store string with model name in state.method length(initial_x), copy(initial_x), # Maintain current state in state.x - f_x, # Store current f in state.f_x - f_calls, # Track f calls in state.f_calls - g_calls, # Track g calls in state.g_calls - h_calls, similar(initial_x), # Maintain previous state in state.x_previous - g, # Store current gradient in state.g - similar(g), # Store previous gradient in state.g_previous + similar(gradient(d)), # Store previous gradient in state.g_previous T(NaN), # Store previous f in state.f_x_previous similar(initial_x), # Maintain current search direction in state.s - H, hard_case, reached_subproblem_solution, interior, T(delta), lambda, method.eta, # eta - zero(T)) # Maintain a cache for line search results in state.lsr + zero(T)) # rho end @@ -273,24 +260,25 @@ function update_state!{T}(d, state::NewtonTrustRegionState{T}, method::NewtonTru # Find the next step direction. m, state.interior, state.lambda, state.hard_case, state.reached_subproblem_solution = - solve_tr_subproblem!(state.g, state.H, state.delta, state.s) + solve_tr_subproblem!(gradient(d), hessian(d), state.delta, state.s) # Maintain a record of previous position copy!(state.x_previous, state.x) # Update current position - for i in 1:state.n + @simd for i in 1:state.n @inbounds state.x[i] = state.x[i] + state.s[i] end # Update the function value and gradient - copy!(state.g_previous, state.g) - state.f_x_previous, state.f_x = state.f_x, d.fg!(state.x, state.g) - state.f_calls, state.g_calls = state.f_calls + 1, state.g_calls + 1 + copy!(state.g_previous, gradient(d)) + state.f_x_previous = value(d) + value_grad!(d, state.x) + # Update the trust region size based on the discrepancy between # the predicted and actual function values. (Algorithm 4.1 in N&W) - f_x_diff = state.f_x_previous - state.f_x + f_x_diff = state.f_x_previous - value(d) if abs(m) <= eps(T) # This should only happen when the step is very small, in which case # we should accept the step and assess_convergence(). 
@@ -322,9 +310,9 @@ function update_state!{T}(d, state::NewtonTrustRegionState{T}, method::NewtonTru x_diff = state.x - state.x_previous delta = 0.25 * sqrt(vecdot(x_diff, x_diff)) - state.f_x = state.f_x_previous + d.f_x = state.f_x_previous copy!(state.x, state.x_previous) - copy!(state.g, state.g_previous) + copy!(gradient(d), state.g_previous) end false diff --git a/src/objective_types.jl b/src/objective_types.jl new file mode 100644 index 000000000..6abe630c9 --- /dev/null +++ b/src/objective_types.jl @@ -0,0 +1,275 @@ +@compat abstract type AbstractObjective end +type NonDifferentiable{T} <: AbstractObjective + f + f_x::T + last_x_f::Array{T} + f_calls::Vector{Int} +end +NonDifferentiable{T}(f, x_seed::Array{T}) = NonDifferentiable(f, f(x_seed), copy(x_seed), [1]) + +type OnceDifferentiable{T, Tgrad} <: AbstractObjective + f + g! + fg! + f_x::T + g::Tgrad + last_x_f::Array{T} + last_x_g::Array{T} + f_calls::Vector{Int} + g_calls::Vector{Int} +end +function OnceDifferentiable(f, g!, fg!, x_seed) + g = similar(x_seed) + g!(x_seed, g) + OnceDifferentiable(f, g!, fg!, f(x_seed), g, copy(x_seed), copy(x_seed), [1], [1]) +end +function OnceDifferentiable(f, g!, x_seed) + function fg!(x, storage) + g!(x, storage) + return f(x) + end + return OnceDifferentiable(f, g!, fg!, x_seed) +end +function OnceDifferentiable{T}(f, x_seed::Vector{T}; autodiff = :finitediff) + n_x = length(x_seed) + f_calls = [1] + g_calls = [1] + if autodiff == :finitediff + function g!(x, storage) + Calculus.finite_difference!(x->(f_calls[1]+=1;f(x)), x, storage, :central) + return + end + function fg!(x, storage) + g!(x, storage) + return f(x) + end + elseif autodiff == :forwarddiff + gcfg = ForwardDiff.GradientConfig(x_seed) + g! = (x, out) -> ForwardDiff.gradient!(out, f, x, gcfg) + + fg! = (x, out) -> begin + gr_res = DiffBase.DiffResult(zero(T), out) + ForwardDiff.gradient!(gr_res, f, x, gcfg) + DiffBase.value(gr_res) + end + end + g = similar(x_seed) + g!(x_seed, g) + return OnceDifferentiable(f, g!, fg!, f(x_seed), g, copy(x_seed), copy(x_seed), f_calls, g_calls) +end + +type TwiceDifferentiable{T<:Real} <: AbstractObjective + f + g! + fg! + h! + f_x::T + g::Vector{T} + H::Matrix{T} + last_x_f::Vector{T} + last_x_g::Vector{T} + last_x_h::Vector{T} + f_calls::Vector{Int} + g_calls::Vector{Int} + h_calls::Vector{Int} +end +function TwiceDifferentiable{T}(f, g!, fg!, h!, x_seed::Array{T}) + n_x = length(x_seed) + g = similar(x_seed) + H = Array{T}(n_x, n_x) + g!(x_seed, g) + h!(x_seed, H) + TwiceDifferentiable(f, g!, fg!, h!, f(x_seed), + g, H, copy(x_seed), + copy(x_seed), copy(x_seed), [1], [1], [1]) +end + +function TwiceDifferentiable{T}(f, + g!, + h!, + x_seed::Array{T}) + function fg!(x::Vector, storage::Vector) + g!(x, storage) + return f(x) + end + return TwiceDifferentiable(f, g!, fg!, h!, x_seed) +end +function TwiceDifferentiable{T}(f, x_seed::Vector{T}; autodiff = :finitediff) + n_x = length(x_seed) + f_calls = [1] + g_calls = [1] + h_calls = [1] + if autodiff == :finitediff + function g!(x::Vector, storage::Vector) + Calculus.finite_difference!(x->(f_calls[1]+=1;f(x)), x, storage, :central) + return + end + function fg!(x::Vector, storage::Vector) + g!(x, storage) + return f(x) + end + function h!(x::Vector, storage::Matrix) + Calculus.finite_difference_hessian!(x->(f_calls[1]+=1;f(x)), x, storage) + return + end + elseif autodiff == :forwarddiff + gcfg = ForwardDiff.GradientConfig(x_seed) + g! = (x, out) -> ForwardDiff.gradient!(out, f, x, gcfg) + + fg! 
= (x, out) -> begin
+            gr_res = DiffBase.DiffResult(zero(T), out)
+            ForwardDiff.gradient!(gr_res, f, x, gcfg)
+            DiffBase.value(gr_res)
+        end
+
+        hcfg = ForwardDiff.HessianConfig(x_seed)
+        h! = (x, out) -> ForwardDiff.hessian!(out, f, x, hcfg)
+    end
+    g = similar(x_seed)
+    H = Array{T}(n_x, n_x)
+    g!(x_seed, g)
+    h!(x_seed, H)
+    return TwiceDifferentiable(f, g!, fg!, h!, f(x_seed),
+                               g, H, copy(x_seed),
+                               copy(x_seed), copy(x_seed), f_calls, g_calls, h_calls)
+end
+
+
+function TwiceDifferentiable{T}(f, g!, x_seed::Array{T}; autodiff = :finitediff)
+    n_x = length(x_seed)
+    f_calls = [1]
+    function fg!(x, storage)
+        g!(x, storage)
+        return f(x)
+    end
+    if autodiff == :finitediff
+        function h!(x, storage)
+            Calculus.finite_difference_hessian!(x->(f_calls[1]+=1;f(x)), x, storage)
+            return
+        end
+    elseif autodiff == :forwarddiff
+        hcfg = ForwardDiff.HessianConfig(similar(x_seed))
+        h! = (x, out) -> ForwardDiff.hessian!(out, f, x, hcfg)
+    end
+    g = similar(x_seed)
+    H = Array{T}(n_x, n_x)
+    g!(x_seed, g)
+    h!(x_seed, H)
+    return TwiceDifferentiable(f, g!, fg!, h!, f(x_seed),
+                               g, H, copy(x_seed),
+                               copy(x_seed), copy(x_seed), f_calls, [1], [1])
+end
+#=
+function TwiceDifferentiable{T}(f, g!, fg!, x_seed::Array{T}; autodiff = :finitediff)
+    n_x = length(x_seed)
+    f_calls = [1]
+    if autodiff == :finitediff
+        function h!(x::Vector, storage::Matrix)
+            Calculus.finite_difference_hessian!(x->(f_calls[1]+=1;f(x)), x, storage)
+            return
+        end
+    elseif autodiff == :forwarddiff
+        hcfg = ForwardDiff.HessianConfig(similar(x_seed))
+        h! = (x, out) -> ForwardDiff.hessian!(out, f, x, hcfg)
+    end
+    g = similar(x_seed)
+    g!(x_seed, g)
+    return TwiceDifferentiable(f, g!, fg!, h!, f(x_seed),
+                               g, Array{T}(n_x, n_x), copy(x_seed), f_calls, [1], [0])
+end
+=#
+function TwiceDifferentiable(d::OnceDifferentiable; autodiff = :finitediff)
+    n_x = length(d.last_x_f)
+    T = eltype(d.last_x_f)
+    if autodiff == :finitediff
+        function h!(x::Vector, storage::Matrix)
+            Calculus.finite_difference_hessian!(x->(d.f_calls[1]+=1;d.f(x)), x, storage)
+            return
+        end
+    elseif autodiff == :forwarddiff
+        hcfg = ForwardDiff.HessianConfig(similar(gradient(d)))
+        h! = (x, out) -> ForwardDiff.hessian!(out, d.f, x, hcfg)
+    end
+    H = Array{T}(n_x, n_x)
+    h!(d.last_x_g, H)
+    return TwiceDifferentiable(d.f, d.g!, d.fg!, h!, d.f_x,
+                               gradient(d), H, d.last_x_f,
+                               d.last_x_g, copy(d.last_x_g), d.f_calls, d.g_calls, [1])
+end
+
+function _unchecked_value!(obj, x)
+    obj.f_calls .+= 1
+    copy!(obj.last_x_f, x)
+    obj.f_x = obj.f(x)
+end
+function value(obj, x)
+    if x != obj.last_x_f
+        obj.f_calls .+= 1
+        return obj.f(x)
+    end
+    obj.f_x
+end
+function value!(obj, x)
+    if x != obj.last_x_f
+        _unchecked_value!(obj, x)
+    end
+    obj.f_x
+end
+
+
+function _unchecked_grad!(obj, x)
+    obj.g_calls .+= 1
+    copy!(obj.last_x_g, x)
+    obj.g!(x, obj.g)
+end
+function gradient!(obj, x)
+    if x != obj.last_x_g
+        _unchecked_grad!(obj, x)
+    end
+end
+
+function value_grad!(obj, x)
+    if x != obj.last_x_f && x != obj.last_x_g
+        obj.f_calls .+= 1
+        obj.g_calls .+= 1
+        obj.last_x_f[:], obj.last_x_g[:] = copy(x), copy(x)
+        obj.f_x = obj.fg!(x, obj.g)
+    elseif x != obj.last_x_f
+        _unchecked_value!(obj, x)
+    elseif x != obj.last_x_g
+        _unchecked_grad!(obj, x)
+    end
+    obj.f_x
+end
+
+function _unchecked_hessian!(obj, x)
+    obj.h_calls .+= 1
+    copy!(obj.last_x_h, x)
+    obj.h!(x, obj.H)
+end
+function hessian!(obj, x)
+    if x != obj.last_x_h
+        _unchecked_hessian!(obj, x)
+    end
+end
+
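+# Usage sketch (illustrative, not exported API): the last_x_* caches above mean
+# that re-evaluating at an unchanged point does not recount calls, e.g.
+#
+#     od = OnceDifferentiable(f, g!, fg!, x0)
+#     value_grad!(od, x)   # evaluates fg! once; bumps f_calls and g_calls
+#     value_grad!(od, x)   # x matches last_x_f and last_x_g, so the cached f_x is returned
+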
+# Getters are without ! and accept only an objective and index or just an objective
+value(obj) = obj.f_x
+gradient(obj) = obj.g
+gradient(obj, i::Integer) = obj.g[i]
+hessian(obj) = obj.H
+#=
+# This can be used when LineSearches switches to value_grad! and family
+# Remember to change all fg!'s to "nothing" for finite differences
+# and when f and g! are passed but no fg!. Can potentially avoid more calls
+# than current setup.
+function value_grad!(obj::Union{OnceDifferentiable{Void}, TwiceDifferentiable{Void}}, x)
+    if x != obj.last_x_f
+        _unchecked_value!(obj, x)
+    end
+    if x != obj.last_x_g
+        _unchecked_grad!(obj, x)
+    end
+    obj.f_x
+end
+=#
diff --git a/src/optimize.jl b/src/optimize.jl
index 8bc8e6d05..223b4ecd9 100644
--- a/src/optimize.jl
+++ b/src/optimize.jl
@@ -46,23 +46,24 @@ function optimize(d::TwiceDifferentiable, initial_x::Array; kwargs...)
     optimize(d, initial_x, method, Options(;kwargs...))
 end

-optimize(d::Function, initial_x, options::Options) = optimize(d, initial_x, NelderMead(), options)
-optimize(d::OnceDifferentiable, initial_x, options::Options) = optimize(d, initial_x, BFGS(), options)
-optimize(d::TwiceDifferentiable, initial_x, options::Options) = optimize(d, initial_x, Newton(), options)
+optimize(f::Function, initial_x::Array, options::Options) = optimize(NonDifferentiable(f, initial_x), initial_x, NelderMead(), options)
+optimize(f::Function, initial_x::Array, method::Optimizer, options::Options = Options()) = optimize(NonDifferentiable(f, initial_x), initial_x, method, options)
+optimize(d::OnceDifferentiable, initial_x::Array, options::Options) = optimize(d, initial_x, BFGS(), options)
+optimize(d::TwiceDifferentiable, initial_x::Array, options::Options) = optimize(d, initial_x, Newton(), options)

 function optimize{F<:Function, G<:Function}(f::F, g!::G,
                                             initial_x::Array,
                                             method::Optimizer,
                                             options::Options = Options())
-    d = OnceDifferentiable(f, g!)
+    d = OnceDifferentiable(f, g!, initial_x)
     optimize(d, initial_x, method, options)
 end
 function optimize{F<:Function, G<:Function}(f::F, g!::G,
                                             initial_x::Array,
                                             options::Options)
-    d = OnceDifferentiable(f, g!)
+    d = OnceDifferentiable(f, g!, initial_x)
     optimize(d, initial_x, BFGS(), options)
 end

@@ -72,7 +73,7 @@ function optimize{F<:Function, G<:Function, H<:Function}(f::F,
                                                          initial_x::Array,
                                                          method::Optimizer,
                                                          options::Options = Options())
-    d = TwiceDifferentiable(f, g!, h!)
+    d = TwiceDifferentiable(f, g!, h!, initial_x)
     optimize(d, initial_x, method, options)
 end
 function optimize{F<:Function, G<:Function, H<:Function}(f::F,
@@ -80,7 +81,7 @@ function optimize{F<:Function, G<:Function, H<:Function}(f::F,
                                                          g!::G,
                                                          h!::H,
                                                          initial_x::Array,
                                                          options)
-    d = TwiceDifferentiable(f, g!, h!)
+    d = TwiceDifferentiable(f, g!, h!, initial_x)
     optimize(d, initial_x, Newton(), options)
 end

@@ -88,79 +89,34 @@ function optimize{F<:Function, T, M <: Union{FirstOrderSolver, SecondOrderSolver
                                             initial_x::Array{T},
                                             method::M,
                                             options::Options)
-    if !options.autodiff
-        if M <: FirstOrderSolver
-            d = OnceDifferentiable(f)
-        else
-            error("No gradient or Hessian was provided. Either provide a gradient and Hessian, set autodiff = true in the Options if applicable, or choose a solver that doesn't require a Hessian.")
-        end
+    if M <: FirstOrderSolver
+        d = OnceDifferentiable(f, initial_x)
     else
-        gcfg = ForwardDiff.GradientConfig(initial_x)
-        g! = (x, out) -> ForwardDiff.gradient!(out, f, x, gcfg)
-
-        fg! = (x, out) -> begin
-            gr_res = DiffBase.DiffResult(zero(T), out)
-            ForwardDiff.gradient!(gr_res, f, x, gcfg)
-            DiffBase.value(gr_res)
-        end
-
-        if M <: FirstOrderSolver
-            d = OnceDifferentiable(f, g!, fg!)
-        else
-            hcfg = ForwardDiff.HessianConfig(initial_x)
-            h! = (x, out) -> ForwardDiff.hessian!(out, f, x, hcfg)
-            d = TwiceDifferentiable(f, g!, fg!, h!)
-        end
+        d = TwiceDifferentiable(f, initial_x)
     end
-
     optimize(d, initial_x, method, options)
 end

 function optimize(d::OnceDifferentiable,
                   initial_x::Array,
-                  method::Newton,
-                  options::Options)
-    if !options.autodiff
-        error("No Hessian was provided. Either provide a Hessian, set autodiff = true in the Options if applicable, or choose a solver that doesn't require a Hessian.")
-    else
-        hcfg = ForwardDiff.HessianConfig(initial_x)
-        h! = (x, out) -> ForwardDiff.hessian!(out, d.f, x, hcfg)
-    end
-    optimize(TwiceDifferentiable(d.f, d.g!, d.fg!, h!), initial_x, method, options)
-end
-
-function optimize(d::OnceDifferentiable,
-                  initial_x::Array,
-                  method::NewtonTrustRegion,
+                  method::SecondOrderSolver,
                   options::Options)
-    if !options.autodiff
-        error("No Hessian was provided. Either provide a Hessian, set autodiff = true in the Options if applicable, or choose a solver that doesn't require a Hessian.")
-    else
-        hcfg = ForwardDiff.HessianConfig(initial_x)
-        h! = (x, out) -> ForwardDiff.hessian!(out, d.f, x, hcfg)
-    end
-    optimize(TwiceDifferentiable(d.f, d.g!, d.fg!, h!), initial_x, method, options)
+    optimize(TwiceDifferentiable(d), initial_x, method, options)
 end

 update_g!(d, state, method) = nothing
-
 function update_g!{M<:Union{FirstOrderSolver, Newton}}(d, state, method::M)
     # Update the function value and gradient
-    state.f_x_previous, state.f_x = state.f_x, d.fg!(state.x, state.g)
-    state.f_calls, state.g_calls = state.f_calls + 1, state.g_calls + 1
+    value_grad!(d, state.x)
 end

-update_h!(d, state, method) = nothing
-
 # Update the Hessian
-function update_h!(d, state, method::SecondOrderSolver)
-    d.h!(state.x, state.H)
-    state.h_calls += 1
-end
+update_h!(d, state, method) = nothing
+update_h!(d, state, method::SecondOrderSolver) = hessian!(d, state.x)

 after_while!(d, state, method, options) = nothing

-function optimize{T, M<:Optimizer}(d, initial_x::Array{T}, method::M,
+function optimize{D<:AbstractObjective, T, M<:Optimizer}(d::D, initial_x::Array{T}, method::M,
                                    options::Options = Options())
     t0 = time() # Initial time stamp used to control early stopping by options.time_limit
@@ -168,27 +124,26 @@ function optimize{T, M<:Optimizer}(d, initial_x::Array{T}, method::M,
         error("Use optimize(f, scalar, scalar) for 1D problems")
     end

     state = initial_state(method, options, d, initial_x)
     tr = OptimizationTrace{typeof(method)}()
     tracing = options.store_trace || options.show_trace || options.extended_trace || options.callback != nothing
     stopped, stopped_by_callback, stopped_by_time_limit = false, false, false
-
+    f_limit_reached, g_limit_reached, h_limit_reached = false, false, false
     x_converged, f_converged, f_increased = false, false, false
     g_converged = if typeof(method) <: NelderMead
         nmobjective(state.f_simplex, state.m, state.n) < options.g_tol
     elseif typeof(method) <: ParticleSwarm || typeof(method) <: SimulatedAnnealing
         g_converged = false
     else
-        vecnorm(state.g, Inf) < options.g_tol
+        vecnorm(gradient(d), Inf) < options.g_tol
     end
     converged = g_converged
     iteration = 0

     options.show_trace && print_header(method)
-    trace!(tr, state, iteration, method, options)
+    trace!(tr, d, state, iteration, method, options)

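+    # Note: f_calls_limit, g_calls_limit, h_calls_limit and time_limit are soft
+    # limits; the loop below only checks them once per iteration, after the
+    # iteration's work is done.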
diff --git a/src/particle_swarm.jl b/src/particle_swarm.jl
index 48c0785bd..a9e5a4f17 100644
--- a/src/particle_swarm.jl
+++ b/src/particle_swarm.jl
@@ -26,9 +26,7 @@ type ParticleSwarmState{T,N}
     iterations::Int
 end
 
-initial_state(method::ParticleSwarm, options, d, initial_x::Array) = initial_state(method, options, d.f, initial_x)
-
-function initial_state{T}(method::ParticleSwarm, options, f::Function, initial_x::Array{T})
+function initial_state{T}(method::ParticleSwarm, options, f, initial_x::Array{T})
     #=
     Variable X represents the whole swarm of solutions with
@@ -83,10 +81,8 @@ function initial_state{T}(method::ParticleSwarm, options, f, initial_x::Array{T})
     best_score = zeros(T, n_particles)
     x_learn = zeros(T, n)
 
-    f_calls = 0
     current_state = 0
-    f_x = f(initial_x)
-    f_calls += 1
+    value!(f, initial_x)
 
     # if search space is limited, spread the initial population
     # uniformly over the whole search space
@@ -123,10 +119,6 @@ function initial_state{T}(method::ParticleSwarm, options, f, initial_x::Array{T})
     ParticleSwarmState("Particle Swarm",
                        n,
                        x,
-                       f_x,
-                       f_calls, # f call
-                       0, # g calls
-                       0, # h calls
                        0,
                        lower,
                        upper,
@@ -145,24 +137,22 @@ function initial_state{T}(method::ParticleSwarm, options, f, initial_x::Array{T})
                        options.iterations)
 end
 
-update_state!(d, state::ParticleSwarmState, method::ParticleSwarm) = update_state!(d.f, state, method)
-function update_state!{T}(f::Function, state::ParticleSwarmState{T}, method::ParticleSwarm)
+function update_state!{T}(f, state::ParticleSwarmState{T}, method::ParticleSwarm)
     if state.limit_search_space
         limit_X!(state.X, state.lower, state.upper, state.n_particles, state.n)
     end
     compute_cost!(f, state.n_particles, state.X, state.score)
-    state.f_calls += state.n_particles
 
     if state.iteration == 0
         copy!(state.best_score, state.score)
-        state.f_x = Base.minimum(state.score)
+        f.f_x = Base.minimum(state.score)
     end
-    state.f_x = housekeeping!(state.score,
+    f.f_x = housekeeping!(state.score,
                               state.best_score,
                               state.X,
                               state.X_best,
                               state.x,
-                              state.f_x,
+                              value(f),
                               state.n_particles)
     # Elitist Learning:
     # find a new solution named 'x_learn' which is the current best
@@ -195,9 +185,8 @@ function update_state!{T}(f, state::ParticleSwarmState{T}, method::ParticleSwarm)
         end
     end
 
-    score_learn = f(state.x_learn)
-    state.f_calls += 1
-    if score_learn < state.f_x
-        state.f_x = score_learn * 1.0
+    score_learn = value(f, state.x_learn)
+    if score_learn < f.f_x
+        f.f_x = score_learn * 1.0
         for j in 1:state.n
             state.X_best[j, i_worst] = state.x_learn[j]
@@ -440,13 +429,13 @@ function limit_X!(X, lower, upper, n_particles, n)
     nothing
 end
 
-function compute_cost!(f::Function,
+function compute_cost!(f,
                        n_particles::Int,
                        X::Matrix,
                        score::Vector)
 
     for i in 1:n_particles
-        score[i] = f(X[:, i])
+        score[i] = value(f, X[:, i])
     end
     nothing
 end
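The swarm code above no longer tallies `f_calls` by hand; evaluation and bookkeeping are fused on the objective. A hedged sketch of that protocol (the `NonDifferentiable` constructor and the exact counting behavior are assumptions inferred from the calls above, since `objective_types.jl` is not shown in this diff):

```jl
using Optim

f(x) = sum(abs2, x)
x0 = [1.0, 2.0]

nd = Optim.NonDifferentiable(f, x0)  # assumed constructor mirroring Once/TwiceDifferentiable
Optim.value!(nd, x0)                 # evaluates f(x0), caches it in nd.f_x, bumps the counter
Optim.value(nd)                      # returns the cached value, no extra objective call
Optim.value(nd, [0.0, 0.0])          # evaluates at a new point, one more f-call
Optim.f_calls(nd)                    # == 2, assuming value(nd, x) counts
```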
diff --git a/src/simulated_annealing.jl b/src/simulated_annealing.jl
index a217f041a..71aee7997 100644
--- a/src/simulated_annealing.jl
+++ b/src/simulated_annealing.jl
@@ -16,11 +16,10 @@ immutable SimulatedAnnealing{Tn, Ttemp} <: Optimizer
     keep_best::Bool # not used!?
 end
 
-SimulatedAnnealing(;neighbor! = nothing,
-                    neighbor = default_neighbor!,
+SimulatedAnnealing(;neighbor = default_neighbor!,
                     temperature = log_temperature,
                     keep_best::Bool = true) =
-    SimulatedAnnealing(get_neighbor(neighbor!, neighbor), temperature, keep_best)
+    SimulatedAnnealing(neighbor, temperature, keep_best)
 
 type SimulatedAnnealingState{T, N}
     @add_generic_fields()
@@ -31,24 +30,17 @@ type SimulatedAnnealingState{T, N}
     f_proposal::T
 end
 
-initial_state(method::SimulatedAnnealing, options, d, initial_x::Array) = initial_state(method, options, d.f, initial_x)
-
-function initial_state{T}(method::SimulatedAnnealing, options, f::Function, initial_x::Array{T})
+function initial_state{T}(method::SimulatedAnnealing, options, f, initial_x::Array{T})
     # Count number of parameters
     n = length(initial_x)
-
-    # Store f(x) in f_x
-    f_x = f(initial_x)
-    f_calls = 1
+    value!(f, initial_x)
 
     # Store the best state ever visited
     best_x = copy(initial_x)
-    best_f_x = f_x
-    SimulatedAnnealingState("Simulated Annealing", n, copy(best_x), f_x, f_calls, 0, 0, 1, best_x, similar(initial_x), best_f_x, f_x)
+    SimulatedAnnealingState("Simulated Annealing", n, copy(best_x), 1, best_x, similar(initial_x), f.f_x, f.f_x)
 end
 
-update_state!(d, state::SimulatedAnnealingState, method::SimulatedAnnealing) = update_state!(d.f, state, method)
-function update_state!{T}(f::Function, state::SimulatedAnnealingState{T}, method::SimulatedAnnealing)
+function update_state!{T}(nd, state::SimulatedAnnealingState{T}, method::SimulatedAnnealing)
     # Determine the temperature for current iteration
     t = method.temperature(state.iteration)
@@ -57,8 +49,7 @@ function update_state!{T}(nd, state::SimulatedAnnealingState{T}, method::SimulatedAnnealing)
     method.neighbor!(state.x_current, state.x_proposal)
 
     # Evaluate the cost function at the proposed state
-    state.f_proposal = f(state.x_proposal)
-    state.f_calls += 1
+    state.f_proposal = value(nd, state.x_proposal)
 
     if state.f_proposal <= state.f_x_current
         # If proposal is superior, we always move to it
@@ -66,8 +57,8 @@ function update_state!{T}(nd, state::SimulatedAnnealingState{T}, method::SimulatedAnnealing)
         state.f_x_current = state.f_proposal
 
         # If the new state is the best state yet, keep a record of it
-        if state.f_proposal < state.f_x
-            state.f_x = state.f_proposal
+        if state.f_proposal < nd.f_x
+            nd.f_x = state.f_proposal
             copy!(state.x, state.x_proposal)
         end
     else
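Since the deprecated `neighbor!` keyword is gone, a custom proposal function now goes through `neighbor` alone. A small sketch of the surviving interface:

```jl
using Optim

f(x) = sum(abs2, x)
x0 = [1.0, 1.0]

# mutating proposal: fill x_proposal with a random step away from x_current
function my_neighbor!(x_current, x_proposal)
    for i in eachindex(x_current)
        x_proposal[i] = x_current[i] + 0.1 * randn()
    end
end

res = optimize(f, x0, SimulatedAnnealing(neighbor = my_neighbor!),
               Optim.Options(iterations = 100_000))
```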
diff --git a/src/types.jl b/src/types.jl
index b19640e1a..b18cbd860 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -3,12 +3,14 @@ immutable Options{TCallback <: Union{Void, Function}}
     x_tol::Float64
     f_tol::Float64
     g_tol::Float64
+    f_calls_limit::Int
+    g_calls_limit::Int
+    h_calls_limit::Int
     allow_f_increases::Bool
     iterations::Int
     store_trace::Bool
    show_trace::Bool
     extended_trace::Bool
-    autodiff::Bool
     show_every::Int
     callback::TCallback
     time_limit::Float64
@@ -18,12 +20,14 @@ function Options(;
         x_tol::Real = 1e-32,
         f_tol::Real = 1e-32,
         g_tol::Real = 1e-8,
+        f_calls_limit::Int = 0,
+        g_calls_limit::Int = 0,
+        h_calls_limit::Int = 0,
         allow_f_increases::Bool = false,
         iterations::Integer = 1_000,
         store_trace::Bool = false,
         show_trace::Bool = false,
         extended_trace::Bool = false,
-        autodiff::Bool = false,
         show_every::Integer = 1,
         callback = nothing,
         time_limit = NaN)
@@ -32,9 +36,9 @@ function Options(;
         show_trace = true
     end
     Options{typeof(callback)}(
-        Float64(x_tol), Float64(f_tol), Float64(g_tol), allow_f_increases, Int(iterations),
-        store_trace, show_trace, extended_trace, autodiff, Int(show_every),
-        callback, time_limit)
+        Float64(x_tol), Float64(f_tol), Float64(g_tol), f_calls_limit, g_calls_limit, h_calls_limit,
+        allow_f_increases, Int(iterations), store_trace, show_trace, extended_trace,
+        Int(show_every), callback, time_limit)
 end
 
 function print_header(options::Options)
@@ -93,23 +97,6 @@ type UnivariateOptimizationResults{T,M} <: OptimizationResults
     f_calls::Int
 end
 
-immutable NonDifferentiable
-    f::Function
-end
-
-immutable OnceDifferentiable
-    f::Function
-    g!::Function
-    fg!::Function
-end
-
-immutable TwiceDifferentiable
-    f::Function
-    g!::Function
-    fg!::Function
-    h!::Function
-end
-
 function Base.show(io::IO, t::OptimizationState)
     @printf io "%6d %14e %14e\n" t.iteration t.value t.g_norm
     if !isempty(t.metadata)
@@ -154,10 +141,13 @@ function Base.show(io::IO, r::MultivariateOptimizationResults)
         @printf io " * f(x) > f(x'): %s\n" f_increased(r)
     end
     @printf io " * Reached Maximum Number of Iterations: %s\n" iteration_limit_reached(r)
-    @printf io " * Objective Function Calls: %d" f_calls(r)
+    @printf io " * Objective Calls: %d" f_calls(r)
     if !(r.method in ("Nelder-Mead", "Simulated Annealing"))
         @printf io "\n * Gradient Calls: %d" g_calls(r)
     end
+    if r.method in ("Newton's Method", "Newton's Method (Trust Region)")
+        @printf io "\n * Hessian Calls: %d" h_calls(r)
+    end
     return
 end
@@ -185,50 +175,3 @@ function Base.append!(a::MultivariateOptimizationResults, b::MultivariateOptimizationResults)
     a.f_calls += f_calls(b)
     a.g_calls += g_calls(b)
 end
-
-# TODO: Expose ability to do forward and backward differencing
-function OnceDifferentiable(f::Function)
-    function g!(x::Array, storage::Array)
-        Calculus.finite_difference!(f, x, storage, :central)
-        return
-    end
-    function fg!(x::Array, storage::Array)
-        g!(x, storage)
-        return f(x)
-    end
-    return OnceDifferentiable(f, g!, fg!)
-end
-
-function OnceDifferentiable(f::Function, g!::Function)
-    function fg!(x::Array, storage::Array)
-        g!(x, storage)
-        return f(x)
-    end
-    return OnceDifferentiable(f, g!, fg!)
-end
-
-function TwiceDifferentiable(f::Function)
-    function g!(x::Vector, storage::Vector)
-        Calculus.finite_difference!(f, x, storage, :central)
-        return
-    end
-    function fg!(x::Vector, storage::Vector)
-        g!(x, storage)
-        return f(x)
-    end
-    function h!(x::Vector, storage::Matrix)
-        Calculus.finite_difference_hessian!(f, x, storage)
-        return
-    end
-    return TwiceDifferentiable(f, g!, fg!, h!)
-end
-
-function TwiceDifferentiable(f::Function,
-                             g!::Function,
-                             h!::Function)
-    function fg!(x::Vector, storage::Vector)
-        g!(x, storage)
-        return f(x)
-    end
-    return TwiceDifferentiable(f, g!, fg!, h!)
-end
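The three new counters give `Optim.Options` soft budgets to pair with `time_limit`; a value of zero keeps a budget unlimited. A sketch of the constructor as it now reads:

```jl
using Optim

f(x) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2

opts = Optim.Options(f_calls_limit = 500,      # soft cap, checked once per iteration
                     g_calls_limit = 200,
                     time_limit = 10.0,        # seconds; NaN means unlimited
                     allow_f_increases = true)
res = optimize(f, zeros(2), BFGS(), opts)
Optim.f_calls(res)   # may land slightly above the cap, hence "soft"
```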
diff --git a/src/utilities/assess_convergence.jl b/src/utilities/assess_convergence.jl
index 3bee1f3f1..cd09d3a35 100644
--- a/src/utilities/assess_convergence.jl
+++ b/src/utilities/assess_convergence.jl
@@ -33,7 +33,7 @@ function assess_convergence(x::Array,
 end
 
-function assess_convergence(state, options)
+function assess_convergence(state, d, options)
     x_converged, f_converged, f_increased, g_converged = false, false, false, false
 
     if maxdiff(state.x, state.x_previous) < options.x_tol
@@ -43,15 +43,15 @@ function assess_convergence(state, d, options)
     # Absolute Tolerance
     # if abs(f_x - f_x_previous) < f_tol
     # Relative Tolerance
-    if abs(state.f_x - state.f_x_previous) < max(options.f_tol * (abs(state.f_x) + options.f_tol), eps(abs(state.f_x)+abs(state.f_x_previous)))
+    if abs(value(d) - state.f_x_previous) < max(options.f_tol * (abs(value(d)) + options.f_tol), eps(abs(value(d))+abs(state.f_x_previous)))
         f_converged = true
     end
 
-    if state.f_x > state.f_x_previous
+    if value(d) > state.f_x_previous
         f_increased = true
     end
 
-    if vecnorm(state.g, Inf) < options.g_tol
+    if vecnorm(gradient(d), Inf) < options.g_tol
         g_converged = true
     end
@@ -60,19 +60,19 @@ function assess_convergence(state, d, options)
     return x_converged, f_converged, g_converged, converged, f_increased
 end
 
-function assess_convergence(state::NelderMeadState, options)
+function assess_convergence(state::NelderMeadState, d, options)
     g_converged = state.nm_x <= options.g_tol # Hijack g_converged for NM stopping criterion
     return false, false, g_converged, g_converged, false
 end
 
-function assess_convergence(state::Union{ParticleSwarmState, SimulatedAnnealingState}, options)
+function assess_convergence(state::Union{ParticleSwarmState, SimulatedAnnealingState}, d, options)
     false, false, false, false, false
 end
 
-function assess_convergence(state::NewtonTrustRegionState, options)
+function assess_convergence(state::NewtonTrustRegionState, d, options)
     x_converged, f_converged, g_converged, converged, f_increased = false, false, false, false, false
     if state.rho > state.eta
         # Accept the point and check convergence
@@ -82,9 +82,9 @@ function assess_convergence(state::NewtonTrustRegionState, d, options)
             assess_convergence(state.x,
                                state.x_previous,
-                               state.f_x,
+                               value(d),
                                state.f_x_previous,
-                               state.g,
+                               gradient(d),
                                options.x_tol,
                                options.f_tol,
                                options.g_tol)
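For reference, the relative `f_tol` test above reads as a small standalone predicate; a sketch of the same rule:

```jl
# true when the change in objective value is below the relative tolerance,
# with an eps() floor so the test stays meaningful near zero
rel_f_converged(fx, fx_prev, f_tol) =
    abs(fx - fx_prev) < max(f_tol * (abs(fx) + f_tol),
                            eps(abs(fx) + abs(fx_prev)))

rel_f_converged(1.0000001, 1.0, 1e-8)      # false: relative change is about 1e-7
rel_f_converged(1.00000000001, 1.0, 1e-8)  # true
```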
diff --git a/src/utilities/generic.jl b/src/utilities/generic.jl
index c89b54083..d186db578 100644
--- a/src/utilities/generic.jl
+++ b/src/utilities/generic.jl
@@ -11,10 +11,6 @@ end
     method_string::String
     n::Int
     x::Array{T,N}
-    f_x::T
-    f_calls::Int
-    g_calls::Int
-    h_calls::Int
 end
 
 @def add_linesearch_fields begin
@@ -28,7 +24,7 @@ end
 @def initial_linesearch begin
     (similar(initial_x), # Buffer of x for line search in state.x_ls
      similar(initial_x), # Buffer of g for line search in state.g_ls
-     LineSearches.alphainit(one(T), initial_x, g, f_x), # Keep track of step size in state.alpha
+     LineSearches.alphainit(one(T), initial_x, gradient(d), value(d)), # Keep track of step size in state.alpha
     false, # state.mayterminate
     LineSearches.LineSearchResults(T))
 end
diff --git a/src/utilities/perform_linesearch.jl b/src/utilities/perform_linesearch.jl
index 2d4f25f08..1e4e8a314 100644
--- a/src/utilities/perform_linesearch.jl
+++ b/src/utilities/perform_linesearch.jl
@@ -1,7 +1,7 @@
-checked_dphi0!(state, method) = vecdot(state.g, state.s)
-function checked_dphi0!{M<:Union{BFGS, LBFGS}}(state, method::M)
+checked_dphi0!(state, d, method) = vecdot(gradient(d), state.s)
+function checked_dphi0!{M<:Union{BFGS, LBFGS}}(state, d, method::M)
     # If invH is not positive definite, reset it
-    dphi0 = vecdot(state.g, state.s)
+    dphi0 = vecdot(gradient(d), state.s)
     if dphi0 > 0.0
         # "reset" Hessian approximation
         if M <: BFGS
@@ -12,32 +12,32 @@ function checked_dphi0!{M<:Union{BFGS, LBFGS}}(state, d, method::M)
         # Re-calculate direction
         @simd for i in 1:state.n
-            @inbounds state.s[i] = -state.g[i]
+            @inbounds state.s[i] = -gradient(d, i)
         end
-        dphi0 = vecdot(state.g, state.s)
+        dphi0 = vecdot(gradient(d), state.s)
     end
     return dphi0
 end
 
-function checked_dphi0!(state, method::ConjugateGradient)
+function checked_dphi0!(state, d, method::ConjugateGradient)
     # Reset the search direction if it becomes corrupted
-    dphi0 = vecdot(state.g, state.s)
+    dphi0 = vecdot(gradient(d), state.s)
     if dphi0 >= 0
         @simd for i in 1:state.n
             @inbounds state.s[i] = -state.pg[i]
         end
-        dphi0 = vecdot(state.g, state.s)
+        dphi0 = vecdot(gradient(d), state.s)
     end
     dphi0
 end
 
-alphaguess!(state, method, dphi0, df) = nothing
-function alphaguess!(state, method::LBFGS, dphi0, df)
+alphaguess!(state, method, dphi0, d) = nothing
+function alphaguess!(state, method::LBFGS, dphi0, d)
     # compute an initial guess for the linesearch based on
     # Nocedal/Wright, 2nd ed, (3.60)
     # TODO: this is a temporary fix, but should eventually be split off into
     # a separate type and possibly live in LineSearches; see #294
     if method.extrapolate && state.pseudo_iteration > 1
-        alphaguess = 2.0 * (state.f_x - state.f_x_previous) / dphi0
+        alphaguess = 2.0 * (value(d) - state.f_x_previous) / dphi0
         alphaguess = max(alphaguess, state.alpha/4.0) # not too much reduction
         # if alphaguess ≈ 1, then make it 1 (Newton-type behaviour)
         if method.snap2one[1] < alphaguess < method.snap2one[2]
@@ -47,24 +47,24 @@ function alphaguess!(state, method::LBFGS, dphi0, d)
     end
 end
 # Reset to 1 for BFGS and Newton if resetalpha
-function alphaguess!(state, method::Union{BFGS, Newton}, dphi0, df)
+function alphaguess!(state, method::Union{BFGS, Newton}, dphi0, d)
     if method.resetalpha == true
         state.alpha = one(state.alpha)
     end
 end
 
-function alphaguess!(state, method::ConjugateGradient, dphi0, df)
+function alphaguess!(state, method::ConjugateGradient, dphi0, d)
     # Pick the initial step size (HZ #I1-I2)
     state.alpha, state.mayterminate, f_update, g_update =
-        LineSearches.alphatry(state.alpha, df, state.x, state.s, state.x_ls, state.g_ls, state.lsr)
-    state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update
+        LineSearches.alphatry(state.alpha, d, state.x, state.s, state.x_ls, state.g_ls, state.lsr)
+    d.f_calls, d.g_calls = d.f_calls + f_update, d.g_calls + g_update
 end
 
 function perform_linesearch!{M}(state, method::M, d)
     # Calculate search direction dphi0
-    dphi0 = checked_dphi0!(state, method)
+    dphi0 = checked_dphi0!(state, d, method)
     # Refresh the line search cache
     LineSearches.clear!(state.lsr)
-    push!(state.lsr, zero(state.f_x), state.f_x, dphi0)
+    push!(state.lsr, zero(value(d)), value(d), dphi0)
 
     # Guess an alpha
     alphaguess!(state, method, dphi0, d)
@@ -74,11 +74,11 @@ function perform_linesearch!{M}(state, method::M, d)
         state.alpha, f_update, g_update =
             method.linesearch!(d, state.x, state.s, state.x_ls, state.g_ls, state.lsr,
                                state.alpha, state.mayterminate)
-        state.f_calls, state.g_calls = state.f_calls + f_update, state.g_calls + g_update
+        d.f_calls, d.g_calls = d.f_calls + f_update, d.g_calls + g_update
         return true
     catch ex
        if isa(ex, LineSearches.LineSearchException)
-            state.f_calls, state.g_calls = state.f_calls + ex.f_update, state.g_calls + ex.g_update
+            d.f_calls, d.g_calls = d.f_calls + ex.f_update, d.g_calls + ex.g_update
             state.alpha = ex.alpha
             Base.warn("Linesearch failed, using alpha = $(state.alpha) and exiting optimization.")
             return false #lssuccess = false
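Because line searches now charge their evaluations to `d.f_calls` and `d.g_calls`, the objective is the single source of truth for work done. A sketch of inspecting it after a solve:

```jl
using Optim

f(x) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
function g!(x, storage)
    storage[1] = -2.0 * (1.0 - x[1]) - 400.0 * (x[2] - x[1]^2) * x[1]
    storage[2] = 200.0 * (x[2] - x[1]^2)
end
x0 = zeros(2)

od = OnceDifferentiable(f, g!, x0)
res = optimize(od, x0, BFGS())
Optim.f_calls(od), Optim.g_calls(od)  # includes every evaluation made inside line searches
```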
diff --git a/src/utilities/trace.jl b/src/utilities/trace.jl
index 556cb297c..93abf4a1a 100644
--- a/src/utilities/trace.jl
+++ b/src/utilities/trace.jl
@@ -1,4 +1,4 @@
-function trace!(tr, state, iteration, method::NelderMead, options)
+function trace!(tr, d, state, iteration, method::NelderMead, options)
     dt = Dict()
     if options.extended_trace
         dt["centroid"] = state.x_centroid
@@ -16,14 +16,14 @@ function trace!(tr, d, state, iteration, method::NelderMead, options)
 end
 
-function trace!(tr, state, iteration, method::Union{ParticleSwarm, SimulatedAnnealing}, options)
+function trace!(tr, d, state, iteration, method::Union{ParticleSwarm, SimulatedAnnealing}, options)
     dt = Dict()
     if options.extended_trace
         dt["x"] = copy(state.x)
     end
     update!(tr,
             state.iteration,
-            state.f_x,
+            d.f_x,
             NaN,
             dt,
             options.store_trace,
@@ -32,18 +32,18 @@ function trace!(tr, d, state, iteration, method::Union{ParticleSwarm, SimulatedAnnealing}, options)
             options.callback)
 end
 
-function trace!(tr, state, iteration, method::BFGS, options)
+function trace!(tr, d, state, iteration, method::BFGS, options)
     dt = Dict()
     if options.extended_trace
         dt["x"] = copy(state.x)
-        dt["g(x)"] = copy(state.g)
+        dt["g(x)"] = copy(gradient(d))
         dt["~inv(H)"] = copy(state.invH)
         dt["Current step size"] = state.alpha
     end
-    g_norm = vecnorm(state.g, Inf)
+    g_norm = vecnorm(gradient(d), Inf)
     update!(tr,
             iteration,
-            state.f_x,
+            value(d),
             g_norm,
             dt,
             options.store_trace,
@@ -52,17 +52,17 @@ function trace!(tr, d, state, iteration, method::BFGS, options)
             options.callback)
 end
 
-function trace!(tr, state, iteration, method::Union{LBFGS, AcceleratedGradientDescent, GradientDescent, MomentumGradientDescent, ConjugateGradient}, options)
+function trace!(tr, d, state, iteration, method::Union{LBFGS, AcceleratedGradientDescent, GradientDescent, MomentumGradientDescent, ConjugateGradient}, options)
     dt = Dict()
     if options.extended_trace
         dt["x"] = copy(state.x)
-        dt["g(x)"] = copy(state.g)
+        dt["g(x)"] = copy(gradient(d))
         dt["Current step size"] = state.alpha
     end
-    g_norm = vecnorm(state.g, Inf)
+    g_norm = vecnorm(gradient(d), Inf)
     update!(tr,
             iteration,
-            state.f_x,
+            value(d),
             g_norm,
             dt,
             options.store_trace,
@@ -71,17 +71,17 @@ function trace!(tr, d, state, iteration, method::Union{LBFGS, AcceleratedGradientDescent, GradientDescent, MomentumGradientDescent, ConjugateGradient}, options)
             options.callback)
 end
 
-function trace!(tr, state, iteration, method::Newton, options)
+function trace!(tr, d, state, iteration, method::Newton, options)
     dt = Dict()
     if options.extended_trace
         dt["x"] = copy(state.x)
-        dt["g(x)"] = copy(state.g)
-        dt["h(x)"] = copy(state.H)
+        dt["g(x)"] = copy(gradient(d))
+        dt["h(x)"] = copy(hessian(d))
     end
-    g_norm = vecnorm(state.g, Inf)
+    g_norm = vecnorm(gradient(d), Inf)
     update!(tr,
             iteration,
-            state.f_x,
+            value(d),
             g_norm,
             dt,
             options.store_trace,
@@ -90,22 +90,22 @@ function trace!(tr, d, state, iteration, method::Newton, options)
             options.callback)
 end
 
-function trace!(tr, state, iteration, method::NewtonTrustRegion, options)
+function trace!(tr, d, state, iteration, method::NewtonTrustRegion, options)
     dt = Dict()
     if options.extended_trace
         dt["x"] = copy(state.x)
-        dt["g(x)"] = copy(state.g)
-        dt["h(x)"] = copy(state.H)
+        dt["g(x)"] = copy(gradient(d))
+        dt["h(x)"] = copy(hessian(d))
         dt["delta"] = copy(state.delta)
         dt["interior"] = state.interior
         dt["hard case"] = state.hard_case
         dt["reached_subproblem_solution"] = state.reached_subproblem_solution
         dt["lambda"] = state.lambda
     end
-    g_norm = norm(state.g, Inf)
+    g_norm = norm(gradient(d), Inf)
     update!(tr,
             iteration,
-            state.f_x,
+            value(d),
             g_norm,
             dt,
             options.store_trace,
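All `trace!` methods now read values through the objective, so callbacks see the same numbers the solver does. A sketch of an early-stopping callback (we assume, as in earlier releases, that the callback receives the latest `OptimizationState` when the full trace is not stored):

```jl
using Optim

f(x) = sum(abs2, x)

# stop as soon as the traced objective value drops below a threshold
cb = os -> os.value < 1e-12

# a callback alone is enough to turn tracing on
res = optimize(f, [1.0, 1.0], BFGS(), Optim.Options(callback = cb))
```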
diff --git a/test/accelerated_gradient_descent.jl b/test/accelerated_gradient_descent.jl
index fc2519ac7..16762c396 100644
--- a/test/accelerated_gradient_descent.jl
+++ b/test/accelerated_gradient_descent.jl
@@ -10,15 +10,8 @@
     results = Optim.optimize(f, g!, initial_x, AcceleratedGradientDescent(), options)
     @test norm(Optim.minimum(results)) < 1e-6
 
-    for (name, prob) in Optim.UnconstrainedProblems.examples
-        if prob.isdifferentiable
-            if !(name in ("Large Polynomial", "Parabola"))
-                results = Optim.optimize(prob.f, prob.g!, prob.initial_x, AcceleratedGradientDescent(), Optim.Options(allow_f_increases=true))
-                if !(name in ("Rosenbrock", "Polynomial", "Powell"))
-                    @test Optim.converged(results)
-                end
-                @test norm(Optim.minimizer(results) - prob.solutions) < 1e-2
-            end
-        end
-    end
+    run_optim_tests(AcceleratedGradientDescent(); skip = ("Large Polynomial","Parabola"),
+                    convergence_exceptions = (("Rosenbrock", 1),("Rosenbrock", 2)),
+                    iteration_exceptions = (("Powell", 1100),
+                                            ("Polynomial", 1500)))
 end
diff --git a/test/api.jl b/test/api.jl
index 43a725d90..bfa4fd126 100644
--- a/test/api.jl
+++ b/test/api.jl
@@ -6,9 +6,9 @@
     h! = rosenbrock.h!
     initial_x = rosenbrock.initial_x
 
-    d1 = OnceDifferentiable(f)
-    d2 = OnceDifferentiable(f, g!)
-    d3 = TwiceDifferentiable(f, g!, h!)
+    d1 = OnceDifferentiable(f, initial_x)
+    d2 = OnceDifferentiable(f, g!, initial_x)
+    d3 = TwiceDifferentiable(f, g!, h!, initial_x)
 
     Optim.optimize(f, initial_x, BFGS())
     Optim.optimize(f, g!, initial_x, BFGS())
diff --git a/test/bfgs.jl b/test/bfgs.jl
index ed54523bb..c5abadd91 100644
--- a/test/bfgs.jl
+++ b/test/bfgs.jl
@@ -1,14 +1,3 @@
 @testset "BFGS" begin
-    for use_autodiff in (false, true)
-        for (name, prob) in Optim.UnconstrainedProblems.examples
-            opt_allow_f_increases = name == "Polynomial" ? true : false
-            if prob.isdifferentiable
-                results = Optim.optimize(prob.f, prob.initial_x, BFGS(), Optim.Options(autodiff = use_autodiff, allow_f_increases = opt_allow_f_increases))
-                if name != "Polynomial" && use_autodiff == false
-                    @test Optim.converged(results)
-                end
-                @test norm(Optim.minimizer(results) - prob.solutions) < 1e-2
-            end
-        end
-    end
+    run_optim_tests(BFGS(); convergence_exceptions = (("Polynomial",1),))
 end
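The api.jl hunk above shows the one breaking change most users will hit: every objective wrapper now takes the initial point so it can size its internal caches. In short:

```jl
using Optim

prob = Optim.UnconstrainedProblems.examples["Rosenbrock"]
x0 = prob.initial_x

d1 = OnceDifferentiable(prob.f, x0)                    # gradient generated for you
d2 = OnceDifferentiable(prob.f, prob.g!, x0)           # your gradient
d3 = TwiceDifferentiable(prob.f, prob.g!, prob.h!, x0) # your gradient and Hessian
```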
diff --git a/test/callbacks.jl b/test/callbacks.jl
index 08ad8d3ec..a087fed30 100644
--- a/test/callbacks.jl
+++ b/test/callbacks.jl
@@ -5,8 +5,8 @@
     g! = problem.g!
     h! = problem.h!
     initial_x = problem.initial_x
-    d2 = OnceDifferentiable(f, g!)
-    d3 = TwiceDifferentiable(f, g!, h!)
+    d2 = OnceDifferentiable(f, g!, initial_x)
+    d3 = TwiceDifferentiable(f, g!, h!, initial_x)
 
     for method in (NelderMead(), SimulatedAnnealing())
         ot_run = false
diff --git a/test/cg.jl b/test/cg.jl
index 82739a0e0..9499de960 100644
--- a/test/cg.jl
+++ b/test/cg.jl
@@ -1,11 +1,7 @@
 @testset "Conjugate Gradient" begin
     # Test Optim.cg for all differentiable functions in Optim.UnconstrainedProblems.examples
-    for (name, prob) in Optim.UnconstrainedProblems.examples
-        if prob.isdifferentiable
-            res = Optim.optimize(prob.f, prob.g!, prob.initial_x, ConjugateGradient())
-            @test norm(Optim.minimizer(res) - prob.solutions) < 1e-2
-        end
-    end
+    run_optim_tests(ConjugateGradient(),
+                    convergence_exceptions = (("Powell", 1), ("Powell", 2), ("Polynomial", 1)))
 
     @testset "matrix input" begin
         objective(X, B) = sum((X.-B).^2)/2
@@ -18,8 +14,7 @@
         srand(1)
         B = rand(2,2)
 
-        df = Optim.OnceDifferentiable(X -> objective(X, B), (X, G) -> objective_gradient!(X, G, B))
-        results = Optim.optimize(df, rand(2,2), ConjugateGradient())
+        results = Optim.optimize(X -> objective(X, B), (X, G) -> objective_gradient!(X, G, B), rand(2,2), ConjugateGradient())
         @test Optim.converged(results)
         @test Optim.minimum(results) < 1e-8
     end
diff --git a/test/constrained.jl b/test/constrained.jl
index 7e6a65e2d..ba5ec9763 100644
--- a/test/constrained.jl
+++ b/test/constrained.jl
@@ -26,7 +26,7 @@
     initial_x = randn(N)
     tmp = similar(initial_x)
     func = (x, g) -> quadratic!(x, g, AtA, A'*b, tmp)
-    objective = Optim.OnceDifferentiable(x->func(x, nothing), (x,g)->func(x,g), func)
+    objective = Optim.OnceDifferentiable(x->func(x, nothing), (x,g)->func(x,g), func, initial_x)
     results = Optim.optimize(objective, initial_x, ConjugateGradient())
     results = Optim.optimize(objective, Optim.minimizer(results), ConjugateGradient()) # restart to ensure high-precision convergence
     @test Optim.converged(results)
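The matrix-input test keeps working because `optimize` is generic over array shape. A hedged sketch of the same pattern outside the test suite:

```jl
using Optim

srand(1)
B = rand(2, 2)

objective(X) = sum((X .- B).^2) / 2
function objective_gradient!(X, G)
    copy!(G, X .- B)  # the gradient of 0.5*norm(X - B)^2 is X - B, written into G
end

res = optimize(objective, objective_gradient!, rand(2, 2), ConjugateGradient())
Optim.minimizer(res)  # a 2x2 matrix, same shape as the input
```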
diff --git a/test/gradient_descent.jl b/test/gradient_descent.jl
index 3660e9223..f6097edb8 100644
--- a/test/gradient_descent.jl
+++ b/test/gradient_descent.jl
@@ -1,26 +1,9 @@
 @testset "Gradient Descent" begin
-    for use_autodiff in (false, true)
-        for (name, prob) in Optim.UnconstrainedProblems.examples
-            opt_allow_f_increases = name == "Hosaki" ? true : false
-            if prob.isdifferentiable
-                iterations = if name == "Rosenbrock"
-                    10000 # Zig-zagging
-                elseif name == "Powell"
-                    80000 # Zig-zagging as the problem is (intentionally) ill-conditioned
-                else
-                    1000
-                end
-                results = Optim.optimize(prob.f, prob.initial_x, GradientDescent(),
-                                         Optim.Options(autodiff = use_autodiff,
-                                                       iterations = iterations,
-                                                       allow_f_increases = opt_allow_f_increases))
-                if !(name in ("Rosenbrock", "Polynomial", "Powell"))
-                    @test Optim.converged(results)
-                end
-                @test norm(Optim.minimizer(results) - prob.solutions, Inf) < 1e-2
-            end
-        end
-    end
+    run_optim_tests(GradientDescent(),
+                    f_increase_exceptions = ("Hosaki",),
+                    convergence_exceptions = (("Polynomial", 1), ("Polynomial", 2), ("Rosenbrock", 1), ("Rosenbrock", 2)),
+                    iteration_exceptions = (("Rosenbrock", 10000),),
+                    skip = ("Powell",))
 
     function f_gd_1(x)
         (x[1] - 5.0)^2
@@ -32,7 +15,7 @@
 
     initial_x = [0.0]
 
-    d = OnceDifferentiable(f_gd_1, g_gd_1)
+    d = OnceDifferentiable(f_gd_1, g_gd_1, initial_x)
 
     results = Optim.optimize(d, initial_x, GradientDescent())
     @test_throws ErrorException Optim.x_trace(results)
@@ -50,7 +33,7 @@
         storage[2] = eta * x[2]
     end
 
-    d = OnceDifferentiable(f_gd_2, g_gd_2)
+    d = OnceDifferentiable(f_gd_2, g_gd_2, [1.0, 1.0])
 
     results = Optim.optimize(d, [1.0, 1.0], GradientDescent())
     @test_throws ErrorException Optim.x_trace(results)
diff --git a/test/l_bfgs.jl b/test/l_bfgs.jl
index 1540c6b4a..020edd433 100644
--- a/test/l_bfgs.jl
+++ b/test/l_bfgs.jl
@@ -1,13 +1,3 @@
 @testset "L-BFGS" begin
-    for use_autodiff in (false, true)
-        for (name, prob) in Optim.UnconstrainedProblems.examples
-            if prob.isdifferentiable
-                results = Optim.optimize(prob.f, prob.initial_x, LBFGS(), Optim.Options(autodiff = use_autodiff))
-                if !(name in ("Rosenbrock", "Polynomial"))
-                    @test Optim.converged(results)
-                end
-                @test norm(Optim.minimizer(results) - prob.solutions) < 1e-2
-            end
-        end
-    end
+    run_optim_tests(LBFGS(), convergence_exceptions = (("Rosenbrock", 1), ("Polynomial", 1)))
 end
diff --git a/test/levenberg_marquardt.jl b/test/levenberg_marquardt.jl
deleted file mode 100644
index 80443f537..000000000
--- a/test/levenberg_marquardt.jl
+++ /dev/null
@@ -1,97 +0,0 @@
-@testset "Levenberg Marquardt" begin
-    function f_lm(x)
-        [x[1], 2.0 - x[2]]
-    end
-    function g_lm(x)
-        [1.0 0.0; 0.0 -1.0]
-    end
-
-    initial_x = [100.0, 100.0]
-
-    results = Optim.levenberg_marquardt(f_lm, g_lm, initial_x)
-    @test norm(Optim.minimizer(results) - [0.0, 2.0]) < 0.01
-
-
-    function rosenbrock_res(x, r)
-        r[1] = 10.0 * (x[2] - x[1]^2 )
-        r[2] = 1.0 - x[1]
-        return r
-    end
-
-    function rosenbrock_jac(x, j)
-        j[1, 1] = -20.0 * x[1]
-        j[1, 2] = 10.0
-        j[2, 1] = -1.0
-        j[2, 2] = 0.0
-        return j
-    end
-
-    r = zeros(2)
-    j = zeros(2,2)
-
-    frb(x) = rosenbrock_res(x, r)
-    grb(x) = rosenbrock_jac(x, j)
-
-    initial_xrb = [-1.2, 1.0]
-
-    results = Optim.levenberg_marquardt(frb, grb, initial_xrb)
-
-    @test norm(Optim.minimizer(results) - [1.0, 1.0]) < 0.01
-
-    # check estimate is within the bound PR #278
-    result = Optim.levenberg_marquardt(frb, grb, [150.0, 150.0]; lower = [10.0, 10.0], upper = [200.0, 200.0])
-    @test Optim.minimizer(result)[1] >= 10.0
-    @test Optim.minimizer(result)[2] >= 10.0
-
-
-    # tests for #178, taken from LsqFit.jl, but stripped
-    let
-        srand(12345)
-
-        model(x, p) = p[1]*map(exp,-x.*p[2])
-
-        xdata = linspace(0,10,20)
-        ydata = model(xdata, [1.0 2.0]) + 0.01*randn(length(xdata))
-
-        f_lsq = p -> p[1]*map(exp, -xdata.*p[2])-ydata
-        g_lsq = Calculus.jacobian(f_lsq)
-        results = Optim.levenberg_marquardt(f_lsq, g_lsq, [0.5, 0.5])
-
-        @test norm(Optim.minimizer(results) - [1.0, 2.0]) < 0.05
-    end
-
-    let
-        srand(12345)
-
-        model(x, p) = p[1]*map(exp, -x./p[2])+p[3]
-
-        xdata = 1:100
-        ydata = model(xdata, [10.0, 10.0, 10.0]) + 0.1*randn(length(xdata))
-
-        f_lsq = p -> model(xdata,p)-ydata
-        g_lsq = Calculus.jacobian(f_lsq)
-
-        # tests for box constraints, PR #196
-        @test_throws ArgumentError Optim.levenberg_marquardt(f_lsq, g_lsq, [15.0, 15.0, 15.0], lower=[5.0, 11.0])
-        @test_throws ArgumentError Optim.levenberg_marquardt(f_lsq, g_lsq, [5.0, 5.0, 5.0], upper=[15.0, 9.0])
-        @test_throws ArgumentError Optim.levenberg_marquardt(f_lsq, g_lsq, [15.0, 10.0, 15.0], lower=[5.0, 11.0, 5.0])
-        @test_throws ArgumentError Optim.levenberg_marquardt(f_lsq, g_lsq, [5.0, 10.0, 5.0], upper=[15.0, 9.0, 15.0])
-
-        lower=[5.0, 11.0, 5.0]
-        results = Optim.levenberg_marquardt(f_lsq, g_lsq, [15.0, 15.0, 15.0], lower=lower)
-        Optim.minimizer(results)
-        @test Optim.converged(results)
-        @test all(Optim.minimizer(results) .>= lower)
-
-        upper=[15.0, 9.0, 15.0]
-        results = Optim.levenberg_marquardt(f_lsq, g_lsq, [5.0, 5.0, 5.0], upper=upper)
-        Optim.minimizer(results)
-        @test Optim.converged(results)
-        @test all(Optim.minimizer(results) .<= upper)
-
-        # tests for PR #267
-        Optim.levenberg_marquardt(f_lsq, g_lsq, [15.0, 15.0, 15.0], show_trace=true)
-    end
-end
diff --git a/test/lsthrow.jl b/test/lsthrow.jl
index e02895037..2c9e710d7 100644
--- a/test/lsthrow.jl
+++ b/test/lsthrow.jl
@@ -8,8 +8,7 @@
     for optimizer in (ConjugateGradient, GradientDescent, LBFGS, BFGS, Newton,
                       AcceleratedGradientDescent, MomentumGradientDescent)
         println("Testing $(string(optimizer))")
         prob = Optim.UnconstrainedProblems.examples["Exponential"]
-        optimize(prob.f, prob.initial_x, optimizer(linesearch = ls),
-                 Optim.Options(autodiff = true))
+        optimize(prob.f, prob.initial_x, optimizer(linesearch = ls))
         # TODO: How can I verify that the call to optimize gives a warning?
     end
 end
diff --git a/test/momentum_gradient_descent.jl b/test/momentum_gradient_descent.jl
index d7e15b9cb..e429b278c 100644
--- a/test/momentum_gradient_descent.jl
+++ b/test/momentum_gradient_descent.jl
@@ -1,13 +1,7 @@
 @testset "Momentum Gradient Descent" begin
-    for use_autodiff in (false, true)
-        for (name, prob) in Optim.UnconstrainedProblems.examples
-            if prob.isdifferentiable && !(name in ("Polynomial", "Large Polynomial", "Himmelblau")) # it goes in a direction of ascent -> f_converged == true
-                iterations = name == "Powell" ? 2000 : 1000
-                res = Optim.optimize(prob.f, prob.initial_x, MomentumGradientDescent(),
-                                     Optim.Options(autodiff = use_autodiff,
-                                                   iterations = iterations))
-                @test norm(Optim.minimizer(res) - prob.solutions, Inf) < 1e-2
-            end
-        end
-    end
+    run_optim_tests(MomentumGradientDescent(),
+                    skip = ("Powell", "Rosenbrock"),
+                    convergence_exceptions = (("Large Polynomial",1),
+                                              ("Himmelblau",1), ("Powell", 1)),
+                    minimizer_exceptions = (("Powell", 2),))
 end
diff --git a/test/nelder_mead.jl b/test/nelder_mead.jl
index 3ed74b1d2..f3e7aed03 100644
--- a/test/nelder_mead.jl
+++ b/test/nelder_mead.jl
@@ -1,15 +1,9 @@
 @testset "Nelder Mead" begin
     # Test Optim.nelder_mead for all functions except Large Polynomials in Optim.UnconstrainedProblems.examples
-    for (name, prob) in Optim.UnconstrainedProblems.examples
-        res = Optim.optimize(prob.f, prob.initial_x, NelderMead(), Optim.Options(iterations = 10000))
-        if name == "Powell"
-            res = Optim.optimize(prob.f, prob.initial_x, NelderMead(), Optim.Options(g_tol = 1e-12))
-        elseif name == "Large Polynomial"
-            # TODO do this only when a "run all" flag checked
-            # res = Optim.optimize(prob.f, prob.initial_x, method=NelderMead(initial_simplex = Optim.AffineSimplexer(1.,1.)), iterations = 450_000)
-        end
-        !(name == "Large Polynomial") && @test norm(Optim.minimizer(res) - prob.solutions) < 1e-2
-    end
+
+    run_optim_tests(NelderMead(),
+                    convergence_exceptions = (("Powell", 1),),
+                    skip = (("Large Polynomial"),))
 
     # Test if the trace is correctly stored.
     prob = Optim.UnconstrainedProblems.examples["Rosenbrock"]
diff --git a/test/newton.jl b/test/newton.jl
index 32fb25f69..7dd3051fe 100644
--- a/test/newton.jl
+++ b/test/newton.jl
@@ -10,12 +10,12 @@
     function h!_1(x::Vector, storage::Matrix)
         storage[1, 1] = 12.0 * (x[1] - 5.0)^2
     end
+    initial_x = [0.0]
 
-    # Need to specify autodiff!
-    @test_throws ErrorException Optim.optimize(OnceDifferentiable(f_1, g!_1), [0.0], Newton())
-    Optim.optimize(OnceDifferentiable(f_1, g!_1), [0.0], Newton(), Optim.Options(autodiff = true))
+    Optim.optimize(OnceDifferentiable(f_1, g!_1, initial_x), [0.0], Newton())
 
     results = Optim.optimize(f_1, g!_1, h!_1, [0.0], Newton())
+
     @test_throws ErrorException Optim.x_trace(results)
     @test Optim.g_converged(results)
     @test norm(Optim.minimizer(results) - [5.0]) < 0.01
@@ -43,30 +43,13 @@
     @test Optim.g_converged(results)
     @test norm(Optim.minimizer(results) - [0.0, 0.0]) < 0.01
 
-    # Test Optim.newton for all twice differentiable functions in Optim.UnconstrainedProblems.examples
-    @testset "Optim problems" begin
-        for (name, prob) in Optim.UnconstrainedProblems.examples
-            if prob.istwicedifferentiable
-                res = Optim.optimize(prob.f, prob.g!, prob.h!, prob.initial_x, Newton())
-                @test norm(Optim.minimizer(res) - prob.solutions) < 1e-2
-            end
-        end
-    end
-
     @testset "newton in concave region" begin
         prob=Optim.UnconstrainedProblems.examples["Himmelblau"]
         res = optimize(prob.f, prob.g!, prob.h!, [0., 0.], Newton())
         @test norm(Optim.minimizer(res) - prob.solutions) < 1e-9
     end
 
-    @testset "Optim problems (ForwardDiff)" begin
-        for (name, prob) in Optim.UnconstrainedProblems.examples
-            if prob.istwicedifferentiable
-                res = Optim.optimize(prob.f, prob.initial_x, Newton(), Optim.Options(autodiff = true))
-                @test norm(Optim.minimizer(res) - prob.solutions) < 1e-2
-                res = Optim.optimize(prob.f, prob.g!, prob.initial_x, Newton(), Optim.Options(autodiff = true))
-                @test norm(Optim.minimizer(res) - prob.solutions) < 1e-2
-            end
-        end
+    @testset "Optim problems" begin
+        run_optim_tests(Newton())
     end
 end
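As the test above shows, `Newton()` no longer throws when the Hessian is missing; the `OnceDifferentiable` is simply promoted. A sketch using the same quartic as the test:

```jl
using Optim

f(x) = (x[1] - 5.0)^4
g!(x, storage) = (storage[1] = 4.0 * (x[1] - 5.0)^3)
x0 = [0.0]

od = OnceDifferentiable(f, g!, x0)
res = optimize(od, x0, Newton())  # Hessian is generated during promotion
norm(Optim.minimizer(res) - [5.0]) < 0.01
```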
diff --git a/test/newton_trust_region.jl b/test/newton_trust_region.jl
index a01c2b598..1c389f9fb 100644
--- a/test/newton_trust_region.jl
+++ b/test/newton_trust_region.jl
@@ -148,7 +148,7 @@ end
         storage[1, 1] = 12.0 * (x[1] - 5.0)^2
     end
 
-    d = TwiceDifferentiable(f, g!, h!)
+    d = TwiceDifferentiable(f, g!, h!, [0.0])
 
     results = Optim.optimize(d, [0.0], NewtonTrustRegion())
     @test length(results.trace) == 0
@@ -173,7 +173,7 @@ end
         storage[2, 2] = eta
     end
 
-    d = TwiceDifferentiable(f_2, g!_2, h!_2)
+    d = TwiceDifferentiable(f_2, g!_2, h!_2, Float64[127, 921])
 
     results = Optim.optimize(d, Float64[127, 921], NewtonTrustRegion())
     @test results.g_converged
@@ -181,16 +181,8 @@ end
 
     # Test Optim.newton for all twice differentiable functions in
     # Optim.UnconstrainedProblems.examples
-    for (name, prob) in Optim.UnconstrainedProblems.examples
-        if prob.istwicedifferentiable
-            ddf = OnceDifferentiable(prob.f, prob.g!)
-            res = Optim.optimize(ddf, prob.initial_x, NewtonTrustRegion(), Optim.Options(autodiff = true))
-            @test norm(Optim.minimizer(res) - prob.solutions) < 1e-2
-            res = Optim.optimize(ddf.f, prob.initial_x, NewtonTrustRegion(), Optim.Options(autodiff = true))
-            @test norm(Optim.minimizer(res) - prob.solutions) < 1e-2
-            res = Optim.optimize(ddf.f, ddf.g!, prob.initial_x, NewtonTrustRegion(), Optim.Options(autodiff = true))
-            @test norm(Optim.minimizer(res) - prob.solutions) < 1e-2
-        end
+    @testset "Optim problems" begin
+        run_optim_tests(NewtonTrustRegion())
     end
 end
diff --git a/test/optimize.jl b/test/optimize.jl
index 522cfcd5a..06d2b6215 100644
--- a/test/optimize.jl
+++ b/test/optimize.jl
@@ -29,7 +29,7 @@
     @test Optim.g_converged(results)
     @test norm(Optim.minimizer(results) - [0.0, 0.0]) < 0.01
 
-    results = optimize(f1, [127.0, 921.0], Optim.Options(autodiff = true))
+    results = optimize(f1, [127.0, 921.0])
     @test Optim.g_converged(results)
     @test norm(Optim.minimizer(results) - [0.0, 0.0]) < 0.01
diff --git a/test/runtests.jl b/test/runtests.jl
index b901d33a3..cf011986d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -31,11 +31,49 @@ my_tests = [
     "precon.jl",
     "initial_convergence.jl",
     "extrapolate.jl",
-    "levenberg_marquardt.jl",
     "lsthrow.jl",
     "api.jl",
 ]
 
+differentiability_condition(method, prob) = true
+differentiability_condition(method::Optim.FirstOrderSolver, prob) = prob.isdifferentiable
+differentiability_condition(method::Optim.SecondOrderSolver, prob) = prob.istwicedifferentiable
+
+input_tuple(method, prob) = ((prob.f,),)
+input_tuple(method::Optim.FirstOrderSolver, prob) = ((prob.f,), (prob.f, prob.g!))
+input_tuple(method::Optim.SecondOrderSolver, prob) = ((prob.f,), (prob.f, prob.g!), (prob.f, prob.g!, prob.h!))
+
+function run_optim_tests(method; convergence_exceptions = (),
+                                 minimizer_exceptions = (),
+                                 f_increase_exceptions = (),
+                                 iteration_exceptions = (),
+                                 skip = (),
+                                 show_name = false)
+    # Loop over unconstrained problems
+    for (name, prob) in Optim.UnconstrainedProblems.examples
+        show_name && print_with_color(:green, "Problem: ", name, "\n")
+        # Look for name in the first elements of the iteration_exceptions tuples
+        iter_id = find(n[1] == name for n in iteration_exceptions)
+        # If name wasn't found, use default 1000 iterations, else use provided number
+        iters = length(iter_id) == 0 ? 1000 : iteration_exceptions[iter_id[1]][2]
+        # Construct options
+        options = Optim.Options(allow_f_increases = name in f_increase_exceptions, iterations = iters)
+        # Check if once or twice differentiable
+        if differentiability_condition(method, prob) && !(name in skip)
+            # Loop over appropriate input combinations of f, g!, and h!
+            for (i, input) in enumerate(input_tuple(method, prob))
+                results = Optim.optimize(input..., prob.initial_x, method, options)
+                if !((name, i) in convergence_exceptions)
+                    @test Optim.converged(results)
+                end
+                if !((name, i) in minimizer_exceptions)
+                    @test norm(Optim.minimizer(results) - prob.solutions) < 1e-2
+                end
+            end
+        end
+    end
+end
+
 for my_test in my_tests
     @time include(my_test)
 end
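`run_optim_tests` replaces the per-file problem loops; exceptions are keyed by `(problem name, input combination)`, where combination 1 passes `f` only, 2 adds `g!`, and 3 adds `h!`. A hypothetical new solver's test file then reduces to:

```jl
# test/my_solver.jl -- MySolver is a placeholder, not part of this commit
@testset "My Solver" begin
    run_optim_tests(MySolver();
                    skip = ("Large Polynomial",),                  # never run these problems
                    convergence_exceptions = (("Rosenbrock", 1),), # no converged() check for f-only input
                    iteration_exceptions = (("Powell", 5000),),    # give Powell more iterations
                    show_name = true)
end
```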
diff --git a/test/types.jl b/test/types.jl
index 25c723944..c768e7fa8 100644
--- a/test/types.jl
+++ b/test/types.jl
@@ -30,14 +30,14 @@ import Compat.String
         if res.method == "Nelder-Mead"
             @test startswith(lines[8], " * √(Σ(yᵢ-ȳ)²)/n < 1.0e-08: ")
             @test startswith(lines[9], " * Reached Maximum Number of Iterations: ")
-            @test startswith(lines[10], " * Objective Function Calls: ")
+            @test startswith(lines[10], " * Objective Calls: ")
         elseif res.method == "Simulated Annealing"
             @test startswith(lines[8], " * |x - x'| < ")
             @test startswith(lines[9], " * |f(x) - f(x')| / |f(x)| < ")
             @test startswith(lines[10], " * |g(x)| < ")
             @test startswith(lines[11], " * f(x) > f(x')")
             @test startswith(lines[12], " * Reached Maximum Number of Iterations: ")
-            @test startswith(lines[13], " * Objective Function Calls: ")
+            @test startswith(lines[13], " * Objective Calls: ")
         end
     end
@@ -60,6 +60,9 @@ import Compat.String
         @test startswith(lines[10], " * |g(x)| < ")
         @test startswith(lines[11], " * f(x) > f(x')")
         @test startswith(lines[12], " * Reached Maximum Number of Iterations: ")
-        @test startswith(lines[13], " * Objective Function Calls: ")
+        @test startswith(lines[13], " * Objective Calls: ")
         @test startswith(lines[14], " * Gradient Calls: ")
+        if res.method in ("Newton's Method", "Newton's Method (Trust Region)")
+            @test startswith(lines[15], " * Hessian Calls: ")
+        end
     end