diff --git a/src/globalization/trs_solvers/TRS.jl b/src/globalization/trs_solvers/TRS.jl index c9117ec..03d5368 100644 --- a/src/globalization/trs_solvers/TRS.jl +++ b/src/globalization/trs_solvers/TRS.jl @@ -7,7 +7,7 @@ function (ms::TRSolver)(∇f, H, Δ, p) x, info = trs(H, ∇f, Δ) p .= x[:, 1] - m = dot(∇f, p) + dot(p, H * p) / 2 + m = dot(∇f, p) + dot(p, H, p) / 2 interior = norm(p, 2) ≤ Δ return ( p = p, diff --git a/src/globalization/trs_solvers/root.jl b/src/globalization/trs_solvers/root.jl index 9a0e32f..ab3c365 100644 --- a/src/globalization/trs_solvers/root.jl +++ b/src/globalization/trs_solvers/root.jl @@ -4,6 +4,16 @@ abstract type TRSPSolver end abstract type NearlyExactTRSP <: TRSPSolver end +trs_supports_outofplace(trs) = false + +function trs_outofplace_check(trs,prob) + if !trs_supports_outofplace(trs) + throw( + ErrorException("solve() not defined for OutOfPlace() with $(typeof(trs).name.wrapper) for $(typeof(prob).name.wrapper)"), + ) + end +end + include("solvers/NWI.jl") include("solvers/Dogleg.jl") include("solvers/NTR.jl") @@ -11,7 +21,7 @@ include("solvers/TCG.jl") #include("subproblemsolvers/TRS.jl") just make an example instead of relying onTRS.jl function tr_return(; λ, ∇f, H, s, interior, solved, hard_case, Δ, m = nothing) - m = m isa Nothing ? dot(∇f, s) + dot(s, H * s) / 2 : m + m = m isa Nothing ? dot(∇f, s) + dot(s, H, s) / 2 : m ( p = s, mz = m, @@ -23,13 +33,35 @@ function tr_return(; λ, ∇f, H, s, interior, solved, hard_case, Δ, m = nothin ) end -function update_H!(H, h, λ = nothing) +update_H!(mstyle::OutOfPlace,H, h, λ) = _update_H(H, h, λ) +update_H!(mstyle::OutOfPlace,H, h) = _update_H(H, h, nothing) +update_H!(mstyle::InPlace,H, h, λ) = _update_H!(H, h, λ) +update_H!(mstyle::InPlace,H, h) = _update_H!(H, h, nothing) + +function _update_H!(H, h, λ) T = eltype(h) n = length(h) - if !(λ == T(0)) + if λ == nothing for i = 1:n - @inbounds H[i, i] = λ isa Nothing ? h[i] : h[i] + λ + @inbounds H[i, i] = h[i] + end + elseif !(λ == T(0)) + for i = 1:n + @inbounds H[i, i] = h[i] + λ end end H end + +function _update_H(H, h, λ = nothing) + T = eltype(h) + if λ == nothing + Hd = Diagonal(h) + return H + Hd + elseif !(λ == T(0)) + Hd = Diagonal(h) + return H + Hd + λ*I + else + return H + end +end \ No newline at end of file diff --git a/src/globalization/trs_solvers/solvers/Dogleg.jl b/src/globalization/trs_solvers/solvers/Dogleg.jl index d61e916..0c834f7 100644 --- a/src/globalization/trs_solvers/solvers/Dogleg.jl +++ b/src/globalization/trs_solvers/solvers/Dogleg.jl @@ -20,6 +20,8 @@ struct Dogleg{T} <: TRSPSolver end Dogleg() = Dogleg(nothing) +trs_supports_outofplace(trs::Dogleg) = true + function (dogleg::Dogleg)(∇f, H, Δ, p, scheme, mstyle; abstol = 1e-10, maxiter = 50) T = eltype(p) n = length(∇f) @@ -80,7 +82,7 @@ function (dogleg::Dogleg)(∇f, H, Δ, p, scheme, mstyle; abstol = 1e-10, maxite interior = false end end - m = dot(∇f, p) + dot(p, H * p) / 2 + m = dot(∇f, p) + dot(p, H, p) / 2 return ( p = p, diff --git a/src/globalization/trs_solvers/solvers/NTR.jl b/src/globalization/trs_solvers/solvers/NTR.jl index 9dc42cc..5e6a200 100644 --- a/src/globalization/trs_solvers/solvers/NTR.jl +++ b/src/globalization/trs_solvers/solvers/NTR.jl @@ -64,16 +64,21 @@ function (ms::NTR)( n = length(∇f) h = H isa UniformScaling ? copy(∇f) .* 0 .+ 1 : diag(H) H = H isa UniformScaling ? Diagonal(copy(∇f) .* 0 .+ 1) : H - + inplace = mstyle == InPlace() # Check for interior convergence if λ == T(0) F = cholesky(Symmetric(H); check = false) - s .= -∇f - s .= F \ s + if inplace + s .= -∇f + s .= F \ s + else + s = -∇f + s = F \ s + end s₂ = norm(s, 2) if issuccess(F) && s₂ < Δ - H = update_H!(H, h) + H = update_H!(mstyle, H, h) return tr_return(; λ = λ, ∇f = ∇f, @@ -94,7 +99,7 @@ function (ms::NTR)( λL, λU = isg.L, isg.U for iter = 1:maxiter - H = update_H!(H, h, λ) + H = update_H!(mstyle, H, h, λ) F = cholesky(Symmetric(H); check = false) in𝓖, linpack = false, false #=========================================================================== @@ -109,13 +114,17 @@ function (ms::NTR)( # Algorithm 7.3.1 on p. 185 in [ConnGouldTointBook] # Step 1 was factorizing # Step 2 - s .= -∇f - s .= F \ s - + if inplace + s .= -∇f + s .= F \ s + else + s = -∇f + s = F \ s + end # Check if step is approximately equal to the radius s₂ = norm(s, 2) if s₂ ≈ Δ - H = update_H!(H, h) + H = update_H!(mstyle, H, h) return tr_return(; λ = λ, ∇f = ∇f, @@ -145,15 +154,18 @@ function (ms::NTR)( if in𝓖 linpack = true w, u = λL_with_linpack(F) - λL = max(λL, λ - dot(u, H * u)) + λL = max(λL, λ - dot(u, H, u)) α, s_g, m_g = 𝓖_root(u, s, Δ, ∇f, H) - s .= s_g - + if inplace + s .= s_g + else + s = s_g + end s₂ = norm(s) # check hard case convergnce - if α^2 * dot(u, H * u) ≤ κhard * (dot(s, H * s) + λ * Δ^2) - H = update_H!(H, h) + if α^2 * dot(u, H, u) ≤ κhard * (dot(s, H, s) + λ * Δ^2) + H = update_H!(mstyle, H, h) return tr_return(; λ = λ, ∇f = ∇f, @@ -167,12 +179,13 @@ function (ms::NTR)( ) end # If not the hard case solution, try to factorize H(λ⁺) - H = update_H!(H, h, λ⁺) + H = update_H!(mstyle, H, h, λ⁺) F = cholesky(H; check = false) if issuccess(F) # Then we're in L, great! lemma 7.3.2 λ = λ⁺ else # we landed in N, this is bad, so use bounds to approach L - λ = max(sqrt(λL * λU), λL + θ * (λU - λL)) + λLλU = abs(λL * λU) + λ = max(sqrt(λLλU), λL + θ * (λU - λL)) end else # in L, we can safely step λ = λ⁺ @@ -180,7 +193,7 @@ function (ms::NTR)( # check for convergence if in𝓖 && abs(s₂ - Δ) ≤ κeasy * Δ - H = update_H!(H, h) + H = update_H!(mstyle, H, h) return tr_return(; λ = λ, ∇f = ∇f, @@ -194,9 +207,13 @@ function (ms::NTR)( elseif abs(s₂ - Δ) ≤ κeasy * Δ # implicitly "if in 𝓕" since we're in that branch # u and α comes from linpack if linpack - if α^2 * dot(u, H * u) ≤ κhard * (dot(sλ, H * sλ) * Δ^2) - s .= s .+ α * u - H = update_H!(H, h) + if α^2 * dot(u, H, u) ≤ κhard * (dot(sλ, H, sλ) * Δ^2) + if inplace + s .= s .+ α * u + else + s = s + α * u + end + H = update_H!(mstyle, H, h) return tr_return(; λ = λ, ∇f = ∇f, @@ -216,10 +233,10 @@ function (ms::NTR)( # lower bound, we cannot apply the Newton step here. δ, v = λL_in_𝓝(H, F) λL = max(λL, λ + δ / dot(v, v)) # update lower bound - λ = max(sqrt(λL * λU), λL + θ * (λU - λL)) # no converence possible, so step in bracket + λ = max(sqrt(λLλU), λL + θ * (λU - λL)) # no convergence possible, so step in bracket end end - H = update_H!(H, h) + H = update_H!(mstyle, H, h) tr_return(; λ = λ, ∇f = ∇f, @@ -272,9 +289,9 @@ function 𝓖_root(u, s, Δ, ∇f, H) α₂ = (-pb - pd) / 2pa s₁ = s + α₁ * u - m₁ = dot(∇f, s₁) + dot(s₁, H * s₁) / 2 + m₁ = dot(∇f, s₁) + dot(s₁, H, s₁) / 2 s₂ = s + α₂ * u - m₂ = dot(∇f, s₂) + dot(s₂, H * s₂) / 2 + m₂ = dot(∇f, s₂) + dot(s₂, H, s₂) / 2 α, s, m = m₁ ≤ m₂ ? (α₁, s₁, m₁) : (α₂, s₂, m₂) α, s, m end diff --git a/src/globalization/trs_solvers/solvers/NWI.jl b/src/globalization/trs_solvers/solvers/NWI.jl index c7ec478..69e3ff8 100644 --- a/src/globalization/trs_solvers/solvers/NWI.jl +++ b/src/globalization/trs_solvers/solvers/NWI.jl @@ -25,7 +25,7 @@ struct NWI{T} <: NearlyExactTRSP end NWI() = NWI(eigen) summary(::NWI) = "Trust Region (Newton, eigen)" - +trs_supports_outofplace(trs::NWI) = true """ initial_safeguards(B, h, g, Δ) @@ -112,19 +112,27 @@ function is_maybe_hard_case(QΛQ, Qt∇f::AbstractVector{T}) where {T} end # Equation 4.38 in N&W (2006) -calc_p!(p, Qt∇f, QΛQ, λ) = calc_p!(p, Qt∇f, QΛQ, λ, 1) +calc_p!(mstyle::MutateStyle, p, Qt∇f, QΛQ, λ) = calc_p!(mstyle, p, Qt∇f, QΛQ, λ, 1) # Equation 4.45 in N&W (2006) since we allow for first_j > 1 -function calc_p!(p, Qt∇f, QΛQ, λ::T, first_j) where {T} +function calc_p!(mstyle::MutateStyle, p, Qt∇f, QΛQ, λ::T, first_j) where {T} + inplace = mstyle === InPlace() # Reset search direction to 0 - fill!(p, T(0)) - + if inplace + fill!(p, T(0)) + else + p = T(0) .* p + end # Unpack eigenvalues and eigenvectors Λ = QΛQ.values Q = QΛQ.vectors for j = first_j:length(Λ) κ = Qt∇f[j] / (Λ[j] + λ) - @. p = p - κ * Q[:, j] + if inplace + @. p = p - κ * Q[:, j] + else + p = p - κ * Q[:, j] + end end p end @@ -153,7 +161,7 @@ function (ms::NWI)(∇f, H, Δ, p, scheme, mstyle; abstol = 1e-10, maxiter = 50) n = length(∇f) H = H isa UniformScaling ? Diagonal(copy(∇f) .* 0 .+ 1) : H h = diag(H) - + inplace = mstyle == InPlace() # Note that currently the eigenvalues are only sorted if H is perfectly # symmetric. (Julia issue #17093) if H isa Diagonal @@ -176,14 +184,14 @@ function (ms::NWI)(∇f, H, Δ, p, scheme, mstyle; abstol = 1e-10, maxiter = 50) # positive, so the Newton step, pN, is fine unless norm(pN, 2) > Δ. if λmin >= sqrt(eps(T)) λ = T(0) # no amount of I is added yet - p = calc_p!(p, Qt∇f, QΛQ, λ) # calculate the Newton step + p = calc_p!(mstyle, p, Qt∇f, QΛQ, λ) # calculate the Newton step if norm(p, 2) ≤ Δ # No shrinkage is necessary: -(H \ ∇f) is the minimizer interior = true solved = true hard_case = false - m = dot(∇f, p) + dot(p, H * p) / 2 + m = dot(∇f, p) + dot(p, H, p) / 2 return ( p = p, @@ -218,7 +226,7 @@ function (ms::NWI)(∇f, H, Δ, p, scheme, mstyle; abstol = 1e-10, maxiter = 50) # The old p is discarded, and replaced with one that takes into account # the first j such that λj ≠ λmin. Formula 4.45 in N&W (2006) - pλ = calc_p!(p, Qt∇f, QΛQ, λ, first_j) + pλ = calc_p!(mstyle, p, Qt∇f, QΛQ, λ, first_j) # Check if the choice of λ leads to a solution inside the trust region. # If it does, then we construct the "hard case solution". @@ -228,9 +236,12 @@ function (ms::NWI)(∇f, H, Δ, p, scheme, mstyle; abstol = 1e-10, maxiter = 50) tau = sqrt(Δ^2 - norm(pλ, 2)^2) - @. p = -pλ + tau * Q[:, 1] - - m = dot(∇f, p) + dot(p, H * p) / 2 + if inplace + @. p = -pλ + tau * Q[:, 1] + else + p = -pλ + tau * Q[:, 1] + end + m = dot(∇f, p) + dot(p, H, p) / 2 return ( p = p, @@ -257,7 +268,7 @@ function (ms::NWI)(∇f, H, Δ, p, scheme, mstyle; abstol = 1e-10, maxiter = 50) λ = safeguard_λ(λ, isg) for iter = 1:maxiter λ_previous = λ - H = update_H!(H, h, λ) + H = update_H!(mstyle, H, h, λ) F = H isa Diagonal ? cholesky(H; check = false) : @@ -271,7 +282,11 @@ function (ms::NWI)(∇f, H, Δ, p, scheme, mstyle; abstol = 1e-10, maxiter = 50) continue end R = F.U - p .= R \ (R' \ -∇f) + if inplace + p .= R \ (R' \ -∇f) + else + p = R \ (R' \ -∇f) + end q_l = R' \ p p_norm = norm(p, 2) @@ -289,8 +304,8 @@ function (ms::NWI)(∇f, H, Δ, p, scheme, mstyle; abstol = 1e-10, maxiter = 50) end end - H = update_H!(H, h) - m = dot(∇f, p) + dot(p, H * p) / 2 + H = update_H!(mstyle, H, h) + m = dot(∇f, p) + dot(p, H, p) / 2 return ( p = p, mz = m, diff --git a/src/globalization/trs_solvers/solvers/TCG.jl b/src/globalization/trs_solvers/solvers/TCG.jl index 94d1405..d7a84ef 100644 --- a/src/globalization/trs_solvers/solvers/TCG.jl +++ b/src/globalization/trs_solvers/solvers/TCG.jl @@ -15,6 +15,7 @@ """ struct TCG <: TRSPSolver end summary(::TCG) = "Steihaug-Toint Truncated CG" +trs_supports_outofplace(trs::TCG) = true function (ms::TCG)( ∇f, @@ -30,8 +31,13 @@ function (ms::TCG)( κhard = T(2) / 10, ) where {T} M = I + inplace = mstyle == InPlace() # We only know that TCG has its properties if we start with s = 0 - s .= T(0) + if inplace + s .= T(0) + else + s = s * T(0) + end # g is the gradient of the quadratic model with the step α*p if all(iszero, ∇f) return tr_return(; @@ -50,17 +56,21 @@ function (ms::TCG)( p = -v for i = 1:maxiter # Check for negative curvature - κ = dot(p, H * p) + κ = dot(p, H, p) - c = dot(s, M * s) - b′ = dot(s, M * p) # b/2 - a = dot(p, M * p) + c = dot(s, M, s) + b′ = dot(s, M, p) # b/2 + a = dot(p, M, p) if κ <= 0 # This branch just catches that sigma can be nan if a is exactly 0. if !iszero(a) σ = (-b′ + sqrt(b′^2 + a * (Δ^2 - c))) / a - @. s = s + σ * p + if inplace + @. s = s + σ * p + else + s = s + σ * p + end end return tr_return(; λ = NaN, @@ -78,7 +88,11 @@ function (ms::TCG)( if a * α^2 + α * 2 * b′ + c >= Δ^2 σ = (-b′ + sqrt(b′^2 + a * (Δ^2 - c))) / a - @. s = s + σ * p + if inplace + @. s = s + σ * p + else + s = s + σ * p + end return tr_return(; λ = NaN, ∇f = ∇f, @@ -90,12 +104,24 @@ function (ms::TCG)( Δ = Δ, ) end - @. s = s + α * p + if inplace + @. s = s + α * p + else + s = s + α * p + end den = dot(g, v) - g .= g .+ α .* (H * p) - v .= M \ g - β = dot(g, v) / den - @. p = -v + β * p + if inplace + g .= g .+ α .* (H * p) + v .= M \ g + β = dot(g, v) / den + @. p = -v + β * p + else + den = dot(g, v) + g = g + α * (H * p) + v = M \ g + β = dot(g, v) / den + p = -v + β * p + end end tr_return(; λ = NaN, diff --git a/src/nlsolve/trustregions/newton.jl b/src/nlsolve/trustregions/newton.jl index af25da8..462746f 100644 --- a/src/nlsolve/trustregions/newton.jl +++ b/src/nlsolve/trustregions/newton.jl @@ -47,14 +47,9 @@ function solve( x, approach::TrustRegion{<:Union{SR1,DBFGS,BFGS,Newton},<:Any,<:Any}, options::NEqOptions, -) - if !(mstyle(prob) === InPlace()) && !(approach.spsolve isa Dogleg) - throw( - ErrorException( - "solve() not defined for OutOfPlace() with Trustregion for NEqProblem", - ), - ) - end +) + + trs_outofplace_check(approach.spsolve,prob) F = prob.R # should we wrap a Fx here so we can log F0 info here? # and so we can extract it at the end as well? diff --git a/src/optimize/trustregions/optimize/inplace_loop.jl b/src/optimize/trustregions/optimize/inplace_loop.jl index f1b7eb8..7dfa901 100644 --- a/src/optimize/trustregions/optimize/inplace_loop.jl +++ b/src/optimize/trustregions/optimize/inplace_loop.jl @@ -16,11 +16,8 @@ function solve( objvars; initial_Δ = 20.0, ) - if !(mstyle(problem) === InPlace()) && !(approach.spsolve == Dogleg()) - throw( - ErrorException("solve() not defined for OutOfPlace() with TrustRegion solvers"), - ) - end + + trs_outofplace_check(approach.spsolve,problem) t0 = time() T = eltype(objvars.z) Δmin = cbrt(eps(T)) diff --git a/src/quasinewton/update_qn.jl b/src/quasinewton/update_qn.jl index 3129ba4..7116992 100644 --- a/src/quasinewton/update_qn.jl +++ b/src/quasinewton/update_qn.jl @@ -8,14 +8,14 @@ function update_obj!(problem, s, y, ∇fx, z, ∇fz, B, scheme, scale = nothing) # Update B if scale == nothing if isa(scheme.approx, Direct) - yBy = dot(y, B * y) + yBy = dot(y, B, y) if !iszero(yBy) Badj = dot(y, s) / yBy .* B end else ys = dot(y, s) if !iszero(ys) - Badj = dot(y, B * y) / ys .* B + Badj = dot(y, B, y) / ys .* B else return fz, ∇fz, B, s, y end @@ -45,7 +45,7 @@ function update_obj(problem, s, ∇fx, z, ∇fz, B, scheme, scale = nothing) # Update B if scale == nothing if isa(scheme.approx, Direct) - yBy = dot(y, B * y) + yBy = dot(y, B, y) if !iszero(yBy) && isfinite(yBy) Badj = dot(y, s) / yBy * B # this is different than above? else @@ -54,7 +54,7 @@ function update_obj(problem, s, ∇fx, z, ∇fz, B, scheme, scale = nothing) else ys = dot(y, s) if !iszero(ys) && isfinite(ys) - Badj = dot(y, B * y) / ys * B + Badj = dot(y, B, y) / ys * B else Badj = B end