Commit
Documented grad_test() and updated version number
gbruer15 committed Apr 24, 2023
1 parent 9ffcb44 commit f01bead
Showing 2 changed files with 61 additions and 24 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "JutulDarcyRules"
uuid = "41f0c4f5-9bdd-4ef1-8c3a-d454dff2d562"
authors = ["Ziyi Yin <ziyi.yin@gatech.edu>"]
-version = "0.2.4"
+version = "0.2.5"

[deps]
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
83 changes: 60 additions & 23 deletions test/grad_test.jl
@@ -1,6 +1,3 @@
-using Test
-using Printf
-using LinearAlgebra
mean(x) = sum(x)/length(x)

function log_division(a, b)
@@ -10,14 +7,54 @@ function log_division(a, b)
return log(a / b)
end

-function grad_test(f, x0, dx, dJdx; dJ=nothing, maxiter=6, h0=5e-2, stol=1e-1, hfactor=8e-1)
-    if !xor(isnothing(dJdx), isnothing(dJ))
-        error("Must specify either dJdx or dJ")
+"""
+    grad_test(J, x0, Δx, dJdx; ΔJ=nothing, maxiter=6, h0=5e-2, stol=1e-1, hfactor=8e-1)
+
+Test the gradient using Taylor-series convergence.
+
+Compute a series of residuals using zeroth-order and first-order Taylor approximations.
+Each perturbed computation J(x₀ + hᵢ Δx) is compared to J(x₀) and to J(x₀) + hᵢ ΔJ,
+where hᵢ = hfactor * hᵢ₋₁ and ΔJ = dJdx ⋅ Δx by default. If the computations of J and
+dJdx are correct, J is sufficiently smooth, and h is sufficiently small, the zeroth-order
+approximation should converge linearly and the first-order approximation should
+converge quadratically.
+
+It can be difficult to observe the exact convergence rates, so the convergence factors
+are averaged across maxiter values of h, and the test passes if each average factor is
+at least the expected factor minus the stol parameter.
+
+# Mathematical basis
+
+For sufficiently smooth J, the value at a small perturbation from x₀ can be computed
+using the Taylor series expansion
+```math
+J(x₀ + h Δx) = J(x₀) + h (dJ/dx) Δx + ½ h² (d²J/dx²) : (Δx Δxᵀ) + O(h³)
+```
+If d²J/dx² is non-zero and h is small enough, the value of the first-order Taylor
+approximation differs from the true value by approximately a constant times h²:
+```math
+err(h) = |J(x₀ + h Δx) - J(x₀) - h (dJ/dx) Δx| ≈ h² c
+```
+Taking the error at two different values of h eliminates the unknown constant:
+```math
+err(h₁) / err(h₂) ≈ (h₁ / h₂)²
+```
+So if h₁ is divided by a factor α, the ratio of the errors should be divided by a factor
+α². Equivalently, the rate of convergence can be computed with logarithms:
+```math
+log(err(h₁) / err(h₂)) / log(h₁ / h₂) ≈ 2
+```
+"""
+function grad_test(J, x0, Δx, dJdx; ΔJ=nothing, maxiter=6, h0=5e-2, stol=1e-1, hfactor=8e-1)
+    if !xor(isnothing(dJdx), isnothing(ΔJ))
+        error("Must specify either dJdx or ΔJ")
    end
-    if isnothing(dJ)
-        dJ = dot(dJdx, dx)
+    if isnothing(ΔJ)
+        ΔJ = dot(dJdx, Δx)
    end
-    J0 = f(x0)
+    J0 = J(x0)
    h = h0

    log_factor = log(hfactor)
@@ -32,40 +69,40 @@ function grad_test(f, x0, dx, dJdx; dJ=nothing, maxiter=6, h0=5e-2, stol=1e-1, h
    @printf("%11s, %12s | %11s, %11s | %11s, %11s | % 12s, % 12s \n", "___________", "___________", "___________", "___________", "___________", "___________", "____________", "____________")
    all_info = []
    for j=1:maxiter
-        J = f(x0 + h*dx)
-        Js[j] = J
-        err1[j] = norm(J - J0, 1)
-        err2[j] = norm(J - J0 - h*dJ, 1)
+        Jh = J(x0 + h*Δx)
+        Js[j] = Jh
+        err1[j] = norm(Jh - J0, 1)
+        err2[j] = norm(Jh - J0 - h*ΔJ, 1)
        j == 1 ? prev = 1 : prev = j - 1

-        dJ_est = (J - J0) / h
+        dJ_est = (Jh - J0) / h
        α = expected_f2
-        dJ_est1 = (α*Js[prev] - J + (1-α)*J0) / (h * (α/hfactor - 1))
+        dJ_est1 = (α*Js[prev] - Jh + (1-α)*J0) / (h * (α/hfactor - 1))
        α = -expected_f2
-        dJ_est2 = (α*Js[prev] - J + (1-α)*J0) / (h * (α/hfactor - 1))
+        dJ_est2 = (α*Js[prev] - Jh + (1-α)*J0) / (h * (α/hfactor - 1))

        rate1 = log_division(err1[j], err1[prev]) / log_factor
        rate2 = log_division(err2[j], err2[prev]) / log_factor

-        info = (h, h*norm(dJ, 1), err1[j], err2[j], err1[prev]/err1[j], err2[prev]/err2[j], rate1, rate2, dJ_est, dJ_est1, dJ_est2)
+        info = (h, h*norm(ΔJ, 1), err1[j], err2[j], err1[prev]/err1[j], err2[prev]/err2[j], rate1, rate2, dJ_est, dJ_est1, dJ_est2)
        push!(all_info, info)
        @printf("%11.5e, % 12.5e | %11.5e, %11.5e | %11.5e, %11.5e | % 12.5e, % 12.5e | % 12.5e, % 12.5e, % 12.5e \n", info...)
        h = h * hfactor
    end

    println()
-    @printf("%11s, %12s | %11s, %11s | %11s, %11s | %12s, %12s | %12s %12s %12s \n", "h", "dJ", "e1", "e2", "factor1", "factor2", "rate1", "rate2", "Finite-diff", "T1 approx", "T2 approx")
-    @printf("%11s, % 12.5e | %11s, %11s | %11.5e, %11.5e | % 12.5e, % 12.5e | \n", "", dJ, "0", "0", expected_f1, expected_f2, 1, 2)
+    @printf("%11s, %12s | %11s, %11s | %11s, %11s | %12s, %12s | %12s %12s %12s \n", "h", "ΔJ", "e1", "e2", "factor1", "factor2", "rate1", "rate2", "Finite-diff", "T1 approx", "T2 approx")
+    @printf("%11s, % 12.5e | %11s, %11s | %11.5e, %11.5e | % 12.5e, % 12.5e | \n", "", ΔJ, "0", "0", expected_f1, expected_f2, 1, 2)
    @printf("%11s, %12s | %11s, %11s | %11s, %11s | % 12s, % 12s \n", "___________", "___________", "___________", "___________", "___________", "___________", "____________", "____________")
    for j=1:maxiter
        info = all_info[j]
        @printf("%11.5e, % 12.5e | %11.5e, %11.5e | %11.5e, %11.5e | % 12.5e, % 12.5e | % 12.5e, % 12.5e, % 12.5e \n", info...)
        h = h * hfactor
    end

-    rate1 = err1[1:end-1]./err1[2:end]
-    rate2 = err2[1:end-1]./err2[2:end]
-    @test mean(rate1) ≥ expected_f1 - stol
-    @test mean(rate2) ≥ expected_f2 - stol
+    factor1 = err1[1:end-1]./err1[2:end]
+    factor2 = err2[1:end-1]./err2[2:end]
+    @test mean(factor1) ≥ expected_f1 - stol
+    @test mean(factor2) ≥ expected_f2 - stol
end
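
For reference, a minimal sketch of calling the newly documented function on an objective whose gradient is known exactly. The quadratic J, the random test point, and the include path are illustrative assumptions, not part of this commit:

```julia
using Test, Printf, LinearAlgebra
include("grad_test.jl")   # assumed path: run from the test/ directory

# Objective with a known gradient: J(x) = ½‖x‖² has dJ/dx = x.
J(x) = 0.5 * norm(x)^2
x0 = randn(10)   # point at which the gradient is tested
Δx = randn(10)   # perturbation direction
dJdx = x0        # analytic gradient of J evaluated at x0

# The zeroth-order errors should shrink like h and the first-order errors
# like h²; the @test calls at the end of grad_test assert this on average.
grad_test(J, x0, Δx, dJdx)
```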

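The convergence argument in the docstring can also be checked without grad_test itself. A standalone sketch, assuming a cubic objective and the default h0/hfactor schedule:

```julia
using LinearAlgebra

# J(x) = Σ xᵢ³ has gradient 3x.^2 and a non-zero second derivative, so the
# first-order Taylor error should shrink like h².
J(x) = sum(x.^3)
x0 = [1.0, 2.0]
Δx = [0.3, -0.1]
ΔJ = dot(3 .* x0.^2, Δx)   # ΔJ = dJ/dx ⋅ Δx
J0 = J(x0)

hfactor = 8e-1
hs = [5e-2 * hfactor^i for i in 0:5]   # hᵢ = hfactor * hᵢ₋₁, as in grad_test
err1 = [abs(J(x0 + h*Δx) - J0) for h in hs]          # zeroth order: O(h)
err2 = [abs(J(x0 + h*Δx) - J0 - h*ΔJ) for h in hs]   # first order: O(h²)

# Consecutive error ratios should approach 1/hfactor = 1.25 for err1
# and 1/hfactor² = 1.5625 for err2.
@show err1[1:end-1] ./ err1[2:end]
@show err2[1:end-1] ./ err2[2:end]
```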