Commit
Documented grad_test() and updated version number
gbruer15 committed Apr 24, 2023
1 parent 9ffcb44 commit f01bead
Showing 2 changed files with 61 additions and 24 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "JutulDarcyRules"
uuid = "41f0c4f5-9bdd-4ef1-8c3a-d454dff2d562"
authors = ["Ziyi Yin <ziyi.yin@gatech.edu>"]
-version = "0.2.4"
+version = "0.2.5"

[deps]
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
83 changes: 60 additions & 23 deletions test/grad_test.jl
@@ -1,6 +1,3 @@
-using Test
-using Printf
-using LinearAlgebra
mean(x) = sum(x)/length(x)

function log_division(a, b)
@@ -10,14 +7,54 @@ function log_division(a, b)
return log(a / b)
end

-function grad_test(f, x0, dx, dJdx; dJ=nothing, maxiter=6, h0=5e-2, stol=1e-1, hfactor=8e-1)
-    if !xor(isnothing(dJdx), isnothing(dJ))
-        error("Must specify either dJdx or dJ")
+"""
+    grad_test(J, x0, Δx, dJdx; ΔJ=nothing, maxiter=6, h0=5e-2, stol=1e-1, hfactor=8e-1)
+
+Test the gradient using Taylor-series convergence.
+
+Compute a series of residuals using zeroth-order and first-order Taylor approximations.
+Each perturbed computation J(x₀ + hᵢ Δx) is compared to J(x₀) and to J(x₀) + hᵢ ΔJ,
+where hᵢ = hfactor * hᵢ₋₁ and ΔJ = dJdx ⋅ Δx by default. If the computations of J and
+dJdx are correct, J is sufficiently smooth, and h is sufficiently small, the zeroth-order
+approximation should converge linearly and the first-order approximation should
+converge quadratically.
+
+It can be difficult to observe the exact convergence rates, so the convergence factors
+are averaged across maxiter values of h, and the test passes if each average factor is
+at least the expected factor minus the stol parameter.
+
+# Mathematical basis
+
+For sufficiently smooth J, the value at a small perturbation from x₀ can be computed
+using the Taylor series expansion
+```math
+J(x₀ + h Δx) = J(x₀) + h (dJ/dx) Δx + ½ h² (d²J/dx²) : (Δx Δxᵀ) + O(h³)
+```
+If d²J/dx² is non-zero and h is small enough, the value of the first-order Taylor
+approximation differs from the true value by approximately a constant times h²:
+```math
+err(h) = |J(x₀ + h Δx) - J(x₀) - h (dJ/dx) Δx| ≈ h² c
+```
+Taking the error at two different values of h eliminates the unknown constant:
+```math
+err(h₁) / err(h₂) ≈ (h₁ / h₂)²
+```
+So if h₁ is divided by a factor α, the ratio of the errors should be divided by a factor
+α². Equivalently, the rate of convergence can be computed with logarithms:
+```math
+log(err(h₁) / err(h₂)) / log(h₁ / h₂) ≈ 2
+```
+"""
+function grad_test(J, x0, Δx, dJdx; ΔJ=nothing, maxiter=6, h0=5e-2, stol=1e-1, hfactor=8e-1)
+    if !xor(isnothing(dJdx), isnothing(ΔJ))
+        error("Must specify either dJdx or ΔJ")
    end
-    if isnothing(dJ)
-        dJ = dot(dJdx, dx)
+    if isnothing(ΔJ)
+        ΔJ = dot(dJdx, Δx)
    end
-    J0 = f(x0)
+    J0 = J(x0)
    h = h0

    log_factor = log(hfactor)
@@ -32,40 +69,40 @@ function grad_test(f, x0, dx, dJdx; dJ=nothing, maxiter=6, h0=5e-2, stol=1e-1, h
    @printf("%11s, %12s | %11s, %11s | %11s, %11s | % 12s, % 12s \n", "___________", "___________", "___________", "___________", "___________", "___________", "____________", "____________")
    all_info = []
    for j=1:maxiter
-        J = f(x0 + h*dx)
-        Js[j] = J
-        err1[j] = norm(J - J0, 1)
-        err2[j] = norm(J - J0 - h*dJ, 1)
+        Jh = J(x0 + h*Δx)
+        Js[j] = Jh
+        err1[j] = norm(Jh - J0, 1)
+        err2[j] = norm(Jh - J0 - h*ΔJ, 1)
        j == 1 ? prev = 1 : prev = j - 1

-        dJ_est = (J - J0) / h
+        dJ_est = (Jh - J0) / h
        α = expected_f2
-        dJ_est1 = (α*Js[prev] - J + (1-α)*J0) / (h * (α/hfactor - 1))
+        dJ_est1 = (α*Js[prev] - Jh + (1-α)*J0) / (h * (α/hfactor - 1))
        α = -expected_f2
-        dJ_est2 = (α*Js[prev] - J + (1-α)*J0) / (h * (α/hfactor - 1))
+        dJ_est2 = (α*Js[prev] - Jh + (1-α)*J0) / (h * (α/hfactor - 1))

        rate1 = log_division(err1[j], err1[prev]) / log_factor
        rate2 = log_division(err2[j], err2[prev]) / log_factor

-        info = (h, h*norm(dJ, 1), err1[j], err2[j], err1[prev]/err1[j], err2[prev]/err2[j], rate1, rate2, dJ_est, dJ_est1, dJ_est2)
+        info = (h, h*norm(ΔJ, 1), err1[j], err2[j], err1[prev]/err1[j], err2[prev]/err2[j], rate1, rate2, dJ_est, dJ_est1, dJ_est2)
        push!(all_info, info)
        @printf("%11.5e, % 12.5e | %11.5e, %11.5e | %11.5e, %11.5e | % 12.5e, % 12.5e | % 12.5e, % 12.5e, % 12.5e \n", info...)
        h = h * hfactor
    end

    println()
-    @printf("%11s, %12s | %11s, %11s | %11s, %11s | %12s, %12s | %12s %12s %12s \n", "h", "dJ", "e1", "e2", "factor1", "factor2", "rate1", "rate2", "Finite-diff", "T1 approx", "T2 approx")
-    @printf("%11s, % 12.5e | %11s, %11s | %11.5e, %11.5e | % 12.5e, % 12.5e | \n", "", dJ, "0", "0", expected_f1, expected_f2, 1, 2)
+    @printf("%11s, %12s | %11s, %11s | %11s, %11s | %12s, %12s | %12s %12s %12s \n", "h", "ΔJ", "e1", "e2", "factor1", "factor2", "rate1", "rate2", "Finite-diff", "T1 approx", "T2 approx")
+    @printf("%11s, % 12.5e | %11s, %11s | %11.5e, %11.5e | % 12.5e, % 12.5e | \n", "", ΔJ, "0", "0", expected_f1, expected_f2, 1, 2)
    @printf("%11s, %12s | %11s, %11s | %11s, %11s | % 12s, % 12s \n", "___________", "___________", "___________", "___________", "___________", "___________", "____________", "____________")
    for j=1:maxiter
        info = all_info[j]
        @printf("%11.5e, % 12.5e | %11.5e, %11.5e | %11.5e, %11.5e | % 12.5e, % 12.5e | % 12.5e, % 12.5e, % 12.5e \n", info...)
        h = h * hfactor
    end

-    rate1 = err1[1:end-1]./err1[2:end]
-    rate2 = err2[1:end-1]./err2[2:end]
-    @test mean(rate1) ≥ expected_f1 - stol
-    @test mean(rate2) ≥ expected_f2 - stol
+    factor1 = err1[1:end-1]./err1[2:end]
+    factor2 = err2[1:end-1]./err2[2:end]
+    @test mean(factor1) ≥ expected_f1 - stol
+    @test mean(factor2) ≥ expected_f2 - stol
end
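
For reference, a minimal sketch of calling the newly documented function on an objective whose gradient is known exactly. The quadratic J, the random test point, and the include path are illustrative assumptions, not part of this commit:

```julia
using Test, Printf, LinearAlgebra
include("grad_test.jl")   # assumed path: run from the test/ directory

# Objective with a known gradient: J(x) = ½‖x‖² has dJ/dx = x.
J(x) = 0.5 * norm(x)^2
x0 = randn(10)   # point at which the gradient is tested
Δx = randn(10)   # perturbation direction
dJdx = x0        # analytic gradient of J evaluated at x0

# The zeroth-order errors should shrink like h and the first-order errors
# like h²; the @test calls at the end of grad_test assert this on average.
grad_test(J, x0, Δx, dJdx)
```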

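The convergence argument in the docstring can also be checked without grad_test itself. A standalone sketch, assuming a cubic objective and the default h0/hfactor schedule:

```julia
using LinearAlgebra

# J(x) = Σ xᵢ³ has gradient 3x.^2 and a non-zero second derivative, so the
# first-order Taylor error should shrink like h².
J(x) = sum(x.^3)
x0 = [1.0, 2.0]
Δx = [0.3, -0.1]
ΔJ = dot(3 .* x0.^2, Δx)   # ΔJ = dJ/dx ⋅ Δx
J0 = J(x0)

hfactor = 8e-1
hs = [5e-2 * hfactor^i for i in 0:5]   # hᵢ = hfactor * hᵢ₋₁, as in grad_test
err1 = [abs(J(x0 + h*Δx) - J0) for h in hs]          # zeroth order: O(h)
err2 = [abs(J(x0 + h*Δx) - J0 - h*ΔJ) for h in hs]   # first order: O(h²)

# Consecutive error ratios should approach 1/hfactor = 1.25 for err1
# and 1/hfactor² = 1.5625 for err2.
@show err1[1:end-1] ./ err1[2:end]
@show err2[1:end-1] ./ err2[2:end]
```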