diff --git a/Project.toml b/Project.toml index 64819f2..1d3ff31 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ManifoldDiff" uuid = "af67fdf4-a580-4b9f-bbec-742ef357defd" authors = ["Seth Axen ", "Mateusz Baran ", "Ronny Bergmann "] -version = "0.2.1" +version = "0.3" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/docs/Project.toml b/docs/Project.toml index 7a484f2..9e55c4c 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -8,16 +8,11 @@ ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" ManifoldDiff = "af67fdf4-a580-4b9f-bbec-742ef357defd" Manifolds = "1cead3c2-87b3-11e9-0ccd-23c62b72b94e" ManifoldsBase = "3362f125-f0bb-47a3-aa74-596ffd7ef2fb" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" -StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] Documenter = "0.27" -ManifoldDiff = "0.2" +ManifoldDiff = "0.3" ManifoldsBase = "0.13" -Manifolds = "0.8" -Plots = "1" -PyPlot = "2.9" +Manifolds = "0.8" \ No newline at end of file diff --git a/docs/make.jl b/docs/make.jl index 523352c..05a2103 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,7 +1,5 @@ -using Plots, Manifolds, ManifoldsBase, ManifoldDiff, Documenter, PyPlot -ENV["GKSwstype"] = "100" -# required for loading the manifold tests functions -using Test, ForwardDiff, ReverseDiff, FiniteDifferences, Zygote +using ForwardDiff, ReverseDiff, FiniteDifferences, Zygote +using Manifolds, ManifoldsBase, ManifoldDiff, Documenter makedocs( # for development, we disable prettyurls diff --git a/docs/src/index.md b/docs/src/index.md index 2f853cb..f2a0f37 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,3 +1,17 @@ # ManifoldDiff The package __ManifoldDiff__ aims to provide automatic calculation of Riemannian gradients of functions defined on manifolds. It builds upon [`Manifolds.jl`](https://github.com/JuliaManifolds/Manifolds.jl). + +## Naming scheme + +Providing a derivative, differential or gradient for a given function, this package adds that information to the function name. +For example + +* `grad_f` for a gradient ``\operatorname{grad} f`` +* `differential_f` for ``Df`` (also called pushforward) +* `differential_f_variable` if `f` has multiple variables / parameters, since a usual writing in math is ``f_x`` in this case +* `adjoint_differential_f` for pullbacks +* `adjoint_differential_f_variable` if `f` has multiple variables / parameters +* `f_derivative` for ``f'`` + +the scheme is not completely fixed but tries to follow the mathematical notation. diff --git a/docs/src/library.md b/docs/src/library.md index e8e6222..b544b37 100644 --- a/docs/src/library.md +++ b/docs/src/library.md @@ -2,24 +2,45 @@ Documentation for `ManifoldDiff.jl`'s methods and types for finite differences and automatic differentiation. -## Differentials +## Derivatives ```@autodocs Modules = [ManifoldDiff] -Pages = ["differentials.jl"] +Pages = ["derivatives.jl"] Order = [:type, :function, :constant] +Private = true ``` +## Differentials and their adjoints + ```@autodocs Modules = [ManifoldDiff] Pages = ["adjoint_differentials.jl"] Order = [:type, :function, :constant] +Private = true +``` + +```@autodocs +Modules = [ManifoldDiff] +Pages = ["differentials.jl"] +Order = [:type, :function, :constant] +Private = true ``` ```@autodocs Modules = [ManifoldDiff] Pages = ["diagonalizing_projectors.jl"] Order = [:type, :function, :constant] +Private = true +``` + +## Gradients + +```@autodocs +Modules = [ManifoldDiff] +Pages = ["gradients.jl"] +Order = [:type, :function, :constant] +Private = true ``` ## Jacobi fields @@ -72,18 +93,12 @@ Pages = ["finite_differences.jl"] Order = [:type, :function, :constant] ``` -### ReverseDiff.jl - -```@autodocs -Modules = [ManifoldDiff] -Pages = ["reverse_diff.jl"] -Order = [:type, :function, :constant] -``` - -### Zygote.jl +## Internal functions ```@autodocs Modules = [ManifoldDiff] -Pages = ["zygote.jl"] +Pages = ["ManifoldDiff.jl"] Order = [:type, :function, :constant] +Private = true +Public=false ``` diff --git a/src/ManifoldDiff.jl b/src/ManifoldDiff.jl index bfc079b..97907f3 100644 --- a/src/ManifoldDiff.jl +++ b/src/ManifoldDiff.jl @@ -14,6 +14,7 @@ using ManifoldsBase: TangentSpaceType, PowerManifoldNested, PowerManifoldNestedReplacing, + allocate_result, get_iterator, _write, _read @@ -36,7 +37,7 @@ struct NoneDiffBackend <: AbstractDiffBackend end _derivative(f, t[, backend::AbstractDiffBackend]) Compute the derivative of a callable `f` at time `t` computed using the given `backend`, -an object of type [`AbstractDiffBackend`](@ref). If the backend is not explicitly +an object of type [`AbstractDiffBackend`](@ref ManifoldDiff.AbstractDiffBackend). If the backend is not explicitly specified, it is obtained using the function [`default_differential_backend`](@ref). This function calculates plain Euclidean derivatives, for Riemannian differentiation see @@ -64,7 +65,7 @@ end _gradient(f, p[, backend::AbstractDiffBackend]) Compute the gradient of a callable `f` at point `p` computed using the given `backend`, -an object of type [`AbstractDiffBackend`](@ref). If the backend is not explicitly +an object of type [`AbstractDiffBackend`](@ref ManifoldDiff.AbstractDiffBackend). If the backend is not explicitly specified, it is obtained using the function [`default_differential_backend`](@ref). This function calculates plain Euclidean gradients, for Riemannian gradient calculation see @@ -164,13 +165,16 @@ end include("diagonalizing_projectors.jl") -include("differentials.jl") include("adjoint_differentials.jl") +include("derivatives.jl") +include("differentials.jl") +include("gradients.jl") include("Jacobi_fields.jl") include("riemannian_diff.jl") include("embedded_diff.jl") + function __init__() @require FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" begin using .FiniteDiff diff --git a/src/derivatives.jl b/src/derivatives.jl new file mode 100644 index 0000000..820a8d7 --- /dev/null +++ b/src/derivatives.jl @@ -0,0 +1,68 @@ +@doc raw""" + Y = geodesic_derivative(M, p, X, t::Number; γt = geodesic(M, p, X, t)) + geodesic_derivative!(M, Y, p, X, t::Number; γt = geodesic(M, p, X, t)) + +Evaluate the derivative of the geodesic ``γ(t)`` with ``γ_{p,X}(0) = p`` and ``\dot γ_{p,X}(0) = X`` at ``t``. +The formula reads + +```math +\dot γ(t) = \mathcal P_{γ(t) \gets p} X +``` + +where $\mathcal P$ denotes the parallel transport. +This computation can also be done in-place of ``Y``. + +# Optional Parameters + +* `γt` – (`geodesic(M, p, X, t)`) the point on the geodesic at ``t``. + This way if the point was computed earlier it can be resued here. +""" +function geodesic_derivative(M, p, X, t::Number; q = geodesic(M, p, X, t)) + return parallel_transport_to(M, p, X, q) +end +function geodesic_derivative!(M, Y, p, X, t::Number; q = geodesic(M, p, X, t)) + parallel_transport_to!(M, Y, p, X, q) + return Y +end + +@doc raw""" + Y = shortest_geodesic_derivative(M, p, X, t::Number; γt = shortest_geodesic(M, p, q, t)) + shortest_geodesic_derivative!(M, Y, p, X, t::Number; γt = shortest_geodesic(M, p, q, t)) + +Evaluate the derivative of the shortest geodesic ``γ(t)`` with ``γ_{p,q}(0) = p`` and ``\dot γ_{p,q}(1) = q`` +at ``t``. +The formula reads + +```math +\dot γ(t) = \mathcal P_{γ(t) \gets p} \log_pq +``` + +where $\mathcal P$ denotes the parallel transport. +This computation can also be done in-place of ``Y``. + +# Optional Parameters + +* `γt = geodesic(M, p, X, t)` the point on the geodesic at ``t``. + This way if the point was computed earlier it can be resued here. +""" +function shortest_geodesic_derivative( + M, + p, + q, + t::Number; + γt = shortest_geodesic(M, p, q, t), +) + return parallel_transport_to(M, p, log(M, p, q), γt) +end +function shortest_geodesic_derivative!( + M, + Y, + p, + q, + t::Number; + γt = shortest_geodesic(M, p, q, t), +) + log!(M, Y, p, q) + parallel_transport_to!(M, Y, p, Y, γt) + return Y +end diff --git a/src/diagonalizing_projectors.jl b/src/diagonalizing_projectors.jl index 93fb910..b9e1347 100644 --- a/src/diagonalizing_projectors.jl +++ b/src/diagonalizing_projectors.jl @@ -18,7 +18,7 @@ Compute eigenvalues of the Jacobi operator $Y → R(Y,X)X$, where $R$ is the cur endomorphism, together with projectors onto eigenspaces of the operator. Projectors are objects of subtypes of [`AbstractProjector`](@ref). -By default constructs projectors using the [`DiagonalizingOrthonormalBasis`](@ref). +By default constructs projectors using the [`DiagonalizingOrthonormalBasis`](https://juliamanifolds.github.io/ManifoldsBase.jl/stable/bases.html#ManifoldsBase.DiagonalizingOrthonormalBasis). """ function diagonalizing_projectors(M::AbstractManifold, p, X) B = get_basis(M, p, DiagonalizingOrthonormalBasis(X)) @@ -31,7 +31,7 @@ end """ ProjectorOntoVector{TM<:AbstractManifold,TP,TX} - + A structure that represents projector onto the subspace of the tangent space at `p` from manifold `M` spanned by tangent vector `X` of unit norm. @@ -51,7 +51,7 @@ end """ CoprojectorOntoVector{TM<:AbstractManifold,TP,TX} - + A structure that represents projector onto the subspace of the tangent space at `p` from manifold `M` othogonal to vector `X` of unit norm. diff --git a/src/differentials.jl b/src/differentials.jl index 570381a..a18f479 100644 --- a/src/differentials.jl +++ b/src/differentials.jl @@ -1,6 +1,5 @@ - @doc raw""" - differential_shortest_geodesic_startpoint(M, p, q, t, X) + Y = differential_shortest_geodesic_startpoint(M, p, q, t, X) differential_shortest_geodesic_startpoint!(M, Y, p, q, t, X) Compute ``D_p γ(t;p,q)[η]`` (in place of `Y`). @@ -17,7 +16,7 @@ end @doc raw""" - differential_shortest_geodesic_endpoint(M, p, q, t, X) + Y = differential_shortest_geodesic_endpoint(M, p, q, t, X) differential_shortest_geodesic_endpoint!(M, Y, p, q, t, X) Compute ``D_qγ(t;p,q)[X]`` (in place of `Y`). @@ -33,7 +32,7 @@ function differential_shortest_geodesic_endpoint!(M::AbstractManifold, Y, p, q, end @doc raw""" - differential_exp_basepoint(M, p, X, Y) + Z = differential_exp_basepoint(M, p, X, Y) differential_exp_basepoint!(M, Z, p, X, Y) Compute ``D_p\exp_p X[Y]`` (in place of `Z`). @@ -49,7 +48,7 @@ function differential_exp_basepoint!(M::AbstractManifold, Z, p, X, Y) end @doc raw""" - differential_exp_argument(M, p, X, Y) + Z = differential_exp_argument(M, p, X, Y) differential_exp_argument!(M, Z, p, X, Y) computes ``D_X\exp_pX[Y]`` (in place of `Z`). @@ -66,7 +65,7 @@ function differential_exp_argument!(M::AbstractManifold, Z, p, X, Y) end @doc raw""" - differential_log_basepoint(M, p, q, X) + Y = differential_log_basepoint(M, p, q, X) differential_log_basepoint!(M, Y, p, q, X) computes ``D_p\log_pq[X]`` (in place of `Y`). @@ -82,8 +81,8 @@ function differential_log_basepoint!(M::AbstractManifold, Y, p, q, X) end @doc raw""" - differential_log_argument(M, p, q, X) - differential_log_argument(M, Y, p, q, X) + Y = differential_log_argument(M, p, q, X) + differential_log_argument!(M, Y, p, q, X) computes ``D_q\log_pq[X]`` (in place of `Y`). @@ -136,7 +135,7 @@ function differential_exp_argument_lie_approx!(M::AbstractManifold, Z, p, X, Y; end @doc raw""" - inverse_retract_diff_argument_fd_approx( + differential_inverse_retract_argument_fd_approx( M::AbstractManifold, p, q, @@ -159,7 +158,7 @@ retraction `invretr` and ``\operatorname{retr}`` is the retraction `retr`. > manifolds,” arXiv:1908.05875 [cs, math], Sep. 2019, > Available: http://arxiv.org/abs/1908.05875 """ -function inverse_retract_diff_argument_fd_approx( +function differential_inverse_retract_argument_fd_approx( M::AbstractManifold, p, q, @@ -168,12 +167,12 @@ function inverse_retract_diff_argument_fd_approx( invretr::AbstractInverseRetractionMethod = default_inverse_retraction_method(M), h::Real = sqrt(eps(eltype(X))), ) - Y = allocate_result(M, inverse_retract_diff_argument_fd_approx, X, p, q) - inverse_retract_diff_argument_fd_approx!(M, Y, p, q, X; retr, invretr, h) + Y = allocate_result(M, differential_inverse_retract_argument_fd_approx, X, p, q) + differential_inverse_retract_argument_fd_approx!(M, Y, p, q, X; retr, invretr, h) return Y end -function inverse_retract_diff_argument_fd_approx!( +function differential_inverse_retract_argument_fd_approx!( M::AbstractManifold, Y, p, diff --git a/src/gradients.jl b/src/gradients.jl new file mode 100644 index 0000000..30d3683 --- /dev/null +++ b/src/gradients.jl @@ -0,0 +1,47 @@ + +@doc raw""" + grad_distance(M, q, p[, c=2]) + grad_distance!(M, X, q, p[, c=2]) + +compute the (sub)gradient of the distance (default: squared), in place of `X`. + +```math +f(p) = \frac{1}{c} d^c_{\mathcal M}(p, q) +``` + +to a fixed point `q` on the manifold `M` and `c` is an +integer. The (sub-)gradient reads + +```math +\operatorname{grad}f(p) = -d_{\mathcal M}^{c-2}(p, q)\log_pq +``` + +for ``c\neq 1`` or ``p\neq q``. Note that for the remaining case ``c=1``, +``p=q``, the function is not differentiable. In this case, the function returns the +corresponding zero tangent vector, since this is an element of the subdifferential. + +# Optional + +* `c` – (`2`) the exponent of the distance, i.e. the default is the squared + distance +""" +function grad_distance(M, q, p, c::Int = 2) + if c == 2 + return -log(M, p, q) + elseif c == 1 && p == q + return zero_vector(M, p) + else + return -distance(M, p, q)^(c - 2) * log(M, p, q) + end +end +function grad_distance!(M, X, q, p, c::Int = 2) + log!(M, X, p, q) + if c == 2 + X .*= -one(eltype(X)) + elseif c == 1 && p == q + X = zero_vector(M, p) + else + X .*= -distance(M, p, q)^(c - 2) + end + return X +end diff --git a/src/riemannian_diff.jl b/src/riemannian_diff.jl index 0cdb04c..82e89ef 100644 --- a/src/riemannian_diff.jl +++ b/src/riemannian_diff.jl @@ -53,13 +53,13 @@ Since it works in tangent spaces at argument and function value, methods might r retraction and an inverse retraction as well as a basis. In the tangent space itself, this backend then employs an (Euclidean) -[`AbstractDiffBackend`](@ref) +[`AbstractDiffBackend`](@ref ManifoldDiff.AbstractDiffBackend) # Constructor TangentDiffBackend(diff_backend) -where `diff_backend` is an [`AbstractDiffBackend`](@ref) to be used on the tangent space. +where `diff_backend` is an [`AbstractDiffBackend`](@ref ManifoldDiff.AbstractDiffBackend) to be used on the tangent space. With the keyword arguments @@ -183,10 +183,10 @@ Then we require three tools * A function ``f̃: ℝ^m → ℝ`` such that its restriction to the manifold yields the cost function ``f`` of interest. -* A [`project`](@ref) function to project tangent vectors from the embedding (at ``T_pℝ^m``) +* A [`project`](https://juliamanifolds.github.io/ManifoldsBase.jl/stable/projections.html#ManifoldsBase.project-Tuple{AbstractManifold,%20Any,%20Any}) function to project tangent vectors from the embedding (at ``T_pℝ^m``) back onto the tangent space ``T_p\mathcal M``. This also includes possible changes of the representation of the tangent vector (e.g. in the Lie algebra or in a different data format). -* A [`change_representer`](@ref) for non-isometrically embedded manifolds, +* A [`change_representer`](https://juliamanifolds.github.io/Manifolds.jl/stable/manifolds/metric.html#Manifolds.change_metric-Tuple{AbstractManifold,%20AbstractMetric,%20Any,%20Any}) for non-isometrically embedded manifolds, i.e. where the tangent space ``T_p\mathcal M`` of the manifold does not inherit the inner product from restriction of the inner product from the tangent space ``T_pℝ^m`` of the embedding @@ -307,7 +307,7 @@ or rearranged ``⟨Y,W⟩ = ⟨Z,W⟩``. We then use the definition of the Riema ⟨\operatorname{grad} f(p), W⟩_p = Df(p)[X] = ⟨\operatorname{grad}f(p), W⟩ = ⟨\operatorname{proj}_{T_p\mathcal M}(\operatorname{grad}f(p)),W⟩ \quad\text{for all } W \in T_p\mathcal M. ``` -Comparing the first and the last term, the remaining computation is the function [`change_representer`](@ref change_representer(M::AbstractManifold, G2::AbstractMetric, p, X)). +Comparing the first and the last term, the remaining computation is the function [`change_representer`](https://juliamanifolds.github.io/Manifolds.jl/stable/manifolds/metric.html#Manifolds.change_metric-Tuple{AbstractManifold,%20AbstractMetric,%20Any,%20Any}). This method can also be implemented directly, if a more efficient/stable version is known. diff --git a/test/manifold_specializations.jl b/test/manifold_specializations.jl index 23dc683..951cf32 100644 --- a/test/manifold_specializations.jl +++ b/test/manifold_specializations.jl @@ -6,7 +6,7 @@ using LinearAlgebra using ManifoldDiff: differential_exp_argument, differential_exp_argument_lie_approx, - inverse_retract_diff_argument_fd_approx + differential_inverse_retract_argument_fd_approx @testset "Rotations(3)" begin M = Rotations(3) @@ -59,7 +59,7 @@ end @test isapprox( M, q, - inverse_retract_diff_argument_fd_approx(M, p, q, X; h = 1e-4), + differential_inverse_retract_argument_fd_approx(M, p, q, X; h = 1e-4), diff_ref; atol = 1e-7, ) diff --git a/test/runtests.jl b/test/runtests.jl index a0b6e15..1d5c599 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,7 +8,11 @@ using Manifolds using ManifoldsBase using RecursiveArrayTools -include("test_differentials.jl") -include("test_adjoint_differentials.jl") -include("differentiation.jl") -include("manifold_specializations.jl") +@testset "ManifoldDiff.jl" begin + include("test_differentials.jl") + include("test_adjoint_differentials.jl") + include("differentiation.jl") + include("manifold_specializations.jl") + include("test_gradients.jl") + include("test_derivatives.jl") +end diff --git a/test/test_derivatives.jl b/test/test_derivatives.jl new file mode 100644 index 0000000..5ca2bdf --- /dev/null +++ b/test/test_derivatives.jl @@ -0,0 +1,24 @@ +using Manifolds, ManifoldsBase, ManifoldDiff, Test + +@testset "Derivatives" begin + M = Sphere(2) + p = [0.0, 0.0, 1.0] + q = [0.0, 1.0, 0.0] + r = [1.0, 0.0, 0.0] + @testset "Geodesic" begin + X = log(M, p, q) + s = geodesic(M, p, X, 0.5) + Y = zero_vector(M, p) + ManifoldDiff.geodesic_derivative!(M, Y, p, X, 0.5) + Z = ManifoldDiff.geodesic_derivative(M, p, X, 0.5) + sol = parallel_transport_to(M, p, X, s) + @test isapprox(M, s, sol, Y) + @test isapprox(M, s, sol, Z) + + ManifoldDiff.shortest_geodesic_derivative!(M, Y, p, q, 0.5) + Z = ManifoldDiff.shortest_geodesic_derivative(M, p, q, 0.5) + sol = parallel_transport_to(M, p, X, s) + @test isapprox(M, s, sol, Y) + @test isapprox(M, s, sol, Z) + end +end diff --git a/test/test_gradients.jl b/test/test_gradients.jl new file mode 100644 index 0000000..55480a5 --- /dev/null +++ b/test/test_gradients.jl @@ -0,0 +1,30 @@ +using Manifolds +using ManifoldsBase + +@testset "Gradients" begin + M = Sphere(2) + p = [0.0, 0.0, 1.0] + q = [0.0, 1.0, 0.0] + r = [1.0, 0.0, 0.0] + @testset "Gradient of the distance function" begin + X = zero_vector(M, p) + ManifoldDiff.grad_distance!(M, X, p, q) + Y = ManifoldDiff.grad_distance(M, p, q) + Z = [0.0, 0.0, -π / 2] # known solution + @test X == Y + @test X == Z + U = zero_vector(M, p) + ManifoldDiff.grad_distance!(M, U, p, q, 1) + V = ManifoldDiff.grad_distance(M, p, q, 1) + W = -distance(M, q, p)^(-1) * log(M, q, p) # solution + @test U == V + @test U == W + w = q + U = zero_vector(M, w) + ManifoldDiff.grad_distance!(M, U, w, q, 1) + V = ManifoldDiff.grad_distance(M, w, q, 1) + W = zero_vector(M, w) # solution + @test U == V + @test U == W + end +end