From 30de49c8dd185e080fedd3a53285d651a8081f4d Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Tue, 1 Jun 2021 21:23:39 -0300 Subject: [PATCH 01/21] Initianning sikhorn divergence --- src/OptimalTransport.jl | 45 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/OptimalTransport.jl b/src/OptimalTransport.jl index ec79e6be..14902d82 100644 --- a/src/OptimalTransport.jl +++ b/src/OptimalTransport.jl @@ -784,4 +784,49 @@ function quadreg(mu, nu, C, ϵ; θ=0.1, tol=1e-5, maxiter=50, κ=0.5, δ=1e-5) return sparse(γ') end +""" + sinkhorn_divergence(μ, ν, C, ε; regularization=false, plan=nothing, kwargs...) + +Solve the entropically regularized optimal transport problem with source and target +marginals `μ` and `ν`, cost matrix `C` of size `(length(μ), length(ν))`, and entropic +regularization parameter `ε`, and return the optimal cost. + +A pre-computed optimal transport `plan` may be provided. The other keyword arguments +supported here are the same as those in the [`sinkhorn`](@ref) function. + +!!! note + As the `sinkhorn2` function in the Python Optimal Transport package, this function + returns the optimal transport cost without the regularization term. The cost + with the regularization term can be computed by setting `regularization=true`. + +See also: [`sinkhorn`](@ref) +""" +function sinkhorn2(μ, ν, C, ε; regularization=false, kwargs...) + γ = if plan === nothing + sinkhorn(μ, ν, C, ε; kwargs...) + else + # check dimensions + size(C) == (size(μ, 1), size(ν, 1)) || error( + "cost matrix `C` must be of size `(size(μ, dims = 1), size(ν, dims = 1))`", + ) + (size(plan, 1), size(plan, 2)) == size(C) || error( + "optimal transport plan `plan` and cost matrix `C` must be of the same size", + ) + plan + end + cost = if regularization + dot_matwise(γ, C) .+ + ε * reshape(sum(LogExpFunctions.xlogx, γ; dims=(1, 2)), size(γ)[3:end]) + else + dot_matwise(γ, C) + end + + return cost +end +function sinkhorn_divergence(μ, ν, C, ε; regularization=false, kwargs...) + cost = ot_cost(p2distance(metric, p), μ, ν; kwargs...) + return prt(cost, p) +end + + end From 4a1f3806a777b1b2758dc489a206988b20f3e99c Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Wed, 2 Jun 2021 10:26:28 -0300 Subject: [PATCH 02/21] Sinkhorn divergence implemented --- src/OptimalTransport.jl | 66 ++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 37 deletions(-) diff --git a/src/OptimalTransport.jl b/src/OptimalTransport.jl index aa5a9ba8..069012c8 100644 --- a/src/OptimalTransport.jl +++ b/src/OptimalTransport.jl @@ -17,6 +17,7 @@ export sinkhorn, sinkhorn2 export emd, emd2 export sinkhorn_stabilized, sinkhorn_stabilized_epsscaling, sinkhorn_barycenter export sinkhorn_unbalanced, sinkhorn_unbalanced2 +export sinkhorn_divergence export quadreg export ot_cost, ot_plan, wasserstein, squared2wasserstein @@ -786,48 +787,39 @@ function quadreg(mu, nu, C, ϵ; θ=0.1, tol=1e-5, maxiter=50, κ=0.5, δ=1e-5) end """ - sinkhorn_divergence(μ, ν, C, ε; regularization=false, plan=nothing, kwargs...) + sinkhorn_divergence(c, μ::DiscreteNonParametric, ν::DiscreteNonParametric, ε; regularization=false, kwargs...) -Solve the entropically regularized optimal transport problem with source and target -marginals `μ` and `ν`, cost matrix `C` of size `(length(μ), length(ν))`, and entropic -regularization parameter `ε`, and return the optimal cost. +Compute the Sinkhorn Divergence between finite discrete +measures `μ` and `ν` with respect to a cost function `c` +and entropic regularization parameter `ε`. -A pre-computed optimal transport `plan` may be provided. The other keyword arguments -supported here are the same as those in the [`sinkhorn`](@ref) function. +The Sinkhorn Divergence is computed as: +```math +S_{c,ε}(μ,ν) := OT_{c,ε}(μ,ν) - \\frac{1}{2}(OT_{c,ε}(μ,μ) + OT_{c,ε}(ν,ν)), +``` +where `OT_{c,ε}(μ,ν)`, `OT_{c,ε}(μ,μ)` and `OT_{c,ε}(ν,ν)` are the entropically +regularized optimal transport cost between `(μ,ν)`, `(μ,μ)` and `(ν,ν)`, respectively. -!!! note - As the `sinkhorn2` function in the Python Optimal Transport package, this function - returns the optimal transport cost without the regularization term. The cost - with the regularization term can be computed by setting `regularization=true`. +The formulation for the Sinkhorn Divergence may have slight variations depending on the paper consulted. +The Sinkhorn Divergence was initially proposed by [^GPC18], although, this package uses the formulation given by +[^FeydyP19], which is also the one used on the Python Optimal Transport package. -See also: [`sinkhorn`](@ref) -""" -function sinkhorn2(μ, ν, C, ε; regularization=false, kwargs...) - γ = if plan === nothing - sinkhorn(μ, ν, C, ε; kwargs...) - else - # check dimensions - size(C) == (size(μ, 1), size(ν, 1)) || error( - "cost matrix `C` must be of size `(size(μ, dims = 1), size(ν, dims = 1))`", - ) - (size(plan, 1), size(plan, 2)) == size(C) || error( - "optimal transport plan `plan` and cost matrix `C` must be of the same size", - ) - plan - end - cost = if regularization - dot_matwise(γ, C) .+ - ε * reshape(sum(LogExpFunctions.xlogx, γ; dims=(1, 2)), size(γ)[3:end]) - else - dot_matwise(γ, C) - end +[^GPC18]: Aude Genevay, Gabriel Peyré, Marco Cuturi, Learning Generative Models with Sinkhorn Divergences, +Proceedings of the Twenty-First International Conference on Artficial Intelligence and Statistics, (AISTATS) 21, 2018 - return cost -end -function sinkhorn_divergence(μ, ν, C, ε; regularization=false, kwargs...) - cost = ot_cost(p2distance(metric, p), μ, ν; kwargs...) - return prt(cost, p) -end +[^FeydyP19]: Jean Feydy, Thibault Séjourné, François-Xavier Vialard, Shun-ichi +Amari, Alain Trouvé, and Gabriel Peyré. Interpolating between op- +timal transport and mmd using sinkhorn divergences. In The 22nd In- +ternational Conference on Artificial Intelligence and Statistics, pages +2681–2690. PMLR, 2019. +See also: [`sinkhorn2`](@ref) +""" +function sinkhorn_divergence(c, μ::DiscreteNonParametric, ν::DiscreteNonParametric, ε; regularization=false, kwargs...) + OTμν = sinkhorn2(μ.p, ν.p, pairwise(c,μ.support, ν.support),ε,regularization=regularization, kwargs...) + OTμμ = sinkhorn2(μ.p, μ.p, pairwise(c,μ.support,symmetric=true),ε,regularization=regularization, kwargs...) + OTνν = sinkhorn2(ν.p, ν.p, pairwise(c,ν.support,symmetric=true),ε,regularization=regularization, kwargs...) + return max(0,OTμν - 0.5*(OTμμ + OTνν)) +end end From bdc1b5bbec47f278282134d5dc39e983b1fbf88d Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Wed, 2 Jun 2021 10:55:44 -0300 Subject: [PATCH 03/21] Added PyCall to test dependencies --- Project.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 7868580e..b8642076 100644 --- a/Project.toml +++ b/Project.toml @@ -31,6 +31,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Tulip = "6dd1b50a-3aae-11e9-10b5-ef983d2400fa" +PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" [targets] -test = ["Pkg", "PythonOT", "Random", "SafeTestsets", "Test", "Tulip"] +test = ["Pkg", "PythonOT", "Random", "SafeTestsets", "Test", "Tulip", "PyCall"] From 416dcb40b2ff661da46fdfda875c1c268bcba67a Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Wed, 2 Jun 2021 11:34:15 -0300 Subject: [PATCH 04/21] Added tests for sinkhorn divergence --- src/OptimalTransport.jl | 38 +++++++++++++++++++++++++++++++++----- test/entropic.jl | 26 ++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 5 deletions(-) diff --git a/src/OptimalTransport.jl b/src/OptimalTransport.jl index 069012c8..d91e3dc9 100644 --- a/src/OptimalTransport.jl +++ b/src/OptimalTransport.jl @@ -815,11 +815,39 @@ ternational Conference on Artificial Intelligence and Statistics, pages See also: [`sinkhorn2`](@ref) """ -function sinkhorn_divergence(c, μ::DiscreteNonParametric, ν::DiscreteNonParametric, ε; regularization=false, kwargs...) - OTμν = sinkhorn2(μ.p, ν.p, pairwise(c,μ.support, ν.support),ε,regularization=regularization, kwargs...) - OTμμ = sinkhorn2(μ.p, μ.p, pairwise(c,μ.support,symmetric=true),ε,regularization=regularization, kwargs...) - OTνν = sinkhorn2(ν.p, ν.p, pairwise(c,ν.support,symmetric=true),ε,regularization=regularization, kwargs...) - return max(0,OTμν - 0.5*(OTμμ + OTνν)) +function sinkhorn_divergence( + c, + μ::DiscreteNonParametric, + ν::DiscreteNonParametric, + ε; + regularization=false, + kwargs..., +) + OTμν = sinkhorn2( + μ.p, + ν.p, + pairwise(c, μ.support, ν.support), + ε; + regularization=regularization, + kwargs..., + ) + OTμμ = sinkhorn2( + μ.p, + μ.p, + pairwise(c, μ.support; symmetric=true), + ε; + regularization=regularization, + kwargs..., + ) + OTνν = sinkhorn2( + ν.p, + ν.p, + pairwise(c, ν.support; symmetric=true), + ε; + regularization=regularization, + kwargs..., + ) + return max(0, OTμν - 0.5 * (OTμμ + OTνν)) end end diff --git a/test/entropic.jl b/test/entropic.jl index 081d6644..781b1c54 100644 --- a/test/entropic.jl +++ b/test/entropic.jl @@ -2,6 +2,10 @@ using OptimalTransport using Distances using PythonOT: PythonOT +using Distributions + +using PyCall +PYCALLPOT = pyimport("ot") using Random using Test @@ -213,4 +217,26 @@ Random.seed!(100) @test μ_interp ≈ μ_interp_pot rtol = 1e-6 end end + + @testset "sinkhorn divergence" begin + @testset "example" begin + # create distributions + N = 100 + μ = DiscreteNonParametric(rand(N), ones(N) / N) + ν = DiscreteNonParametric(rand(N), ones(N) / N) + + for (ε, metric) in Iterators.product( + [0.1, 1.0, 10.0], [sqeuclidean, euclidean, totalvariation] + ) + @test sinkhorn_divergence(metric, μ, μ, ε) ≈ 0.0 + @test sinkhorn_divergence(metric, ν, ν, ε) ≈ 0.0 + + sd = sinkhorn_divergence(metric, μ, ν, ε) + sd_pot = PYCALLPOT.bregman.empirical_sinkhorn_divergence( + reshape(μ.support, :, 1), reshape(ν.support, :, 1), ε; metric=metric + )[1] + @test sd ≈ sd_pot rtol = 1e-5 + end + end + end end From f593377cb6c2b0c501d11ec5559f9d5b2fbeca68 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Wed, 2 Jun 2021 11:35:07 -0300 Subject: [PATCH 05/21] Added Sinkhorn Divergence to docs --- docs/src/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/src/index.md b/docs/src/index.md index 40856a58..c2f18abb 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -24,6 +24,7 @@ sinkhorn2 sinkhorn_stabilized_epsscaling sinkhorn_stabilized sinkhorn_barycenter +sinkhorn_divergence ``` ## Unbalanced optimal transport From 21d38a87e446dae4f35d72dfb29b53541d7b2edf Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Thu, 3 Jun 2021 11:05:24 -0300 Subject: [PATCH 06/21] Creating FiniteDiscreteMeasure struct --- src/OptimalTransport.jl | 88 ++++++++++++++++++++++++++++++++++------- 1 file changed, 74 insertions(+), 14 deletions(-) diff --git a/src/OptimalTransport.jl b/src/OptimalTransport.jl index d91e3dc9..a8be1c99 100644 --- a/src/OptimalTransport.jl +++ b/src/OptimalTransport.jl @@ -26,6 +26,25 @@ const MOI = MathOptInterface include("exact.jl") include("wasserstein.jl") +# struct FiniteDiscreteMeasure{X<:AbstractVector,P<:AbstractVector} +# xs::X +# ps::P + +# function FiniteDiscreteMeasure{X,P}(xs::X, ps::P) where {X,P} +# length(xs) == length(ps) || +# error("length of support `xs` and probabilities `ps` must be equal") +# return new{X,P}(xs, ps) +# end +# end + +# """ +# DiscreteMeasure(xs::AbstractVector, ps::AbstractVector) +# Construct a discrete measure with support `xs` and corresponding weights `ps`. +# """ +# function FiniteDiscreteMeasure(xs::AbstractVector, ps::AbstractVector) +# return FiniteDiscreteMeasure{typeof(xs),typeof(ps)}(xs, ps) +# end + dot_matwise(x::AbstractMatrix, y::AbstractMatrix) = dot(x, y) function dot_matwise(x::AbstractArray, y::AbstractMatrix) xmat = reshape(x, size(x, 1) * size(x, 2), :) @@ -787,18 +806,22 @@ function quadreg(mu, nu, C, ϵ; θ=0.1, tol=1e-5, maxiter=50, κ=0.5, δ=1e-5) end """ - sinkhorn_divergence(c, μ::DiscreteNonParametric, ν::DiscreteNonParametric, ε; regularization=false, kwargs...) + sinkhorn_divergence(c, μ, ν, ε; regularization=false, plan=nothing, kwargs...) Compute the Sinkhorn Divergence between finite discrete measures `μ` and `ν` with respect to a cost function `c` and entropic regularization parameter `ε`. +A pre-computed optimal transport `plan` between `μ` and `ν` may be provided. + The Sinkhorn Divergence is computed as: ```math -S_{c,ε}(μ,ν) := OT_{c,ε}(μ,ν) - \\frac{1}{2}(OT_{c,ε}(μ,μ) + OT_{c,ε}(ν,ν)), +\\operatorname{S}_{c,ε}(μ,ν) := \\operatorname{OT}_{c,ε}(μ,ν) +- \\frac{1}{2}(\\operatorname{OT}_{c,ε}(μ,μ) + \\operatorname{OT}_{c,ε}(ν,ν)), ``` -where `OT_{c,ε}(μ,ν)`, `OT_{c,ε}(μ,μ)` and `OT_{c,ε}(ν,ν)` are the entropically -regularized optimal transport cost between `(μ,ν)`, `(μ,μ)` and `(ν,ν)`, respectively. +where ``\\operatorname{OT}_{c,ε}(μ,ν)``, ``\\operatorname{OT}_{c,ε}(μ,μ)`` and +``\\operatorname{OT}_{c,ε}(ν,ν)`` are the entropically regularized optimal transport cost +between `(μ,ν)`, `(μ,μ)` and `(ν,ν)`, respectively. The formulation for the Sinkhorn Divergence may have slight variations depending on the paper consulted. The Sinkhorn Divergence was initially proposed by [^GPC18], although, this package uses the formulation given by @@ -817,37 +840,74 @@ See also: [`sinkhorn2`](@ref) """ function sinkhorn_divergence( c, - μ::DiscreteNonParametric, - ν::DiscreteNonParametric, + μ, + ν, ε; - regularization=false, + regularization=nothing, + plan=nothing, kwargs..., ) + + return sinkhorn_divergence( + pairwise(c, μ.support, ν.support), + pairwise(c, μ.support, μ.support), + pairwise(c, ν.support, ν.support), + μ, + ν, + ε; + regularization=regularization, + kwargs... + ) + +end + +""" + sinkhorn_divergence(c, μ, ν, ε; regularization=false, plan=nothing, kwargs...) + +Compute the Sinkhorn Divergence between finite discrete +measures `μ` and `ν` with respect to the precomputed cost matrices `Cμν`, +`Cμμ` and `Cνν`, and entropic regularization parameter `ε`. + +A pre-computed optimal transport `plan` between `μ` and `ν` may be provided. + +See also: [`sinkhorn2`](@ref) +""" +function sinkhorn_divergence(Cμν, Cμμ, Cνν, μ, ν, ε; regularization=nothing, plan=nothing, kwargs...) + + if regularization !== nothing + @warn "`sinkhorn_divergence` does not support the `regularization` keyword argument" + end + OTμν = sinkhorn2( μ.p, ν.p, - pairwise(c, μ.support, ν.support), + Cμν, ε; - regularization=regularization, + plan=plan, + regularization = false, kwargs..., ) OTμμ = sinkhorn2( μ.p, μ.p, - pairwise(c, μ.support; symmetric=true), + Cμμ, ε; - regularization=regularization, + plan=nothing, + regularization = false, kwargs..., ) OTνν = sinkhorn2( ν.p, ν.p, - pairwise(c, ν.support; symmetric=true), + Cνν, ε; - regularization=regularization, + plan=nothing, + regularization = false, kwargs..., ) - return max(0, OTμν - 0.5 * (OTμμ + OTνν)) + return max(0, OTμν - (OTμμ + OTνν) / 2) end + + end From e17bba52d6fc255f2c11878b0bf06409e5515264 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Thu, 3 Jun 2021 14:44:42 -0300 Subject: [PATCH 07/21] Modifications: - Created the struct FiniteDiscreteMeasure, - Implemented two versions of sinkhorn_divergence, - Disabled the use of regularization on sinkhorn_divergence, - Fixed docstring with suggestions. --- src/OptimalTransport.jl | 85 +++++++++++++++++++++++++++++++---------- 1 file changed, 64 insertions(+), 21 deletions(-) diff --git a/src/OptimalTransport.jl b/src/OptimalTransport.jl index a8be1c99..46f27800 100644 --- a/src/OptimalTransport.jl +++ b/src/OptimalTransport.jl @@ -20,30 +20,53 @@ export sinkhorn_unbalanced, sinkhorn_unbalanced2 export sinkhorn_divergence export quadreg export ot_cost, ot_plan, wasserstein, squared2wasserstein +export FiniteDiscreteMeasure const MOI = MathOptInterface include("exact.jl") include("wasserstein.jl") -# struct FiniteDiscreteMeasure{X<:AbstractVector,P<:AbstractVector} -# xs::X -# ps::P - -# function FiniteDiscreteMeasure{X,P}(xs::X, ps::P) where {X,P} -# length(xs) == length(ps) || -# error("length of support `xs` and probabilities `ps` must be equal") -# return new{X,P}(xs, ps) -# end -# end - -# """ -# DiscreteMeasure(xs::AbstractVector, ps::AbstractVector) -# Construct a discrete measure with support `xs` and corresponding weights `ps`. -# """ -# function FiniteDiscreteMeasure(xs::AbstractVector, ps::AbstractVector) -# return FiniteDiscreteMeasure{typeof(xs),typeof(ps)}(xs, ps) -# end +struct FiniteDiscreteMeasure{X<:AbstractArray,P<:AbstractVector} + support::X + p::P + + function FiniteDiscreteMeasure{X,P}(support::X, p::P) where {X,P} + size(support, 1) == length(p) || + error("number of rows of `support` and `p` must be equal") + sum(p) ≈ 1 || + error("`p` must sum to 1") + return new{X,P}(support, p) + end +end + +""" + FiniteDiscreteMeasure(support::AbstractArray, p::AbstractVector) +Construct a finite discrete probability measure with support `support` and corresponding weights `p`. +""" +function FiniteDiscreteMeasure(support::AbstractArray, p::AbstractVector) + if size(support, 2) == 1 + return DiscreteNonParametric(support, p) + else + return FiniteDiscreteMeasure{typeof(support),typeof(p)}(support, p) + end +end + +""" + FiniteDiscreteMeasure(support::AbstractArray) +Construct a finite discrete probability measure with support `support` and equal probability for each point. +""" +function FiniteDiscreteMeasure(support::AbstractArray) + p = ones(size(support)[1]) ./ size(support)[1] + if size(support, 2) == 1 + return DiscreteNonParametric(support, p) + else + return FiniteDiscreteMeasure{typeof(support),typeof(p)}(support, p) + end +end + +Distributions.support(d::FiniteDiscreteMeasure) = d.support +Distributions.probs(d::FiniteDiscreteMeasure) = d.p dot_matwise(x::AbstractMatrix, y::AbstractMatrix) = dot(x, y) function dot_matwise(x::AbstractArray, y::AbstractMatrix) @@ -806,7 +829,12 @@ function quadreg(mu, nu, C, ϵ; θ=0.1, tol=1e-5, maxiter=50, κ=0.5, δ=1e-5) end """ - sinkhorn_divergence(c, μ, ν, ε; regularization=false, plan=nothing, kwargs...) + sinkhorn_divergence( + c, + μ::Union{FiniteDiscreteMeasure, DiscreteNonParametric}, + ν::Union{FiniteDiscreteMeasure, DiscreteNonParametric}, + ε; regularization=false, plan=nothing, kwargs... + ) Compute the Sinkhorn Divergence between finite discrete measures `μ` and `ν` with respect to a cost function `c` @@ -862,7 +890,12 @@ function sinkhorn_divergence( end """ - sinkhorn_divergence(c, μ, ν, ε; regularization=false, plan=nothing, kwargs...) + sinkhorn_divergence( + Cμν, Cμμ, Cνν, + μ::Union{FiniteDiscreteMeasure, DiscreteNonParametric}, + ν::Union{FiniteDiscreteMeasure, DiscreteNonParametric}, + ε; regularization=false, plan=nothing, kwargs... + ) Compute the Sinkhorn Divergence between finite discrete measures `μ` and `ν` with respect to the precomputed cost matrices `Cμν`, @@ -872,7 +905,17 @@ A pre-computed optimal transport `plan` between `μ` and `ν` may be provided. See also: [`sinkhorn2`](@ref) """ -function sinkhorn_divergence(Cμν, Cμμ, Cνν, μ, ν, ε; regularization=nothing, plan=nothing, kwargs...) +function sinkhorn_divergence( + Cμν, + Cμμ, + Cνν, + μ::Union{FiniteDiscreteMeasure, DiscreteNonParametric}, + ν::Union{FiniteDiscreteMeasure, DiscreteNonParametric}, + ε; + regularization=nothing, + plan=nothing, + kwargs... +) if regularization !== nothing @warn "`sinkhorn_divergence` does not support the `regularization` keyword argument" From 10e884942ab5a9813441ac2c1b78834067f3c3f5 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Thu, 3 Jun 2021 14:58:06 -0300 Subject: [PATCH 08/21] FixedDiscreteMeasure normalizes the weights to sum 1 --- src/OptimalTransport.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/OptimalTransport.jl b/src/OptimalTransport.jl index 46f27800..eb2c0a38 100644 --- a/src/OptimalTransport.jl +++ b/src/OptimalTransport.jl @@ -45,10 +45,15 @@ end Construct a finite discrete probability measure with support `support` and corresponding weights `p`. """ function FiniteDiscreteMeasure(support::AbstractArray, p::AbstractVector) + P = sum(p) if size(support, 2) == 1 - return DiscreteNonParametric(support, p) + return P ≈ 1 ? + DiscreteNonParametric(support, p) : + DiscreteNonParametric(support, p ./ P) else - return FiniteDiscreteMeasure{typeof(support),typeof(p)}(support, p) + return P ≈ 1 ? + FiniteDiscreteMeasure{typeof(support),typeof(p)}(support, p) : + FiniteDiscreteMeasure{typeof(support),typeof(p)}(support, p ./ P) end end From 52b3c7ac312f6cf7f4206d9dd91f8ffd37e74da4 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Thu, 3 Jun 2021 15:02:12 -0300 Subject: [PATCH 09/21] FixedDiscreteMeasure checks if probabilities are positive --- src/OptimalTransport.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/OptimalTransport.jl b/src/OptimalTransport.jl index eb2c0a38..970d5596 100644 --- a/src/OptimalTransport.jl +++ b/src/OptimalTransport.jl @@ -36,6 +36,8 @@ struct FiniteDiscreteMeasure{X<:AbstractArray,P<:AbstractVector} error("number of rows of `support` and `p` must be equal") sum(p) ≈ 1 || error("`p` must sum to 1") + all(p .>= 0) || + error("values of `p` must be greater of equal than 0") return new{X,P}(support, p) end end From 7d2924d355b7cc02feeb6f26ac4c9121434c4e7b Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Thu, 3 Jun 2021 15:15:02 -0300 Subject: [PATCH 10/21] Created tests for FiniteDiscreteMeasure --- test/finitediscretemeasure.jl | 56 +++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 test/finitediscretemeasure.jl diff --git a/test/finitediscretemeasure.jl b/test/finitediscretemeasure.jl new file mode 100644 index 00000000..55ad9be2 --- /dev/null +++ b/test/finitediscretemeasure.jl @@ -0,0 +1,56 @@ +using Distributions: FiniteSupport +using OptimalTransport +using Distributions +using Random + + +Random.seed!(100) + +@testset "finitediscretemeasure.jl" begin + @testset "Univariate Finite Discrete Measure" begin + n = 100 + μsupp = rand(n) + νsupp = rand(n,1) + μ = FiniteDiscreteMeasure(μsupp) + ν = FiniteDiscreteMeasure(νsupp, rand(n)) + # check if it assigns equal probabilities to all entries + @test μ.p ≈ ones(n) ./ n + @test probs(μ) ≈ ones(n) ./ n + # check if it probabilities sum to 1 + @test sum(ν.p) ≈ 1 + @test sum(probs(ν)) ≈ 1 + # check if probabilities are all positive (non-negative) + @test all(ν.p .>= 0) + @test all(probs(ν) .>= 0) + # check if it assigns to DiscreteNonParametric when Vector/Matrix is 1D + @test typeof(μ) <: :DiscreteNonParametric + @test typeof(ν) <: :DiscreteNonParametric + # check if support is correctly assinged + @test μsupp == μ.support + @test μsupp == support(μ) + @test νsupp == ν.support + @test νsupp == support(ν) + end + @testset "Multivariate Finite Discrete Measure" begin + n = 10 + m = 3 + μsupp = rand(n, m) + νsupp = rand(n, m) + μ = FiniteDiscreteMeasure(μsupp) + ν = FiniteDiscreteMeasure(νsupp, rand(n)) + # check if it assigns equal probabilities to all entries + @test μ.p ≈ ones(n) ./ n + @test probs(μ) ≈ ones(n) ./ n + # check if it probabilities sum to 1 + @test sum(ν.p) ≈ 1 + @test sum(probs(ν)) ≈ 1 + # check if probabilities are all positive (non-negative) + @test all(ν.p .>= 0) + @test all(probs(ν) .>= 0) + # check if support is correctly assinged + @test μsupp == μ.support + @test μsupp == support(μ) + @test νsupp == ν.support + @test νsupp == support(ν) + end +end \ No newline at end of file From 7cf44a69d48ba61eaacf03e988052505aa12c6ff Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Thu, 3 Jun 2021 15:45:06 -0300 Subject: [PATCH 11/21] Added tests for sinkhorn divergence and finite discrete measure --- src/OptimalTransport.jl | 5 +++-- test/entropic.jl | 11 +++-------- test/finitediscretemeasure.jl | 12 ++++++------ test/runtests.jl | 3 +++ 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/OptimalTransport.jl b/src/OptimalTransport.jl index 970d5596..be14d4d7 100644 --- a/src/OptimalTransport.jl +++ b/src/OptimalTransport.jl @@ -4,6 +4,7 @@ module OptimalTransport +using LinearAlgebra: AbstractMatrix using Distances using LinearAlgebra using IterativeSolvers, SparseArrays @@ -50,8 +51,8 @@ function FiniteDiscreteMeasure(support::AbstractArray, p::AbstractVector) P = sum(p) if size(support, 2) == 1 return P ≈ 1 ? - DiscreteNonParametric(support, p) : - DiscreteNonParametric(support, p ./ P) + DiscreteNonParametric(vec(support), p) : + DiscreteNonParametric(vec(support), p ./ P) else return P ≈ 1 ? FiniteDiscreteMeasure{typeof(support),typeof(p)}(support, p) : diff --git a/test/entropic.jl b/test/entropic.jl index 781b1c54..33abab84 100644 --- a/test/entropic.jl +++ b/test/entropic.jl @@ -4,9 +4,6 @@ using Distances using PythonOT: PythonOT using Distributions -using PyCall -PYCALLPOT = pyimport("ot") - using Random using Test @@ -222,8 +219,8 @@ Random.seed!(100) @testset "example" begin # create distributions N = 100 - μ = DiscreteNonParametric(rand(N), ones(N) / N) - ν = DiscreteNonParametric(rand(N), ones(N) / N) + μ = FiniteDiscreteMeasure(rand(N), rand(N)) + ν = FiniteDiscreteMeasure(rand(N), rand(N)) for (ε, metric) in Iterators.product( [0.1, 1.0, 10.0], [sqeuclidean, euclidean, totalvariation] @@ -232,9 +229,7 @@ Random.seed!(100) @test sinkhorn_divergence(metric, ν, ν, ε) ≈ 0.0 sd = sinkhorn_divergence(metric, μ, ν, ε) - sd_pot = PYCALLPOT.bregman.empirical_sinkhorn_divergence( - reshape(μ.support, :, 1), reshape(ν.support, :, 1), ε; metric=metric - )[1] + sd_pot = POT.empirical_sinkhorn_divergence(μ.support, ν.support, ε; metric=metric)[1] @test sd ≈ sd_pot rtol = 1e-5 end end diff --git a/test/finitediscretemeasure.jl b/test/finitediscretemeasure.jl index 55ad9be2..9a22c317 100644 --- a/test/finitediscretemeasure.jl +++ b/test/finitediscretemeasure.jl @@ -23,13 +23,13 @@ Random.seed!(100) @test all(ν.p .>= 0) @test all(probs(ν) .>= 0) # check if it assigns to DiscreteNonParametric when Vector/Matrix is 1D - @test typeof(μ) <: :DiscreteNonParametric - @test typeof(ν) <: :DiscreteNonParametric + @test typeof(μ) <: DiscreteNonParametric + @test typeof(ν) <: DiscreteNonParametric # check if support is correctly assinged - @test μsupp == μ.support - @test μsupp == support(μ) - @test νsupp == ν.support - @test νsupp == support(ν) + @test sort(μsupp) == μ.support + @test sort(μsupp) == support(μ) + @test sort(vec(νsupp)) == ν.support + @test sort(vec(νsupp)) == support(ν) end @testset "Multivariate Finite Discrete Measure" begin n = 10 diff --git a/test/runtests.jl b/test/runtests.jl index ab84a013..51b43583 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -23,6 +23,9 @@ const GROUP = get(ENV, "GROUP", "All") @safetestset "Wasserstein distance" begin include("wasserstein.jl") end + @safetestset "Finite Discrete Measure" begin + include("finitediscretemeasure.jl") + end end # CUDA requires Julia >= 1.6 From 4764b007f1fac5655f45256644bc36fa5c5374cf Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Thu, 3 Jun 2021 16:56:00 -0300 Subject: [PATCH 12/21] Fixed the code for creating cost matrices in the sinkhorn_divergence --- src/OptimalTransport.jl | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/src/OptimalTransport.jl b/src/OptimalTransport.jl index be14d4d7..5f5648d7 100644 --- a/src/OptimalTransport.jl +++ b/src/OptimalTransport.jl @@ -884,16 +884,29 @@ function sinkhorn_divergence( kwargs..., ) - return sinkhorn_divergence( - pairwise(c, μ.support, ν.support), - pairwise(c, μ.support, μ.support), - pairwise(c, ν.support, ν.support), - μ, - ν, - ε; - regularization=regularization, - kwargs... - ) + if typeof(c) <: PreMetric + return sinkhorn_divergence( + pairwise(c, μ.support, ν.support, dims=1), + pairwise(c, μ.support, μ.support, dims=1), + pairwise(c, ν.support, ν.support, dims=1), + μ, + ν, + ε; + regularization=regularization, + kwargs... + ) + else + return sinkhorn_divergence( + pairwise(c, eachrow(μ.support), eachrow(ν.support)), + pairwise(c, eachrow(μ.support), eachrow(μ.support)), + pairwise(c, eachrow(ν.support), eachrow(ν.support)), + μ, + ν, + ε; + regularization=regularization, + kwargs... + ) + end end From 98784c5b96164f28e6324f15cdfbd675a6ef7866 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Thu, 3 Jun 2021 17:55:12 -0300 Subject: [PATCH 13/21] Added costmatrix.jl to tests --- src/OptimalTransport.jl | 103 +++++++++++++++++++++++++--------- test/costmatrix.jl | 49 ++++++++++++++++ test/entropic.jl | 26 +++++++-- test/finitediscretemeasure.jl | 2 +- 4 files changed, 150 insertions(+), 30 deletions(-) create mode 100644 test/costmatrix.jl diff --git a/src/OptimalTransport.jl b/src/OptimalTransport.jl index 5f5648d7..964ef7e5 100644 --- a/src/OptimalTransport.jl +++ b/src/OptimalTransport.jl @@ -22,6 +22,7 @@ export sinkhorn_divergence export quadreg export ot_cost, ot_plan, wasserstein, squared2wasserstein export FiniteDiscreteMeasure +export cost_matrix const MOI = MathOptInterface @@ -76,6 +77,72 @@ end Distributions.support(d::FiniteDiscreteMeasure) = d.support Distributions.probs(d::FiniteDiscreteMeasure) = d.p +""" + cost_matrix( + c, + μ::Union{FiniteDiscreteMeasure, DiscreteNonParametric}, + ν::Union{FiniteDiscreteMeasure, DiscreteNonParametric} + ) + +Compute cost matrix from Finite Discrete Measures `μ` and `ν` using cost function `c`. + +Note that the use of functions such as `SqEuclidean()` from `Distances.jl` have +better performance than generic functions. Thus, it's prefered to use +`cost_matrix(SqEuclidean(), μ, ν)`, instead of `cost_matrix((x,y)->sum((x-y).^2), μ, ν)` +or even `cost_matrix(sqeuclidean, μ, ν)`. + +For custom cost functions, it is necessary to guarantee that the function `c` works +on vectors, i.e., if one wants to compute the squared Euclidean distance, +the one must define `c(x,y) = sum(x - y).^2`. + +# Example +```julia +μ = FiniteDiscreteMeasure(rand(10),rand(10)) +ν = FiniteDiscreteMeasure(rand(8)) +c = TotalVariation() +C = cost_matrix(c, μ, ν) +``` +""" + +""" + cost_matrix( + c, + μ::Union{FiniteDiscreteMeasure, DiscreteNonParametric}, + symmetric = false + ) + +Compute cost matrix from Finite Discrete Measures `μ` to itself using cost function `c`. +If the cost function is symmetric, set the argument `symmetric` to `true` in order +to increase performance. +""" +function cost_matrix( + c, + μ::Union{FiniteDiscreteMeasure, DiscreteNonParametric}, + symmetric = false +) + if typeof(c) <: PreMetric && size(μ.support,2) == 1 + return pairwise(c, μ.support) + elseif typeof(c) <: PreMetric && size(μ.support,2) > 1 + return pairwise(c, μ.support, dims=1) + else + return pairwise(c, eachrow(μ.support), symmetric=symmetric) + end +end + +function cost_matrix( + c, + μ::Union{FiniteDiscreteMeasure, DiscreteNonParametric}, + ν::Union{FiniteDiscreteMeasure, DiscreteNonParametric} +) + if typeof(c) <: PreMetric && size(μ.support,2) == 1 + return pairwise(c, μ.support, ν.support) + elseif typeof(c) <: PreMetric && size(μ.support,2) > 1 + return pairwise(c, μ.support, ν.support, dims=1) + else + return pairwise(c, eachrow(μ.support), eachrow(ν.support)) + end +end + dot_matwise(x::AbstractMatrix, y::AbstractMatrix) = dot(x, y) function dot_matwise(x::AbstractArray, y::AbstractMatrix) xmat = reshape(x, size(x, 1) * size(x, 2), :) @@ -884,29 +951,16 @@ function sinkhorn_divergence( kwargs..., ) - if typeof(c) <: PreMetric - return sinkhorn_divergence( - pairwise(c, μ.support, ν.support, dims=1), - pairwise(c, μ.support, μ.support, dims=1), - pairwise(c, ν.support, ν.support, dims=1), - μ, - ν, - ε; - regularization=regularization, - kwargs... - ) - else - return sinkhorn_divergence( - pairwise(c, eachrow(μ.support), eachrow(ν.support)), - pairwise(c, eachrow(μ.support), eachrow(μ.support)), - pairwise(c, eachrow(ν.support), eachrow(ν.support)), - μ, - ν, - ε; - regularization=regularization, - kwargs... - ) - end + return sinkhorn_divergence( + cost_matrix(c, μ, ν), + cost_matrix(c, μ, μ), + cost_matrix(c, ν, ν), + μ, + ν, + ε; + regularization=regularization, + kwargs... + ) end @@ -973,5 +1027,4 @@ function sinkhorn_divergence( end - -end +end \ No newline at end of file diff --git a/test/costmatrix.jl b/test/costmatrix.jl new file mode 100644 index 00000000..a75e9446 --- /dev/null +++ b/test/costmatrix.jl @@ -0,0 +1,49 @@ +using Distributions: DiscreteNonParametric, FiniteSupport +using OptimalTransport +using Distributions +using Random +using StatsBase + + +Random.seed!(7) + +@testset "costmatrix.jl" begin + @testset "Creating cost matrices from vectors" begin + N = 15 + M = 20 + μ = FiniteDiscreteMeasure(rand(N), rand(N)) + ν = FiniteDiscreteMeasure(rand(M), rand(M)) + c = sum(x-y).^2 + C1 = cost_matrix(SqEuclidean(), μ, ν) + C2 = cost_matrix(sqeuclidean, μ, ν) + C3 = cost_matrix(c, μ, ν) + @test C1 ≈ pairwise(SqEuclidean(), μ.support, ν.support) + @test C2 ≈ pairwise(SqEuclidean(), μ.support, ν.support) + @test C3 ≈ pairwise(SqEuclidean(), μ.support, ν.support) + end + + @testset "Creating cost matrices from matrices" begin + N = 10 + M = 8 + μ = FiniteDiscreteMeasure(rand(N,3), rand(N)) + ν = FiniteDiscreteMeasure(rand(M,3), rand(M)) + c = sum(x-y).^2 + C1 = cost_matrix(SqEuclidean(), μ, ν) + C2 = cost_matrix(sqeuclidean, μ, ν) + C3 = cost_matrix(c, μ, ν) + @test C1 ≈ pairwise(SqEuclidean(), μ.support, ν.support, dims=1) + @test C2 ≈ pairwise(SqEuclidean(), μ.support, ν.support, dims=1) + @test C3 ≈ pairwise(SqEuclidean(), μ.support, ν.support, dims=1) + end + @testset "Creating cost matrices from μ to itself" begin + N = 10 + μ = FiniteDiscreteMeasure(rand(N,2), rand(N)) + c = sum(abs.(x-y)) + C1 = cost_matrix(Euclidean(), μ, symmetric=true) + C2 = cost_matrix(euclidean, μ, symmetric=true) + C3 = cost_matrix(c, μ) + @test C1 ≈ pairwise(SqEuclidean(), μ.support, ν.support, dims=1) + @test C2 ≈ pairwise(SqEuclidean(), μ.support, ν.support, dims=1) + @test C3 ≈ pairwise(SqEuclidean(), μ.support, ν.support, dims=1) + end +end \ No newline at end of file diff --git a/test/entropic.jl b/test/entropic.jl index 33abab84..7481e435 100644 --- a/test/entropic.jl +++ b/test/entropic.jl @@ -216,11 +216,29 @@ Random.seed!(100) end @testset "sinkhorn divergence" begin - @testset "example" begin + @testset "using cost function" begin + # create distributions + N = 20 + μ = FiniteDiscreteMeasure(rand(N,2), rand(N)) + ν = FiniteDiscreteMeasure(rand(N,2), rand(N)) + + for (ε, metric) in Iterators.product( + [0.1, 1.0, 10.0], [sqeuclidean, euclidean, totalvariation] + ) + @test sinkhorn_divergence(metric, μ, μ, ε) ≈ 0.0 + @test sinkhorn_divergence(metric, ν, ν, ε) ≈ 0.0 + + sd = sinkhorn_divergence(metric, μ, ν, ε) + sd_pot = POT.empirical_sinkhorn_divergence(μ.support, ν.support, ε; metric=metric)[1] + @test sd ≈ sd_pot rtol = 1e-5 + end + end + @testset "using cost matrices" begin # create distributions - N = 100 - μ = FiniteDiscreteMeasure(rand(N), rand(N)) - ν = FiniteDiscreteMeasure(rand(N), rand(N)) + N = 20 + μ = FiniteDiscreteMeasure(rand(N,3), rand(N)) + ν = FiniteDiscreteMeasure(rand(N,3), rand(N)) + Cμν = pairwise(sqeuclidean, support(μ),support(ν)) for (ε, metric) in Iterators.product( [0.1, 1.0, 10.0], [sqeuclidean, euclidean, totalvariation] diff --git a/test/finitediscretemeasure.jl b/test/finitediscretemeasure.jl index 9a22c317..049aba2b 100644 --- a/test/finitediscretemeasure.jl +++ b/test/finitediscretemeasure.jl @@ -1,4 +1,4 @@ -using Distributions: FiniteSupport +using Distributions: DiscreteNonParametric using OptimalTransport using Distributions using Random From 1fb0fc1ded61bf3829af46a38913cd34feda18af Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Thu, 3 Jun 2021 19:00:05 -0300 Subject: [PATCH 14/21] Fixed docstring for costmatrix --- src/OptimalTransport.jl | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/OptimalTransport.jl b/src/OptimalTransport.jl index 964ef7e5..a6b2bcf2 100644 --- a/src/OptimalTransport.jl +++ b/src/OptimalTransport.jl @@ -68,7 +68,7 @@ Construct a finite discrete probability measure with support `support` and equal function FiniteDiscreteMeasure(support::AbstractArray) p = ones(size(support)[1]) ./ size(support)[1] if size(support, 2) == 1 - return DiscreteNonParametric(support, p) + return DiscreteNonParametric(vec(support), p) else return FiniteDiscreteMeasure{typeof(support),typeof(p)}(support, p) end @@ -93,7 +93,7 @@ or even `cost_matrix(sqeuclidean, μ, ν)`. For custom cost functions, it is necessary to guarantee that the function `c` works on vectors, i.e., if one wants to compute the squared Euclidean distance, -the one must define `c(x,y) = sum(x - y).^2`. +the one must define `c(x,y) = sum((x - y).^2)`. # Example ```julia @@ -103,6 +103,19 @@ c = TotalVariation() C = cost_matrix(c, μ, ν) ``` """ +function cost_matrix( + c, + μ::Union{FiniteDiscreteMeasure, DiscreteNonParametric}, + ν::Union{FiniteDiscreteMeasure, DiscreteNonParametric} +) + if typeof(c) <: PreMetric && size(μ.support,2) == 1 + return pairwise(c, μ.support, ν.support) + elseif typeof(c) <: PreMetric && size(μ.support,2) > 1 + return pairwise(c, μ.support, ν.support, dims=1) + else + return pairwise(c, eachrow(μ.support), eachrow(ν.support)) + end +end """ cost_matrix( @@ -117,7 +130,7 @@ to increase performance. """ function cost_matrix( c, - μ::Union{FiniteDiscreteMeasure, DiscreteNonParametric}, + μ::Union{FiniteDiscreteMeasure, DiscreteNonParametric}; symmetric = false ) if typeof(c) <: PreMetric && size(μ.support,2) == 1 @@ -129,19 +142,6 @@ function cost_matrix( end end -function cost_matrix( - c, - μ::Union{FiniteDiscreteMeasure, DiscreteNonParametric}, - ν::Union{FiniteDiscreteMeasure, DiscreteNonParametric} -) - if typeof(c) <: PreMetric && size(μ.support,2) == 1 - return pairwise(c, μ.support, ν.support) - elseif typeof(c) <: PreMetric && size(μ.support,2) > 1 - return pairwise(c, μ.support, ν.support, dims=1) - else - return pairwise(c, eachrow(μ.support), eachrow(ν.support)) - end -end dot_matwise(x::AbstractMatrix, y::AbstractMatrix) = dot(x, y) function dot_matwise(x::AbstractArray, y::AbstractMatrix) @@ -953,8 +953,8 @@ function sinkhorn_divergence( return sinkhorn_divergence( cost_matrix(c, μ, ν), - cost_matrix(c, μ, μ), - cost_matrix(c, ν, ν), + cost_matrix(c, μ), + cost_matrix(c, ν), μ, ν, ε; From 808d6acc4185c2dfa3cc62716e789d7c3a10ff12 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Thu, 3 Jun 2021 19:00:26 -0300 Subject: [PATCH 15/21] Fixed errors in the tests --- test/costmatrix.jl | 15 +++++++------ test/entropic.jl | 53 ++++++++++++++++++++++++++++++++++------------ test/runtests.jl | 4 ++++ 3 files changed, 51 insertions(+), 21 deletions(-) diff --git a/test/costmatrix.jl b/test/costmatrix.jl index a75e9446..cf754754 100644 --- a/test/costmatrix.jl +++ b/test/costmatrix.jl @@ -1,6 +1,7 @@ -using Distributions: DiscreteNonParametric, FiniteSupport +using Distributions: DiscreteNonParametric using OptimalTransport using Distributions +using Distances using Random using StatsBase @@ -13,7 +14,7 @@ Random.seed!(7) M = 20 μ = FiniteDiscreteMeasure(rand(N), rand(N)) ν = FiniteDiscreteMeasure(rand(M), rand(M)) - c = sum(x-y).^2 + c(x,y) = sum((x-y).^2) C1 = cost_matrix(SqEuclidean(), μ, ν) C2 = cost_matrix(sqeuclidean, μ, ν) C3 = cost_matrix(c, μ, ν) @@ -27,7 +28,7 @@ Random.seed!(7) M = 8 μ = FiniteDiscreteMeasure(rand(N,3), rand(N)) ν = FiniteDiscreteMeasure(rand(M,3), rand(M)) - c = sum(x-y).^2 + c(x,y) = sum((x-y).^2) C1 = cost_matrix(SqEuclidean(), μ, ν) C2 = cost_matrix(sqeuclidean, μ, ν) C3 = cost_matrix(c, μ, ν) @@ -38,12 +39,12 @@ Random.seed!(7) @testset "Creating cost matrices from μ to itself" begin N = 10 μ = FiniteDiscreteMeasure(rand(N,2), rand(N)) - c = sum(abs.(x-y)) + c(x,y) = sum((x-y).^2) C1 = cost_matrix(Euclidean(), μ, symmetric=true) C2 = cost_matrix(euclidean, μ, symmetric=true) C3 = cost_matrix(c, μ) - @test C1 ≈ pairwise(SqEuclidean(), μ.support, ν.support, dims=1) - @test C2 ≈ pairwise(SqEuclidean(), μ.support, ν.support, dims=1) - @test C3 ≈ pairwise(SqEuclidean(), μ.support, ν.support, dims=1) + @test C1 ≈ pairwise(SqEuclidean(), μ.support, μ.support, dims=1) + @test C2 ≈ pairwise(SqEuclidean(), μ.support, μ.support, dims=1) + @test C3 ≈ pairwise(SqEuclidean(), μ.support, μ.support, dims=1) end end \ No newline at end of file diff --git a/test/entropic.jl b/test/entropic.jl index 7481e435..376d0f1e 100644 --- a/test/entropic.jl +++ b/test/entropic.jl @@ -216,11 +216,12 @@ Random.seed!(100) end @testset "sinkhorn divergence" begin - @testset "using cost function" begin + @testset "univariate exmaples" begin # create distributions N = 20 - μ = FiniteDiscreteMeasure(rand(N,2), rand(N)) - ν = FiniteDiscreteMeasure(rand(N,2), rand(N)) + M = 10 + μ = FiniteDiscreteMeasure(rand(N), rand(N)) + ν = FiniteDiscreteMeasure(rand(M)) for (ε, metric) in Iterators.product( [0.1, 1.0, 10.0], [sqeuclidean, euclidean, totalvariation] @@ -228,17 +229,29 @@ Random.seed!(100) @test sinkhorn_divergence(metric, μ, μ, ε) ≈ 0.0 @test sinkhorn_divergence(metric, ν, ν, ε) ≈ 0.0 - sd = sinkhorn_divergence(metric, μ, ν, ε) - sd_pot = POT.empirical_sinkhorn_divergence(μ.support, ν.support, ε; metric=metric)[1] - @test sd ≈ sd_pot rtol = 1e-5 + sd_c = sinkhorn_divergence(metric, μ, ν, ε) + + # calculating cost matrices to use in POT.sinkhorn2 + Cμν = cost_matrix(metric, μ, ν) + Cμ = cost_matrix(metric, μ, symmetric=true) + Cν = cost_matrix(metric, ν, symmetric=true) + + sd_C = sinkhorn_divergence(Cμν, Cμ, Cμ, μ, ν, ε) + + # the empirical_sinkhorn_divergence returns an error if the weights are not all equal + # so instead, it's more realiable to calculate using sinkhorn2 + sd_pot = POT.sinkhorn2(μ.p, ν.p, Cμν, ε) - (POT.sinkhorn2(μ.p, μ.p, Cμ, ε) + POT.sinkhorn2(ν.p, ν.p, Cν, ε))/2 + + @test sd_c ≈ sd_pot[1] rtol = 1e-5 + @test sd_C ≈ sd_pot[1] rtol = 1e-5 end end - @testset "using cost matrices" begin + @testset "multivariate exmaples" begin # create distributions - N = 20 - μ = FiniteDiscreteMeasure(rand(N,3), rand(N)) - ν = FiniteDiscreteMeasure(rand(N,3), rand(N)) - Cμν = pairwise(sqeuclidean, support(μ),support(ν)) + N = 30 + M = 17 + μ = FiniteDiscreteMeasure(rand(N,2), rand(N)) + ν = FiniteDiscreteMeasure(rand(M,2), rand(M)) for (ε, metric) in Iterators.product( [0.1, 1.0, 10.0], [sqeuclidean, euclidean, totalvariation] @@ -246,9 +259,21 @@ Random.seed!(100) @test sinkhorn_divergence(metric, μ, μ, ε) ≈ 0.0 @test sinkhorn_divergence(metric, ν, ν, ε) ≈ 0.0 - sd = sinkhorn_divergence(metric, μ, ν, ε) - sd_pot = POT.empirical_sinkhorn_divergence(μ.support, ν.support, ε; metric=metric)[1] - @test sd ≈ sd_pot rtol = 1e-5 + sd_c = sinkhorn_divergence(metric, μ, ν, ε) + + # calculating cost matrices to use in POT.sinkhorn2 + Cμν = cost_matrix(metric, μ, ν) + Cμ = cost_matrix(metric, μ, symmetric=true) + Cν = cost_matrix(metric, ν, symmetric=true) + + sd_C = sinkhorn_divergence(Cμν, Cμ, Cν, μ, ν, ε) + + # the empirical_sinkhorn_divergence returns an error if the weights are not all equal + # so instead, it's more realiable to calculate using sinkhorn2 + sd_pot = POT.sinkhorn2(μ.p, ν.p, Cμν, ε) - (POT.sinkhorn2(μ.p, μ.p, Cμ, ε) + POT.sinkhorn2(ν.p, ν.p, Cν, ε))/2 + + @test sd_c ≈ sd_pot[1] rtol = 1e-5 + @test sd_C ≈ sd_pot[1] rtol = 1e-5 end end end diff --git a/test/runtests.jl b/test/runtests.jl index 51b43583..4615bbb3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,3 +1,4 @@ +using LinearAlgebra: symmetric using OptimalTransport using Pkg: Pkg using SafeTestsets @@ -26,6 +27,9 @@ const GROUP = get(ENV, "GROUP", "All") @safetestset "Finite Discrete Measure" begin include("finitediscretemeasure.jl") end + @safetestset "Cost matrix computation" begin + include("costmatrix.jl") + end end # CUDA requires Julia >= 1.6 From 34153862045b31a86ca6fd7a72beaf21031109a7 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Thu, 3 Jun 2021 19:08:15 -0300 Subject: [PATCH 16/21] Minor fixes in the tests --- test/costmatrix.jl | 8 ++++---- test/entropic.jl | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/costmatrix.jl b/test/costmatrix.jl index cf754754..610dc08b 100644 --- a/test/costmatrix.jl +++ b/test/costmatrix.jl @@ -39,12 +39,12 @@ Random.seed!(7) @testset "Creating cost matrices from μ to itself" begin N = 10 μ = FiniteDiscreteMeasure(rand(N,2), rand(N)) - c(x,y) = sum((x-y).^2) + c(x,y) = sqrt(sum((x-y).^2)) C1 = cost_matrix(Euclidean(), μ, symmetric=true) C2 = cost_matrix(euclidean, μ, symmetric=true) C3 = cost_matrix(c, μ) - @test C1 ≈ pairwise(SqEuclidean(), μ.support, μ.support, dims=1) - @test C2 ≈ pairwise(SqEuclidean(), μ.support, μ.support, dims=1) - @test C3 ≈ pairwise(SqEuclidean(), μ.support, μ.support, dims=1) + @test C1 ≈ pairwise(Euclidean(), μ.support, μ.support, dims=1) + @test C2 ≈ pairwise(Euclidean(), μ.support, μ.support, dims=1) + @test C3 ≈ pairwise(Euclidean(), μ.support, μ.support, dims=1) end end \ No newline at end of file diff --git a/test/entropic.jl b/test/entropic.jl index 376d0f1e..7c53796a 100644 --- a/test/entropic.jl +++ b/test/entropic.jl @@ -236,7 +236,7 @@ Random.seed!(100) Cμ = cost_matrix(metric, μ, symmetric=true) Cν = cost_matrix(metric, ν, symmetric=true) - sd_C = sinkhorn_divergence(Cμν, Cμ, Cμ, μ, ν, ε) + sd_C = sinkhorn_divergence(Cμν, Cμ, Cν, μ, ν, ε) # the empirical_sinkhorn_divergence returns an error if the weights are not all equal # so instead, it's more realiable to calculate using sinkhorn2 From 933c106c87a59cfb0278d2358a3e03ceabc48cb1 Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Fri, 4 Jun 2021 16:27:55 -0300 Subject: [PATCH 17/21] Formatted code --- src/OptimalTransport.jl | 2 +- src/utils.jl | 24 +++++++++++------------- test/utils.jl | 30 +++++++++++++++--------------- 3 files changed, 27 insertions(+), 29 deletions(-) diff --git a/src/OptimalTransport.jl b/src/OptimalTransport.jl index 4c644aa6..a4a5ce37 100644 --- a/src/OptimalTransport.jl +++ b/src/OptimalTransport.jl @@ -32,4 +32,4 @@ include("wasserstein.jl") include("entropic.jl") include("quadratic.jl") -end \ No newline at end of file +end diff --git a/src/utils.jl b/src/utils.jl index 5c26b0d3..6745f2fd 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -83,13 +83,13 @@ C = cost_matrix(c, μ, ν) """ function cost_matrix( c, - μ::Union{FiniteDiscreteMeasure, DiscreteNonParametric}, - ν::Union{FiniteDiscreteMeasure, DiscreteNonParametric} + μ::Union{FiniteDiscreteMeasure,DiscreteNonParametric}, + ν::Union{FiniteDiscreteMeasure,DiscreteNonParametric}, ) - if typeof(c) <: PreMetric && size(μ.support,2) == 1 + if typeof(c) <: PreMetric && size(μ.support, 2) == 1 return pairwise(c, μ.support, ν.support) - elseif typeof(c) <: PreMetric && size(μ.support,2) > 1 - return pairwise(c, μ.support, ν.support, dims=1) + elseif typeof(c) <: PreMetric && size(μ.support, 2) > 1 + return pairwise(c, μ.support, ν.support; dims=1) else return pairwise(c, eachrow(μ.support), eachrow(ν.support)) end @@ -107,15 +107,13 @@ If the cost function is symmetric, set the argument `symmetric` to `true` in ord to increase performance. """ function cost_matrix( - c, - μ::Union{FiniteDiscreteMeasure, DiscreteNonParametric}; - symmetric = false + c, μ::Union{FiniteDiscreteMeasure,DiscreteNonParametric}; symmetric=false ) - if typeof(c) <: PreMetric && size(μ.support,2) == 1 + if typeof(c) <: PreMetric && size(μ.support, 2) == 1 return pairwise(c, μ.support) - elseif typeof(c) <: PreMetric && size(μ.support,2) > 1 - return pairwise(c, μ.support, dims=1) + elseif typeof(c) <: PreMetric && size(μ.support, 2) > 1 + return pairwise(c, μ.support; dims=1) else - return pairwise(c, eachrow(μ.support), symmetric=symmetric) + return pairwise(c, eachrow(μ.support); symmetric=symmetric) end -end \ No newline at end of file +end diff --git a/test/utils.jl b/test/utils.jl index c83301af..ccfea6c7 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -97,14 +97,14 @@ Random.seed!(100) x2, y2 .* hcat(rand(), ones(1, size(y2, 2) - 1)) ) end - + @testset "costmatrix.jl" begin @testset "Creating cost matrices from vectors" begin N = 15 M = 20 μ = FiniteDiscreteMeasure(rand(N), rand(N)) ν = FiniteDiscreteMeasure(rand(M), rand(M)) - c(x,y) = sum((x-y).^2) + c(x, y) = sum((x - y) .^ 2) C1 = cost_matrix(SqEuclidean(), μ, ν) C2 = cost_matrix(sqeuclidean, μ, ν) C3 = cost_matrix(c, μ, ν) @@ -116,26 +116,26 @@ Random.seed!(100) @testset "Creating cost matrices from matrices" begin N = 10 M = 8 - μ = FiniteDiscreteMeasure(rand(N,3), rand(N)) - ν = FiniteDiscreteMeasure(rand(M,3), rand(M)) - c(x,y) = sum((x-y).^2) + μ = FiniteDiscreteMeasure(rand(N, 3), rand(N)) + ν = FiniteDiscreteMeasure(rand(M, 3), rand(M)) + c(x, y) = sum((x - y) .^ 2) C1 = cost_matrix(SqEuclidean(), μ, ν) C2 = cost_matrix(sqeuclidean, μ, ν) C3 = cost_matrix(c, μ, ν) - @test C1 ≈ pairwise(SqEuclidean(), μ.support, ν.support, dims=1) - @test C2 ≈ pairwise(SqEuclidean(), μ.support, ν.support, dims=1) - @test C3 ≈ pairwise(SqEuclidean(), μ.support, ν.support, dims=1) + @test C1 ≈ pairwise(SqEuclidean(), μ.support, ν.support; dims=1) + @test C2 ≈ pairwise(SqEuclidean(), μ.support, ν.support; dims=1) + @test C3 ≈ pairwise(SqEuclidean(), μ.support, ν.support; dims=1) end @testset "Creating cost matrices from μ to itself" begin N = 10 - μ = FiniteDiscreteMeasure(rand(N,2), rand(N)) - c(x,y) = sqrt(sum((x-y).^2)) - C1 = cost_matrix(Euclidean(), μ, symmetric=true) - C2 = cost_matrix(euclidean, μ, symmetric=true) + μ = FiniteDiscreteMeasure(rand(N, 2), rand(N)) + c(x, y) = sqrt(sum((x - y) .^ 2)) + C1 = cost_matrix(Euclidean(), μ; symmetric=true) + C2 = cost_matrix(euclidean, μ; symmetric=true) C3 = cost_matrix(c, μ) - @test C1 ≈ pairwise(Euclidean(), μ.support, μ.support, dims=1) - @test C2 ≈ pairwise(Euclidean(), μ.support, μ.support, dims=1) - @test C3 ≈ pairwise(Euclidean(), μ.support, μ.support, dims=1) + @test C1 ≈ pairwise(Euclidean(), μ.support, μ.support; dims=1) + @test C2 ≈ pairwise(Euclidean(), μ.support, μ.support; dims=1) + @test C3 ≈ pairwise(Euclidean(), μ.support, μ.support; dims=1) end end end From f952be87835008ffb62d2a289fd1121838a8e23f Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Fri, 11 Jun 2021 21:52:12 -0300 Subject: [PATCH 18/21] Formatted code --- src/utils.jl | 2 +- test/utils.jl | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 66dd5d41..586f0dce 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -168,4 +168,4 @@ function cost_matrix( else return pairwise(c, μ.support; symmetric=symmetric) end -end \ No newline at end of file +end diff --git a/test/utils.jl b/test/utils.jl index 7dac9d4c..7d6f45a7 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -147,20 +147,19 @@ Random.seed!(100) end end @testset "costmatrix.jl" begin - @testset "Creating cost matrices from vectors" begin n = 100 m = 80 μsupp = rand(n) νsupp = rand(m) μprobs = normalize!(rand(n), 1) - μ = OptimalTransport.discretemeasure(μsupp,μprobs) + μ = OptimalTransport.discretemeasure(μsupp, μprobs) ν = OptimalTransport.discretemeasure(νsupp) c(x, y) = sum((x - y) .^ 2) C1 = cost_matrix(SqEuclidean(), μ, ν) C2 = cost_matrix(sqeuclidean, μ, ν) C3 = cost_matrix(c, μ, ν) - C = pairwise(SqEuclidean(), vcat(μ.support...), vcat(ν.support...)) + C = pairwise(SqEuclidean(), vcat(μ.support...), vcat(ν.support...)) @test C1 ≈ C @test C2 ≈ C @test C3 ≈ C @@ -178,7 +177,7 @@ Random.seed!(100) C1 = cost_matrix(SqEuclidean(), μ, ν) C2 = cost_matrix(sqeuclidean, μ, ν) C3 = cost_matrix(c, μ, ν) - C = pairwise(SqEuclidean(), vcat(μ.support'...), vcat(ν.support'...), dims=1) + C = pairwise(SqEuclidean(), vcat(μ.support'...), vcat(ν.support'...); dims=1) @test C1 ≈ C @test C2 ≈ C @test C3 ≈ C @@ -193,7 +192,7 @@ Random.seed!(100) C1 = cost_matrix(Euclidean(), μ; symmetric=true) C2 = cost_matrix(euclidean, μ; symmetric=true) C3 = cost_matrix(c, μ) - C = pairwise(Euclidean(), vcat(μ.support'...), vcat(μ.support'...); dims=1) + C = pairwise(Euclidean(), vcat(μ.support'...), vcat(μ.support'...); dims=1) @test C1 ≈ C @test C2 ≈ C @test C3 ≈ C From 6d812e32bc8de4d59d5c5f169a02c4d99bd67f0a Mon Sep 17 00:00:00 2001 From: Davi Barreira Date: Sat, 12 Jun 2021 08:16:24 -0300 Subject: [PATCH 19/21] Added costmatrix.jl to docs --- docs/src/index.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/src/index.md b/docs/src/index.md index 40856a58..21afb8f3 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -36,3 +36,8 @@ sinkhorn_unbalanced2 ```@docs quadreg ``` + +## Utilities +```@docs +cost_matrix +``` From 97f5d77f2df321dda4f5349b6bd61621d08fd3eb Mon Sep 17 00:00:00 2001 From: Davi Sales Barreira Date: Sat, 12 Jun 2021 08:48:55 -0300 Subject: [PATCH 20/21] Update Project.toml Co-authored-by: David Widmann --- Project.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Project.toml b/Project.toml index 5b1c8449..2315ff68 100644 --- a/Project.toml +++ b/Project.toml @@ -35,7 +35,6 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Tulip = "6dd1b50a-3aae-11e9-10b5-ef983d2400fa" -PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" [targets] test = ["ForwardDiff", "Pkg", "PythonOT", "Random", "SafeTestsets", "Test", "Tulip", "HCubature"] From fd54e9f0c2f4acabdee9d31f41859654c2628387 Mon Sep 17 00:00:00 2001 From: Davi Sales Barreira Date: Sat, 12 Jun 2021 08:51:55 -0300 Subject: [PATCH 21/21] Update src/OptimalTransport.jl Co-authored-by: David Widmann --- src/OptimalTransport.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/OptimalTransport.jl b/src/OptimalTransport.jl index b8489095..fb96e23b 100644 --- a/src/OptimalTransport.jl +++ b/src/OptimalTransport.jl @@ -4,8 +4,6 @@ module OptimalTransport -using PDMats: length -using LinearAlgebra: AbstractMatrix using Distances using LinearAlgebra using IterativeSolvers, SparseArrays