Cost Matrix function #104

Closed

Changes from 22 commits · 24 commits total
30de49c
Initiating sinkhorn divergence
davibarreira Jun 2, 2021
1a03325
Merge branch 'master' of https://github.com/JuliaOptimalTransport/Opt…
davibarreira Jun 2, 2021
4a1f380
Sinkhorn divergence implemented
davibarreira Jun 2, 2021
bdc1b5b
Added PyCall to test dependencies
davibarreira Jun 2, 2021
416dcb4
Added tests for sinkhorn divergence
davibarreira Jun 2, 2021
f593377
Added Sinkhorn Divergence to docs
davibarreira Jun 2, 2021
21d38a8
Creating FiniteDiscreteMeasure struct
davibarreira Jun 3, 2021
e17bba5
Modifications:
davibarreira Jun 3, 2021
10e8849
FixedDiscreteMeasure normalizes the weights to sum 1
davibarreira Jun 3, 2021
52b3c7a
FixedDiscreteMeasure checks if probabilities are positive
davibarreira Jun 3, 2021
7d2924d
Created tests for FiniteDiscreteMeasure
davibarreira Jun 3, 2021
7cf44a6
Added tests for sinkhorn divergence and finite discrete measure
davibarreira Jun 3, 2021
4764b00
Fixed the code for creating cost matrices in the sinkhorn_divergence
davibarreira Jun 3, 2021
98784c5
Added costmatrix.jl to tests
davibarreira Jun 3, 2021
1fb0fc1
Fixed docstring for costmatrix
davibarreira Jun 3, 2021
808d6ac
Fixed errors in the tests
davibarreira Jun 3, 2021
3415386
Minor fixes in the tests
davibarreira Jun 3, 2021
d373d52
Created auxiliary cost matrix function
davibarreira Jun 4, 2021
933c106
Formatted code
davibarreira Jun 4, 2021
63af17a
costmatrix implementation from sinkhorndivergence PR
davibarreira Jun 12, 2021
f952be8
Formatted code
davibarreira Jun 12, 2021
6d812e3
Added costmatrix.jl to docs
davibarreira Jun 12, 2021
97f5d77
Update Project.toml
davibarreira Jun 12, 2021
fd54e9f
Update src/OptimalTransport.jl
davibarreira Jun 12, 2021
1 change: 1 addition & 0 deletions Project.toml
@@ -35,6 +35,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Tulip = "6dd1b50a-3aae-11e9-10b5-ef983d2400fa"
PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
davibarreira marked this conversation as resolved.

[targets]
test = ["ForwardDiff", "Pkg", "PythonOT", "Random", "SafeTestsets", "Test", "Tulip", "HCubature"]
5 changes: 5 additions & 0 deletions docs/src/index.md
@@ -36,3 +36,8 @@ sinkhorn_unbalanced2
```@docs
quadreg
```

## Utilities
```@docs
cost_matrix
```
4 changes: 4 additions & 0 deletions src/OptimalTransport.jl
@@ -4,6 +4,8 @@

module OptimalTransport

using PDMats: length
using LinearAlgebra: AbstractMatrix
davibarreira marked this conversation as resolved.
using Distances
using LinearAlgebra
using IterativeSolvers, SparseArrays
@@ -18,8 +20,10 @@ export sinkhorn, sinkhorn2
export emd, emd2
export sinkhorn_stabilized, sinkhorn_stabilized_epsscaling, sinkhorn_barycenter
export sinkhorn_unbalanced, sinkhorn_unbalanced2
export sinkhorn_divergence
export quadreg
export ot_cost, ot_plan, wasserstein, squared2wasserstein
export cost_matrix
Member:

I am not sure if this should be exposed to users.

davibarreira (Member, Author), Jun 12, 2021:

Yeah, I remember you said this. Can you expand on why you think so? As a user, I'd like to have access to this function, since if I wanted to create the cost matrix, it would save me some time (I always misuse pairwise at first). I don't see the downside in making this available.

davibarreira (Member, Author):

Also, do you think the cost_matrix function is useful (even if only for internal use)? Because I remember you were not sure about it.

Member:

The problem is that the best implementation would be to just forward everything to pairwise, since the type of the support and the cost function should know best what to do. E.g., if you use ColVecs or RowVecs then there is no need to concatenate vectors; you can just call pairwise with the underlying matrix. This is also how it is implemented in KernelFunctions, and it is much more efficient than extracting and combining all columns or rows.
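The optimization described in this comment can be sketched as follows (a sketch only, assuming KernelFunctions.jl's `ColVecs`; the variable names are illustrative):

```julia
# With ColVecs, pairwise can dispatch to the optimized matrix method
# instead of extracting and re-concatenating individual vectors.
using Distances
using KernelFunctions: ColVecs

X = rand(3, 100)                 # 100 points in R^3, stored as columns
xs = ColVecs(X)                  # lightweight wrapper around X, no copying

# Specialized path: operates on the underlying matrix directly.
C_fast = pairwise(SqEuclidean(), xs, xs)

# Generic path: materialize a vector of vectors, then concatenate again.
vs = [X[:, i] for i in 1:size(X, 2)]
C_slow = pairwise(SqEuclidean(), reduce(hcat, vs), reduce(hcat, vs); dims=2)

C_fast ≈ C_slow                  # same result; the extra allocations are avoided above
```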

davibarreira (Member, Author), Jun 12, 2021:

Yeah, but if I construct a FiniteDiscreteMeasure without KernelFunctions, then there is no matrix attribute, which is why I wrote it like that. Sorry, I don't understand your point that the best implementation would be to just forward everything to pairwise. The reason for cost_matrix is that I wouldn't have to deal with the variations. For example, if my cost function is a custom function, then pairwise(sqeuclidean, mu.support, nu.support) behaves differently than pairwise(SqEuclidean(), mu.support, nu.support), which would require, for example, adding dims=1 and transforming the support into a matrix.
What I'd like to do is write a function that handles all these varying cases.

Member:

If instead the user passes sinkhorndivergence(SqEuclidean(), mu, nu), then I'd have to instead write C = pairwise(c, mu.support.X, nu.support.X, dims=1). But since I cannot guarantee that the user uses KernelFunctions when creating the finite measures, I have to use a method that works for both cases, hence C = pairwise(c, reduce(hcat, mu.support), reduce(hcat, nu.support), dims=1).

That's exactly my point, you don't know what type of vectors is used (e.g., whether it is a ColVecs or a RowVecs), so often reduce(hcat, mu.support) can be a very inefficient and suboptimal choice. If instead you would just use pairwise(c, mu.support, nu.support) then you could make use of the optimizations in packages such as KernelFunctions. So my main point is just that probably this is handled here on the wrong level - the packages that define e.g. ColVecs and RowVecs should define how pairwise is handled and make sure that it is efficient since we can't handle all possible types here.

In general though I am a bit surprised about the problems you mention with SqEuclidean. All the desired cases work automatically due to https://github.com/JuliaStats/Distances.jl/blob/b52f0a10017553b311a9c9eed6f96e34a5629c2f/src/generic.jl#L333-L351 (even though it is not optimized for ColVecs, but IMO that's an issue of KernelFunctions or the separate package where they should be moved):

julia> pairwise(SqEuclidean(), rand(5), rand(5))
5×5 Matrix{Float64}:
...

julia> pairwise(SqEuclidean(), [rand(5), rand(5)], [rand(5), rand(5)])
2×2 Matrix{Float64}:
...

julia> pairwise(SqEuclidean(), ColVecs(rand(5, 2)), ColVecs(rand(5, 2)))
2×2 Matrix{Float64}:
...

devmotion (Member), Jun 12, 2021:

(sqeuclidean works as well but uses the fallback in StatsBase - IMO this should be changed in Distances)

davibarreira (Member, Author), Jun 12, 2021:

Interesting. I was getting an error when using mu.support without the splatting, but I was also using the dims argument, so perhaps that was the issue. If so, then I agree with you that the cost_matrix function is not necessary.

davibarreira (Member, Author), Jun 13, 2021:

I'll close the PR. Thanks for the inputs.

Member:

BTW I made a PR to Distances that would fix the SqEuclidean/sqeuclidean discrepancy: JuliaStats/Distances.jl#224


const MOI = MathOptInterface

63 changes: 63 additions & 0 deletions src/utils.jl
@@ -106,3 +106,66 @@ end

Distributions.support(d::FiniteDiscreteMeasure) = d.support
Distributions.probs(d::FiniteDiscreteMeasure) = d.p

"""
cost_matrix(
c,
μ::Union{FiniteDiscreteMeasure, DiscreteNonParametric},
ν::Union{FiniteDiscreteMeasure, DiscreteNonParametric}
)

Compute the cost matrix from the finite discrete measures `μ` and `ν` using the cost function `c`.

Note that premetrics such as `SqEuclidean()` from `Distances.jl` have
better performance than generic functions. Thus, it is preferred to use
`cost_matrix(SqEuclidean(), μ, ν)` instead of `cost_matrix((x,y)->sum((x-y).^2), μ, ν)`
or even `cost_matrix(sqeuclidean, μ, ν)`.

For custom cost functions, it is necessary to guarantee that the function `c` works
on vectors, i.e., to compute the squared Euclidean distance,
one must define `c(x,y) = sum((x - y).^2)`.

# Example
```julia
μ = discretemeasure(rand(10),normalize!(rand(10),1))
ν = discretemeasure(rand(8))
c = TotalVariation()
C = cost_matrix(c, μ, ν)
```
"""
function cost_matrix(
c,
μ::Union{FiniteDiscreteMeasure,DiscreteNonParametric},
ν::Union{FiniteDiscreteMeasure,DiscreteNonParametric},
)
if typeof(c) <: PreMetric && length(μ.support[1]) == 1
return pairwise(c, vcat(μ.support...), vcat(ν.support...))
elseif typeof(c) <: PreMetric && length(μ.support[1]) > 1
return pairwise(c, vcat(μ.support'...), vcat(ν.support'...); dims=1)
else
return pairwise(c, μ.support, ν.support)
end
end
Comment on lines +136 to +148

Member:

I think we would want to restrict this to pairs of marginals with the same type of the support. Or at least the dimension should match in the case of arrays and scalars. So we would want a more fine-grained function signature.

In general, it would be better to avoid the type checks in the function definition since it makes it more difficult to extend and specialize the method. I think it would be better to just

  • define a separate fallback for arbitrary c (the last branch)
  • define a separate method for c::PreMetric

Also, one should avoid the splatting of support, since it will lead to massive compile times and inference problems for larger arrays.

The same comments apply to the implementation below.
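The dispatch-based structure suggested in this review could look roughly like the following (a sketch only, not the final API; the signatures are illustrative):

```julia
# Sketch of the suggested refactor: separate methods instead of
# `typeof(c) <: PreMetric` branches, so downstream packages can
# specialize on the type of the cost function and of the support.
using Distances

# Fallback for an arbitrary cost function `c`.
function cost_matrix(c, μ, ν)
    return pairwise(c, μ.support, ν.support)
end

# Specialized method for Distances.jl premetrics. Distances' own
# `pairwise` already handles vectors and vectors of vectors, so no
# splatting or concatenation is needed; further methods (e.g. for
# ColVecs/RowVecs supports) can be added without touching this one.
function cost_matrix(c::PreMetric, μ, ν)
    return pairwise(c, μ.support, ν.support)
end
```

The point of the two methods is not the (here identical) bodies but the extension surface: each case is a method that can be overridden, rather than a branch hidden inside one function.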

davibarreira (Member, Author), Jun 12, 2021:

Hmm. So the splatting is inefficient. Now, how should one efficiently construct the matrix C using Distances.pairwise? I mean, StatsBase.pairwise takes the vector of vectors and returns exactly what one wants, but Distances.pairwise would require a matrix version, so how do I make a matrix from the vector of vectors?

Member:

As mentioned above, if you work with ColVecs or RowVecs you actually don't want to construct a matrix at all. But if you deal with an actual vector of vectors, then usually you would use e.g. reduce(hcat, vectors_of_vectors) to avoid splatting.
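The difference between splatting and `reduce(hcat, ...)` mentioned here can be sketched as:

```julia
# Splatting passes one argument per element to hcat, which leads to long
# compile times and inference problems for large collections.
vs = [rand(3) for _ in 1:1000]
M1 = hcat(vs...)       # 3×1000 matrix, but splats 1000 arguments

# reduce(hcat, ...) hits a specialized method with a constant argument
# count, independent of the number of vectors.
M2 = reduce(hcat, vs)  # 3×1000 matrix

M1 == M2               # identical matrices
```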

davibarreira (Member, Author):

I agree. But since we don't require KernelFunctions as a dependency, I could not guarantee that mu.support.X would work. I'll change it to your suggestion with reduce.


"""
    cost_matrix(
        c,
        μ::Union{FiniteDiscreteMeasure, DiscreteNonParametric};
        symmetric = false
    )

Compute the cost matrix from the finite discrete measure `μ` to itself using the cost function `c`.
If the cost function is symmetric, set the keyword argument `symmetric` to `true`
to increase performance.
"""
function cost_matrix(
c, μ::Union{FiniteDiscreteMeasure,DiscreteNonParametric}; symmetric=false
)
if typeof(c) <: PreMetric && length(μ.support[1]) == 1
return pairwise(c, vcat(μ.support...))
elseif typeof(c) <: PreMetric && length(μ.support[1]) > 1
return pairwise(c, vcat(μ.support'...); dims=1)
else
return pairwise(c, μ.support; symmetric=symmetric)
end
end
1 change: 1 addition & 0 deletions test/entropic/sinkhorn.jl
@@ -4,6 +4,7 @@ using Distances
using ForwardDiff
using LogExpFunctions
using PythonOT: PythonOT
using Distributions

using LinearAlgebra
using Random
1 change: 1 addition & 0 deletions test/runtests.jl
@@ -1,3 +1,4 @@
using LinearAlgebra: symmetric
using OptimalTransport
using Pkg: Pkg
using SafeTestsets
54 changes: 54 additions & 0 deletions test/utils.jl
@@ -1,5 +1,7 @@
using OptimalTransport

using Distributions: DiscreteNonParametric
using Distances
using LinearAlgebra
using Random
using Test
@@ -144,4 +146,56 @@ Random.seed!(100)
@test νsupp == support(ν)
end
end
@testset "costmatrix.jl" begin
@testset "Creating cost matrices from vectors" begin
n = 100
m = 80
μsupp = rand(n)
νsupp = rand(m)
μprobs = normalize!(rand(n), 1)
μ = OptimalTransport.discretemeasure(μsupp, μprobs)
ν = OptimalTransport.discretemeasure(νsupp)
c(x, y) = sum((x - y) .^ 2)
C1 = cost_matrix(SqEuclidean(), μ, ν)
C2 = cost_matrix(sqeuclidean, μ, ν)
C3 = cost_matrix(c, μ, ν)
C = pairwise(SqEuclidean(), vcat(μ.support...), vcat(ν.support...))
@test C1 ≈ C
@test C2 ≈ C
@test C3 ≈ C
end

@testset "Creating cost matrices from matrices" begin
n = 10
m = 3
μsupp = [rand(m) for i in 1:n]
νsupp = [rand(m) for i in 1:n]
μprobs = normalize!(rand(n), 1)
μ = OptimalTransport.discretemeasure(μsupp, μprobs)
ν = OptimalTransport.discretemeasure(νsupp)
c(x, y) = sum((x - y) .^ 2)
C1 = cost_matrix(SqEuclidean(), μ, ν)
C2 = cost_matrix(sqeuclidean, μ, ν)
C3 = cost_matrix(c, μ, ν)
C = pairwise(SqEuclidean(), vcat(μ.support'...), vcat(ν.support'...); dims=1)
@test C1 ≈ C
@test C2 ≈ C
@test C3 ≈ C
end
@testset "Creating cost matrices from μ to itself" begin
n = 10
m = 3
μsupp = [rand(m) for i in 1:n]
μprobs = normalize!(rand(n), 1)
μ = OptimalTransport.discretemeasure(μsupp, μprobs)
c(x, y) = sqrt(sum((x - y) .^ 2))
C1 = cost_matrix(Euclidean(), μ; symmetric=true)
C2 = cost_matrix(euclidean, μ; symmetric=true)
C3 = cost_matrix(c, μ)
C = pairwise(Euclidean(), vcat(μ.support'...), vcat(μ.support'...); dims=1)
@test C1 ≈ C
@test C2 ≈ C
@test C3 ≈ C
end
end
end