JuliaOptimalTransport · davibarreira · Jun 2, 2021 · Jun 2, 2021 · Jun 2, 2021 · Jun 2, 2021
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -36,3 +36,8 @@ sinkhorn_unbalanced2
 ```@docs
 quadreg
 ```
+
+## Utilities
+```@docs
+cost_matrix
+```
diff --git a/src/OptimalTransport.jl b/src/OptimalTransport.jl
@@ -18,8 +18,10 @@ export sinkhorn, sinkhorn2
 export emd, emd2
 export sinkhorn_stabilized, sinkhorn_stabilized_epsscaling, sinkhorn_barycenter
 export sinkhorn_unbalanced, sinkhorn_unbalanced2
+export sinkhorn_divergence
 export quadreg
 export ot_cost, ot_plan, wasserstein, squared2wasserstein
+export cost_matrix
 
 const MOI = MathOptInterface
 

diff --git a/src/utils.jl b/src/utils.jl
@@ -106,3 +106,66 @@ end
 
 Distributions.support(d::FiniteDiscreteMeasure) = d.support
 Distributions.probs(d::FiniteDiscreteMeasure) = d.p
+
+"""
+    cost_matrix(
+        c,
+        μ::Union{FiniteDiscreteMeasure, DiscreteNonParametric},
+        ν::Union{FiniteDiscreteMeasure, DiscreteNonParametric}
+    )
+
+Compute the cost matrix between the supports of `μ` and `ν` using the cost function `c`.
+
+Metric objects from `Distances.jl` such as `SqEuclidean()` perform better than generic
+functions. Thus, it is preferred to use `cost_matrix(SqEuclidean(), μ, ν)` instead of
+`cost_matrix((x, y) -> sum((x - y) .^ 2), μ, ν)` or even `cost_matrix(sqeuclidean, μ, ν)`.
+
+A custom cost function `c` must work on the support points themselves, which may be
+vectors; e.g., the squared Euclidean distance is `c(x, y) = sum((x - y) .^ 2)`.
+
+# Example
+```julia
+μ = discretemeasure(rand(10), normalize!(rand(10), 1))
+ν = discretemeasure(rand(8))
+c = TotalVariation()
+C = cost_matrix(c, μ, ν)
+```
+"""
+function cost_matrix(
+    c,
+    μ::Union{FiniteDiscreteMeasure,DiscreteNonParametric},
+    ν::Union{FiniteDiscreteMeasure,DiscreteNonParametric},
+)
+    if c isa PreMetric && length(μ.support[1]) == 1
+        # Support points of length 1: concatenate them into plain vectors.
+        return pairwise(c, vcat(μ.support...), vcat(ν.support...))
+    elseif c isa PreMetric && length(μ.support[1]) > 1
+        # Vector-valued support points: stack them as rows, one point per row.
+        return pairwise(c, vcat(μ.support'...), vcat(ν.support'...); dims=1)
+    else
+        return pairwise(c, μ.support, ν.support)
+    end
+end
+
+"""
+    cost_matrix(
+        c,
+        μ::Union{FiniteDiscreteMeasure, DiscreteNonParametric};
+        symmetric=false
+    )
+
+Compute the cost matrix from the support of `μ` to itself using the cost function `c`.
+If `c` is a symmetric generic function, set the keyword `symmetric` to `true` to increase
+performance; the keyword is only forwarded when `c` is not handled as a `PreMetric`.
+"""
+function cost_matrix(
+    c, μ::Union{FiniteDiscreteMeasure,DiscreteNonParametric}; symmetric=false
+)
+    if c isa PreMetric && length(μ.support[1]) == 1
+        return pairwise(c, vcat(μ.support...))
+    elseif c isa PreMetric && length(μ.support[1]) > 1
+        return pairwise(c, vcat(μ.support'...); dims=1)  # one support point per row
+    else
+        return pairwise(c, μ.support; symmetric=symmetric)
+    end
+end
diff --git a/test/entropic/sinkhorn.jl b/test/entropic/sinkhorn.jl
@@ -4,6 +4,7 @@ using Distances
 using ForwardDiff
 using LogExpFunctions
 using PythonOT: PythonOT
+using Distributions
 
 using LinearAlgebra
 using Random

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,3 +1,4 @@
+using LinearAlgebra: symmetric
 using OptimalTransport
 using Pkg: Pkg
 using SafeTestsets

diff --git a/test/utils.jl b/test/utils.jl
@@ -1,5 +1,7 @@
 using OptimalTransport
 
+using Distributions: DiscreteNonParametric
+using Distances
 using LinearAlgebra
 using Random
 using Test
@@ -144,4 +146,56 @@ Random.seed!(100)
             @test νsupp == support(ν)
         end
     end
+    @testset "costmatrix.jl" begin
+        @testset "Creating cost matrices from vectors" begin
+            # Scalar support points; all three kinds of cost must give the same matrix.
+            nμ = 100
+            nν = 80
+            xs = rand(nμ)
+            ys = rand(nν)
+            weights = normalize!(rand(nμ), 1)
+            μ = OptimalTransport.discretemeasure(xs, weights)
+            ν = OptimalTransport.discretemeasure(ys)
+            sqdist(x, y) = sum((x - y) .^ 2)
+            x = vcat(μ.support...)
+            y = vcat(ν.support...)
+            expected = pairwise(SqEuclidean(), x, y)
+            for cost in (SqEuclidean(), sqeuclidean, sqdist)
+                @test cost_matrix(cost, μ, ν) ≈ expected
+            end
+        end
+
+        @testset "Creating cost matrices from matrices" begin
+            # Every support point is a vector of length `dim`.
+            npoints = 10
+            dim = 3
+            xs = [rand(dim) for _ in 1:npoints]
+            ys = [rand(dim) for _ in 1:npoints]
+            weights = normalize!(rand(npoints), 1)
+            μ = OptimalTransport.discretemeasure(xs, weights)
+            ν = OptimalTransport.discretemeasure(ys)
+            sqdist(x, y) = sum((x - y) .^ 2)
+            X = vcat(μ.support'...)
+            Y = vcat(ν.support'...)
+            expected = pairwise(SqEuclidean(), X, Y; dims=1)
+            for cost in (SqEuclidean(), sqeuclidean, sqdist)
+                @test cost_matrix(cost, μ, ν) ≈ expected
+            end
+        end
+        @testset "Creating cost matrices from μ to itself" begin
+            # Single-measure method, with and without the `symmetric` keyword.
+            npoints = 10
+            dim = 3
+            xs = [rand(dim) for _ in 1:npoints]
+            weights = normalize!(rand(npoints), 1)
+            μ = OptimalTransport.discretemeasure(xs, weights)
+            dist(x, y) = sqrt(sum((x - y) .^ 2))
+            expected = pairwise(
+                Euclidean(), vcat(μ.support'...), vcat(μ.support'...); dims=1
+            )
+            @test cost_matrix(Euclidean(), μ; symmetric=true) ≈ expected
+            @test cost_matrix(euclidean, μ; symmetric=true) ≈ expected
+            @test cost_matrix(dist, μ) ≈ expected
+        end
+    end
 end