Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions RELEASES.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ Many other bugs and issues have been fixed and we want to thank all the contribu

#### Closed issues

- Fix gromov conventions (PR #497)
- Fix change in scipy API for `cdist` (PR #487)
- More permissive check_backend (PR #494)
- Fix circleci-redirector action and codecov (PR #460)
Expand Down
10 changes: 5 additions & 5 deletions ot/gromov/_bregman.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,8 @@ def entropic_gromov_wasserstein2(
C1, C2, p=None, q=None, loss_fun='square_loss', epsilon=0.1, symmetric=None, G0=None, max_iter=1000,
tol=1e-9, solver='PGD', warmstart=False, verbose=False, log=False, **kwargs):
r"""
Returns the Gromov-Wasserstein discrepancy between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`
estimated using Sinkhorn projections.
Returns the Gromov-Wasserstein loss :math:`\mathbf{GW}` between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`
estimated using Sinkhorn projections. To recover the Gromov-Wasserstein distance as defined in [13] compute :math:`d_{GW} = \frac{1}{2} \sqrt{\mathbf{GW}}`.

If `solver="PGD"`, the function solves the following entropic-regularized
Gromov-Wasserstein optimization problem using Projected Gradient Descent [12]:
Expand Down Expand Up @@ -351,7 +351,7 @@ def entropic_gromov_barycenters(

.. math::

\mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
\mathbf{C}^* = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)

Where :

Expand Down Expand Up @@ -700,7 +700,7 @@ def entropic_fused_gromov_wasserstein2(
symmetric=None, alpha=0.5, G0=None, max_iter=1000, tol=1e-9,
solver='PGD', warmstart=False, verbose=False, log=False, **kwargs):
r"""
Returns the Fused Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{Y_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{Y_2}, \mathbf{q})`
Returns the Fused Gromov-Wasserstein distance between :math:`(\mathbf{C_1}, \mathbf{Y_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{Y_2}, \mathbf{q})`
with pairwise distance matrix :math:`\mathbf{M}` between node feature matrices :math:`\mathbf{Y_1}` and :math:`\mathbf{Y_2}`,
estimated using Sinkhorn projections.

Expand Down Expand Up @@ -832,7 +832,7 @@ def entropic_fused_gromov_barycenters(

.. math::

\mathbf{C}, \mathbf{Y} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}, \mathbf{Y}\in \mathbb{Y}^{N \times d}} \quad \sum_s \lambda_s \mathrm{FGW}_{\alpha}(\mathbf{C}, \mathbf{C}_s, \mathbf{Y}, \mathbf{Y}_s, \mathbf{p}, \mathbf{p}_s)
\mathbf{C}^*, \mathbf{Y}^* = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}, \mathbf{Y}\in \mathbb{Y}^{N \times d}} \quad \sum_s \lambda_s \mathrm{FGW}_{\alpha}(\mathbf{C}, \mathbf{C}_s, \mathbf{Y}, \mathbf{Y}_s, \mathbf{p}, \mathbf{p}_s)

Where :

Expand Down
6 changes: 3 additions & 3 deletions ot/gromov/_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@
def GW_distance_estimation(C1, C2, p, q, loss_fun, T,
nb_samples_p=None, nb_samples_q=None, std=True, random_state=None):
r"""
Returns an approximation of the gromov-wasserstein cost between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`
with a fixed transport plan :math:`\mathbf{T}`.
Returns an approximation of the Gromov-Wasserstein loss between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`
with a fixed transport plan :math:`\mathbf{T}`. To recover an approximation of the Gromov-Wasserstein distance as defined in [13] compute :math:`d_{GW} = \frac{1}{2} \sqrt{\mathbf{GW}}`.

The function gives an unbiased approximation of the following equation:

.. math::

GW = \sum_{i,j,k,l} L(\mathbf{C_{1}}_{i,k}, \mathbf{C_{2}}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
\mathbf{GW} = \sum_{i,j,k,l} L(\mathbf{C_{1}}_{i,k}, \mathbf{C_{2}}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}

Where :

Expand Down
85 changes: 54 additions & 31 deletions ot/gromov/_gw.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@
def gromov_wasserstein(C1, C2, p=None, q=None, loss_fun='square_loss', symmetric=None, log=False, armijo=False, G0=None,
max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs):
r"""
Returns the Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`
Returns the Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`.

The function solves the following optimization problem:
The function solves the following optimization problem using Conditional Gradient:

.. math::
\mathbf{T}^* \in \mathop{\arg \min}_\mathbf{T} \quad \sum_{i,j,k,l}
Expand Down Expand Up @@ -182,9 +182,10 @@ def line_search(cost, G, deltaG, Mi, cost_G, **kwargs):
def gromov_wasserstein2(C1, C2, p=None, q=None, loss_fun='square_loss', symmetric=None, log=False, armijo=False, G0=None,
max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs):
r"""
Returns the Gromov-Wasserstein discrepancy between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`
Returns the Gromov-Wasserstein loss :math:`\mathbf{GW}` between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`.
To recover the Gromov-Wasserstein distance as defined in [13] compute :math:`d_{GW} = \frac{1}{2} \sqrt{\mathbf{GW}}`.

The function solves the following optimization problem:
The function solves the following optimization problem using Conditional Gradient:

.. math::
\mathbf{GW} = \min_\mathbf{T} \quad \sum_{i,j,k,l}
Expand Down Expand Up @@ -308,31 +309,36 @@ def gromov_wasserstein2(C1, C2, p=None, q=None, loss_fun='square_loss', symmetri
def fused_gromov_wasserstein(M, C1, C2, p=None, q=None, loss_fun='square_loss', symmetric=None, alpha=0.5,
armijo=False, G0=None, log=False, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs):
r"""
Computes the FGW transport between two graphs (see :ref:`[24] <references-fused-gromov-wasserstein>`)
Returns the Fused Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{Y_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{Y_2}, \mathbf{q})`
with pairwise distance matrix :math:`\mathbf{M}` between node feature matrices :math:`\mathbf{Y_1}` and :math:`\mathbf{Y_2}` (see :ref:`[24] <references-fused-gromov-wasserstein>`).

The function solves the following optimization problem using Conditional Gradient:

.. math::
\mathbf{T}^* \in \mathop{\arg \min}_\mathbf{T} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F +
\mathbf{T}^* \in\mathop{\arg\min}_\mathbf{T} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F +
\alpha \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}

s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}

\mathbf{T}^T \mathbf{1} &= \mathbf{q}

\mathbf{T} &\geq 0
Where :

where :

- :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix
- :math:`\mathbf{p}` and :math:`\mathbf{q}` are source and target weights (sum to 1)
- `L` is a loss function to account for the misfit between the similarity matrices
- :math:`\mathbf{M}`: metric cost matrix between features across domains
- :math:`\mathbf{C_1}`: Metric cost matrix in the source space
- :math:`\mathbf{C_2}`: Metric cost matrix in the target space
- :math:`\mathbf{p}`: distribution in the source space
- :math:`\mathbf{q}`: distribution in the target space
- `L`: loss function to account for the misfit between the similarity and feature matrices
- :math:`\alpha`: trade-off parameter

.. note:: This function is backend-compatible and will work on arrays
from all compatible backends. But the algorithm uses the C++ CPU backend
which can lead to copy overhead on GPU arrays.
.. note:: All computations in the conjugate gradient solver are done with
numpy to limit memory overhead.

The algorithm used for solving the problem is conditional gradient as discussed in :ref:`[24] <references-fused-gromov-wasserstein>`

Parameters
----------
Expand Down Expand Up @@ -465,36 +471,39 @@ def line_search(cost, G, deltaG, Mi, cost_G, **kwargs):
def fused_gromov_wasserstein2(M, C1, C2, p=None, q=None, loss_fun='square_loss', symmetric=None, alpha=0.5,
armijo=False, G0=None, log=False, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs):
r"""
Computes the FGW distance between two graphs see (see :ref:`[24] <references-fused-gromov-wasserstein2>`)
Returns the Fused Gromov-Wasserstein distance between :math:`(\mathbf{C_1}, \mathbf{Y_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{Y_2}, \mathbf{q})`
with pairwise distance matrix :math:`\mathbf{M}` between node feature matrices :math:`\mathbf{Y_1}` and :math:`\mathbf{Y_2}` (see :ref:`[24] <references-fused-gromov-wasserstein>`).

.. math::
\mathbf{GW} = \min_\mathbf{T} \quad (1 - \alpha) \langle \mathbf(T), \mathbf{M} \rangle_F + \alpha \sum_{i,j,k,l}
L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
The function solves the following optimization problem using Conditional Gradient:

s.t. \ \mathbf(T)\mathbf{1} &= \mathbf{p}
.. math::
\mathbf{FGW} = \mathop{\min}_\mathbf{T} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F +
\alpha \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}

\mathbf(T)^T \mathbf{1} &= \mathbf{q}
s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}

\mathbf(T) &\geq 0
\mathbf{T}^T \mathbf{1} &= \mathbf{q}

where :
\mathbf{T} &\geq 0
Where :

- :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix
- :math:`\mathbf{p}` and :math:`\mathbf{q}` are source and target weights (sum to 1)
- `L` is a loss function to account for the misfit between the similarity matrices
- :math:`\mathbf{M}`: metric cost matrix between features across domains
- :math:`\mathbf{C_1}`: Metric cost matrix in the source space
- :math:`\mathbf{C_2}`: Metric cost matrix in the target space
- :math:`\mathbf{p}`: distribution in the source space
- :math:`\mathbf{q}`: distribution in the target space
- `L`: loss function to account for the misfit between the similarity and feature matrices
- :math:`\alpha`: trade-off parameter

The algorithm used for solving the problem is conditional gradient as
discussed in :ref:`[24] <references-fused-gromov-wasserstein2>`
Note that when using backends, this loss function is differentiable wrt the
matrices (C1, C2, M) and weights (p, q) for quadratic loss using the gradients from [38]_.

.. note:: This function is backend-compatible and will work on arrays
from all compatible backends. But the algorithm uses the C++ CPU backend
which can lead to copy overhead on GPU arrays.
.. note:: All computations in the conjugate gradient solver are done with
numpy to limit memory overhead.

Note that when using backends, this loss function is differentiable wrt the
matrices (C1, C2, M) and weights (p, q) for quadratic loss using the gradients from [38]_.

Parameters
----------
M : array-like, shape (ns, nt)
Expand Down Expand Up @@ -668,13 +677,13 @@ def gromov_barycenters(
max_iter=1000, tol=1e-9, warmstartT=False, verbose=False, log=False,
init_C=None, random_state=None, **kwargs):
r"""
Returns the gromov-wasserstein barycenters of `S` measured similarity matrices :math:`(\mathbf{C}_s)_{1 \leq s \leq S}`
Returns the Gromov-Wasserstein barycenters of `S` measured similarity matrices :math:`(\mathbf{C}_s)_{1 \leq s \leq S}`

The function solves the following optimization problem with block coordinate descent:

.. math::

\mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
\mathbf{C}^* = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)

Where :

Expand Down Expand Up @@ -812,7 +821,21 @@ def fgw_barycenters(
fixed_features=False, p=None, loss_fun='square_loss', armijo=False,
symmetric=True, max_iter=100, tol=1e-9, warmstartT=False, verbose=False,
log=False, init_C=None, init_X=None, random_state=None, **kwargs):
r"""Compute the fgw barycenter as presented eq (5) in :ref:`[24] <references-fgw-barycenters>`
r"""
Returns the Fused Gromov-Wasserstein barycenters of `S` measurable networks with node features :math:`(\mathbf{C}_s, \mathbf{Y}_s, \mathbf{p}_s)_{1 \leq s \leq S}`
(see eq (5) in :ref:`[24] <references-fgw-barycenters>`), estimated using Fused Gromov-Wasserstein transports from Conditional Gradient solvers.

The function solves the following optimization problem:

.. math::

\mathbf{C}^*, \mathbf{Y}^* = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}, \mathbf{Y}\in \mathbb{Y}^{N \times d}} \quad \sum_s \lambda_s \mathrm{FGW}_{\alpha}(\mathbf{C}, \mathbf{C}_s, \mathbf{Y}, \mathbf{Y}_s, \mathbf{p}, \mathbf{p}_s)

Where :

- :math:`\mathbf{Y}_s`: feature matrix
- :math:`\mathbf{C}_s`: metric cost matrix
- :math:`\mathbf{p}_s`: distribution

Parameters
----------
Expand Down
24 changes: 13 additions & 11 deletions ot/gromov/_semirelaxed.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@
def semirelaxed_gromov_wasserstein(C1, C2, p=None, loss_fun='square_loss', symmetric=None, log=False, G0=None,
max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs):
r"""
Returns the semi-relaxed Gromov-Wasserstein divergence transport from :math:`(\mathbf{C_1}, \mathbf{p})` to :math:`\mathbf{C_2}`
Returns the semi-relaxed Gromov-Wasserstein divergence transport from :math:`(\mathbf{C_1}, \mathbf{p})` to :math:`\mathbf{C_2}` (see [48]).

The function solves the following optimization problem:
The function solves the following optimization problem using Conditional Gradient:

.. math::
\mathbf{T}^^* \in \mathop{\arg \min}_{\mathbf{T}} \quad \sum_{i,j,k,l}
\mathbf{T}^* \in \mathop{\arg \min}_{\mathbf{T}} \quad \sum_{i,j,k,l}
L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}

s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
Expand Down Expand Up @@ -152,9 +152,9 @@ def line_search(cost, G, deltaG, Mi, cost_G, **kwargs):
def semirelaxed_gromov_wasserstein2(C1, C2, p=None, loss_fun='square_loss', symmetric=None, log=False, G0=None,
max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs):
r"""
Returns the semi-relaxed gromov-wasserstein divergence from :math:`(\mathbf{C_1}, \mathbf{p})` to :math:`\mathbf{C_2}`
Returns the semi-relaxed Gromov-Wasserstein divergence from :math:`(\mathbf{C_1}, \mathbf{p})` to :math:`\mathbf{C_2}` (see [48]).

The function solves the following optimization problem:
The function solves the following optimization problem using Conditional Gradient:

.. math::
\text{srGW} = \min_{\mathbf{T}} \quad \sum_{i,j,k,l}
Expand Down Expand Up @@ -255,7 +255,7 @@ def semirelaxed_fused_gromov_wasserstein(
M, C1, C2, p=None, loss_fun='square_loss', symmetric=None, alpha=0.5,
G0=None, log=False, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs):
r"""
Computes the semi-relaxed FGW transport between two graphs (see :ref:`[48] <references-semirelaxed-fused-gromov-wasserstein>`)
Computes the semi-relaxed Fused Gromov-Wasserstein transport between two graphs (see [48]).

.. math::
\mathbf{T}^* \in \mathop{\arg \min}_{\mathbf{T}} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F +
Expand Down Expand Up @@ -395,10 +395,10 @@ def line_search(cost, G, deltaG, Mi, cost_G, **kwargs):
def semirelaxed_fused_gromov_wasserstein2(M, C1, C2, p=None, loss_fun='square_loss', symmetric=None, alpha=0.5, G0=None, log=False,
max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs):
r"""
Computes the semi-relaxed FGW divergence between two graphs (see :ref:`[48] <references-semirelaxed-fused-gromov-wasserstein2>`)
Computes the semi-relaxed FGW divergence between two graphs (see [48]).

.. math::
\mathbf{srFGW} = \min_{\mathbf{T}} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F +
\mathbf{srFGW}_{\alpha} = \min_{\mathbf{T}} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F +
\alpha \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) T_{i,j} T_{k,l}

s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
Expand Down Expand Up @@ -511,7 +511,7 @@ def semirelaxed_fused_gromov_wasserstein2(M, C1, C2, p=None, loss_fun='square_lo
def solve_semirelaxed_gromov_linesearch(G, deltaG, cost_G, C1, C2, ones_p,
M, reg, alpha_min=None, alpha_max=None, nx=None, **kwargs):
"""
Solve the linesearch in the FW iterations
Solve the linesearch in the Conditional Gradient iterations for the semi-relaxed Gromov-Wasserstein divergence.

Parameters
----------
Expand Down Expand Up @@ -829,6 +829,7 @@ def entropic_semirelaxed_fused_gromov_wasserstein(
alpha=0.5, G0=None, max_iter=1e4, tol=1e-9, log=False, verbose=False, **kwargs):
r"""
Computes the entropic-regularized semi-relaxed FGW transport between two graphs (see :ref:`[48] <references-semirelaxed-fused-gromov-wasserstein>`)
estimated using a Mirror Descent algorithm following the KL geometry.

.. math::
\mathbf{T}^* \in \mathop{\arg \min}_{\mathbf{T}} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F +
Expand Down Expand Up @@ -988,10 +989,11 @@ def entropic_semirelaxed_fused_gromov_wasserstein2(
M, C1, C2, p=None, loss_fun='square_loss', symmetric=None, epsilon=0.1,
alpha=0.5, G0=None, max_iter=1e4, tol=1e-9, log=False, verbose=False, **kwargs):
r"""
Computes the entropic-regularized semi-relaxed FGW transport between two graphs (see :ref:`[48] <references-semirelaxed-fused-gromov-wasserstein>`)
Computes the entropic-regularized semi-relaxed FGW divergence between two graphs (see :ref:`[48] <references-semirelaxed-fused-gromov-wasserstein>`)
estimated using a Mirror Descent algorithm following the KL geometry.

.. math::
\mathbf{srFGW} = \min_{\mathbf{T}} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F +
\mathbf{srFGW}_{\alpha} = \min_{\mathbf{T}} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F +
\alpha \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}

s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
Expand Down