From c8eda449b2c6b39e9d57d1b5b2c39e43f2925892 Mon Sep 17 00:00:00 2001 From: Kilian Fatras Date: Fri, 15 Jun 2018 18:53:54 -0700 Subject: [PATCH 01/15] add problems solved in doc --- examples/plot_stochastic.py | 135 ++++++++++++ ot/__init__.py | 2 + ot/stochastic.py | 415 ++++++++++++++++++++++++++++++++++++ test/test_stochastic.py | 115 ++++++++++ 4 files changed, 667 insertions(+) create mode 100644 examples/plot_stochastic.py create mode 100644 ot/stochastic.py create mode 100644 test/test_stochastic.py diff --git a/examples/plot_stochastic.py b/examples/plot_stochastic.py new file mode 100644 index 000000000..9071ddb4d --- /dev/null +++ b/examples/plot_stochastic.py @@ -0,0 +1,135 @@ +""" +========================== +Stochastic examples +========================== + +This example is designed to show how to use the stochatic optimization +algorithms for descrete and semicontinous measures from the POT library. + +""" + +# Author: Kilian Fatras +# +# License: MIT License + +import matplotlib.pylab as pl +import numpy as np +import ot + + +############################################################################# +# COMPUTE TRANSPORTATION MATRIX +############################################################################# + +############################################################################# +# DISCRETE CASE +# Sample two discrete measures for the discrete case +# --------------------------------------------- +# +# Define 2 discrete measures a and b, the points where are defined the source +# and the target measures and finally the cost matrix c. + +n_source = 7 +n_target = 4 +reg = 1 +numItermax = 10000 +lr = 0.1 + +a = ot.utils.unif(n_source) +b = ot.utils.unif(n_target) + +rng = np.random.RandomState(0) +X_source = rng.randn(n_source, 2) +Y_target = rng.randn(n_target, 2) +M = ot.dist(X_source, Y_target) + +############################################################################# +# +# Call the "SAG" method to find the transportation matrix in the discrete case +# --------------------------------------------- +# +# Define the method "SAG", call ot.transportation_matrix_entropic and plot the +# results. + +method = "SAG" +sag_pi = ot.stochastic.transportation_matrix_entropic(a, b, M, reg, method, + numItermax, lr) +print(sag_pi) + +############################################################################# +# SEMICONTINOUS CASE +# Sample one general measure a, one discrete measures b for the semicontinous +# case +# --------------------------------------------- +# +# Define one general measure a, one discrete measures b, the points where +# are defined the source and the target measures and finally the cost matrix c. + +n_source = 7 +n_target = 4 +reg = 1 +numItermax = 500000 +lr = 1 + +a = ot.utils.unif(n_source) +b = ot.utils.unif(n_target) + +rng = np.random.RandomState(0) +X_source = rng.randn(n_source, 2) +Y_target = rng.randn(n_target, 2) +M = ot.dist(X_source, Y_target) + +############################################################################# +# +# Call the "ASGD" method to find the transportation matrix in the semicontinous +# case +# --------------------------------------------- +# +# Define the method "ASGD", call ot.transportation_matrix_entropic and plot the +# results. + +method = "ASGD" +asgd_pi = ot.stochastic.transportation_matrix_entropic(a, b, M, reg, method, + numItermax, lr) +print(asgd_pi) + +############################################################################# +# +# Compare the results with the Sinkhorn algorithm +# --------------------------------------------- +# +# Call the Sinkhorn algorithm from POT + +sinkhorn_pi = ot.sinkhorn(a, b, M, 1) +print(sinkhorn_pi) + + +############################################################################## +# PLOT TRANSPORTATION MATRIX +############################################################################## + +############################################################################## +# Plot SAG results +# ---------------- + +pl.figure(4, figsize=(5, 5)) +ot.plot.plot1D_mat(a, b, sag_pi, 'OT matrix SAG') +pl.show() + + +############################################################################## +# Plot ASGD results +# ----------------- + +pl.figure(4, figsize=(5, 5)) +ot.plot.plot1D_mat(a, b, asgd_pi, 'OT matrix ASGD') +pl.show() + + +############################################################################## +# Plot Sinkhorn results +# --------------------- + +pl.figure(4, figsize=(5, 5)) +ot.plot.plot1D_mat(a, b, sinkhorn_pi, 'OT matrix Sinkhorn') +pl.show() diff --git a/ot/__init__.py b/ot/__init__.py index 1500e594f..1dde39017 100644 --- a/ot/__init__.py +++ b/ot/__init__.py @@ -18,6 +18,8 @@ from . import datasets from . import da from . import gromov +from . import smooth +from . import stochastic # OT functions from .lp import emd, emd2 diff --git a/ot/stochastic.py b/ot/stochastic.py new file mode 100644 index 000000000..541f4568f --- /dev/null +++ b/ot/stochastic.py @@ -0,0 +1,415 @@ +# Author: Kilian Fatras +# +# License: MIT License + +import numpy as np + + +def coordinate_gradient(b, M, reg, v, i): + ''' + Compute the coordinate gradient update for regularized discrete + distributions for (i, :) + + The function computes the gradient of the semi dual problem: + .. math:: + \W_varepsilon(a, b) = \max_\v \sum_i (\sum_j v_j * b_j + - \reg log(\sum_j exp((v_j - M_{i,j})/reg) * b_j)) * a_i + + where : + - M is the (ns,nt) metric cost matrix + - v is a dual variable in R^J + - reg is the regularization term + - a and b are source and target weights (sum to 1) + + The algorithm used for solving the problem is the ASGD & SAG algorithms + as proposed in [18]_ [alg.1 & alg.2] + + + Parameters + ---------- + + b : np.ndarray(nt,), + target measure + M : np.ndarray(ns, nt), + cost matrix + reg : float nu, + Regularization term > 0 + v : np.ndarray(nt,), + optimization vector + i : number int, + picked number i + + Returns + ------- + + coordinate gradient : np.ndarray(nt,) + + Examples + -------- + + >>> n_source = 7 + >>> n_target = 4 + >>> reg = 1 + >>> numItermax = 300000 + >>> lr = 1 + >>> a = ot.utils.unif(n_source) + >>> b = ot.utils.unif(n_target) + >>> rng = np.random.RandomState(0) + >>> X_source = rng.randn(n_source, 2) + >>> Y_target = rng.randn(n_target, 2) + >>> M = ot.dist(X_source, Y_target) + >>> method = "ASGD" + >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg, + method, numItermax, + lr) + >>> print(asgd_pi) + + References + ---------- + + [Genevay et al., 2016] : + Stochastic Optimization for Large-scale Optimal Transport, + Advances in Neural Information Processing Systems (2016), + arXiv preprint arxiv:1605.08527. + + ''' + + r = M[i, :] - v + exp_v = np.exp(-r / reg) * b + khi = exp_v / (np.sum(exp_v)) + return b - khi + + +def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=0.1): + ''' + Compute the SAG algorithm to solve the regularized discrete measures + optimal transport max problem + + The function solves the following optimization problem: + .. math:: + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + s.t. \gamma 1 = a + \gamma^T 1= b + \gamma \geq 0 + where : + - M is the (ns,nt) metric cost matrix + - :math:`\Omega` is the entropic regularization term + :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - a and b are source and target weights (sum to 1) + The algorithm used for solving the problem is the SAG algorithm + as proposed in [18]_ [alg.1] + + + Parameters + ---------- + + a : np.ndarray(ns,), + source measure + b : np.ndarray(nt,), + target measure + M : np.ndarray(ns, nt), + cost matrix + reg : float number, + Regularization term > 0 + numItermax : int number + number of iteration + lr : float number + learning rate + + Returns + ------- + + v : np.ndarray(nt,) + dual variable + + Examples + -------- + + >>> n_source = 7 + >>> n_target = 4 + >>> reg = 1 + >>> numItermax = 300000 + >>> lr = 1 + >>> a = ot.utils.unif(n_source) + >>> b = ot.utils.unif(n_target) + >>> rng = np.random.RandomState(0) + >>> X_source = rng.randn(n_source, 2) + >>> Y_target = rng.randn(n_target, 2) + >>> M = ot.dist(X_source, Y_target) + >>> method = "SAG" + >>> sag_pi = stochastic.transportation_matrix_entropic(a, b, M, reg, + method, numItermax, + lr) + >>> print(asgd_pi) + + References + ---------- + + [Genevay et al., 2016] : + Stochastic Optimization for Large-scale Optimal Transport, + Advances in Neural Information Processing Systems (2016), + arXiv preprint arxiv:1605.08527. + ''' + + n_source = np.shape(M)[0] + n_target = np.shape(M)[1] + v = np.zeros(n_target) + stored_gradient = np.zeros((n_source, n_target)) + sum_stored_gradient = np.zeros(n_target) + for _ in range(numItermax): + i = np.random.randint(n_source) + cur_coord_grad = a[i] * coordinate_gradient(b, M, reg, v, i) + sum_stored_gradient += (cur_coord_grad - stored_gradient[i]) + stored_gradient[i] = cur_coord_grad + v += lr * (1. / n_source) * sum_stored_gradient + return v + + +def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1): + ''' + Compute the ASGD algorithm to solve the regularized semi contibous measures + optimal transport max problem + + The function solves the following optimization problem: + .. math:: + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + s.t. \gamma 1 = a + \gamma^T 1= b + \gamma \geq 0 + where : + - M is the (ns,nt) metric cost matrix + - :math:`\Omega` is the entropic regularization term + :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - a and b are source and target weights (sum to 1) + The algorithm used for solving the problem is the ASGD algorithm + as proposed in [18]_ [alg.2] + + + Parameters + ---------- + + b : np.ndarray(nt,), + target measure + M : np.ndarray(ns, nt), + cost matrix + reg : float number, + Regularization term > 0 + numItermax : int number + number of iteration + lr : float number + learning rate + + + Returns + ------- + + ave_v : np.ndarray(nt,) + optimization vector + + Examples + -------- + + >>> n_source = 7 + >>> n_target = 4 + >>> reg = 1 + >>> numItermax = 300000 + >>> lr = 1 + >>> a = ot.utils.unif(n_source) + >>> b = ot.utils.unif(n_target) + >>> rng = np.random.RandomState(0) + >>> X_source = rng.randn(n_source, 2) + >>> Y_target = rng.randn(n_target, 2) + >>> M = ot.dist(X_source, Y_target) + >>> method = "ASGD" + >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg, + method, numItermax, + lr) + >>> print(asgd_pi) + + References + ---------- + + [Genevay et al., 2016] : + Stochastic Optimization for Large-scale Optimal Transport, + Advances in Neural Information Processing Systems (2016), + arXiv preprint arxiv:1605.08527. + ''' + + n_source = np.shape(M)[0] + n_target = np.shape(M)[1] + cur_v = np.zeros(n_target) + ave_v = np.zeros(n_target) + for cur_iter in range(numItermax): + k = cur_iter + 1 + i = np.random.randint(n_source) + cur_coord_grad = coordinate_gradient(b, M, reg, cur_v, i) + cur_v += (lr / np.sqrt(k)) * cur_coord_grad + ave_v = (1. / k) * cur_v + (1 - 1. / k) * ave_v + return ave_v + + +def c_transform_entropic(b, M, reg, v): + ''' + The goal is to recover u from the c-transform. + + The function computes the c_transform of a dual variable from the other + dual variable: + .. math:: + u = v^{c,reg} = -reg \sum_j exp((v - M)/reg) b_j + + where : + - M is the (ns,nt) metric cost matrix + - u, v are dual variables in R^IxR^J + - reg is the regularization term + + It is used to recover an optimal u from optimal v solving the semi dual + problem, see Proposition 2.1 of [18]_ + + + Parameters + ---------- + + b : np.ndarray(nt,) + target measure + M : np.ndarray(ns, nt) + cost matrix + reg : float + regularization term > 0 + v : np.ndarray(nt,) + dual variable + + Returns + ------- + + u : np.ndarray(ns,) + + Examples + -------- + + >>> n_source = 7 + >>> n_target = 4 + >>> reg = 1 + >>> numItermax = 300000 + >>> lr = 1 + >>> a = ot.utils.unif(n_source) + >>> b = ot.utils.unif(n_target) + >>> rng = np.random.RandomState(0) + >>> X_source = rng.randn(n_source, 2) + >>> Y_target = rng.randn(n_target, 2) + >>> M = ot.dist(X_source, Y_target) + >>> method = "ASGD" + >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg, + method, numItermax, + lr) + >>> print(asgd_pi) + + References + ---------- + + [Genevay et al., 2016] : + Stochastic Optimization for Large-scale Optimal Transport, + Advances in Neural Information Processing Systems (2016), + arXiv preprint arxiv:1605.08527. + ''' + + n_source = np.shape(M)[0] + n_target = np.shape(M)[1] + u = np.zeros(n_source) + for i in range(n_source): + r = M[i, :] - v + exp_v = np.exp(-r / reg) * b + u[i] = - reg * np.log(np.sum(exp_v)) + return u + + +def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000, + lr=0.1): + ''' + Compute the transportation matrix to solve the regularized discrete + measures optimal transport max problem + + The function solves the following optimization problem: + .. math:: + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + s.t. \gamma 1 = a + \gamma^T 1= b + \gamma \geq 0 + where : + - M is the (ns,nt) metric cost matrix + - :math:`\Omega` is the entropic regularization term + :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - a and b are source and target weights (sum to 1) + The algorithm used for solving the problem is the SAG or ASGD algorithms + as proposed in [18]_ + + + Parameters + ---------- + + a : np.ndarray(ns,), + source measure + b : np.ndarray(nt,), + target measure + M : np.ndarray(ns, nt), + cost matrix + reg : float number, + Regularization term > 0 + methode : str, + used method (SAG or ASGD) + numItermax : int number + number of iteration + lr : float number + learning rate + n_source : int number + size of the source measure + n_target : int number + size of the target measure + + Returns + ------- + + pi : np.ndarray(ns, nt) + transportation matrix + + Examples + -------- + + >>> n_source = 7 + >>> n_target = 4 + >>> reg = 1 + >>> numItermax = 300000 + >>> lr = 1 + >>> a = ot.utils.unif(n_source) + >>> b = ot.utils.unif(n_target) + >>> rng = np.random.RandomState(0) + >>> X_source = rng.randn(n_source, 2) + >>> Y_target = rng.randn(n_target, 2) + >>> M = ot.dist(X_source, Y_target) + >>> method = "ASGD" + >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg, + method, numItermax, + lr) + >>> print(asgd_pi) + + References + ---------- + + [Genevay et al., 2016] : + Stochastic Optimization for Large-scale Optimal Transport, + Advances in Neural Information Processing Systems (2016), + arXiv preprint arxiv:1605.08527. + ''' + n_source = 7 + n_target = 4 + if method.lower() == "sag": + opt_v = sag_entropic_transport(a, b, M, reg, numItermax, lr) + elif method.lower() == "asgd": + opt_v = averaged_sgd_entropic_transport(b, M, reg, numItermax, lr) + else: + print("Please, select your method between SAG and ASGD") + return None + + opt_u = c_transform_entropic(b, M, reg, opt_v) + pi = (np.exp((opt_u[:, None] + opt_v[None, :] - M[:, :]) / reg) + * a[:, None] * b[None, :]) + return pi diff --git a/test/test_stochastic.py b/test/test_stochastic.py new file mode 100644 index 000000000..829f781ae --- /dev/null +++ b/test/test_stochastic.py @@ -0,0 +1,115 @@ +""" +========================== +Stochastic test +========================== + +This example is designed to test the stochatic optimization algorithms module +for descrete and semicontinous measures from the POT library. + +""" + +# Author: Kilian Fatras +# +# License: MIT License + +import numpy as np +import ot + +############################################################################# +# +# TEST SAG algorithm +# --------------------------------------------- +# 2 identical discrete measures u defined on the same space with a +# regularization term, a learning rate and a number of iteration + + +def test_stochastic_sag(): + # test sag + n = 15 + reg = 1 + numItermax = 300000 + rng = np.random.RandomState(0) + + x = rng.randn(n, 2) + u = ot.utils.unif(n) + + M = ot.dist(x, x) + + G = ot.stochastic.transportation_matrix_entropic(u, u, M, reg, "sag", + numItermax=numItermax) + + # check constratints + np.testing.assert_allclose( + u, G.sum(1), atol=1e-04) # cf convergence sag + np.testing.assert_allclose( + u, G.sum(0), atol=1e-04) # cf convergence sag + + +############################################################################# +# +# TEST ASGD algorithm +# --------------------------------------------- +# 2 identical discrete measures u defined on the same space with a +# regularization term, a learning rate and a number of iteration + +def test_stochastic_asgd(): + # test asgd + n = 15 + reg = 1 + numItermax = 300000 + lr = 1 + rng = np.random.RandomState(0) + + x = rng.randn(n, 2) + u = ot.utils.unif(n) + + M = ot.dist(x, x) + + G = ot.stochastic.transportation_matrix_entropic(u, u, M, reg, "asgd", + numItermax=numItermax, + lr=lr) + + # check constratints + np.testing.assert_allclose( + u, G.sum(1), atol=1e-03) # cf convergence asgd + np.testing.assert_allclose( + u, G.sum(0), atol=1e-03) # cf convergence asgd + + +############################################################################# +# +# TEST Convergence SAG and ASGD toward Sinkhorn's solution +# -------------------------------------------------------- +# 2 identical discrete measures u defined on the same space with a +# regularization term, a learning rate and a number of iteration + + +def test_sag_asgd_sinkhorn(): + # test all algorithms + n = 15 + reg = 1 + nb_iter = 300000 + lr = 1 + rng = np.random.RandomState(0) + + x = rng.randn(n, 2) + u = ot.utils.unif(n) + zero = np.zeros(n) + M = ot.dist(x, x) + + G_asgd = ot.stochastic.transportation_matrix_entropic(u, u, M, reg, "asgd", + numItermax=nb_iter, + lr=1) + G_sag = ot.stochastic.transportation_matrix_entropic(u, u, M, reg, "sag", + numItermax=nb_iter) + G_sinkhorn = ot.sinkhorn(u, u, M, reg) + + # check constratints + np.testing.assert_allclose( + zero, (G_sag - G_sinkhorn).sum(1), atol=1e-03) # cf convergence sag + np.testing.assert_allclose( + zero, (G_sag - G_sinkhorn).sum(0), atol=1e-03) # cf convergence sag + np.testing.assert_allclose( + zero, (G_asgd - G_sinkhorn).sum(1), atol=1e-03) # cf convergence asgd + np.testing.assert_allclose( + zero, (G_asgd - G_sinkhorn).sum(0), atol=1e-03) # cf convergence asgd From e2d06ef4e901e0c1e9119cdbd79b9c453c1af452 Mon Sep 17 00:00:00 2001 From: Kilian Fatras Date: Fri, 15 Jun 2018 19:04:38 -0700 Subject: [PATCH 02/15] add problems solved in doc --- examples/plot_stochastic.py | 4 ++-- ot/stochastic.py | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/plot_stochastic.py b/examples/plot_stochastic.py index 9071ddb4d..0afceeb14 100644 --- a/examples/plot_stochastic.py +++ b/examples/plot_stochastic.py @@ -53,7 +53,7 @@ method = "SAG" sag_pi = ot.stochastic.transportation_matrix_entropic(a, b, M, reg, method, - numItermax, lr) + numItermax, lr) print(sag_pi) ############################################################################# @@ -90,7 +90,7 @@ method = "ASGD" asgd_pi = ot.stochastic.transportation_matrix_entropic(a, b, M, reg, method, - numItermax, lr) + numItermax, lr) print(asgd_pi) ############################################################################# diff --git a/ot/stochastic.py b/ot/stochastic.py index 541f4568f..91231507e 100644 --- a/ot/stochastic.py +++ b/ot/stochastic.py @@ -75,8 +75,8 @@ def coordinate_gradient(b, M, reg, v, i): ''' r = M[i, :] - v - exp_v = np.exp(-r / reg) * b - khi = exp_v / (np.sum(exp_v)) + exp_v = np.exp(-r/reg) * b + khi = exp_v/(np.sum(exp_v)) return b - khi @@ -161,7 +161,7 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=0.1): cur_coord_grad = a[i] * coordinate_gradient(b, M, reg, v, i) sum_stored_gradient += (cur_coord_grad - stored_gradient[i]) stored_gradient[i] = cur_coord_grad - v += lr * (1. / n_source) * sum_stored_gradient + v += lr * (1./n_source) * sum_stored_gradient return v @@ -243,8 +243,8 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1): k = cur_iter + 1 i = np.random.randint(n_source) cur_coord_grad = coordinate_gradient(b, M, reg, cur_v, i) - cur_v += (lr / np.sqrt(k)) * cur_coord_grad - ave_v = (1. / k) * cur_v + (1 - 1. / k) * ave_v + cur_v += (lr/np.sqrt(k)) * cur_coord_grad + ave_v = (1./k) * cur_v + (1 - 1./k) * ave_v return ave_v @@ -317,7 +317,7 @@ def c_transform_entropic(b, M, reg, v): u = np.zeros(n_source) for i in range(n_source): r = M[i, :] - v - exp_v = np.exp(-r / reg) * b + exp_v = np.exp(-r/reg) * b u[i] = - reg * np.log(np.sum(exp_v)) return u @@ -410,6 +410,6 @@ def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000, return None opt_u = c_transform_entropic(b, M, reg, opt_v) - pi = (np.exp((opt_u[:, None] + opt_v[None, :] - M[:, :]) / reg) + pi = (np.exp((opt_u[:, None] + opt_v[None, :] - M[:, :])/reg) * a[:, None] * b[None, :]) return pi From 18f9242b9575e7551d5e30b66cfbaaac85e0022a Mon Sep 17 00:00:00 2001 From: Kilian Fatras Date: Fri, 15 Jun 2018 19:15:48 -0700 Subject: [PATCH 03/15] PEP8 --- examples/plot_stochastic.py | 4 ++-- ot/stochastic.py | 14 +++++++------- test/test_stochastic.py | 1 - 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/examples/plot_stochastic.py b/examples/plot_stochastic.py index 0afceeb14..9071ddb4d 100644 --- a/examples/plot_stochastic.py +++ b/examples/plot_stochastic.py @@ -53,7 +53,7 @@ method = "SAG" sag_pi = ot.stochastic.transportation_matrix_entropic(a, b, M, reg, method, - numItermax, lr) + numItermax, lr) print(sag_pi) ############################################################################# @@ -90,7 +90,7 @@ method = "ASGD" asgd_pi = ot.stochastic.transportation_matrix_entropic(a, b, M, reg, method, - numItermax, lr) + numItermax, lr) print(asgd_pi) ############################################################################# diff --git a/ot/stochastic.py b/ot/stochastic.py index 91231507e..541f4568f 100644 --- a/ot/stochastic.py +++ b/ot/stochastic.py @@ -75,8 +75,8 @@ def coordinate_gradient(b, M, reg, v, i): ''' r = M[i, :] - v - exp_v = np.exp(-r/reg) * b - khi = exp_v/(np.sum(exp_v)) + exp_v = np.exp(-r / reg) * b + khi = exp_v / (np.sum(exp_v)) return b - khi @@ -161,7 +161,7 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=0.1): cur_coord_grad = a[i] * coordinate_gradient(b, M, reg, v, i) sum_stored_gradient += (cur_coord_grad - stored_gradient[i]) stored_gradient[i] = cur_coord_grad - v += lr * (1./n_source) * sum_stored_gradient + v += lr * (1. / n_source) * sum_stored_gradient return v @@ -243,8 +243,8 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1): k = cur_iter + 1 i = np.random.randint(n_source) cur_coord_grad = coordinate_gradient(b, M, reg, cur_v, i) - cur_v += (lr/np.sqrt(k)) * cur_coord_grad - ave_v = (1./k) * cur_v + (1 - 1./k) * ave_v + cur_v += (lr / np.sqrt(k)) * cur_coord_grad + ave_v = (1. / k) * cur_v + (1 - 1. / k) * ave_v return ave_v @@ -317,7 +317,7 @@ def c_transform_entropic(b, M, reg, v): u = np.zeros(n_source) for i in range(n_source): r = M[i, :] - v - exp_v = np.exp(-r/reg) * b + exp_v = np.exp(-r / reg) * b u[i] = - reg * np.log(np.sum(exp_v)) return u @@ -410,6 +410,6 @@ def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000, return None opt_u = c_transform_entropic(b, M, reg, opt_v) - pi = (np.exp((opt_u[:, None] + opt_v[None, :] - M[:, :])/reg) + pi = (np.exp((opt_u[:, None] + opt_v[None, :] - M[:, :]) / reg) * a[:, None] * b[None, :]) return pi diff --git a/test/test_stochastic.py b/test/test_stochastic.py index 829f781ae..5fe5ea966 100644 --- a/test/test_stochastic.py +++ b/test/test_stochastic.py @@ -89,7 +89,6 @@ def test_sag_asgd_sinkhorn(): n = 15 reg = 1 nb_iter = 300000 - lr = 1 rng = np.random.RandomState(0) x = rng.randn(n, 2) From 3617b42ec05fe88eb616b32b2c3827d63022e287 Mon Sep 17 00:00:00 2001 From: Kilian Fatras Date: Fri, 15 Jun 2018 19:18:20 -0700 Subject: [PATCH 04/15] PEP8 --- ot/stochastic.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/ot/stochastic.py b/ot/stochastic.py index 541f4568f..f34154bdb 100644 --- a/ot/stochastic.py +++ b/ot/stochastic.py @@ -313,7 +313,6 @@ def c_transform_entropic(b, M, reg, v): ''' n_source = np.shape(M)[0] - n_target = np.shape(M)[1] u = np.zeros(n_source) for i in range(n_source): r = M[i, :] - v @@ -399,8 +398,6 @@ def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000, Advances in Neural Information Processing Systems (2016), arXiv preprint arxiv:1605.08527. ''' - n_source = 7 - n_target = 4 if method.lower() == "sag": opt_v = sag_entropic_transport(a, b, M, reg, numItermax, lr) elif method.lower() == "asgd": From 055417ee06917ff8bac5d07b2d2a17d80e5da4b6 Mon Sep 17 00:00:00 2001 From: Kilian Fatras Date: Fri, 15 Jun 2018 19:25:29 -0700 Subject: [PATCH 05/15] pep8 --- ot/stochastic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ot/stochastic.py b/ot/stochastic.py index f34154bdb..9912223b6 100644 --- a/ot/stochastic.py +++ b/ot/stochastic.py @@ -407,6 +407,6 @@ def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000, return None opt_u = c_transform_entropic(b, M, reg, opt_v) - pi = (np.exp((opt_u[:, None] + opt_v[None, :] - M[:, :]) / reg) - * a[:, None] * b[None, :]) + pi = (np.exp((opt_u[:, None] + opt_v[None, :] - M[:, :]) / reg) * + a[:, None] * b[None, :]) return pi From 74cfe5ac77c3e964a85ef90c11d8ebffa16ddcfe Mon Sep 17 00:00:00 2001 From: Kilian Fatras Date: Mon, 18 Jun 2018 17:56:28 -0700 Subject: [PATCH 06/15] add sgd --- README.md | 8 + examples/plot_stochastic.py | 95 +++++++- ot/stochastic.py | 468 ++++++++++++++++++++++++++++++++---- test/test_stochastic.py | 104 +++++++- 4 files changed, 608 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index 466c09ca8..8e8dcd4aa 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ It provides the following solvers: * Linear OT [14] and Joint OT matrix and mapping estimation [8]. * Wasserstein Discriminant Analysis [11] (requires autograd + pymanopt). * Gromov-Wasserstein distances and barycenters ([13] and regularized [12]) +* Stochastic Optimization for Large-scale Optimal Transport (semi-dual problem [18] and dual problem [19]) Some demonstrations (both in Python and Jupyter Notebook format) are available in the examples folder. @@ -149,6 +150,7 @@ The contributors to this library are: * [Stanislas Chambon](https://slasnista.github.io/) * [Antoine Rolet](https://arolet.github.io/) * Erwan Vautier (Gromov-Wasserstein) +* [Kilian Fatras](https://kilianfatras.github.io/) (Stochastic optimization) This toolbox benefit a lot from open source research and we would like to thank the following persons for providing some code (in various languages): @@ -213,3 +215,9 @@ You can also post bug reports and feature requests in Github issues. Make sure t [15] Peyré, G., & Cuturi, M. (2018). [Computational Optimal Transport](https://arxiv.org/pdf/1803.00567.pdf) . [16] Agueh, M., & Carlier, G. (2011). [Barycenters in the Wasserstein space](https://hal.archives-ouvertes.fr/hal-00637399/document). SIAM Journal on Mathematical Analysis, 43(2), 904-924. + +[17] Blondel, M., Seguy, V., & Rolet, A. (2018). [Smooth and Sparse Optimal Transport](https://arxiv.org/pdf/1710.06276.pdf). Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics (AISTATS). + +[18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. (2016) [Stochastic Optimization for Large-scale Optimal Transport](arXiv preprint arxiv:1605.08527). Advances in Neural Information Processing Systems (2016). + +[19] Seguy, V., Bhushan Damodaran, B., Flamary, R., Courty, N., Rolet, A.& Blondel, M. [Large-scale Optimal Transport and Mapping Estimation](https://arxiv.org/pdf/1711.02283.pdf). International Conference on Learning Representation (2018) diff --git a/examples/plot_stochastic.py b/examples/plot_stochastic.py index 9071ddb4d..3fc1955c7 100644 --- a/examples/plot_stochastic.py +++ b/examples/plot_stochastic.py @@ -18,9 +18,9 @@ ############################################################################# -# COMPUTE TRANSPORTATION MATRIX +# COMPUTE TRANSPORTATION MATRIX FOR SEMI-DUAL PROBLEM ############################################################################# - +print("------------SEMI-DUAL PROBLEM------------") ############################################################################# # DISCRETE CASE # Sample two discrete measures for the discrete case @@ -48,12 +48,12 @@ # Call the "SAG" method to find the transportation matrix in the discrete case # --------------------------------------------- # -# Define the method "SAG", call ot.transportation_matrix_entropic and plot the +# Define the method "SAG", call ot.solve_semi_dual_entropic and plot the # results. method = "SAG" -sag_pi = ot.stochastic.transportation_matrix_entropic(a, b, M, reg, method, - numItermax, lr) +sag_pi = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method, + numItermax, lr) print(sag_pi) ############################################################################# @@ -68,8 +68,9 @@ n_source = 7 n_target = 4 reg = 1 -numItermax = 500000 +numItermax = 100000 lr = 1 +log = True a = ot.utils.unif(n_source) b = ot.utils.unif(n_target) @@ -85,12 +86,13 @@ # case # --------------------------------------------- # -# Define the method "ASGD", call ot.transportation_matrix_entropic and plot the +# Define the method "ASGD", call ot.solve_semi_dual_entropic and plot the # results. method = "ASGD" -asgd_pi = ot.stochastic.transportation_matrix_entropic(a, b, M, reg, method, - numItermax, lr) +asgd_pi, log = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method, + numItermax, lr, log) +print(log['alpha'], log['beta']) print(asgd_pi) ############################################################################# @@ -100,7 +102,7 @@ # # Call the Sinkhorn algorithm from POT -sinkhorn_pi = ot.sinkhorn(a, b, M, 1) +sinkhorn_pi = ot.sinkhorn(a, b, M, reg) print(sinkhorn_pi) @@ -113,7 +115,7 @@ # ---------------- pl.figure(4, figsize=(5, 5)) -ot.plot.plot1D_mat(a, b, sag_pi, 'OT matrix SAG') +ot.plot.plot1D_mat(a, b, sag_pi, 'semi-dual : OT matrix SAG') pl.show() @@ -122,7 +124,76 @@ # ----------------- pl.figure(4, figsize=(5, 5)) -ot.plot.plot1D_mat(a, b, asgd_pi, 'OT matrix ASGD') +ot.plot.plot1D_mat(a, b, asgd_pi, 'semi-dual : OT matrix ASGD') +pl.show() + + +############################################################################## +# Plot Sinkhorn results +# --------------------- + +pl.figure(4, figsize=(5, 5)) +ot.plot.plot1D_mat(a, b, sinkhorn_pi, 'OT matrix Sinkhorn') +pl.show() + +############################################################################# +# COMPUTE TRANSPORTATION MATRIX FOR DUAL PROBLEM +############################################################################# +print("------------DUAL PROBLEM------------") +############################################################################# +# SEMICONTINOUS CASE +# Sample one general measure a, one discrete measures b for the semicontinous +# case +# --------------------------------------------- +# +# Define one general measure a, one discrete measures b, the points where +# are defined the source and the target measures and finally the cost matrix c. + +n_source = 7 +n_target = 4 +reg = 1 +numItermax = 100000 +lr = 0.1 +batch_size = 3 +log = True + +a = ot.utils.unif(n_source) +b = ot.utils.unif(n_target) + +rng = np.random.RandomState(0) +X_source = rng.randn(n_source, 2) +Y_target = rng.randn(n_target, 2) +M = ot.dist(X_source, Y_target) + +############################################################################# +# +# Call the "SGD" dual method to find the transportation matrix in the semicontinous +# case +# --------------------------------------------- +# +# Call ot.solve_dual_entropic and plot the results. + +sgd_dual_pi, log = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size, + numItermax, lr, log) +print(log['alpha'], log['beta']) +print(sgd_dual_pi) + +############################################################################# +# +# Compare the results with the Sinkhorn algorithm +# --------------------------------------------- +# +# Call the Sinkhorn algorithm from POT + +sinkhorn_pi = ot.sinkhorn(a, b, M, reg) +print(sinkhorn_pi) + +############################################################################## +# Plot SGD results +# ----------------- + +pl.figure(4, figsize=(5, 5)) +ot.plot.plot1D_mat(a, b, sgd_dual_pi, 'dual : OT matrix SGD') pl.show() diff --git a/ot/stochastic.py b/ot/stochastic.py index 9912223b6..31c99bebf 100644 --- a/ot/stochastic.py +++ b/ot/stochastic.py @@ -5,7 +5,10 @@ import numpy as np -def coordinate_gradient(b, M, reg, v, i): +############################################################################## +# Optimization toolbox for SEMI - DUAL problem +############################################################################## +def coordinate_gradient(b, M, reg, beta, i): ''' Compute the coordinate gradient update for regularized discrete distributions for (i, :) @@ -59,7 +62,7 @@ def coordinate_gradient(b, M, reg, v, i): >>> Y_target = rng.randn(n_target, 2) >>> M = ot.dist(X_source, Y_target) >>> method = "ASGD" - >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg, + >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, method, numItermax, lr) >>> print(asgd_pi) @@ -74,9 +77,9 @@ def coordinate_gradient(b, M, reg, v, i): ''' - r = M[i, :] - v - exp_v = np.exp(-r / reg) * b - khi = exp_v / (np.sum(exp_v)) + r = M[i, :] - beta + exp_beta = np.exp(-r/reg) * b + khi = exp_beta/(np.sum(exp_beta)) return b - khi @@ -137,7 +140,7 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=0.1): >>> Y_target = rng.randn(n_target, 2) >>> M = ot.dist(X_source, Y_target) >>> method = "SAG" - >>> sag_pi = stochastic.transportation_matrix_entropic(a, b, M, reg, + >>> sag_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, method, numItermax, lr) >>> print(asgd_pi) @@ -153,16 +156,16 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=0.1): n_source = np.shape(M)[0] n_target = np.shape(M)[1] - v = np.zeros(n_target) + cur_beta = np.zeros(n_target) stored_gradient = np.zeros((n_source, n_target)) sum_stored_gradient = np.zeros(n_target) for _ in range(numItermax): i = np.random.randint(n_source) - cur_coord_grad = a[i] * coordinate_gradient(b, M, reg, v, i) + cur_coord_grad = a[i] * coordinate_gradient(b, M, reg, cur_beta, i) sum_stored_gradient += (cur_coord_grad - stored_gradient[i]) stored_gradient[i] = cur_coord_grad - v += lr * (1. / n_source) * sum_stored_gradient - return v + cur_beta += lr * (1./n_source) * sum_stored_gradient + return cur_beta def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1): @@ -170,19 +173,19 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1): Compute the ASGD algorithm to solve the regularized semi contibous measures optimal transport max problem - The function solves the following optimization problem: - .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) - s.t. \gamma 1 = a - \gamma^T 1= b - \gamma \geq 0 - where : - - M is the (ns,nt) metric cost matrix - - :math:`\Omega` is the entropic regularization term - :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - a and b are source and target weights (sum to 1) - The algorithm used for solving the problem is the ASGD algorithm - as proposed in [18]_ [alg.2] + The function solves the following optimization problem: + .. math:: + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + s.t. \gamma 1 = a + \gamma^T 1= b + \gamma \geq 0 + where : + - M is the (ns,nt) metric cost matrix + - :math:`\Omega` is the entropic regularization term + :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - a and b are source and target weights (sum to 1) + The algorithm used for solving the problem is the ASGD algorithm + as proposed in [18]_ [alg.2] Parameters @@ -221,7 +224,7 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1): >>> Y_target = rng.randn(n_target, 2) >>> M = ot.dist(X_source, Y_target) >>> method = "ASGD" - >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg, + >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, method, numItermax, lr) >>> print(asgd_pi) @@ -237,18 +240,18 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1): n_source = np.shape(M)[0] n_target = np.shape(M)[1] - cur_v = np.zeros(n_target) - ave_v = np.zeros(n_target) + cur_beta = np.zeros(n_target) + ave_beta = np.zeros(n_target) for cur_iter in range(numItermax): k = cur_iter + 1 i = np.random.randint(n_source) - cur_coord_grad = coordinate_gradient(b, M, reg, cur_v, i) - cur_v += (lr / np.sqrt(k)) * cur_coord_grad - ave_v = (1. / k) * cur_v + (1 - 1. / k) * ave_v - return ave_v + cur_coord_grad = coordinate_gradient(b, M, reg, cur_beta, i) + cur_beta += (lr/np.sqrt(k)) * cur_coord_grad + ave_beta = (1./k) * cur_beta + (1 - 1./k) * ave_beta + return ave_beta -def c_transform_entropic(b, M, reg, v): +def c_transform_entropic(b, M, reg, beta): ''' The goal is to recover u from the c-transform. @@ -298,7 +301,7 @@ def c_transform_entropic(b, M, reg, v): >>> Y_target = rng.randn(n_target, 2) >>> M = ot.dist(X_source, Y_target) >>> method = "ASGD" - >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg, + >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, method, numItermax, lr) >>> print(asgd_pi) @@ -313,16 +316,18 @@ def c_transform_entropic(b, M, reg, v): ''' n_source = np.shape(M)[0] - u = np.zeros(n_source) + n_target = np.shape(M)[1] + alpha = np.zeros(n_source) for i in range(n_source): - r = M[i, :] - v - exp_v = np.exp(-r / reg) * b - u[i] = - reg * np.log(np.sum(exp_v)) - return u + r = M[i, :] - beta + min_r = np.min(r) + exp_beta = np.exp(-(r - min_r)/reg) * b + alpha[i] = min_r - reg * np.log(np.sum(exp_beta)) + return alpha -def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000, - lr=0.1): +def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=0.1, + log=False): ''' Compute the transportation matrix to solve the regularized discrete measures optimal transport max problem @@ -363,12 +368,16 @@ def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000, size of the source measure n_target : int number size of the target measure + log : bool, optional + record log if True Returns ------- pi : np.ndarray(ns, nt) transportation matrix + log : dict + log dictionary return only if log==True in parameters Examples -------- @@ -385,7 +394,7 @@ def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000, >>> Y_target = rng.randn(n_target, 2) >>> M = ot.dist(X_source, Y_target) >>> method = "ASGD" - >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg, + >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, method, numItermax, lr) >>> print(asgd_pi) @@ -398,15 +407,384 @@ def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000, Advances in Neural Information Processing Systems (2016), arXiv preprint arxiv:1605.08527. ''' + n_source = 7 + n_target = 4 if method.lower() == "sag": - opt_v = sag_entropic_transport(a, b, M, reg, numItermax, lr) + opt_beta = sag_entropic_transport(a, b, M, reg, numItermax, lr) elif method.lower() == "asgd": - opt_v = averaged_sgd_entropic_transport(b, M, reg, numItermax, lr) + opt_beta = averaged_sgd_entropic_transport(b, M, reg, numItermax, lr) else: print("Please, select your method between SAG and ASGD") return None - opt_u = c_transform_entropic(b, M, reg, opt_v) - pi = (np.exp((opt_u[:, None] + opt_v[None, :] - M[:, :]) / reg) * + opt_alpha = c_transform_entropic(b, M, reg, opt_beta) + pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :])/reg) * a[:, None] * b[None, :]) - return pi + + if log: + log = {} + log['alpha'] = opt_alpha + log['beta'] = opt_beta + return pi, log + else: + return pi + + +############################################################################## +# Optimization toolbox for DUAL problem +############################################################################## + + +def grad_dF_dalpha(M, reg, alpha, beta, batch_size, batch_alpha, batch_beta): + ''' + Computes the partial gradient of F_\W_varepsilon + + Compute the partial gradient of the dual problem: + ..Math: + \forall i in batch_alpha, + grad_alpha_i = 1 * batch_size - + sum_{j in batch_beta} exp((alpha_i + beta_j - M_{i,j})/reg) + + where : + - M is the (ns,nt) metric cost matrix + - alpha, beta are dual variables in R^ixR^J + - reg is the regularization term + - batch_alpha and batch_beta are list of index + + The algorithm used for solving the dual problem is the SGD algorithm + as proposed in [19]_ [alg.1] + + Parameters + ---------- + + reg : float number, + Regularization term > 0 + M : np.ndarray(ns, nt), + cost matrix + alpha : np.ndarray(ns,) + dual variable + beta : np.ndarray(nt,) + dual variable + batch_size : int number + size of the batch + batch_alpha : np.ndarray(bs,) + batch of index of alpha + batch_beta : np.ndarray(bs,) + batch of index of beta + + Returns + ------- + + grad : np.ndarray(ns,) + partial grad F in alpha + + Examples + -------- + + >>> n_source = 7 + >>> n_target = 4 + >>> reg = 1 + >>> numItermax = 20000 + >>> lr = 0.1 + >>> batch_size = 3 + >>> log = True + >>> a = ot.utils.unif(n_source) + >>> b = ot.utils.unif(n_target) + >>> rng = np.random.RandomState(0) + >>> X_source = rng.randn(n_source, 2) + >>> Y_target = rng.randn(n_target, 2) + >>> M = ot.dist(X_source, Y_target) + >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg, + batch_size, + numItermax, lr, log) + >>> print(log['alpha'], log['beta']) + >>> print(sgd_dual_pi) + + References + ---------- + + [Seguy et al., 2018] : + International Conference on Learning Representation (2018), + arXiv preprint arxiv:1711.02283. + ''' + + grad_alpha = np.zeros(batch_size) + grad_alpha[:] = batch_size + for j in batch_beta: + grad_alpha -= np.exp((alpha[batch_alpha] + beta[j] + - M[batch_alpha, j])/reg) + return grad_alpha + + +def grad_dF_dbeta(M, reg, alpha, beta, batch_size, batch_alpha, batch_beta): + ''' + Computes the partial gradient of F_\W_varepsilon + + Compute the partial gradient of the dual problem: + ..Math: + \forall j in batch_beta, + grad_beta_j = 1 * batch_size - + sum_{i in batch_alpha} exp((alpha_i + beta_j - M_{i,j})/reg) + + where : + - M is the (ns,nt) metric cost matrix + - alpha, beta are dual variables in R^ixR^J + - reg is the regularization term + - batch_alpha and batch_beta are list of index + + The algorithm used for solving the dual problem is the SGD algorithm + as proposed in [19]_ [alg.1] + + Parameters + ---------- + + M : np.ndarray(ns, nt), + cost matrix + reg : float number, + Regularization term > 0 + alpha : np.ndarray(ns,) + dual variable + beta : np.ndarray(nt,) + dual variable + batch_size : int number + size of the batch + batch_alpha : np.ndarray(bs,) + batch of index of alpha + batch_beta : np.ndarray(bs,) + batch of index of beta + + Returns + ------- + + grad : np.ndarray(ns,) + partial grad F in beta + + Examples + -------- + + >>> n_source = 7 + >>> n_target = 4 + >>> reg = 1 + >>> numItermax = 20000 + >>> lr = 0.1 + >>> batch_size = 3 + >>> log = True + >>> a = ot.utils.unif(n_source) + >>> b = ot.utils.unif(n_target) + >>> rng = np.random.RandomState(0) + >>> X_source = rng.randn(n_source, 2) + >>> Y_target = rng.randn(n_target, 2) + >>> M = ot.dist(X_source, Y_target) + >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg, + batch_size, + numItermax, lr, log) + >>> print(log['alpha'], log['beta']) + >>> print(sgd_dual_pi) + + References + ---------- + + [Seguy et al., 2018] : + International Conference on Learning Representation (2018), + arXiv preprint arxiv:1711.02283. + + ''' + + grad_beta = np.zeros(batch_size) + grad_beta[:] = batch_size + for i in batch_alpha: + grad_beta -= np.exp((alpha[i] + + beta[batch_beta] - M[i, batch_beta])/reg) + return grad_beta + + +def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr, + alternate=True): + ''' + Compute the sgd algorithm to solve the regularized discrete measures + optimal transport dual problem + + The function solves the following optimization problem: + .. math:: + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + s.t. \gamma 1 = a + \gamma^T 1= b + \gamma \geq 0 + where : + - M is the (ns,nt) metric cost matrix + - :math:`\Omega` is the entropic regularization term + :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - a and b are source and target weights (sum to 1) + + Parameters + ---------- + + M : np.ndarray(ns, nt), + cost matrix + reg : float number, + Regularization term > 0 + batch_size : int number + size of the batch + numItermax : int number + number of iteration + lr : float number + learning rate + alternate : bool, optional + alternating algorithm + + Returns + ------- + + alpha : np.ndarray(ns,) + dual variable + beta : np.ndarray(nt,) + dual variable + + Examples + -------- + + >>> n_source = 7 + >>> n_target = 4 + >>> reg = 1 + >>> numItermax = 20000 + >>> lr = 0.1 + >>> batch_size = 3 + >>> log = True + >>> a = ot.utils.unif(n_source) + >>> b = ot.utils.unif(n_target) + >>> rng = np.random.RandomState(0) + >>> X_source = rng.randn(n_source, 2) + >>> Y_target = rng.randn(n_target, 2) + >>> M = ot.dist(X_source, Y_target) + >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg, + batch_size, + numItermax, lr, log) + >>> print(log['alpha'], log['beta']) + >>> print(sgd_dual_pi) + + References + ---------- + + [Seguy et al., 2018] : + International Conference on Learning Representation (2018), + arXiv preprint arxiv:1711.02283. + ''' + + n_source = np.shape(M)[0] + n_target = np.shape(M)[1] + cur_alpha = np.random.randn(n_source) + cur_beta = np.random.randn(n_target) + if alternate: + for cur_iter in range(numItermax): + k = np.sqrt(cur_iter + 1) + batch_alpha = np.random.choice(n_source, batch_size, replace=False) + batch_beta = np.random.choice(n_target, batch_size, replace=False) + grad_F_alpha = grad_dF_dalpha(M, reg, cur_alpha, cur_beta, + batch_size, batch_alpha, batch_beta) + cur_alpha[batch_alpha] += (lr/k) * grad_F_alpha + grad_F_beta = grad_dF_dbeta(M, reg, cur_alpha, cur_beta, + batch_size, batch_alpha, batch_beta) + cur_beta[batch_beta] += (lr/k) * grad_F_beta + + else: + for cur_iter in range(numItermax): + k = np.sqrt(cur_iter + 1) + batch_alpha = np.random.choice(n_source, batch_size, replace=False) + batch_beta = np.random.choice(n_target, batch_size, replace=False) + grad_F_alpha = grad_dF_dalpha(M, reg, cur_alpha, cur_beta, + batch_size, batch_alpha, batch_beta) + grad_F_beta = grad_dF_dbeta(M, reg, cur_alpha, cur_beta, + batch_size, batch_alpha, batch_beta) + cur_alpha[batch_alpha] += (lr/k) * grad_F_alpha + cur_beta[batch_beta] += (lr/k) * grad_F_beta + + return cur_alpha, cur_beta + + +def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1, + log=False): + ''' + Compute the transportation matrix to solve the regularized discrete measures + optimal transport dual problem + + The function solves the following optimization problem: + .. math:: + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + s.t. \gamma 1 = a + \gamma^T 1= b + \gamma \geq 0 + where : + - M is the (ns,nt) metric cost matrix + - :math:`\Omega` is the entropic regularization term + :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - a and b are source and target weights (sum to 1) + + Parameters + ---------- + + a : np.ndarray(ns,), + source measure + b : np.ndarray(nt,), + target measure + M : np.ndarray(ns, nt), + cost matrix + reg : float number, + Regularization term > 0 + batch_size : int number + size of the batch + numItermax : int number + number of iteration + lr : float number + learning rate + log : bool, optional + record log if True + + Returns + ------- + + pi : np.ndarray(ns, nt) + transportation matrix + log : dict + log dictionary return only if log==True in parameters + + Examples + -------- + + >>> n_source = 7 + >>> n_target = 4 + >>> reg = 1 + >>> numItermax = 20000 + >>> lr = 0.1 + >>> batch_size = 3 + >>> log = True + >>> a = ot.utils.unif(n_source) + >>> b = ot.utils.unif(n_target) + >>> rng = np.random.RandomState(0) + >>> X_source = rng.randn(n_source, 2) + >>> Y_target = rng.randn(n_target, 2) + >>> M = ot.dist(X_source, Y_target) + >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg, + batch_size, + numItermax, lr, log) + >>> print(log['alpha'], log['beta']) + >>> print(sgd_dual_pi) + + References + ---------- + + [Seguy et al., 2018] : + International Conference on Learning Representation (2018), + arXiv preprint arxiv:1711.02283. + ''' + + opt_alpha, opt_beta = sgd_entropic_regularization(M, reg, batch_size, + numItermax, lr) + pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :])/reg) * + a[:, None] * b[None, :]) + if log: + log = {} + log['alpha'] = opt_alpha + log['beta'] = opt_beta + return pi, log + else: + return pi diff --git a/test/test_stochastic.py b/test/test_stochastic.py index 5fe5ea966..bc0cebbc4 100644 --- a/test/test_stochastic.py +++ b/test/test_stochastic.py @@ -15,6 +15,11 @@ import numpy as np import ot + +############################################################################# +# COMPUTE TEST FOR SEMI-DUAL PROBLEM +############################################################################# + ############################################################################# # # TEST SAG algorithm @@ -35,8 +40,8 @@ def test_stochastic_sag(): M = ot.dist(x, x) - G = ot.stochastic.transportation_matrix_entropic(u, u, M, reg, "sag", - numItermax=numItermax) + G = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "sag", + numItermax=numItermax) # check constratints np.testing.assert_allclose( @@ -65,9 +70,9 @@ def test_stochastic_asgd(): M = ot.dist(x, x) - G = ot.stochastic.transportation_matrix_entropic(u, u, M, reg, "asgd", - numItermax=numItermax, - lr=lr) + G = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "asgd", + numItermax=numItermax, + lr=lr) # check constratints np.testing.assert_allclose( @@ -89,6 +94,7 @@ def test_sag_asgd_sinkhorn(): n = 15 reg = 1 nb_iter = 300000 + lr = 1 rng = np.random.RandomState(0) x = rng.randn(n, 2) @@ -96,11 +102,10 @@ def test_sag_asgd_sinkhorn(): zero = np.zeros(n) M = ot.dist(x, x) - G_asgd = ot.stochastic.transportation_matrix_entropic(u, u, M, reg, "asgd", - numItermax=nb_iter, - lr=1) - G_sag = ot.stochastic.transportation_matrix_entropic(u, u, M, reg, "sag", - numItermax=nb_iter) + G_asgd = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "asgd", + numItermax=nb_iter, lr=lr) + G_sag = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "sag", + numItermax=nb_iter) G_sinkhorn = ot.sinkhorn(u, u, M, reg) # check constratints @@ -112,3 +117,82 @@ def test_sag_asgd_sinkhorn(): zero, (G_asgd - G_sinkhorn).sum(1), atol=1e-03) # cf convergence asgd np.testing.assert_allclose( zero, (G_asgd - G_sinkhorn).sum(0), atol=1e-03) # cf convergence asgd + np.testing.assert_allclose( + G_sag, G_sinkhorn, atol=1e-03) # cf convergence sag + np.testing.assert_allclose( + G_asgd, G_sinkhorn, atol=1e-03) # cf convergence asgd + + +############################################################################# +# COMPUTE TEST FOR DUAL PROBLEM +############################################################################# + +############################################################################# +# +# TEST SGD algorithm +# --------------------------------------------- +# 2 identical discrete measures u defined on the same space with a +# regularization term, a batch_size and a number of iteration + +def test_stochastic_dual_sgd(): + # test sgd + print("SGD") + n = 10 + reg = 1 + numItermax = 300000 + batch_size = 8 + rng = np.random.RandomState(0) + + x = rng.randn(n, 2) + u = ot.utils.unif(n) + + M = ot.dist(x, x) + + G = ot.stochastic.solve_dual_entropic(u, u, M, reg, batch_size, + numItermax=numItermax) + + # check constratints + np.testing.assert_allclose( + u, G.sum(1), atol=1e-02) # cf convergence sgd + np.testing.assert_allclose( + u, G.sum(0), atol=1e-02) # cf convergence sgd + +############################################################################# +# +# TEST Convergence SGD toward Sinkhorn's solution +# -------------------------------------------------------- +# 2 identical discrete measures u defined on the same space with a +# regularization term, a batch_size and a number of iteration + + +def test_dual_sgd_sinkhorn(): + # test all dual algorithms + print("SGD vs Sinkhorn") + n = 10 + reg = 1 + nb_iter = 300000 + batch_size = 8 + rng = np.random.RandomState(0) + + x = rng.randn(n, 2) + u = ot.utils.unif(n) + zero = np.zeros(n) + M = ot.dist(x, x) + + G_sgd = ot.stochastic.solve_dual_entropic(u, u, M, reg, batch_size, + numItermax=nb_iter) + + G_sinkhorn = ot.sinkhorn(u, u, M, reg) + + # check constratints + np.testing.assert_allclose( + zero, (G_sgd - G_sinkhorn).sum(1), atol=1e-02) # cf convergence sgd + np.testing.assert_allclose( + zero, (G_sgd - G_sinkhorn).sum(0), atol=1e-02) # cf convergence sgd + np.testing.assert_allclose( + G_sgd, G_sinkhorn, atol=1e-02) # cf convergence sgd + + +if __name__ == '__main__': + test_stochastic_dual_sgd() + test_dual_sgd_sinkhorn() From e068b58ba4234792d96287afd34c3cddef544dd4 Mon Sep 17 00:00:00 2001 From: Kilian Fatras Date: Mon, 18 Jun 2018 18:05:48 -0700 Subject: [PATCH 07/15] pep8 --- examples/plot_stochastic.py | 6 +++--- ot/stochastic.py | 33 +++++++++++++++------------------ test/test_stochastic.py | 4 ++-- 3 files changed, 20 insertions(+), 23 deletions(-) diff --git a/examples/plot_stochastic.py b/examples/plot_stochastic.py index 3fc1955c7..e1394737e 100644 --- a/examples/plot_stochastic.py +++ b/examples/plot_stochastic.py @@ -53,7 +53,7 @@ method = "SAG" sag_pi = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method, - numItermax, lr) + numItermax, lr) print(sag_pi) ############################################################################# @@ -91,7 +91,7 @@ method = "ASGD" asgd_pi, log = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method, - numItermax, lr, log) + numItermax, lr, log) print(log['alpha'], log['beta']) print(asgd_pi) @@ -174,7 +174,7 @@ # Call ot.solve_dual_entropic and plot the results. sgd_dual_pi, log = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size, - numItermax, lr, log) + numItermax, lr, log) print(log['alpha'], log['beta']) print(sgd_dual_pi) diff --git a/ot/stochastic.py b/ot/stochastic.py index 31c99bebf..98537d9d2 100644 --- a/ot/stochastic.py +++ b/ot/stochastic.py @@ -78,8 +78,8 @@ def coordinate_gradient(b, M, reg, beta, i): ''' r = M[i, :] - beta - exp_beta = np.exp(-r/reg) * b - khi = exp_beta/(np.sum(exp_beta)) + exp_beta = np.exp(-r / reg) * b + khi = exp_beta / (np.sum(exp_beta)) return b - khi @@ -164,7 +164,7 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=0.1): cur_coord_grad = a[i] * coordinate_gradient(b, M, reg, cur_beta, i) sum_stored_gradient += (cur_coord_grad - stored_gradient[i]) stored_gradient[i] = cur_coord_grad - cur_beta += lr * (1./n_source) * sum_stored_gradient + cur_beta += lr * (1. / n_source) * sum_stored_gradient return cur_beta @@ -246,8 +246,8 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1): k = cur_iter + 1 i = np.random.randint(n_source) cur_coord_grad = coordinate_gradient(b, M, reg, cur_beta, i) - cur_beta += (lr/np.sqrt(k)) * cur_coord_grad - ave_beta = (1./k) * cur_beta + (1 - 1./k) * ave_beta + cur_beta += (lr / np.sqrt(k)) * cur_coord_grad + ave_beta = (1. / k) * cur_beta + (1 - 1. / k) * ave_beta return ave_beta @@ -316,12 +316,11 @@ def c_transform_entropic(b, M, reg, beta): ''' n_source = np.shape(M)[0] - n_target = np.shape(M)[1] alpha = np.zeros(n_source) for i in range(n_source): r = M[i, :] - beta min_r = np.min(r) - exp_beta = np.exp(-(r - min_r)/reg) * b + exp_beta = np.exp(-(r - min_r) / reg) * b alpha[i] = min_r - reg * np.log(np.sum(exp_beta)) return alpha @@ -407,8 +406,6 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=0.1, Advances in Neural Information Processing Systems (2016), arXiv preprint arxiv:1605.08527. ''' - n_source = 7 - n_target = 4 if method.lower() == "sag": opt_beta = sag_entropic_transport(a, b, M, reg, numItermax, lr) elif method.lower() == "asgd": @@ -418,7 +415,7 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=0.1, return None opt_alpha = c_transform_entropic(b, M, reg, opt_beta) - pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :])/reg) * + pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) * a[:, None] * b[None, :]) if log: @@ -511,8 +508,8 @@ def grad_dF_dalpha(M, reg, alpha, beta, batch_size, batch_alpha, batch_beta): grad_alpha = np.zeros(batch_size) grad_alpha[:] = batch_size for j in batch_beta: - grad_alpha -= np.exp((alpha[batch_alpha] + beta[j] - - M[batch_alpha, j])/reg) + grad_alpha -= np.exp((alpha[batch_alpha] + beta[j] - + M[batch_alpha, j]) / reg) return grad_alpha @@ -594,7 +591,7 @@ def grad_dF_dbeta(M, reg, alpha, beta, batch_size, batch_alpha, batch_beta): grad_beta[:] = batch_size for i in batch_alpha: grad_beta -= np.exp((alpha[i] + - beta[batch_beta] - M[i, batch_beta])/reg) + beta[batch_beta] - M[i, batch_beta]) / reg) return grad_beta @@ -681,10 +678,10 @@ def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr, batch_beta = np.random.choice(n_target, batch_size, replace=False) grad_F_alpha = grad_dF_dalpha(M, reg, cur_alpha, cur_beta, batch_size, batch_alpha, batch_beta) - cur_alpha[batch_alpha] += (lr/k) * grad_F_alpha + cur_alpha[batch_alpha] += (lr / k) * grad_F_alpha grad_F_beta = grad_dF_dbeta(M, reg, cur_alpha, cur_beta, batch_size, batch_alpha, batch_beta) - cur_beta[batch_beta] += (lr/k) * grad_F_beta + cur_beta[batch_beta] += (lr / k) * grad_F_beta else: for cur_iter in range(numItermax): @@ -695,8 +692,8 @@ def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr, batch_size, batch_alpha, batch_beta) grad_F_beta = grad_dF_dbeta(M, reg, cur_alpha, cur_beta, batch_size, batch_alpha, batch_beta) - cur_alpha[batch_alpha] += (lr/k) * grad_F_alpha - cur_beta[batch_beta] += (lr/k) * grad_F_beta + cur_alpha[batch_alpha] += (lr / k) * grad_F_alpha + cur_beta[batch_beta] += (lr / k) * grad_F_beta return cur_alpha, cur_beta @@ -779,7 +776,7 @@ def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1, opt_alpha, opt_beta = sgd_entropic_regularization(M, reg, batch_size, numItermax, lr) - pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :])/reg) * + pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) * a[:, None] * b[None, :]) if log: log = {} diff --git a/test/test_stochastic.py b/test/test_stochastic.py index bc0cebbc4..5824df124 100644 --- a/test/test_stochastic.py +++ b/test/test_stochastic.py @@ -149,7 +149,7 @@ def test_stochastic_dual_sgd(): M = ot.dist(x, x) G = ot.stochastic.solve_dual_entropic(u, u, M, reg, batch_size, - numItermax=numItermax) + numItermax=numItermax) # check constratints np.testing.assert_allclose( @@ -180,7 +180,7 @@ def test_dual_sgd_sinkhorn(): M = ot.dist(x, x) G_sgd = ot.stochastic.solve_dual_entropic(u, u, M, reg, batch_size, - numItermax=nb_iter) + numItermax=nb_iter) G_sinkhorn = ot.sinkhorn(u, u, M, reg) From 52134e92e427c825b4aa0a24ac5e000232ebd707 Mon Sep 17 00:00:00 2001 From: Kilian Fatras Date: Tue, 19 Jun 2018 11:31:22 -0700 Subject: [PATCH 08/15] change grad function names --- docs/source/all.rst | 8 ++++++++ ot/stochastic.py | 37 ++++++++++++++++++++++--------------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/docs/source/all.rst b/docs/source/all.rst index c84d968ed..29d47c24b 100644 --- a/docs/source/all.rst +++ b/docs/source/all.rst @@ -33,6 +33,14 @@ ot.optim .. automodule:: ot.optim :members: + + ot.stochastic + -------- + + .. automodule:: ot.stochastic + :members: + + ot.da -------- diff --git a/ot/stochastic.py b/ot/stochastic.py index 98537d9d2..d3830d6bf 100644 --- a/ot/stochastic.py +++ b/ot/stochastic.py @@ -6,9 +6,9 @@ ############################################################################## -# Optimization toolbox for SEMI - DUAL problem +# Optimization toolbox for SEMI - DUAL problems ############################################################################## -def coordinate_gradient(b, M, reg, beta, i): +def coordinate_grad_semi_dual(b, M, reg, beta, i): ''' Compute the coordinate gradient update for regularized discrete distributions for (i, :) @@ -161,7 +161,8 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=0.1): sum_stored_gradient = np.zeros(n_target) for _ in range(numItermax): i = np.random.randint(n_source) - cur_coord_grad = a[i] * coordinate_gradient(b, M, reg, cur_beta, i) + cur_coord_grad = a[i] * coordinate_grad_semi_dual(b, M, reg, + cur_beta, i) sum_stored_gradient += (cur_coord_grad - stored_gradient[i]) stored_gradient[i] = cur_coord_grad cur_beta += lr * (1. / n_source) * sum_stored_gradient @@ -245,7 +246,7 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1): for cur_iter in range(numItermax): k = cur_iter + 1 i = np.random.randint(n_source) - cur_coord_grad = coordinate_gradient(b, M, reg, cur_beta, i) + cur_coord_grad = coordinate_grad_semi_dual(b, M, reg, cur_beta, i) cur_beta += (lr / np.sqrt(k)) * cur_coord_grad ave_beta = (1. / k) * cur_beta + (1 - 1. / k) * ave_beta return ave_beta @@ -428,11 +429,12 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=0.1, ############################################################################## -# Optimization toolbox for DUAL problem +# Optimization toolbox for DUAL problems ############################################################################## -def grad_dF_dalpha(M, reg, alpha, beta, batch_size, batch_alpha, batch_beta): +def batch_grad_dual_alpha(M, reg, alpha, beta, batch_size, batch_alpha, + batch_beta): ''' Computes the partial gradient of F_\W_varepsilon @@ -513,7 +515,8 @@ def grad_dF_dalpha(M, reg, alpha, beta, batch_size, batch_alpha, batch_beta): return grad_alpha -def grad_dF_dbeta(M, reg, alpha, beta, batch_size, batch_alpha, batch_beta): +def batch_grad_dual_beta(M, reg, alpha, beta, batch_size, batch_alpha, + batch_beta): ''' Computes the partial gradient of F_\W_varepsilon @@ -676,11 +679,13 @@ def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr, k = np.sqrt(cur_iter + 1) batch_alpha = np.random.choice(n_source, batch_size, replace=False) batch_beta = np.random.choice(n_target, batch_size, replace=False) - grad_F_alpha = grad_dF_dalpha(M, reg, cur_alpha, cur_beta, - batch_size, batch_alpha, batch_beta) + grad_F_alpha = batch_grad_dual_alpha(M, reg, cur_alpha, cur_beta, + batch_size, batch_alpha, + batch_beta) cur_alpha[batch_alpha] += (lr / k) * grad_F_alpha - grad_F_beta = grad_dF_dbeta(M, reg, cur_alpha, cur_beta, - batch_size, batch_alpha, batch_beta) + grad_F_beta = batch_grad_dual_beta(M, reg, cur_alpha, cur_beta, + batch_size, batch_alpha, + batch_beta) cur_beta[batch_beta] += (lr / k) * grad_F_beta else: @@ -688,10 +693,12 @@ def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr, k = np.sqrt(cur_iter + 1) batch_alpha = np.random.choice(n_source, batch_size, replace=False) batch_beta = np.random.choice(n_target, batch_size, replace=False) - grad_F_alpha = grad_dF_dalpha(M, reg, cur_alpha, cur_beta, - batch_size, batch_alpha, batch_beta) - grad_F_beta = grad_dF_dbeta(M, reg, cur_alpha, cur_beta, - batch_size, batch_alpha, batch_beta) + grad_F_alpha = batch_grad_dual_alpha(M, reg, cur_alpha, cur_beta, + batch_size, batch_alpha, + batch_beta) + grad_F_beta = batch_grad_dual_beta(M, reg, cur_alpha, cur_beta, + batch_size, batch_alpha, + batch_beta) cur_alpha[batch_alpha] += (lr / k) * grad_F_alpha cur_beta[batch_beta] += (lr / k) * grad_F_beta From 7073e417bed151976c62fe20d1ba69abd30e7758 Mon Sep 17 00:00:00 2001 From: Kilian Fatras Date: Tue, 19 Jun 2018 11:52:10 -0700 Subject: [PATCH 09/15] remove if in test and cleaned code --- examples/plot_stochastic.py | 1 + ot/stochastic.py | 2 ++ test/test_stochastic.py | 10 +++------- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/examples/plot_stochastic.py b/examples/plot_stochastic.py index e1394737e..09b95d041 100644 --- a/examples/plot_stochastic.py +++ b/examples/plot_stochastic.py @@ -136,6 +136,7 @@ ot.plot.plot1D_mat(a, b, sinkhorn_pi, 'OT matrix Sinkhorn') pl.show() + ############################################################################# # COMPUTE TRANSPORTATION MATRIX FOR DUAL PROBLEM ############################################################################# diff --git a/ot/stochastic.py b/ot/stochastic.py index d3830d6bf..57b96b7a1 100644 --- a/ot/stochastic.py +++ b/ot/stochastic.py @@ -8,6 +8,8 @@ ############################################################################## # Optimization toolbox for SEMI - DUAL problems ############################################################################## + + def coordinate_grad_semi_dual(b, M, reg, beta, i): ''' Compute the coordinate gradient update for regularized discrete diff --git a/test/test_stochastic.py b/test/test_stochastic.py index 5824df124..3cb51ff14 100644 --- a/test/test_stochastic.py +++ b/test/test_stochastic.py @@ -57,6 +57,7 @@ def test_stochastic_sag(): # 2 identical discrete measures u defined on the same space with a # regularization term, a learning rate and a number of iteration + def test_stochastic_asgd(): # test asgd n = 15 @@ -134,9 +135,9 @@ def test_sag_asgd_sinkhorn(): # 2 identical discrete measures u defined on the same space with a # regularization term, a batch_size and a number of iteration + def test_stochastic_dual_sgd(): # test sgd - print("SGD") n = 10 reg = 1 numItermax = 300000 @@ -157,6 +158,7 @@ def test_stochastic_dual_sgd(): np.testing.assert_allclose( u, G.sum(0), atol=1e-02) # cf convergence sgd + ############################################################################# # # TEST Convergence SGD toward Sinkhorn's solution @@ -167,7 +169,6 @@ def test_stochastic_dual_sgd(): def test_dual_sgd_sinkhorn(): # test all dual algorithms - print("SGD vs Sinkhorn") n = 10 reg = 1 nb_iter = 300000 @@ -191,8 +192,3 @@ def test_dual_sgd_sinkhorn(): zero, (G_sgd - G_sinkhorn).sum(0), atol=1e-02) # cf convergence sgd np.testing.assert_allclose( G_sgd, G_sinkhorn, atol=1e-02) # cf convergence sgd - - -if __name__ == '__main__': - test_stochastic_dual_sgd() - test_dual_sgd_sinkhorn() From 6777ffd5c8457faac4467e58ba9edbcf2f86961b Mon Sep 17 00:00:00 2001 From: Kilian Fatras Date: Thu, 21 Jun 2018 17:18:26 -0700 Subject: [PATCH 10/15] gave better step size ASGD & SAG --- examples/plot_stochastic.py | 10 ++++------ ot/stochastic.py | 37 ++++++++++++++++--------------------- test/test_stochastic.py | 7 ++----- 3 files changed, 22 insertions(+), 32 deletions(-) diff --git a/examples/plot_stochastic.py b/examples/plot_stochastic.py index 09b95d041..6274b4c73 100644 --- a/examples/plot_stochastic.py +++ b/examples/plot_stochastic.py @@ -32,8 +32,7 @@ n_source = 7 n_target = 4 reg = 1 -numItermax = 10000 -lr = 0.1 +numItermax = 1000 a = ot.utils.unif(n_source) b = ot.utils.unif(n_target) @@ -53,7 +52,7 @@ method = "SAG" sag_pi = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method, - numItermax, lr) + numItermax) print(sag_pi) ############################################################################# @@ -68,8 +67,7 @@ n_source = 7 n_target = 4 reg = 1 -numItermax = 100000 -lr = 1 +numItermax = 1000 log = True a = ot.utils.unif(n_source) @@ -91,7 +89,7 @@ method = "ASGD" asgd_pi, log = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method, - numItermax, lr, log) + numItermax, log) print(log['alpha'], log['beta']) print(asgd_pi) diff --git a/ot/stochastic.py b/ot/stochastic.py index 57b96b7a1..ab88cd054 100644 --- a/ot/stochastic.py +++ b/ot/stochastic.py @@ -56,7 +56,6 @@ def coordinate_grad_semi_dual(b, M, reg, beta, i): >>> n_target = 4 >>> reg = 1 >>> numItermax = 300000 - >>> lr = 1 >>> a = ot.utils.unif(n_source) >>> b = ot.utils.unif(n_target) >>> rng = np.random.RandomState(0) @@ -65,8 +64,7 @@ def coordinate_grad_semi_dual(b, M, reg, beta, i): >>> M = ot.dist(X_source, Y_target) >>> method = "ASGD" >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, - method, numItermax, - lr) + method, numItermax) >>> print(asgd_pi) References @@ -85,7 +83,7 @@ def coordinate_grad_semi_dual(b, M, reg, beta, i): return b - khi -def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=0.1): +def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None): ''' Compute the SAG algorithm to solve the regularized discrete measures optimal transport max problem @@ -134,17 +132,15 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=0.1): >>> n_target = 4 >>> reg = 1 >>> numItermax = 300000 - >>> lr = 1 >>> a = ot.utils.unif(n_source) >>> b = ot.utils.unif(n_target) >>> rng = np.random.RandomState(0) >>> X_source = rng.randn(n_source, 2) >>> Y_target = rng.randn(n_target, 2) >>> M = ot.dist(X_source, Y_target) - >>> method = "SAG" - >>> sag_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, - method, numItermax, - lr) + >>> method = "ASGD" + >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, + method, numItermax) >>> print(asgd_pi) References @@ -156,6 +152,8 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=0.1): arXiv preprint arxiv:1605.08527. ''' + if lr is None: + lr = 1. / max(a) n_source = np.shape(M)[0] n_target = np.shape(M)[1] cur_beta = np.zeros(n_target) @@ -171,7 +169,7 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=0.1): return cur_beta -def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1): +def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None): ''' Compute the ASGD algorithm to solve the regularized semi contibous measures optimal transport max problem @@ -219,7 +217,6 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1): >>> n_target = 4 >>> reg = 1 >>> numItermax = 300000 - >>> lr = 1 >>> a = ot.utils.unif(n_source) >>> b = ot.utils.unif(n_target) >>> rng = np.random.RandomState(0) @@ -228,8 +225,7 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1): >>> M = ot.dist(X_source, Y_target) >>> method = "ASGD" >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, - method, numItermax, - lr) + method, numItermax) >>> print(asgd_pi) References @@ -241,6 +237,8 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1): arXiv preprint arxiv:1605.08527. ''' + if lr is None: + lr = 1. / max(a) n_source = np.shape(M)[0] n_target = np.shape(M)[1] cur_beta = np.zeros(n_target) @@ -296,7 +294,6 @@ def c_transform_entropic(b, M, reg, beta): >>> n_target = 4 >>> reg = 1 >>> numItermax = 300000 - >>> lr = 1 >>> a = ot.utils.unif(n_source) >>> b = ot.utils.unif(n_target) >>> rng = np.random.RandomState(0) @@ -305,8 +302,7 @@ def c_transform_entropic(b, M, reg, beta): >>> M = ot.dist(X_source, Y_target) >>> method = "ASGD" >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, - method, numItermax, - lr) + method, numItermax) >>> print(asgd_pi) References @@ -328,7 +324,7 @@ def c_transform_entropic(b, M, reg, beta): return alpha -def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=0.1, +def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None, log=False): ''' Compute the transportation matrix to solve the regularized discrete @@ -388,7 +384,6 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=0.1, >>> n_target = 4 >>> reg = 1 >>> numItermax = 300000 - >>> lr = 1 >>> a = ot.utils.unif(n_source) >>> b = ot.utils.unif(n_target) >>> rng = np.random.RandomState(0) @@ -397,8 +392,7 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=0.1, >>> M = ot.dist(X_source, Y_target) >>> method = "ASGD" >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, - method, numItermax, - lr) + method, numItermax) >>> print(asgd_pi) References @@ -409,10 +403,11 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=0.1, Advances in Neural Information Processing Systems (2016), arXiv preprint arxiv:1605.08527. ''' + if method.lower() == "sag": opt_beta = sag_entropic_transport(a, b, M, reg, numItermax, lr) elif method.lower() == "asgd": - opt_beta = averaged_sgd_entropic_transport(b, M, reg, numItermax, lr) + opt_beta = averaged_sgd_entropic_transport(a, b, M, reg, numItermax, lr) else: print("Please, select your method between SAG and ASGD") return None diff --git a/test/test_stochastic.py b/test/test_stochastic.py index 3cb51ff14..f315c8810 100644 --- a/test/test_stochastic.py +++ b/test/test_stochastic.py @@ -63,7 +63,6 @@ def test_stochastic_asgd(): n = 15 reg = 1 numItermax = 300000 - lr = 1 rng = np.random.RandomState(0) x = rng.randn(n, 2) @@ -72,8 +71,7 @@ def test_stochastic_asgd(): M = ot.dist(x, x) G = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "asgd", - numItermax=numItermax, - lr=lr) + numItermax=numItermax) # check constratints np.testing.assert_allclose( @@ -95,7 +93,6 @@ def test_sag_asgd_sinkhorn(): n = 15 reg = 1 nb_iter = 300000 - lr = 1 rng = np.random.RandomState(0) x = rng.randn(n, 2) @@ -104,7 +101,7 @@ def test_sag_asgd_sinkhorn(): M = ot.dist(x, x) G_asgd = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "asgd", - numItermax=nb_iter, lr=lr) + numItermax=nb_iter) G_sag = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "sag", numItermax=nb_iter) G_sinkhorn = ot.sinkhorn(u, u, M, reg) From af5f726f6adb52457ca6730ffb85f2ab486b2ada Mon Sep 17 00:00:00 2001 From: Kilian Fatras Date: Thu, 21 Jun 2018 17:41:25 -0700 Subject: [PATCH 11/15] fixed bug --- ot/stochastic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ot/stochastic.py b/ot/stochastic.py index ab88cd054..374d1a5b9 100644 --- a/ot/stochastic.py +++ b/ot/stochastic.py @@ -153,7 +153,7 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None): ''' if lr is None: - lr = 1. / max(a) + lr = 1. / max(a/reg) n_source = np.shape(M)[0] n_target = np.shape(M)[1] cur_beta = np.zeros(n_target) @@ -238,7 +238,7 @@ def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None): ''' if lr is None: - lr = 1. / max(a) + lr = 1. / max(a/reg) n_source = np.shape(M)[0] n_target = np.shape(M)[1] cur_beta = np.zeros(n_target) From e8cf3cc343c934b7c49d303186cdf226204813b3 Mon Sep 17 00:00:00 2001 From: Kilian Fatras Date: Thu, 21 Jun 2018 17:52:05 -0700 Subject: [PATCH 12/15] pep8 --- ot/stochastic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ot/stochastic.py b/ot/stochastic.py index 374d1a5b9..f4d4c7d07 100644 --- a/ot/stochastic.py +++ b/ot/stochastic.py @@ -153,7 +153,7 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None): ''' if lr is None: - lr = 1. / max(a/reg) + lr = 1. / max(a / reg) n_source = np.shape(M)[0] n_target = np.shape(M)[1] cur_beta = np.zeros(n_target) @@ -238,7 +238,7 @@ def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None): ''' if lr is None: - lr = 1. / max(a/reg) + lr = 1. / max(a / reg) n_source = np.shape(M)[0] n_target = np.shape(M)[1] cur_beta = np.zeros(n_target) From 9fecd51c583704e02d3faaef722f0dee52f42359 Mon Sep 17 00:00:00 2001 From: Kilian Fatras Date: Mon, 25 Jun 2018 11:03:44 -0700 Subject: [PATCH 13/15] fix math operator and log bugs --- examples/plot_stochastic.py | 18 ++++++++++-------- ot/stochastic.py | 15 ++++++++++++--- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/examples/plot_stochastic.py b/examples/plot_stochastic.py index 6274b4c73..b9375d423 100644 --- a/examples/plot_stochastic.py +++ b/examples/plot_stochastic.py @@ -15,6 +15,7 @@ import matplotlib.pylab as pl import numpy as np import ot +import ot.plot ############################################################################# @@ -88,9 +89,9 @@ # results. method = "ASGD" -asgd_pi, log = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method, - numItermax, log) -print(log['alpha'], log['beta']) +asgd_pi, log_asgd = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method, + numItermax, log=log) +print(log_asgd['alpha'], log_asgd['beta']) print(asgd_pi) ############################################################################# @@ -166,15 +167,16 @@ ############################################################################# # -# Call the "SGD" dual method to find the transportation matrix in the semicontinous -# case +# Call the "SGD" dual method to find the transportation matrix in the +# semicontinous case # --------------------------------------------- # # Call ot.solve_dual_entropic and plot the results. -sgd_dual_pi, log = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size, - numItermax, lr, log) -print(log['alpha'], log['beta']) +sgd_dual_pi, log_sgd = ot.stochastic.solve_dual_entropic(a, b, M, reg, + batch_size, numItermax, + lr, log=log) +print(log_sgd['alpha'], log_sgd['beta']) print(sgd_dual_pi) ############################################################################# diff --git a/ot/stochastic.py b/ot/stochastic.py index f4d4c7d07..5e8206ee6 100644 --- a/ot/stochastic.py +++ b/ot/stochastic.py @@ -16,8 +16,9 @@ def coordinate_grad_semi_dual(b, M, reg, beta, i): distributions for (i, :) The function computes the gradient of the semi dual problem: + .. math:: - \W_varepsilon(a, b) = \max_\v \sum_i (\sum_j v_j * b_j + \W_\varepsilon(a, b) = \max_\v \sum_i (\sum_j v_j * b_j - \reg log(\sum_j exp((v_j - M_{i,j})/reg) * b_j)) * a_i where : @@ -89,6 +90,7 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None): optimal transport max problem The function solves the following optimization problem: + .. math:: \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) s.t. \gamma 1 = a @@ -175,6 +177,7 @@ def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None): optimal transport max problem The function solves the following optimization problem: + .. math:: \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) s.t. \gamma 1 = a @@ -258,6 +261,7 @@ def c_transform_entropic(b, M, reg, beta): The function computes the c_transform of a dual variable from the other dual variable: + .. math:: u = v^{c,reg} = -reg \sum_j exp((v - M)/reg) b_j @@ -331,6 +335,7 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None, measures optimal transport max problem The function solves the following optimization problem: + .. math:: \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) s.t. \gamma 1 = a @@ -436,7 +441,8 @@ def batch_grad_dual_alpha(M, reg, alpha, beta, batch_size, batch_alpha, Computes the partial gradient of F_\W_varepsilon Compute the partial gradient of the dual problem: - ..Math: + + ..math: \forall i in batch_alpha, grad_alpha_i = 1 * batch_size - sum_{j in batch_beta} exp((alpha_i + beta_j - M_{i,j})/reg) @@ -518,7 +524,8 @@ def batch_grad_dual_beta(M, reg, alpha, beta, batch_size, batch_alpha, Computes the partial gradient of F_\W_varepsilon Compute the partial gradient of the dual problem: - ..Math: + + ..math: \forall j in batch_beta, grad_beta_j = 1 * batch_size - sum_{i in batch_alpha} exp((alpha_i + beta_j - M_{i,j})/reg) @@ -602,6 +609,7 @@ def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr, optimal transport dual problem The function solves the following optimization problem: + .. math:: \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) s.t. \gamma 1 = a @@ -709,6 +717,7 @@ def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1, optimal transport dual problem The function solves the following optimization problem: + .. math:: \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) s.t. \gamma 1 = a From 968ad58ecae5f26d5f3d6cfc410ccc93de96f2c0 Mon Sep 17 00:00:00 2001 From: Kilian Fatras Date: Tue, 26 Jun 2018 11:00:51 -0700 Subject: [PATCH 14/15] add stochastic to docs --- docs/source/all.rst | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/docs/source/all.rst b/docs/source/all.rst index 29d47c24b..019409891 100644 --- a/docs/source/all.rst +++ b/docs/source/all.rst @@ -20,6 +20,11 @@ ot.bregman .. automodule:: ot.bregman :members: +ot.smooth +----- +.. automodule:: ot.smooth + :members: + ot.gromov ---------- @@ -33,14 +38,6 @@ ot.optim .. automodule:: ot.optim :members: - - ot.stochastic - -------- - - .. automodule:: ot.stochastic - :members: - - ot.da -------- @@ -71,3 +68,9 @@ ot.plot .. automodule:: ot.plot :members: + + ot.stochastic + ------- + + .. automodule:: ot.stochastic + :members: From 208ff4627ba28aa3f35328c5928324019c23ddac Mon Sep 17 00:00:00 2001 From: Kilian Fatras Date: Tue, 26 Jun 2018 11:09:41 -0700 Subject: [PATCH 15/15] fix stochastic to doc --- docs/source/all.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/all.rst b/docs/source/all.rst index 019409891..94da2ed21 100644 --- a/docs/source/all.rst +++ b/docs/source/all.rst @@ -69,8 +69,8 @@ ot.plot .. automodule:: ot.plot :members: - ot.stochastic - ------- +ot.stochastic +------------- - .. automodule:: ot.stochastic - :members: +.. automodule:: ot.stochastic + :members: