
Feature/ Introduce initialization methods for Sinkhorn #98

Merged Aug 17, 2022 (49 commits; changes shown below are from 38 commits)

Commits
83f8996
add sorting, gaus initializers, add gaus helpers to tools
Jun 30, 2022
3549517
add initialization logic to sinkhorn
Jul 1, 2022
d8cdfd3
remove general ot problem type
Jul 1, 2022
9050efb
remove import tools.gaussian from top level
Jul 1, 2022
a680d6b
remove problems from top level
Jul 1, 2022
378777e
do not register initializer as pytree
Jul 1, 2022
7fa567a
add initializer to make
Jul 1, 2022
4ce8357
rename init arg to ot_problem
Jul 1, 2022
2856b4b
rename init arg to ot_problem
Jul 1, 2022
42eb327
scale gaus init by 2
Jul 1, 2022
2de12cc
typo
Jul 1, 2022
4d80508
add basic speed tests
Jul 1, 2022
23a03e9
add init to transport tools wrapper, tidy docstring
Jul 1, 2022
512150f
ceneter potentials in initializers
Jul 1, 2022
c90cd40
fix lse for null weights
Jul 3, 2022
4f5fbd6
fix flake8 and accidental removal
Jul 3, 2022
9b0f224
tidy docstrings
Jul 3, 2022
d33d89c
tidy docstrings
Jul 3, 2022
d83f913
docstring flake8
Jul 3, 2022
30455a7
flake 8 formatting
Jul 4, 2022
de2f6c4
Merge branch 'ott-jax:main' into main
JTT94 Jul 4, 2022
17a8db9
fix typo
Jul 4, 2022
92924bf
fix stop gradient in Gaussian to include weights and x,y
Jul 4, 2022
077219b
fix stop gradient in Gaussian to include weights and x,y
Jul 4, 2022
e71f6b4
fix docstring spaces
Jul 4, 2022
4c1f0b3
feedback from initial review
Jul 5, 2022
99d0bd1
re order local functions before state init
Jul 5, 2022
60b973b
optional init_f in sorting init
Jul 6, 2022
3e2df88
docstring insert line before return
Jul 6, 2022
14f3b64
lint fix
Jul 6, 2022
5d1f648
incorporate feedback in commit
Jul 12, 2022
aca73e7
tidy tests, use jax.lax.cond for logic instead of if
Jul 13, 2022
86b32f5
add docs, rename sorting initializer
Jul 13, 2022
c12fea8
fix merge conflict
Jul 13, 2022
1c56799
resolve test errors in sinkhorn test
Jul 13, 2022
17e2524
fetch upstream for merge
Jul 13, 2022
161a67a
incorporate feedback, update tests to pytest, change docstrings, intr…
Jul 14, 2022
f6fdd5c
fix docstring spaces
Jul 14, 2022
c189f18
remove spaces and add bibtex
Jul 16, 2022
3855bb9
add errors for non square cost matrix for sorting, online geoms for i…
Aug 17, 2022
4e9c46c
Merge branch 'main' into main
JTT94 Aug 17, 2022
49e5b4f
merge fix lint
Aug 17, 2022
ce4d14c
merge fix lint
Aug 17, 2022
1f93053
add initializers as pytees
Aug 17, 2022
f665911
add init scaling tests
Aug 17, 2022
f4d4c1e
add init scaling tests
Aug 17, 2022
196de5f
simplify vector update flag in sorting initializer
Aug 17, 2022
3dcb736
Fix documentation rendering
michalk8 Aug 17, 2022
69f3050
[ci skip] Fix typo in docs, use fixture in tests
michalk8 Aug 17, 2022
9 changes: 9 additions & 0 deletions docs/core.rst
@@ -31,6 +31,15 @@ Sinkhorn
sinkhorn.Sinkhorn
sinkhorn.SinkhornOutput

Sinkhorn Dual Initializers
--------------------------
.. autosummary::
:toctree: _autosummary

initializers.SinkhornInitializer
initializers.GaussianInitializer
initializers.SortingInitializer

Low-Rank Sinkhorn
-----------------
.. autosummary::
1 change: 1 addition & 0 deletions ott/core/__init__.py
@@ -22,6 +22,7 @@
discrete_barycenter,
gromov_wasserstein,
implicit_differentiation,
initializers,
linear_problems,
momentum,
quad_problems,
258 changes: 258 additions & 0 deletions ott/core/initializers.py
@@ -0,0 +1,258 @@
# Copyright 2022 The OTT Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Sinkhorn initializers."""
from typing import Optional

import jax
import jax.numpy as jnp

from ott.core import linear_problems
from ott.geometry import pointcloud


class SinkhornInitializer:
"""Base class for Sinkhorn dual initializers."""

def init_dual_a(
self, ot_problem: linear_problems.LinearProblem, lse_mode: bool
) -> jnp.ndarray:
"""Initialization for Sinkhorn potential/scaling f_u."""

def init_dual_b(
self, ot_problem: linear_problems.LinearProblem, lse_mode: bool
) -> jnp.ndarray:
"""Initialization for Sinkhorn potential/scaling g_v."""


class DefaultInitializer(SinkhornInitializer):
"""Default Initialization of Sinkhorn dual potentials/ primal scalings."""

def init_dual_a(
self, ot_problem: linear_problems.LinearProblem, lse_mode: bool
) -> jnp.ndarray:
"""Initialization for Sinkhorn potential/ scaling f_u.

Args:
ot_problem: OT problem between discrete distributions of size n and m.
lse_mode: Return potential if true, scaling if false.

Returns:
potential/ scaling, array of size n
"""
a = ot_problem.a
init_dual_a = jnp.zeros_like(a) if lse_mode else jnp.ones_like(a)
return init_dual_a

def init_dual_b(
self, ot_problem: linear_problems.LinearProblem, lse_mode: bool
) -> jnp.ndarray:
"""Initialization for Sinkhorn potential/ scaling g_v.

Args:
ot_problem: OT problem between discrete distributions of size n and m.
lse_mode: Return potential if true, scaling if false.

Returns:
potential/ scaling, array of size m
"""
b = ot_problem.b
init_dual_b = jnp.zeros_like(b) if lse_mode else jnp.ones_like(b)
return init_dual_b
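
A quick illustrative sketch (not part of this diff): the two modes describe the same dual point, since the zero potential in `lse_mode` corresponds to the unit scaling `exp(0 / epsilon)` in kernel mode. Shapes and `epsilon` below are arbitrary.

```python
import jax.numpy as jnp

from ott.core import initializers, linear_problems
from ott.geometry import pointcloud

x, y = jnp.zeros((4, 2)), jnp.ones((3, 2))
prob = linear_problems.LinearProblem(
    pointcloud.PointCloud(x, y, epsilon=1e-1)
)

init = initializers.DefaultInitializer()
f = init.init_dual_a(prob, lse_mode=True)   # zero potential, size n = 4
u = init.init_dual_a(prob, lse_mode=False)  # unit scaling, size n = 4
# Same dual point expressed in the two modes: u = exp(f / epsilon).
assert jnp.allclose(u, jnp.exp(f / 1e-1))
```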


class GaussianInitializer(DefaultInitializer):
"""GaussianInitializer.

From https://arxiv.org/abs/2206.07630.
JTT94 marked this conversation as resolved.
Show resolved Hide resolved
Compute Gaussian approximations of each pointcloud, then compute closed from
Kantorovich potential betwen Gaussian approximations using Brenier's theorem
(adapt convex/ Brenier potential to Kantorovich). Use this Gaussian potential to
initialize Sinkhorn potentials/ scalings.

"""

def __init__(self):
super().__init__()

def init_dual_a(
self,
ot_problem: linear_problems.LinearProblem,
lse_mode: bool,
) -> jnp.ndarray:
"""Gaussian init function.

Args:
ot_problem: OT problem description with geometry and weights.
lse_mode: Return potential if true, scaling if false.

Returns:
potential/scaling f_u, array of size n.
"""
# import Gaussian here due to circular imports
from ott.tools.gaussian_mixture import gaussian

if not isinstance(ot_problem.geom, pointcloud.PointCloud):
# Gaussian init needs point-cloud geometry; fall back to default init.
return super().init_dual_a(ot_problem, lse_mode)
else:

x, y = ot_problem.geom.x, ot_problem.geom.y
a, b = ot_problem.a, ot_problem.b

gaussian_a = gaussian.Gaussian.from_samples(x, weights=a)
gaussian_b = gaussian.Gaussian.from_samples(y, weights=b)
# Brenier potential for cost ||x-y||^2/2, multiply by two for ||x-y||^2
f_potential = 2 * gaussian_a.f_potential(dest=gaussian_b, points=x)
f_potential = f_potential - jnp.mean(f_potential)
f_u = f_potential if lse_mode else ot_problem.scaling_from_potential(
f_potential
)
return f_u
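
A minimal usage sketch for the Gaussian initializer (illustrative only; assumes the API introduced in this diff, with arbitrary sample sizes, seed, and epsilon):

```python
import jax
import jax.numpy as jnp

from ott.core import initializers, linear_problems
from ott.geometry import pointcloud

key_x, key_y = jax.random.split(jax.random.PRNGKey(0))
x = jax.random.normal(key_x, (128, 2))        # source samples
y = jax.random.normal(key_y, (128, 2)) + 1.0  # shifted target samples

ot_prob = linear_problems.LinearProblem(
    pointcloud.PointCloud(x, y, epsilon=1e-2)
)
f_init = initializers.GaussianInitializer().init_dual_a(
    ot_prob, lse_mode=True
)  # centered potential of size n = 128
```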


class SortingInitializer(DefaultInitializer):
"""Sorting Init class.

DualSort algorithm from https://arxiv.org/abs/2206.07630, solve
non-regularized OT problem via sorting, then compute potential through
iterated minimum on C-transform and use this potential to initialize
regularized potential

Args:
vectorized_update: Use vectorized inner loop if true.
tolerance: DualSort convergence threshold.
max_iter: Max DualSort steps.
"""

def __init__(
self,
vectorized_update: bool = True,
tolerance: float = 1e-2,
max_iter: int = 100
):

super().__init__()

self.tolerance = tolerance
self.max_iter = max_iter
self.update_fn = lambda f, mod_cost: jax.lax.cond(
vectorized_update, _vectorized_update, _coordinate_update, f, mod_cost
)

def init_sorting_dual(
self, modified_cost: jnp.ndarray, init_f: jnp.ndarray
) -> jnp.ndarray:
"""Run DualSort algorithm.

Args:
modified_cost: cost matrix with its diagonal subtracted column-wise,
i.e. modified_cost[i, j] = C[i, j] - C[j, j].
init_f: initial potential f, array of size n; DualSort refines this
starting point to produce the final initialization.

Returns:
potential f, array of size n.
"""

def body_fn(state):
prev_f, _, it = state
new_f = self.update_fn(prev_f, modified_cost)
diff = jnp.sum((new_f - prev_f) ** 2)
it += 1
return new_f, diff, it

def cond_fn(state):
_, diff, it = state
return jnp.logical_and(diff > self.tolerance, it < self.max_iter)

it = 0
diff = self.tolerance + 1.0
state = (init_f, diff, it)

f_potential, _, it = jax.lax.while_loop(
cond_fun=cond_fn, body_fun=body_fn, init_val=state
)

return f_potential

def init_dual_a(
self,
ot_problem: linear_problems.LinearProblem,
lse_mode: bool,
init_f: Optional[jnp.ndarray] = None,
) -> jnp.ndarray:
"""Apply DualSort algo.

Args:
ot_problem: OT problem.
lse_mode: Return potential if true, scaling if false.
init_f: initial potential f, array of size n; DualSort refines this
starting point to produce the final initialization.

Returns:
potential/ scaling f_u, array of size n.
"""
if ot_problem.geom.is_online:
# Online geometry has no materialized cost matrix; fall back to default.
return super().init_dual_a(ot_problem, lse_mode)
else:
cost_matrix = ot_problem.geom.cost_matrix
modified_cost = cost_matrix - jnp.diag(cost_matrix)[None, :]

n = cost_matrix.shape[0]
init_f = jnp.zeros(n) if init_f is None else init_f

f_potential = self.init_sorting_dual(modified_cost, init_f)
f_potential = f_potential - jnp.mean(f_potential)

f_u = f_potential if lse_mode else ot_problem.scaling_from_potential(
f_potential
)

return f_u


def _vectorized_update(
f: jnp.ndarray, modified_cost: jnp.ndarray
) -> jnp.ndarray:
"""Inner loop DualSort Update.

Args:
f: potential f, array of size n.
modified_cost: cost matrix with its diagonal subtracted column-wise.

Returns:
updated potential vector, f.
"""
f = jnp.min(modified_cost + f[None, :], axis=1)
return f


def _coordinate_update(
f: jnp.ndarray, modified_cost: jnp.ndarray
) -> jnp.ndarray:
"""Coordinate-wise updates within inner loop.

Args:
f: potential f, array of size n.
modified_cost: cost matrix with its diagonal subtracted column-wise.

Returns:
updated potential vector, f.
"""

def body_fn(i, f):
new_f = jnp.min(modified_cost[i, :] + f)
f = f.at[i].set(new_f)
return f

return jax.lax.fori_loop(0, len(f), body_fn, f)
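
A toy comparison of the two update rules above, with illustrative values and assuming `_vectorized_update` and `_coordinate_update` from this file are in scope: the vectorized rule applies f_i <- min_j (C[i, j] - C[j, j] + f[j]) using the old f everywhere (a Jacobi-style sweep), while the coordinate rule reuses freshly updated coordinates within the sweep (Gauss-Seidel style). Single sweeps can differ, but both share the same fixed point.

```python
import jax.numpy as jnp

C = jnp.array([[0.0, 9.0, 1.0],
               [2.0, 0.0, 9.0],
               [9.0, 9.0, 0.0]])
modified_cost = C - jnp.diag(C)[None, :]  # here diag(C) = 0, so just C
f0 = jnp.array([5.0, 9.0, 0.0])

print(_vectorized_update(f0, modified_cost))   # [1. 7. 0.]
print(_coordinate_update(f0, modified_cost))   # [1. 3. 0.] (reuses new f[0])
```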
24 changes: 19 additions & 5 deletions ott/core/sinkhorn.py
@@ -23,6 +23,7 @@
from ott.core import anderson as anderson_lib
from ott.core import fixed_point_loop
from ott.core import implicit_differentiation as implicit_lib
from ott.core import initializers as init_lib
from ott.core import linear_problems
from ott.core import momentum as momentum_lib
from ott.core import unbalanced_functions
@@ -349,6 +350,8 @@ def __init__(
use_danskin: Optional[bool] = None,
implicit_diff: Optional[implicit_lib.ImplicitDiff
] = implicit_lib.ImplicitDiff(), # noqa: E124
potential_initializer: init_lib.SinkhornInitializer = init_lib
.DefaultInitializer(),
jit: bool = True
):
self.lse_mode = lse_mode
@@ -368,6 +371,7 @@ def __init__(
self.anderson = anderson
self.implicit_diff = implicit_diff
self.parallel_dual_updates = parallel_dual_updates
self.potential_initializer = potential_initializer
self.jit = jit

# Force implicit_differentiation to True when using Anderson acceleration,
@@ -400,18 +404,25 @@ def __call__(
init: Optional[Tuple[Optional[jnp.ndarray], Optional[jnp.ndarray]]] = None
) -> SinkhornOutput:
"""Main interface to run sinkhorn.""" # noqa: D401
# initialization
init_dual_a, init_dual_b = (init if init is not None else (None, None))
a, b = ot_prob.a, ot_prob.b

if init_dual_a is None:
init_dual_a = jnp.zeros_like(a) if self.lse_mode else jnp.ones_like(a)
init_dual_a = self.potential_initializer.init_dual_a(
ot_problem=ot_prob, lse_mode=self.lse_mode
)

if init_dual_b is None:
init_dual_b = jnp.zeros_like(b) if self.lse_mode else jnp.ones_like(b)
init_dual_b = self.potential_initializer.init_dual_b(
ot_problem=ot_prob, lse_mode=self.lse_mode
)

# Cancel dual variables for zero weights.
init_dual_a = jnp.where(
a > 0, init_dual_a, -jnp.inf if self.lse_mode else 0.0
ot_prob.a > 0, init_dual_a, -jnp.inf if self.lse_mode else 0.0
)
init_dual_b = jnp.where(
b > 0, init_dual_b, -jnp.inf if self.lse_mode else 0.0
ot_prob.b > 0, init_dual_b, -jnp.inf if self.lse_mode else 0.0
)

run_fn = jax.jit(run) if self.jit else run
@@ -691,6 +702,8 @@ def make(
precondition_fun: Optional[Callable[[float], float]] = None,
parallel_dual_updates: bool = False,
use_danskin: bool = None,
potential_initializer: init_lib.SinkhornInitializer = init_lib
.DefaultInitializer(),
jit: bool = False
) -> Sinkhorn:
"""For backward compatibility."""
@@ -725,6 +738,7 @@
implicit_diff=implicit_diff,
parallel_dual_updates=parallel_dual_updates,
use_danskin=use_danskin,
potential_initializer=potential_initializer,
jit=jit
)
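
Putting the pieces together, a hedged end-to-end sketch of the API added in this PR (sizes, shift, and seed are illustrative; the sorting initializer needs a square cost matrix, hence n = m):

```python
import jax
import jax.numpy as jnp

from ott.core import initializers, linear_problems, sinkhorn
from ott.geometry import pointcloud

key_x, key_y = jax.random.split(jax.random.PRNGKey(0))
x = jax.random.normal(key_x, (64, 2))
y = jax.random.normal(key_y, (64, 2)) + 0.5

ot_prob = linear_problems.LinearProblem(pointcloud.PointCloud(x, y))
solver = sinkhorn.Sinkhorn(
    potential_initializer=initializers.SortingInitializer(
        vectorized_update=True
    )
)
out = solver(ot_prob)
print(out.converged, out.reg_ot_cost)
```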
