From fad09564e16129ab3faedf79f70ce25ccdc28fb6 Mon Sep 17 00:00:00 2001 From: Lior Belenki Date: Fri, 19 Jan 2024 14:04:27 -0800 Subject: [PATCH] Add NormalizedSimpleRegret and WinRateSimpleRegret convergence curve comparators. PiperOrigin-RevId: 599931886 --- .../benchmarks/analyzers/convergence_curve.py | 114 +++++++++++-- .../analyzers/convergence_curve_test.py | 151 +++++++++++++++--- 2 files changed, 224 insertions(+), 41 deletions(-) diff --git a/vizier/_src/benchmarks/analyzers/convergence_curve.py b/vizier/_src/benchmarks/analyzers/convergence_curve.py index 19d7362e5..b3143f461 100644 --- a/vizier/_src/benchmarks/analyzers/convergence_curve.py +++ b/vizier/_src/benchmarks/analyzers/convergence_curve.py @@ -642,9 +642,10 @@ def _standardize_curves( Note: This is an helper function that the class implementing this interface can choose to use or not. - 1. Align xs and keeping each ys. - 2. Apply quantiles and impute NaN. - 3. Remove values where xs < xs_cutoff. + 1. Align xs and keep each ys. + 2. Convert curves to INCREASING. + 3. Apply quantiles and impute NaN. + 4. Remove values where xs < xs_cutoff. Args: xs_cutoff: The xs value before which values are ignored. @@ -825,8 +826,8 @@ def score(self) -> float: @attr.define -class SimpleConvergenceCurveComparator(ConvergenceComparator): - """Comparator method based on simple comparison. +class StandardizedWinRateConvergenceCurveComparator(ConvergenceComparator): + """Comparator method based on win rate on the standardized curves. Attributes: burn_cutoff: The cutoff below which values not included in score. @@ -835,7 +836,7 @@ class SimpleConvergenceCurveComparator(ConvergenceComparator): _xs_cutoff: Optional[float] = None def score(self) -> float: - """Computes the simple convergence score. + """Computes the standardized win-rate convergence score. 
The score is the percentage of indices (after the burn cutoff) for which the interpolated values of 'compared_curve' are better than the interpolated @@ -860,6 +861,13 @@ def curve(self) -> ConvergenceCurve: class PercentageBetterConvergenceCurveComparator(ConvergenceComparator): """Comparator method based on percentage better. + PercentageBetter is the average percentage of steps that one curve is better + than the other. + + For example, assuming a study with 100 trials, a score of 0.07 means that on + average for each 'baseline' trial the 'compared' convergence curve has already + reached that value 7 steps before. + Attributes: burn_cutoff: The cutoff below which values not included in score. """ @@ -869,10 +877,7 @@ class PercentageBetterConvergenceCurveComparator(ConvergenceComparator): def _compute_directional_score( self, baseline: np.ndarray, compared: np.ndarray ) -> float: - """Compute the percentage better score. - - The score is the average percentage steps that 'compared' is better than - 'baseline'. + """Compute the percentage better score of 'compared' vs. 'baseline'. Note that: sum_i sum_j 1{c_j > b_i} = sum_j sum_i {b_i < c_j}. Therefore, we can either iterate over 'compared' and count the number of steps that @@ -882,7 +887,7 @@ def _compute_directional_score( Implementation -------------- 1. For each index of `baseline`: - - Finds the smallest index of 'comared' that is better. + - Finds the smallest index of 'compared' that is better. - Compute the percentage of `compared` steps that are better. 2. Average the percentages across all 'baseline' indices. 
@@ -958,6 +963,81 @@ def curve(self) -> ConvergenceCurve: ) +class WinRateSimpleRegretComparator(ConvergenceComparator): + """Comparator method based on win-rate simple regret.""" + + def score(self): + """Computes the win-rate simple regret score.""" + baseline_ys, compared_ys = self._standardize_curves() + print('compared_ys:', compared_ys) + print('baseline_ys:', baseline_ys) + return float(compared_ys[-1] > baseline_ys[-1]) + + def curve(self) -> ConvergenceCurve: + """Returns a score curve for each xs.""" + raise NotImplementedError('Curve not yet implemented.') + + +class NormalizedSimpleRegretComparator(ConvergenceComparator): + """Comparator method based on normalized simple regret. + + The simple regret gain ('compared' - 'baseline') is normalized by the + 'baseline' absolute simple regret and then truncated. + """ + + min_value: float = -0.5 + max_value: float = 1.0 + eps: float = 0.0001 + + def score(self): + """Computes the normalized simple regret score.""" + baseline_ys, compared_ys = self._standardize_curves() + d = (compared_ys[-1] - baseline_ys[-1]) / (abs(baseline_ys[-1]) + self.eps) + return min(max(d, self.min_value), self.max_value) + + def curve(self) -> ConvergenceCurve: + """Returns a score curve for each xs.""" + raise NotImplementedError('Curve not yet implemented.') + + +class WinRateSimpleRegretComparatorFactory(ConvergenceComparatorFactory): + """Factory class for WinRateSimpleRegretComparator.""" + + def __call__( + self, + baseline_curve: ConvergenceCurve, + compared_curve: ConvergenceCurve, + baseline_quantile: float = 0.5, + compared_quantile: float = 0.5, + ) -> ConvergenceComparator: + return WinRateSimpleRegretComparator( + baseline_curve=baseline_curve, + compared_curve=compared_curve, + baseline_quantile=baseline_quantile, + compared_quantile=compared_quantile, + name='win_rate_simple_regret', + ) + + +class NormalizedSimpleRegretComparatorFactory(ConvergenceComparatorFactory): + """Factory class for 
NormalizedSimpleRegretComparator.""" + + def __call__( + self, + baseline_curve: ConvergenceCurve, + compared_curve: ConvergenceCurve, + baseline_quantile: float = 0.5, + compared_quantile: float = 0.5, + ) -> ConvergenceComparator: + return NormalizedSimpleRegretComparator( + baseline_curve=baseline_curve, + compared_curve=compared_curve, + baseline_quantile=baseline_quantile, + compared_quantile=compared_quantile, + name='normalized_simple_regret', + ) + + class WinRateComparatorFactory(ConvergenceComparatorFactory): """Factory class for WinRateComparatorFactory.""" @@ -1015,12 +1095,14 @@ def __call__( compared_curve=compared_curve, baseline_quantile=baseline_quantile, compared_quantile=compared_quantile, - name='%_better', + name='pct_better', ) -class SimpleConvergenceCurveComparatorFactory(ConvergenceComparatorFactory): - """Factory class for SimpleConvergenceCurveCompartor.""" +class StandardizedWinRateConvergenceCurveComparatorFactory( + ConvergenceComparatorFactory +): + """Factory class for StandardizedWinRateConvergenceCurveComparator.""" def __call__( self, @@ -1029,12 +1111,12 @@ def __call__( baseline_quantile: float = 0.5, compared_quantile: float = 0.5, ) -> ConvergenceComparator: - return SimpleConvergenceCurveComparator( + return StandardizedWinRateConvergenceCurveComparator( baseline_curve=baseline_curve, compared_curve=compared_curve, baseline_quantile=baseline_quantile, compared_quantile=compared_quantile, - name='simple', + name='standardized_win_rate', ) diff --git a/vizier/_src/benchmarks/analyzers/convergence_curve_test.py b/vizier/_src/benchmarks/analyzers/convergence_curve_test.py index 862dc09b6..00bb792d5 100644 --- a/vizier/_src/benchmarks/analyzers/convergence_curve_test.py +++ b/vizier/_src/benchmarks/analyzers/convergence_curve_test.py @@ -41,57 +41,67 @@ def test_align_xs_merge_ys_on_different_lengths(self): c1 = convergence.ConvergenceCurve( xs=np.array([1, 2, 3]), ys=np.array([[2, 1, 1]]), - 
trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) c2 = convergence.ConvergenceCurve( xs=np.array([1]), ys=np.array([[3]]), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) aligned = convergence.ConvergenceCurve.align_xs([c1, c2])[0] np.testing.assert_array_equal(aligned.xs, [1, 2, 3]) - np.testing.assert_array_equal(aligned.ys, - np.array([[2, 1, 1], [3, np.nan, np.nan]])) + np.testing.assert_array_equal( + aligned.ys, np.array([[2, 1, 1], [3, np.nan, np.nan]]) + ) def test_align_xs_merge_ys_on_distinct_xvalues(self): c1 = convergence.ConvergenceCurve( xs=np.array([1, 3, 4]), ys=np.array([[2, 1, 1]]), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) c2 = convergence.ConvergenceCurve( xs=np.array([2]), ys=np.array([[3]]), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) aligned = convergence.ConvergenceCurve.align_xs([c1, c2])[0] np.testing.assert_array_equal(aligned.xs.shape, (3,)) - np.testing.assert_array_equal(aligned.ys, - np.array([[2, 1.25, 1], [3, np.nan, np.nan]])) + np.testing.assert_array_equal( + aligned.ys, np.array([[2, 1.25, 1], [3, np.nan, np.nan]]) + ) def test_align_xs_merge_ys_with_interpolation(self): c1 = convergence.ConvergenceCurve( xs=np.array([1, 2, 3, 4, 5]), ys=np.array([[2, 2, 1, 0.5, 0.5], [1, 1, 1, 1, 1]]), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) aligned = convergence.ConvergenceCurve.align_xs( [c1], interpolate_repeats=True )[0] np.testing.assert_array_equal(aligned.xs, np.array([1, 2, 3, 4, 5])) np.testing.assert_array_equal( - aligned.ys, np.array([[2, 1.5, 1.0, 0.5, 0.5], [1, 1, 1, 1, 1]])) + aligned.ys, np.array([[2, 1.5, 1.0, 0.5, 0.5], [1, 1, 1, 1, 1]]) + ) def 
test_extrapolate_ys_with_steps(self): c1 = convergence.ConvergenceCurve( xs=np.array([1, 2, 3, 4]), ys=np.array([[2, 1.5, 1, 0.5], [1, 1, 1, 1]]), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) extra_c1 = convergence.ConvergenceCurve.extrapolate_ys(c1, steps=2) np.testing.assert_array_equal(extra_c1.xs.shape, (6,)) np.testing.assert_array_equal( extra_c1.ys, - np.array([[2, 1.5, 1.0, 0.5, 0.0, -0.5], [1, 1, 1, 1, 1, 1]])) + np.array([[2, 1.5, 1.0, 0.5, 0.0, -0.5], [1, 1, 1, 1, 1, 1]]), + ) def test_align_xs_merge_ys_on_increasing_and_dicreasing_fails(self): c1 = convergence.ConvergenceCurve( @@ -102,7 +112,8 @@ def test_align_xs_merge_ys_on_increasing_and_dicreasing_fails(self): c2 = convergence.ConvergenceCurve( xs=np.array([2]), ys=np.array([[3]]), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) with self.assertRaisesRegex(ValueError, 'increasing'): # pylint: disable=[expression-not-assigned] convergence.ConvergenceCurve.align_xs([c1, c2])[0] @@ -138,11 +149,13 @@ class ConvergenceCurveConverterTest(parameterized.TestCase): @parameterized.named_parameters( ('maximize', pyvizier.ObjectiveMetricGoal.MAXIMIZE, [[2, 2, 3]]), - ('minimize', pyvizier.ObjectiveMetricGoal.MINIMIZE, [[2, 1, 1]])) + ('minimize', pyvizier.ObjectiveMetricGoal.MINIMIZE, [[2, 1, 1]]), + ) def test_convert_basic(self, goal, expected): trials = _gen_trials([2, 1, 3]) generator = convergence.ConvergenceCurveConverter( - pyvizier.MetricInformation(name='', goal=goal)) + pyvizier.MetricInformation(name='', goal=goal) + ) curve = generator.convert(trials) np.testing.assert_array_equal(curve.xs, [1, 2, 3]) np.testing.assert_array_equal(curve.ys, expected) @@ -167,6 +180,12 @@ def test_convert_flip_signs(self, goal, expected): [2, 1, 4, 5], [[2, 2, 5, 5]], ), + ( + pyvizier.ObjectiveMetricGoal.MAXIMIZE, + 2, + [4, 5, 2, 1], + [[5, 5, 5, 5]], + ), ( 
pyvizier.ObjectiveMetricGoal.MAXIMIZE, 2, @@ -712,16 +731,19 @@ def setUp(self): self._baseline_curve = convergence.ConvergenceCurve( xs=xs, ys=np.exp(np.array([-0.9, -1.0, -1.1]).reshape(3, 1) * xs_t), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) self._worse_curves = convergence.ConvergenceCurve( xs=xs, ys=np.exp(-0.5 * xs_t), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) self._better_curves = convergence.ConvergenceCurve( xs=xs, ys=np.exp(np.array([-1.5, -1.8, -2.0]).reshape(3, 1) * xs_t), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) def test_get_relative_efficiency_curve(self): baseline_length = len(self._baseline_curve.xs) @@ -745,7 +767,8 @@ def test_get_relative_efficiency_flat(self): flat_curve = convergence.ConvergenceCurve( xs=np.array(range(0, 20)), ys=np.array([4.0, 3.0, 2.0] + [1.5] * 17).reshape(1, 20), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) self_eff = convergence.LogEfficiencyConvergenceCurveComparator( baseline_curve=flat_curve, compared_curve=flat_curve ).curve() @@ -758,7 +781,8 @@ def test_get_relative_efficiency_short_curve(self): short_curve = convergence.ConvergenceCurve( xs=self._baseline_curve.xs[:short_length], ys=self._baseline_curve.ys[:, :short_length], - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) max_score = 10.3 short_efficiency = convergence.LogEfficiencyConvergenceCurveComparator( baseline_curve=self._baseline_curve, @@ -807,11 +831,13 @@ def test_efficiency_score_value(self): worse_curves = convergence.ConvergenceCurve( xs=xs, ys=np.exp(-0.5 * xs_t), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) better_curves = 
convergence.ConvergenceCurve( xs=xs, ys=np.exp(np.array([-1.5, -1.8, -2.0]).reshape(3, 1) * xs_t), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) # Efficiency score for exponential curves can be approximated. self.assertGreater( @@ -829,14 +855,15 @@ def test_efficiency_score_value(self): def test_comparator_failure(self): unknown_curve = convergence.ConvergenceCurve( - xs=self._baseline_curve.xs, ys=self._baseline_curve.ys) + xs=self._baseline_curve.xs, ys=self._baseline_curve.ys + ) with self.assertRaisesRegex(ValueError, 'increasing or decreasing'): convergence.LogEfficiencyConvergenceCurveComparator( baseline_curve=unknown_curve, compared_curve=self._baseline_curve ) -class SimpleConvergenceComparatorTest(parameterized.TestCase): +class StandardizedWinRateConvergenceComparatorTest(parameterized.TestCase): @parameterized.parameters( { @@ -885,7 +912,7 @@ def test_score_one_curve_above_other(self, ys1, ys2, res, cutoff): curve2 = convergence.ConvergenceCurve( xs=xs2, ys=ys2, trend=convergence.ConvergenceCurve.YTrend.INCREASING ) - comparator = convergence.SimpleConvergenceCurveComparator( + comparator = convergence.StandardizedWinRateConvergenceCurveComparator( curve1, curve2, xs_cutoff=cutoff ) self.assertEqual(comparator.score(), res) @@ -925,5 +952,79 @@ def test_score(self, ys1, ys2, res): self.assertEqual(comparator.score(), res) +class NormalizedSimpleRegretConvergenceComparatorTest(parameterized.TestCase): + + @parameterized.parameters( + { + 'ys1': np.array([[1, 4, 8, 10, 12]]), + 'ys2': np.array([[2, 5, 6, 8, 10]]), + 'trend': convergence.ConvergenceCurve.YTrend.INCREASING, + 'res': (10 - 12) / 12, + }, + { + 'ys1': np.array([[1, 4, 8, 10, 12]]), + 'ys2': np.array([[2, 5, 6, 8, 1000]]), + 'trend': convergence.ConvergenceCurve.YTrend.INCREASING, + 'res': 1.0, + }, + { + 'ys1': np.array([[1, 4, 8, 10, 1000]]), + 'ys2': np.array([[2, 5, 6, 8, 10]]), + 'trend': 
convergence.ConvergenceCurve.YTrend.INCREASING, + 'res': -0.5, + }, + { + 'ys1': np.array([[11, 5, 3, 1]]), + 'ys2': np.array([[130, 4, 2, 0.5]]), + 'trend': convergence.ConvergenceCurve.YTrend.DECREASING, + 'res': (-0.5 - (-1)) / 1, + }, + ) + def test_score(self, ys1, ys2, trend, res): + xs1 = np.arange(ys1.shape[1]) + xs2 = np.arange(ys2.shape[1]) + curve1 = convergence.ConvergenceCurve(xs=xs1, ys=ys1, trend=trend) + curve2 = convergence.ConvergenceCurve(xs=xs2, ys=ys2, trend=trend) + comparator = convergence.NormalizedSimpleRegretComparator(curve1, curve2) + self.assertAlmostEqual(comparator.score(), res, delta=0.0001) + + +class WinRateSimpleRegretConvergenceComparatorTest(parameterized.TestCase): + + @parameterized.parameters( + { + 'ys1': np.array([[1, 4, 8, 10, 12]]), + 'ys2': np.array([[2, 5, 6, 8, 10]]), + 'trend': convergence.ConvergenceCurve.YTrend.INCREASING, + 'res': 0.0, + }, + { + 'ys1': np.array([[1, 4, 8, 10, 12]]), + 'ys2': np.array([[2, 5, 6, 8, 1000]]), + 'trend': convergence.ConvergenceCurve.YTrend.INCREASING, + 'res': 1.0, + }, + { + 'ys1': np.array([[1, 4, 8, 10, 1000]]), + 'ys2': np.array([[2, 5, 6, 8, 10]]), + 'trend': convergence.ConvergenceCurve.YTrend.INCREASING, + 'res': 0.0, + }, + { + 'ys1': np.array([[11, 5, 3, 1]]), + 'ys2': np.array([[130, 4, 2, 0.5]]), + 'trend': convergence.ConvergenceCurve.YTrend.DECREASING, + 'res': 1.0, + }, + ) + def test_score(self, ys1, ys2, trend, res): + xs1 = np.arange(ys1.shape[1]) + xs2 = np.arange(ys2.shape[1]) + curve1 = convergence.ConvergenceCurve(xs=xs1, ys=ys1, trend=trend) + curve2 = convergence.ConvergenceCurve(xs=xs2, ys=ys2, trend=trend) + comparator = convergence.WinRateSimpleRegretComparator(curve1, curve2) + self.assertEqual(comparator.score(), res) + + if __name__ == '__main__': absltest.main()