From fad09564e16129ab3faedf79f70ce25ccdc28fb6 Mon Sep 17 00:00:00 2001 From: Lior Belenki Date: Fri, 19 Jan 2024 14:04:27 -0800 Subject: [PATCH] Add NormalizedSimpleRegret and WinRateSimpleRegret convergence curve comparators. PiperOrigin-RevId: 599931886 --- .../benchmarks/analyzers/convergence_curve.py | 114 +++++++++++-- .../analyzers/convergence_curve_test.py | 151 +++++++++++++++--- 2 files changed, 224 insertions(+), 41 deletions(-) diff --git a/vizier/_src/benchmarks/analyzers/convergence_curve.py b/vizier/_src/benchmarks/analyzers/convergence_curve.py index 19d7362e5..b3143f461 100644 --- a/vizier/_src/benchmarks/analyzers/convergence_curve.py +++ b/vizier/_src/benchmarks/analyzers/convergence_curve.py @@ -642,9 +642,10 @@ def _standardize_curves( Note: This is an helper function that the class implementing this interface can choose to use or not. - 1. Align xs and keeping each ys. - 2. Apply quantiles and impute NaN. - 3. Remove values where xs < xs_cutoff. + 1. Align xs and keep each ys. + 2. Convert curves to INCREASING. + 3. Apply quantiles and impute NaN. + 4. Remove values where xs < xs_cutoff. Args: xs_cutoff: The xs value before which values are ignored. @@ -825,8 +826,8 @@ def score(self) -> float: @attr.define -class SimpleConvergenceCurveComparator(ConvergenceComparator): - """Comparator method based on simple comparison. +class StandardizedWinRateConvergenceCurveComparator(ConvergenceComparator): + """Comparator method based on win rate on the standardized curves. Attributes: burn_cutoff: The cutoff below which values not included in score. @@ -835,7 +836,7 @@ class SimpleConvergenceCurveComparator(ConvergenceComparator): _xs_cutoff: Optional[float] = None def score(self) -> float: - """Computes the simple convergence score. + """Computes the standardized win-rate convergence score. 
The score is the percentage of indices (after the burn cutoff) for which the interpolated values of 'compared_curve' are better than the interpolated @@ -860,6 +861,13 @@ def curve(self) -> ConvergenceCurve: class PercentageBetterConvergenceCurveComparator(ConvergenceComparator): """Comparator method based on percentage better. + PercentageBetter is the average percentage of steps that one curve is better + than the other. + + For example, assuming a study with 100 trials, a score of 0.07 means that on + average for each 'baseline' trial the 'compared' convergence curve has already + reached that value 7 steps before. + Attributes: burn_cutoff: The cutoff below which values not included in score. """ @@ -869,10 +877,7 @@ class PercentageBetterConvergenceCurveComparator(ConvergenceComparator): def _compute_directional_score( self, baseline: np.ndarray, compared: np.ndarray ) -> float: - """Compute the percentage better score. - - The score is the average percentage steps that 'compared' is better than - 'baseline'. + """Compute the percentage better score of 'compared' vs. 'baseline'. Note that: sum_i sum_j 1{c_j > b_i} = sum_j sum_i {b_i < c_j}. Therefore, we can either iterate over 'compared' and count the number of steps that @@ -882,7 +887,7 @@ def _compute_directional_score( Implementation -------------- 1. For each index of `baseline`: - - Finds the smallest index of 'comared' that is better. + - Finds the smallest index of 'compared' that is better. - Compute the percentage of `compared` steps that are better. 2. Average the percentages across all 'baseline' indices. 
@@ -958,6 +963,81 @@ def curve(self) -> ConvergenceCurve: ) +class WinRateSimpleRegretComparator(ConvergenceComparator): + """Comparator method based on win-rate simple regret.""" + + def score(self): + """Computes the win-rate simple regret score.""" + baseline_ys, compared_ys = self._standardize_curves() + print('compared_ys:', compared_ys) + print('baseline_ys:', baseline_ys) + return float(compared_ys[-1] > baseline_ys[-1]) + + def curve(self) -> ConvergenceCurve: + """Returns a score curve for each xs.""" + raise NotImplementedError('Curve not yet implemented.') + + +class NormalizedSimpleRegretComparator(ConvergenceComparator): + """Comparator method based on normalized simple regret. + + The simple regret gain ('compared' - 'baseline') is normalized by the + 'baseline' absolute simple regret and then truncated. + """ + + min_value: float = -0.5 + max_value: float = 1.0 + eps: float = 0.0001 + + def score(self): + """Computes the normalized simple regret score.""" + baseline_ys, compared_ys = self._standardize_curves() + d = (compared_ys[-1] - baseline_ys[-1]) / (abs(baseline_ys[-1]) + self.eps) + return min(max(d, self.min_value), self.max_value) + + def curve(self) -> ConvergenceCurve: + """Returns a score curve for each xs.""" + raise NotImplementedError('Curve not yet implemented.') + + +class WinRateSimpleRegretComparatorFactory(ConvergenceComparatorFactory): + """Factory class for WinRateSimpleRegretComparator.""" + + def __call__( + self, + baseline_curve: ConvergenceCurve, + compared_curve: ConvergenceCurve, + baseline_quantile: float = 0.5, + compared_quantile: float = 0.5, + ) -> ConvergenceComparator: + return WinRateSimpleRegretComparator( + baseline_curve=baseline_curve, + compared_curve=compared_curve, + baseline_quantile=baseline_quantile, + compared_quantile=compared_quantile, + name='win_rate_simple_regret', + ) + + +class NormalizedSimpleRegretComparatorFactory(ConvergenceComparatorFactory): + """Factory class for 
NormalizedSimpleRegretComparator.""" + + def __call__( + self, + baseline_curve: ConvergenceCurve, + compared_curve: ConvergenceCurve, + baseline_quantile: float = 0.5, + compared_quantile: float = 0.5, + ) -> ConvergenceComparator: + return NormalizedSimpleRegretComparator( + baseline_curve=baseline_curve, + compared_curve=compared_curve, + baseline_quantile=baseline_quantile, + compared_quantile=compared_quantile, + name='normalized_simple_regret', + ) + + class WinRateComparatorFactory(ConvergenceComparatorFactory): """Factory class for WinRateComparatorFactory.""" @@ -1015,12 +1095,14 @@ def __call__( compared_curve=compared_curve, baseline_quantile=baseline_quantile, compared_quantile=compared_quantile, - name='%_better', + name='pct_better', ) -class SimpleConvergenceCurveComparatorFactory(ConvergenceComparatorFactory): - """Factory class for SimpleConvergenceCurveCompartor.""" +class StandardizedWinRateConvergenceCurveComparatorFactory( + ConvergenceComparatorFactory +): + """Factory class for StandardizedWinRateConvergenceCurveComparator.""" def __call__( self, @@ -1029,12 +1111,12 @@ def __call__( baseline_quantile: float = 0.5, compared_quantile: float = 0.5, ) -> ConvergenceComparator: - return SimpleConvergenceCurveComparator( + return StandardizedWinRateConvergenceCurveComparator( baseline_curve=baseline_curve, compared_curve=compared_curve, baseline_quantile=baseline_quantile, compared_quantile=compared_quantile, - name='simple', + name='standardized_win_rate', ) diff --git a/vizier/_src/benchmarks/analyzers/convergence_curve_test.py b/vizier/_src/benchmarks/analyzers/convergence_curve_test.py index 862dc09b6..00bb792d5 100644 --- a/vizier/_src/benchmarks/analyzers/convergence_curve_test.py +++ b/vizier/_src/benchmarks/analyzers/convergence_curve_test.py @@ -41,57 +41,67 @@ def test_align_xs_merge_ys_on_different_lengths(self): c1 = convergence.ConvergenceCurve( xs=np.array([1, 2, 3]), ys=np.array([[2, 1, 1]]), - 
trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) c2 = convergence.ConvergenceCurve( xs=np.array([1]), ys=np.array([[3]]), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) aligned = convergence.ConvergenceCurve.align_xs([c1, c2])[0] np.testing.assert_array_equal(aligned.xs, [1, 2, 3]) - np.testing.assert_array_equal(aligned.ys, - np.array([[2, 1, 1], [3, np.nan, np.nan]])) + np.testing.assert_array_equal( + aligned.ys, np.array([[2, 1, 1], [3, np.nan, np.nan]]) + ) def test_align_xs_merge_ys_on_distinct_xvalues(self): c1 = convergence.ConvergenceCurve( xs=np.array([1, 3, 4]), ys=np.array([[2, 1, 1]]), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) c2 = convergence.ConvergenceCurve( xs=np.array([2]), ys=np.array([[3]]), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) aligned = convergence.ConvergenceCurve.align_xs([c1, c2])[0] np.testing.assert_array_equal(aligned.xs.shape, (3,)) - np.testing.assert_array_equal(aligned.ys, - np.array([[2, 1.25, 1], [3, np.nan, np.nan]])) + np.testing.assert_array_equal( + aligned.ys, np.array([[2, 1.25, 1], [3, np.nan, np.nan]]) + ) def test_align_xs_merge_ys_with_interpolation(self): c1 = convergence.ConvergenceCurve( xs=np.array([1, 2, 3, 4, 5]), ys=np.array([[2, 2, 1, 0.5, 0.5], [1, 1, 1, 1, 1]]), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) aligned = convergence.ConvergenceCurve.align_xs( [c1], interpolate_repeats=True )[0] np.testing.assert_array_equal(aligned.xs, np.array([1, 2, 3, 4, 5])) np.testing.assert_array_equal( - aligned.ys, np.array([[2, 1.5, 1.0, 0.5, 0.5], [1, 1, 1, 1, 1]])) + aligned.ys, np.array([[2, 1.5, 1.0, 0.5, 0.5], [1, 1, 1, 1, 1]]) + ) def 
test_extrapolate_ys_with_steps(self): c1 = convergence.ConvergenceCurve( xs=np.array([1, 2, 3, 4]), ys=np.array([[2, 1.5, 1, 0.5], [1, 1, 1, 1]]), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) extra_c1 = convergence.ConvergenceCurve.extrapolate_ys(c1, steps=2) np.testing.assert_array_equal(extra_c1.xs.shape, (6,)) np.testing.assert_array_equal( extra_c1.ys, - np.array([[2, 1.5, 1.0, 0.5, 0.0, -0.5], [1, 1, 1, 1, 1, 1]])) + np.array([[2, 1.5, 1.0, 0.5, 0.0, -0.5], [1, 1, 1, 1, 1, 1]]), + ) def test_align_xs_merge_ys_on_increasing_and_dicreasing_fails(self): c1 = convergence.ConvergenceCurve( @@ -102,7 +112,8 @@ def test_align_xs_merge_ys_on_increasing_and_dicreasing_fails(self): c2 = convergence.ConvergenceCurve( xs=np.array([2]), ys=np.array([[3]]), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) with self.assertRaisesRegex(ValueError, 'increasing'): # pylint: disable=[expression-not-assigned] convergence.ConvergenceCurve.align_xs([c1, c2])[0] @@ -138,11 +149,13 @@ class ConvergenceCurveConverterTest(parameterized.TestCase): @parameterized.named_parameters( ('maximize', pyvizier.ObjectiveMetricGoal.MAXIMIZE, [[2, 2, 3]]), - ('minimize', pyvizier.ObjectiveMetricGoal.MINIMIZE, [[2, 1, 1]])) + ('minimize', pyvizier.ObjectiveMetricGoal.MINIMIZE, [[2, 1, 1]]), + ) def test_convert_basic(self, goal, expected): trials = _gen_trials([2, 1, 3]) generator = convergence.ConvergenceCurveConverter( - pyvizier.MetricInformation(name='', goal=goal)) + pyvizier.MetricInformation(name='', goal=goal) + ) curve = generator.convert(trials) np.testing.assert_array_equal(curve.xs, [1, 2, 3]) np.testing.assert_array_equal(curve.ys, expected) @@ -167,6 +180,12 @@ def test_convert_flip_signs(self, goal, expected): [2, 1, 4, 5], [[2, 2, 5, 5]], ), + ( + pyvizier.ObjectiveMetricGoal.MAXIMIZE, + 2, + [4, 5, 2, 1], + [[5, 5, 5, 5]], + ), ( 
pyvizier.ObjectiveMetricGoal.MAXIMIZE, 2, @@ -712,16 +731,19 @@ def setUp(self): self._baseline_curve = convergence.ConvergenceCurve( xs=xs, ys=np.exp(np.array([-0.9, -1.0, -1.1]).reshape(3, 1) * xs_t), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) self._worse_curves = convergence.ConvergenceCurve( xs=xs, ys=np.exp(-0.5 * xs_t), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) self._better_curves = convergence.ConvergenceCurve( xs=xs, ys=np.exp(np.array([-1.5, -1.8, -2.0]).reshape(3, 1) * xs_t), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) def test_get_relative_efficiency_curve(self): baseline_length = len(self._baseline_curve.xs) @@ -745,7 +767,8 @@ def test_get_relative_efficiency_flat(self): flat_curve = convergence.ConvergenceCurve( xs=np.array(range(0, 20)), ys=np.array([4.0, 3.0, 2.0] + [1.5] * 17).reshape(1, 20), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) self_eff = convergence.LogEfficiencyConvergenceCurveComparator( baseline_curve=flat_curve, compared_curve=flat_curve ).curve() @@ -758,7 +781,8 @@ def test_get_relative_efficiency_short_curve(self): short_curve = convergence.ConvergenceCurve( xs=self._baseline_curve.xs[:short_length], ys=self._baseline_curve.ys[:, :short_length], - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) max_score = 10.3 short_efficiency = convergence.LogEfficiencyConvergenceCurveComparator( baseline_curve=self._baseline_curve, @@ -807,11 +831,13 @@ def test_efficiency_score_value(self): worse_curves = convergence.ConvergenceCurve( xs=xs, ys=np.exp(-0.5 * xs_t), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) better_curves = 
convergence.ConvergenceCurve( xs=xs, ys=np.exp(np.array([-1.5, -1.8, -2.0]).reshape(3, 1) * xs_t), - trend=convergence.ConvergenceCurve.YTrend.DECREASING) + trend=convergence.ConvergenceCurve.YTrend.DECREASING, + ) # Efficiency score for exponential curves can be approximated. self.assertGreater( @@ -829,14 +855,15 @@ def test_efficiency_score_value(self): def test_comparator_failure(self): unknown_curve = convergence.ConvergenceCurve( - xs=self._baseline_curve.xs, ys=self._baseline_curve.ys) + xs=self._baseline_curve.xs, ys=self._baseline_curve.ys + ) with self.assertRaisesRegex(ValueError, 'increasing or decreasing'): convergence.LogEfficiencyConvergenceCurveComparator( baseline_curve=unknown_curve, compared_curve=self._baseline_curve ) -class SimpleConvergenceComparatorTest(parameterized.TestCase): +class StandardizedWinRateConvergenceComparatorTest(parameterized.TestCase): @parameterized.parameters( { @@ -885,7 +912,7 @@ def test_score_one_curve_above_other(self, ys1, ys2, res, cutoff): curve2 = convergence.ConvergenceCurve( xs=xs2, ys=ys2, trend=convergence.ConvergenceCurve.YTrend.INCREASING ) - comparator = convergence.SimpleConvergenceCurveComparator( + comparator = convergence.StandardizedWinRateConvergenceCurveComparator( curve1, curve2, xs_cutoff=cutoff ) self.assertEqual(comparator.score(), res) @@ -925,5 +952,79 @@ def test_score(self, ys1, ys2, res): self.assertEqual(comparator.score(), res) +class NormalizedSimpleRegretConvergenceComparatorTest(parameterized.TestCase): + + @parameterized.parameters( + { + 'ys1': np.array([[1, 4, 8, 10, 12]]), + 'ys2': np.array([[2, 5, 6, 8, 10]]), + 'trend': convergence.ConvergenceCurve.YTrend.INCREASING, + 'res': (10 - 12) / 12, + }, + { + 'ys1': np.array([[1, 4, 8, 10, 12]]), + 'ys2': np.array([[2, 5, 6, 8, 1000]]), + 'trend': convergence.ConvergenceCurve.YTrend.INCREASING, + 'res': 1.0, + }, + { + 'ys1': np.array([[1, 4, 8, 10, 1000]]), + 'ys2': np.array([[2, 5, 6, 8, 10]]), + 'trend': 
convergence.ConvergenceCurve.YTrend.INCREASING, + 'res': -0.5, + }, + { + 'ys1': np.array([[11, 5, 3, 1]]), + 'ys2': np.array([[130, 4, 2, 0.5]]), + 'trend': convergence.ConvergenceCurve.YTrend.DECREASING, + 'res': (-0.5 - (-1)) / 1, + }, + ) + def test_score(self, ys1, ys2, trend, res): + xs1 = np.arange(ys1.shape[1]) + xs2 = np.arange(ys2.shape[1]) + curve1 = convergence.ConvergenceCurve(xs=xs1, ys=ys1, trend=trend) + curve2 = convergence.ConvergenceCurve(xs=xs2, ys=ys2, trend=trend) + comparator = convergence.NormalizedSimpleRegretComparator(curve1, curve2) + self.assertAlmostEqual(comparator.score(), res, delta=0.0001) + + +class WinRateSimpleRegretConvergenceComparatorTest(parameterized.TestCase): + + @parameterized.parameters( + { + 'ys1': np.array([[1, 4, 8, 10, 12]]), + 'ys2': np.array([[2, 5, 6, 8, 10]]), + 'trend': convergence.ConvergenceCurve.YTrend.INCREASING, + 'res': 0.0, + }, + { + 'ys1': np.array([[1, 4, 8, 10, 12]]), + 'ys2': np.array([[2, 5, 6, 8, 1000]]), + 'trend': convergence.ConvergenceCurve.YTrend.INCREASING, + 'res': 1.0, + }, + { + 'ys1': np.array([[1, 4, 8, 10, 1000]]), + 'ys2': np.array([[2, 5, 6, 8, 10]]), + 'trend': convergence.ConvergenceCurve.YTrend.INCREASING, + 'res': 0.0, + }, + { + 'ys1': np.array([[11, 5, 3, 1]]), + 'ys2': np.array([[130, 4, 2, 0.5]]), + 'trend': convergence.ConvergenceCurve.YTrend.DECREASING, + 'res': 1.0, + }, + ) + def test_score(self, ys1, ys2, trend, res): + xs1 = np.arange(ys1.shape[1]) + xs2 = np.arange(ys2.shape[1]) + curve1 = convergence.ConvergenceCurve(xs=xs1, ys=ys1, trend=trend) + curve2 = convergence.ConvergenceCurve(xs=xs2, ys=ys2, trend=trend) + comparator = convergence.WinRateSimpleRegretComparator(curve1, curve2) + self.assertEqual(comparator.score(), res) + + if __name__ == '__main__': absltest.main()