diff --git a/CHANGELOG.md b/CHANGELOG.md index 221273ac..7d9d2ce9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,8 @@ * Features * Added multi-armed bandit endpoint. (#255) - * Implemented epsilon-greedy. + * Implemented epsilon-greedy. (#255) + * Implemented epsilon-first. (#335) * Added support for the L-BFGS-B optimizer. (#296) * Changes diff --git a/docs/bandit.rst b/docs/bandit.rst index d457b30d..1fc7a3bd 100644 --- a/docs/bandit.rst +++ b/docs/bandit.rst @@ -59,12 +59,12 @@ There are many different policies for this problem: We have implemented the following policies in our package: +* :mod:`~moe.bandit.epsilon_first.EpsilonFirst` * :mod:`~moe.bandit.epsilon_greedy.EpsilonGreedy` Other policies include: * Weighted random choice -* `Epsilon-first`_ * `Epsilon-decreasing`_ \* * `UCB-exp (Upper Confidence Bound)`_ \* * `UCB-tuned`_ \* @@ -73,7 +73,6 @@ Other policies include: \* Regret bounded as :math:`t \rightarrow \infty` -.. _Epsilon-first: http://en.wikipedia.org/wiki/Multi-armed_bandit#Semi-uniform_strategies .. _Epsilon-decreasing: http://en.wikipedia.org/wiki/Multi-armed_bandit#Semi-uniform_strategies .. _UCB-exp (Upper Confidence Bound): http://moodle.technion.ac.il/pluginfile.php/192340/mod_resource/content/0/UCB.pdf .. _UCB-tuned: http://moodle.technion.ac.il/pluginfile.php/192340/mod_resource/content/0/UCB.pdf diff --git a/moe/bandit/__init__.py b/moe/bandit/__init__.py index 52591602..00c7d99e 100644 --- a/moe/bandit/__init__.py +++ b/moe/bandit/__init__.py @@ -6,6 +6,8 @@ * :mod:`moe.bandit.constant`: some default configuration values for ``optimal_learning`` components * :mod:`moe.bandit.data_containers`: :class:`~moe.bandit.data_containers.SampleArm` and :class:`~moe.bandit.data_containers.HistoricalData` containers for passing data to the ``bandit`` library +* :mod:`moe.bandit.epsilon_first`: :class:`~moe.bandit.epsilon_first.EpsilonFirst` + object for allocating bandit arms and choosing the winning arm based on epsilon-first policy. 
* :mod:`moe.bandit.epsilon_greedy`: :class:`~moe.bandit.epsilon_greedy.EpsilonGreedy` object for allocating bandit arms and choosing the winning arm based on epsilon-greedy policy. * :mod:`moe.bandit.epsilon`: a base :class:`~moe.bandit.epsilon.Epsilon` diff --git a/moe/bandit/constant.py b/moe/bandit/constant.py index 64cd8686..7d58454c 100644 --- a/moe/bandit/constant.py +++ b/moe/bandit/constant.py @@ -8,10 +8,19 @@ } } -DEFAULT_EPSILON = 0.05 - # Epsilon subtypes +EPSILON_SUBTYPE_FIRST = 'first' EPSILON_SUBTYPE_GREEDY = 'greedy' EPSILON_SUBTYPES = [ + EPSILON_SUBTYPE_FIRST, EPSILON_SUBTYPE_GREEDY, ] + +# Default Hyperparameters +DEFAULT_EPSILON = 0.05 +DEFAULT_TOTAL_SAMPLES = 100 +EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS = { + EPSILON_SUBTYPE_FIRST: {'epsilon': DEFAULT_EPSILON, + 'total_samples': DEFAULT_TOTAL_SAMPLES}, + EPSILON_SUBTYPE_GREEDY: {'epsilon': DEFAULT_EPSILON}, + } diff --git a/moe/bandit/epsilon.py b/moe/bandit/epsilon.py index 09eb47eb..8de04f6c 100644 --- a/moe/bandit/epsilon.py +++ b/moe/bandit/epsilon.py @@ -4,16 +4,17 @@ See :class:`moe.bandit.interfaces.bandit_interface` for further details on bandit. """ - import copy +import numpy + from moe.bandit.constant import DEFAULT_EPSILON from moe.bandit.interfaces.bandit_interface import BanditInterface class Epsilon(BanditInterface): - r"""Implementation of the constructor of Epsilon. Abstract method allocate_arms implemented in subclass. + r"""Implementation of the constructor and common methods of Epsilon. Abstract method allocate_arms implemented in subclass. A class to encapsulate the computation of bandit epsilon. Epsilon is the sole hyperparameter in this class. Subclasses may contain other hyperparameters. @@ -31,7 +32,7 @@ def __init__( """Construct an Epsilon object. 
:param historical_info: a dictionary of arms sampled - :type historical_info: dictionary of (String(), SingleArm()) pairs + :type historical_info: dictionary of (String(), SampleArm()) pairs (see :class:`moe.bandit.data_containers.SampleArm` for more details) :param subtype: subtype of the epsilon bandit algorithm (default: None) :type subtype: String() :param epsilon: epsilon hyperparameter for the epsilon bandit algorithm (default: :const:`~moe.bandit.constant.DEFAULT_EPSILON`) @@ -41,3 +42,34 @@ def __init__( self._historical_info = copy.deepcopy(historical_info) self._subtype = subtype self._epsilon = epsilon + + @staticmethod + def get_winning_arm_names(arms_sampled): + r"""Compute the set of winning arm names based on the given ``arms_sampled``. + + Throws an exception when arms_sampled is empty. + Implementers of this interface will never override this method. + + :param arms_sampled: a dictionary of arm name to :class:`moe.bandit.data_containers.SampleArm` + :type arms_sampled: dictionary of (String(), SampleArm()) pairs + :return: the set of names of the winning arms + :rtype: frozenset(String()) + :raise: ValueError when ``arms_sampled`` is empty. + + """ + if not arms_sampled: + raise ValueError('arms_sampled is empty!') + + avg_payoff_arm_name_list = [] + for arm_name, sampled_arm in arms_sampled.iteritems(): + avg_payoff = numpy.float64(sampled_arm.win - sampled_arm.loss) / sampled_arm.total if sampled_arm.total > 0 else 0 + avg_payoff_arm_name_list.append((avg_payoff, arm_name)) + avg_payoff_arm_name_list.sort(reverse=True) + + best_payoff, _ = max(avg_payoff_arm_name_list) + # Filter out arms that have average payoff less than the best payoff + winning_arm_payoff_name_list = filter(lambda avg_payoff_arm_name: avg_payoff_arm_name[0] == best_payoff, avg_payoff_arm_name_list) + # Extract a list of winning arm names from a list of (average payoff, arm name) tuples.
+ _, winning_arm_name_list = map(list, zip(*winning_arm_payoff_name_list)) + winning_arm_names = frozenset(winning_arm_name_list) + return winning_arm_names diff --git a/moe/bandit/epsilon_first.py b/moe/bandit/epsilon_first.py new file mode 100644 index 00000000..d5e48b1c --- /dev/null +++ b/moe/bandit/epsilon_first.py @@ -0,0 +1,118 @@ +# -*- coding: utf-8 -*- +"""Classes (Python) to compute the Bandit Epsilon-First arm allocation and choosing the arm to pull next. + +See :class:`moe.bandit.epsilon.Epsilon` for further details on bandit. + +""" +from moe.bandit.constant import DEFAULT_EPSILON, DEFAULT_TOTAL_SAMPLES, EPSILON_SUBTYPE_FIRST +from moe.bandit.epsilon import Epsilon + + +class EpsilonFirst(Epsilon): + + r"""Implementation of EpsilonFirst. + + A class to encapsulate the computation of bandit epsilon first. + + total_samples is the total number of samples (number to sample + number sampled) + number sampled is calculated by summing up total from each arm sampled. + total_samples is T from :doc:`bandit`. + + See superclass :class:`moe.bandit.epsilon.Epsilon` for further details. + + """ + + def __init__( + self, + historical_info, + epsilon=DEFAULT_EPSILON, + total_samples=DEFAULT_TOTAL_SAMPLES, + ): + """Construct an EpsilonFirst object. See superclass :class:`moe.bandit.epsilon.Epsilon` for details. + + total_samples is the total number of samples (number to sample + number sampled) + number sampled is calculated by summing up total from each arm sampled. + total_samples is T from :doc:`bandit`. + + """ + super(EpsilonFirst, self).__init__( + historical_info=historical_info, + subtype=EPSILON_SUBTYPE_FIRST, + epsilon=epsilon, + ) + self._total_samples = total_samples + + def allocate_arms(self): + r"""Compute the allocation to each arm given ``historical_info``, running bandit ``subtype`` endpoint with hyperparameters in ``hyperparameter_info``. + + Computes the allocation to each arm based on the given subtype, historical info, and hyperparameter info. 
+ + Works with k-armed bandits (k >= 1). + + The Algorithm: http://en.wikipedia.org/wiki/Multi-armed_bandit#Approximate_solutions + + This method starts with a pure exploration phase, followed by a pure exploitation phase. + If we have a total of T trials, for the first :math:`\epsilon` T trials we only explore. + After that, we only exploit (t = :math:`\epsilon` T + 1, ..., T). + + This method will pull a random arm in the exploration phase. + Then this method will pull the optimal arm (best expected return) in the exploitation phase. + + In case of a tie in the exploitation phase, the method will split the allocation among the optimal arms. + + For example, if we have three arms, two arms (arm1 and arm2) with an average payoff of 0.5 + (``{win:10, loss:0, total:20}``) + and a new arm (arm3, average payoff is 0 and total is 0). + + Let the epsilon :math:`\epsilon` be 0.1. + + The allocation depends on which phase we are in: + + *Case 1: T = 50* + + Recall that T = number to sample + number sampled. number sampled :math:`= 20 + 20 + 0 = 40`. + So we are on trial #41. We explore the first :math:`\epsilon T = 0.1 * 50 = 5` trials + and thus we are in the exploitation phase. We split the allocation between the optimal arms arm1 and arm2. + + ``{arm1: 0.5, arm2: 0.5, arm3: 0.0}`` + + *Case 2: T = 500* + + We explore the first :math:`\epsilon T = 0.1 * 500 = 50` trials. + Since we are on trial #41, we are in the exploration phase. We choose arms randomly: + + ``{arm1: 0.33, arm2: 0.33, arm3: 0.33}`` + + :return: the dictionary of (arm, allocation) key-value pairs + :rtype: a dictionary of (String(), float64) pairs + :raise: ValueError when ``sample_arms`` are empty.
+ + """ + arms_sampled = self._historical_info.arms_sampled + num_arms = self._historical_info.num_arms + + if not arms_sampled: + raise ValueError('sample_arms is empty!') + + num_sampled = sum([sampled_arm.total for sampled_arm in arms_sampled.itervalues()]) + # Exploration phase, trials 1,2,..., epsilon * T + # Allocate equal probability to all arms + if num_sampled < self._total_samples * self._epsilon: + equal_allocation = 1.0 / num_arms + arms_to_allocations = {} + for arm_name in arms_sampled.iterkeys(): + arms_to_allocations[arm_name] = equal_allocation + return arms_to_allocations + + # Exploitation phase, trials epsilon * T+1, ..., T + winning_arm_names = self.get_winning_arm_names(arms_sampled) + + num_winning_arms = len(winning_arm_names) + arms_to_allocations = {} + + winning_arm_allocation = 1.0 / num_winning_arms + # Split allocation among winning arms, all other arms get allocation of 0. + for arm_name in arms_sampled.iterkeys(): + arms_to_allocations[arm_name] = winning_arm_allocation if arm_name in winning_arm_names else 0.0 + + return arms_to_allocations diff --git a/moe/bandit/epsilon_greedy.py b/moe/bandit/epsilon_greedy.py index 2d8843e0..d1dfa932 100644 --- a/moe/bandit/epsilon_greedy.py +++ b/moe/bandit/epsilon_greedy.py @@ -4,8 +4,6 @@ See :class:`moe.bandit.epsilon.Epsilon` for further details on this bandit. """ -import numpy - from moe.bandit.constant import DEFAULT_EPSILON, EPSILON_SUBTYPE_GREEDY from moe.bandit.epsilon import Epsilon @@ -62,24 +60,17 @@ def allocate_arms(self): :return: the dictionary of (arm, allocation) key-value pairs :rtype: a dictionary of (String(), float64) pairs + :raise: ValueError when ``sample_arms`` are empty. 
""" arms_sampled = self._historical_info.arms_sampled num_arms = self._historical_info.num_arms if not arms_sampled: raise ValueError('sample_arms are empty!') - avg_payoff_arm_name_list = [] - for arm_name, sampled_arm in arms_sampled.iteritems(): - avg_payoff = numpy.float64(sampled_arm.win - sampled_arm.loss) / sampled_arm.total if sampled_arm.total > 0 else 0 - avg_payoff_arm_name_list.append((avg_payoff, arm_name)) - - best_payoff, _ = max(avg_payoff_arm_name_list) - # Filter out arms that have average payoff less than the best payoff - winning_arm_payoff_name_list = filter(lambda avg_payoff_arm_name: avg_payoff_arm_name[0] == best_payoff, avg_payoff_arm_name_list) - # Extract a list of winning arm names from a list of (average payoff, arm name) tuples. - _, winning_arm_name_list = map(list, zip(*winning_arm_payoff_name_list)) - - num_winning_arms = len(winning_arm_name_list) + + winning_arm_names = self.get_winning_arm_names(arms_sampled) + + num_winning_arms = len(winning_arm_names) epsilon_allocation = self._epsilon / num_arms arms_to_allocations = {} @@ -89,7 +80,7 @@ def allocate_arms(self): # With probability 1-epsilon, split allocation among winning arms. 
winning_arm_allocation = (1.0 - self._epsilon) / num_winning_arms - for winning_arm_name in winning_arm_name_list: + for winning_arm_name in winning_arm_names: arms_to_allocations[winning_arm_name] += winning_arm_allocation return arms_to_allocations diff --git a/moe/bandit/linkers.py b/moe/bandit/linkers.py index e5727dc3..a7a5c8bf 100644 --- a/moe/bandit/linkers.py +++ b/moe/bandit/linkers.py @@ -2,7 +2,8 @@ """Links between the implementations of bandit algorithms.""" from collections import namedtuple -from moe.bandit.constant import EPSILON_SUBTYPE_GREEDY +from moe.bandit.constant import EPSILON_SUBTYPE_FIRST, EPSILON_SUBTYPE_GREEDY +from moe.bandit.epsilon_first import EpsilonFirst from moe.bandit.epsilon_greedy import EpsilonGreedy # Epsilon @@ -16,6 +17,10 @@ EPSILON_SUBTYPES_TO_EPSILON_METHODS = { + EPSILON_SUBTYPE_FIRST: EpsilonMethod( + subtype=EPSILON_SUBTYPE_FIRST, + bandit_class=EpsilonFirst, + ), EPSILON_SUBTYPE_GREEDY: EpsilonMethod( subtype=EPSILON_SUBTYPE_GREEDY, bandit_class=EpsilonGreedy, diff --git a/moe/tests/bandit/__init__.py b/moe/tests/bandit/__init__.py index eccf36b5..656ba3d7 100644 --- a/moe/tests/bandit/__init__.py +++ b/moe/tests/bandit/__init__.py @@ -21,7 +21,9 @@ **Files in this package** * :mod:`moe.tests.bandit.bandit_test_case`: base test case for bandit tests with a simple integration test case +* :mod:`moe.tests.bandit.epsilon_first_test`: tests for :mod:`moe.bandit.epsilon_first.EpsilonFirst` * :mod:`moe.tests.bandit.epsilon_greedy_test`: tests for :mod:`moe.bandit.epsilon_greedy.EpsilonGreedy` +* :mod:`moe.tests.bandit.epsilon_test`: tests for :mod:`moe.bandit.epsilon.Epsilon` * :mod:`moe.tests.bandit.epsilon_test_case`: test cases for classes under :mod:`moe.bandit.epsilon.Epsilon` * :mod:`moe.tests.bandit.linkers_test`: tests for :mod:`moe.bandit.linkers` diff --git a/moe/tests/bandit/epsilon_first_test.py b/moe/tests/bandit/epsilon_first_test.py new file mode 100644 index 00000000..9c1265b9 --- /dev/null +++
b/moe/tests/bandit/epsilon_first_test.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +"""Test epsilon-first bandit implementation. + +Test default values with one, two, and three arms. +Test one arm with various epsilon values. + +""" +import testify as T + +from moe.bandit.epsilon_first import EpsilonFirst +from moe.tests.bandit.epsilon_test_case import EpsilonTestCase + + +class EpsilonFirstTest(EpsilonTestCase): + + """Verify that different epsilon values and historical infos return correct results.""" + + bandit_class = EpsilonFirst + + total_samples_to_test = [1, 10, 100] + + def test_init_default(self): + """Verify that default values do not throw an error. This is purely an integration test.""" + self._test_init_default() + + def test_one_arm(self): + """Check that the one-arm case always returns the given arm as the winning arm and the allocation is 1.0.""" + for epsilon in self.epsilons_to_test: + for total_samples in self.total_samples_to_test: + bandit = self.bandit_class(self.one_arm_test_case, epsilon, total_samples) + T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 1.0}) + T.assert_equal(bandit.choose_arm(), "arm1") + + def test_two_new_arms(self): + """Check that the two-new-arms case always allocates each arm equally (the allocation is 0.5 for both arms).
This tests num_winning_arms == num_arms > 1.""" + for epsilon in self.epsilons_to_test: + for total_samples in self.total_samples_to_test: + bandit = self.bandit_class(self.two_new_arms_test_case, epsilon, total_samples) + T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 0.5, "arm2": 0.5}) + + def test_two_arms_epsilon_zero(self): + """Check that the two-arms case with zero epsilon (always exploit) always allocate arm1:1.0 and arm2:0.0 when average payoffs are arm1:1.0 and arm2:0.0.""" + epsilon = 0.0 + bandit = self.bandit_class(self.two_arms_test_case, epsilon) + T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 1.0, "arm2": 0.0}) + T.assert_equal(bandit.choose_arm(), "arm1") + + def test_two_arms_epsilon_one(self): + """Check that the two-arms case with one epsilon (always explore) always allocate arm1:0.5 and arm2:0.5 when average payoffs are arm1:1.0 and arm2:0.0.""" + epsilon = 1.0 + bandit = self.bandit_class(self.two_arms_test_case, epsilon) + T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 0.5, "arm2": 0.5}) + + def test_three_arms_explore(self): + """Check that the three-arms cases with integer and float payoffs in exploration phase return the expected arm allocations.""" + epsilon = 0.7 + total_samples = 10 + equal_allocation = 1.0 / 3 + for historical_info in [self.three_arms_test_case, self.three_arms_float_payoffs_test_case]: + bandit = self.bandit_class(historical_info, epsilon, total_samples) + T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": equal_allocation, "arm2": equal_allocation, "arm3": equal_allocation}) + + def test_three_arms_exploit(self): + """Check that the three-arms cases with integer and float payoffs in exploitation phase return the expected arm allocations.""" + epsilon = 0.5 + total_samples = 10 + for historical_info in [self.three_arms_test_case, self.three_arms_float_payoffs_test_case]: + bandit = self.bandit_class(historical_info, epsilon, total_samples) + T.assert_dicts_equal(bandit.allocate_arms(), 
{"arm1": 1.0, "arm2": 0.0, "arm3": 0.0}) + + def test_three_arms_exploit_two_winners(self): + """Check that the three-arms cases with two winners in exploitation phase return the expected arm allocations. This tests num_arms > num_winning_arms > 1.""" + epsilon = 0.5 + total_samples = 10 + bandit = self.bandit_class(self.three_arms_two_winners_test_case, epsilon, total_samples) + T.assert_dicts_equal(bandit.allocate_arms(), {"arm1": 0.5, "arm2": 0.5, "arm3": 0.0}) + + +if __name__ == "__main__": + T.run() diff --git a/moe/tests/bandit/epsilon_test.py b/moe/tests/bandit/epsilon_test.py new file mode 100644 index 00000000..0ba37d57 --- /dev/null +++ b/moe/tests/bandit/epsilon_test.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +"""Test epsilon bandit implementation (functions common to epsilon bandit). + +Test functions in :class:`moe.bandit.epsilon.Epsilon` + +""" +import logging + +import testify as T + +from moe.bandit.epsilon import Epsilon +from moe.tests.bandit.epsilon_test_case import EpsilonTestCase + + +class EpsilonTest(EpsilonTestCase): + + """Verify that different sample_arms return correct results.""" + + @T.class_setup + def disable_logging(self): + """Disable logging (for the duration of this test case).""" + logging.disable(logging.CRITICAL) + + @T.class_teardown + def enable_logging(self): + """Re-enable logging (so other test cases are unaffected).""" + logging.disable(logging.NOTSET) + + def test_empty_arm_invalid(self): + """Test empty ``sample_arms`` causes a ValueError.""" + T.assert_raises(ValueError, Epsilon.get_winning_arm_names, {}) + + def test_two_new_arms(self): + """Check that the two-new-arms case always returns both arms as winning arms. This tests num_winning_arms == num_arms > 1.""" + T.assert_sets_equal(Epsilon.get_winning_arm_names(self.two_new_arms_test_case.arms_sampled), frozenset(["arm1", "arm2"])) + + def test_three_arms_two_winners(self): + """Check that the three-arms cases with two winners return the expected winning arms.
This tests num_arms > num_winning_arms > 1.""" + T.assert_sets_equal(Epsilon.get_winning_arm_names(self.three_arms_two_winners_test_case.arms_sampled), frozenset(["arm1", "arm2"])) + + +if __name__ == "__main__": + T.run() diff --git a/moe/tests/views/rest/bandit_epsilon_test.py b/moe/tests/views/rest/bandit_epsilon_test.py index f6b802b3..3caf8f56 100644 --- a/moe/tests/views/rest/bandit_epsilon_test.py +++ b/moe/tests/views/rest/bandit_epsilon_test.py @@ -6,7 +6,7 @@ import testify as T -from moe.bandit.constant import DEFAULT_EPSILON, EPSILON_SUBTYPE_GREEDY +from moe.bandit.constant import EPSILON_SUBTYPES, EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS, EPSILON_SUBTYPE_FIRST, EPSILON_SUBTYPE_GREEDY from moe.tests.bandit.bandit_test_case import BanditTestCase from moe.tests.views.rest_test_case import RestTestCase from moe.views.constant import BANDIT_EPSILON_MOE_ROUTE @@ -17,24 +17,22 @@ class TestBanditEpsilonViews(BanditTestCase, RestTestCase): """Integration test for the /bandit/epsilon endpoint.""" - def _build_json_payload(self, subtype, historical_info, epsilon): + def _build_json_payload(self, subtype, historical_info, hyperparameter_info): """Create a json_payload to POST to the /bandit/epsilon endpoint with all needed info.""" dict_to_dump = { 'subtype': subtype, 'historical_info': historical_info.json_payload(), - 'hyperparameter_info': { - 'epsilon': epsilon, - }, + 'hyperparameter_info': hyperparameter_info, } return json.dumps(dict_to_dump) - def test_hyperparameters_passed_through(self): - """Test that the hyperparameters get passed through to the endpoint.""" + def test_epsilon_greedy_hyperparameters_passed_through(self): + """Test that the hyperparameters get passed through to the epsilon-greedy endpoint.""" historical_info = self.one_arm_test_case # Test default test parameters get passed through - json_payload = json.loads(self._build_json_payload(EPSILON_SUBTYPE_GREEDY, historical_info, DEFAULT_EPSILON)) + json_payload = 
json.loads(self._build_json_payload(EPSILON_SUBTYPE_GREEDY, historical_info, EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS[EPSILON_SUBTYPE_GREEDY])) request = pyramid.testing.DummyRequest(post=json_payload) request.json_body = json_payload @@ -53,38 +51,65 @@ def test_hyperparameters_passed_through(self): T.assert_dicts_equal(params['hyperparameter_info'], json_payload['hyperparameter_info']) + def test_epsilon_first_hyperparameters_passed_through(self): + """Test that the hyperparameters get passed through to the epsilon-first endpoint.""" + historical_info = self.one_arm_test_case + + # Test default test parameters get passed through + json_payload = json.loads(self._build_json_payload(EPSILON_SUBTYPE_FIRST, historical_info, EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS[EPSILON_SUBTYPE_FIRST])) + + request = pyramid.testing.DummyRequest(post=json_payload) + request.json_body = json_payload + view = BanditEpsilonView(request) + params = view.get_params_from_request() + + T.assert_dicts_equal(params['hyperparameter_info'], json_payload['hyperparameter_info']) + + # Test an arbitrary epsilon and total_samples get passed through + json_payload['hyperparameter_info']['epsilon'] = 1.0 + json_payload['hyperparameter_info']['total_samples'] = 20000 + + request = pyramid.testing.DummyRequest(post=json_payload) + request.json_body = json_payload + view = BanditEpsilonView(request) + params = view.get_params_from_request() + + T.assert_dicts_equal(params['hyperparameter_info'], json_payload['hyperparameter_info']) + def test_historical_info_passed_through(self): """Test that the historical info get passed through to the endpoint.""" - for historical_info in self.historical_infos_to_test: - # Test default test parameters get passed through - json_payload = json.loads(self._build_json_payload(EPSILON_SUBTYPE_GREEDY, historical_info, DEFAULT_EPSILON)) + for subtype in EPSILON_SUBTYPES: + for historical_info in self.historical_infos_to_test: + # Test default test
parameters get passed through + json_payload = json.loads(self._build_json_payload(subtype, historical_info, EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS[subtype])) - request = pyramid.testing.DummyRequest(post=json_payload) - request.json_body = json_payload - view = BanditEpsilonView(request) - params = view.get_params_from_request() + request = pyramid.testing.DummyRequest(post=json_payload) + request.json_body = json_payload + view = BanditEpsilonView(request) + params = view.get_params_from_request() - T.assert_dicts_equal(params['historical_info'], json_payload['historical_info']) + T.assert_dicts_equal(params['historical_info'], json_payload['historical_info']) def test_interface_returns_as_expected(self): """Integration test for the /bandit/epsilon endpoint.""" moe_route = BANDIT_EPSILON_MOE_ROUTE - for historical_info in self.historical_infos_to_test: - json_payload = self._build_json_payload(EPSILON_SUBTYPE_GREEDY, historical_info, DEFAULT_EPSILON) - arm_names = set([arm_name for arm_name in historical_info.arms_sampled.iterkeys()]) - resp = self.testapp.post(moe_route.endpoint, json_payload) - resp_schema = BanditEpsilonResponse() - resp_dict = resp_schema.deserialize(json.loads(resp.body)) - resp_arm_names = set([arm_name for arm_name in resp_dict['arm_allocations'].iterkeys()]) - T.assert_sets_equal(arm_names, resp_arm_names) - # The allocations should be in range [0, 1] - # The sum of all allocations should be 1.0. 
- total_allocation = 0 - for allocation in resp_dict['arm_allocations'].itervalues(): - T.assert_gte(allocation, 0) - T.assert_lte(allocation, 1) - total_allocation += allocation - T.assert_equal(total_allocation, 1.0) + for subtype in EPSILON_SUBTYPES: + for historical_info in self.historical_infos_to_test: + json_payload = self._build_json_payload(subtype, historical_info, EPSILON_SUBTYPES_TO_DEFAULT_HYPERPARAMETER_INFOS[subtype]) + arm_names = set([arm_name for arm_name in historical_info.arms_sampled.iterkeys()]) + resp = self.testapp.post(moe_route.endpoint, json_payload) + resp_schema = BanditEpsilonResponse() + resp_dict = resp_schema.deserialize(json.loads(resp.body)) + resp_arm_names = set([arm_name for arm_name in resp_dict['arm_allocations'].iterkeys()]) + T.assert_sets_equal(arm_names, resp_arm_names) + # The allocations should be in range [0, 1] + # The sum of all allocations should be 1.0. + total_allocation = 0 + for allocation in resp_dict['arm_allocations'].itervalues(): + T.assert_gte(allocation, 0) + T.assert_lte(allocation, 1) + total_allocation += allocation + T.assert_equal(total_allocation, 1.0) if __name__ == "__main__": diff --git a/moe/views/__init__.py b/moe/views/__init__.py index cb01da4f..dd3babda 100644 --- a/moe/views/__init__.py +++ b/moe/views/__init__.py @@ -5,7 +5,9 @@ * :mod:`moe.views.frontend`: the frontend code * :mod:`moe.views.rest`: various REST endpoints for internal gaussian process information - * :mod:`moe.views.gp_pretty_view`: base view for all REST endpoints + * :mod:`moe.views.pretty_view`: base view for all REST endpoints + * :mod:`moe.views.bandit_pretty_view`: base view for all bandit REST endpoints + * :mod:`moe.views.gp_pretty_view`: base view for all GP REST endpoints * :mod:`moe.views.optimizable_gp_pretty_view`: base view for REST endpoints that require optimization * :mod:`moe.views.gp_next_points_pretty_view`: base view for getting the next best points to sample * :mod:`moe.views.schemas`: schemas used to 
deserialize/serialize inputs/outputs in the REST interface diff --git a/moe/views/rest/__init__.py b/moe/views/rest/__init__.py index d05ed654..ab954c9f 100644 --- a/moe/views/rest/__init__.py +++ b/moe/views/rest/__init__.py @@ -45,4 +45,14 @@ .. http:get:: /gp/next_points/kriging/pretty +**Bandit endpoints:** + + * :mod:`~moe.views.rest.bandit_epsilon` + + .. http:post:: /bandit/epsilon + + Calculates the arm allocations and the best arm to pull next, given subtype, historical data, hyperparameters. + + .. http:get:: /bandit/epsilon/pretty + """ diff --git a/moe/views/rest/bandit_epsilon.py b/moe/views/rest/bandit_epsilon.py index bffc52f2..130c8ea1 100644 --- a/moe/views/rest/bandit_epsilon.py +++ b/moe/views/rest/bandit_epsilon.py @@ -6,6 +6,8 @@ 1. pretty and backend views """ +import copy + from pyramid.view import view_config from moe.bandit.constant import DEFAULT_EPSILON, EPSILON_SUBTYPE_GREEDY @@ -13,6 +15,7 @@ from moe.views.bandit_pretty_view import BanditPrettyView from moe.views.constant import BANDIT_EPSILON_ROUTE_NAME, BANDIT_EPSILON_PRETTY_ROUTE_NAME from moe.views.pretty_view import PRETTY_RENDERER +from moe.views.schemas.bandit_pretty_view import BANDIT_EPSILON_SUBTYPES_TO_HYPERPARAMETER_INFO_SCHEMA_CLASSES from moe.views.schemas.rest.bandit_epsilon import BanditEpsilonRequest, BanditEpsilonResponse from moe.views.utils import _make_bandit_historical_info_from_params @@ -33,6 +36,38 @@ class BanditEpsilonView(BanditPrettyView): "hyperparameter_info": {"epsilon": DEFAULT_EPSILON}, } + def get_params_from_request(self): + """Return the deserialized parameters from the json_body of a request. + + We explicitly pull out the ``hyperparameter_info`` and deserialize and validate its + parameters (epsilon, total_samples) against the schema selected by ``subtype``. + + This is necessary because we have different hyperparameters for + different subtypes.
+ + :returns: A deserialized self.request_schema object + :rtype: dict + + """ + # First we get the standard params (not including historical info) + params = super(BanditEpsilonView, self).get_params_from_request() + + # colander deserialized results are READ-ONLY. We will potentially be overwriting + # fields of ``params['hyperparameter_info']``, so we need to copy it first. + params['hyperparameter_info'] = copy.deepcopy(params['hyperparameter_info']) + + # Find the schema class that corresponds to the ``subtype`` of the request + # hyperparameter_info has *not been validated yet*, so we need to validate manually. + schema_class = BANDIT_EPSILON_SUBTYPES_TO_HYPERPARAMETER_INFO_SCHEMA_CLASSES[params['subtype']]() + + # Deserialize and validate the parameters + validated_hyperparameter_info = schema_class.deserialize(params['hyperparameter_info']) + + # Put the now validated hyperparameter info back into the params dictionary to be consumed by the view + params['hyperparameter_info'] = validated_hyperparameter_info + + return params + @view_config(route_name=_pretty_route_name, renderer=PRETTY_RENDERER) def pretty_view(self): """A pretty, browser interactive view for the interface. Includes form request and response. 
@@ -61,9 +96,8 @@ def bandit_epsilon_view(self): subtype = params.get('subtype') historical_info = _make_bandit_historical_info_from_params(params) - epsilon = params.get('hyperparameter_info').get('epsilon') - bandit_class = EPSILON_SUBTYPES_TO_EPSILON_METHODS[subtype].bandit_class(historical_info=historical_info, epsilon=epsilon) + bandit_class = EPSILON_SUBTYPES_TO_EPSILON_METHODS[subtype].bandit_class(historical_info=historical_info, **params.get('hyperparameter_info')) return self.form_response({ 'endpoint': self._route_name, diff --git a/moe/views/schemas/__init__.py b/moe/views/schemas/__init__.py index cc8820f7..b217ea6b 100644 --- a/moe/views/schemas/__init__.py +++ b/moe/views/schemas/__init__.py @@ -3,6 +3,7 @@ Contains: + * :mod:`moe.views.schemas.bandit_pretty_view`: common schemas for the ``bandit_*`` endpoints * :mod:`moe.views.schemas.base_schemas`: basic building-block schemas for use in other, more complex schemas * :mod:`moe.views.schemas.gp_next_points_pretty_view`: common schemas for the ``gp_next_points_*`` endpoints * :mod:`moe.views.rest`: schemas for specific REST endpoints diff --git a/moe/views/schemas/bandit_pretty_view.py b/moe/views/schemas/bandit_pretty_view.py index e101977a..d6c337f4 100644 --- a/moe/views/schemas/bandit_pretty_view.py +++ b/moe/views/schemas/bandit_pretty_view.py @@ -2,7 +2,7 @@ """Base schemas for creating SampledArm and allocations for bandit endpoints along with base request/response schema components.""" import colander -from moe.bandit.constant import DEFAULT_EPSILON +from moe.bandit.constant import DEFAULT_EPSILON, DEFAULT_TOTAL_SAMPLES, EPSILON_SUBTYPE_FIRST, EPSILON_SUBTYPE_GREEDY from moe.bandit.data_containers import SampleArm from moe.views.schemas import base_schemas @@ -65,7 +65,32 @@ def validator(self, node, cstruct): SampleArm(sample_arm['win'], sample_arm['loss'], sample_arm['total']) -class BanditEpsilonHyperparameterInfo(base_schemas.StrictMappingSchema): +class 
BanditEpsilonFirstHyperparameterInfo(base_schemas.StrictMappingSchema): + + """The hyperparameter info needed for every Bandit Epsilon-First request. + + **Required fields** + + :ivar epsilon: (*0.0 <= float64 <= 1.0*) epsilon value for epsilon-first bandit. This strategy pulls the optimal arm + (best expected return) if it is in exploitation phase (number sampled >= epsilon * total_samples). Otherwise a random arm is pulled (exploration). + :ivar total_samples: (*int >= 0*) total number of samples for epsilon-first bandit. total_samples is T from :doc:`bandit`. + + """ + + epsilon = colander.SchemaNode( + colander.Float(), + validator=colander.Range(min=0.0, max=1.0), + missing=DEFAULT_EPSILON, + ) + + total_samples = colander.SchemaNode( + colander.Int(), + validator=colander.Range(min=0), + missing=DEFAULT_TOTAL_SAMPLES, + ) + + +class BanditEpsilonGreedyHyperparameterInfo(base_schemas.StrictMappingSchema): """The hyperparameter info needed for every Bandit Epsilon request. @@ -83,6 +108,14 @@ class BanditEpsilonHyperparameterInfo(base_schemas.StrictMappingSchema): ) +#: Mapping from bandit epsilon subtypes (:const:`moe.bandit.constant.EPSILON_SUBTYPES`) to +#: hyperparameter info schemas, e.g., :class:`moe.views.schemas.bandit_pretty_view.BanditEpsilonFirstHyperparameterInfo`. +BANDIT_EPSILON_SUBTYPES_TO_HYPERPARAMETER_INFO_SCHEMA_CLASSES = { + EPSILON_SUBTYPE_FIRST: BanditEpsilonFirstHyperparameterInfo, + EPSILON_SUBTYPE_GREEDY: BanditEpsilonGreedyHyperparameterInfo, + } + + class BanditHistoricalInfo(base_schemas.StrictMappingSchema): """The Bandit historical info needed for every request.
diff --git a/moe/views/schemas/rest/bandit_epsilon.py b/moe/views/schemas/rest/bandit_epsilon.py index 9ac3e00e..bf6dddd0 100644 --- a/moe/views/schemas/rest/bandit_epsilon.py +++ b/moe/views/schemas/rest/bandit_epsilon.py @@ -4,7 +4,7 @@ from moe.bandit.constant import EPSILON_SUBTYPE_GREEDY, EPSILON_SUBTYPES from moe.views.schemas import base_schemas -from moe.views.schemas.bandit_pretty_view import ArmAllocations, BanditEpsilonHyperparameterInfo, BanditHistoricalInfo +from moe.views.schemas.bandit_pretty_view import ArmAllocations, BanditHistoricalInfo class BanditEpsilonRequest(base_schemas.StrictMappingSchema): @@ -18,7 +18,7 @@ class BanditEpsilonRequest(base_schemas.StrictMappingSchema): **Optional fields** :ivar subtype: (*str*) subtype of the epsilon bandit algorithm (default: greedy) - :ivar hyperparameter_info: (:class:`moe.views.schemas.bandit_pretty_view.BanditEpsilonHyperparameterInfo`) dict of hyperparameter information + :ivar hyperparameter_info: (:class:`~moe.views.schemas.bandit_pretty_view.BanditEpsilonFirstHyperparameterInfo` or :class:`~moe.views.schemas.bandit_pretty_view.BanditEpsilonGreedyHyperparameterInfo`) dict of hyperparameter information **Example Minimal Request** @@ -64,7 +64,10 @@ class BanditEpsilonRequest(base_schemas.StrictMappingSchema): missing=EPSILON_SUBTYPE_GREEDY, ) historical_info = BanditHistoricalInfo() - hyperparameter_info = BanditEpsilonHyperparameterInfo() + hyperparameter_info = colander.SchemaNode( + colander.Mapping(unknown='preserve'), + missing={}, + ) class BanditEpsilonResponse(base_schemas.StrictMappingSchema):